summary | refs | log | tree | commit | diff
path: root/gfx
diff options
context:
space:
mode:
author: Matt A. Tobin <email@mattatobin.com> 2021-09-23 18:23:04 -0400
committer: Matt A. Tobin <email@mattatobin.com> 2021-09-23 18:23:04 -0400
commit: 6acb3d91365c63763c58166b562bac23eba6710b (patch)
tree: 28cbd03027fd3c6855410a8b6bbc488882816c22 /gfx
parent: 53d20b4e60cfb8385d4411a09fb82fc680852f62 (diff)
download: aura-central-6acb3d91365c63763c58166b562bac23eba6710b.tar.gz
Issue %3003 - Move ycbcr to libs/
Diffstat (limited to 'gfx')
-rw-r--r-- gfx/moz.build 1
-rw-r--r-- gfx/ycbcr/LICENSE 27
-rw-r--r-- gfx/ycbcr/QuellGccWarnings.patch 40
-rw-r--r-- gfx/ycbcr/README 29
-rw-r--r-- gfx/ycbcr/TypeFromSize.patch 58
-rw-r--r-- gfx/ycbcr/YCbCrUtils.cpp 157
-rw-r--r-- gfx/ycbcr/YCbCrUtils.h 30
-rw-r--r-- gfx/ycbcr/chromium_types.h 50
-rw-r--r-- gfx/ycbcr/convert.patch.outdated 3143
-rw-r--r-- gfx/ycbcr/moz.build 64
-rw-r--r-- gfx/ycbcr/scale_yuv_argb.cpp 1128
-rw-r--r-- gfx/ycbcr/scale_yuv_argb.h 39
-rw-r--r-- gfx/ycbcr/update.sh 12
-rw-r--r-- gfx/ycbcr/win64.patch 210
-rw-r--r-- gfx/ycbcr/ycbcr_to_rgb565.cpp 672
-rw-r--r-- gfx/ycbcr/ycbcr_to_rgb565.h 72
-rw-r--r-- gfx/ycbcr/yuv_convert.cpp 510
-rw-r--r-- gfx/ycbcr/yuv_convert.h 110
-rw-r--r-- gfx/ycbcr/yuv_convert_arm.cpp 232
-rw-r--r-- gfx/ycbcr/yuv_convert_mmx.cpp 45
-rw-r--r-- gfx/ycbcr/yuv_convert_sse2.cpp 47
-rw-r--r-- gfx/ycbcr/yuv_row.h 142
-rw-r--r-- gfx/ycbcr/yuv_row_arm.s 304
-rw-r--r-- gfx/ycbcr/yuv_row_c.cpp 133
-rw-r--r-- gfx/ycbcr/yuv_row_other.cpp 34
-rw-r--r-- gfx/ycbcr/yuv_row_posix.cpp 894
-rw-r--r-- gfx/ycbcr/yuv_row_table.cpp 233
-rw-r--r-- gfx/ycbcr/yuv_row_win.cpp 498
-rw-r--r-- gfx/ycbcr/yuv_row_win64.cpp 205
29 files changed, 0 insertions, 9119 deletions
diff --git a/gfx/moz.build b/gfx/moz.build
index 6d825bae9..7914cea6b 100644
--- a/gfx/moz.build
+++ b/gfx/moz.build
@@ -5,7 +5,6 @@
DIRS += [
'2d',
- 'ycbcr',
'src',
'gl',
'layers',
diff --git a/gfx/ycbcr/LICENSE b/gfx/ycbcr/LICENSE
deleted file mode 100644
index 8dc35041d..000000000
--- a/gfx/ycbcr/LICENSE
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/gfx/ycbcr/QuellGccWarnings.patch b/gfx/ycbcr/QuellGccWarnings.patch
deleted file mode 100644
index d580ac981..000000000
--- a/gfx/ycbcr/QuellGccWarnings.patch
+++ /dev/null
@@ -1,40 +0,0 @@
-diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
---- a/gfx/ycbcr/yuv_convert.cpp
-+++ b/gfx/ycbcr/yuv_convert.cpp
-@@ -337,16 +337,17 @@ void ScaleYCbCrToRGB32(const uint* yplan
- source_dx_uv >> kFractionBits);
- }
- }
- else {
- ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
- dest_pixel, width, source_dx);
- }
- #else
-+ (void)source_dx_uv;
- ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, width, source_dx);
- #endif
- }
- }
- // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.
- if (has_mmx)
- EMMS();
-diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h
---- a/gfx/ycbcr/yuv_row.h
-+++ b/gfx/ycbcr/yuv_row.h
-@@ -129,14 +129,14 @@ extern SIMD_ALIGNED(int16 kCoefficientsR
- #if defined(ARCH_CPU_X86) && !defined(ARCH_CPU_X86_64)
- #if defined(_MSC_VER)
- #define EMMS() __asm emms
- #pragma warning(disable: 4799)
- #else
- #define EMMS() asm("emms")
- #endif
- #else
--#define EMMS()
-+#define EMMS() ((void)0)
- #endif
-
- } // extern "C"
-
- #endif // MEDIA_BASE_YUV_ROW_H_
diff --git a/gfx/ycbcr/README b/gfx/ycbcr/README
deleted file mode 100644
index a951bc83a..000000000
--- a/gfx/ycbcr/README
+++ /dev/null
@@ -1,29 +0,0 @@
-This color conversion code is from the Chromium open source project available here:
-
-http://code.google.com/chromium/
-
-The code comes from svn revision 63840 on 2010-10-26.
-
-If you just want to check out this individual directory, use:
-
-svn co -r 63840 http://src.chromium.org/svn/trunk/src/media/base
-
-The code was copied from a Chromium svn checkout using the 'update.sh' script which then applies patches for our build and to add dynamic CPU detection.
-
-convert.patch contains the following changes:
-
- * Change Chromium code to build using Mozilla build system.
- * Add runtime CPU detection for MMX
- * Move default C implementation to work on all platforms.
- * Change Chromium code to allow a picture region.
- * The YUV conversion will convert within this picture region only.
- * Add YCbCr 4:4:4 support
- * Bug 619178 - Update CPU detection in yuv_convert to new SSE.h interface.
- * Bug 616778 - Split yuv_convert FilterRows vectorized code into separate files so it can
- be properly guarded with cpuid() calls.
-
-win64.patch: SSE2 optimization for Microsoft Visual C++ x64 version
-
-TypeFromSize.patch: Bug 656185 - Add a method to detect YUVType from plane sizes.
-
-QuellGccWarnings.patch: Bug 711895 - Avoid some GCC compilation warnings.
diff --git a/gfx/ycbcr/TypeFromSize.patch b/gfx/ycbcr/TypeFromSize.patch
deleted file mode 100644
index d08a19690..000000000
--- a/gfx/ycbcr/TypeFromSize.patch
+++ /dev/null
@@ -1,58 +0,0 @@
-diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
---- a/gfx/ycbcr/yuv_convert.cpp
-+++ b/gfx/ycbcr/yuv_convert.cpp
-@@ -26,16 +26,32 @@ namespace mozilla {
-
- namespace gfx {
-
- // 16.16 fixed point arithmetic
- const int kFractionBits = 16;
- const int kFractionMax = 1 << kFractionBits;
- const int kFractionMask = ((1 << kFractionBits) - 1);
-
-+YUVType TypeFromSize(int ywidth,
-+ int yheight,
-+ int cbcrwidth,
-+ int cbcrheight)
-+{
-+ if (ywidth == cbcrwidth && yheight == cbcrheight) {
-+ return YV24;
-+ }
-+ else if (ywidth / 2 == cbcrwidth && yheight == cbcrheight) {
-+ return YV16;
-+ }
-+ else {
-+ return YV12;
-+ }
-+}
-+
- // Convert a frame of YUV to 32 bit ARGB.
- void ConvertYCbCrToRGB32(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int pic_x,
- int pic_y,
- int pic_width,
-diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
---- a/gfx/ycbcr/yuv_convert.h
-+++ b/gfx/ycbcr/yuv_convert.h
-@@ -36,16 +36,18 @@ enum Rotate {
- // Filter affects how scaling looks.
- enum ScaleFilter {
- FILTER_NONE = 0, // No filter (point sampled).
- FILTER_BILINEAR_H = 1, // Bilinear horizontal filter.
- FILTER_BILINEAR_V = 2, // Bilinear vertical filter.
- FILTER_BILINEAR = 3 // Bilinear filter.
- };
-
-+YUVType TypeFromSize(int ywidth, int yheight, int cbcrwidth, int cbcrheight);
-+
- // Convert a frame of YUV to 32 bit ARGB.
- // Pass in YV16/YV12 depending on source format
- void ConvertYCbCrToRGB32(const uint8* yplane,
- const uint8* uplane,
- const uint8* vplane,
- uint8* rgbframe,
- int pic_x,
- int pic_y,
diff --git a/gfx/ycbcr/YCbCrUtils.cpp b/gfx/ycbcr/YCbCrUtils.cpp
deleted file mode 100644
index 882197857..000000000
--- a/gfx/ycbcr/YCbCrUtils.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*-
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "gfx2DGlue.h"
-
-#include "YCbCrUtils.h"
-#include "yuv_convert.h"
-#include "ycbcr_to_rgb565.h"
-
-namespace mozilla {
-namespace gfx {
-
-void
-GetYCbCrToRGBDestFormatAndSize(const layers::PlanarYCbCrData& aData,
- SurfaceFormat& aSuggestedFormat,
- IntSize& aSuggestedSize)
-{
- YUVType yuvtype =
- TypeFromSize(aData.mYSize.width,
- aData.mYSize.height,
- aData.mCbCrSize.width,
- aData.mCbCrSize.height);
-
- // 'prescale' is true if the scaling is to be done as part of the
- // YCbCr to RGB conversion rather than on the RGB data when rendered.
- bool prescale = aSuggestedSize.width > 0 && aSuggestedSize.height > 0 &&
- aSuggestedSize != aData.mPicSize;
-
- if (aSuggestedFormat == SurfaceFormat::R5G6B5_UINT16) {
-#if defined(HAVE_YCBCR_TO_RGB565)
- if (prescale &&
- !IsScaleYCbCrToRGB565Fast(aData.mPicX,
- aData.mPicY,
- aData.mPicSize.width,
- aData.mPicSize.height,
- aSuggestedSize.width,
- aSuggestedSize.height,
- yuvtype,
- FILTER_BILINEAR) &&
- IsConvertYCbCrToRGB565Fast(aData.mPicX,
- aData.mPicY,
- aData.mPicSize.width,
- aData.mPicSize.height,
- yuvtype)) {
- prescale = false;
- }
-#else
- // yuv2rgb16 function not available
- aSuggestedFormat = SurfaceFormat::B8G8R8X8;
-#endif
- }
- else if (aSuggestedFormat != SurfaceFormat::B8G8R8X8) {
- // No other formats are currently supported.
- aSuggestedFormat = SurfaceFormat::B8G8R8X8;
- }
- if (aSuggestedFormat == SurfaceFormat::B8G8R8X8) {
- /* ScaleYCbCrToRGB32 does not support a picture offset, nor 4:4:4 data.
- See bugs 639415 and 640073. */
- if (aData.mPicX != 0 || aData.mPicY != 0 || yuvtype == YV24)
- prescale = false;
- }
- if (!prescale) {
- aSuggestedSize = aData.mPicSize;
- }
-}
-
-void
-ConvertYCbCrToRGB(const layers::PlanarYCbCrData& aData,
- const SurfaceFormat& aDestFormat,
- const IntSize& aDestSize,
- unsigned char* aDestBuffer,
- int32_t aStride)
-{
- // ConvertYCbCrToRGB et al. assume the chroma planes are rounded up if the
- // luma plane is odd sized.
- MOZ_ASSERT((aData.mCbCrSize.width == aData.mYSize.width ||
- aData.mCbCrSize.width == (aData.mYSize.width + 1) >> 1) &&
- (aData.mCbCrSize.height == aData.mYSize.height ||
- aData.mCbCrSize.height == (aData.mYSize.height + 1) >> 1));
- YUVType yuvtype =
- TypeFromSize(aData.mYSize.width,
- aData.mYSize.height,
- aData.mCbCrSize.width,
- aData.mCbCrSize.height);
-
- // Convert from YCbCr to RGB now, scaling the image if needed.
- if (aDestSize != aData.mPicSize) {
-#if defined(HAVE_YCBCR_TO_RGB565)
- if (aDestFormat == SurfaceFormat::R5G6B5_UINT16) {
- ScaleYCbCrToRGB565(aData.mYChannel,
- aData.mCbChannel,
- aData.mCrChannel,
- aDestBuffer,
- aData.mPicX,
- aData.mPicY,
- aData.mPicSize.width,
- aData.mPicSize.height,
- aDestSize.width,
- aDestSize.height,
- aData.mYStride,
- aData.mCbCrStride,
- aStride,
- yuvtype,
- FILTER_BILINEAR);
- } else
-#endif
- ScaleYCbCrToRGB32(aData.mYChannel, //
- aData.mCbChannel,
- aData.mCrChannel,
- aDestBuffer,
- aData.mPicSize.width,
- aData.mPicSize.height,
- aDestSize.width,
- aDestSize.height,
- aData.mYStride,
- aData.mCbCrStride,
- aStride,
- yuvtype,
- aData.mYUVColorSpace,
- FILTER_BILINEAR);
- } else { // no prescale
-#if defined(HAVE_YCBCR_TO_RGB565)
- if (aDestFormat == SurfaceFormat::R5G6B5_UINT16) {
- ConvertYCbCrToRGB565(aData.mYChannel,
- aData.mCbChannel,
- aData.mCrChannel,
- aDestBuffer,
- aData.mPicX,
- aData.mPicY,
- aData.mPicSize.width,
- aData.mPicSize.height,
- aData.mYStride,
- aData.mCbCrStride,
- aStride,
- yuvtype);
- } else // aDestFormat != SurfaceFormat::R5G6B5_UINT16
-#endif
- ConvertYCbCrToRGB32(aData.mYChannel, //
- aData.mCbChannel,
- aData.mCrChannel,
- aDestBuffer,
- aData.mPicX,
- aData.mPicY,
- aData.mPicSize.width,
- aData.mPicSize.height,
- aData.mYStride,
- aData.mCbCrStride,
- aStride,
- yuvtype,
- aData.mYUVColorSpace);
- }
-}
-
-} // namespace gfx
-} // namespace mozilla
diff --git a/gfx/ycbcr/YCbCrUtils.h b/gfx/ycbcr/YCbCrUtils.h
deleted file mode 100644
index 1cd2e1c4f..000000000
--- a/gfx/ycbcr/YCbCrUtils.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef MOZILLA_GFX_UTILS_H_
-#define MOZILLA_GFX_UTILS_H_
-
-#include "mozilla/gfx/Types.h"
-#include "ImageContainer.h"
-
-namespace mozilla {
-namespace gfx {
-
-void
-GetYCbCrToRGBDestFormatAndSize(const layers::PlanarYCbCrData& aData,
- SurfaceFormat& aSuggestedFormat,
- IntSize& aSuggestedSize);
-
-void
-ConvertYCbCrToRGB(const layers::PlanarYCbCrData& aData,
- const SurfaceFormat& aDestFormat,
- const IntSize& aDestSize,
- unsigned char* aDestBuffer,
- int32_t aStride);
-
-} // namespace gfx
-} // namespace mozilla
-
-#endif /* MOZILLA_GFX_UTILS_H_ */
diff --git a/gfx/ycbcr/chromium_types.h b/gfx/ycbcr/chromium_types.h
deleted file mode 100644
index dceac4766..000000000
--- a/gfx/ycbcr/chromium_types.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*-
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-#ifndef GFX_CHROMIUMTYPES_H
-#define GFX_CHROMIUMTYPES_H
-
-#include <stdint.h>
-
-#include "libyuv/basic_types.h"
-
-// From Chromium build_config.h:
-// Processor architecture detection. For more info on what's defined, see:
-// http://msdn.microsoft.com/en-us/library/b0084kay.aspx
-// http://www.agner.org/optimize/calling_conventions.pdf
-// or with gcc, run: "echo | gcc -E -dM -"
-#if defined(_M_X64) || defined(__x86_64__)
-#define ARCH_CPU_X86_FAMILY 1
-#define ARCH_CPU_X86_64 1
-#define ARCH_CPU_64_BITS 1
-#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
-#define ARCH_CPU_X86_FAMILY 1
-#define ARCH_CPU_X86_32 1
-#define ARCH_CPU_X86 1
-#define ARCH_CPU_32_BITS 1
-#elif defined(__ARMEL__)
-#define ARCH_CPU_ARM_FAMILY 1
-#define ARCH_CPU_ARMEL 1
-#define ARCH_CPU_32_BITS 1
-#elif defined(__ppc__) || defined(__powerpc) || defined(__PPC__)
-#define ARCH_CPU_PPC_FAMILY 1
-#define ARCH_CPU_PPC 1
-#define ARCH_CPU_32_BITS 1
-#elif defined(__sparc)
-#define ARCH_CPU_SPARC_FAMILY 1
-#define ARCH_CPU_SPARC 1
-#define ARCH_CPU_32_BITS 1
-#elif defined(__sparcv9)
-#define ARCH_CPU_SPARC_FAMILY 1
-#define ARCH_CPU_SPARC 1
-#define ARCH_CPU_64_BITS 1
-#elif defined(__aarch64__)
-#define ARCH_CPU_AARCH64_FAMILY 1
-#define ARCH_CPU_AARCH64 1
-#define ARCH_CPU_64_BITS 1
-#else
-#warning Please add support for your architecture in chromium_types.h
-#endif
-
-#endif // GFX_CHROMIUMTYPES_H
diff --git a/gfx/ycbcr/convert.patch.outdated b/gfx/ycbcr/convert.patch.outdated
deleted file mode 100644
index e39f923b3..000000000
--- a/gfx/ycbcr/convert.patch.outdated
+++ /dev/null
@@ -1,3143 +0,0 @@
-diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
---- a/gfx/ycbcr/yuv_convert.cpp
-+++ b/gfx/ycbcr/yuv_convert.cpp
-@@ -6,145 +6,102 @@
- // http://www.fourcc.org/yuv.php
- // The actual conversion is best described here
- // http://en.wikipedia.org/wiki/YUV
- // An article on optimizing YUV conversion using tables instead of multiplies
- // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf
- //
- // YV12 is a full plane of Y and a half height, half width chroma planes
- // YV16 is a full plane of Y and a full height, half width chroma planes
-+// YV24 is a full plane of Y and a full height, full width chroma planes
- //
- // ARGB pixel format is output, which on little endian is stored as BGRA.
- // The alpha is set to 255, allowing the application to use RGBA or RGB32.
-
--#include "media/base/yuv_convert.h"
-+#include "yuv_convert.h"
-
- // Header for low level row functions.
--#include "media/base/yuv_row.h"
--
--#if USE_MMX
--#if defined(_MSC_VER)
--#include <intrin.h>
--#else
--#include <mmintrin.h>
--#endif
--#endif
--
--#if USE_SSE2
--#include <emmintrin.h>
--#endif
--
--namespace media {
--
-+#include "yuv_row.h"
-+#include "mozilla/SSE.h"
-+
-+namespace mozilla {
-+
-+namespace gfx {
-+
- // 16.16 fixed point arithmetic
- const int kFractionBits = 16;
- const int kFractionMax = 1 << kFractionBits;
- const int kFractionMask = ((1 << kFractionBits) - 1);
-
- // Convert a frame of YUV to 32 bit ARGB.
--void ConvertYUVToRGB32(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int height,
-- int y_pitch,
-- int uv_pitch,
-- int rgb_pitch,
-- YUVType yuv_type) {
-- unsigned int y_shift = yuv_type;
-- for (int y = 0; y < height; ++y) {
-- uint8* rgb_row = rgb_buf + y * rgb_pitch;
-- const uint8* y_ptr = y_buf + y * y_pitch;
-- const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch;
-- const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch;
--
-- FastConvertYUVToRGB32Row(y_ptr,
-- u_ptr,
-- v_ptr,
-- rgb_row,
-- width);
-- }
-+void ConvertYCbCrToRGB32(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int pic_x,
-+ int pic_y,
-+ int pic_width,
-+ int pic_height,
-+ int y_pitch,
-+ int uv_pitch,
-+ int rgb_pitch,
-+ YUVType yuv_type) {
-+ unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
-+ unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
-+ // Test for SSE because the optimized code uses movntq, which is not part of MMX.
-+ bool has_sse = supports_mmx() && supports_sse();
-+ // There is no optimized YV24 SSE routine so we check for this and
-+ // fall back to the C code.
-+ has_sse &= yuv_type != YV24;
-+ bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
-+ int x_width = odd_pic_x ? pic_width - 1 : pic_width;
-+
-+ for (int y = pic_y; y < pic_height + pic_y; ++y) {
-+ uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
-+ const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
-+ const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
-+ const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
-+
-+ if (odd_pic_x) {
-+ // Handle the single odd pixel manually and use the
-+ // fast routines for the remaining.
-+ FastConvertYUVToRGB32Row_C(y_ptr++,
-+ u_ptr++,
-+ v_ptr++,
-+ rgb_row,
-+ 1,
-+ x_shift);
-+ rgb_row += 4;
-+ }
-+
-+ if (has_sse) {
-+ FastConvertYUVToRGB32Row(y_ptr,
-+ u_ptr,
-+ v_ptr,
-+ rgb_row,
-+ x_width);
-+ }
-+ else {
-+ FastConvertYUVToRGB32Row_C(y_ptr,
-+ u_ptr,
-+ v_ptr,
-+ rgb_row,
-+ x_width,
-+ x_shift);
-+ }
-+ }
-
- // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
-- EMMS();
--}
--
--#if USE_SSE2
--// FilterRows combines two rows of the image using linear interpolation.
--// SSE2 version does 16 pixels at a time
--
--static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
-- int source_width, int source_y_fraction) {
-- __m128i zero = _mm_setzero_si128();
-- __m128i y1_fraction = _mm_set1_epi16(source_y_fraction);
-- __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction);
--
-- const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr);
-- const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr);
-- __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf);
-- __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width);
--
-- do {
-- __m128i y0 = _mm_loadu_si128(y0_ptr128);
-- __m128i y1 = _mm_loadu_si128(y1_ptr128);
-- __m128i y2 = _mm_unpackhi_epi8(y0, zero);
-- __m128i y3 = _mm_unpackhi_epi8(y1, zero);
-- y0 = _mm_unpacklo_epi8(y0, zero);
-- y1 = _mm_unpacklo_epi8(y1, zero);
-- y0 = _mm_mullo_epi16(y0, y0_fraction);
-- y1 = _mm_mullo_epi16(y1, y1_fraction);
-- y2 = _mm_mullo_epi16(y2, y0_fraction);
-- y3 = _mm_mullo_epi16(y3, y1_fraction);
-- y0 = _mm_add_epi16(y0, y1);
-- y2 = _mm_add_epi16(y2, y3);
-- y0 = _mm_srli_epi16(y0, 8);
-- y2 = _mm_srli_epi16(y2, 8);
-- y0 = _mm_packus_epi16(y0, y2);
-- *dest128++ = y0;
-- ++y0_ptr128;
-- ++y1_ptr128;
-- } while (dest128 < end128);
--}
--#elif USE_MMX
--// MMX version does 8 pixels at a time
--static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
-- int source_width, int source_y_fraction) {
-- __m64 zero = _mm_setzero_si64();
-- __m64 y1_fraction = _mm_set1_pi16(source_y_fraction);
-- __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction);
--
-- const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr);
-- const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr);
-- __m64* dest64 = reinterpret_cast<__m64*>(ybuf);
-- __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width);
--
-- do {
-- __m64 y0 = *y0_ptr64++;
-- __m64 y1 = *y1_ptr64++;
-- __m64 y2 = _mm_unpackhi_pi8(y0, zero);
-- __m64 y3 = _mm_unpackhi_pi8(y1, zero);
-- y0 = _mm_unpacklo_pi8(y0, zero);
-- y1 = _mm_unpacklo_pi8(y1, zero);
-- y0 = _mm_mullo_pi16(y0, y0_fraction);
-- y1 = _mm_mullo_pi16(y1, y1_fraction);
-- y2 = _mm_mullo_pi16(y2, y0_fraction);
-- y3 = _mm_mullo_pi16(y3, y1_fraction);
-- y0 = _mm_add_pi16(y0, y1);
-- y2 = _mm_add_pi16(y2, y3);
-- y0 = _mm_srli_pi16(y0, 8);
-- y2 = _mm_srli_pi16(y2, 8);
-- y0 = _mm_packs_pu16(y0, y2);
-- *dest64++ = y0;
-- } while (dest64 < end64);
--}
--#else // no MMX or SSE2
-+ if (has_sse)
-+ EMMS();
-+}
-+
- // C version does 8 at a time to mimic MMX code
--static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
-- int source_width, int source_y_fraction) {
-+static void FilterRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
-+ int source_width, int source_y_fraction) {
- int y1_fraction = source_y_fraction;
- int y0_fraction = 256 - y1_fraction;
- uint8* end = ybuf + source_width;
- do {
- ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8;
- ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8;
- ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8;
- ybuf[3] = (y0_ptr[3] * y0_fraction + y1_ptr[3] * y1_fraction) >> 8;
-@@ -152,46 +140,77 @@ static void FilterRows(uint8* ybuf, cons
- ybuf[5] = (y0_ptr[5] * y0_fraction + y1_ptr[5] * y1_fraction) >> 8;
- ybuf[6] = (y0_ptr[6] * y0_fraction + y1_ptr[6] * y1_fraction) >> 8;
- ybuf[7] = (y0_ptr[7] * y0_fraction + y1_ptr[7] * y1_fraction) >> 8;
- y0_ptr += 8;
- y1_ptr += 8;
- ybuf += 8;
- } while (ybuf < end);
- }
--#endif
-+
-+#ifdef MOZILLA_MAY_SUPPORT_MMX
-+void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
-+ int source_width, int source_y_fraction);
-+#endif
-+
-+#ifdef MOZILLA_MAY_SUPPORT_SSE2
-+void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
-+ int source_width, int source_y_fraction);
-+#endif
-+
-+static inline void FilterRows(uint8* ybuf, const uint8* y0_ptr,
-+ const uint8* y1_ptr, int source_width,
-+ int source_y_fraction) {
-+#ifdef MOZILLA_MAY_SUPPORT_SSE2
-+ if (mozilla::supports_sse2()) {
-+ FilterRows_SSE2(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
-+ return;
-+ }
-+#endif
-+
-+#ifdef MOZILLA_MAY_SUPPORT_MMX
-+ if (mozilla::supports_mmx()) {
-+ FilterRows_MMX(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
-+ return;
-+ }
-+#endif
-+
-+ FilterRows_C(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
-+}
-
-
- // Scale a frame of YUV to 32 bit ARGB.
--void ScaleYUVToRGB32(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int source_width,
-- int source_height,
-- int width,
-- int height,
-- int y_pitch,
-- int uv_pitch,
-- int rgb_pitch,
-- YUVType yuv_type,
-- Rotate view_rotate,
-- ScaleFilter filter) {
-+void ScaleYCbCrToRGB32(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int source_width,
-+ int source_height,
-+ int width,
-+ int height,
-+ int y_pitch,
-+ int uv_pitch,
-+ int rgb_pitch,
-+ YUVType yuv_type,
-+ Rotate view_rotate,
-+ ScaleFilter filter) {
-+ bool has_mmx = supports_mmx();
-+
- // 4096 allows 3 buffers to fit in 12k.
- // Helps performance on CPU with 16K L1 cache.
- // Large enough for 3830x2160 and 30" displays which are 2560x1600.
- const int kFilterBufferSize = 4096;
- // Disable filtering if the screen is too big (to avoid buffer overflows).
- // This should never happen to regular users: they don't have monitors
- // wider than 4096 pixels.
- // TODO(fbarchard): Allow rotated videos to filter.
- if (source_width > kFilterBufferSize || view_rotate)
- filter = FILTER_NONE;
-
-- unsigned int y_shift = yuv_type;
-+ unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
- // Diagram showing origin and direction of source sampling.
- // ->0 4<-
- // 7 3
- //
- // 6 5
- // ->1 2<-
- // Rotations that start at right side of image.
- if ((view_rotate == ROTATE_180) ||
-@@ -276,17 +295,17 @@ void ScaleYUVToRGB32(const uint8* y_buf,
- int source_uv_fraction =
- ((source_y_subpixel >> y_shift) & kFractionMask) >> 8;
-
- const uint8* y_ptr = y0_ptr;
- const uint8* u_ptr = u0_ptr;
- const uint8* v_ptr = v0_ptr;
- // Apply vertical filtering if necessary.
- // TODO(fbarchard): Remove memcpy when not necessary.
-- if (filter & media::FILTER_BILINEAR_V) {
-+ if (filter & mozilla::gfx::FILTER_BILINEAR_V) {
- if (yscale_fixed != kFractionMax &&
- source_y_fraction && ((source_y + 1) < source_height)) {
- FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
- } else {
- memcpy(ybuf, y0_ptr, source_width);
- }
- y_ptr = ybuf;
- ybuf[source_width] = ybuf[source_width-1];
-@@ -303,44 +322,50 @@ void ScaleYUVToRGB32(const uint8* y_buf,
- u_ptr = ubuf;
- v_ptr = vbuf;
- ubuf[uv_source_width] = ubuf[uv_source_width - 1];
- vbuf[uv_source_width] = vbuf[uv_source_width - 1];
- }
- if (source_dx == kFractionMax) { // Not scaled
- FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, width);
-- } else {
-- if (filter & FILTER_BILINEAR_H) {
-+ } else if (filter & FILTER_BILINEAR_H) {
- LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, width, source_dx);
- } else {
- // Specialized scalers and rotation.
--#if USE_MMX && defined(_MSC_VER)
-+#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_MSC_VER) && defined(_M_IX86)
-+ if(mozilla::supports_sse()) {
- if (width == (source_width * 2)) {
-- DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-- dest_pixel, width);
-+ DoubleYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
-+ dest_pixel, width);
- } else if ((source_dx & kFractionMask) == 0) {
- // Scaling by integer scale factor. ie half.
-- ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-- dest_pixel, width,
-- source_dx >> kFractionBits);
-+ ConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
-+ dest_pixel, width,
-+ source_dx >> kFractionBits);
- } else if (source_dx_uv == source_dx) { // Not rotated.
- ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, width, source_dx);
- } else {
-- RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-- dest_pixel, width,
-- source_dx >> kFractionBits,
-- source_dx_uv >> kFractionBits);
-+ RotateConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
-+ dest_pixel, width,
-+ source_dx >> kFractionBits,
-+ source_dx_uv >> kFractionBits);
- }
-+ }
-+ else {
-+ ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
-+ dest_pixel, width, source_dx);
-+ }
- #else
-- ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-- dest_pixel, width, source_dx);
--#endif
-- }
-+ ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-+ dest_pixel, width, source_dx);
-+#endif
- }
- }
- // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.
-- EMMS();
--}
--
--} // namespace media
-+ if (has_mmx)
-+ EMMS();
-+}
-+
-+} // namespace gfx
-+} // namespace mozilla
-diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
---- a/gfx/ycbcr/yuv_convert.h
-+++ b/gfx/ycbcr/yuv_convert.h
-@@ -1,72 +1,79 @@
- // Copyright (c) 2010 The Chromium Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file.
-
- #ifndef MEDIA_BASE_YUV_CONVERT_H_
- #define MEDIA_BASE_YUV_CONVERT_H_
-
--#include "base/basictypes.h"
--
--namespace media {
--
-+#include "chromium_types.h"
-+#include "gfxCore.h"
-+
-+namespace mozilla {
-+
-+namespace gfx {
-+
- // Type of YUV surface.
- // The value of these enums matter as they are used to shift vertical indices.
- enum YUVType {
-- YV16 = 0, // YV16 is half width and full height chroma channels.
-- YV12 = 1, // YV12 is half width and half height chroma channels.
-+ YV12 = 0, // YV12 is half width and half height chroma channels.
-+ YV16 = 1, // YV16 is half width and full height chroma channels.
-+ YV24 = 2 // YV24 is full width and full height chroma channels.
- };
-
- // Mirror means flip the image horizontally, as in looking in a mirror.
- // Rotate happens after mirroring.
- enum Rotate {
- ROTATE_0, // Rotation off.
- ROTATE_90, // Rotate clockwise.
- ROTATE_180, // Rotate upside down.
- ROTATE_270, // Rotate counter clockwise.
- MIRROR_ROTATE_0, // Mirror horizontally.
- MIRROR_ROTATE_90, // Mirror then Rotate clockwise.
- MIRROR_ROTATE_180, // Mirror vertically.
-- MIRROR_ROTATE_270, // Transpose.
-+ MIRROR_ROTATE_270 // Transpose.
- };
-
- // Filter affects how scaling looks.
- enum ScaleFilter {
- FILTER_NONE = 0, // No filter (point sampled).
- FILTER_BILINEAR_H = 1, // Bilinear horizontal filter.
- FILTER_BILINEAR_V = 2, // Bilinear vertical filter.
-- FILTER_BILINEAR = 3, // Bilinear filter.
-+ FILTER_BILINEAR = 3 // Bilinear filter.
- };
-
- // Convert a frame of YUV to 32 bit ARGB.
- // Pass in YV16/YV12 depending on source format
--void ConvertYUVToRGB32(const uint8* yplane,
-- const uint8* uplane,
-- const uint8* vplane,
-- uint8* rgbframe,
-- int width,
-- int height,
-- int ystride,
-- int uvstride,
-- int rgbstride,
-- YUVType yuv_type);
-+void ConvertYCbCrToRGB32(const uint8* yplane,
-+ const uint8* uplane,
-+ const uint8* vplane,
-+ uint8* rgbframe,
-+ int pic_x,
-+ int pic_y,
-+ int pic_width,
-+ int pic_height,
-+ int ystride,
-+ int uvstride,
-+ int rgbstride,
-+ YUVType yuv_type);
-
- // Scale a frame of YUV to 32 bit ARGB.
- // Supports rotation and mirroring.
--void ScaleYUVToRGB32(const uint8* yplane,
-- const uint8* uplane,
-- const uint8* vplane,
-- uint8* rgbframe,
-- int source_width,
-- int source_height,
-- int width,
-- int height,
-- int ystride,
-- int uvstride,
-- int rgbstride,
-- YUVType yuv_type,
-- Rotate view_rotate,
-- ScaleFilter filter);
--
--} // namespace media
--
-+void ScaleYCbCrToRGB32(const uint8* yplane,
-+ const uint8* uplane,
-+ const uint8* vplane,
-+ uint8* rgbframe,
-+ int source_width,
-+ int source_height,
-+ int width,
-+ int height,
-+ int ystride,
-+ int uvstride,
-+ int rgbstride,
-+ YUVType yuv_type,
-+ Rotate view_rotate,
-+ ScaleFilter filter);
-+
-+} // namespace gfx
-+} // namespace mozilla
-+
- #endif // MEDIA_BASE_YUV_CONVERT_H_
-diff --git a/gfx/ycbcr/yuv_convert_mmx.cpp b/gfx/ycbcr/yuv_convert_mmx.cpp
-new file mode 100644
---- /dev/null
-+++ b/gfx/ycbcr/yuv_convert_mmx.cpp
-@@ -0,0 +1,45 @@
-+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-+// Use of this source code is governed by a BSD-style license that can be
-+// found in the LICENSE file.
-+
-+#include <mmintrin.h>
-+#include "yuv_row.h"
-+
-+namespace mozilla {
-+namespace gfx {
-+
-+// FilterRows combines two rows of the image using linear interpolation.
-+// MMX version does 8 pixels at a time.
-+void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
-+ int source_width, int source_y_fraction) {
-+ __m64 zero = _mm_setzero_si64();
-+ __m64 y1_fraction = _mm_set1_pi16(source_y_fraction);
-+ __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction);
-+
-+ const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr);
-+ const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr);
-+ __m64* dest64 = reinterpret_cast<__m64*>(ybuf);
-+ __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width);
-+
-+ do {
-+ __m64 y0 = *y0_ptr64++;
-+ __m64 y1 = *y1_ptr64++;
-+ __m64 y2 = _mm_unpackhi_pi8(y0, zero);
-+ __m64 y3 = _mm_unpackhi_pi8(y1, zero);
-+ y0 = _mm_unpacklo_pi8(y0, zero);
-+ y1 = _mm_unpacklo_pi8(y1, zero);
-+ y0 = _mm_mullo_pi16(y0, y0_fraction);
-+ y1 = _mm_mullo_pi16(y1, y1_fraction);
-+ y2 = _mm_mullo_pi16(y2, y0_fraction);
-+ y3 = _mm_mullo_pi16(y3, y1_fraction);
-+ y0 = _mm_add_pi16(y0, y1);
-+ y2 = _mm_add_pi16(y2, y3);
-+ y0 = _mm_srli_pi16(y0, 8);
-+ y2 = _mm_srli_pi16(y2, 8);
-+ y0 = _mm_packs_pu16(y0, y2);
-+ *dest64++ = y0;
-+ } while (dest64 < end64);
-+}
-+
-+}
-+}
-diff --git a/gfx/ycbcr/yuv_convert_sse2.cpp b/gfx/ycbcr/yuv_convert_sse2.cpp
-new file mode 100644
---- /dev/null
-+++ b/gfx/ycbcr/yuv_convert_sse2.cpp
-@@ -0,0 +1,47 @@
-+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-+// Use of this source code is governed by a BSD-style license that can be
-+// found in the LICENSE file.
-+
-+#include <emmintrin.h>
-+#include "yuv_row.h"
-+
-+namespace mozilla {
-+namespace gfx {
-+
-+// FilterRows combines two rows of the image using linear interpolation.
-+// SSE2 version does 16 pixels at a time.
-+void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
-+ int source_width, int source_y_fraction) {
-+ __m128i zero = _mm_setzero_si128();
-+ __m128i y1_fraction = _mm_set1_epi16(source_y_fraction);
-+ __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction);
-+
-+ const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr);
-+ const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr);
-+ __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf);
-+ __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width);
-+
-+ do {
-+ __m128i y0 = _mm_loadu_si128(y0_ptr128);
-+ __m128i y1 = _mm_loadu_si128(y1_ptr128);
-+ __m128i y2 = _mm_unpackhi_epi8(y0, zero);
-+ __m128i y3 = _mm_unpackhi_epi8(y1, zero);
-+ y0 = _mm_unpacklo_epi8(y0, zero);
-+ y1 = _mm_unpacklo_epi8(y1, zero);
-+ y0 = _mm_mullo_epi16(y0, y0_fraction);
-+ y1 = _mm_mullo_epi16(y1, y1_fraction);
-+ y2 = _mm_mullo_epi16(y2, y0_fraction);
-+ y3 = _mm_mullo_epi16(y3, y1_fraction);
-+ y0 = _mm_add_epi16(y0, y1);
-+ y2 = _mm_add_epi16(y2, y3);
-+ y0 = _mm_srli_epi16(y0, 8);
-+ y2 = _mm_srli_epi16(y2, 8);
-+ y0 = _mm_packus_epi16(y0, y2);
-+ *dest128++ = y0;
-+ ++y0_ptr128;
-+ ++y1_ptr128;
-+ } while (dest128 < end128);
-+}
-+
-+}
-+}
-diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h
---- a/gfx/ycbcr/yuv_row.h
-+++ b/gfx/ycbcr/yuv_row.h
-@@ -5,109 +5,133 @@
- // yuv_row internal functions to handle YUV conversion and scaling to RGB.
- // These functions are used from both yuv_convert.cc and yuv_scale.cc.
-
- // TODO(fbarchard): Write function that can handle rotation and scaling.
-
- #ifndef MEDIA_BASE_YUV_ROW_H_
- #define MEDIA_BASE_YUV_ROW_H_
-
--#include "base/basictypes.h"
-+#include "chromium_types.h"
-
- extern "C" {
- // Can only do 1x.
- // This is the second fastest of the scalers.
- void FastConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width);
-
--// Can do 1x, half size or any scale down by an integer amount.
--// Step can be negative (mirroring, rotate 180).
--// This is the third fastest of the scalers.
--void ConvertYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int step);
--
--// Rotate is like Convert, but applies different step to Y versus U and V.
--// This allows rotation by 90 or 270, by stepping by stride.
--// This is the forth fastest of the scalers.
--void RotateConvertYUVToRGB32Row(const uint8* y_buf,
-+void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
-- int ystep,
-- int uvstep);
-+ unsigned int x_shift);
-+
-+void FastConvertYUVToRGB32Row(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width);
-+
-+// Can do 1x, half size or any scale down by an integer amount.
-+// Step can be negative (mirroring, rotate 180).
-+// This is the third fastest of the scalers.
-+// Only defined on Windows x86-32.
-+void ConvertYUVToRGB32Row_SSE(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int step);
-+
-+// Rotate is like Convert, but applies different step to Y versus U and V.
-+// This allows rotation by 90 or 270, by stepping by stride.
-+// This is the forth fastest of the scalers.
-+// Only defined on Windows x86-32.
-+void RotateConvertYUVToRGB32Row_SSE(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int ystep,
-+ int uvstep);
-
- // Doubler does 4 pixels at a time. Each pixel is replicated.
- // This is the fastest of the scalers.
--void DoubleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width);
-+// Only defined on Windows x86-32.
-+void DoubleYUVToRGB32Row_SSE(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width);
-
- // Handles arbitrary scaling up or down.
- // Mirroring is supported, but not 90 or 270 degree rotation.
- // Chroma is under sampled every 2 pixels for performance.
- void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx);
-
-+void ScaleYUVToRGB32Row(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx);
-+
-+void ScaleYUVToRGB32Row_C(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx);
-+
- // Handles arbitrary scaling up or down with bilinear filtering.
- // Mirroring is supported, but not 90 or 270 degree rotation.
- // Chroma is under sampled every 2 pixels for performance.
- // This is the slowest of the scalers.
- void LinearScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx);
-
-+void LinearScaleYUVToRGB32Row(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx);
-+
-+void LinearScaleYUVToRGB32Row_C(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx);
-+
-+
- #if defined(_MSC_VER)
- #define SIMD_ALIGNED(var) __declspec(align(16)) var
- #else
- #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
- #endif
- extern SIMD_ALIGNED(int16 kCoefficientsRgbY[768][4]);
-
--// Method to force C version.
--//#define USE_MMX 0
--//#define USE_SSE2 0
--
--#if !defined(USE_MMX)
--// Windows, Mac and Linux/BSD use MMX
--#if defined(__MMX__) || defined(_MSC_VER)
--#define USE_MMX 1
--#else
--#define USE_MMX 0
--#endif
--#endif
--
--#if !defined(USE_SSE2)
--#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || _M_IX86_FP==2
--#define USE_SSE2 1
--#else
--#define USE_SSE2 0
--#endif
--#endif
--
- // x64 uses MMX2 (SSE) so emms is not required.
- // Warning C4799: function has no EMMS instruction.
- // EMMS() is slow and should be called by the calling function once per image.
--#if USE_MMX && !defined(ARCH_CPU_X86_64)
-+#if defined(ARCH_CPU_X86) && !defined(ARCH_CPU_X86_64)
- #if defined(_MSC_VER)
- #define EMMS() __asm emms
- #pragma warning(disable: 4799)
- #else
- #define EMMS() asm("emms")
- #endif
- #else
- #define EMMS()
-diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp
---- a/gfx/ycbcr/yuv_row_c.cpp
-+++ b/gfx/ycbcr/yuv_row_c.cpp
-@@ -1,812 +1,18 @@
- // Copyright (c) 2010 The Chromium Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file.
-
--#include "media/base/yuv_row.h"
--
--#ifdef _DEBUG
--#include "base/logging.h"
--#else
-+#include "yuv_row.h"
-+
- #define DCHECK(a)
--#endif
-
- extern "C" {
-
--#if USE_SSE2 && defined(ARCH_CPU_X86_64)
--
--// AMD64 ABI uses register paremters.
--void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
-- const uint8* u_buf, // rsi
-- const uint8* v_buf, // rdx
-- uint8* rgb_buf, // rcx
-- int width) { // r8
-- asm(
-- "jmp convertend\n"
--"convertloop:"
-- "movzb (%1),%%r10\n"
-- "add $0x1,%1\n"
-- "movzb (%2),%%r11\n"
-- "add $0x1,%2\n"
-- "movq 2048(%5,%%r10,8),%%xmm0\n"
-- "movzb (%0),%%r10\n"
-- "movq 4096(%5,%%r11,8),%%xmm1\n"
-- "movzb 0x1(%0),%%r11\n"
-- "paddsw %%xmm1,%%xmm0\n"
-- "movq (%5,%%r10,8),%%xmm2\n"
-- "add $0x2,%0\n"
-- "movq (%5,%%r11,8),%%xmm3\n"
-- "paddsw %%xmm0,%%xmm2\n"
-- "paddsw %%xmm0,%%xmm3\n"
-- "shufps $0x44,%%xmm3,%%xmm2\n"
-- "psraw $0x6,%%xmm2\n"
-- "packuswb %%xmm2,%%xmm2\n"
-- "movq %%xmm2,0x0(%3)\n"
-- "add $0x8,%3\n"
--"convertend:"
-- "sub $0x2,%4\n"
-- "jns convertloop\n"
--
--"convertnext:"
-- "add $0x1,%4\n"
-- "js convertdone\n"
--
-- "movzb (%1),%%r10\n"
-- "movq 2048(%5,%%r10,8),%%xmm0\n"
-- "movzb (%2),%%r10\n"
-- "movq 4096(%5,%%r10,8),%%xmm1\n"
-- "paddsw %%xmm1,%%xmm0\n"
-- "movzb (%0),%%r10\n"
-- "movq (%5,%%r10,8),%%xmm1\n"
-- "paddsw %%xmm0,%%xmm1\n"
-- "psraw $0x6,%%xmm1\n"
-- "packuswb %%xmm1,%%xmm1\n"
-- "movd %%xmm1,0x0(%3)\n"
--"convertdone:"
-- :
-- : "r"(y_buf), // %0
-- "r"(u_buf), // %1
-- "r"(v_buf), // %2
-- "r"(rgb_buf), // %3
-- "r"(width), // %4
-- "r" (kCoefficientsRgbY) // %5
-- : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
--);
--}
--
--void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi
-- const uint8* u_buf, // rsi
-- const uint8* v_buf, // rdx
-- uint8* rgb_buf, // rcx
-- int width, // r8
-- int source_dx) { // r9
-- asm(
-- "xor %%r11,%%r11\n"
-- "sub $0x2,%4\n"
-- "js scalenext\n"
--
--"scaleloop:"
-- "mov %%r11,%%r10\n"
-- "sar $0x11,%%r10\n"
-- "movzb (%1,%%r10,1),%%rax\n"
-- "movq 2048(%5,%%rax,8),%%xmm0\n"
-- "movzb (%2,%%r10,1),%%rax\n"
-- "movq 4096(%5,%%rax,8),%%xmm1\n"
-- "lea (%%r11,%6),%%r10\n"
-- "sar $0x10,%%r11\n"
-- "movzb (%0,%%r11,1),%%rax\n"
-- "paddsw %%xmm1,%%xmm0\n"
-- "movq (%5,%%rax,8),%%xmm1\n"
-- "lea (%%r10,%6),%%r11\n"
-- "sar $0x10,%%r10\n"
-- "movzb (%0,%%r10,1),%%rax\n"
-- "movq (%5,%%rax,8),%%xmm2\n"
-- "paddsw %%xmm0,%%xmm1\n"
-- "paddsw %%xmm0,%%xmm2\n"
-- "shufps $0x44,%%xmm2,%%xmm1\n"
-- "psraw $0x6,%%xmm1\n"
-- "packuswb %%xmm1,%%xmm1\n"
-- "movq %%xmm1,0x0(%3)\n"
-- "add $0x8,%3\n"
-- "sub $0x2,%4\n"
-- "jns scaleloop\n"
--
--"scalenext:"
-- "add $0x1,%4\n"
-- "js scaledone\n"
--
-- "mov %%r11,%%r10\n"
-- "sar $0x11,%%r10\n"
-- "movzb (%1,%%r10,1),%%rax\n"
-- "movq 2048(%5,%%rax,8),%%xmm0\n"
-- "movzb (%2,%%r10,1),%%rax\n"
-- "movq 4096(%5,%%rax,8),%%xmm1\n"
-- "paddsw %%xmm1,%%xmm0\n"
-- "sar $0x10,%%r11\n"
-- "movzb (%0,%%r11,1),%%rax\n"
-- "movq (%5,%%rax,8),%%xmm1\n"
-- "paddsw %%xmm0,%%xmm1\n"
-- "psraw $0x6,%%xmm1\n"
-- "packuswb %%xmm1,%%xmm1\n"
-- "movd %%xmm1,0x0(%3)\n"
--
--"scaledone:"
-- :
-- : "r"(y_buf), // %0
-- "r"(u_buf), // %1
-- "r"(v_buf), // %2
-- "r"(rgb_buf), // %3
-- "r"(width), // %4
-- "r" (kCoefficientsRgbY), // %5
-- "r"(static_cast<long>(source_dx)) // %6
-- : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
--);
--}
--
--void LinearScaleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int source_dx) {
-- asm(
-- "xor %%r11,%%r11\n" // x = 0
-- "sub $0x2,%4\n"
-- "js .lscalenext\n"
-- "cmp $0x20000,%6\n" // if source_dx >= 2.0
-- "jl .lscalehalf\n"
-- "mov $0x8000,%%r11\n" // x = 0.5 for 1/2 or less
--".lscalehalf:"
--
--".lscaleloop:"
-- "mov %%r11,%%r10\n"
-- "sar $0x11,%%r10\n"
--
-- "movzb (%1, %%r10, 1), %%r13 \n"
-- "movzb 1(%1, %%r10, 1), %%r14 \n"
-- "mov %%r11, %%rax \n"
-- "and $0x1fffe, %%rax \n"
-- "imul %%rax, %%r14 \n"
-- "xor $0x1fffe, %%rax \n"
-- "imul %%rax, %%r13 \n"
-- "add %%r14, %%r13 \n"
-- "shr $17, %%r13 \n"
-- "movq 2048(%5,%%r13,8), %%xmm0\n"
--
-- "movzb (%2, %%r10, 1), %%r13 \n"
-- "movzb 1(%2, %%r10, 1), %%r14 \n"
-- "mov %%r11, %%rax \n"
-- "and $0x1fffe, %%rax \n"
-- "imul %%rax, %%r14 \n"
-- "xor $0x1fffe, %%rax \n"
-- "imul %%rax, %%r13 \n"
-- "add %%r14, %%r13 \n"
-- "shr $17, %%r13 \n"
-- "movq 4096(%5,%%r13,8), %%xmm1\n"
--
-- "mov %%r11, %%rax \n"
-- "lea (%%r11,%6),%%r10\n"
-- "sar $0x10,%%r11\n"
-- "paddsw %%xmm1,%%xmm0\n"
--
-- "movzb (%0, %%r11, 1), %%r13 \n"
-- "movzb 1(%0, %%r11, 1), %%r14 \n"
-- "and $0xffff, %%rax \n"
-- "imul %%rax, %%r14 \n"
-- "xor $0xffff, %%rax \n"
-- "imul %%rax, %%r13 \n"
-- "add %%r14, %%r13 \n"
-- "shr $16, %%r13 \n"
-- "movq (%5,%%r13,8),%%xmm1\n"
--
-- "mov %%r10, %%rax \n"
-- "lea (%%r10,%6),%%r11\n"
-- "sar $0x10,%%r10\n"
--
-- "movzb (%0,%%r10,1), %%r13 \n"
-- "movzb 1(%0,%%r10,1), %%r14 \n"
-- "and $0xffff, %%rax \n"
-- "imul %%rax, %%r14 \n"
-- "xor $0xffff, %%rax \n"
-- "imul %%rax, %%r13 \n"
-- "add %%r14, %%r13 \n"
-- "shr $16, %%r13 \n"
-- "movq (%5,%%r13,8),%%xmm2\n"
--
-- "paddsw %%xmm0,%%xmm1\n"
-- "paddsw %%xmm0,%%xmm2\n"
-- "shufps $0x44,%%xmm2,%%xmm1\n"
-- "psraw $0x6,%%xmm1\n"
-- "packuswb %%xmm1,%%xmm1\n"
-- "movq %%xmm1,0x0(%3)\n"
-- "add $0x8,%3\n"
-- "sub $0x2,%4\n"
-- "jns .lscaleloop\n"
--
--".lscalenext:"
-- "add $0x1,%4\n"
-- "js .lscaledone\n"
--
-- "mov %%r11,%%r10\n"
-- "sar $0x11,%%r10\n"
--
-- "movzb (%1,%%r10,1), %%r13 \n"
-- "movq 2048(%5,%%r13,8),%%xmm0\n"
--
-- "movzb (%2,%%r10,1), %%r13 \n"
-- "movq 4096(%5,%%r13,8),%%xmm1\n"
--
-- "paddsw %%xmm1,%%xmm0\n"
-- "sar $0x10,%%r11\n"
--
-- "movzb (%0,%%r11,1), %%r13 \n"
-- "movq (%5,%%r13,8),%%xmm1\n"
--
-- "paddsw %%xmm0,%%xmm1\n"
-- "psraw $0x6,%%xmm1\n"
-- "packuswb %%xmm1,%%xmm1\n"
-- "movd %%xmm1,0x0(%3)\n"
--
--".lscaledone:"
-- :
-- : "r"(y_buf), // %0
-- "r"(u_buf), // %1
-- "r"(v_buf), // %2
-- "r"(rgb_buf), // %3
-- "r"(width), // %4
-- "r" (kCoefficientsRgbY), // %5
-- "r"(static_cast<long>(source_dx)) // %6
-- : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2"
--);
--}
--
--#elif USE_MMX && !defined(ARCH_CPU_X86_64) && !defined(__PIC__)
--
--// PIC version is slower because less registers are available, so
--// non-PIC is used on platforms where it is possible.
--
--void FastConvertYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width);
-- asm(
-- ".text\n"
-- ".global FastConvertYUVToRGB32Row\n"
--"FastConvertYUVToRGB32Row:\n"
-- "pusha\n"
-- "mov 0x24(%esp),%edx\n"
-- "mov 0x28(%esp),%edi\n"
-- "mov 0x2c(%esp),%esi\n"
-- "mov 0x30(%esp),%ebp\n"
-- "mov 0x34(%esp),%ecx\n"
-- "jmp convertend\n"
--
--"convertloop:"
-- "movzbl (%edi),%eax\n"
-- "add $0x1,%edi\n"
-- "movzbl (%esi),%ebx\n"
-- "add $0x1,%esi\n"
-- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
-- "movzbl (%edx),%eax\n"
-- "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
-- "movzbl 0x1(%edx),%ebx\n"
-- "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
-- "add $0x2,%edx\n"
-- "movq kCoefficientsRgbY(,%ebx,8),%mm2\n"
-- "paddsw %mm0,%mm1\n"
-- "paddsw %mm0,%mm2\n"
-- "psraw $0x6,%mm1\n"
-- "psraw $0x6,%mm2\n"
-- "packuswb %mm2,%mm1\n"
-- "movntq %mm1,0x0(%ebp)\n"
-- "add $0x8,%ebp\n"
--"convertend:"
-- "sub $0x2,%ecx\n"
-- "jns convertloop\n"
--
-- "and $0x1,%ecx\n"
-- "je convertdone\n"
--
-- "movzbl (%edi),%eax\n"
-- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
-- "movzbl (%esi),%eax\n"
-- "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
-- "movzbl (%edx),%eax\n"
-- "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
-- "paddsw %mm0,%mm1\n"
-- "psraw $0x6,%mm1\n"
-- "packuswb %mm1,%mm1\n"
-- "movd %mm1,0x0(%ebp)\n"
--"convertdone:"
-- "popa\n"
-- "ret\n"
--);
--
--
--void ScaleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int source_dx);
-- asm(
-- ".text\n"
-- ".global ScaleYUVToRGB32Row\n"
--"ScaleYUVToRGB32Row:\n"
-- "pusha\n"
-- "mov 0x24(%esp),%edx\n"
-- "mov 0x28(%esp),%edi\n"
-- "mov 0x2c(%esp),%esi\n"
-- "mov 0x30(%esp),%ebp\n"
-- "mov 0x34(%esp),%ecx\n"
-- "xor %ebx,%ebx\n"
-- "jmp scaleend\n"
--
--"scaleloop:"
-- "mov %ebx,%eax\n"
-- "sar $0x11,%eax\n"
-- "movzbl (%edi,%eax,1),%eax\n"
-- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
-- "mov %ebx,%eax\n"
-- "sar $0x11,%eax\n"
-- "movzbl (%esi,%eax,1),%eax\n"
-- "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
-- "mov %ebx,%eax\n"
-- "add 0x38(%esp),%ebx\n"
-- "sar $0x10,%eax\n"
-- "movzbl (%edx,%eax,1),%eax\n"
-- "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
-- "mov %ebx,%eax\n"
-- "add 0x38(%esp),%ebx\n"
-- "sar $0x10,%eax\n"
-- "movzbl (%edx,%eax,1),%eax\n"
-- "movq kCoefficientsRgbY(,%eax,8),%mm2\n"
-- "paddsw %mm0,%mm1\n"
-- "paddsw %mm0,%mm2\n"
-- "psraw $0x6,%mm1\n"
-- "psraw $0x6,%mm2\n"
-- "packuswb %mm2,%mm1\n"
-- "movntq %mm1,0x0(%ebp)\n"
-- "add $0x8,%ebp\n"
--"scaleend:"
-- "sub $0x2,%ecx\n"
-- "jns scaleloop\n"
--
-- "and $0x1,%ecx\n"
-- "je scaledone\n"
--
-- "mov %ebx,%eax\n"
-- "sar $0x11,%eax\n"
-- "movzbl (%edi,%eax,1),%eax\n"
-- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
-- "mov %ebx,%eax\n"
-- "sar $0x11,%eax\n"
-- "movzbl (%esi,%eax,1),%eax\n"
-- "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
-- "mov %ebx,%eax\n"
-- "sar $0x10,%eax\n"
-- "movzbl (%edx,%eax,1),%eax\n"
-- "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
-- "paddsw %mm0,%mm1\n"
-- "psraw $0x6,%mm1\n"
-- "packuswb %mm1,%mm1\n"
-- "movd %mm1,0x0(%ebp)\n"
--
--"scaledone:"
-- "popa\n"
-- "ret\n"
--);
--
--void LinearScaleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int source_dx);
-- asm(
-- ".text\n"
-- ".global LinearScaleYUVToRGB32Row\n"
--"LinearScaleYUVToRGB32Row:\n"
-- "pusha\n"
-- "mov 0x24(%esp),%edx\n"
-- "mov 0x28(%esp),%edi\n"
-- "mov 0x30(%esp),%ebp\n"
--
-- // source_width = width * source_dx + ebx
-- "mov 0x34(%esp), %ecx\n"
-- "imull 0x38(%esp), %ecx\n"
-- "mov %ecx, 0x34(%esp)\n"
--
-- "mov 0x38(%esp), %ecx\n"
-- "xor %ebx,%ebx\n" // x = 0
-- "cmp $0x20000,%ecx\n" // if source_dx >= 2.0
-- "jl .lscaleend\n"
-- "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less
-- "jmp .lscaleend\n"
--
--".lscaleloop:"
-- "mov %ebx,%eax\n"
-- "sar $0x11,%eax\n"
--
-- "movzbl (%edi,%eax,1),%ecx\n"
-- "movzbl 1(%edi,%eax,1),%esi\n"
-- "mov %ebx,%eax\n"
-- "andl $0x1fffe, %eax \n"
-- "imul %eax, %esi \n"
-- "xorl $0x1fffe, %eax \n"
-- "imul %eax, %ecx \n"
-- "addl %esi, %ecx \n"
-- "shrl $17, %ecx \n"
-- "movq kCoefficientsRgbY+2048(,%ecx,8),%mm0\n"
--
-- "mov 0x2c(%esp),%esi\n"
-- "mov %ebx,%eax\n"
-- "sar $0x11,%eax\n"
--
-- "movzbl (%esi,%eax,1),%ecx\n"
-- "movzbl 1(%esi,%eax,1),%esi\n"
-- "mov %ebx,%eax\n"
-- "andl $0x1fffe, %eax \n"
-- "imul %eax, %esi \n"
-- "xorl $0x1fffe, %eax \n"
-- "imul %eax, %ecx \n"
-- "addl %esi, %ecx \n"
-- "shrl $17, %ecx \n"
-- "paddsw kCoefficientsRgbY+4096(,%ecx,8),%mm0\n"
--
-- "mov %ebx,%eax\n"
-- "sar $0x10,%eax\n"
-- "movzbl (%edx,%eax,1),%ecx\n"
-- "movzbl 1(%edx,%eax,1),%esi\n"
-- "mov %ebx,%eax\n"
-- "add 0x38(%esp),%ebx\n"
-- "andl $0xffff, %eax \n"
-- "imul %eax, %esi \n"
-- "xorl $0xffff, %eax \n"
-- "imul %eax, %ecx \n"
-- "addl %esi, %ecx \n"
-- "shrl $16, %ecx \n"
-- "movq kCoefficientsRgbY(,%ecx,8),%mm1\n"
--
-- "cmp 0x34(%esp), %ebx\n"
-- "jge .lscalelastpixel\n"
--
-- "mov %ebx,%eax\n"
-- "sar $0x10,%eax\n"
-- "movzbl (%edx,%eax,1),%ecx\n"
-- "movzbl 1(%edx,%eax,1),%esi\n"
-- "mov %ebx,%eax\n"
-- "add 0x38(%esp),%ebx\n"
-- "andl $0xffff, %eax \n"
-- "imul %eax, %esi \n"
-- "xorl $0xffff, %eax \n"
-- "imul %eax, %ecx \n"
-- "addl %esi, %ecx \n"
-- "shrl $16, %ecx \n"
-- "movq kCoefficientsRgbY(,%ecx,8),%mm2\n"
--
-- "paddsw %mm0,%mm1\n"
-- "paddsw %mm0,%mm2\n"
-- "psraw $0x6,%mm1\n"
-- "psraw $0x6,%mm2\n"
-- "packuswb %mm2,%mm1\n"
-- "movntq %mm1,0x0(%ebp)\n"
-- "add $0x8,%ebp\n"
--
--".lscaleend:"
-- "cmp 0x34(%esp), %ebx\n"
-- "jl .lscaleloop\n"
-- "popa\n"
-- "ret\n"
--
--".lscalelastpixel:"
-- "paddsw %mm0, %mm1\n"
-- "psraw $6, %mm1\n"
-- "packuswb %mm1, %mm1\n"
-- "movd %mm1, (%ebp)\n"
-- "popa\n"
-- "ret\n"
--);
--
--#elif USE_MMX && !defined(ARCH_CPU_X86_64) && defined(__PIC__)
--
--extern void PICConvertYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int16 *kCoefficientsRgbY);
-- asm(
-- ".text\n"
--#if defined(OS_MACOSX)
--"_PICConvertYUVToRGB32Row:\n"
--#else
--"PICConvertYUVToRGB32Row:\n"
--#endif
-- "pusha\n"
-- "mov 0x24(%esp),%edx\n"
-- "mov 0x28(%esp),%edi\n"
-- "mov 0x2c(%esp),%esi\n"
-- "mov 0x30(%esp),%ebp\n"
-- "mov 0x38(%esp),%ecx\n"
--
-- "jmp .Lconvertend\n"
--
--".Lconvertloop:"
-- "movzbl (%edi),%eax\n"
-- "add $0x1,%edi\n"
-- "movzbl (%esi),%ebx\n"
-- "add $0x1,%esi\n"
-- "movq 2048(%ecx,%eax,8),%mm0\n"
-- "movzbl (%edx),%eax\n"
-- "paddsw 4096(%ecx,%ebx,8),%mm0\n"
-- "movzbl 0x1(%edx),%ebx\n"
-- "movq 0(%ecx,%eax,8),%mm1\n"
-- "add $0x2,%edx\n"
-- "movq 0(%ecx,%ebx,8),%mm2\n"
-- "paddsw %mm0,%mm1\n"
-- "paddsw %mm0,%mm2\n"
-- "psraw $0x6,%mm1\n"
-- "psraw $0x6,%mm2\n"
-- "packuswb %mm2,%mm1\n"
-- "movntq %mm1,0x0(%ebp)\n"
-- "add $0x8,%ebp\n"
--".Lconvertend:"
-- "subl $0x2,0x34(%esp)\n"
-- "jns .Lconvertloop\n"
--
-- "andl $0x1,0x34(%esp)\n"
-- "je .Lconvertdone\n"
--
-- "movzbl (%edi),%eax\n"
-- "movq 2048(%ecx,%eax,8),%mm0\n"
-- "movzbl (%esi),%eax\n"
-- "paddsw 4096(%ecx,%eax,8),%mm0\n"
-- "movzbl (%edx),%eax\n"
-- "movq 0(%ecx,%eax,8),%mm1\n"
-- "paddsw %mm0,%mm1\n"
-- "psraw $0x6,%mm1\n"
-- "packuswb %mm1,%mm1\n"
-- "movd %mm1,0x0(%ebp)\n"
--".Lconvertdone:\n"
-- "popa\n"
-- "ret\n"
--);
--
--void FastConvertYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width) {
-- PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width,
-- &kCoefficientsRgbY[0][0]);
--}
--
--extern void PICScaleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int source_dx,
-- int16 *kCoefficientsRgbY);
--
-- asm(
-- ".text\n"
--#if defined(OS_MACOSX)
--"_PICScaleYUVToRGB32Row:\n"
--#else
--"PICScaleYUVToRGB32Row:\n"
--#endif
-- "pusha\n"
-- "mov 0x24(%esp),%edx\n"
-- "mov 0x28(%esp),%edi\n"
-- "mov 0x2c(%esp),%esi\n"
-- "mov 0x30(%esp),%ebp\n"
-- "mov 0x3c(%esp),%ecx\n"
-- "xor %ebx,%ebx\n"
-- "jmp Lscaleend\n"
--
--"Lscaleloop:"
-- "mov %ebx,%eax\n"
-- "sar $0x11,%eax\n"
-- "movzbl (%edi,%eax,1),%eax\n"
-- "movq 2048(%ecx,%eax,8),%mm0\n"
-- "mov %ebx,%eax\n"
-- "sar $0x11,%eax\n"
-- "movzbl (%esi,%eax,1),%eax\n"
-- "paddsw 4096(%ecx,%eax,8),%mm0\n"
-- "mov %ebx,%eax\n"
-- "add 0x38(%esp),%ebx\n"
-- "sar $0x10,%eax\n"
-- "movzbl (%edx,%eax,1),%eax\n"
-- "movq 0(%ecx,%eax,8),%mm1\n"
-- "mov %ebx,%eax\n"
-- "add 0x38(%esp),%ebx\n"
-- "sar $0x10,%eax\n"
-- "movzbl (%edx,%eax,1),%eax\n"
-- "movq 0(%ecx,%eax,8),%mm2\n"
-- "paddsw %mm0,%mm1\n"
-- "paddsw %mm0,%mm2\n"
-- "psraw $0x6,%mm1\n"
-- "psraw $0x6,%mm2\n"
-- "packuswb %mm2,%mm1\n"
-- "movntq %mm1,0x0(%ebp)\n"
-- "add $0x8,%ebp\n"
--"Lscaleend:"
-- "subl $0x2,0x34(%esp)\n"
-- "jns Lscaleloop\n"
--
-- "andl $0x1,0x34(%esp)\n"
-- "je Lscaledone\n"
--
-- "mov %ebx,%eax\n"
-- "sar $0x11,%eax\n"
-- "movzbl (%edi,%eax,1),%eax\n"
-- "movq 2048(%ecx,%eax,8),%mm0\n"
-- "mov %ebx,%eax\n"
-- "sar $0x11,%eax\n"
-- "movzbl (%esi,%eax,1),%eax\n"
-- "paddsw 4096(%ecx,%eax,8),%mm0\n"
-- "mov %ebx,%eax\n"
-- "sar $0x10,%eax\n"
-- "movzbl (%edx,%eax,1),%eax\n"
-- "movq 0(%ecx,%eax,8),%mm1\n"
-- "paddsw %mm0,%mm1\n"
-- "psraw $0x6,%mm1\n"
-- "packuswb %mm1,%mm1\n"
-- "movd %mm1,0x0(%ebp)\n"
--
--"Lscaledone:"
-- "popa\n"
-- "ret\n"
--);
--
--
--void ScaleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int source_dx) {
-- PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
-- &kCoefficientsRgbY[0][0]);
--}
--
--void PICLinearScaleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int source_dx,
-- int16 *kCoefficientsRgbY);
-- asm(
-- ".text\n"
--#if defined(OS_MACOSX)
--"_PICLinearScaleYUVToRGB32Row:\n"
--#else
--"PICLinearScaleYUVToRGB32Row:\n"
--#endif
-- "pusha\n"
-- "mov 0x24(%esp),%edx\n"
-- "mov 0x30(%esp),%ebp\n"
-- "mov 0x34(%esp),%ecx\n"
-- "mov 0x3c(%esp),%edi\n"
-- "xor %ebx,%ebx\n"
--
-- // source_width = width * source_dx + ebx
-- "mov 0x34(%esp), %ecx\n"
-- "imull 0x38(%esp), %ecx\n"
-- "mov %ecx, 0x34(%esp)\n"
--
-- "mov 0x38(%esp), %ecx\n"
-- "xor %ebx,%ebx\n" // x = 0
-- "cmp $0x20000,%ecx\n" // if source_dx >= 2.0
-- "jl .lscaleend\n"
-- "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less
-- "jmp .lscaleend\n"
--
--".lscaleloop:"
-- "mov 0x28(%esp),%esi\n"
-- "mov %ebx,%eax\n"
-- "sar $0x11,%eax\n"
--
-- "movzbl (%esi,%eax,1),%ecx\n"
-- "movzbl 1(%esi,%eax,1),%esi\n"
-- "mov %ebx,%eax\n"
-- "andl $0x1fffe, %eax \n"
-- "imul %eax, %esi \n"
-- "xorl $0x1fffe, %eax \n"
-- "imul %eax, %ecx \n"
-- "addl %esi, %ecx \n"
-- "shrl $17, %ecx \n"
-- "movq 2048(%edi,%ecx,8),%mm0\n"
--
-- "mov 0x2c(%esp),%esi\n"
-- "mov %ebx,%eax\n"
-- "sar $0x11,%eax\n"
--
-- "movzbl (%esi,%eax,1),%ecx\n"
-- "movzbl 1(%esi,%eax,1),%esi\n"
-- "mov %ebx,%eax\n"
-- "andl $0x1fffe, %eax \n"
-- "imul %eax, %esi \n"
-- "xorl $0x1fffe, %eax \n"
-- "imul %eax, %ecx \n"
-- "addl %esi, %ecx \n"
-- "shrl $17, %ecx \n"
-- "paddsw 4096(%edi,%ecx,8),%mm0\n"
--
-- "mov %ebx,%eax\n"
-- "sar $0x10,%eax\n"
-- "movzbl (%edx,%eax,1),%ecx\n"
-- "movzbl 1(%edx,%eax,1),%esi\n"
-- "mov %ebx,%eax\n"
-- "add 0x38(%esp),%ebx\n"
-- "andl $0xffff, %eax \n"
-- "imul %eax, %esi \n"
-- "xorl $0xffff, %eax \n"
-- "imul %eax, %ecx \n"
-- "addl %esi, %ecx \n"
-- "shrl $16, %ecx \n"
-- "movq (%edi,%ecx,8),%mm1\n"
--
-- "cmp 0x34(%esp), %ebx\n"
-- "jge .lscalelastpixel\n"
--
-- "mov %ebx,%eax\n"
-- "sar $0x10,%eax\n"
-- "movzbl (%edx,%eax,1),%ecx\n"
-- "movzbl 1(%edx,%eax,1),%esi\n"
-- "mov %ebx,%eax\n"
-- "add 0x38(%esp),%ebx\n"
-- "andl $0xffff, %eax \n"
-- "imul %eax, %esi \n"
-- "xorl $0xffff, %eax \n"
-- "imul %eax, %ecx \n"
-- "addl %esi, %ecx \n"
-- "shrl $16, %ecx \n"
-- "movq (%edi,%ecx,8),%mm2\n"
--
-- "paddsw %mm0,%mm1\n"
-- "paddsw %mm0,%mm2\n"
-- "psraw $0x6,%mm1\n"
-- "psraw $0x6,%mm2\n"
-- "packuswb %mm2,%mm1\n"
-- "movntq %mm1,0x0(%ebp)\n"
-- "add $0x8,%ebp\n"
--
--".lscaleend:"
-- "cmp %ebx, 0x34(%esp)\n"
-- "jg .lscaleloop\n"
-- "popa\n"
-- "ret\n"
--
--".lscalelastpixel:"
-- "paddsw %mm0, %mm1\n"
-- "psraw $6, %mm1\n"
-- "packuswb %mm1, %mm1\n"
-- "movd %mm1, (%ebp)\n"
-- "popa\n"
-- "ret\n"
--);
--
--void LinearScaleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int source_dx) {
-- PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
-- &kCoefficientsRgbY[0][0]);
--}
--
--#else // USE_MMX
--
- // C reference code that mimic the YUV assembly.
- #define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
- #define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
- (((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
-
- static inline void YuvPixel(uint8 y,
- uint8 u,
- uint8 v,
-@@ -833,66 +39,71 @@ static inline void YuvPixel(uint8 y,
- a >>= 6;
-
- *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) |
- (packuswb(g) << 8) |
- (packuswb(r) << 16) |
- (packuswb(a) << 24);
- }
-
--void FastConvertYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width) {
-+void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ unsigned int x_shift) {
- for (int x = 0; x < width; x += 2) {
-- uint8 u = u_buf[x >> 1];
-- uint8 v = v_buf[x >> 1];
-+ uint8 u = u_buf[x >> x_shift];
-+ uint8 v = v_buf[x >> x_shift];
- uint8 y0 = y_buf[x];
- YuvPixel(y0, u, v, rgb_buf);
- if ((x + 1) < width) {
- uint8 y1 = y_buf[x + 1];
-+ if (x_shift == 0) {
-+ u = u_buf[x + 1];
-+ v = v_buf[x + 1];
-+ }
- YuvPixel(y1, u, v, rgb_buf + 4);
- }
- rgb_buf += 8; // Advance 2 pixels.
- }
- }
-
- // 16.16 fixed point is used. A shift by 16 isolates the integer.
- // A shift by 17 is used to further subsample the chrominence channels.
- // & 0xffff isolates the fixed point fraction. >> 2 to get the upper 2 bits,
- // for 1/65536 pixel accurate interpolation.
--void ScaleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int source_dx) {
-+void ScaleYUVToRGB32Row_C(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx) {
- int x = 0;
- for (int i = 0; i < width; i += 2) {
- int y = y_buf[x >> 16];
- int u = u_buf[(x >> 17)];
- int v = v_buf[(x >> 17)];
- YuvPixel(y, u, v, rgb_buf);
- x += source_dx;
- if ((i + 1) < width) {
- y = y_buf[x >> 16];
- YuvPixel(y, u, v, rgb_buf+4);
- x += source_dx;
- }
- rgb_buf += 8;
- }
- }
-
--void LinearScaleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int source_dx) {
-+void LinearScaleYUVToRGB32Row_C(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx) {
- int x = 0;
- if (source_dx >= 0x20000) {
- x = 32768;
- }
- for (int i = 0; i < width; i += 2) {
- int y0 = y_buf[x >> 16];
- int y1 = y_buf[(x >> 16) + 1];
- int u0 = u_buf[(x >> 17)];
-@@ -913,11 +124,10 @@ void LinearScaleYUVToRGB32Row(const uint
- y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
- YuvPixel(y, u, v, rgb_buf+4);
- x += source_dx;
- }
- rgb_buf += 8;
- }
- }
-
--#endif // USE_MMX
- } // extern "C"
-
-diff --git a/gfx/ycbcr/yuv_row_posix.cpp b/gfx/ycbcr/yuv_row_posix.cpp
---- a/gfx/ycbcr/yuv_row_posix.cpp
-+++ b/gfx/ycbcr/yuv_row_posix.cpp
-@@ -1,33 +1,32 @@
- // Copyright (c) 2010 The Chromium Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file.
-
--#include "media/base/yuv_row.h"
--
--#ifdef _DEBUG
--#include "base/logging.h"
--#else
-+#include "yuv_row.h"
-+#include "mozilla/SSE.h"
-+
- #define DCHECK(a)
--#endif
-
- extern "C" {
-
--#if USE_SSE2 && defined(ARCH_CPU_X86_64)
-+#if defined(ARCH_CPU_X86_64)
-+
-+// We don't need CPUID guards here, since x86-64 implies SSE2.
-
- // AMD64 ABI uses register paremters.
- void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
- const uint8* u_buf, // rsi
- const uint8* v_buf, // rdx
- uint8* rgb_buf, // rcx
- int width) { // r8
- asm(
-- "jmp convertend\n"
--"convertloop:"
-+ "jmp 1f\n"
-+"0:"
- "movzb (%1),%%r10\n"
- "add $0x1,%1\n"
- "movzb (%2),%%r11\n"
- "add $0x1,%2\n"
- "movq 2048(%5,%%r10,8),%%xmm0\n"
- "movzb (%0),%%r10\n"
- "movq 4096(%5,%%r11,8),%%xmm1\n"
- "movzb 0x1(%0),%%r11\n"
-@@ -37,36 +36,36 @@ void FastConvertYUVToRGB32Row(const uint
- "movq (%5,%%r11,8),%%xmm3\n"
- "paddsw %%xmm0,%%xmm2\n"
- "paddsw %%xmm0,%%xmm3\n"
- "shufps $0x44,%%xmm3,%%xmm2\n"
- "psraw $0x6,%%xmm2\n"
- "packuswb %%xmm2,%%xmm2\n"
- "movq %%xmm2,0x0(%3)\n"
- "add $0x8,%3\n"
--"convertend:"
-+"1:"
- "sub $0x2,%4\n"
-- "jns convertloop\n"
--
--"convertnext:"
-+ "jns 0b\n"
-+
-+"2:"
- "add $0x1,%4\n"
-- "js convertdone\n"
-+ "js 3f\n"
-
- "movzb (%1),%%r10\n"
- "movq 2048(%5,%%r10,8),%%xmm0\n"
- "movzb (%2),%%r10\n"
- "movq 4096(%5,%%r10,8),%%xmm1\n"
- "paddsw %%xmm1,%%xmm0\n"
- "movzb (%0),%%r10\n"
- "movq (%5,%%r10,8),%%xmm1\n"
- "paddsw %%xmm0,%%xmm1\n"
- "psraw $0x6,%%xmm1\n"
- "packuswb %%xmm1,%%xmm1\n"
- "movd %%xmm1,0x0(%3)\n"
--"convertdone:"
-+"3:"
- :
- : "r"(y_buf), // %0
- "r"(u_buf), // %1
- "r"(v_buf), // %2
- "r"(rgb_buf), // %3
- "r"(width), // %4
- "r" (kCoefficientsRgbY) // %5
- : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
-@@ -77,19 +76,19 @@ void ScaleYUVToRGB32Row(const uint8* y_b
- const uint8* u_buf, // rsi
- const uint8* v_buf, // rdx
- uint8* rgb_buf, // rcx
- int width, // r8
- int source_dx) { // r9
- asm(
- "xor %%r11,%%r11\n"
- "sub $0x2,%4\n"
-- "js scalenext\n"
--
--"scaleloop:"
-+ "js 1f\n"
-+
-+"0:"
- "mov %%r11,%%r10\n"
- "sar $0x11,%%r10\n"
- "movzb (%1,%%r10,1),%%rax\n"
- "movq 2048(%5,%%rax,8),%%xmm0\n"
- "movzb (%2,%%r10,1),%%rax\n"
- "movq 4096(%5,%%rax,8),%%xmm1\n"
- "lea (%%r11,%6),%%r10\n"
- "sar $0x10,%%r11\n"
-@@ -103,38 +102,38 @@ void ScaleYUVToRGB32Row(const uint8* y_b
- "paddsw %%xmm0,%%xmm1\n"
- "paddsw %%xmm0,%%xmm2\n"
- "shufps $0x44,%%xmm2,%%xmm1\n"
- "psraw $0x6,%%xmm1\n"
- "packuswb %%xmm1,%%xmm1\n"
- "movq %%xmm1,0x0(%3)\n"
- "add $0x8,%3\n"
- "sub $0x2,%4\n"
-- "jns scaleloop\n"
--
--"scalenext:"
-+ "jns 0b\n"
-+
-+"1:"
- "add $0x1,%4\n"
-- "js scaledone\n"
-+ "js 2f\n"
-
- "mov %%r11,%%r10\n"
- "sar $0x11,%%r10\n"
- "movzb (%1,%%r10,1),%%rax\n"
- "movq 2048(%5,%%rax,8),%%xmm0\n"
- "movzb (%2,%%r10,1),%%rax\n"
- "movq 4096(%5,%%rax,8),%%xmm1\n"
- "paddsw %%xmm1,%%xmm0\n"
- "sar $0x10,%%r11\n"
- "movzb (%0,%%r11,1),%%rax\n"
- "movq (%5,%%rax,8),%%xmm1\n"
- "paddsw %%xmm0,%%xmm1\n"
- "psraw $0x6,%%xmm1\n"
- "packuswb %%xmm1,%%xmm1\n"
- "movd %%xmm1,0x0(%3)\n"
-
--"scaledone:"
-+"2:"
- :
- : "r"(y_buf), // %0
- "r"(u_buf), // %1
- "r"(v_buf), // %2
- "r"(rgb_buf), // %3
- "r"(width), // %4
- "r" (kCoefficientsRgbY), // %5
- "r"(static_cast<long>(source_dx)) // %6
-@@ -146,23 +145,23 @@ void LinearScaleYUVToRGB32Row(const uint
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
- asm(
- "xor %%r11,%%r11\n" // x = 0
- "sub $0x2,%4\n"
-- "js .lscalenext\n"
-+ "js 2f\n"
- "cmp $0x20000,%6\n" // if source_dx >= 2.0
-- "jl .lscalehalf\n"
-+ "jl 0f\n"
- "mov $0x8000,%%r11\n" // x = 0.5 for 1/2 or less
--".lscalehalf:"
--
--".lscaleloop:"
-+"0:"
-+
-+"1:"
- "mov %%r11,%%r10\n"
- "sar $0x11,%%r10\n"
-
- "movzb (%1, %%r10, 1), %%r13 \n"
- "movzb 1(%1, %%r10, 1), %%r14 \n"
- "mov %%r11, %%rax \n"
- "and $0x1fffe, %%rax \n"
- "imul %%rax, %%r14 \n"
-@@ -215,21 +214,21 @@ void LinearScaleYUVToRGB32Row(const uint
- "paddsw %%xmm0,%%xmm1\n"
- "paddsw %%xmm0,%%xmm2\n"
- "shufps $0x44,%%xmm2,%%xmm1\n"
- "psraw $0x6,%%xmm1\n"
- "packuswb %%xmm1,%%xmm1\n"
- "movq %%xmm1,0x0(%3)\n"
- "add $0x8,%3\n"
- "sub $0x2,%4\n"
-- "jns .lscaleloop\n"
--
--".lscalenext:"
-+ "jns 1b\n"
-+
-+"2:"
- "add $0x1,%4\n"
-- "js .lscaledone\n"
-+ "js 3f\n"
-
- "mov %%r11,%%r10\n"
- "sar $0x11,%%r10\n"
-
- "movzb (%1,%%r10,1), %%r13 \n"
- "movq 2048(%5,%%r13,8),%%xmm0\n"
-
- "movzb (%2,%%r10,1), %%r13 \n"
-@@ -241,52 +240,52 @@ void LinearScaleYUVToRGB32Row(const uint
- "movzb (%0,%%r11,1), %%r13 \n"
- "movq (%5,%%r13,8),%%xmm1\n"
-
- "paddsw %%xmm0,%%xmm1\n"
- "psraw $0x6,%%xmm1\n"
- "packuswb %%xmm1,%%xmm1\n"
- "movd %%xmm1,0x0(%3)\n"
-
--".lscaledone:"
-+"3:"
- :
- : "r"(y_buf), // %0
- "r"(u_buf), // %1
- "r"(v_buf), // %2
- "r"(rgb_buf), // %3
- "r"(width), // %4
- "r" (kCoefficientsRgbY), // %5
- "r"(static_cast<long>(source_dx)) // %6
- : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2"
- );
- }
-
--#elif USE_MMX && !defined(ARCH_CPU_X86_64) && !defined(__PIC__)
-+#elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && !defined(__PIC__)
-
- // PIC version is slower because less registers are available, so
- // non-PIC is used on platforms where it is possible.
--
--void FastConvertYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width);
-+void FastConvertYUVToRGB32Row_SSE(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width);
- asm(
- ".text\n"
-- ".global FastConvertYUVToRGB32Row\n"
--"FastConvertYUVToRGB32Row:\n"
-+ ".global FastConvertYUVToRGB32Row_SSE\n"
-+ ".type FastConvertYUVToRGB32Row_SSE, @function\n"
-+"FastConvertYUVToRGB32Row_SSE:\n"
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x28(%esp),%edi\n"
- "mov 0x2c(%esp),%esi\n"
- "mov 0x30(%esp),%ebp\n"
- "mov 0x34(%esp),%ecx\n"
-- "jmp convertend\n"
--
--"convertloop:"
-+ "jmp 1f\n"
-+
-+"0:"
- "movzbl (%edi),%eax\n"
- "add $0x1,%edi\n"
- "movzbl (%esi),%ebx\n"
- "add $0x1,%esi\n"
- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
- "movzbl (%edx),%eax\n"
- "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
- "movzbl 0x1(%edx),%ebx\n"
-@@ -295,59 +294,77 @@ void FastConvertYUVToRGB32Row(const uint
- "movq kCoefficientsRgbY(,%ebx,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
--"convertend:"
-+"1:"
- "sub $0x2,%ecx\n"
-- "jns convertloop\n"
-+ "jns 0b\n"
-
- "and $0x1,%ecx\n"
-- "je convertdone\n"
-+ "je 2f\n"
-
- "movzbl (%edi),%eax\n"
- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
- "movzbl (%esi),%eax\n"
- "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
- "movzbl (%edx),%eax\n"
- "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%ebp)\n"
--"convertdone:"
-+"2:"
- "popa\n"
- "ret\n"
-+#if !defined(XP_MACOSX)
-+ ".previous\n"
-+#endif
- );
-
--
--void ScaleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int source_dx);
-+void FastConvertYUVToRGB32Row(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width)
-+{
-+ if (mozilla::supports_sse()) {
-+ FastConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width);
-+ return;
-+ }
-+
-+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
-+}
-+
-+
-+void ScaleYUVToRGB32Row_SSE(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx);
- asm(
- ".text\n"
-- ".global ScaleYUVToRGB32Row\n"
--"ScaleYUVToRGB32Row:\n"
-+ ".global ScaleYUVToRGB32Row_SSE\n"
-+ ".type ScaleYUVToRGB32Row_SSE, @function\n"
-+"ScaleYUVToRGB32Row_SSE:\n"
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x28(%esp),%edi\n"
- "mov 0x2c(%esp),%esi\n"
- "mov 0x30(%esp),%ebp\n"
- "mov 0x34(%esp),%ecx\n"
- "xor %ebx,%ebx\n"
-- "jmp scaleend\n"
--
--"scaleloop:"
-+ "jmp 1f\n"
-+
-+"0:"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
- "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
-@@ -363,22 +380,22 @@ void ScaleYUVToRGB32Row(const uint8* y_b
- "movq kCoefficientsRgbY(,%eax,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
--"scaleend:"
-+"1:"
- "sub $0x2,%ecx\n"
-- "jns scaleloop\n"
-+ "jns 0b\n"
-
- "and $0x1,%ecx\n"
-- "je scaledone\n"
-+ "je 2f\n"
-
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
-@@ -387,51 +404,71 @@ void ScaleYUVToRGB32Row(const uint8* y_b
- "sar $0x10,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%ebp)\n"
-
--"scaledone:"
-+"2:"
- "popa\n"
- "ret\n"
-+#if !defined(XP_MACOSX)
-+ ".previous\n"
-+#endif
- );
-
--void LinearScaleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int source_dx);
-+void ScaleYUVToRGB32Row(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx)
-+{
-+ if (mozilla::supports_sse()) {
-+ ScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf,
-+ width, source_dx);
-+ return; // SSE path already wrote the row; do not also run the C fallback.
-+ }
-+
-+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-+}
-+
-+void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx);
- asm(
- ".text\n"
-- ".global LinearScaleYUVToRGB32Row\n"
--"LinearScaleYUVToRGB32Row:\n"
-+ ".global LinearScaleYUVToRGB32Row_SSE\n"
-+ ".type LinearScaleYUVToRGB32Row_SSE, @function\n"
-+"LinearScaleYUVToRGB32Row_SSE:\n"
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x28(%esp),%edi\n"
- "mov 0x30(%esp),%ebp\n"
-
- // source_width = width * source_dx + ebx
- "mov 0x34(%esp), %ecx\n"
- "imull 0x38(%esp), %ecx\n"
- "mov %ecx, 0x34(%esp)\n"
-
- "mov 0x38(%esp), %ecx\n"
- "xor %ebx,%ebx\n" // x = 0
- "cmp $0x20000,%ecx\n" // if source_dx >= 2.0
-- "jl .lscaleend\n"
-+ "jl 1f\n"
- "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less
-- "jmp .lscaleend\n"
--
--".lscaleloop:"
-- "mov %ebx,%eax\n"
-- "sar $0x11,%eax\n"
-+ "jmp 1f\n"
-+
-+"0:"
-+ "mov %ebx,%eax\n"
-+ "sar $0x11,%eax\n"
-
- "movzbl (%edi,%eax,1),%ecx\n"
- "movzbl 1(%edi,%eax,1),%esi\n"
- "mov %ebx,%eax\n"
- "andl $0x1fffe, %eax \n"
- "imul %eax, %esi \n"
- "xorl $0x1fffe, %eax \n"
- "imul %eax, %ecx \n"
-@@ -464,17 +501,17 @@ void LinearScaleYUVToRGB32Row(const uint
- "imul %eax, %esi \n"
- "xorl $0xffff, %eax \n"
- "imul %eax, %ecx \n"
- "addl %esi, %ecx \n"
- "shrl $16, %ecx \n"
- "movq kCoefficientsRgbY(,%ecx,8),%mm1\n"
-
- "cmp 0x34(%esp), %ebx\n"
-- "jge .lscalelastpixel\n"
-+ "jge 2f\n"
-
- "mov %ebx,%eax\n"
- "sar $0x10,%eax\n"
- "movzbl (%edx,%eax,1),%ecx\n"
- "movzbl 1(%edx,%eax,1),%esi\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "andl $0xffff, %eax \n"
-@@ -488,56 +525,76 @@ void LinearScaleYUVToRGB32Row(const uint
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
-
--".lscaleend:"
-+"1:"
- "cmp 0x34(%esp), %ebx\n"
-- "jl .lscaleloop\n"
-+ "jl 0b\n"
- "popa\n"
- "ret\n"
-
--".lscalelastpixel:"
-+"2:"
- "paddsw %mm0, %mm1\n"
- "psraw $6, %mm1\n"
- "packuswb %mm1, %mm1\n"
- "movd %mm1, (%ebp)\n"
- "popa\n"
- "ret\n"
-+#if !defined(XP_MACOSX)
-+ ".previous\n"
-+#endif
- );
-
--#elif USE_MMX && !defined(ARCH_CPU_X86_64) && defined(__PIC__)
--
--extern void PICConvertYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int16 *kCoefficientsRgbY);
-+void LinearScaleYUVToRGB32Row(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx)
-+{
-+ if (mozilla::supports_sse()) {
-+ LinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf,
-+ width, source_dx);
-+ return; // SSE path already wrote the row; do not also run the C fallback.
-+ }
-+
-+ LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-+}
-+
-+#elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && defined(__PIC__)
-+
-+void PICConvertYUVToRGB32Row_SSE(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int16 *kCoefficientsRgbY);
-+
- asm(
- ".text\n"
--#if defined(OS_MACOSX)
--"_PICConvertYUVToRGB32Row:\n"
-+#if defined(XP_MACOSX)
-+"_PICConvertYUVToRGB32Row_SSE:\n"
- #else
--"PICConvertYUVToRGB32Row:\n"
-+"PICConvertYUVToRGB32Row_SSE:\n"
- #endif
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x28(%esp),%edi\n"
- "mov 0x2c(%esp),%esi\n"
- "mov 0x30(%esp),%ebp\n"
- "mov 0x38(%esp),%ecx\n"
-
-- "jmp .Lconvertend\n"
--
--".Lconvertloop:"
-+ "jmp 1f\n"
-+
-+"0:"
- "movzbl (%edi),%eax\n"
- "add $0x1,%edi\n"
- "movzbl (%esi),%ebx\n"
- "add $0x1,%esi\n"
- "movq 2048(%ecx,%eax,8),%mm0\n"
- "movzbl (%edx),%eax\n"
- "paddsw 4096(%ecx,%ebx,8),%mm0\n"
- "movzbl 0x1(%edx),%ebx\n"
-@@ -546,72 +603,81 @@ extern void PICConvertYUVToRGB32Row(cons
- "movq 0(%ecx,%ebx,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
--".Lconvertend:"
-+"1:"
- "subl $0x2,0x34(%esp)\n"
-- "jns .Lconvertloop\n"
-+ "jns 0b\n"
-
- "andl $0x1,0x34(%esp)\n"
-- "je .Lconvertdone\n"
-+ "je 2f\n"
-
- "movzbl (%edi),%eax\n"
- "movq 2048(%ecx,%eax,8),%mm0\n"
- "movzbl (%esi),%eax\n"
- "paddsw 4096(%ecx,%eax,8),%mm0\n"
- "movzbl (%edx),%eax\n"
- "movq 0(%ecx,%eax,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%ebp)\n"
--".Lconvertdone:\n"
-+"2:"
- "popa\n"
- "ret\n"
-+#if !defined(XP_MACOSX)
-+ ".previous\n"
-+#endif
- );
-
- void FastConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
-- int width) {
-- PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width,
-- &kCoefficientsRgbY[0][0]);
--}
--
--extern void PICScaleYUVToRGB32Row(const uint8* y_buf,
-+ int width)
-+{
-+ if (mozilla::supports_sse()) {
-+ PICConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width,
-+ &kCoefficientsRgbY[0][0]);
-+ return;
-+ }
-+
-+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
-+}
-+
-+void PICScaleYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx,
- int16 *kCoefficientsRgbY);
-
- asm(
- ".text\n"
--#if defined(OS_MACOSX)
--"_PICScaleYUVToRGB32Row:\n"
-+#if defined(XP_MACOSX)
-+"_PICScaleYUVToRGB32Row_SSE:\n"
- #else
--"PICScaleYUVToRGB32Row:\n"
-+"PICScaleYUVToRGB32Row_SSE:\n"
- #endif
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x28(%esp),%edi\n"
- "mov 0x2c(%esp),%esi\n"
- "mov 0x30(%esp),%ebp\n"
- "mov 0x3c(%esp),%ecx\n"
- "xor %ebx,%ebx\n"
-- "jmp Lscaleend\n"
--
--"Lscaleloop:"
-+ "jmp 1f\n"
-+
-+"0:"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq 2048(%ecx,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
- "paddsw 4096(%ecx,%eax,8),%mm0\n"
-@@ -627,22 +693,22 @@ extern void PICScaleYUVToRGB32Row(const
- "movq 0(%ecx,%eax,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
--"Lscaleend:"
-+"1:"
- "subl $0x2,0x34(%esp)\n"
-- "jns Lscaleloop\n"
-+ "jns 0b\n"
-
- "andl $0x1,0x34(%esp)\n"
-- "je Lscaledone\n"
-+ "je 2f\n"
-
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq 2048(%ecx,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
-@@ -651,66 +717,75 @@ extern void PICScaleYUVToRGB32Row(const
- "sar $0x10,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq 0(%ecx,%eax,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%ebp)\n"
-
--"Lscaledone:"
-+"2:"
- "popa\n"
- "ret\n"
-+#if !defined(XP_MACOSX)
-+ ".previous\n"
-+#endif
- );
-
--
- void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
-- int source_dx) {
-- PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
-- &kCoefficientsRgbY[0][0]);
--}
--
--void PICLinearScaleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int source_dx,
-- int16 *kCoefficientsRgbY);
-+ int source_dx)
-+{
-+ if (mozilla::supports_sse()) {
-+ PICScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
-+ &kCoefficientsRgbY[0][0]);
-+ return;
-+ }
-+
-+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-+}
-+
-+void PICLinearScaleYUVToRGB32Row_SSE(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx,
-+ int16 *kCoefficientsRgbY);
-+
- asm(
- ".text\n"
--#if defined(OS_MACOSX)
--"_PICLinearScaleYUVToRGB32Row:\n"
-+#if defined(XP_MACOSX)
-+"_PICLinearScaleYUVToRGB32Row_SSE:\n"
- #else
--"PICLinearScaleYUVToRGB32Row:\n"
-+"PICLinearScaleYUVToRGB32Row_SSE:\n"
- #endif
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x30(%esp),%ebp\n"
- "mov 0x34(%esp),%ecx\n"
- "mov 0x3c(%esp),%edi\n"
- "xor %ebx,%ebx\n"
-
- // source_width = width * source_dx + ebx
- "mov 0x34(%esp), %ecx\n"
- "imull 0x38(%esp), %ecx\n"
- "mov %ecx, 0x34(%esp)\n"
-
- "mov 0x38(%esp), %ecx\n"
- "xor %ebx,%ebx\n" // x = 0
- "cmp $0x20000,%ecx\n" // if source_dx >= 2.0
-- "jl .lscaleend\n"
-+ "jl 1f\n"
- "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less
-- "jmp .lscaleend\n"
--
--".lscaleloop:"
-+ "jmp 1f\n"
-+
-+"0:"
- "mov 0x28(%esp),%esi\n"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
-
- "movzbl (%esi,%eax,1),%ecx\n"
- "movzbl 1(%esi,%eax,1),%esi\n"
- "mov %ebx,%eax\n"
- "andl $0x1fffe, %eax \n"
-@@ -746,17 +821,17 @@ void PICLinearScaleYUVToRGB32Row(const u
- "imul %eax, %esi \n"
- "xorl $0xffff, %eax \n"
- "imul %eax, %ecx \n"
- "addl %esi, %ecx \n"
- "shrl $16, %ecx \n"
- "movq (%edi,%ecx,8),%mm1\n"
-
- "cmp 0x34(%esp), %ebx\n"
-- "jge .lscalelastpixel\n"
-+ "jge 2f\n"
-
- "mov %ebx,%eax\n"
- "sar $0x10,%eax\n"
- "movzbl (%edx,%eax,1),%ecx\n"
- "movzbl 1(%edx,%eax,1),%esi\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "andl $0xffff, %eax \n"
-@@ -770,154 +845,71 @@ void PICLinearScaleYUVToRGB32Row(const u
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
-
--".lscaleend:"
-+"1:"
- "cmp %ebx, 0x34(%esp)\n"
-- "jg .lscaleloop\n"
-+ "jg 0b\n"
- "popa\n"
- "ret\n"
-
--".lscalelastpixel:"
-+"2:"
- "paddsw %mm0, %mm1\n"
- "psraw $6, %mm1\n"
- "packuswb %mm1, %mm1\n"
- "movd %mm1, (%ebp)\n"
- "popa\n"
- "ret\n"
-+#if !defined(XP_MACOSX)
-+ ".previous\n"
-+#endif
- );
-
-+
- void LinearScaleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int source_dx) {
-- PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
-- &kCoefficientsRgbY[0][0]);
--}
--
--#else // USE_MMX
--
--// C reference code that mimic the YUV assembly.
--#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
--#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
-- (((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
--
--static inline void YuvPixel(uint8 y,
-- uint8 u,
-- uint8 v,
-- uint8* rgb_buf) {
--
-- int b = kCoefficientsRgbY[256+u][0];
-- int g = kCoefficientsRgbY[256+u][1];
-- int r = kCoefficientsRgbY[256+u][2];
-- int a = kCoefficientsRgbY[256+u][3];
--
-- b = paddsw(b, kCoefficientsRgbY[512+v][0]);
-- g = paddsw(g, kCoefficientsRgbY[512+v][1]);
-- r = paddsw(r, kCoefficientsRgbY[512+v][2]);
-- a = paddsw(a, kCoefficientsRgbY[512+v][3]);
--
-- b = paddsw(b, kCoefficientsRgbY[y][0]);
-- g = paddsw(g, kCoefficientsRgbY[y][1]);
-- r = paddsw(r, kCoefficientsRgbY[y][2]);
-- a = paddsw(a, kCoefficientsRgbY[y][3]);
--
-- b >>= 6;
-- g >>= 6;
-- r >>= 6;
-- a >>= 6;
--
-- *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) |
-- (packuswb(g) << 8) |
-- (packuswb(r) << 16) |
-- (packuswb(a) << 24);
--}
--
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx)
-+{
-+ if (mozilla::supports_sse()) {
-+ PICLinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width,
-+ source_dx, &kCoefficientsRgbY[0][0]);
-+ return;
-+ }
-+
-+ LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-+}
-+#else
- void FastConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
-- for (int x = 0; x < width; x += 2) {
-- uint8 u = u_buf[x >> 1];
-- uint8 v = v_buf[x >> 1];
-- uint8 y0 = y_buf[x];
-- YuvPixel(y0, u, v, rgb_buf);
-- if ((x + 1) < width) {
-- uint8 y1 = y_buf[x + 1];
-- YuvPixel(y1, u, v, rgb_buf + 4);
-- }
-- rgb_buf += 8; // Advance 2 pixels.
-- }
--}
--
--// 16.16 fixed point is used. A shift by 16 isolates the integer.
--// A shift by 17 is used to further subsample the chrominence channels.
--// & 0xffff isolates the fixed point fraction. >> 2 to get the upper 2 bits,
--// for 1/65536 pixel accurate interpolation.
-+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
-+}
-+
- void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
-- int x = 0;
-- for (int i = 0; i < width; i += 2) {
-- int y = y_buf[x >> 16];
-- int u = u_buf[(x >> 17)];
-- int v = v_buf[(x >> 17)];
-- YuvPixel(y, u, v, rgb_buf);
-- x += source_dx;
-- if ((i + 1) < width) {
-- y = y_buf[x >> 16];
-- YuvPixel(y, u, v, rgb_buf+4);
-- x += source_dx;
-- }
-- rgb_buf += 8;
-- }
--}
-+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-+}
-
- void LinearScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
-- int x = 0;
-- if (source_dx >= 0x20000) {
-- x = 32768;
-- }
-- for (int i = 0; i < width; i += 2) {
-- int y0 = y_buf[x >> 16];
-- int y1 = y_buf[(x >> 16) + 1];
-- int u0 = u_buf[(x >> 17)];
-- int u1 = u_buf[(x >> 17) + 1];
-- int v0 = v_buf[(x >> 17)];
-- int v1 = v_buf[(x >> 17) + 1];
-- int y_frac = (x & 65535);
-- int uv_frac = ((x >> 1) & 65535);
-- int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
-- int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16;
-- int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16;
-- YuvPixel(y, u, v, rgb_buf);
-- x += source_dx;
-- if ((i + 1) < width) {
-- y0 = y_buf[x >> 16];
-- y1 = y_buf[(x >> 16) + 1];
-- y_frac = (x & 65535);
-- y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
-- YuvPixel(y, u, v, rgb_buf+4);
-- x += source_dx;
-- }
-- rgb_buf += 8;
-- }
--}
--
--#endif // USE_MMX
--} // extern "C"
--
-+ LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-+}
-+#endif
-+
-+}
-diff --git a/gfx/ycbcr/yuv_row_table.cpp b/gfx/ycbcr/yuv_row_table.cpp
---- a/gfx/ycbcr/yuv_row_table.cpp
-+++ b/gfx/ycbcr/yuv_row_table.cpp
-@@ -1,13 +1,13 @@
- // Copyright (c) 2010 The Chromium Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file.
-
--#include "media/base/yuv_row.h"
-+#include "yuv_row.h"
-
- extern "C" {
-
- #define RGBY(i) { \
- static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
- static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
- static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
- 0 \
-diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp
---- a/gfx/ycbcr/yuv_row_win.cpp
-+++ b/gfx/ycbcr/yuv_row_win.cpp
-@@ -1,26 +1,27 @@
- // Copyright (c) 2010 The Chromium Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file.
-
--#include "media/base/yuv_row.h"
-+#include "yuv_row.h"
-+#include "mozilla/SSE.h"
-
- #define kCoefficientsRgbU kCoefficientsRgbY + 2048
- #define kCoefficientsRgbV kCoefficientsRgbY + 4096
-
- extern "C" {
-
--#if USE_MMX
--__declspec(naked)
--void FastConvertYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width) {
-+#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86)
-+__declspec(naked)
-+void FastConvertYUVToRGB32Row_SSE(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- jmp convertend
-@@ -64,22 +65,22 @@ void FastConvertYUVToRGB32Row(const uint
- convertdone :
-
- popad
- ret
- }
- }
-
- __declspec(naked)
--void ConvertYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int step) {
-+void ConvertYUVToRGB32Row_SSE(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int step) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- mov ebx, [esp + 32 + 24] // step
-@@ -125,23 +126,23 @@ void ConvertYUVToRGB32Row(const uint8* y
- wdone :
-
- popad
- ret
- }
- }
-
- __declspec(naked)
--void RotateConvertYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int ystep,
-- int uvstep) {
-+void RotateConvertYUVToRGB32Row_SSE(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int ystep,
-+ int uvstep) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- jmp wend
-@@ -188,21 +189,21 @@ void RotateConvertYUVToRGB32Row(const ui
- wdone :
-
- popad
- ret
- }
- }
-
- __declspec(naked)
--void DoubleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width) {
-+void DoubleYUVToRGB32Row_SSE(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- jmp wend
-@@ -256,26 +257,26 @@ void DoubleYUVToRGB32Row(const uint8* y_
- jns wloop1
- wdone :
- popad
- ret
- }
- }
-
- // This version does general purpose scaling by any amount, up or down.
--// The only thing it can not do it rotation by 90 or 270.
--// For performance the chroma is under sampled, reducing cost of a 3x
-+// The only thing it cannot do is rotation by 90 or 270.
-+// For performance the chroma is under-sampled, reducing cost of a 3x
- // 1080p scale from 8.4 ms to 5.4 ms.
- __declspec(naked)
--void ScaleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int source_dx) {
-+void ScaleYUVToRGB32Row_SSE(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- xor ebx, ebx // x
-@@ -333,22 +334,22 @@ void ScaleYUVToRGB32Row(const uint8* y_b
-
- scaledone :
- popad
- ret
- }
- }
-
- __declspec(naked)
--void LinearScaleYUVToRGB32Row(const uint8* y_buf,
-- const uint8* u_buf,
-- const uint8* v_buf,
-- uint8* rgb_buf,
-- int width,
-- int source_dx) {
-+void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- // [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- imul ecx, [esp + 32 + 24] // source_dx
-@@ -438,152 +439,60 @@ lscalelastpixel:
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- movd [ebp], mm1
- popad
- ret
- };
- }
--#else // USE_MMX
--
--// C reference code that mimic the YUV assembly.
--#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
--#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
-- (((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
--
--static inline void YuvPixel(uint8 y,
-- uint8 u,
-- uint8 v,
-- uint8* rgb_buf) {
--
-- int b = kCoefficientsRgbY[256+u][0];
-- int g = kCoefficientsRgbY[256+u][1];
-- int r = kCoefficientsRgbY[256+u][2];
-- int a = kCoefficientsRgbY[256+u][3];
--
-- b = paddsw(b, kCoefficientsRgbY[512+v][0]);
-- g = paddsw(g, kCoefficientsRgbY[512+v][1]);
-- r = paddsw(r, kCoefficientsRgbY[512+v][2]);
-- a = paddsw(a, kCoefficientsRgbY[512+v][3]);
--
-- b = paddsw(b, kCoefficientsRgbY[y][0]);
-- g = paddsw(g, kCoefficientsRgbY[y][1]);
-- r = paddsw(r, kCoefficientsRgbY[y][2]);
-- a = paddsw(a, kCoefficientsRgbY[y][3]);
--
-- b >>= 6;
-- g >>= 6;
-- r >>= 6;
-- a >>= 6;
--
-- *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) |
-- (packuswb(g) << 8) |
-- (packuswb(r) << 16) |
-- (packuswb(a) << 24);
--}
--
--#if TEST_MMX_YUV
--static inline void YuvPixel(uint8 y,
-- uint8 u,
-- uint8 v,
-- uint8* rgb_buf) {
--
-- __asm {
-- movzx eax, u
-- movq mm0, [kCoefficientsRgbY+2048 + 8 * eax]
-- movzx eax, v
-- paddsw mm0, [kCoefficientsRgbY+4096 + 8 * eax]
-- movzx eax, y
-- movq mm1, [kCoefficientsRgbY + 8 * eax]
-- paddsw mm1, mm0
-- psraw mm1, 6
-- packuswb mm1, mm1
-- mov eax, rgb_buf
-- movd [eax], mm1
-- emms
-- }
--}
--#endif
-+#endif // if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86)
-
- void FastConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
-- for (int x = 0; x < width; x += 2) {
-- uint8 u = u_buf[x >> 1];
-- uint8 v = v_buf[x >> 1];
-- uint8 y0 = y_buf[x];
-- YuvPixel(y0, u, v, rgb_buf);
-- if ((x + 1) < width) {
-- uint8 y1 = y_buf[x + 1];
-- YuvPixel(y1, u, v, rgb_buf + 4);
-- }
-- rgb_buf += 8; // Advance 2 pixels.
-- }
--}
--
--// 16.16 fixed point is used. A shift by 16 isolates the integer.
--// A shift by 17 is used to further subsample the chrominence channels.
--// & 0xffff isolates the fixed point fraction. >> 2 to get the upper 2 bits,
--// for 1/65536 pixel accurate interpolation.
-+#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86)
-+ if (mozilla::supports_sse()) {
-+ FastConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width);
-+ return;
-+ }
-+#endif
-+
-+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
-+}
-+
- void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
-- int x = 0;
-- for (int i = 0; i < width; i += 2) {
-- int y = y_buf[x >> 16];
-- int u = u_buf[(x >> 17)];
-- int v = v_buf[(x >> 17)];
-- YuvPixel(y, u, v, rgb_buf);
-- x += source_dx;
-- if ((i + 1) < width) {
-- y = y_buf[x >> 16];
-- YuvPixel(y, u, v, rgb_buf+4);
-- x += source_dx;
-- }
-- rgb_buf += 8;
-- }
--}
-+
-+#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86)
-+ if (mozilla::supports_sse()) {
-+ ScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-+ return;
-+ }
-+#endif
-+
-+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-+}
-
- void LinearScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
-- int x = 0;
-- if (source_dx >= 0x20000) {
-- x = 32768;
-- }
-- for (int i = 0; i < width; i += 2) {
-- int y0 = y_buf[x >> 16];
-- int y1 = y_buf[(x >> 16) + 1];
-- int u0 = u_buf[(x >> 17)];
-- int u1 = u_buf[(x >> 17) + 1];
-- int v0 = v_buf[(x >> 17)];
-- int v1 = v_buf[(x >> 17) + 1];
-- int y_frac = (x & 65535);
-- int uv_frac = ((x >> 1) & 65535);
-- int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
-- int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16;
-- int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16;
-- YuvPixel(y, u, v, rgb_buf);
-- x += source_dx;
-- if ((i + 1) < width) {
-- y0 = y_buf[x >> 16];
-- y1 = y_buf[(x >> 16) + 1];
-- y_frac = (x & 65535);
-- y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
-- YuvPixel(y, u, v, rgb_buf+4);
-- x += source_dx;
-- }
-- rgb_buf += 8;
-- }
--}
--
--#endif // USE_MMX
--} // extern "C"
--
-+#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86)
-+ if (mozilla::supports_sse()) {
-+ LinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width,
-+ source_dx);
-+ return;
-+ }
-+#endif
-+
-+ LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-+}
-+
-+} // extern "C"
diff --git a/gfx/ycbcr/moz.build b/gfx/ycbcr/moz.build
deleted file mode 100644
index 7e04c96ed..000000000
--- a/gfx/ycbcr/moz.build
+++ /dev/null
@@ -1,64 +0,0 @@
-# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-EXPORTS += [
- 'YCbCrUtils.h',
-]
-
-SOURCES += [
- 'scale_yuv_argb.cpp',
- 'ycbcr_to_rgb565.cpp',
- 'YCbCrUtils.cpp',
- 'yuv_convert.cpp',
- 'yuv_row_c.cpp',
- 'yuv_row_table.cpp',
-]
-
-if CONFIG['INTEL_ARCHITECTURE']:
- # These files use MMX and SSE2 intrinsics, so they need special compile flags
- # on some compilers.
- SOURCES += ['yuv_convert_sse2.cpp']
- SOURCES['yuv_convert_sse2.cpp'].flags += CONFIG['SSE2_FLAGS']
-
- # MSVC doesn't support MMX when targeting AMD64.
- if CONFIG['_MSC_VER']:
- if CONFIG['OS_TEST'] != 'x86_64':
- SOURCES += [
- 'yuv_convert_mmx.cpp',
- ]
- else:
- SOURCES += ['yuv_convert_mmx.cpp']
- SOURCES['yuv_convert_mmx.cpp'].flags += CONFIG['MMX_FLAGS']
-
-if CONFIG['_MSC_VER']:
- if CONFIG['OS_TEST'] == 'x86_64':
- SOURCES += [
- 'yuv_row_win64.cpp',
- ]
- else:
- SOURCES += [
- 'yuv_row_win.cpp',
- ]
-elif CONFIG['OS_ARCH'] in ('Linux', 'SunOS', 'Darwin', 'DragonFly',
- 'FreeBSD', 'NetBSD', 'OpenBSD'):
- SOURCES += [
- 'yuv_row_posix.cpp',
- ]
-else:
- SOURCES += [
- 'yuv_row_other.cpp',
- ]
-
-if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['HAVE_ARM_NEON']:
- SOURCES += [
- 'yuv_row_arm.s',
- ]
- SOURCES += [
- 'yuv_convert_arm.cpp',
- ]
-
-LOCAL_INCLUDES += ['/media/libyuv/include']
-
-FINAL_LIBRARY = 'xul'
diff --git a/gfx/ycbcr/scale_yuv_argb.cpp b/gfx/ycbcr/scale_yuv_argb.cpp
deleted file mode 100644
index 13b16c802..000000000
--- a/gfx/ycbcr/scale_yuv_argb.cpp
+++ /dev/null
@@ -1,1128 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- * Copyright 2016 Mozilla Foundation
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "scale_yuv_argb.h"
-
-#include "libyuv/scale.h"
-
-#include <assert.h>
-#include <string.h>
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/row.h"
-#include "libyuv/scale_row.h"
-#include "libyuv/video_common.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// YUV to RGB conversion and scaling functions were implemented by referencing
-// scale_argb.cc
-//
-// libyuv already has ScaleYUVToARGBBilinearUp(), but its implementation is not
-// completed yet. Implementations of the functions are based on it.
-// At first, ScaleYUVToARGBBilinearUp() was implemented by modidying the
-// libyuv's one. Then all another functions were implemented similarly.
-//
-// Function relationship between yuv_convert.cpp abd scale_argb.cc are like
-// the followings
-// - ScaleYUVToARGBDown2() <-- ScaleARGBDown2()
-// - ScaleYUVToARGBDownEven() <-- ScaleARGBDownEven()
-// - ScaleYUVToARGBBilinearDown() <-- ScaleARGBBilinearDown()
-// - ScaleYUVToARGBBilinearUp() <-- ScaleARGBBilinearUp() and ScaleYUVToARGBBilinearUp() in libyuv
-// - ScaleYUVToARGBSimple() <-- ScaleARGBSimple()
-// - ScaleYUVToARGB() <-- ScaleARGB() // Removed some function calls for simplicity.
-// - YUVToARGBScale() <-- ARGBScale()
-//
-// Callings and selections of InterpolateRow() and ScaleARGBFilterCols() were
-// kept as same as possible.
-//
-// The followings changes were done to each scaling functions.
-//
-// -[1] Allocate YUV conversion buffer and use it as source buffer of scaling.
-// Its usage is borrowed from the libyuv's ScaleYUVToARGBBilinearUp().
-// -[2] Conversion from YUV to RGB was abstracted as YUVBuferIter.
-// It is for handling multiple yuv color formats.
-// -[3] Modified scaling functions as to handle YUV conversion buffer and
-// use YUVBuferIter.
-// -[4] Color conversion function selections in YUVBuferIter were borrowed from
-// I444ToARGBMatrix(), I422ToARGBMatrix() and I420ToARGBMatrix()
-
-static __inline int Abs(int v) {
- return v >= 0 ? v : -v;
-}
-
-struct YUVBuferIter {
- int src_width;
- int src_height;
- int src_stride_y;
- int src_stride_u;
- int src_stride_v;
- const uint8* src_y;
- const uint8* src_u;
- const uint8* src_v;
-
- uint32 src_fourcc;
- const struct YuvConstants* yuvconstants;
- int y_index;
- const uint8* src_row_y;
- const uint8* src_row_u;
- const uint8* src_row_v;
-
- void (*YUVToARGBRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width);
- void (*MoveTo)(YUVBuferIter& iter, int y_index);
- void (*MoveToNextRow)(YUVBuferIter& iter);
-};
-
-void YUVBuferIter_InitI422(YUVBuferIter& iter) {
- iter.YUVToARGBRow = I422ToARGBRow_C;
-#if defined(HAS_I422TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- iter.YUVToARGBRow = I422ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(iter.src_width, 8)) {
- iter.YUVToARGBRow = I422ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- iter.YUVToARGBRow = I422ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(iter.src_width, 16)) {
- iter.YUVToARGBRow = I422ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- iter.YUVToARGBRow = I422ToARGBRow_Any_NEON;
- if (IS_ALIGNED(iter.src_width, 8)) {
- iter.YUVToARGBRow = I422ToARGBRow_NEON;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(iter.src_width, 4) &&
- IS_ALIGNED(iter.src_y, 4) && IS_ALIGNED(iter.src_stride_y, 4) &&
- IS_ALIGNED(iter.src_u, 2) && IS_ALIGNED(iter.src_stride_u, 2) &&
- IS_ALIGNED(iter.src_v, 2) && IS_ALIGNED(iter.src_stride_v, 2) {
- // Always satisfy IS_ALIGNED(argb_cnv_row, 4) && IS_ALIGNED(argb_cnv_rowstride, 4)
- iter.YUVToARGBRow = I422ToARGBRow_DSPR2;
- }
-#endif
-}
-
-void YUVBuferIter_InitI444(YUVBuferIter& iter) {
- iter.YUVToARGBRow = I444ToARGBRow_C;
-#if defined(HAS_I444TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- iter.YUVToARGBRow = I444ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(iter.src_width, 8)) {
- iter.YUVToARGBRow = I444ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_I444TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- iter.YUVToARGBRow = I444ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(iter.src_width, 16)) {
- iter.YUVToARGBRow = I444ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_I444TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- iter.YUVToARGBRow = I444ToARGBRow_Any_NEON;
- if (IS_ALIGNED(iter.src_width, 8)) {
- iter.YUVToARGBRow = I444ToARGBRow_NEON;
- }
- }
-#endif
-}
-
-
-static void YUVBuferIter_MoveToForI444(YUVBuferIter& iter, int y_index) {
- iter.y_index = y_index;
- iter.src_row_y = iter.src_y + y_index * iter.src_stride_y;
- iter.src_row_u = iter.src_u + y_index * iter.src_stride_u;
- iter.src_row_v = iter.src_v + y_index * iter.src_stride_v;
-}
-
-static void YUVBuferIter_MoveToNextRowForI444(YUVBuferIter& iter) {
- iter.src_row_y += iter.src_stride_y;
- iter.src_row_u += iter.src_stride_u;
- iter.src_row_v += iter.src_stride_v;
- iter.y_index++;
-}
-
-static void YUVBuferIter_MoveToForI422(YUVBuferIter& iter, int y_index) {
- iter.y_index = y_index;
- iter.src_row_y = iter.src_y + y_index * iter.src_stride_y;
- iter.src_row_u = iter.src_u + y_index * iter.src_stride_u;
- iter.src_row_v = iter.src_v + y_index * iter.src_stride_v;
-}
-
-static void YUVBuferIter_MoveToNextRowForI422(YUVBuferIter& iter) {
- iter.src_row_y += iter.src_stride_y;
- iter.src_row_u += iter.src_stride_u;
- iter.src_row_v += iter.src_stride_v;
- iter.y_index++;
-}
-
-static void YUVBuferIter_MoveToForI420(YUVBuferIter& iter, int y_index) {
- const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
- int uv_y_index = y_index >> kYShift;
-
- iter.y_index = y_index;
- iter.src_row_y = iter.src_y + y_index * iter.src_stride_y;
- iter.src_row_u = iter.src_u + uv_y_index * iter.src_stride_u;
- iter.src_row_v = iter.src_v + uv_y_index * iter.src_stride_v;
-}
-
-static void YUVBuferIter_MoveToNextRowForI420(YUVBuferIter& iter) {
- iter.src_row_y += iter.src_stride_y;
- if (iter.y_index & 1) {
- iter.src_row_u += iter.src_stride_u;
- iter.src_row_v += iter.src_stride_v;
- }
- iter.y_index++;
-}
-
-static __inline void YUVBuferIter_ConvertToARGBRow(YUVBuferIter& iter, uint8* argb_row) {
- iter.YUVToARGBRow(iter.src_row_y, iter.src_row_u, iter.src_row_v, argb_row, iter.yuvconstants, iter.src_width);
-}
-
-void YUVBuferIter_Init(YUVBuferIter& iter, uint32 src_fourcc, mozilla::YUVColorSpace yuv_color_space) {
- iter.src_fourcc = src_fourcc;
- iter.y_index = 0;
- iter.src_row_y = iter.src_y;
- iter.src_row_u = iter.src_u;
- iter.src_row_v = iter.src_v;
- if (yuv_color_space == mozilla::YUVColorSpace::BT709) {
- iter.yuvconstants = &kYuvH709Constants;
- } else {
- iter.yuvconstants = &kYuvI601Constants;
- }
-
- if (src_fourcc == FOURCC_I444) {
- YUVBuferIter_InitI444(iter);
- iter.MoveTo = YUVBuferIter_MoveToForI444;
- iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI444;
- } else if(src_fourcc == FOURCC_I422){
- YUVBuferIter_InitI422(iter);
- iter.MoveTo = YUVBuferIter_MoveToForI422;
- iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI422;
- } else {
- assert(src_fourcc == FOURCC_I420); // Should be FOURCC_I420
- YUVBuferIter_InitI422(iter);
- iter.MoveTo = YUVBuferIter_MoveToForI420;
- iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI420;
- }
-}
-
-// ScaleARGB ARGB, 1/2
-// This is an optimized version for scaling down a ARGB to 1/2 of
-// its original size.
-static void ScaleYUVToARGBDown2(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride_y,
- int src_stride_u,
- int src_stride_v,
- int dst_stride_argb,
- const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int x, int dx, int y, int dy,
- enum FilterMode filtering,
- uint32 src_fourcc,
- mozilla::YUVColorSpace yuv_color_space) {
- int j;
-
- // Allocate 2 rows of ARGB for source conversion.
- const int kRowSize = (src_width * 4 + 15) & ~15;
- align_buffer_64(argb_cnv_row, kRowSize * 2);
- uint8* argb_cnv_rowptr = argb_cnv_row;
- int argb_cnv_rowstride = kRowSize;
-
- YUVBuferIter iter;
- iter.src_width = src_width;
- iter.src_height = src_height;
- iter.src_stride_y = src_stride_y;
- iter.src_stride_u = src_stride_u;
- iter.src_stride_v = src_stride_v;
- iter.src_y = src_y;
- iter.src_u = src_u;
- iter.src_v = src_v;
- YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
-
- void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) =
- filtering == kFilterNone ? ScaleARGBRowDown2_C :
- (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
- ScaleARGBRowDown2Box_C);
- assert(dx == 65536 * 2); // Test scale factor of 2.
- assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
- // Advance to odd row, even column.
- int yi = y >> 16;
- iter.MoveTo(iter, yi);
- ptrdiff_t x_offset;
- if (filtering == kFilterBilinear) {
- x_offset = (x >> 16) * 4;
- } else {
- x_offset = ((x >> 16) - 1) * 4;
- }
-#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 :
- (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 :
- ScaleARGBRowDown2Box_Any_SSE2);
- if (IS_ALIGNED(dst_width, 4)) {
- ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
- (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
- ScaleARGBRowDown2Box_SSE2);
- }
- }
-
-#endif
-#if defined(HAS_SCALEARGBROWDOWN2_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON :
- (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON :
- ScaleARGBRowDown2Box_Any_NEON);
- if (IS_ALIGNED(dst_width, 8)) {
- ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON :
- (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON :
- ScaleARGBRowDown2Box_NEON);
- }
- }
-#endif
-
- const int dyi = dy >> 16;
- int lastyi = yi;
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
- // Prepare next row if necessary
- if (filtering != kFilterLinear) {
- if ((yi + dyi) < (src_height - 1)) {
- iter.MoveTo(iter, yi + dyi);
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
- } else {
- argb_cnv_rowstride = 0;
- }
- }
-
- if (filtering == kFilterLinear) {
- argb_cnv_rowstride = 0;
- }
- const int max_yi = src_height - 1;
- const int max_yi_minus_dyi = max_yi - dyi;
- for (j = 0; j < dst_height; ++j) {
- if (yi != lastyi) {
- if (yi > max_yi) {
- yi = max_yi;
- }
- if (yi != lastyi) {
- if (filtering == kFilterLinear) {
- iter.MoveTo(iter, yi);
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
- lastyi = yi;
- } else {
- // Prepare current row
- if (yi == iter.y_index) {
- argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride;
- argb_cnv_rowstride = - argb_cnv_rowstride;
- } else {
- iter.MoveTo(iter, yi);
- argb_cnv_rowptr = argb_cnv_row;
- argb_cnv_rowstride = kRowSize;
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
- }
- // Prepare next row if necessary
- if (iter.y_index < max_yi) {
- int next_yi = yi < max_yi_minus_dyi ? yi + dyi : max_yi;
- iter.MoveTo(iter, next_yi);
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
- } else {
- argb_cnv_rowstride = 0;
- }
- lastyi = yi;
- }
- }
- }
- ScaleARGBRowDown2(argb_cnv_rowptr + x_offset, argb_cnv_rowstride, dst_argb, dst_width);
- dst_argb += dst_stride_argb;
- yi += dyi;
- }
-
- free_aligned_buffer_64(argb_cnv_row);
-}
-
-// ScaleARGB ARGB Even
-// This is an optimized version for scaling down a ARGB to even
-// multiple of its original size.
-static void ScaleYUVToARGBDownEven(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride_y,
- int src_stride_u,
- int src_stride_v,
- int dst_stride_argb,
- const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int x, int dx, int y, int dy,
- enum FilterMode filtering,
- uint32 src_fourcc,
- mozilla::YUVColorSpace yuv_color_space) {
- int j;
- // Allocate 2 rows of ARGB for source conversion.
- const int kRowSize = (src_width * 4 + 15) & ~15;
- align_buffer_64(argb_cnv_row, kRowSize * 2);
- uint8* argb_cnv_rowptr = argb_cnv_row;
- int argb_cnv_rowstride = kRowSize;
-
- int col_step = dx >> 16;
- void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
- int src_step, uint8* dst_argb, int dst_width) =
- filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
- assert(IS_ALIGNED(src_width, 2));
- assert(IS_ALIGNED(src_height, 2));
- int yi = y >> 16;
- const ptrdiff_t x_offset = (x >> 16) * 4;
-
-#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 :
- ScaleARGBRowDownEven_Any_SSE2;
- if (IS_ALIGNED(dst_width, 4)) {
- ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
- ScaleARGBRowDownEven_SSE2;
- }
- }
-#endif
-#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON :
- ScaleARGBRowDownEven_Any_NEON;
- if (IS_ALIGNED(dst_width, 4)) {
- ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
- ScaleARGBRowDownEven_NEON;
- }
- }
-#endif
-
- YUVBuferIter iter;
- iter.src_width = src_width;
- iter.src_height = src_height;
- iter.src_stride_y = src_stride_y;
- iter.src_stride_u = src_stride_u;
- iter.src_stride_v = src_stride_v;
- iter.src_y = src_y;
- iter.src_u = src_u;
- iter.src_v = src_v;
- YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
-
- const int dyi = dy >> 16;
- int lastyi = yi;
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
- // Prepare next row if necessary
- if (filtering != kFilterLinear) {
- if ((yi + dyi) < (src_height - 1)) {
- iter.MoveTo(iter, yi + dyi);
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
- } else {
- argb_cnv_rowstride = 0;
- }
- }
-
- if (filtering == kFilterLinear) {
- argb_cnv_rowstride = 0;
- }
- const int max_yi = src_height - 1;
- const int max_yi_minus_dyi = max_yi - dyi;
- for (j = 0; j < dst_height; ++j) {
- if (yi != lastyi) {
- if (yi > max_yi) {
- yi = max_yi;
- }
- if (yi != lastyi) {
- if (filtering == kFilterLinear) {
- iter.MoveTo(iter, yi);
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
- lastyi = yi;
- } else {
- // Prepare current row
- if (yi == iter.y_index) {
- argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride;
- argb_cnv_rowstride = - argb_cnv_rowstride;
- } else {
- iter.MoveTo(iter, yi);
- argb_cnv_rowptr = argb_cnv_row;
- argb_cnv_rowstride = kRowSize;
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
- }
- // Prepare next row if necessary
- if (iter.y_index < max_yi) {
- int next_yi = yi < max_yi_minus_dyi ? yi + dyi : max_yi;
- iter.MoveTo(iter, next_yi);
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
- } else {
- argb_cnv_rowstride = 0;
- }
- lastyi = yi;
- }
- }
- }
- ScaleARGBRowDownEven(argb_cnv_rowptr + x_offset, argb_cnv_rowstride, col_step, dst_argb, dst_width);
- dst_argb += dst_stride_argb;
- yi += dyi;
- }
- free_aligned_buffer_64(argb_cnv_row);
-}
-
-// Scale YUV to ARGB down with bilinear interpolation.
-static void ScaleYUVToARGBBilinearDown(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride_y,
- int src_stride_u,
- int src_stride_v,
- int dst_stride_argb,
- const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int x, int dx, int y, int dy,
- enum FilterMode filtering,
- uint32 src_fourcc,
- mozilla::YUVColorSpace yuv_color_space) {
- int j;
- void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
- ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
- InterpolateRow_C;
- void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) =
- (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
- int64 xlast = x + (int64)(dst_width - 1) * dx;
- int64 xl = (dx >= 0) ? x : xlast;
- int64 xr = (dx >= 0) ? xlast : x;
- int clip_src_width;
- xl = (xl >> 16) & ~3; // Left edge aligned.
- xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
- xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
- if (xr > src_width) {
- xr = src_width;
- }
- clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
- const ptrdiff_t xl_offset = xl * 4;
- x -= (int)(xl << 16);
-
- // Allocate 2 row of ARGB for source conversion.
- const int kRowSize = (src_width * 4 + 15) & ~15;
- align_buffer_64(argb_cnv_row, kRowSize * 2);
- uint8* argb_cnv_rowptr = argb_cnv_row;
- int argb_cnv_rowstride = kRowSize;
-
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(clip_src_width, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(clip_src_width, 32)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(clip_src_width, 16)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) &&
- IS_ALIGNED(src_argb, 4) && IS_ALIGNED(argb_cnv_rowstride, 4)) {
- InterpolateRow = InterpolateRow_Any_DSPR2;
- if (IS_ALIGNED(clip_src_width, 4)) {
- InterpolateRow = InterpolateRow_DSPR2;
- }
- }
-#endif
-#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
- }
-#endif
-#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
- if (IS_ALIGNED(dst_width, 4)) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
- }
- }
-#endif
-
- int yi = y >> 16;
-
- YUVBuferIter iter;
- iter.src_width = src_width;
- iter.src_height = src_height;
- iter.src_stride_y = src_stride_y;
- iter.src_stride_u = src_stride_u;
- iter.src_stride_v = src_stride_v;
- iter.src_y = src_y;
- iter.src_u = src_u;
- iter.src_v = src_v;
- YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
- iter.MoveTo(iter, yi);
-
- // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
- // Allocate a row of ARGB.
- align_buffer_64(row, clip_src_width * 4);
-
- int lastyi = yi;
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
- // Prepare next row if necessary
- if (filtering != kFilterLinear) {
- if ((yi + 1) < src_height) {
- iter.MoveToNextRow(iter);
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
- } else {
- argb_cnv_rowstride = 0;
- }
- }
-
- const int max_y = (src_height - 1) << 16;
- const int max_yi = src_height - 1;
- for (j = 0; j < dst_height; ++j) {
- yi = y >> 16;
- if (yi != lastyi) {
- if (y > max_y) {
- y = max_y;
- yi = y >> 16;
- }
- if (yi != lastyi) {
- if (filtering == kFilterLinear) {
- iter.MoveTo(iter, yi);
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
- lastyi = yi;
- } else {
- // Prepare current row
- if (yi == iter.y_index) {
- argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride;
- argb_cnv_rowstride = - argb_cnv_rowstride;
- } else {
- iter.MoveTo(iter, yi);
- argb_cnv_rowptr = argb_cnv_row;
- argb_cnv_rowstride = kRowSize;
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr);
- }
- // Prepare next row if necessary
- if (iter.y_index < max_yi) {
- iter.MoveToNextRow(iter);
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride);
- } else {
- argb_cnv_rowstride = 0;
- }
- lastyi = yi;
- }
- }
- }
- if (filtering == kFilterLinear) {
- ScaleARGBFilterCols(dst_argb, argb_cnv_rowptr + xl_offset, dst_width, x, dx);
- } else {
- int yf = (y >> 8) & 255;
- InterpolateRow(row, argb_cnv_rowptr + xl_offset, argb_cnv_rowstride, clip_src_width, yf);
- ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
- }
- dst_argb += dst_stride_argb;
- y += dy;
- }
- free_aligned_buffer_64(row);
- free_aligned_buffer_64(argb_cnv_row);
-}
-
-// Scale YUV to ARGB up with bilinear interpolation.
-static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride_y,
- int src_stride_u,
- int src_stride_v,
- int dst_stride_argb,
- const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int x, int dx, int y, int dy,
- enum FilterMode filtering,
- uint32 src_fourcc,
- mozilla::YUVColorSpace yuv_color_space) {
- int j;
- void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
- ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
- InterpolateRow_C;
- void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) =
- filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
- const int max_y = (src_height - 1) << 16;
-
- // Allocate 1 row of ARGB for source conversion.
- align_buffer_64(argb_cnv_row, src_width * 4);
-
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(dst_width, 8)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_DSPR2)
- if (TestCpuFlag(kCpuHasDSPR2) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- InterpolateRow = InterpolateRow_DSPR2;
- }
-#endif
- if (src_width >= 32768) {
- ScaleARGBFilterCols = filtering ?
- ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
- }
-#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
- if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
- }
-#endif
-#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
- if (filtering && TestCpuFlag(kCpuHasNEON)) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
- if (IS_ALIGNED(dst_width, 4)) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
- }
- }
-#endif
-#if defined(HAS_SCALEARGBCOLS_SSE2)
- if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
- ScaleARGBFilterCols = ScaleARGBCols_SSE2;
- }
-#endif
-#if defined(HAS_SCALEARGBCOLS_NEON)
- if (!filtering && TestCpuFlag(kCpuHasNEON)) {
- ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
- if (IS_ALIGNED(dst_width, 8)) {
- ScaleARGBFilterCols = ScaleARGBCols_NEON;
- }
- }
-#endif
- if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
- ScaleARGBFilterCols = ScaleARGBColsUp2_C;
-#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
- ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
- }
-#endif
- }
-
- if (y > max_y) {
- y = max_y;
- }
-
- int yi = y >> 16;
-
- YUVBuferIter iter;
- iter.src_width = src_width;
- iter.src_height = src_height;
- iter.src_stride_y = src_stride_y;
- iter.src_stride_u = src_stride_u;
- iter.src_stride_v = src_stride_v;
- iter.src_y = src_y;
- iter.src_u = src_u;
- iter.src_v = src_v;
- YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
- iter.MoveTo(iter, yi);
-
- // Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 4 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
-
- uint8* rowptr = row;
- int rowstride = kRowSize;
- int lastyi = yi;
-
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
- ScaleARGBFilterCols(rowptr, argb_cnv_row, dst_width, x, dx);
-
- if (filtering == kFilterLinear) {
- rowstride = 0;
- }
- // Prepare next row if necessary
- if (filtering != kFilterLinear) {
- if ((yi + 1) < src_height) {
- iter.MoveToNextRow(iter);
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
- ScaleARGBFilterCols(rowptr + rowstride, argb_cnv_row, dst_width, x, dx);
- }else {
- rowstride = 0;
- }
- }
-
- const int max_yi = src_height - 1;
- for (j = 0; j < dst_height; ++j) {
- yi = y >> 16;
- if (yi != lastyi) {
- if (y > max_y) {
- y = max_y;
- yi = y >> 16;
- }
- if (yi != lastyi) {
- if (filtering == kFilterLinear) {
- iter.MoveToNextRow(iter);
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
- ScaleARGBFilterCols(rowptr, argb_cnv_row, dst_width, x, dx);
- } else {
- // Prepare next row if necessary
- if (yi < max_yi) {
- iter.MoveToNextRow(iter);
- rowptr += rowstride;
- rowstride = -rowstride;
- // TODO(fbarchard): Convert the clipped region of row.
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
- ScaleARGBFilterCols(rowptr + rowstride, argb_cnv_row, dst_width, x, dx);
- } else {
- rowstride = 0;
- }
- }
- lastyi = yi;
- }
- }
- if (filtering == kFilterLinear) {
- InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
- } else {
- int yf = (y >> 8) & 255;
- InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
- }
- dst_argb += dst_stride_argb;
- y += dy;
- }
- free_aligned_buffer_64(row);
- free_aligned_buffer_64(argb_cnv_row);
-}
-
-// Scale ARGB to/from any dimensions, without interpolation.
-// Fixed point math is used for performance: The upper 16 bits
-// of x and dx is the integer part of the source position and
-// the lower 16 bits are the fixed decimal part.
-
-static void ScaleYUVToARGBSimple(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride_y,
- int src_stride_u,
- int src_stride_v,
- int dst_stride_argb,
- const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int x, int dx, int y, int dy,
- uint32 src_fourcc,
- mozilla::YUVColorSpace yuv_color_space) {
- int j;
- void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) =
- (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
-
- // Allocate 1 row of ARGB for source conversion.
- align_buffer_64(argb_cnv_row, src_width * 4);
-
-#if defined(HAS_SCALEARGBCOLS_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
- ScaleARGBCols = ScaleARGBCols_SSE2;
- }
-#endif
-#if defined(HAS_SCALEARGBCOLS_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ScaleARGBCols = ScaleARGBCols_Any_NEON;
- if (IS_ALIGNED(dst_width, 8)) {
- ScaleARGBCols = ScaleARGBCols_NEON;
- }
- }
-#endif
- if (src_width * 2 == dst_width && x < 0x8000) {
- ScaleARGBCols = ScaleARGBColsUp2_C;
-#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
- ScaleARGBCols = ScaleARGBColsUp2_SSE2;
- }
-#endif
- }
-
- int yi = y >> 16;
-
- YUVBuferIter iter;
- iter.src_width = src_width;
- iter.src_height = src_height;
- iter.src_stride_y = src_stride_y;
- iter.src_stride_u = src_stride_u;
- iter.src_stride_v = src_stride_v;
- iter.src_y = src_y;
- iter.src_u = src_u;
- iter.src_v = src_v;
- YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
- iter.MoveTo(iter, yi);
-
- int lasty = yi;
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
-
- for (j = 0; j < dst_height; ++j) {
- yi = y >> 16;
- if (yi != lasty) {
- iter.MoveTo(iter, yi);
- YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row);
- lasty = yi;
- }
- ScaleARGBCols(dst_argb, argb_cnv_row, dst_width, x, dx);
- dst_argb += dst_stride_argb;
- y += dy;
- }
- free_aligned_buffer_64(argb_cnv_row);
-}
-
-static void YUVToARGBCopy(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- int src_width, int src_height,
- uint8* dst_argb, int dst_stride_argb,
- int dst_width, int dst_height,
- uint32 src_fourcc,
- mozilla::YUVColorSpace yuv_color_space)
-{
- YUVBuferIter iter;
- iter.src_width = src_width;
- iter.src_height = src_height;
- iter.src_stride_y = src_stride_y;
- iter.src_stride_u = src_stride_u;
- iter.src_stride_v = src_stride_v;
- iter.src_y = src_y;
- iter.src_u = src_u;
- iter.src_v = src_v;
- YUVBuferIter_Init(iter, src_fourcc, yuv_color_space);
-
- for (int j = 0; j < dst_height; ++j) {
- YUVBuferIter_ConvertToARGBRow(iter, dst_argb);
- iter.MoveToNextRow(iter);
- dst_argb += dst_stride_argb;
- }
-}
-
-static void ScaleYUVToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- int src_width, int src_height,
- uint8* dst_argb, int dst_stride_argb,
- int dst_width, int dst_height,
- enum FilterMode filtering,
- uint32 src_fourcc,
- mozilla::YUVColorSpace yuv_color_space)
-{
- // Initial source x/y coordinate and step values as 16.16 fixed point.
- int x = 0;
- int y = 0;
- int dx = 0;
- int dy = 0;
- // ARGB does not support box filter yet, but allow the user to pass it.
- // Simplify filtering when possible.
- filtering = ScaleFilterReduce(src_width, src_height,
- dst_width, dst_height,
- filtering);
- ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
- &x, &y, &dx, &dy);
-
- // Special case for integer step values.
- if (((dx | dy) & 0xffff) == 0) {
- if (!dx || !dy) { // 1 pixel wide and/or tall.
- filtering = kFilterNone;
- } else {
- // Optimized even scale down. ie 2, 4, 6, 8, 10x.
- if (!(dx & 0x10000) && !(dy & 0x10000)) {
- if (dx == 0x20000) {
- // Optimized 1/2 downsample.
- ScaleYUVToARGBDown2(src_width, src_height,
- dst_width, dst_height,
- src_stride_y,
- src_stride_u,
- src_stride_v,
- dst_stride_argb,
- src_y,
- src_u,
- src_v,
- dst_argb,
- x, dx, y, dy,
- filtering,
- src_fourcc,
- yuv_color_space);
- return;
- }
- ScaleYUVToARGBDownEven(src_width, src_height,
- dst_width, dst_height,
- src_stride_y,
- src_stride_u,
- src_stride_v,
- dst_stride_argb,
- src_y,
- src_u,
- src_v,
- dst_argb,
- x, dx, y, dy,
- filtering,
- src_fourcc,
- yuv_color_space);
- return;
- }
- // Optimized odd scale down. ie 3, 5, 7, 9x.
- if ((dx & 0x10000) && (dy & 0x10000)) {
- filtering = kFilterNone;
- if (dx == 0x10000 && dy == 0x10000) {
- // Straight conversion and copy.
- YUVToARGBCopy(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- src_width, src_height,
- dst_argb, dst_stride_argb,
- dst_width, dst_height,
- src_fourcc,
- yuv_color_space);
- return;
- }
- }
- }
- }
- if (filtering && dy < 65536) {
- ScaleYUVToARGBBilinearUp(src_width, src_height,
- dst_width, dst_height,
- src_stride_y,
- src_stride_u,
- src_stride_v,
- dst_stride_argb,
- src_y,
- src_u,
- src_v,
- dst_argb,
- x, dx, y, dy,
- filtering,
- src_fourcc,
- yuv_color_space);
- return;
- }
- if (filtering) {
- ScaleYUVToARGBBilinearDown(src_width, src_height,
- dst_width, dst_height,
- src_stride_y,
- src_stride_u,
- src_stride_v,
- dst_stride_argb,
- src_y,
- src_u,
- src_v,
- dst_argb,
- x, dx, y, dy,
- filtering,
- src_fourcc,
- yuv_color_space);
- return;
- }
- ScaleYUVToARGBSimple(src_width, src_height,
- dst_width, dst_height,
- src_stride_y,
- src_stride_u,
- src_stride_v,
- dst_stride_argb,
- src_y,
- src_u,
- src_v,
- dst_argb,
- x, dx, y, dy,
- src_fourcc,
- yuv_color_space);
-}
-
-bool IsConvertSupported(uint32 src_fourcc)
-{
- if (src_fourcc == FOURCC_I444 ||
- src_fourcc == FOURCC_I422 ||
- src_fourcc == FOURCC_I420) {
- return true;
- }
- return false;
-}
-
-LIBYUV_API
-int YUVToARGBScale(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint32 src_fourcc,
- mozilla::YUVColorSpace yuv_color_space,
- int src_width, int src_height,
- uint8* dst_argb, int dst_stride_argb,
- int dst_width, int dst_height,
- enum FilterMode filtering)
-{
- if (!src_y || !src_u || !src_v ||
- src_width == 0 || src_height == 0 ||
- !dst_argb || dst_width <= 0 || dst_height <= 0) {
- return -1;
- }
- if (!IsConvertSupported(src_fourcc)) {
- return -1;
- }
- ScaleYUVToARGB(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- src_width, src_height,
- dst_argb, dst_stride_argb,
- dst_width, dst_height,
- filtering,
- src_fourcc,
- yuv_color_space);
- return 0;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/gfx/ycbcr/scale_yuv_argb.h b/gfx/ycbcr/scale_yuv_argb.h
deleted file mode 100644
index d1a42db1b..000000000
--- a/gfx/ycbcr/scale_yuv_argb.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_SCALE_YUV_ARGB_H_ // NOLINT
-#define INCLUDE_LIBYUV_SCALE_YUV_ARGB_H_
-
-#include "libyuv/basic_types.h"
-#include "libyuv/scale.h" // For FilterMode
-
-#include "ImageTypes.h" // For YUVColorSpace
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-int YUVToARGBScale(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint32 src_fourcc,
- mozilla::YUVColorSpace yuv_color_space,
- int src_width, int src_height,
- uint8* dst_argb, int dst_stride_argb,
- int dst_width, int dst_height,
- enum FilterMode filtering);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_SCALE_YUV_ARGB_H_ NOLINT
diff --git a/gfx/ycbcr/update.sh b/gfx/ycbcr/update.sh
deleted file mode 100644
index 3a38fe81a..000000000
--- a/gfx/ycbcr/update.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-# update.sh <chromium-src-directory>
-cp $1/media/base/yuv_convert.h .
-cp $1/media/base/yuv_convert.cc yuv_convert.cpp
-cp $1/media/base/yuv_row.h .
-cp $1/media/base/yuv_row_table.cc yuv_row_table.cpp
-cp $1/media/base/yuv_row_posix.cc yuv_row_posix.cpp
-cp $1/media/base/yuv_row_win.cc yuv_row_win.cpp
-cp $1/media/base/yuv_row_posix.cc yuv_row_c.cpp
-patch -p3 <convert.patch
-patch -p3 <win64.patch
-patch -p3 <TypeFromSize.patch
-patch -p3 <QuellGccWarnings.patch
diff --git a/gfx/ycbcr/win64.patch b/gfx/ycbcr/win64.patch
deleted file mode 100644
index bdccf2784..000000000
--- a/gfx/ycbcr/win64.patch
+++ /dev/null
@@ -1,210 +0,0 @@
-diff --git a/gfx/ycbcr/yuv_row_win64.cpp b/gfx/ycbcr/yuv_row_win64.cpp
-new file mode 100644
---- /dev/null
-+++ b/gfx/ycbcr/yuv_row_win64.cpp
-@@ -0,0 +1,205 @@
-+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-+// Use of this source code is governed by a BSD-style license that can be
-+// found in the LICENSE file.
-+
-+#include "yuv_row.h"
-+
-+extern "C" {
-+
-+// x64 compiler doesn't support MMX and inline assembler. Use SSE2 intrinsics.
-+
-+#define kCoefficientsRgbU (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 2048)
-+#define kCoefficientsRgbV (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 4096)
-+
-+#include <emmintrin.h>
-+
-+static void FastConvertYUVToRGB32Row_SSE2(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width) {
-+ __m128i xmm0, xmmY1, xmmY2;
-+ __m128 xmmY;
-+
-+ while (width >= 2) {
-+ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf++)),
-+ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf++)));
-+
-+ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));
-+ xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
-+
-+ xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));
-+ xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
-+
-+ xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
-+ 0x44);
-+ xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);
-+ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
-+
-+ _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
-+ rgb_buf += 8;
-+ width -= 2;
-+ }
-+
-+ if (width) {
-+ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf)),
-+ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf)));
-+ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf));
-+ xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
-+ xmmY1 = _mm_srai_epi16(xmmY1, 6);
-+ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
-+ *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
-+ }
-+}
-+
-+static void ScaleYUVToRGB32Row_SSE2(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx) {
-+ __m128i xmm0, xmmY1, xmmY2;
-+ __m128 xmmY;
-+ uint8 u, v, y;
-+ int x = 0;
-+
-+ while (width >= 2) {
-+ u = u_buf[x >> 17];
-+ v = v_buf[x >> 17];
-+ y = y_buf[x >> 16];
-+ x += source_dx;
-+
-+ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
-+ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
-+ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
-+ xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
-+
-+ y = y_buf[x >> 16];
-+ x += source_dx;
-+
-+ xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
-+ xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
-+
-+ xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
-+ 0x44);
-+ xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);
-+ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
-+
-+ _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
-+ rgb_buf += 8;
-+ width -= 2;
-+ }
-+
-+ if (width) {
-+ u = u_buf[x >> 17];
-+ v = v_buf[x >> 17];
-+ y = y_buf[x >> 16];
-+
-+ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
-+ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
-+ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
-+ xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
-+ xmmY1 = _mm_srai_epi16(xmmY1, 6);
-+ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
-+ *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
-+ }
-+}
-+
-+static void LinearScaleYUVToRGB32Row_SSE2(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx) {
-+ __m128i xmm0, xmmY1, xmmY2;
-+ __m128 xmmY;
-+ uint8 u0, u1, v0, v1, y0, y1;
-+ uint32 uv_frac, y_frac, u, v, y;
-+ int x = 0;
-+
-+ if (source_dx >= 0x20000) {
-+ x = 32768;
-+ }
-+
-+ while(width >= 2) {
-+ u0 = u_buf[x >> 17];
-+ u1 = u_buf[(x >> 17) + 1];
-+ v0 = v_buf[x >> 17];
-+ v1 = v_buf[(x >> 17) + 1];
-+ y0 = y_buf[x >> 16];
-+ y1 = y_buf[(x >> 16) + 1];
-+ uv_frac = (x & 0x1fffe);
-+ y_frac = (x & 0xffff);
-+ u = (uv_frac * u1 + (uv_frac ^ 0x1fffe) * u0) >> 17;
-+ v = (uv_frac * v1 + (uv_frac ^ 0x1fffe) * v0) >> 17;
-+ y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;
-+ x += source_dx;
-+
-+ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
-+ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
-+ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
-+ xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
-+
-+ y0 = y_buf[x >> 16];
-+ y1 = y_buf[(x >> 16) + 1];
-+ y_frac = (x & 0xffff);
-+ y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;
-+ x += source_dx;
-+
-+ xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
-+ xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
-+
-+ xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
-+ 0x44);
-+ xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);
-+ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
-+
-+ _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
-+ rgb_buf += 8;
-+ width -= 2;
-+ }
-+
-+ if (width) {
-+ u = u_buf[x >> 17];
-+ v = v_buf[x >> 17];
-+ y = y_buf[x >> 16];
-+
-+ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
-+ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
-+ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
-+
-+ xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
-+ xmmY1 = _mm_srai_epi16(xmmY1, 6);
-+ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
-+ *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
-+ }
-+}
-+
-+void FastConvertYUVToRGB32Row(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width) {
-+ FastConvertYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width);
-+}
-+
-+void ScaleYUVToRGB32Row(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx) {
-+ ScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-+}
-+
-+void LinearScaleYUVToRGB32Row(const uint8* y_buf,
-+ const uint8* u_buf,
-+ const uint8* v_buf,
-+ uint8* rgb_buf,
-+ int width,
-+ int source_dx) {
-+ LinearScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width,
-+ source_dx);
-+}
-+
-+} // extern "C"
diff --git a/gfx/ycbcr/ycbcr_to_rgb565.cpp b/gfx/ycbcr/ycbcr_to_rgb565.cpp
deleted file mode 100644
index 0572e3e09..000000000
--- a/gfx/ycbcr/ycbcr_to_rgb565.cpp
+++ /dev/null
@@ -1,672 +0,0 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include <stdlib.h>
-#include <limits.h>
-#include "nsDebug.h"
-#include "ycbcr_to_rgb565.h"
-#include "nsAlgorithm.h"
-
-
-
-#ifdef HAVE_YCBCR_TO_RGB565
-
-namespace mozilla {
-
-namespace gfx {
-
-/*This contains all of the parameters that are needed to convert a row.
- Passing them in a struct instead of as individual parameters saves the need
- to continually push onto the stack the ones that are fixed for every row.*/
-struct yuv2rgb565_row_scale_bilinear_ctx{
- uint16_t *rgb_row;
- const uint8_t *y_row;
- const uint8_t *u_row;
- const uint8_t *v_row;
- int y_yweight;
- int y_pitch;
- int width;
- int source_x0_q16;
- int source_dx_q16;
- /*Not used for 4:4:4, except with chroma-nearest.*/
- int source_uv_xoffs_q16;
- /*Not used for 4:4:4 or chroma-nearest.*/
- int uv_pitch;
- /*Not used for 4:2:2, 4:4:4, or chroma-nearest.*/
- int uv_yweight;
-};
-
-
-
-/*This contains all of the parameters that are needed to convert a row.
- Passing them in a struct instead of as individual parameters saves the need
- to continually push onto the stack the ones that are fixed for every row.*/
-struct yuv2rgb565_row_scale_nearest_ctx{
- uint16_t *rgb_row;
- const uint8_t *y_row;
- const uint8_t *u_row;
- const uint8_t *v_row;
- int width;
- int source_x0_q16;
- int source_dx_q16;
- /*Not used for 4:4:4.*/
- int source_uv_xoffs_q16;
-};
-
-
-
-typedef void (*yuv2rgb565_row_scale_bilinear_func)(
- const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither);
-
-typedef void (*yuv2rgb565_row_scale_nearest_func)(
- const yuv2rgb565_row_scale_nearest_ctx *ctx, int dither);
-
-
-
-//TODO: fix NEON asm for iOS
-# if defined(MOZILLA_MAY_SUPPORT_NEON) && !defined(__APPLE__)
-
-extern "C" void ScaleYCbCr42xToRGB565_BilinearY_Row_NEON(
- const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither);
-
-void __attribute((noinline)) yuv42x_to_rgb565_row_neon(uint16 *dst,
- const uint8 *y,
- const uint8 *u,
- const uint8 *v,
- int n,
- int oddflag);
-
-#endif
-
-
-
-/*Bilinear interpolation of a single value.
- This uses the exact same formulas as the asm, even though it adds some extra
- shifts that do nothing but reduce accuracy.*/
-static int bislerp(const uint8_t *row,
- int pitch,
- int source_x,
- int xweight,
- int yweight) {
- int a;
- int b;
- int c;
- int d;
- a = row[source_x];
- b = row[source_x+1];
- c = row[source_x+pitch];
- d = row[source_x+pitch+1];
- a = ((a<<8)+(c-a)*yweight+128)>>8;
- b = ((b<<8)+(d-b)*yweight+128)>>8;
- return ((a<<8)+(b-a)*xweight+128)>>8;
-}
-
-/*Convert a single pixel from Y'CbCr to RGB565.
- This uses the exact same formulas as the asm, even though we could make the
- constants a lot more accurate with 32-bit wide registers.*/
-static uint16_t yu2rgb565(int y, int u, int v, int dither) {
- /*This combines the constant offset that needs to be added during the Y'CbCr
- conversion with a rounding offset that depends on the dither parameter.*/
- static const int DITHER_BIAS[4][3]={
- {-14240, 8704, -17696},
- {-14240+128,8704+64, -17696+128},
- {-14240+256,8704+128,-17696+256},
- {-14240+384,8704+192,-17696+384}
- };
- int r;
- int g;
- int b;
- r = clamped((74*y+102*v+DITHER_BIAS[dither][0])>>9, 0, 31);
- g = clamped((74*y-25*u-52*v+DITHER_BIAS[dither][1])>>8, 0, 63);
- b = clamped((74*y+129*u+DITHER_BIAS[dither][2])>>9, 0, 31);
- return (uint16_t)(r<<11 | g<<5 | b);
-}
-
-static void ScaleYCbCr420ToRGB565_Bilinear_Row_C(
- const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){
- int x;
- int source_x_q16;
- source_x_q16 = ctx->source_x0_q16;
- for (x = 0; x < ctx->width; x++) {
- int source_x;
- int xweight;
- int y;
- int u;
- int v;
- xweight = ((source_x_q16&0xFFFF)+128)>>8;
- source_x = source_x_q16>>16;
- y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
- xweight = (((source_x_q16+ctx->source_uv_xoffs_q16)&0x1FFFF)+256)>>9;
- source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17;
- source_x_q16 += ctx->source_dx_q16;
- u = bislerp(ctx->u_row, ctx->uv_pitch, source_x, xweight, ctx->uv_yweight);
- v = bislerp(ctx->v_row, ctx->uv_pitch, source_x, xweight, ctx->uv_yweight);
- ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
- dither ^= 3;
- }
-}
-
-static void ScaleYCbCr422ToRGB565_Bilinear_Row_C(
- const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){
- int x;
- int source_x_q16;
- source_x_q16 = ctx->source_x0_q16;
- for (x = 0; x < ctx->width; x++) {
- int source_x;
- int xweight;
- int y;
- int u;
- int v;
- xweight = ((source_x_q16&0xFFFF)+128)>>8;
- source_x = source_x_q16>>16;
- y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
- xweight = (((source_x_q16+ctx->source_uv_xoffs_q16)&0x1FFFF)+256)>>9;
- source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17;
- source_x_q16 += ctx->source_dx_q16;
- u = bislerp(ctx->u_row, ctx->uv_pitch, source_x, xweight, ctx->y_yweight);
- v = bislerp(ctx->v_row, ctx->uv_pitch, source_x, xweight, ctx->y_yweight);
- ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
- dither ^= 3;
- }
-}
-
-static void ScaleYCbCr444ToRGB565_Bilinear_Row_C(
- const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){
- int x;
- int source_x_q16;
- source_x_q16 = ctx->source_x0_q16;
- for (x = 0; x < ctx->width; x++) {
- int source_x;
- int xweight;
- int y;
- int u;
- int v;
- xweight = ((source_x_q16&0xFFFF)+128)>>8;
- source_x = source_x_q16>>16;
- source_x_q16 += ctx->source_dx_q16;
- y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
- u = bislerp(ctx->u_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
- v = bislerp(ctx->v_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
- ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
- dither ^= 3;
- }
-}
-
-static void ScaleYCbCr42xToRGB565_BilinearY_Row_C(
- const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){
- int x;
- int source_x_q16;
- source_x_q16 = ctx->source_x0_q16;
- for (x = 0; x < ctx->width; x++) {
- int source_x;
- int xweight;
- int y;
- int u;
- int v;
- xweight = ((source_x_q16&0xFFFF)+128)>>8;
- source_x = source_x_q16>>16;
- y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
- source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17;
- source_x_q16 += ctx->source_dx_q16;
- u = ctx->u_row[source_x];
- v = ctx->v_row[source_x];
- ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
- dither ^= 3;
- }
-}
-
-static void ScaleYCbCr444ToRGB565_BilinearY_Row_C(
- const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){
- int x;
- int source_x_q16;
- source_x_q16 = ctx->source_x0_q16;
- for (x = 0; x < ctx->width; x++) {
- int source_x;
- int xweight;
- int y;
- int u;
- int v;
- xweight = ((source_x_q16&0xFFFF)+128)>>8;
- source_x = source_x_q16>>16;
- y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
- source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>16;
- source_x_q16 += ctx->source_dx_q16;
- u = ctx->u_row[source_x];
- v = ctx->v_row[source_x];
- ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
- dither ^= 3;
- }
-}
-
-static void ScaleYCbCr42xToRGB565_Nearest_Row_C(
- const yuv2rgb565_row_scale_nearest_ctx *ctx, int dither){
- int y;
- int u;
- int v;
- int x;
- int source_x_q16;
- int source_x;
- source_x_q16 = ctx->source_x0_q16;
- for (x = 0; x < ctx->width; x++) {
- source_x = source_x_q16>>16;
- y = ctx->y_row[source_x];
- source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17;
- source_x_q16 += ctx->source_dx_q16;
- u = ctx->u_row[source_x];
- v = ctx->v_row[source_x];
- ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
- dither ^= 3;
- }
-}
-
-static void ScaleYCbCr444ToRGB565_Nearest_Row_C(
- const yuv2rgb565_row_scale_nearest_ctx *ctx, int dither){
- int y;
- int u;
- int v;
- int x;
- int source_x_q16;
- int source_x;
- source_x_q16 = ctx->source_x0_q16;
- for (x = 0; x < ctx->width; x++) {
- source_x = source_x_q16>>16;
- source_x_q16 += ctx->source_dx_q16;
- y = ctx->y_row[source_x];
- u = ctx->u_row[source_x];
- v = ctx->v_row[source_x];
- ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
- dither ^= 3;
- }
-}
-
-void ScaleYCbCrToRGB565(const uint8_t *y_buf,
- const uint8_t *u_buf,
- const uint8_t *v_buf,
- uint8_t *rgb_buf,
- int source_x0,
- int source_y0,
- int source_width,
- int source_height,
- int width,
- int height,
- int y_pitch,
- int uv_pitch,
- int rgb_pitch,
- YUVType yuv_type,
- ScaleFilter filter) {
- int source_x0_q16;
- int source_y0_q16;
- int source_dx_q16;
- int source_dy_q16;
- int source_uv_xoffs_q16;
- int source_uv_yoffs_q16;
- int x_shift;
- int y_shift;
- int ymin;
- int ymax;
- int uvmin;
- int uvmax;
- int dither;
- /*We don't support negative destination rectangles (just flip the source
- instead), and for empty ones there's nothing to do.*/
- if (width <= 0 || height <= 0)
- return;
- /*These bounds are required to avoid 16.16 fixed-point overflow.*/
- NS_ASSERTION(source_x0 > (INT_MIN>>16) && source_x0 < (INT_MAX>>16),
- "ScaleYCbCrToRGB565 source X offset out of bounds.");
- NS_ASSERTION(source_x0+source_width > (INT_MIN>>16)
- && source_x0+source_width < (INT_MAX>>16),
- "ScaleYCbCrToRGB565 source width out of bounds.");
- NS_ASSERTION(source_y0 > (INT_MIN>>16) && source_y0 < (INT_MAX>>16),
- "ScaleYCbCrToRGB565 source Y offset out of bounds.");
- NS_ASSERTION(source_y0+source_height > (INT_MIN>>16)
- && source_y0+source_height < (INT_MAX>>16),
- "ScaleYCbCrToRGB565 source height out of bounds.");
- /*We require the same stride for Y' and Cb and Cr for 4:4:4 content.*/
- NS_ASSERTION(yuv_type != YV24 || y_pitch == uv_pitch,
- "ScaleYCbCrToRGB565 luma stride differs from chroma for 4:4:4 content.");
- /*We assume we can read outside the bounds of the input, because it makes
- the code much simpler (and in practice is true: both Theora and VP8 return
- padded reference frames).
- In practice, we do not even _have_ the actual bounds of the source, as
- we are passed a crop rectangle from it, and not the dimensions of the full
- image.
- This assertion will not guarantee our out-of-bounds reads are safe, but it
- should at least catch the simple case of passing in an unpadded buffer.*/
- NS_ASSERTION(abs(y_pitch) >= abs(source_width)+16,
- "ScaleYCbCrToRGB565 source image unpadded?");
- /*The NEON code requires the pointers to be aligned to a 16-byte boundary at
- the start of each row.
- This should be true for all of our sources.
- We could try to fix this up if it's not true by adjusting source_x0, but
- that would require the mis-alignment to be the same for the U and V
- planes.*/
- NS_ASSERTION((y_pitch&15) == 0 && (uv_pitch&15) == 0 &&
- ((y_buf-(uint8_t *)nullptr)&15) == 0 &&
- ((u_buf-(uint8_t *)nullptr)&15) == 0 &&
- ((v_buf-(uint8_t *)nullptr)&15) == 0,
- "ScaleYCbCrToRGB565 source image unaligned");
- /*We take an area-based approach to pixel coverage to avoid shifting by small
- amounts (or not so small, when up-scaling or down-scaling by a large
- factor).
-
- An illustrative example: scaling 4:2:0 up by 2, using JPEG chroma cositing^.
-
- + = RGB destination locations
- * = Y' source locations
- - = Cb, Cr source locations
-
- + + + + + + + +
- * * * *
- + + + + + + + +
- - -
- + + + + + + + +
- * * * *
- + + + + + + + +
-
- + + + + + + + +
- * * * *
- + + + + + + + +
- - -
- + + + + + + + +
- * * * *
- + + + + + + + +
-
- So, the coordinates of the upper-left + (first destination site) should
- be (-0.25,-0.25) in the source Y' coordinate system.
- Similarly, the coordinates should be (-0.375,-0.375) in the source Cb, Cr
- coordinate system.
- Note that the origin and scale of these two coordinate systems is not the
- same!
-
- ^JPEG cositing is required for Theora; VP8 doesn't specify cositing rules,
- but nearly all software converters in existence (at least those that are
- open source, and many that are not) use JPEG cositing instead of MPEG.*/
- source_dx_q16 = (source_width<<16) / width;
- source_x0_q16 = (source_x0<<16)+(source_dx_q16>>1)-0x8000;
- source_dy_q16 = (source_height<<16) / height;
- source_y0_q16 = (source_y0<<16)+(source_dy_q16>>1)-0x8000;
- x_shift = (yuv_type != YV24);
- y_shift = (yuv_type == YV12);
- /*These two variables hold the difference between the origins of the Y' and
- the Cb, Cr coordinate systems, using the scale of the Y' coordinate
- system.*/
- source_uv_xoffs_q16 = -(x_shift<<15);
- source_uv_yoffs_q16 = -(y_shift<<15);
- /*Compute the range of source rows we'll actually use.
- This doesn't guarantee we won't read outside this range.*/
- ymin = source_height >= 0 ? source_y0 : source_y0+source_height-1;
- ymax = source_height >= 0 ? source_y0+source_height-1 : source_y0;
- uvmin = ymin>>y_shift;
- uvmax = ((ymax+1+y_shift)>>y_shift)-1;
- /*Pick a dithering pattern.
- The "&3" at the end is just in case RAND_MAX is lying.*/
- dither = (rand()/(RAND_MAX>>2))&3;
- /*Nearest-neighbor scaling.*/
- if (filter == FILTER_NONE) {
- yuv2rgb565_row_scale_nearest_ctx ctx;
- yuv2rgb565_row_scale_nearest_func scale_row;
- int y;
- /*Add rounding offsets once, in advance.*/
- source_x0_q16 += 0x8000;
- source_y0_q16 += 0x8000;
- source_uv_xoffs_q16 += (x_shift<<15);
- source_uv_yoffs_q16 += (y_shift<<15);
- if (yuv_type == YV12)
- scale_row = ScaleYCbCr42xToRGB565_Nearest_Row_C;
- else
- scale_row = ScaleYCbCr444ToRGB565_Nearest_Row_C;
- ctx.width = width;
- ctx.source_x0_q16 = source_x0_q16;
- ctx.source_dx_q16 = source_dx_q16;
- ctx.source_uv_xoffs_q16 = source_uv_xoffs_q16;
- for (y=0; y<height; y++) {
- int source_y;
- ctx.rgb_row = (uint16_t *)(rgb_buf + y*rgb_pitch);
- source_y = source_y0_q16>>16;
- source_y = clamped(source_y, ymin, ymax);
- ctx.y_row = y_buf + source_y*y_pitch;
- source_y = (source_y0_q16+source_uv_yoffs_q16)>>(16+y_shift);
- source_y = clamped(source_y, uvmin, uvmax);
- source_y0_q16 += source_dy_q16;
- ctx.u_row = u_buf + source_y*uv_pitch;
- ctx.v_row = v_buf + source_y*uv_pitch;
- (*scale_row)(&ctx, dither);
- dither ^= 2;
- }
- }
- /*Bilinear scaling.*/
- else {
- yuv2rgb565_row_scale_bilinear_ctx ctx;
- yuv2rgb565_row_scale_bilinear_func scale_row;
- int uvxscale_min;
- int uvxscale_max;
- int uvyscale_min;
- int uvyscale_max;
- int y;
- /*Check how close the chroma scaling is to unity.
- If it's close enough, we can get away with nearest-neighbor chroma
- sub-sampling, and only doing bilinear on luma.
- If a given axis is subsampled, we use bounds on the luma step of
- [0.67...2], which is equivalent to scaling chroma by [1...3].
- If it's not subsampled, we use bounds of [0.5...1.33], which is
- equivalent to scaling chroma by [0.75...2].
- The lower bound is chosen as a trade-off between speed and how terrible
- nearest neighbor looks when upscaling.*/
-# define CHROMA_NEAREST_SUBSAMP_STEP_MIN 0xAAAA
-# define CHROMA_NEAREST_NORMAL_STEP_MIN 0x8000
-# define CHROMA_NEAREST_SUBSAMP_STEP_MAX 0x20000
-# define CHROMA_NEAREST_NORMAL_STEP_MAX 0x15555
- uvxscale_min = yuv_type != YV24 ?
- CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN;
- uvxscale_max = yuv_type != YV24 ?
- CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX;
- uvyscale_min = yuv_type == YV12 ?
- CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN;
- uvyscale_max = yuv_type == YV12 ?
- CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX;
- if (uvxscale_min <= abs(source_dx_q16)
- && abs(source_dx_q16) <= uvxscale_max
- && uvyscale_min <= abs(source_dy_q16)
- && abs(source_dy_q16) <= uvyscale_max) {
- /*Add the rounding offsets now.*/
- source_uv_xoffs_q16 += 1<<(15+x_shift);
- source_uv_yoffs_q16 += 1<<(15+y_shift);
- if (yuv_type != YV24) {
- scale_row =
-//TODO: fix NEON asm for iOS
-# if defined(MOZILLA_MAY_SUPPORT_NEON) && !defined(__APPLE__)
- supports_neon() ? ScaleYCbCr42xToRGB565_BilinearY_Row_NEON :
-# endif
- ScaleYCbCr42xToRGB565_BilinearY_Row_C;
- }
- else
- scale_row = ScaleYCbCr444ToRGB565_BilinearY_Row_C;
- }
- else {
- if (yuv_type == YV12)
- scale_row = ScaleYCbCr420ToRGB565_Bilinear_Row_C;
- else if (yuv_type == YV16)
- scale_row = ScaleYCbCr422ToRGB565_Bilinear_Row_C;
- else
- scale_row = ScaleYCbCr444ToRGB565_Bilinear_Row_C;
- }
- ctx.width = width;
- ctx.y_pitch = y_pitch;
- ctx.source_x0_q16 = source_x0_q16;
- ctx.source_dx_q16 = source_dx_q16;
- ctx.source_uv_xoffs_q16 = source_uv_xoffs_q16;
- ctx.uv_pitch = uv_pitch;
- for (y=0; y<height; y++) {
- int source_y;
- int yweight;
- int uvweight;
- ctx.rgb_row = (uint16_t *)(rgb_buf + y*rgb_pitch);
- source_y = (source_y0_q16+128)>>16;
- yweight = ((source_y0_q16+128)>>8)&0xFF;
- if (source_y < ymin) {
- source_y = ymin;
- yweight = 0;
- }
- if (source_y > ymax) {
- source_y = ymax;
- yweight = 0;
- }
- ctx.y_row = y_buf + source_y*y_pitch;
- source_y = source_y0_q16+source_uv_yoffs_q16+(128<<y_shift);
- source_y0_q16 += source_dy_q16;
- uvweight = source_y>>(8+y_shift)&0xFF;
- source_y >>= 16+y_shift;
- if (source_y < uvmin) {
- source_y = uvmin;
- uvweight = 0;
- }
- if (source_y > uvmax) {
- source_y = uvmax;
- uvweight = 0;
- }
- ctx.u_row = u_buf + source_y*uv_pitch;
- ctx.v_row = v_buf + source_y*uv_pitch;
- ctx.y_yweight = yweight;
- ctx.uv_yweight = uvweight;
- (*scale_row)(&ctx, dither);
- dither ^= 2;
- }
- }
-}
-
-bool IsScaleYCbCrToRGB565Fast(int source_x0,
- int source_y0,
- int source_width,
- int source_height,
- int width,
- int height,
- YUVType yuv_type,
- ScaleFilter filter)
-{
- // Very fast.
- if (width <= 0 || height <= 0)
- return true;
-# if defined(MOZILLA_MAY_SUPPORT_NEON)
- if (filter != FILTER_NONE) {
- int source_dx_q16;
- int source_dy_q16;
- int uvxscale_min;
- int uvxscale_max;
- int uvyscale_min;
- int uvyscale_max;
- source_dx_q16 = (source_width<<16) / width;
- source_dy_q16 = (source_height<<16) / height;
- uvxscale_min = yuv_type != YV24 ?
- CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN;
- uvxscale_max = yuv_type != YV24 ?
- CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX;
- uvyscale_min = yuv_type == YV12 ?
- CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN;
- uvyscale_max = yuv_type == YV12 ?
- CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX;
- if (uvxscale_min <= abs(source_dx_q16)
- && abs(source_dx_q16) <= uvxscale_max
- && uvyscale_min <= abs(source_dy_q16)
- && abs(source_dy_q16) <= uvyscale_max) {
- if (yuv_type != YV24)
- return supports_neon();
- }
- }
-# endif
- return false;
-}
-
-
-
-void yuv_to_rgb565_row_c(uint16 *dst,
- const uint8 *y,
- const uint8 *u,
- const uint8 *v,
- int x_shift,
- int pic_x,
- int pic_width)
-{
- int x;
- for (x = 0; x < pic_width; x++)
- {
- dst[x] = yu2rgb565(y[pic_x+x],
- u[(pic_x+x)>>x_shift],
- v[(pic_x+x)>>x_shift],
- 2); // Disable dithering for now.
- }
-}
-
-void ConvertYCbCrToRGB565(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int pic_x,
- int pic_y,
- int pic_width,
- int pic_height,
- int y_pitch,
- int uv_pitch,
- int rgb_pitch,
- YUVType yuv_type)
-{
- int x_shift;
- int y_shift;
- x_shift = yuv_type != YV24;
- y_shift = yuv_type == YV12;
-//TODO: fix NEON asm for iOS
-# if defined(MOZILLA_MAY_SUPPORT_NEON) && !defined(__APPLE__)
- if (yuv_type != YV24 && supports_neon())
- {
- for (int i = 0; i < pic_height; i++) {
- int yoffs;
- int uvoffs;
- yoffs = y_pitch * (pic_y+i) + pic_x;
- uvoffs = uv_pitch * ((pic_y+i)>>y_shift) + (pic_x>>x_shift);
- yuv42x_to_rgb565_row_neon((uint16*)(rgb_buf + rgb_pitch * i),
- y_buf + yoffs,
- u_buf + uvoffs,
- v_buf + uvoffs,
- pic_width,
- pic_x&x_shift);
- }
- }
- else
-# endif
- {
- for (int i = 0; i < pic_height; i++) {
- int yoffs;
- int uvoffs;
- yoffs = y_pitch * (pic_y+i);
- uvoffs = uv_pitch * ((pic_y+i)>>y_shift);
- yuv_to_rgb565_row_c((uint16*)(rgb_buf + rgb_pitch * i),
- y_buf + yoffs,
- u_buf + uvoffs,
- v_buf + uvoffs,
- x_shift,
- pic_x,
- pic_width);
- }
- }
-}
-
-bool IsConvertYCbCrToRGB565Fast(int pic_x,
- int pic_y,
- int pic_width,
- int pic_height,
- YUVType yuv_type)
-{
-# if defined(MOZILLA_MAY_SUPPORT_NEON)
- return (yuv_type != YV24 && supports_neon());
-# else
- return false;
-# endif
-}
-
-} // namespace gfx
-
-} // namespace mozilla
-
-#endif // HAVE_YCBCR_TO_RGB565
diff --git a/gfx/ycbcr/ycbcr_to_rgb565.h b/gfx/ycbcr/ycbcr_to_rgb565.h
deleted file mode 100644
index 41272223b..000000000
--- a/gfx/ycbcr/ycbcr_to_rgb565.h
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-#ifndef MEDIA_BASE_YCBCR_TO_RGB565_H_
-#define MEDIA_BASE_YCBCR_TO_RGB565_H_
-#include "yuv_convert.h"
-#include "mozilla/arm.h"
-
-// It's currently only worth including this if we have NEON support.
-#ifdef MOZILLA_MAY_SUPPORT_NEON
-#define HAVE_YCBCR_TO_RGB565 1
-#endif
-
-namespace mozilla {
-
-namespace gfx {
-
-#ifdef HAVE_YCBCR_TO_RGB565
-// Convert a frame of YUV to 16 bit RGB565.
-void ConvertYCbCrToRGB565(const uint8* yplane,
- const uint8* uplane,
- const uint8* vplane,
- uint8* rgbframe,
- int pic_x,
- int pic_y,
- int pic_width,
- int pic_height,
- int ystride,
- int uvstride,
- int rgbstride,
- YUVType yuv_type);
-
-// Used to test if we have an accelerated version.
-bool IsConvertYCbCrToRGB565Fast(int pic_x,
- int pic_y,
- int pic_width,
- int pic_height,
- YUVType yuv_type);
-
-// Scale a frame of YUV to 16 bit RGB565.
-void ScaleYCbCrToRGB565(const uint8_t *yplane,
- const uint8_t *uplane,
- const uint8_t *vplane,
- uint8_t *rgbframe,
- int source_x0,
- int source_y0,
- int source_width,
- int source_height,
- int width,
- int height,
- int ystride,
- int uvstride,
- int rgbstride,
- YUVType yuv_type,
- ScaleFilter filter);
-
-// Used to test if we have an accelerated version.
-bool IsScaleYCbCrToRGB565Fast(int source_x0,
- int source_y0,
- int source_width,
- int source_height,
- int width,
- int height,
- YUVType yuv_type,
- ScaleFilter filter);
-#endif // HAVE_YCBCR_TO_RGB565
-
-} // namespace gfx
-
-} // namespace mozilla
-
-#endif // MEDIA_BASE_YCBCR_TO_RGB565_H_
diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
deleted file mode 100644
index 78fd4ee89..000000000
--- a/gfx/ycbcr/yuv_convert.cpp
+++ /dev/null
@@ -1,510 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// This webpage shows layout of YV12 and other YUV formats
-// http://www.fourcc.org/yuv.php
-// The actual conversion is best described here
-// http://en.wikipedia.org/wiki/YUV
-// An article on optimizing YUV conversion using tables instead of multiplies
-// http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf
-//
-// YV12 is a full plane of Y and a half height, half width chroma planes
-// YV16 is a full plane of Y and a full height, half width chroma planes
-// YV24 is a full plane of Y and a full height, full width chroma planes
-//
-// ARGB pixel format is output, which on little endian is stored as BGRA.
-// The alpha is set to 255, allowing the application to use RGBA or RGB32.
-
-#include "yuv_convert.h"
-
-#include "gfxPrefs.h"
-#include "libyuv.h"
-#include "scale_yuv_argb.h"
-// Header for low level row functions.
-#include "yuv_row.h"
-#include "mozilla/SSE.h"
-
-namespace mozilla {
-
-namespace gfx {
-
-// 16.16 fixed point arithmetic
-const int kFractionBits = 16;
-const int kFractionMax = 1 << kFractionBits;
-const int kFractionMask = ((1 << kFractionBits) - 1);
-
-YUVType TypeFromSize(int ywidth,
- int yheight,
- int cbcrwidth,
- int cbcrheight)
-{
- if (ywidth == cbcrwidth && yheight == cbcrheight) {
- return YV24;
- }
- else if ((ywidth + 1) / 2 == cbcrwidth && yheight == cbcrheight) {
- return YV16;
- }
- else {
- return YV12;
- }
-}
-
-libyuv::FourCC FourCCFromYUVType(YUVType aYUVType)
-{
- if (aYUVType == YV24) {
- return libyuv::FOURCC_I444;
- } else if (aYUVType == YV16) {
- return libyuv::FOURCC_I422;
- } else if (aYUVType == YV12) {
- return libyuv::FOURCC_I420;
- } else {
- return libyuv::FOURCC_ANY;
- }
-}
-
-// Convert a frame of YUV to 32 bit ARGB.
-void ConvertYCbCrToRGB32(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int pic_x,
- int pic_y,
- int pic_width,
- int pic_height,
- int y_pitch,
- int uv_pitch,
- int rgb_pitch,
- YUVType yuv_type,
- YUVColorSpace yuv_color_space) {
-
-
- // Deprecated function's conversion is accurate.
- // libyuv converion is a bit inaccurate to get performance. It dynamically
- // calculates RGB from YUV to use simd. In it, signed byte is used for conversion's
- // coefficient, but it requests 129. libyuv cut 129 to 127. And only 6 bits are
- // used for a decimal part during the dynamic calculation.
- //
- // The function is still fast on some old intel chips.
- // See Bug 1256475.
- bool use_deprecated = gfxPrefs::YCbCrAccurateConversion() ||
- (supports_mmx() && supports_sse() && !supports_sse3() &&
- yuv_color_space == YUVColorSpace::BT601);
- // The deprecated function only support BT601.
- // See Bug 1210357.
- if (yuv_color_space != YUVColorSpace::BT601) {
- use_deprecated = false;
- }
- if (use_deprecated) {
- ConvertYCbCrToRGB32_deprecated(y_buf, u_buf, v_buf, rgb_buf,
- pic_x, pic_y, pic_width, pic_height,
- y_pitch, uv_pitch, rgb_pitch, yuv_type);
- return;
- }
-
- if (yuv_type == YV24) {
- const uint8* src_y = y_buf + y_pitch * pic_y + pic_x;
- const uint8* src_u = u_buf + uv_pitch * pic_y + pic_x;
- const uint8* src_v = v_buf + uv_pitch * pic_y + pic_x;
- DebugOnly<int> err = libyuv::I444ToARGB(src_y, y_pitch,
- src_u, uv_pitch,
- src_v, uv_pitch,
- rgb_buf, rgb_pitch,
- pic_width, pic_height);
- MOZ_ASSERT(!err);
- } else if (yuv_type == YV16) {
- const uint8* src_y = y_buf + y_pitch * pic_y + pic_x;
- const uint8* src_u = u_buf + uv_pitch * pic_y + pic_x / 2;
- const uint8* src_v = v_buf + uv_pitch * pic_y + pic_x / 2;
- DebugOnly<int> err = libyuv::I422ToARGB(src_y, y_pitch,
- src_u, uv_pitch,
- src_v, uv_pitch,
- rgb_buf, rgb_pitch,
- pic_width, pic_height);
- MOZ_ASSERT(!err);
- } else {
- MOZ_ASSERT(yuv_type == YV12);
- const uint8* src_y = y_buf + y_pitch * pic_y + pic_x;
- const uint8* src_u = u_buf + (uv_pitch * pic_y + pic_x) / 2;
- const uint8* src_v = v_buf + (uv_pitch * pic_y + pic_x) / 2;
- if (yuv_color_space == YUVColorSpace::BT709) {
- DebugOnly<int> err = libyuv::H420ToARGB(src_y, y_pitch,
- src_u, uv_pitch,
- src_v, uv_pitch,
- rgb_buf, rgb_pitch,
- pic_width, pic_height);
- MOZ_ASSERT(!err);
- } else {
- MOZ_ASSERT(yuv_color_space == YUVColorSpace::BT601);
- DebugOnly<int> err = libyuv::I420ToARGB(src_y, y_pitch,
- src_u, uv_pitch,
- src_v, uv_pitch,
- rgb_buf, rgb_pitch,
- pic_width, pic_height);
- MOZ_ASSERT(!err);
- }
- }
-}
-
-// Convert a frame of YUV to 32 bit ARGB.
-void ConvertYCbCrToRGB32_deprecated(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int pic_x,
- int pic_y,
- int pic_width,
- int pic_height,
- int y_pitch,
- int uv_pitch,
- int rgb_pitch,
- YUVType yuv_type) {
- unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
- unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
- // Test for SSE because the optimized code uses movntq, which is not part of MMX.
- bool has_sse = supports_mmx() && supports_sse();
- // There is no optimized YV24 SSE routine so we check for this and
- // fall back to the C code.
- has_sse &= yuv_type != YV24;
- bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
- int x_width = odd_pic_x ? pic_width - 1 : pic_width;
-
- for (int y = pic_y; y < pic_height + pic_y; ++y) {
- uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
- const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
- const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
- const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
-
- if (odd_pic_x) {
- // Handle the single odd pixel manually and use the
- // fast routines for the remaining.
- FastConvertYUVToRGB32Row_C(y_ptr++,
- u_ptr++,
- v_ptr++,
- rgb_row,
- 1,
- x_shift);
- rgb_row += 4;
- }
-
- if (has_sse) {
- FastConvertYUVToRGB32Row(y_ptr,
- u_ptr,
- v_ptr,
- rgb_row,
- x_width);
- }
- else {
- FastConvertYUVToRGB32Row_C(y_ptr,
- u_ptr,
- v_ptr,
- rgb_row,
- x_width,
- x_shift);
- }
- }
-
- // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
- if (has_sse)
- EMMS();
-}
-
-// C version does 8 at a time to mimic MMX code
-static void FilterRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
- int source_width, int source_y_fraction) {
- int y1_fraction = source_y_fraction;
- int y0_fraction = 256 - y1_fraction;
- uint8* end = ybuf + source_width;
- do {
- ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8;
- ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8;
- ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8;
- ybuf[3] = (y0_ptr[3] * y0_fraction + y1_ptr[3] * y1_fraction) >> 8;
- ybuf[4] = (y0_ptr[4] * y0_fraction + y1_ptr[4] * y1_fraction) >> 8;
- ybuf[5] = (y0_ptr[5] * y0_fraction + y1_ptr[5] * y1_fraction) >> 8;
- ybuf[6] = (y0_ptr[6] * y0_fraction + y1_ptr[6] * y1_fraction) >> 8;
- ybuf[7] = (y0_ptr[7] * y0_fraction + y1_ptr[7] * y1_fraction) >> 8;
- y0_ptr += 8;
- y1_ptr += 8;
- ybuf += 8;
- } while (ybuf < end);
-}
-
-#ifdef MOZILLA_MAY_SUPPORT_MMX
-void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
- int source_width, int source_y_fraction);
-#endif
-
-#ifdef MOZILLA_MAY_SUPPORT_SSE2
-void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
- int source_width, int source_y_fraction);
-#endif
-
-static inline void FilterRows(uint8* ybuf, const uint8* y0_ptr,
- const uint8* y1_ptr, int source_width,
- int source_y_fraction) {
-#ifdef MOZILLA_MAY_SUPPORT_SSE2
- if (mozilla::supports_sse2()) {
- FilterRows_SSE2(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
- return;
- }
-#endif
-
-#ifdef MOZILLA_MAY_SUPPORT_MMX
- if (mozilla::supports_mmx()) {
- FilterRows_MMX(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
- return;
- }
-#endif
-
- FilterRows_C(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
-}
-
-
-// Scale a frame of YUV to 32 bit ARGB.
-void ScaleYCbCrToRGB32(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int source_width,
- int source_height,
- int width,
- int height,
- int y_pitch,
- int uv_pitch,
- int rgb_pitch,
- YUVType yuv_type,
- YUVColorSpace yuv_color_space,
- ScaleFilter filter) {
-
- bool use_deprecated = gfxPrefs::YCbCrAccurateConversion() ||
-#if defined(XP_WIN) && defined(_M_X64)
- // libyuv does not support SIMD scaling on win 64bit. See Bug 1295927.
- supports_sse3() ||
-#endif
- (supports_mmx() && supports_sse() && !supports_sse3());
- // The deprecated function only support BT601.
- // See Bug 1210357.
- if (yuv_color_space != YUVColorSpace::BT601) {
- use_deprecated = false;
- }
- if (use_deprecated) {
- ScaleYCbCrToRGB32_deprecated(y_buf, u_buf, v_buf,
- rgb_buf,
- source_width, source_height,
- width, height,
- y_pitch, uv_pitch,
- rgb_pitch,
- yuv_type,
- ROTATE_0,
- filter);
- return;
- }
-
- DebugOnly<int> err =
- libyuv::YUVToARGBScale(y_buf, y_pitch,
- u_buf, uv_pitch,
- v_buf, uv_pitch,
- FourCCFromYUVType(yuv_type),
- yuv_color_space,
- source_width, source_height,
- rgb_buf, rgb_pitch,
- width, height,
- libyuv::kFilterBilinear);
- MOZ_ASSERT(!err);
- return;
-}
-
-// Scale a frame of YUV to 32 bit ARGB.
-void ScaleYCbCrToRGB32_deprecated(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int source_width,
- int source_height,
- int width,
- int height,
- int y_pitch,
- int uv_pitch,
- int rgb_pitch,
- YUVType yuv_type,
- Rotate view_rotate,
- ScaleFilter filter) {
- bool has_mmx = supports_mmx();
-
- // 4096 allows 3 buffers to fit in 12k.
- // Helps performance on CPU with 16K L1 cache.
- // Large enough for 3830x2160 and 30" displays which are 2560x1600.
- const int kFilterBufferSize = 4096;
- // Disable filtering if the screen is too big (to avoid buffer overflows).
- // This should never happen to regular users: they don't have monitors
- // wider than 4096 pixels.
- // TODO(fbarchard): Allow rotated videos to filter.
- if (source_width > kFilterBufferSize || view_rotate)
- filter = FILTER_NONE;
-
- unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
- // Diagram showing origin and direction of source sampling.
- // ->0 4<-
- // 7 3
- //
- // 6 5
- // ->1 2<-
- // Rotations that start at right side of image.
- if ((view_rotate == ROTATE_180) ||
- (view_rotate == ROTATE_270) ||
- (view_rotate == MIRROR_ROTATE_0) ||
- (view_rotate == MIRROR_ROTATE_90)) {
- y_buf += source_width - 1;
- u_buf += source_width / 2 - 1;
- v_buf += source_width / 2 - 1;
- source_width = -source_width;
- }
- // Rotations that start at bottom of image.
- if ((view_rotate == ROTATE_90) ||
- (view_rotate == ROTATE_180) ||
- (view_rotate == MIRROR_ROTATE_90) ||
- (view_rotate == MIRROR_ROTATE_180)) {
- y_buf += (source_height - 1) * y_pitch;
- u_buf += ((source_height >> y_shift) - 1) * uv_pitch;
- v_buf += ((source_height >> y_shift) - 1) * uv_pitch;
- source_height = -source_height;
- }
-
- // Handle zero sized destination.
- if (width == 0 || height == 0)
- return;
- int source_dx = source_width * kFractionMax / width;
- int source_dy = source_height * kFractionMax / height;
- int source_dx_uv = source_dx;
-
- if ((view_rotate == ROTATE_90) ||
- (view_rotate == ROTATE_270)) {
- int tmp = height;
- height = width;
- width = tmp;
- tmp = source_height;
- source_height = source_width;
- source_width = tmp;
- int original_dx = source_dx;
- int original_dy = source_dy;
- source_dx = ((original_dy >> kFractionBits) * y_pitch) << kFractionBits;
- source_dx_uv = ((original_dy >> kFractionBits) * uv_pitch) << kFractionBits;
- source_dy = original_dx;
- if (view_rotate == ROTATE_90) {
- y_pitch = -1;
- uv_pitch = -1;
- source_height = -source_height;
- } else {
- y_pitch = 1;
- uv_pitch = 1;
- }
- }
-
- // Need padding because FilterRows() will write 1 to 16 extra pixels
- // after the end for SSE2 version.
- uint8 yuvbuf[16 + kFilterBufferSize * 3 + 16];
- uint8* ybuf =
- reinterpret_cast<uint8*>(reinterpret_cast<uintptr_t>(yuvbuf + 15) & ~15);
- uint8* ubuf = ybuf + kFilterBufferSize;
- uint8* vbuf = ubuf + kFilterBufferSize;
- // TODO(fbarchard): Fixed point math is off by 1 on negatives.
- int yscale_fixed = (source_height << kFractionBits) / height;
-
- // TODO(fbarchard): Split this into separate function for better efficiency.
- for (int y = 0; y < height; ++y) {
- uint8* dest_pixel = rgb_buf + y * rgb_pitch;
- int source_y_subpixel = (y * yscale_fixed);
- if (yscale_fixed >= (kFractionMax * 2)) {
- source_y_subpixel += kFractionMax / 2; // For 1/2 or less, center filter.
- }
- int source_y = source_y_subpixel >> kFractionBits;
-
- const uint8* y0_ptr = y_buf + source_y * y_pitch;
- const uint8* y1_ptr = y0_ptr + y_pitch;
-
- const uint8* u0_ptr = u_buf + (source_y >> y_shift) * uv_pitch;
- const uint8* u1_ptr = u0_ptr + uv_pitch;
- const uint8* v0_ptr = v_buf + (source_y >> y_shift) * uv_pitch;
- const uint8* v1_ptr = v0_ptr + uv_pitch;
-
- // vertical scaler uses 16.8 fixed point
- int source_y_fraction = (source_y_subpixel & kFractionMask) >> 8;
- int source_uv_fraction =
- ((source_y_subpixel >> y_shift) & kFractionMask) >> 8;
-
- const uint8* y_ptr = y0_ptr;
- const uint8* u_ptr = u0_ptr;
- const uint8* v_ptr = v0_ptr;
- // Apply vertical filtering if necessary.
- // TODO(fbarchard): Remove memcpy when not necessary.
- if (filter & mozilla::gfx::FILTER_BILINEAR_V) {
- if (yscale_fixed != kFractionMax &&
- source_y_fraction && ((source_y + 1) < source_height)) {
- FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
- } else {
- memcpy(ybuf, y0_ptr, source_width);
- }
- y_ptr = ybuf;
- ybuf[source_width] = ybuf[source_width-1];
- int uv_source_width = (source_width + 1) / 2;
- if (yscale_fixed != kFractionMax &&
- source_uv_fraction &&
- (((source_y >> y_shift) + 1) < (source_height >> y_shift))) {
- FilterRows(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction);
- FilterRows(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction);
- } else {
- memcpy(ubuf, u0_ptr, uv_source_width);
- memcpy(vbuf, v0_ptr, uv_source_width);
- }
- u_ptr = ubuf;
- v_ptr = vbuf;
- ubuf[uv_source_width] = ubuf[uv_source_width - 1];
- vbuf[uv_source_width] = vbuf[uv_source_width - 1];
- }
- if (source_dx == kFractionMax) { // Not scaled
- FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, width);
- } else if (filter & FILTER_BILINEAR_H) {
- LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, width, source_dx);
- } else {
-// Specialized scalers and rotation.
-#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_MSC_VER) && defined(_M_IX86)
- if(mozilla::supports_sse()) {
- if (width == (source_width * 2)) {
- DoubleYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
- dest_pixel, width);
- } else if ((source_dx & kFractionMask) == 0) {
- // Scaling by integer scale factor. ie half.
- ConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
- dest_pixel, width,
- source_dx >> kFractionBits);
- } else if (source_dx_uv == source_dx) { // Not rotated.
- ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, width, source_dx);
- } else {
- RotateConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
- dest_pixel, width,
- source_dx >> kFractionBits,
- source_dx_uv >> kFractionBits);
- }
- }
- else {
- ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
- dest_pixel, width, source_dx);
- }
-#else
- (void)source_dx_uv;
- ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, width, source_dx);
-#endif
- }
- }
- // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.
- if (has_mmx)
- EMMS();
-}
-
-} // namespace gfx
-} // namespace mozilla
diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
deleted file mode 100644
index 266a23d45..000000000
--- a/gfx/ycbcr/yuv_convert.h
+++ /dev/null
@@ -1,110 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef MEDIA_BASE_YUV_CONVERT_H_
-#define MEDIA_BASE_YUV_CONVERT_H_
-
-#include "chromium_types.h"
-#include "ImageTypes.h"
-
-namespace mozilla {
-
-namespace gfx {
-
-// Type of YUV surface.
-// The value of these enums matter as they are used to shift vertical indices.
-enum YUVType {
- YV12 = 0, // YV12 is half width and half height chroma channels.
- YV16 = 1, // YV16 is half width and full height chroma channels.
- YV24 = 2 // YV24 is full width and full height chroma channels.
-};
-
-// Mirror means flip the image horizontally, as in looking in a mirror.
-// Rotate happens after mirroring.
-enum Rotate {
- ROTATE_0, // Rotation off.
- ROTATE_90, // Rotate clockwise.
- ROTATE_180, // Rotate upside down.
- ROTATE_270, // Rotate counter clockwise.
- MIRROR_ROTATE_0, // Mirror horizontally.
- MIRROR_ROTATE_90, // Mirror then Rotate clockwise.
- MIRROR_ROTATE_180, // Mirror vertically.
- MIRROR_ROTATE_270 // Transpose.
-};
-
-// Filter affects how scaling looks.
-enum ScaleFilter {
- FILTER_NONE = 0, // No filter (point sampled).
- FILTER_BILINEAR_H = 1, // Bilinear horizontal filter.
- FILTER_BILINEAR_V = 2, // Bilinear vertical filter.
- FILTER_BILINEAR = 3 // Bilinear filter.
-};
-
-YUVType TypeFromSize(int ywidth, int yheight, int cbcrwidth, int cbcrheight);
-
-// Convert a frame of YUV to 32 bit ARGB.
-// Pass in YV16/YV12 depending on source format
-void ConvertYCbCrToRGB32(const uint8* yplane,
- const uint8* uplane,
- const uint8* vplane,
- uint8* rgbframe,
- int pic_x,
- int pic_y,
- int pic_width,
- int pic_height,
- int ystride,
- int uvstride,
- int rgbstride,
- YUVType yuv_type,
- YUVColorSpace yuv_color_space);
-
-void ConvertYCbCrToRGB32_deprecated(const uint8* yplane,
- const uint8* uplane,
- const uint8* vplane,
- uint8* rgbframe,
- int pic_x,
- int pic_y,
- int pic_width,
- int pic_height,
- int ystride,
- int uvstride,
- int rgbstride,
- YUVType yuv_type);
-
-// Scale a frame of YUV to 32 bit ARGB.
-// Supports rotation and mirroring.
-void ScaleYCbCrToRGB32(const uint8* yplane,
- const uint8* uplane,
- const uint8* vplane,
- uint8* rgbframe,
- int source_width,
- int source_height,
- int width,
- int height,
- int ystride,
- int uvstride,
- int rgbstride,
- YUVType yuv_type,
- YUVColorSpace yuv_color_space,
- ScaleFilter filter);
-
-void ScaleYCbCrToRGB32_deprecated(const uint8* yplane,
- const uint8* uplane,
- const uint8* vplane,
- uint8* rgbframe,
- int source_width,
- int source_height,
- int width,
- int height,
- int ystride,
- int uvstride,
- int rgbstride,
- YUVType yuv_type,
- Rotate view_rotate,
- ScaleFilter filter);
-
-} // namespace gfx
-} // namespace mozilla
-
-#endif // MEDIA_BASE_YUV_CONVERT_H_
diff --git a/gfx/ycbcr/yuv_convert_arm.cpp b/gfx/ycbcr/yuv_convert_arm.cpp
deleted file mode 100644
index 081343b0b..000000000
--- a/gfx/ycbcr/yuv_convert_arm.cpp
+++ /dev/null
@@ -1,232 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// contributor Siarhei Siamashka <siarhei.siamashka@gmail.com>
-
-#include "yuv_convert.h"
-#include "ycbcr_to_rgb565.h"
-
-
-
-#ifdef HAVE_YCBCR_TO_RGB565
-
-namespace mozilla {
-
-namespace gfx {
-
-# if defined(MOZILLA_MAY_SUPPORT_NEON)
-# if defined(__clang__)
-void __attribute((noinline))
-# else
-void __attribute((noinline,optimize("-fomit-frame-pointer")))
-# endif
- yuv42x_to_rgb565_row_neon(uint16 *dst,
- const uint8 *y,
- const uint8 *u,
- const uint8 *v,
- int n,
- int oddflag)
-{
- static __attribute__((aligned(16))) uint16 acc_r[8] = {
- 22840, 22840, 22840, 22840, 22840, 22840, 22840, 22840,
- };
- static __attribute__((aligned(16))) uint16 acc_g[8] = {
- 17312, 17312, 17312, 17312, 17312, 17312, 17312, 17312,
- };
- static __attribute__((aligned(16))) uint16 acc_b[8] = {
- 28832, 28832, 28832, 28832, 28832, 28832, 28832, 28832,
- };
- /*
- * Registers:
- * q0, q1 : d0, d1, d2, d3 - are used for initial loading of YUV data
- * q2 : d4, d5 - are used for storing converted RGB data
- * q3 : d6, d7 - are used for temporary storage
- *
- * q4-q7 - reserved
- *
- * q8, q9 : d16, d17, d18, d19 - are used for expanded Y data
- * q10 : d20, d21
- * q11 : d22, d23
- * q12 : d24, d25
- * q13 : d26, d27
- * q13, q14, q15 - various constants (#16, #149, #204, #50, #104, #154)
- */
- asm volatile (
-".fpu neon\n"
-/* Allow to build on targets not supporting neon, and force the object file
- * target to avoid bumping the final binary target */
-".arch armv7-a\n"
-".object_arch armv4t\n"
-".macro convert_macroblock size\n"
-/* load up to 16 source pixels */
- ".if \\size == 16\n"
- "pld [%[y], #64]\n"
- "pld [%[u], #64]\n"
- "pld [%[v], #64]\n"
- "vld1.8 {d1}, [%[y]]!\n"
- "vld1.8 {d3}, [%[y]]!\n"
- "vld1.8 {d0}, [%[u]]!\n"
- "vld1.8 {d2}, [%[v]]!\n"
- ".elseif \\size == 8\n"
- "vld1.8 {d1}, [%[y]]!\n"
- "vld1.8 {d0[0]}, [%[u]]!\n"
- "vld1.8 {d0[1]}, [%[u]]!\n"
- "vld1.8 {d0[2]}, [%[u]]!\n"
- "vld1.8 {d0[3]}, [%[u]]!\n"
- "vld1.8 {d2[0]}, [%[v]]!\n"
- "vld1.8 {d2[1]}, [%[v]]!\n"
- "vld1.8 {d2[2]}, [%[v]]!\n"
- "vld1.8 {d2[3]}, [%[v]]!\n"
- ".elseif \\size == 4\n"
- "vld1.8 {d1[0]}, [%[y]]!\n"
- "vld1.8 {d1[1]}, [%[y]]!\n"
- "vld1.8 {d1[2]}, [%[y]]!\n"
- "vld1.8 {d1[3]}, [%[y]]!\n"
- "vld1.8 {d0[0]}, [%[u]]!\n"
- "vld1.8 {d0[1]}, [%[u]]!\n"
- "vld1.8 {d2[0]}, [%[v]]!\n"
- "vld1.8 {d2[1]}, [%[v]]!\n"
- ".elseif \\size == 2\n"
- "vld1.8 {d1[0]}, [%[y]]!\n"
- "vld1.8 {d1[1]}, [%[y]]!\n"
- "vld1.8 {d0[0]}, [%[u]]!\n"
- "vld1.8 {d2[0]}, [%[v]]!\n"
- ".elseif \\size == 1\n"
- "vld1.8 {d1[0]}, [%[y]]!\n"
- "vld1.8 {d0[0]}, [%[u]]!\n"
- "vld1.8 {d2[0]}, [%[v]]!\n"
- ".else\n"
- ".error \"unsupported macroblock size\"\n"
- ".endif\n"
-
- /* d1 - Y data (first 8 bytes) */
- /* d3 - Y data (next 8 bytes) */
- /* d0 - U data, d2 - V data */
-
- /* split even and odd Y color components */
- "vuzp.8 d1, d3\n" /* d1 - evenY, d3 - oddY */
- /* clip upper and lower boundaries */
- "vqadd.u8 q0, q0, q4\n"
- "vqadd.u8 q1, q1, q4\n"
- "vqsub.u8 q0, q0, q5\n"
- "vqsub.u8 q1, q1, q5\n"
-
- "vshr.u8 d4, d2, #1\n" /* d4 = V >> 1 */
-
- "vmull.u8 q8, d1, d27\n" /* q8 = evenY * 149 */
- "vmull.u8 q9, d3, d27\n" /* q9 = oddY * 149 */
-
- "vld1.16 {d20, d21}, [%[acc_r], :128]\n" /* q10 - initialize accumulator for red */
- "vsubw.u8 q10, q10, d4\n" /* red acc -= (V >> 1) */
- "vmlsl.u8 q10, d2, d28\n" /* red acc -= V * 204 */
- "vld1.16 {d22, d23}, [%[acc_g], :128]\n" /* q11 - initialize accumulator for green */
- "vmlsl.u8 q11, d2, d30\n" /* green acc -= V * 104 */
- "vmlsl.u8 q11, d0, d29\n" /* green acc -= U * 50 */
- "vld1.16 {d24, d25}, [%[acc_b], :128]\n" /* q12 - initialize accumulator for blue */
- "vmlsl.u8 q12, d0, d30\n" /* blue acc -= U * 104 */
- "vmlsl.u8 q12, d0, d31\n" /* blue acc -= U * 154 */
-
- "vhsub.s16 q3, q8, q10\n" /* calculate even red components */
- "vhsub.s16 q10, q9, q10\n" /* calculate odd red components */
- "vqshrun.s16 d0, q3, #6\n" /* right shift, narrow and saturate even red components */
- "vqshrun.s16 d3, q10, #6\n" /* right shift, narrow and saturate odd red components */
-
- "vhadd.s16 q3, q8, q11\n" /* calculate even green components */
- "vhadd.s16 q11, q9, q11\n" /* calculate odd green components */
- "vqshrun.s16 d1, q3, #6\n" /* right shift, narrow and saturate even green components */
- "vqshrun.s16 d4, q11, #6\n" /* right shift, narrow and saturate odd green components */
-
- "vhsub.s16 q3, q8, q12\n" /* calculate even blue components */
- "vhsub.s16 q12, q9, q12\n" /* calculate odd blue components */
- "vqshrun.s16 d2, q3, #6\n" /* right shift, narrow and saturate even blue components */
- "vqshrun.s16 d5, q12, #6\n" /* right shift, narrow and saturate odd blue components */
-
- "vzip.8 d0, d3\n" /* join even and odd red components */
- "vzip.8 d1, d4\n" /* join even and odd green components */
- "vzip.8 d2, d5\n" /* join even and odd blue components */
-
- "vshll.u8 q3, d0, #8\n\t"
- "vshll.u8 q8, d1, #8\n\t"
- "vshll.u8 q9, d2, #8\n\t"
- "vsri.u16 q3, q8, #5\t\n"
- "vsri.u16 q3, q9, #11\t\n"
- /* store pixel data to memory */
- ".if \\size == 16\n"
- " vst1.16 {d6, d7}, [%[dst]]!\n"
- " vshll.u8 q3, d3, #8\n\t"
- " vshll.u8 q8, d4, #8\n\t"
- " vshll.u8 q9, d5, #8\n\t"
- " vsri.u16 q3, q8, #5\t\n"
- " vsri.u16 q3, q9, #11\t\n"
- " vst1.16 {d6, d7}, [%[dst]]!\n"
- ".elseif \\size == 8\n"
- " vst1.16 {d6, d7}, [%[dst]]!\n"
- ".elseif \\size == 4\n"
- " vst1.16 {d6}, [%[dst]]!\n"
- ".elseif \\size == 2\n"
- " vst1.16 {d6[0]}, [%[dst]]!\n"
- " vst1.16 {d6[1]}, [%[dst]]!\n"
- ".elseif \\size == 1\n"
- " vst1.16 {d6[0]}, [%[dst]]!\n"
- ".endif\n"
- ".endm\n"
-
- "vmov.u8 d8, #15\n" /* add this to U/V to saturate upper boundary */
- "vmov.u8 d9, #20\n" /* add this to Y to saturate upper boundary */
- "vmov.u8 d10, #31\n" /* sub this from U/V to saturate lower boundary */
- "vmov.u8 d11, #36\n" /* sub this from Y to saturate lower boundary */
-
- "vmov.u8 d26, #16\n"
- "vmov.u8 d27, #149\n"
- "vmov.u8 d28, #204\n"
- "vmov.u8 d29, #50\n"
- "vmov.u8 d30, #104\n"
- "vmov.u8 d31, #154\n"
-
- "cmp %[oddflag], #0\n"
- "beq 1f\n"
- "convert_macroblock 1\n"
- "sub %[n], %[n], #1\n"
- "1:\n"
- "subs %[n], %[n], #16\n"
- "blt 2f\n"
- "1:\n"
- "convert_macroblock 16\n"
- "subs %[n], %[n], #16\n"
- "bge 1b\n"
- "2:\n"
- "tst %[n], #8\n"
- "beq 3f\n"
- "convert_macroblock 8\n"
- "3:\n"
- "tst %[n], #4\n"
- "beq 4f\n"
- "convert_macroblock 4\n"
- "4:\n"
- "tst %[n], #2\n"
- "beq 5f\n"
- "convert_macroblock 2\n"
- "5:\n"
- "tst %[n], #1\n"
- "beq 6f\n"
- "convert_macroblock 1\n"
- "6:\n"
- ".purgem convert_macroblock\n"
- : [y] "+&r" (y), [u] "+&r" (u), [v] "+&r" (v), [dst] "+&r" (dst), [n] "+&r" (n)
- : [acc_r] "r" (&acc_r[0]), [acc_g] "r" (&acc_g[0]), [acc_b] "r" (&acc_b[0]),
- [oddflag] "r" (oddflag)
- : "cc", "memory",
- "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
- "d8", "d9", "d10", "d11", /* "d12", "d13", "d14", "d15", */
- "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
- "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"
- );
-}
-# endif // MOZILLA_MAY_SUPPORT_NEON
-
-} // namespace gfx
-
-} // namespace mozilla
-
-#endif // HAVE_YCBCR_TO_RGB565
diff --git a/gfx/ycbcr/yuv_convert_mmx.cpp b/gfx/ycbcr/yuv_convert_mmx.cpp
deleted file mode 100644
index b5353e500..000000000
--- a/gfx/ycbcr/yuv_convert_mmx.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <mmintrin.h>
-#include "yuv_row.h"
-
-namespace mozilla {
-namespace gfx {
-
-// FilterRows combines two rows of the image using linear interpolation.
-// MMX version does 8 pixels at a time.
-void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
- int source_width, int source_y_fraction) {
- __m64 zero = _mm_setzero_si64();
- __m64 y1_fraction = _mm_set1_pi16(source_y_fraction);
- __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction);
-
- const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr);
- const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr);
- __m64* dest64 = reinterpret_cast<__m64*>(ybuf);
- __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width);
-
- do {
- __m64 y0 = *y0_ptr64++;
- __m64 y1 = *y1_ptr64++;
- __m64 y2 = _mm_unpackhi_pi8(y0, zero);
- __m64 y3 = _mm_unpackhi_pi8(y1, zero);
- y0 = _mm_unpacklo_pi8(y0, zero);
- y1 = _mm_unpacklo_pi8(y1, zero);
- y0 = _mm_mullo_pi16(y0, y0_fraction);
- y1 = _mm_mullo_pi16(y1, y1_fraction);
- y2 = _mm_mullo_pi16(y2, y0_fraction);
- y3 = _mm_mullo_pi16(y3, y1_fraction);
- y0 = _mm_add_pi16(y0, y1);
- y2 = _mm_add_pi16(y2, y3);
- y0 = _mm_srli_pi16(y0, 8);
- y2 = _mm_srli_pi16(y2, 8);
- y0 = _mm_packs_pu16(y0, y2);
- *dest64++ = y0;
- } while (dest64 < end64);
-}
-
-} // namespace gfx
-} // namespace mozilla
diff --git a/gfx/ycbcr/yuv_convert_sse2.cpp b/gfx/ycbcr/yuv_convert_sse2.cpp
deleted file mode 100644
index 25fe20639..000000000
--- a/gfx/ycbcr/yuv_convert_sse2.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <emmintrin.h>
-#include "yuv_row.h"
-
-namespace mozilla {
-namespace gfx {
-
-// FilterRows combines two rows of the image using linear interpolation.
-// SSE2 version does 16 pixels at a time.
-void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
- int source_width, int source_y_fraction) {
- __m128i zero = _mm_setzero_si128();
- __m128i y1_fraction = _mm_set1_epi16(source_y_fraction);
- __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction);
-
- const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr);
- const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr);
- __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf);
- __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width);
-
- do {
- __m128i y0 = _mm_loadu_si128(y0_ptr128);
- __m128i y1 = _mm_loadu_si128(y1_ptr128);
- __m128i y2 = _mm_unpackhi_epi8(y0, zero);
- __m128i y3 = _mm_unpackhi_epi8(y1, zero);
- y0 = _mm_unpacklo_epi8(y0, zero);
- y1 = _mm_unpacklo_epi8(y1, zero);
- y0 = _mm_mullo_epi16(y0, y0_fraction);
- y1 = _mm_mullo_epi16(y1, y1_fraction);
- y2 = _mm_mullo_epi16(y2, y0_fraction);
- y3 = _mm_mullo_epi16(y3, y1_fraction);
- y0 = _mm_add_epi16(y0, y1);
- y2 = _mm_add_epi16(y2, y3);
- y0 = _mm_srli_epi16(y0, 8);
- y2 = _mm_srli_epi16(y2, 8);
- y0 = _mm_packus_epi16(y0, y2);
- *dest128++ = y0;
- ++y0_ptr128;
- ++y1_ptr128;
- } while (dest128 < end128);
-}
-
-} // namespace gfx
-} // namespace mozilla
diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h
deleted file mode 100644
index c89f54b8f..000000000
--- a/gfx/ycbcr/yuv_row.h
+++ /dev/null
@@ -1,142 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// yuv_row internal functions to handle YUV conversion and scaling to RGB.
-// These functions are used from both yuv_convert.cc and yuv_scale.cc.
-
-// TODO(fbarchard): Write function that can handle rotation and scaling.
-
-#ifndef MEDIA_BASE_YUV_ROW_H_
-#define MEDIA_BASE_YUV_ROW_H_
-
-#include "chromium_types.h"
-
-extern "C" {
-// Can only do 1x.
-// This is the second fastest of the scalers.
-void FastConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width);
-
-void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- unsigned int x_shift);
-
-void FastConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width);
-
-// Can do 1x, half size or any scale down by an integer amount.
-// Step can be negative (mirroring, rotate 180).
-// This is the third fastest of the scalers.
-// Only defined on Windows x86-32.
-void ConvertYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int step);
-
-// Rotate is like Convert, but applies different step to Y versus U and V.
-// This allows rotation by 90 or 270, by stepping by stride.
-// This is the forth fastest of the scalers.
-// Only defined on Windows x86-32.
-void RotateConvertYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int ystep,
- int uvstep);
-
-// Doubler does 4 pixels at a time. Each pixel is replicated.
-// This is the fastest of the scalers.
-// Only defined on Windows x86-32.
-void DoubleYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width);
-
-// Handles arbitrary scaling up or down.
-// Mirroring is supported, but not 90 or 270 degree rotation.
-// Chroma is under sampled every 2 pixels for performance.
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx);
-
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx);
-
-void ScaleYUVToRGB32Row_C(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx);
-
-// Handles arbitrary scaling up or down with bilinear filtering.
-// Mirroring is supported, but not 90 or 270 degree rotation.
-// Chroma is under sampled every 2 pixels for performance.
-// This is the slowest of the scalers.
-void LinearScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx);
-
-void LinearScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx);
-
-void LinearScaleYUVToRGB32Row_C(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx);
-
-
-#if defined(_MSC_VER)
-#define SIMD_ALIGNED(var) __declspec(align(16)) var
-#else
-#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
-#endif
-extern SIMD_ALIGNED(const int16 kCoefficientsRgbY[768][4]);
-
-// x64 uses MMX2 (SSE) so emms is not required.
-// Warning C4799: function has no EMMS instruction.
-// EMMS() is slow and should be called by the calling function once per image.
-#if defined(ARCH_CPU_X86) && !defined(ARCH_CPU_X86_64)
-#if defined(_MSC_VER)
-#define EMMS() __asm emms
-#pragma warning(disable: 4799)
-#else
-#define EMMS() asm("emms")
-#endif
-#else
-#define EMMS() ((void)0)
-#endif
-
-} // extern "C"
-
-#endif // MEDIA_BASE_YUV_ROW_H_
diff --git a/gfx/ycbcr/yuv_row_arm.s b/gfx/ycbcr/yuv_row_arm.s
deleted file mode 100644
index 6a6c81bee..000000000
--- a/gfx/ycbcr/yuv_row_arm.s
+++ /dev/null
@@ -1,304 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
- .arch armv7-a
- .fpu neon
-/* Allow to build on targets not supporting neon, and force the object file
- * target to avoid bumping the final binary target */
- .object_arch armv4t
- .text
- .align
-
- .balign 64
-YCbCr42xToRGB565_DITHER03_CONSTS_NEON:
- .short -14240
- .short -14240+384
- .short 8672
- .short 8672+192
- .short -17696
- .short -17696+384
- .byte 102
- .byte 25
- .byte 52
- .byte 129
-YCbCr42xToRGB565_DITHER12_CONSTS_NEON:
- .short -14240+128
- .short -14240+256
- .short 8672+64
- .short 8672+128
- .short -17696+128
- .short -17696+256
- .byte 102
- .byte 25
- .byte 52
- .byte 129
-YCbCr42xToRGB565_DITHER21_CONSTS_NEON:
- .short -14240+256
- .short -14240+128
- .short 8672+128
- .short 8672+64
- .short -17696+256
- .short -17696+128
- .byte 102
- .byte 25
- .byte 52
- .byte 129
-YCbCr42xToRGB565_DITHER30_CONSTS_NEON:
- .short -14240+384
- .short -14240
- .short 8672+192
- .short 8672
- .short -17696+384
- .short -17696
- .byte 102
- .byte 25
- .byte 52
- .byte 129
-
-@ void ScaleYCbCr42xToRGB565_BilinearY_Row_NEON(
-@ yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither);
-@
-@ ctx = {
-@ uint16_t *rgb_row; /*r0*/
-@ const uint8_t *y_row; /*r1*/
-@ const uint8_t *u_row; /*r2*/
-@ const uint8_t *v_row; /*r3*/
-@ int y_yweight; /*r4*/
-@ int y_pitch; /*r5*/
-@ int width; /*r6*/
-@ int source_x0_q16; /*r7*/
-@ int source_dx_q16; /*r8*/
-@ int source_uv_xoffs_q16; /*r9*/
-@ };
- .global ScaleYCbCr42xToRGB565_BilinearY_Row_NEON
- .type ScaleYCbCr42xToRGB565_BilinearY_Row_NEON, %function
- .balign 64
- .fnstart
-ScaleYCbCr42xToRGB565_BilinearY_Row_NEON:
- STMFD r13!,{r4-r9,r14} @ 8 words.
- ADR r14,YCbCr42xToRGB565_DITHER03_CONSTS_NEON
- VPUSH {Q4-Q7} @ 16 words.
- ADD r14,r14,r1, LSL #4 @ Select the dither table to use
- LDMIA r0, {r0-r9}
- @ Set up image index registers.
- ADD r12,r8, r8
- VMOV.I32 D16,#0 @ Q8 = < 2| 2| 0| 0>*source_dx_q16
- VDUP.32 D17,r12
- ADD r12,r12,r12
- VTRN.32 D16,D17 @ Q2 = < 2| 0| 2| 0>*source_dx_q16
- VDUP.32 D19,r12 @ Q9 = < 4| 4| ?| ?>*source_dx_q16
- ADD r12,r12,r12
- VDUP.32 Q0, r7 @ Q0 = < 1| 1| 1| 1>*source_x0_q16
- VADD.I32 D17,D17,D19 @ Q8 = < 6| 4| 2| 0>*source_dx_q16
- CMP r8, #0 @ If source_dx_q16 is negative...
- VDUP.32 Q9, r12 @ Q9 = < 8| 8| 8| 8>*source_dx_q16
- ADDLT r7, r7, r8, LSL #4 @ Make r7 point to the end of the block
- VADD.I32 Q0, Q0, Q8 @ Q0 = < 6| 4| 2| 0>*source_dx_q16+source_x0_q16
- SUBLT r7, r7, r8 @ (i.e., the lowest address we'll use)
- VADD.I32 Q1, Q0, Q9 @ Q1 = <14|12|10| 8>*source_dx_q16+source_x0_q16
- VDUP.I32 Q9, r8 @ Q8 = < 1| 1| 1| 1>*source_dx_q16
- VADD.I32 Q2, Q0, Q9 @ Q2 = < 7| 5| 3| 1>*source_dx_q16+source_x0_q16
- VADD.I32 Q3, Q1, Q9 @ Q3 = <15|13|11| 9>*source_dx_q16+source_x0_q16
- VLD1.64 {D30,D31},[r14,:128] @ Load some constants
- VMOV.I8 D28,#52
- VMOV.I8 D29,#129
- @ The basic idea here is to do aligned loads of a block of data and then
- @ index into it using VTBL to extract the data from the source X
- @ coordinate corresponding to each destination pixel.
- @ This is significantly less code and significantly fewer cycles than doing
- @ a series of single-lane loads, but it means that the X step between
- @ pixels must be limited to 2.0 or less, otherwise we couldn't guarantee
- @ that we could read 8 pixels from a single aligned 32-byte block of data.
- @ Q0...Q3 contain the 16.16 fixed-point X coordinates of each pixel,
- @ separated into even pixels and odd pixels to make extracting offsets and
- @ weights easier.
- @ We then pull out two bytes from the middle of each coordinate: the top
- @ byte corresponds to the integer part of the X coordinate, and the bottom
- @ byte corresponds to the weight to use for bilinear blending.
- @ These are separated out into different registers with VTRN.
- @ Then by subtracting the integer X coordinate of the first pixel in the
- @ data block we loaded, we produce an index register suitable for use by
- @ VTBL.
-s42xbily_neon_loop:
- @ Load the Y' data.
- MOV r12,r7, ASR #16
- VRSHRN.S32 D16,Q0, #8
- AND r12,r12,#~15 @ Read 16-byte aligned blocks
- VDUP.I8 D20,r12
- ADD r12,r1, r12 @ r12 = y_row+(source_x&~7)
- VRSHRN.S32 D17,Q1, #8
- PLD [r12,#64]
- VLD1.64 {D8, D9, D10,D11},[r12,:128],r5 @ Load Y' top row
- ADD r14,r7, r8, LSL #3
- VRSHRN.S32 D18,Q2, #8
- MOV r14,r14,ASR #16
- VRSHRN.S32 D19,Q3, #8
- AND r14,r14,#~15 @ Read 16-byte aligned blocks
- VLD1.64 {D12,D13,D14,D15},[r12,:128] @ Load Y' bottom row
- PLD [r12,#64]
- VDUP.I8 D21,r14
- ADD r14,r1, r14 @ r14 = y_row+(source_x&~7)
- VMOV.I8 Q13,#1
- PLD [r14,#64]
- VTRN.8 Q8, Q9 @ Q8 = <wFwEwDwCwBwAw9w8w7w6w5w4w3w2w1w0>
- @ Q9 = <xFxExDxCxBxAx9x8x7x6x5x4x3x2x1x0>
- VSUB.S8 Q9, Q9, Q10 @ Make offsets relative to the data we loaded.
- @ First 8 Y' pixels
- VTBL.8 D20,{D8, D9, D10,D11},D18 @ Index top row at source_x
- VTBL.8 D24,{D12,D13,D14,D15},D18 @ Index bottom row at source_x
- VADD.S8 Q13,Q9, Q13 @ Add 1 to source_x
- VTBL.8 D22,{D8, D9, D10,D11},D26 @ Index top row at source_x+1
- VTBL.8 D26,{D12,D13,D14,D15},D26 @ Index bottom row at source_x+1
- @ Next 8 Y' pixels
- VLD1.64 {D8, D9, D10,D11},[r14,:128],r5 @ Load Y' top row
- VLD1.64 {D12,D13,D14,D15},[r14,:128] @ Load Y' bottom row
- PLD [r14,#64]
- VTBL.8 D21,{D8, D9, D10,D11},D19 @ Index top row at source_x
- VTBL.8 D25,{D12,D13,D14,D15},D19 @ Index bottom row at source_x
- VTBL.8 D23,{D8, D9, D10,D11},D27 @ Index top row at source_x+1
- VTBL.8 D27,{D12,D13,D14,D15},D27 @ Index bottom row at source_x+1
- @ Blend Y'.
- VDUP.I16 Q9, r4 @ Load the y weights.
- VSUBL.U8 Q4, D24,D20 @ Q5:Q4 = c-a
- VSUBL.U8 Q5, D25,D21
- VSUBL.U8 Q6, D26,D22 @ Q7:Q6 = d-b
- VSUBL.U8 Q7, D27,D23
- VMUL.S16 Q4, Q4, Q9 @ Q5:Q4 = (c-a)*yweight
- VMUL.S16 Q5, Q5, Q9
- VMUL.S16 Q6, Q6, Q9 @ Q7:Q6 = (d-b)*yweight
- VMUL.S16 Q7, Q7, Q9
- VMOVL.U8 Q12,D16 @ Promote the x weights to 16 bits.
- VMOVL.U8 Q13,D17 @ Sadly, there's no VMULW.
- VRSHRN.S16 D8, Q4, #8 @ Q4 = (c-a)*yweight+128>>8
- VRSHRN.S16 D9, Q5, #8
- VRSHRN.S16 D12,Q6, #8 @ Q6 = (d-b)*yweight+128>>8
- VRSHRN.S16 D13,Q7, #8
- VADD.I8 Q10,Q10,Q4 @ Q10 = a+((c-a)*yweight+128>>8)
- VADD.I8 Q11,Q11,Q6 @ Q11 = b+((d-b)*yweight+128>>8)
- VSUBL.U8 Q4, D22,D20 @ Q5:Q4 = b-a
- VSUBL.U8 Q5, D23,D21
- VMUL.S16 Q4, Q4, Q12 @ Q5:Q4 = (b-a)*xweight
- VMUL.S16 Q5, Q5, Q13
- VRSHRN.S16 D8, Q4, #8 @ Q4 = (b-a)*xweight+128>>8
- ADD r12,r7, r9
- VRSHRN.S16 D9, Q5, #8
- MOV r12,r12,ASR #17
- VADD.I8 Q8, Q10,Q4 @ Q8 = a+((b-a)*xweight+128>>8)
- @ Start extracting the chroma x coordinates, and load Cb and Cr.
- AND r12,r12,#~15 @ Read 16-byte aligned blocks
- VDUP.I32 Q9, r9 @ Q9 = source_uv_xoffs_q16 x 4
- ADD r14,r2, r12
- VADD.I32 Q10,Q0, Q9
- VLD1.64 {D8, D9, D10,D11},[r14,:128] @ Load Cb
- PLD [r14,#64]
- VADD.I32 Q11,Q1, Q9
- ADD r14,r3, r12
- VADD.I32 Q12,Q2, Q9
- VLD1.64 {D12,D13,D14,D15},[r14,:128] @ Load Cr
- PLD [r14,#64]
- VADD.I32 Q13,Q3, Q9
- VRSHRN.S32 D20,Q10,#9 @ Q10 = <xEwExCwCxAwAx8w8x6w6x4w4x2w2x0w0>
- VRSHRN.S32 D21,Q11,#9
- VDUP.I8 Q9, r12
- VRSHRN.S32 D22,Q12,#9 @ Q11 = <xFwFxDwDxBwBx9w9x7w7x5w5x3w3x1w1>
- VRSHRN.S32 D23,Q13,#9
- @ We don't actually need the x weights, but we get them for free.
- @ Free ALU slot
- VTRN.8 Q10,Q11 @ Q10 = <wFwEwDwCwBwAw9w8w7w6w5w4w3w2w1w0>
- @ Free ALU slot @ Q11 = <xFxExDxCxBxAx9x8x7x6x5x4x3x2x1x0>
- VSUB.S8 Q11,Q11,Q9 @ Make offsets relative to the data we loaded.
- VTBL.8 D18,{D8, D9, D10,D11},D22 @ Index Cb at source_x
- VMOV.I8 D24,#74
- VTBL.8 D19,{D8, D9, D10,D11},D23
- VMOV.I8 D26,#102
- VTBL.8 D20,{D12,D13,D14,D15},D22 @ Index Cr at source_x
- VMOV.I8 D27,#25
- VTBL.8 D21,{D12,D13,D14,D15},D23
- @ We now have Y' in Q8, Cb in Q9, and Cr in Q10
- @ We use VDUP to expand constants, because it's a permute instruction, so
- @ it can dual issue on the A8.
- SUBS r6, r6, #16 @ width -= 16
- VMULL.U8 Q4, D16,D24 @ Q5:Q4 = Y'*74
- VDUP.32 Q6, D30[1] @ Q7:Q6 = bias_G
- VMULL.U8 Q5, D17,D24
- VDUP.32 Q7, D30[1]
- VMLSL.U8 Q6, D18,D27 @ Q7:Q6 = -25*Cb+bias_G
- VDUP.32 Q11,D30[0] @ Q12:Q11 = bias_R
- VMLSL.U8 Q7, D19,D27
- VDUP.32 Q12,D30[0]
- VMLAL.U8 Q11,D20,D26 @ Q12:Q11 = 102*Cr+bias_R
- VDUP.32 Q8, D31[0] @ Q13:Q8 = bias_B
- VMLAL.U8 Q12,D21,D26
- VDUP.32 Q13,D31[0]
- VMLAL.U8 Q8, D18,D29 @ Q13:Q8 = 129*Cb+bias_B
- VMLAL.U8 Q13,D19,D29
- VMLSL.U8 Q6, D20,D28 @ Q7:Q6 = -25*Cb-52*Cr+bias_G
- VMLSL.U8 Q7, D21,D28
- VADD.S16 Q11,Q4, Q11 @ Q12:Q11 = 74*Y'+102*Cr+bias_R
- VADD.S16 Q12,Q5, Q12
- VQADD.S16 Q8, Q4, Q8 @ Q13:Q8 = 74*Y'+129*Cr+bias_B
- VQADD.S16 Q13,Q5, Q13
- VADD.S16 Q6, Q4, Q6 @ Q7:Q6 = 74*Y'-25*Cb-52*Cr+bias_G
- VADD.S16 Q7, Q5, Q7
- @ Push each value to the top of its word and saturate it.
- VQSHLU.S16 Q11,Q11,#2
- VQSHLU.S16 Q12,Q12,#2
- VQSHLU.S16 Q6, Q6, #2
- VQSHLU.S16 Q7, Q7, #2
- VQSHLU.S16 Q8, Q8, #2
- VQSHLU.S16 Q13,Q13,#2
- @ Merge G and B into R.
- VSRI.U16 Q11,Q6, #5
- VSRI.U16 Q12,Q7, #5
- VSRI.U16 Q11,Q8, #11
- MOV r14,r8, LSL #4
- VSRI.U16 Q12,Q13,#11
- BLT s42xbily_neon_tail
- VDUP.I32 Q13,r14
- @ Store the result.
- VST1.16 {D22,D23,D24,D25},[r0]!
- BEQ s42xbily_neon_done
- @ Advance the x coordinates.
- VADD.I32 Q0, Q0, Q13
- VADD.I32 Q1, Q1, Q13
- ADD r7, r14
- VADD.I32 Q2, Q2, Q13
- VADD.I32 Q3, Q3, Q13
- B s42xbily_neon_loop
-s42xbily_neon_tail:
- @ We have between 1 and 15 pixels left to write.
- @ -r6 == the number of pixels we need to skip writing.
- @ Adjust r0 to point to the last one we need to write, because we're going
- @ to write them in reverse order.
- ADD r0, r0, r6, LSL #1
- MOV r14,#-2
- ADD r0, r0, #30
- @ Skip past the ones we don't need to write.
- SUB PC, PC, r6, LSL #2
- ORR r0, r0, r0
- VST1.16 {D25[3]},[r0,:16],r14
- VST1.16 {D25[2]},[r0,:16],r14
- VST1.16 {D25[1]},[r0,:16],r14
- VST1.16 {D25[0]},[r0,:16],r14
- VST1.16 {D24[3]},[r0,:16],r14
- VST1.16 {D24[2]},[r0,:16],r14
- VST1.16 {D24[1]},[r0,:16],r14
- VST1.16 {D24[0]},[r0,:16],r14
- VST1.16 {D23[3]},[r0,:16],r14
- VST1.16 {D23[2]},[r0,:16],r14
- VST1.16 {D23[1]},[r0,:16],r14
- VST1.16 {D23[0]},[r0,:16],r14
- VST1.16 {D22[3]},[r0,:16],r14
- VST1.16 {D22[2]},[r0,:16],r14
- VST1.16 {D22[1]},[r0,:16],r14
- VST1.16 {D22[0]},[r0,:16]
-s42xbily_neon_done:
- VPOP {Q4-Q7} @ 16 words.
- LDMFD r13!,{r4-r9,PC} @ 8 words.
- .fnend
- .size ScaleYCbCr42xToRGB565_BilinearY_Row_NEON, .-ScaleYCbCr42xToRGB565_BilinearY_Row_NEON
-
-#if defined(__ELF__)&&defined(__linux__)
- .section .note.GNU-stack,"",%progbits
-#endif
diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp
deleted file mode 100644
index d327f854e..000000000
--- a/gfx/ycbcr/yuv_row_c.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "yuv_row.h"
-
-#define DCHECK(a)
-
-extern "C" {
-
-// C reference code that mimic the YUV assembly.
-#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
-#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
- (((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
-
-static inline void YuvPixel(uint8 y,
- uint8 u,
- uint8 v,
- uint8* rgb_buf) {
-
- int b = kCoefficientsRgbY[256+u][0];
- int g = kCoefficientsRgbY[256+u][1];
- int r = kCoefficientsRgbY[256+u][2];
- int a = kCoefficientsRgbY[256+u][3];
-
- b = paddsw(b, kCoefficientsRgbY[512+v][0]);
- g = paddsw(g, kCoefficientsRgbY[512+v][1]);
- r = paddsw(r, kCoefficientsRgbY[512+v][2]);
- a = paddsw(a, kCoefficientsRgbY[512+v][3]);
-
- b = paddsw(b, kCoefficientsRgbY[y][0]);
- g = paddsw(g, kCoefficientsRgbY[y][1]);
- r = paddsw(r, kCoefficientsRgbY[y][2]);
- a = paddsw(a, kCoefficientsRgbY[y][3]);
-
- b >>= 6;
- g >>= 6;
- r >>= 6;
- a >>= 6;
-
- *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) |
- (packuswb(g) << 8) |
- (packuswb(r) << 16) |
- (packuswb(a) << 24);
-}
-
-void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- unsigned int x_shift) {
- for (int x = 0; x < width; x += 2) {
- uint8 u = u_buf[x >> x_shift];
- uint8 v = v_buf[x >> x_shift];
- uint8 y0 = y_buf[x];
- YuvPixel(y0, u, v, rgb_buf);
- if ((x + 1) < width) {
- uint8 y1 = y_buf[x + 1];
- if (x_shift == 0) {
- u = u_buf[x + 1];
- v = v_buf[x + 1];
- }
- YuvPixel(y1, u, v, rgb_buf + 4);
- }
- rgb_buf += 8; // Advance 2 pixels.
- }
-}
-
-// 16.16 fixed point is used. A shift by 16 isolates the integer.
-// A shift by 17 is used to further subsample the chrominence channels.
-// & 0xffff isolates the fixed point fraction. >> 2 to get the upper 2 bits,
-// for 1/65536 pixel accurate interpolation.
-void ScaleYUVToRGB32Row_C(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
- int x = 0;
- for (int i = 0; i < width; i += 2) {
- int y = y_buf[x >> 16];
- int u = u_buf[(x >> 17)];
- int v = v_buf[(x >> 17)];
- YuvPixel(y, u, v, rgb_buf);
- x += source_dx;
- if ((i + 1) < width) {
- y = y_buf[x >> 16];
- YuvPixel(y, u, v, rgb_buf+4);
- x += source_dx;
- }
- rgb_buf += 8;
- }
-}
-
-void LinearScaleYUVToRGB32Row_C(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
- int x = 0;
- if (source_dx >= 0x20000) {
- x = 32768;
- }
- for (int i = 0; i < width; i += 2) {
- int y0 = y_buf[x >> 16];
- int y1 = y_buf[(x >> 16) + 1];
- int u0 = u_buf[(x >> 17)];
- int u1 = u_buf[(x >> 17) + 1];
- int v0 = v_buf[(x >> 17)];
- int v1 = v_buf[(x >> 17) + 1];
- int y_frac = (x & 65535);
- int uv_frac = ((x >> 1) & 65535);
- int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
- int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16;
- int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16;
- YuvPixel(y, u, v, rgb_buf);
- x += source_dx;
- if ((i + 1) < width) {
- y0 = y_buf[x >> 16];
- y1 = y_buf[(x >> 16) + 1];
- y_frac = (x & 65535);
- y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
- YuvPixel(y, u, v, rgb_buf+4);
- x += source_dx;
- }
- rgb_buf += 8;
- }
-}
-
-} // extern "C"
-
diff --git a/gfx/ycbcr/yuv_row_other.cpp b/gfx/ycbcr/yuv_row_other.cpp
deleted file mode 100644
index c351139f9..000000000
--- a/gfx/ycbcr/yuv_row_other.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright (c) 2009 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "yuv_row.h"
-
-extern "C" {
-void FastConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
-}
-
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
- ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-}
-
-void LinearScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
- LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-}
-
-}
diff --git a/gfx/ycbcr/yuv_row_posix.cpp b/gfx/ycbcr/yuv_row_posix.cpp
deleted file mode 100644
index 152bfc778..000000000
--- a/gfx/ycbcr/yuv_row_posix.cpp
+++ /dev/null
@@ -1,894 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Copyright (c) 2021 Moonchild Productions.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "yuv_row.h"
-#include "mozilla/SSE.h"
-
-#define DCHECK(a)
-
-extern "C" {
-
-#if defined(ARCH_CPU_X86_64)
-
-// We don't need CPUID guards here, since x86-64 implies SSE2.
-
-// AMD64 ABI uses register paremters.
-void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
- const uint8* u_buf, // rsi
- const uint8* v_buf, // rdx
- uint8* rgb_buf, // rcx
- int width) { // r8
- asm(
- "jmp 1f\n"
-"0:"
- "movzb (%1),%%r10\n"
- "add $0x1,%1\n"
- "movzb (%2),%%r11\n"
- "add $0x1,%2\n"
- "movq 2048(%5,%%r10,8),%%xmm0\n"
- "movzb (%0),%%r10\n"
- "movq 4096(%5,%%r11,8),%%xmm1\n"
- "movzb 0x1(%0),%%r11\n"
- "paddsw %%xmm1,%%xmm0\n"
- "movq (%5,%%r10,8),%%xmm2\n"
- "add $0x2,%0\n"
- "movq (%5,%%r11,8),%%xmm3\n"
- "paddsw %%xmm0,%%xmm2\n"
- "paddsw %%xmm0,%%xmm3\n"
- "shufps $0x44,%%xmm3,%%xmm2\n"
- "psraw $0x6,%%xmm2\n"
- "packuswb %%xmm2,%%xmm2\n"
- "movq %%xmm2,0x0(%3)\n"
- "add $0x8,%3\n"
-"1:"
- "sub $0x2,%4\n"
- "jns 0b\n"
-
-"2:"
- "add $0x1,%4\n"
- "js 3f\n"
-
- "movzb (%1),%%r10\n"
- "movq 2048(%5,%%r10,8),%%xmm0\n"
- "movzb (%2),%%r10\n"
- "movq 4096(%5,%%r10,8),%%xmm1\n"
- "paddsw %%xmm1,%%xmm0\n"
- "movzb (%0),%%r10\n"
- "movq (%5,%%r10,8),%%xmm1\n"
- "paddsw %%xmm0,%%xmm1\n"
- "psraw $0x6,%%xmm1\n"
- "packuswb %%xmm1,%%xmm1\n"
- "movd %%xmm1,0x0(%3)\n"
-"3:"
- :
- : "r"(y_buf), // %0
- "r"(u_buf), // %1
- "r"(v_buf), // %2
- "r"(rgb_buf), // %3
- "r"(width), // %4
- "r" (kCoefficientsRgbY) // %5
- : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
-);
-}
-
-void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi
- const uint8* u_buf, // rsi
- const uint8* v_buf, // rdx
- uint8* rgb_buf, // rcx
- int width, // r8
- int source_dx) { // r9
- asm(
- "xor %%r11,%%r11\n"
- "sub $0x2,%4\n"
- "js 1f\n"
-
-"0:"
- "mov %%r11,%%r10\n"
- "sar $0x11,%%r10\n"
- "movzb (%1,%%r10,1),%%rax\n"
- "movq 2048(%5,%%rax,8),%%xmm0\n"
- "movzb (%2,%%r10,1),%%rax\n"
- "movq 4096(%5,%%rax,8),%%xmm1\n"
- "lea (%%r11,%6),%%r10\n"
- "sar $0x10,%%r11\n"
- "movzb (%0,%%r11,1),%%rax\n"
- "paddsw %%xmm1,%%xmm0\n"
- "movq (%5,%%rax,8),%%xmm1\n"
- "lea (%%r10,%6),%%r11\n"
- "sar $0x10,%%r10\n"
- "movzb (%0,%%r10,1),%%rax\n"
- "movq (%5,%%rax,8),%%xmm2\n"
- "paddsw %%xmm0,%%xmm1\n"
- "paddsw %%xmm0,%%xmm2\n"
- "shufps $0x44,%%xmm2,%%xmm1\n"
- "psraw $0x6,%%xmm1\n"
- "packuswb %%xmm1,%%xmm1\n"
- "movq %%xmm1,0x0(%3)\n"
- "add $0x8,%3\n"
- "sub $0x2,%4\n"
- "jns 0b\n"
-
-"1:"
- "add $0x1,%4\n"
- "js 2f\n"
-
- "mov %%r11,%%r10\n"
- "sar $0x11,%%r10\n"
- "movzb (%1,%%r10,1),%%rax\n"
- "movq 2048(%5,%%rax,8),%%xmm0\n"
- "movzb (%2,%%r10,1),%%rax\n"
- "movq 4096(%5,%%rax,8),%%xmm1\n"
- "paddsw %%xmm1,%%xmm0\n"
- "sar $0x10,%%r11\n"
- "movzb (%0,%%r11,1),%%rax\n"
- "movq (%5,%%rax,8),%%xmm1\n"
- "paddsw %%xmm0,%%xmm1\n"
- "psraw $0x6,%%xmm1\n"
- "packuswb %%xmm1,%%xmm1\n"
- "movd %%xmm1,0x0(%3)\n"
-
-"2:"
- :
- : "r"(y_buf), // %0
- "r"(u_buf), // %1
- "r"(v_buf), // %2
- "r"(rgb_buf), // %3
- "r"(width), // %4
- "r" (kCoefficientsRgbY), // %5
- "r"(static_cast<long>(source_dx)) // %6
- : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
-);
-}
-
-void LinearScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
- asm(
- "xor %%r11,%%r11\n" // x = 0
- "sub $0x2,%4\n"
- "js 2f\n"
- "cmp $0x20000,%6\n" // if source_dx >= 2.0
- "jl 0f\n"
- "mov $0x8000,%%r11\n" // x = 0.5 for 1/2 or less
-"0:"
-
-"1:"
- "mov %%r11,%%r10\n"
- "sar $0x11,%%r10\n"
-
- "movzb (%1, %%r10, 1), %%r13 \n"
- "movzb 1(%1, %%r10, 1), %%r14 \n"
- "mov %%r11, %%rax \n"
- "and $0x1fffe, %%rax \n"
- "imul %%rax, %%r14 \n"
- "xor $0x1fffe, %%rax \n"
- "imul %%rax, %%r13 \n"
- "add %%r14, %%r13 \n"
- "shr $17, %%r13 \n"
- "movq 2048(%5,%%r13,8), %%xmm0\n"
-
- "movzb (%2, %%r10, 1), %%r13 \n"
- "movzb 1(%2, %%r10, 1), %%r14 \n"
- "mov %%r11, %%rax \n"
- "and $0x1fffe, %%rax \n"
- "imul %%rax, %%r14 \n"
- "xor $0x1fffe, %%rax \n"
- "imul %%rax, %%r13 \n"
- "add %%r14, %%r13 \n"
- "shr $17, %%r13 \n"
- "movq 4096(%5,%%r13,8), %%xmm1\n"
-
- "mov %%r11, %%rax \n"
- "lea (%%r11,%6),%%r10\n"
- "sar $0x10,%%r11\n"
- "paddsw %%xmm1,%%xmm0\n"
-
- "movzb (%0, %%r11, 1), %%r13 \n"
- "movzb 1(%0, %%r11, 1), %%r14 \n"
- "and $0xffff, %%rax \n"
- "imul %%rax, %%r14 \n"
- "xor $0xffff, %%rax \n"
- "imul %%rax, %%r13 \n"
- "add %%r14, %%r13 \n"
- "shr $16, %%r13 \n"
- "movq (%5,%%r13,8),%%xmm1\n"
-
- "mov %%r10, %%rax \n"
- "lea (%%r10,%6),%%r11\n"
- "sar $0x10,%%r10\n"
-
- "movzb (%0,%%r10,1), %%r13 \n"
- "movzb 1(%0,%%r10,1), %%r14 \n"
- "and $0xffff, %%rax \n"
- "imul %%rax, %%r14 \n"
- "xor $0xffff, %%rax \n"
- "imul %%rax, %%r13 \n"
- "add %%r14, %%r13 \n"
- "shr $16, %%r13 \n"
- "movq (%5,%%r13,8),%%xmm2\n"
-
- "paddsw %%xmm0,%%xmm1\n"
- "paddsw %%xmm0,%%xmm2\n"
- "shufps $0x44,%%xmm2,%%xmm1\n"
- "psraw $0x6,%%xmm1\n"
- "packuswb %%xmm1,%%xmm1\n"
- "movq %%xmm1,0x0(%3)\n"
- "add $0x8,%3\n"
- "sub $0x2,%4\n"
- "jns 1b\n"
-
-"2:"
- "add $0x1,%4\n"
- "js 3f\n"
-
- "mov %%r11,%%r10\n"
- "sar $0x11,%%r10\n"
-
- "movzb (%1,%%r10,1), %%r13 \n"
- "movq 2048(%5,%%r13,8),%%xmm0\n"
-
- "movzb (%2,%%r10,1), %%r13 \n"
- "movq 4096(%5,%%r13,8),%%xmm1\n"
-
- "paddsw %%xmm1,%%xmm0\n"
- "sar $0x10,%%r11\n"
-
- "movzb (%0,%%r11,1), %%r13 \n"
- "movq (%5,%%r13,8),%%xmm1\n"
-
- "paddsw %%xmm0,%%xmm1\n"
- "psraw $0x6,%%xmm1\n"
- "packuswb %%xmm1,%%xmm1\n"
- "movd %%xmm1,0x0(%3)\n"
-
-"3:"
- :
- : "r"(y_buf), // %0
- "r"(u_buf), // %1
- "r"(v_buf), // %2
- "r"(rgb_buf), // %3
- "r"(width), // %4
- "r" (kCoefficientsRgbY), // %5
- "r"(static_cast<long>(source_dx)) // %6
- : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2"
-);
-}
-
-#elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && !defined(__PIC__)
-
-// PIC version is slower because less registers are available, so
-// non-PIC is used on platforms where it is possible.
-void FastConvertYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width);
- asm(
- ".text\n"
- ".global FastConvertYUVToRGB32Row_SSE\n"
- ".type FastConvertYUVToRGB32Row_SSE, @function\n"
-"FastConvertYUVToRGB32Row_SSE:\n"
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x28(%esp),%edi\n"
- "mov 0x2c(%esp),%esi\n"
- "mov 0x30(%esp),%ebp\n"
- "mov 0x34(%esp),%ecx\n"
- "jmp 1f\n"
-
-"0:"
- "movzbl (%edi),%eax\n"
- "add $0x1,%edi\n"
- "movzbl (%esi),%ebx\n"
- "add $0x1,%esi\n"
- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
- "movzbl (%edx),%eax\n"
- "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
- "movzbl 0x1(%edx),%ebx\n"
- "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
- "add $0x2,%edx\n"
- "movq kCoefficientsRgbY(,%ebx,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
-"1:"
- "sub $0x2,%ecx\n"
- "jns 0b\n"
-
- "and $0x1,%ecx\n"
- "je 2f\n"
-
- "movzbl (%edi),%eax\n"
- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
- "movzbl (%esi),%eax\n"
- "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
- "movzbl (%edx),%eax\n"
- "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%ebp)\n"
-"2:"
- "popa\n"
- "ret\n"
- ".previous\n"
-);
-
-void FastConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width)
-{
- if (mozilla::supports_sse()) {
- FastConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width);
- return;
- }
-
- FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
-}
-
-
-void ScaleYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx);
- asm(
- ".text\n"
- ".global ScaleYUVToRGB32Row_SSE\n"
- ".type ScaleYUVToRGB32Row_SSE, @function\n"
-"ScaleYUVToRGB32Row_SSE:\n"
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x28(%esp),%edi\n"
- "mov 0x2c(%esp),%esi\n"
- "mov 0x30(%esp),%ebp\n"
- "mov 0x34(%esp),%ecx\n"
- "xor %ebx,%ebx\n"
- "jmp 1f\n"
-
-"0:"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
- "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "sar $0x10,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "sar $0x10,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq kCoefficientsRgbY(,%eax,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
-"1:"
- "sub $0x2,%ecx\n"
- "jns 0b\n"
-
- "and $0x1,%ecx\n"
- "je 2f\n"
-
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
- "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x10,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%ebp)\n"
-
-"2:"
- "popa\n"
- "ret\n"
- ".previous\n"
-);
-
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx)
-{
- if (mozilla::supports_sse()) {
- ScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf,
- width, source_dx);
- return;
- }
-
- ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf,
- width, source_dx);
-}
-
-void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx);
- asm(
- ".text\n"
- ".global LinearScaleYUVToRGB32Row_SSE\n"
- ".type LinearScaleYUVToRGB32Row_SSE, @function\n"
-"LinearScaleYUVToRGB32Row_SSE:\n"
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x28(%esp),%edi\n"
- "mov 0x30(%esp),%ebp\n"
-
- // source_width = width * source_dx + ebx
- "mov 0x34(%esp), %ecx\n"
- "imull 0x38(%esp), %ecx\n"
- "mov %ecx, 0x34(%esp)\n"
-
- "mov 0x38(%esp), %ecx\n"
- "xor %ebx,%ebx\n" // x = 0
- "cmp $0x20000,%ecx\n" // if source_dx >= 2.0
- "jl 1f\n"
- "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less
- "jmp 1f\n"
-
-"0:"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
-
- "movzbl (%edi,%eax,1),%ecx\n"
- "movzbl 1(%edi,%eax,1),%esi\n"
- "mov %ebx,%eax\n"
- "andl $0x1fffe, %eax \n"
- "imul %eax, %esi \n"
- "xorl $0x1fffe, %eax \n"
- "imul %eax, %ecx \n"
- "addl %esi, %ecx \n"
- "shrl $17, %ecx \n"
- "movq kCoefficientsRgbY+2048(,%ecx,8),%mm0\n"
-
- "mov 0x2c(%esp),%esi\n"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
-
- "movzbl (%esi,%eax,1),%ecx\n"
- "movzbl 1(%esi,%eax,1),%esi\n"
- "mov %ebx,%eax\n"
- "andl $0x1fffe, %eax \n"
- "imul %eax, %esi \n"
- "xorl $0x1fffe, %eax \n"
- "imul %eax, %ecx \n"
- "addl %esi, %ecx \n"
- "shrl $17, %ecx \n"
- "paddsw kCoefficientsRgbY+4096(,%ecx,8),%mm0\n"
-
- "mov %ebx,%eax\n"
- "sar $0x10,%eax\n"
- "movzbl (%edx,%eax,1),%ecx\n"
- "movzbl 1(%edx,%eax,1),%esi\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "andl $0xffff, %eax \n"
- "imul %eax, %esi \n"
- "xorl $0xffff, %eax \n"
- "imul %eax, %ecx \n"
- "addl %esi, %ecx \n"
- "shrl $16, %ecx \n"
- "movq kCoefficientsRgbY(,%ecx,8),%mm1\n"
-
- "cmp 0x34(%esp), %ebx\n"
- "jge 2f\n"
-
- "mov %ebx,%eax\n"
- "sar $0x10,%eax\n"
- "movzbl (%edx,%eax,1),%ecx\n"
- "movzbl 1(%edx,%eax,1),%esi\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "andl $0xffff, %eax \n"
- "imul %eax, %esi \n"
- "xorl $0xffff, %eax \n"
- "imul %eax, %ecx \n"
- "addl %esi, %ecx \n"
- "shrl $16, %ecx \n"
- "movq kCoefficientsRgbY(,%ecx,8),%mm2\n"
-
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
-
-"1:"
- "cmp 0x34(%esp), %ebx\n"
- "jl 0b\n"
- "popa\n"
- "ret\n"
-
-"2:"
- "paddsw %mm0, %mm1\n"
- "psraw $6, %mm1\n"
- "packuswb %mm1, %mm1\n"
- "movd %mm1, (%ebp)\n"
- "popa\n"
- "ret\n"
- ".previous\n"
-);
-
-void LinearScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx)
-{
- if (mozilla::supports_sse()) {
- LinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf,
- width, source_dx);
- return;
- }
-
- LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf,
- width, source_dx);
-}
-
-#elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && defined(__PIC__)
-
-void PICConvertYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- const int16 *kCoefficientsRgbY);
-
- asm(
- ".text\n"
- "PICConvertYUVToRGB32Row_SSE:\n"
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x28(%esp),%edi\n"
- "mov 0x2c(%esp),%esi\n"
- "mov 0x30(%esp),%ebp\n"
- "mov 0x38(%esp),%ecx\n"
-
- "jmp 1f\n"
-
-"0:"
- "movzbl (%edi),%eax\n"
- "add $0x1,%edi\n"
- "movzbl (%esi),%ebx\n"
- "add $0x1,%esi\n"
- "movq 2048(%ecx,%eax,8),%mm0\n"
- "movzbl (%edx),%eax\n"
- "paddsw 4096(%ecx,%ebx,8),%mm0\n"
- "movzbl 0x1(%edx),%ebx\n"
- "movq 0(%ecx,%eax,8),%mm1\n"
- "add $0x2,%edx\n"
- "movq 0(%ecx,%ebx,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
-"1:"
- "subl $0x2,0x34(%esp)\n"
- "jns 0b\n"
-
- "andl $0x1,0x34(%esp)\n"
- "je 2f\n"
-
- "movzbl (%edi),%eax\n"
- "movq 2048(%ecx,%eax,8),%mm0\n"
- "movzbl (%esi),%eax\n"
- "paddsw 4096(%ecx,%eax,8),%mm0\n"
- "movzbl (%edx),%eax\n"
- "movq 0(%ecx,%eax,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%ebp)\n"
-"2:"
- "popa\n"
- "ret\n"
- ".previous\n"
-);
-
-void FastConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width)
-{
- if (mozilla::supports_sse()) {
- PICConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width,
- &kCoefficientsRgbY[0][0]);
- return;
- }
-
- FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
-}
-
-void PICScaleYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx,
- const int16 *kCoefficientsRgbY);
-
- asm(
- ".text\n"
- "PICScaleYUVToRGB32Row_SSE:\n"
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x28(%esp),%edi\n"
- "mov 0x2c(%esp),%esi\n"
- "mov 0x30(%esp),%ebp\n"
- "mov 0x3c(%esp),%ecx\n"
- "xor %ebx,%ebx\n"
- "jmp 1f\n"
-
-"0:"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq 2048(%ecx,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
- "paddsw 4096(%ecx,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "sar $0x10,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq 0(%ecx,%eax,8),%mm1\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "sar $0x10,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq 0(%ecx,%eax,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
-"1:"
- "subl $0x2,0x34(%esp)\n"
- "jns 0b\n"
-
- "andl $0x1,0x34(%esp)\n"
- "je 2f\n"
-
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq 2048(%ecx,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
- "paddsw 4096(%ecx,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x10,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq 0(%ecx,%eax,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%ebp)\n"
-
-"2:"
- "popa\n"
- "ret\n"
- ".previous\n"
-);
-
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx)
-{
- if (mozilla::supports_sse()) {
- PICScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
- &kCoefficientsRgbY[0][0]);
- return;
- }
-
- ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-}
-
-void PICLinearScaleYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx,
- const int16 *kCoefficientsRgbY);
-
- asm(
- ".text\n"
- "PICLinearScaleYUVToRGB32Row_SSE:\n"
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x30(%esp),%ebp\n"
- "mov 0x34(%esp),%ecx\n"
- "mov 0x3c(%esp),%edi\n"
- "xor %ebx,%ebx\n"
-
- // source_width = width * source_dx + ebx
- "mov 0x34(%esp), %ecx\n"
- "imull 0x38(%esp), %ecx\n"
- "mov %ecx, 0x34(%esp)\n"
-
- "mov 0x38(%esp), %ecx\n"
- "xor %ebx,%ebx\n" // x = 0
- "cmp $0x20000,%ecx\n" // if source_dx >= 2.0
- "jl 1f\n"
- "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less
- "jmp 1f\n"
-
-"0:"
- "mov 0x28(%esp),%esi\n"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
-
- "movzbl (%esi,%eax,1),%ecx\n"
- "movzbl 1(%esi,%eax,1),%esi\n"
- "mov %ebx,%eax\n"
- "andl $0x1fffe, %eax \n"
- "imul %eax, %esi \n"
- "xorl $0x1fffe, %eax \n"
- "imul %eax, %ecx \n"
- "addl %esi, %ecx \n"
- "shrl $17, %ecx \n"
- "movq 2048(%edi,%ecx,8),%mm0\n"
-
- "mov 0x2c(%esp),%esi\n"
- "mov %ebx,%eax\n"
- "sar $0x11,%eax\n"
-
- "movzbl (%esi,%eax,1),%ecx\n"
- "movzbl 1(%esi,%eax,1),%esi\n"
- "mov %ebx,%eax\n"
- "andl $0x1fffe, %eax \n"
- "imul %eax, %esi \n"
- "xorl $0x1fffe, %eax \n"
- "imul %eax, %ecx \n"
- "addl %esi, %ecx \n"
- "shrl $17, %ecx \n"
- "paddsw 4096(%edi,%ecx,8),%mm0\n"
-
- "mov %ebx,%eax\n"
- "sar $0x10,%eax\n"
- "movzbl (%edx,%eax,1),%ecx\n"
- "movzbl 1(%edx,%eax,1),%esi\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "andl $0xffff, %eax \n"
- "imul %eax, %esi \n"
- "xorl $0xffff, %eax \n"
- "imul %eax, %ecx \n"
- "addl %esi, %ecx \n"
- "shrl $16, %ecx \n"
- "movq (%edi,%ecx,8),%mm1\n"
-
- "cmp 0x34(%esp), %ebx\n"
- "jge 2f\n"
-
- "mov %ebx,%eax\n"
- "sar $0x10,%eax\n"
- "movzbl (%edx,%eax,1),%ecx\n"
- "movzbl 1(%edx,%eax,1),%esi\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "andl $0xffff, %eax \n"
- "imul %eax, %esi \n"
- "xorl $0xffff, %eax \n"
- "imul %eax, %ecx \n"
- "addl %esi, %ecx \n"
- "shrl $16, %ecx \n"
- "movq (%edi,%ecx,8),%mm2\n"
-
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
-
-"1:"
- "cmp %ebx, 0x34(%esp)\n"
- "jg 0b\n"
- "popa\n"
- "ret\n"
-
-"2:"
- "paddsw %mm0, %mm1\n"
- "psraw $6, %mm1\n"
- "packuswb %mm1, %mm1\n"
- "movd %mm1, (%ebp)\n"
- "popa\n"
- "ret\n"
- ".previous\n"
-);
-
-
-void LinearScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx)
-{
- if (mozilla::supports_sse()) {
- PICLinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width,
- source_dx, &kCoefficientsRgbY[0][0]);
- return;
- }
-
- LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-}
-#else
-void FastConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
-}
-
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
- ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-}
-
-void LinearScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
- LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-}
-#endif
-
-}
diff --git a/gfx/ycbcr/yuv_row_table.cpp b/gfx/ycbcr/yuv_row_table.cpp
deleted file mode 100644
index c531b60c2..000000000
--- a/gfx/ycbcr/yuv_row_table.cpp
+++ /dev/null
@@ -1,233 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "yuv_row.h"
-
-extern "C" {
-
-#define RGBY(i) { \
- static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
- static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
- static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
- 0 \
-}
-
-#define RGBU(i) { \
- static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \
- static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \
- 0, \
- static_cast<int16>(256 * 64 - 1) \
-}
-
-#define RGBV(i) { \
- 0, \
- static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \
- static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \
- 0 \
-}
-
-SIMD_ALIGNED(const int16 kCoefficientsRgbY[256 * 3][4]) = {
- RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03),
- RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07),
- RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B),
- RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F),
- RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13),
- RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17),
- RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B),
- RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F),
- RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23),
- RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27),
- RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B),
- RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F),
- RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33),
- RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37),
- RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B),
- RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F),
- RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43),
- RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47),
- RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B),
- RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F),
- RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53),
- RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57),
- RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B),
- RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), RGBY(0x5F),
- RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63),
- RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67),
- RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B),
- RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F),
- RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73),
- RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77),
- RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B),
- RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F),
- RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83),
- RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87),
- RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B),
- RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F),
- RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93),
- RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97),
- RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B),
- RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F),
- RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3),
- RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7),
- RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB),
- RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF),
- RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3),
- RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7),
- RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB),
- RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF),
- RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3),
- RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7),
- RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB),
- RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF),
- RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3),
- RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7),
- RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB),
- RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF),
- RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3),
- RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7),
- RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB),
- RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF),
- RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3),
- RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7),
- RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB),
- RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF),
-
- // Chroma U table.
- RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03),
- RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07),
- RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B),
- RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F),
- RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13),
- RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17),
- RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B),
- RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F),
- RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23),
- RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27),
- RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B),
- RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F),
- RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33),
- RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37),
- RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B),
- RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F),
- RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43),
- RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47),
- RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B),
- RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F),
- RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53),
- RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57),
- RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B),
- RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F),
- RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63),
- RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67),
- RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B),
- RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F),
- RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73),
- RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77),
- RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B),
- RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F),
- RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83),
- RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87),
- RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B),
- RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F),
- RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93),
- RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97),
- RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B),
- RGBU(0x9C), RGBU(0x9D), RGBU(0x9E), RGBU(0x9F),
- RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3),
- RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7),
- RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB),
- RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF),
- RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3),
- RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7),
- RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB),
- RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF),
- RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3),
- RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7),
- RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB),
- RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF),
- RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3),
- RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7),
- RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB),
- RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF),
- RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3),
- RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7),
- RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB),
- RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF),
- RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3),
- RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7),
- RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB),
- RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF),
-
- // Chroma V table.
- RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03),
- RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07),
- RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B),
- RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F),
- RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13),
- RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17),
- RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B),
- RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F),
- RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23),
- RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27),
- RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B),
- RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F),
- RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33),
- RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37),
- RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B),
- RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F),
- RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43),
- RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47),
- RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B),
- RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F),
- RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53),
- RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57),
- RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B),
- RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F),
- RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63),
- RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67),
- RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B),
- RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F),
- RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73),
- RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77),
- RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B),
- RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F),
- RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83),
- RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87),
- RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B),
- RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F),
- RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93),
- RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97),
- RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B),
- RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F),
- RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3),
- RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7),
- RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB),
- RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF),
- RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3),
- RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7),
- RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB),
- RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF),
- RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3),
- RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7),
- RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB),
- RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF),
- RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3),
- RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7),
- RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB),
- RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF),
- RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3),
- RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7),
- RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB),
- RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF),
- RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3),
- RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7),
- RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB),
- RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF),
-};
-
-#undef RGBY
-#undef RGBU
-#undef RGBV
-
-} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp
deleted file mode 100644
index 5cd931139..000000000
--- a/gfx/ycbcr/yuv_row_win.cpp
+++ /dev/null
@@ -1,498 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "yuv_row.h"
-#include "mozilla/SSE.h"
-
-#define kCoefficientsRgbU kCoefficientsRgbY + 2048
-#define kCoefficientsRgbV kCoefficientsRgbY + 4096
-
-extern "C" {
-
-#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86)
-__declspec(naked)
-void FastConvertYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- jmp convertend
-
- convertloop :
- movzx eax, byte ptr [edi]
- add edi, 1
- movzx ebx, byte ptr [esi]
- add esi, 1
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [edx]
- paddsw mm0, [kCoefficientsRgbV + 8 * ebx]
- movzx ebx, byte ptr [edx + 1]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- add edx, 2
- movq mm2, [kCoefficientsRgbY + 8 * ebx]
- paddsw mm1, mm0
- paddsw mm2, mm0
- psraw mm1, 6
- psraw mm2, 6
- packuswb mm1, mm2
- movntq [ebp], mm1
- add ebp, 8
- convertend :
- sub ecx, 2
- jns convertloop
-
- and ecx, 1 // odd number of pixels?
- jz convertdone
-
- movzx eax, byte ptr [edi]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- movd [ebp], mm1
- convertdone :
-
- popad
- ret
- }
-}
-
-__declspec(naked)
-void ConvertYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int step) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- mov ebx, [esp + 32 + 24] // step
- jmp wend
-
- wloop :
- movzx eax, byte ptr [edi]
- add edi, ebx
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- add esi, ebx
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- add edx, ebx
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- movzx eax, byte ptr [edx]
- add edx, ebx
- movq mm2, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- paddsw mm2, mm0
- psraw mm1, 6
- psraw mm2, 6
- packuswb mm1, mm2
- movntq [ebp], mm1
- add ebp, 8
- wend :
- sub ecx, 2
- jns wloop
-
- and ecx, 1 // odd number of pixels?
- jz wdone
-
- movzx eax, byte ptr [edi]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- movd [ebp], mm1
- wdone :
-
- popad
- ret
- }
-}
-
-__declspec(naked)
-void RotateConvertYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int ystep,
- int uvstep) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- jmp wend
-
- wloop :
- movzx eax, byte ptr [edi]
- mov ebx, [esp + 32 + 28] // uvstep
- add edi, ebx
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- add esi, ebx
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- mov ebx, [esp + 32 + 24] // ystep
- add edx, ebx
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- movzx eax, byte ptr [edx]
- add edx, ebx
- movq mm2, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- paddsw mm2, mm0
- psraw mm1, 6
- psraw mm2, 6
- packuswb mm1, mm2
- movntq [ebp], mm1
- add ebp, 8
- wend :
- sub ecx, 2
- jns wloop
-
- and ecx, 1 // odd number of pixels?
- jz wdone
-
- movzx eax, byte ptr [edi]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- movd [ebp], mm1
- wdone :
-
- popad
- ret
- }
-}
-
-__declspec(naked)
-void DoubleYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- jmp wend
-
- wloop :
- movzx eax, byte ptr [edi]
- add edi, 1
- movzx ebx, byte ptr [esi]
- add esi, 1
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [edx]
- paddsw mm0, [kCoefficientsRgbV + 8 * ebx]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- punpckldq mm1, mm1
- movntq [ebp], mm1
-
- movzx ebx, byte ptr [edx + 1]
- add edx, 2
- paddsw mm0, [kCoefficientsRgbY + 8 * ebx]
- psraw mm0, 6
- packuswb mm0, mm0
- punpckldq mm0, mm0
- movntq [ebp+8], mm0
- add ebp, 16
- wend :
- sub ecx, 4
- jns wloop
-
- add ecx, 4
- jz wdone
-
- movzx eax, byte ptr [edi]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- jmp wend1
-
- wloop1 :
- movd [ebp], mm1
- add ebp, 4
- wend1 :
- sub ecx, 1
- jns wloop1
- wdone :
- popad
- ret
- }
-}
-
-// This version does general purpose scaling by any amount, up or down.
-// The only thing it cannot do is rotation by 90 or 270.
-// For performance the chroma is under-sampled, reducing cost of a 3x
-// 1080p scale from 8.4 ms to 5.4 ms.
-__declspec(naked)
-void ScaleYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- xor ebx, ebx // x
- jmp scaleend
-
- scaleloop :
- mov eax, ebx
- sar eax, 17
- movzx eax, byte ptr [edi + eax]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- mov eax, ebx
- sar eax, 17
- movzx eax, byte ptr [esi + eax]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- mov eax, ebx
- add ebx, [esp + 32 + 24] // x += source_dx
- sar eax, 16
- movzx eax, byte ptr [edx + eax]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- mov eax, ebx
- add ebx, [esp + 32 + 24] // x += source_dx
- sar eax, 16
- movzx eax, byte ptr [edx + eax]
- movq mm2, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- paddsw mm2, mm0
- psraw mm1, 6
- psraw mm2, 6
- packuswb mm1, mm2
- movntq [ebp], mm1
- add ebp, 8
- scaleend :
- sub ecx, 2
- jns scaleloop
-
- and ecx, 1 // odd number of pixels?
- jz scaledone
-
- mov eax, ebx
- sar eax, 17
- movzx eax, byte ptr [edi + eax]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- mov eax, ebx
- sar eax, 17
- movzx eax, byte ptr [esi + eax]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- mov eax, ebx
- sar eax, 16
- movzx eax, byte ptr [edx + eax]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- movd [ebp], mm1
-
- scaledone :
- popad
- ret
- }
-}
-
-__declspec(naked)
-void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- // [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- imul ecx, [esp + 32 + 24] // source_dx
- mov [esp + 32 + 20], ecx // source_width = width * source_dx
- mov ecx, [esp + 32 + 24] // source_dx
- xor ebx, ebx // x = 0
- cmp ecx, 0x20000
- jl lscaleend
- mov ebx, 0x8000 // x = 0.5 for 1/2 or less
- jmp lscaleend
-lscaleloop:
- mov eax, ebx
- sar eax, 0x11
-
- movzx ecx, byte ptr [edi + eax]
- movzx esi, byte ptr [edi + eax + 1]
- mov eax, ebx
- and eax, 0x1fffe
- imul esi, eax
- xor eax, 0x1fffe
- imul ecx, eax
- add ecx, esi
- shr ecx, 17
- movq mm0, [kCoefficientsRgbU + 8 * ecx]
-
- mov esi, [esp + 32 + 12]
- mov eax, ebx
- sar eax, 0x11
-
- movzx ecx, byte ptr [esi + eax]
- movzx esi, byte ptr [esi + eax + 1]
- mov eax, ebx
- and eax, 0x1fffe
- imul esi, eax
- xor eax, 0x1fffe
- imul ecx, eax
- add ecx, esi
- shr ecx, 17
- paddsw mm0, [kCoefficientsRgbV + 8 * ecx]
-
- mov eax, ebx
- sar eax, 0x10
- movzx ecx, byte ptr [edx + eax]
- movzx esi, byte ptr [1 + edx + eax]
- mov eax, ebx
- add ebx, [esp + 32 + 24]
- and eax, 0xffff
- imul esi, eax
- xor eax, 0xffff
- imul ecx, eax
- add ecx, esi
- shr ecx, 16
- movq mm1, [kCoefficientsRgbY + 8 * ecx]
-
- cmp ebx, [esp + 32 + 20]
- jge lscalelastpixel
-
- mov eax, ebx
- sar eax, 0x10
- movzx ecx, byte ptr [edx + eax]
- movzx esi, byte ptr [edx + eax + 1]
- mov eax, ebx
- add ebx, [esp + 32 + 24]
- and eax, 0xffff
- imul esi, eax
- xor eax, 0xffff
- imul ecx, eax
- add ecx, esi
- shr ecx, 16
- movq mm2, [kCoefficientsRgbY + 8 * ecx]
-
- paddsw mm1, mm0
- paddsw mm2, mm0
- psraw mm1, 0x6
- psraw mm2, 0x6
- packuswb mm1, mm2
- movntq [ebp], mm1
- add ebp, 0x8
-
-lscaleend:
- cmp ebx, [esp + 32 + 20]
- jl lscaleloop
- popad
- ret
-
-lscalelastpixel:
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- movd [ebp], mm1
- popad
- ret
- };
-}
-#endif // if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86)
-
-void FastConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
-#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86)
- if (mozilla::supports_sse()) {
- FastConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width);
- return;
- }
-#endif
-
- FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
-}
-
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
-
-#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86)
- if (mozilla::supports_sse()) {
- ScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
- return;
- }
-#endif
-
- ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-}
-
-void LinearScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
-#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86)
- if (mozilla::supports_sse()) {
- LinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width,
- source_dx);
- return;
- }
-#endif
-
- LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-}
-
-} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_win64.cpp b/gfx/ycbcr/yuv_row_win64.cpp
deleted file mode 100644
index 6a34f840a..000000000
--- a/gfx/ycbcr/yuv_row_win64.cpp
+++ /dev/null
@@ -1,205 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "yuv_row.h"
-
-extern "C" {
-
-// x64 compiler doesn't support MMX and inline assembler. Use SSE2 intrinsics.
-
-#define kCoefficientsRgbU (reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 2048)
-#define kCoefficientsRgbV (reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 4096)
-
-#include <emmintrin.h>
-
-static void FastConvertYUVToRGB32Row_SSE2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- __m128i xmm0, xmmY1, xmmY2;
- __m128 xmmY;
-
- while (width >= 2) {
- xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbU + 8 * *u_buf++)),
- _mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbV + 8 * *v_buf++)));
-
- xmmY1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));
- xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
-
- xmmY2 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));
- xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
-
- xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
- 0x44);
- xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);
- xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
-
- _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
- rgb_buf += 8;
- width -= 2;
- }
-
- if (width) {
- xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbU + 8 * *u_buf)),
- _mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbV + 8 * *v_buf)));
- xmmY1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * *y_buf));
- xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
- xmmY1 = _mm_srai_epi16(xmmY1, 6);
- xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
- *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
- }
-}
-
-static void ScaleYUVToRGB32Row_SSE2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
- __m128i xmm0, xmmY1, xmmY2;
- __m128 xmmY;
- uint8 u, v, y;
- int x = 0;
-
- while (width >= 2) {
- u = u_buf[x >> 17];
- v = v_buf[x >> 17];
- y = y_buf[x >> 16];
- x += source_dx;
-
- xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbU + 8 * u)),
- _mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbV + 8 * v)));
- xmmY1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * y));
- xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
-
- y = y_buf[x >> 16];
- x += source_dx;
-
- xmmY2 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * y));
- xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
-
- xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
- 0x44);
- xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);
- xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
-
- _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
- rgb_buf += 8;
- width -= 2;
- }
-
- if (width) {
- u = u_buf[x >> 17];
- v = v_buf[x >> 17];
- y = y_buf[x >> 16];
-
- xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbU + 8 * u)),
- _mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbV + 8 * v)));
- xmmY1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * y));
- xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
- xmmY1 = _mm_srai_epi16(xmmY1, 6);
- xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
- *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
- }
-}
-
-static void LinearScaleYUVToRGB32Row_SSE2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
- __m128i xmm0, xmmY1, xmmY2;
- __m128 xmmY;
- uint8 u0, u1, v0, v1, y0, y1;
- uint32 uv_frac, y_frac, u, v, y;
- int x = 0;
-
- if (source_dx >= 0x20000) {
- x = 32768;
- }
-
- while(width >= 2) {
- u0 = u_buf[x >> 17];
- u1 = u_buf[(x >> 17) + 1];
- v0 = v_buf[x >> 17];
- v1 = v_buf[(x >> 17) + 1];
- y0 = y_buf[x >> 16];
- y1 = y_buf[(x >> 16) + 1];
- uv_frac = (x & 0x1fffe);
- y_frac = (x & 0xffff);
- u = (uv_frac * u1 + (uv_frac ^ 0x1fffe) * u0) >> 17;
- v = (uv_frac * v1 + (uv_frac ^ 0x1fffe) * v0) >> 17;
- y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;
- x += source_dx;
-
- xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbU + 8 * u)),
- _mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbV + 8 * v)));
- xmmY1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * y));
- xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
-
- y0 = y_buf[x >> 16];
- y1 = y_buf[(x >> 16) + 1];
- y_frac = (x & 0xffff);
- y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;
- x += source_dx;
-
- xmmY2 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * y));
- xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
-
- xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
- 0x44);
- xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);
- xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
-
- _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
- rgb_buf += 8;
- width -= 2;
- }
-
- if (width) {
- u = u_buf[x >> 17];
- v = v_buf[x >> 17];
- y = y_buf[x >> 16];
-
- xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbU + 8 * u)),
- _mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbV + 8 * v)));
- xmmY1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * y));
-
- xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
- xmmY1 = _mm_srai_epi16(xmmY1, 6);
- xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
- *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
- }
-}
-
-void FastConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- FastConvertYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width);
-}
-
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
- ScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
-}
-
-void LinearScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int source_dx) {
- LinearScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width,
- source_dx);
-}
-
-} // extern "C"