Update aom to v1.0.0

Update aom to commit id d14c5bb4f336ef1842046089849dee4a301fbbf0.
author: trav90 <travawine@palemoon.org> 2018-10-19 21:52:15 -0500
committer: trav90 <travawine@palemoon.org> 2018-10-19 21:52:20 -0500
commit: bbcc64772580c8a979288791afa02d30bc476d2e (patch)
tree: 437ce94c3fdd7497508e5b55de06c6d011678597 /third_party/aom/test
parent: 14805f6ddbfb173c327768fff9f81f40ce5e81b0 (diff)
download: uxp-bbcc64772580c8a979288791afa02d30bc476d2e.tar.gz
186 files changed, 17753 insertions, 15435 deletions
diff --git a/third_party/aom/test/accounting_test.cc b/third_party/aom/test/accounting_test.cc
index e8387d0dc0..8b5c8af135 100644
--- a/third_party/aom/test/accounting_test.cc
+++ b/third_party/aom/test/accounting_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <math.h>
 #include <stdlib.h>
@@ -35,10 +35,7 @@ TEST(AV1, TestAccounting) {
   }
   aom_stop_encode(&bw);
   aom_reader br;
-#if CONFIG_ANS && ANS_MAX_SYMBOLS
-  br.window_size = 1 << 16;
-#endif
-  aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
+  aom_reader_init(&br, bw_buffer, bw.pos);
 
   Accounting accounting;
   aom_accounting_init(&accounting);
@@ -54,7 +51,7 @@ TEST(AV1, TestAccounting) {
   GTEST_ASSERT_EQ(accounting.syms.num_syms, 0);
 
   // Should record 2 * kSymbols accounting symbols.
-  aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
+  aom_reader_init(&br, bw_buffer, bw.pos);
   br.accounting = &accounting;
   for (int i = 0; i < kSymbols; i++) {
     aom_read(&br, 32, "A");
diff --git a/third_party/aom/test/acm_random.h b/third_party/aom/test/acm_random.h
index 4842345ff6..0233870612 100644
--- a/third_party/aom/test/acm_random.h
+++ b/third_party/aom/test/acm_random.h
@@ -36,6 +36,19 @@ class ACMRandom {
     return (value >> 15) & 0xffff;
   }
 
+  int16_t Rand15Signed(void) {
+    const uint32_t value =
+        random_.Generate(testing::internal::Random::kMaxRange);
+    return (value >> 17) & 0xffff;
+  }
+
+  uint16_t Rand12(void) {
+    const uint32_t value =
+        random_.Generate(testing::internal::Random::kMaxRange);
+    // There's a bit more entropy in the upper bits of this implementation.
+    return (value >> 19) & 0xfff;
+  }
+
   int16_t Rand9Signed(void) {
     // Use 9 bits: values between 255 (0x0FF) and -256 (0x100).
     const uint32_t value = random_.Generate(512);
diff --git a/third_party/aom/test/active_map_refresh_test.cc b/third_party/aom/test/active_map_refresh_test.cc
deleted file mode 100644
index 184692ca8d..0000000000
--- a/third_party/aom/test/active_map_refresh_test.cc
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <algorithm>
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/util.h"
-#include "test/y4m_video_source.h"
-
-namespace {
-
-// Check if any pixel in a 16x16 macroblock varies between frames.
-int CheckMb(const aom_image_t &current, const aom_image_t &previous, int mb_r,
-            int mb_c) {
-  for (int plane = 0; plane < 3; plane++) {
-    int r = 16 * mb_r;
-    int c0 = 16 * mb_c;
-    int r_top = std::min(r + 16, static_cast<int>(current.d_h));
-    int c_top = std::min(c0 + 16, static_cast<int>(current.d_w));
-    r = std::max(r, 0);
-    c0 = std::max(c0, 0);
-    if (plane > 0 && current.x_chroma_shift) {
-      c_top = (c_top + 1) >> 1;
-      c0 >>= 1;
-    }
-    if (plane > 0 && current.y_chroma_shift) {
-      r_top = (r_top + 1) >> 1;
-      r >>= 1;
-    }
-    for (; r < r_top; ++r) {
-      for (int c = c0; c < c_top; ++c) {
-        if (current.planes[plane][current.stride[plane] * r + c] !=
-            previous.planes[plane][previous.stride[plane] * r + c])
-          return 1;
-      }
-    }
-  }
-  return 0;
-}
-
-void GenerateMap(int mb_rows, int mb_cols, const aom_image_t &current,
-                 const aom_image_t &previous, uint8_t *map) {
-  for (int mb_r = 0; mb_r < mb_rows; ++mb_r) {
-    for (int mb_c = 0; mb_c < mb_cols; ++mb_c) {
-      map[mb_r * mb_cols + mb_c] = CheckMb(current, previous, mb_r, mb_c);
-    }
-  }
-}
-
-const int kAqModeCyclicRefresh = 3;
-
-class ActiveMapRefreshTest
-    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
-      public ::libaom_test::EncoderTest {
- protected:
-  ActiveMapRefreshTest() : EncoderTest(GET_PARAM(0)) {}
-  virtual ~ActiveMapRefreshTest() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(GET_PARAM(1));
-    cpu_used_ = GET_PARAM(2);
-  }
-
-  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
-                                  ::libaom_test::Encoder *encoder) {
-    ::libaom_test::Y4mVideoSource *y4m_video =
-        static_cast<libaom_test::Y4mVideoSource *>(video);
-    if (video->frame() == 1) {
-      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
-      encoder->Control(AV1E_SET_AQ_MODE, kAqModeCyclicRefresh);
-    } else if (video->frame() >= 2 && video->img()) {
-      aom_image_t *current = video->img();
-      aom_image_t *previous = y4m_holder_->img();
-      ASSERT_TRUE(previous != NULL);
-      aom_active_map_t map = aom_active_map_t();
-      const int width = static_cast<int>(current->d_w);
-      const int height = static_cast<int>(current->d_h);
-      const int mb_width = (width + 15) / 16;
-      const int mb_height = (height + 15) / 16;
-      uint8_t *active_map = new uint8_t[mb_width * mb_height];
-      GenerateMap(mb_height, mb_width, *current, *previous, active_map);
-      map.cols = mb_width;
-      map.rows = mb_height;
-      map.active_map = active_map;
-      encoder->Control(AOME_SET_ACTIVEMAP, &map);
-      delete[] active_map;
-    }
-    if (video->img()) {
-      y4m_video->SwapBuffers(y4m_holder_);
-    }
-  }
-
-  int cpu_used_;
-  ::libaom_test::Y4mVideoSource *y4m_holder_;
-};
-
-TEST_P(ActiveMapRefreshTest, Test) {
-  cfg_.g_lag_in_frames = 0;
-  cfg_.g_profile = 1;
-  cfg_.rc_target_bitrate = 600;
-  cfg_.rc_resize_mode = 0;
-  cfg_.rc_min_quantizer = 8;
-  cfg_.rc_max_quantizer = 30;
-  cfg_.g_pass = AOM_RC_ONE_PASS;
-  cfg_.rc_end_usage = AOM_CBR;
-  cfg_.kf_max_dist = 90000;
-
-  ::libaom_test::Y4mVideoSource video("desktop_credits.y4m", 0, 10);
-  ::libaom_test::Y4mVideoSource video_holder("desktop_credits.y4m", 0, 10);
-  video_holder.Begin();
-  y4m_holder_ = &video_holder;
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-AV1_INSTANTIATE_TEST_CASE(ActiveMapRefreshTest,
-                          ::testing::Values(::libaom_test::kRealTime),
-                          ::testing::Range(5, 6));
-}  // namespace
diff --git a/third_party/aom/test/active_map_test.cc b/third_party/aom/test/active_map_test.cc
index 318a8518b6..a2b0546edb 100644
--- a/third_party/aom/test/active_map_test.cc
+++ b/third_party/aom/test/active_map_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <climits>
 #include <vector>
diff --git a/third_party/aom/test/android/Android.mk b/third_party/aom/test/android/Android.mk
deleted file mode 100644
index 74f9d7cbaf..0000000000
--- a/third_party/aom/test/android/Android.mk
+++ /dev/null
@@ -1,58 +0,0 @@
-#
-# Copyright (c) 2016, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and
-# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-# was not distributed with this source code in the LICENSE file, you can
-# obtain it at www.aomedia.org/license/software. If the Alliance for Open
-# Media Patent License 1.0 was not distributed with this source code in the
-# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-#
-# This make file builds aom_test app for android.
-# The test app itself runs on the command line through adb shell
-# The paths are really messed up as the libaom make file
-# expects to be made from a parent directory.
-CUR_WD := $(call my-dir)
-BINDINGS_DIR := $(CUR_WD)/../../..
-LOCAL_PATH := $(CUR_WD)/../../..
-
-#libwebm
-include $(CLEAR_VARS)
-include $(BINDINGS_DIR)/libaom/third_party/libwebm/Android.mk
-LOCAL_PATH := $(CUR_WD)/../../..
-
-#libaom
-include $(CLEAR_VARS)
-LOCAL_STATIC_LIBRARIES := libwebm
-include $(BINDINGS_DIR)/libaom/build/make/Android.mk
-LOCAL_PATH := $(CUR_WD)/../..
-
-#libgtest
-include $(CLEAR_VARS)
-LOCAL_ARM_MODE := arm
-LOCAL_CPP_EXTENSION := .cc
-LOCAL_MODULE := gtest
-LOCAL_C_INCLUDES := $(LOCAL_PATH)/third_party/googletest/src/googletest/src
-LOCAL_C_INCLUDES += $(LOCAL_PATH)/third_party/googletest/src/googletest/include
-LOCAL_SRC_FILES := ./third_party/googletest/src/googletest/src/gtest-all.cc
-include $(BUILD_STATIC_LIBRARY)
-
-#libaom_test
-include $(CLEAR_VARS)
-LOCAL_ARM_MODE := arm
-LOCAL_MODULE := libaom_test
-LOCAL_STATIC_LIBRARIES := gtest libwebm
-
-ifeq ($(ENABLE_SHARED),1)
-  LOCAL_SHARED_LIBRARIES := aom
-else
-  LOCAL_STATIC_LIBRARIES += aom
-endif
-
-include $(LOCAL_PATH)/test/test.mk
-LOCAL_C_INCLUDES := $(BINDINGS_DIR)
-FILTERED_SRC := $(sort $(filter %.cc %.c, $(LIBAOM_TEST_SRCS-yes)))
-LOCAL_SRC_FILES := $(addprefix ./test/, $(FILTERED_SRC))
-# some test files depend on *_rtcd.h, ensure they're generated first.
-$(eval $(call rtcd_dep_template))
-include $(BUILD_EXECUTABLE)
diff --git a/third_party/aom/test/android/README b/third_party/aom/test/android/README
deleted file mode 100644
index 35c8297386..0000000000
--- a/third_party/aom/test/android/README
+++ /dev/null
@@ -1,32 +0,0 @@
-Android.mk will build aom unittests on android.
-1) Configure libaom from the parent directory:
-./libaom/configure --target=armv7-android-gcc --enable-external-build \
-  --enable-postproc --disable-install-srcs --enable-multi-res-encoding \
-  --enable-temporal-denoising --disable-unit-tests --disable-install-docs \
-  --disable-examples --disable-runtime-cpu-detect --sdk-path=$NDK
-
-2) From the parent directory, invoke ndk-build:
-NDK_PROJECT_PATH=. ndk-build APP_BUILD_SCRIPT=./libaom/test/android/Android.mk \
-  APP_ABI=armeabi-v7a APP_PLATFORM=android-18 APP_OPTIM=release \
-  APP_STL=gnustl_static
-
-Note: Both adb and ndk-build are available prebuilt at:
-  https://chromium.googlesource.com/android_tools
-
-3) Run get_files.py to download the test files:
-python get_files.py -i /path/to/test-data.sha1 -o /path/to/put/files \
-  -u http://downloads.webmproject.org/test_data/libaom
-
-4) Transfer files to device using adb. Ensure you have proper permissions for
-the target
-
-adb push /path/to/test_files /data/local/tmp
-adb push /path/to/built_libs /data/local/tmp
-
-NOTE: Built_libs defaults to parent_dir/libs/armeabi-v7a
-
-5) Run tests:
-adb shell
-(on device)
-cd /data/local/tmp
-LD_LIBRARY_PATH=. ./aom_test
diff --git a/third_party/aom/test/android/get_files.py b/third_party/aom/test/android/get_files.py
deleted file mode 100644
index bdae9a315e..0000000000
--- a/third_party/aom/test/android/get_files.py
+++ /dev/null
@@ -1,120 +0,0 @@
-#
-# Copyright (c) 2016, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and
-# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-# was not distributed with this source code in the LICENSE file, you can
-# obtain it at www.aomedia.org/license/software. If the Alliance for Open
-# Media Patent License 1.0 was not distributed with this source code in the
-# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-#
-# This simple script pulls test files from the webm homepage
-# It is intelligent enough to only pull files if
-#   1) File / test_data folder does not exist
-#   2) SHA mismatch
-
-import pycurl
-import csv
-import hashlib
-import re
-import os.path
-import time
-import itertools
-import sys
-import getopt
-
-#globals
-url = ''
-file_list_path = ''
-local_resource_path = ''
-
-# Helper functions:
-# A simple function which returns the sha hash of a file in hex
-def get_file_sha(filename):
-  try:
-    sha_hash = hashlib.sha1()
-    with open(filename, 'rb') as file:
-      buf = file.read(HASH_CHUNK)
-      while len(buf) > 0:
-        sha_hash.update(buf)
-        buf = file.read(HASH_CHUNK)
-      return sha_hash.hexdigest()
-  except IOError:
-    print "Error reading " + filename
-
-# Downloads a file from a url, and then checks the sha against the passed
-# in sha
-def download_and_check_sha(url, filename, sha):
-  path = os.path.join(local_resource_path, filename)
-  fp = open(path, "wb")
-  curl = pycurl.Curl()
-  curl.setopt(pycurl.URL, url + "/" + filename)
-  curl.setopt(pycurl.WRITEDATA, fp)
-  curl.perform()
-  curl.close()
-  fp.close()
-  return get_file_sha(path) == sha
-
-#constants
-ftp_retries = 3
-
-SHA_COL = 0
-NAME_COL = 1
-EXPECTED_COL = 2
-HASH_CHUNK = 65536
-
-# Main script
-try:
-  opts, args = \
-      getopt.getopt(sys.argv[1:], \
-                    "u:i:o:", ["url=", "input_csv=", "output_dir="])
-except:
-  print 'get_files.py -u <url> -i <input_csv> -o <output_dir>'
-  sys.exit(2)
-
-for opt, arg in opts:
-  if opt == '-u':
-    url = arg
-  elif opt in ("-i", "--input_csv"):
-    file_list_path = os.path.join(arg)
-  elif opt in ("-o", "--output_dir"):
-    local_resource_path = os.path.join(arg)
-
-if len(sys.argv) != 7:
-  print "Expects two paths and a url!"
-  exit(1)
-
-if not os.path.isdir(local_resource_path):
-  os.makedirs(local_resource_path)
-
-file_list_csv = open(file_list_path, "rb")
-
-# Our 'csv' file uses multiple spaces as a delimiter, python's
-# csv class only uses single character delimiters, so we convert them below
-file_list_reader = csv.reader((re.sub(' +', ' ', line) \
-    for line in file_list_csv), delimiter = ' ')
-
-file_shas = []
-file_names = []
-
-for row in file_list_reader:
-  if len(row) != EXPECTED_COL:
-      continue
-  file_shas.append(row[SHA_COL])
-  file_names.append(row[NAME_COL])
-
-file_list_csv.close()
-
-# Download files, only if they don't already exist and have correct shas
-for filename, sha in itertools.izip(file_names, file_shas):
-  path = os.path.join(local_resource_path, filename)
-  if os.path.isfile(path) \
-      and get_file_sha(path) == sha:
-    print path + ' exists, skipping'
-    continue
-  for retry in range(0, ftp_retries):
-    print "Downloading " + path
-    if not download_and_check_sha(url, filename, sha):
-      print "Sha does not match, retrying..."
-    else:
-      break
diff --git a/third_party/aom/test/android/scrape_gtest_log.py b/third_party/aom/test/android/scrape_gtest_log.py
deleted file mode 100644
index e0c929a5d5..0000000000
--- a/third_party/aom/test/android/scrape_gtest_log.py
+++ /dev/null
@@ -1,60 +0,0 @@
-#
-# Copyright (c) 2016, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and
-# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-# was not distributed with this source code in the LICENSE file, you can
-# obtain it at www.aomedia.org/license/software. If the Alliance for Open
-# Media Patent License 1.0 was not distributed with this source code in the
-# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-#
-
-"""Standalone script which parses a gtest log for json.
-
-Json is returned returns as an array.  This script is used by the libaom
-waterfall to gather json results mixed in with gtest logs.  This is
-dubious software engineering.
-"""
-
-import getopt
-import json
-import os
-import re
-import sys
-
-
-def main():
-  if len(sys.argv) != 3:
-    print "Expects a file to write json to!"
-    exit(1)
-
-  try:
-    opts, _ = \
-        getopt.getopt(sys.argv[1:], \
-                      'o:', ['output-json='])
-  except getopt.GetOptError:
-    print 'scrape_gtest_log.py -o <output_json>'
-    sys.exit(2)
-
-  output_json = ''
-  for opt, arg in opts:
-    if opt in ('-o', '--output-json'):
-      output_json = os.path.join(arg)
-
-  blob = sys.stdin.read()
-  json_string = '[' + ','.join('{' + x + '}' for x in
-                               re.findall(r'{([^}]*.?)}', blob)) + ']'
-  print blob
-
-  output = json.dumps(json.loads(json_string), indent=4, sort_keys=True)
-  print output
-
-  path = os.path.dirname(output_json)
-  if path and not os.path.exists(path):
-    os.makedirs(path)
-
-  outfile = open(output_json, 'w')
-  outfile.write(output)
-
-if __name__ == '__main__':
-  sys.exit(main())
diff --git a/third_party/aom/test/ans_codec_test.cc b/third_party/aom/test/ans_codec_test.cc
deleted file mode 100644
index 59d352b2d5..0000000000
--- a/third_party/aom/test/ans_codec_test.cc
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/util.h"
-#include "test/y4m_video_source.h"
-#include "aom_dsp/ans.h"
-#include "av1/av1_dx_iface.c"
-
-// A note on ANS_MAX_SYMBOLS == 0:
-// Fused gtest doesn't work with EXPECT_FATAL_FAILURE [1]. Just run with a
-// single iteration and don't try to check the window size if we are unwindowed.
-// [1] https://github.com/google/googletest/issues/356
-
-namespace {
-
-const char kTestVideoName[] = "niklas_1280_720_30.y4m";
-const int kTestVideoFrames = 10;
-
-class AnsCodecTest : public ::libaom_test::CodecTestWithParam<int>,
-                     public ::libaom_test::EncoderTest {
- protected:
-  AnsCodecTest()
-      : EncoderTest(GET_PARAM(0)), ans_window_size_log2_(GET_PARAM(1)) {}
-
-  virtual ~AnsCodecTest() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(::libaom_test::kOnePassGood);
-    cfg_.g_lag_in_frames = 25;
-    cfg_.rc_end_usage = AOM_CQ;
-  }
-
-  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
-                                  ::libaom_test::Encoder *encoder) {
-    if (video->frame() == 1) {
-#if ANS_MAX_SYMBOLS
-      encoder->Control(AV1E_SET_ANS_WINDOW_SIZE_LOG2, ans_window_size_log2_);
-#endif
-      // Try to push a high symbol count through the codec
-      encoder->Control(AOME_SET_CQ_LEVEL, 8);
-      encoder->Control(AOME_SET_CPUUSED, 2);
-      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
-      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
-      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
-      encoder->Control(AV1E_SET_TILE_COLUMNS, 0);
-      encoder->Control(AV1E_SET_TILE_ROWS, 0);
-    }
-  }
-
-  virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
-                                  libaom_test::Decoder *decoder) {
-    aom_codec_ctx_t *const av1_decoder = decoder->GetDecoder();
-#if ANS_MAX_SYMBOLS
-    aom_codec_alg_priv_t *const priv =
-        reinterpret_cast<aom_codec_alg_priv_t *>(av1_decoder->priv);
-    FrameWorkerData *const worker_data =
-        reinterpret_cast<FrameWorkerData *>(priv->frame_workers[0].data1);
-    AV1_COMMON *const common = &worker_data->pbi->common;
-
-    EXPECT_EQ(ans_window_size_log2_, common->ans_window_size_log2);
-#endif
-
-    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
-    return AOM_CODEC_OK == res_dec;
-  }
-
- private:
-  int ans_window_size_log2_;
-};
-
-TEST_P(AnsCodecTest, BitstreamParms) {
-  testing::internal::scoped_ptr<libaom_test::VideoSource> video(
-      new libaom_test::Y4mVideoSource(kTestVideoName, 0, kTestVideoFrames));
-  ASSERT_TRUE(video.get() != NULL);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
-}
-
-#if ANS_MAX_SYMBOLS
-AV1_INSTANTIATE_TEST_CASE(AnsCodecTest, ::testing::Range(8, 24));
-#else
-AV1_INSTANTIATE_TEST_CASE(AnsCodecTest, ::testing::Range(0, 1));
-#endif
-}  // namespace
diff --git a/third_party/aom/test/ans_test.cc b/third_party/aom/test/ans_test.cc
deleted file mode 100644
index fd460f409d..0000000000
--- a/third_party/aom/test/ans_test.cc
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
-#include <ctime>
-#include <utility>
-#include <vector>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "test/acm_random.h"
-#include "aom_dsp/ansreader.h"
-#include "aom_dsp/buf_ans.h"
-
-namespace {
-typedef std::vector<std::pair<uint8_t, bool> > PvVec;
-
-const int kPrintStats = 0;
-// Use a small buffer size to exercise ANS window spills or buffer growth
-const int kBufAnsSize = 1 << 8;
-
-PvVec abs_encode_build_vals(int iters) {
-  PvVec ret;
-  libaom_test::ACMRandom gen(0x30317076);
-  double entropy = 0;
-  for (int i = 0; i < iters; ++i) {
-    uint8_t p;
-    do {
-      p = gen.Rand8();
-    } while (p == 0);  // zero is not a valid coding probability
-    bool b = gen.Rand8() < p;
-    ret.push_back(std::make_pair(static_cast<uint8_t>(p), b));
-    if (kPrintStats) {
-      double d = p / 256.;
-      entropy += -d * log2(d) - (1 - d) * log2(1 - d);
-    }
-  }
-  if (kPrintStats) printf("entropy %f\n", entropy);
-  return ret;
-}
-
-bool check_rabs(const PvVec &pv_vec, uint8_t *buf) {
-  BufAnsCoder a;
-  a.size = kBufAnsSize;
-  aom_buf_ans_alloc(&a, NULL);
-  buf_ans_write_init(&a, buf);
-
-  std::clock_t start = std::clock();
-  for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
-    buf_rabs_write(&a, it->second, 256 - it->first);
-  }
-  aom_buf_ans_flush(&a);
-  std::clock_t enc_time = std::clock() - start;
-  int offset = buf_ans_write_end(&a);
-  aom_buf_ans_free(&a);
-  bool okay = true;
-  AnsDecoder d;
-#if ANS_MAX_SYMBOLS
-  d.window_size = kBufAnsSize;
-#endif
-  if (ans_read_init(&d, buf, offset)) return false;
-  start = std::clock();
-  for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
-    okay = okay && (rabs_read(&d, 256 - it->first) != 0) == it->second;
-  }
-  std::clock_t dec_time = std::clock() - start;
-  if (!okay) return false;
-  if (kPrintStats)
-    printf("uABS size %d enc_time %f dec_time %f\n", offset,
-           static_cast<float>(enc_time) / CLOCKS_PER_SEC,
-           static_cast<float>(dec_time) / CLOCKS_PER_SEC);
-  return ans_read_end(&d) != 0;
-}
-
-const aom_cdf_prob spareto65[] = { 8320, 6018, 4402, 3254, 4259,
-                                   3919, 2057, 492,  45,   2 };
-
-const int kRansSymbols =
-    static_cast<int>(sizeof(spareto65) / sizeof(spareto65[0]));
-
-struct rans_sym {
-  aom_cdf_prob prob;
-  aom_cdf_prob cum_prob;  // not-inclusive
-};
-
-std::vector<int> ans_encode_build_vals(rans_sym *const tab, int iters) {
-  aom_cdf_prob sum = 0;
-  for (int i = 0; i < kRansSymbols; ++i) {
-    tab[i].cum_prob = sum;
-    tab[i].prob = spareto65[i];
-    sum += spareto65[i];
-  }
-  std::vector<int> p_to_sym;
-  for (int i = 0; i < kRansSymbols; ++i) {
-    p_to_sym.insert(p_to_sym.end(), tab[i].prob, i);
-  }
-  assert(p_to_sym.size() == RANS_PRECISION);
-  std::vector<int> ret;
-  libaom_test::ACMRandom gen(18543637);
-  for (int i = 0; i < iters; ++i) {
-    int sym =
-        p_to_sym[((gen.Rand8() << 8) + gen.Rand8()) & (RANS_PRECISION - 1)];
-    ret.push_back(sym);
-  }
-  return ret;
-}
-
-void rans_build_dec_tab(const struct rans_sym sym_tab[],
-                        aom_cdf_prob *dec_tab) {
-  unsigned int sum = 0;
-  for (int i = 0; sum < RANS_PRECISION; ++i) {
-    dec_tab[i] = sum += sym_tab[i].prob;
-  }
-}
-
-bool check_rans(const std::vector<int> &sym_vec, const rans_sym *const tab,
-                uint8_t *buf) {
-  BufAnsCoder a;
-  a.size = kBufAnsSize;
-  aom_buf_ans_alloc(&a, NULL);
-  buf_ans_write_init(&a, buf);
-  aom_cdf_prob dec_tab[kRansSymbols];
-  rans_build_dec_tab(tab, dec_tab);
-
-  std::clock_t start = std::clock();
-  for (std::vector<int>::const_iterator it = sym_vec.begin();
-       it != sym_vec.end(); ++it) {
-    buf_rans_write(&a, tab[*it].cum_prob, tab[*it].prob);
-  }
-  aom_buf_ans_flush(&a);
-  std::clock_t enc_time = std::clock() - start;
-  int offset = buf_ans_write_end(&a);
-  aom_buf_ans_free(&a);
-  bool okay = true;
-  AnsDecoder d;
-#if ANS_MAX_SYMBOLS
-  d.window_size = kBufAnsSize;
-#endif
-  if (ans_read_init(&d, buf, offset)) return false;
-  start = std::clock();
-  for (std::vector<int>::const_iterator it = sym_vec.begin();
-       it != sym_vec.end(); ++it) {
-    okay &= rans_read(&d, dec_tab) == *it;
-  }
-  std::clock_t dec_time = std::clock() - start;
-  if (!okay) return false;
-  if (kPrintStats)
-    printf("rANS size %d enc_time %f dec_time %f\n", offset,
-           static_cast<float>(enc_time) / CLOCKS_PER_SEC,
-           static_cast<float>(dec_time) / CLOCKS_PER_SEC);
-  return ans_read_end(&d) != 0;
-}
-
-class AbsTestFix : public ::testing::Test {
- protected:
-  static void SetUpTestCase() { pv_vec_ = abs_encode_build_vals(kNumBools); }
-  virtual void SetUp() { buf_ = new uint8_t[kNumBools / 8]; }
-  virtual void TearDown() { delete[] buf_; }
-  static const int kNumBools = 100000000;
-  static PvVec pv_vec_;
-  uint8_t *buf_;
-};
-PvVec AbsTestFix::pv_vec_;
-
-class AnsTestFix : public ::testing::Test {
- protected:
-  static void SetUpTestCase() {
-    sym_vec_ = ans_encode_build_vals(rans_sym_tab_, kNumSyms);
-  }
-  virtual void SetUp() { buf_ = new uint8_t[kNumSyms / 2]; }
-  virtual void TearDown() { delete[] buf_; }
-  static const int kNumSyms = 25000000;
-  static std::vector<int> sym_vec_;
-  static rans_sym rans_sym_tab_[kRansSymbols];
-  uint8_t *buf_;
-};
-std::vector<int> AnsTestFix::sym_vec_;
-rans_sym AnsTestFix::rans_sym_tab_[kRansSymbols];
-
-TEST_F(AbsTestFix, Rabs) { EXPECT_TRUE(check_rabs(pv_vec_, buf_)); }
-TEST_F(AnsTestFix, Rans) {
-  EXPECT_TRUE(check_rans(sym_vec_, rans_sym_tab_, buf_));
-}
-TEST(AnsTest, FinalStateSerialization) {
-  for (unsigned i = L_BASE; i < L_BASE * IO_BASE; ++i) {
-    uint8_t buf[8];
-    AnsCoder c;
-    ans_write_init(&c, buf);
-    c.state = i;
-    const int written_size = ans_write_end(&c);
-    ASSERT_LT(static_cast<size_t>(written_size), sizeof(buf));
-    AnsDecoder d;
-#if ANS_MAX_SYMBOLS
-    // There is no real data window here because no symbols are sent through
-    // ans (only synthetic states), so use a dummy value
-    d.window_size = 1024;
-#endif
-    const int read_init_status = ans_read_init(&d, buf, written_size);
-    EXPECT_EQ(read_init_status, 0);
-    EXPECT_EQ(d.state, i);
-  }
-}
-}  // namespace
diff --git a/third_party/aom/test/aom_integer_test.cc b/third_party/aom/test/aom_integer_test.cc
new file mode 100644
index 0000000000..fe88a54e9e
--- /dev/null
+++ b/third_party/aom/test/aom_integer_test.cc
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "aom/aom_integer.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+const uint64_t kMaximumLeb128CodedSize = 8;
+const uint8_t kLeb128PadByte = 0x80;  // Binary: 10000000
+const uint64_t kMaximumLeb128Value = UINT32_MAX;
+const uint32_t kSizeTestNumValues = 6;
+const uint32_t kSizeTestExpectedSizes[kSizeTestNumValues] = {
+  1, 1, 2, 3, 4, 5
+};
+const uint64_t kSizeTestInputs[kSizeTestNumValues] = {
+  0, 0x7f, 0x3fff, 0x1fffff, 0xffffff, 0x10000000
+};
+
+const uint8_t kOutOfRangeLeb128Value[5] = { 0x80, 0x80, 0x80, 0x80,
+                                            0x10 };  // UINT32_MAX + 1
+}  // namespace
+
+TEST(AomLeb128, DecodeTest) {
+  const size_t num_leb128_bytes = 3;
+  const uint8_t leb128_bytes[num_leb128_bytes] = { 0xE5, 0x8E, 0x26 };
+  const uint64_t expected_value = 0x98765;  // 624485
+  const size_t expected_length = 3;
+  uint64_t value = ~0ULL;  // make sure value is cleared by the function
+  size_t length;
+  ASSERT_EQ(
+      aom_uleb_decode(&leb128_bytes[0], num_leb128_bytes, &value, &length), 0);
+  ASSERT_EQ(expected_value, value);
+  ASSERT_EQ(expected_length, length);
+
+  // Make sure the decoder stops on the last marked LEB128 byte.
+  aom_uleb_decode(&leb128_bytes[0], num_leb128_bytes + 1, &value, &length);
+  ASSERT_EQ(expected_value, value);
+  ASSERT_EQ(expected_length, length);
+}
+
+TEST(AomLeb128, EncodeTest) {
+  const uint32_t test_value = 0x98765;  // 624485
+  const uint8_t expected_bytes[3] = { 0xE5, 0x8E, 0x26 };
+  const size_t kWriteBufferSize = 4;
+  uint8_t write_buffer[kWriteBufferSize] = { 0 };
+  size_t bytes_written = 0;
+  ASSERT_EQ(aom_uleb_encode(test_value, kWriteBufferSize, &write_buffer[0],
+                            &bytes_written),
+            0);
+  ASSERT_EQ(bytes_written, 3u);
+  for (size_t i = 0; i < bytes_written; ++i) {
+    ASSERT_EQ(write_buffer[i], expected_bytes[i]);
+  }
+}
+
+TEST(AomLeb128, EncodeDecodeTest) {
+  const uint32_t value = 0x98765;  // 624485
+  const size_t kWriteBufferSize = 4;
+  uint8_t write_buffer[kWriteBufferSize] = { 0 };
+  size_t bytes_written = 0;
+  ASSERT_EQ(aom_uleb_encode(value, kWriteBufferSize, &write_buffer[0],
+                            &bytes_written),
+            0);
+  ASSERT_EQ(bytes_written, 3u);
+  uint64_t decoded_value;
+  size_t decoded_length;
+  aom_uleb_decode(&write_buffer[0], bytes_written, &decoded_value,
+                  &decoded_length);
+  ASSERT_EQ(value, decoded_value);
+  ASSERT_EQ(bytes_written, decoded_length);
+}
+
+TEST(AomLeb128, FixedSizeEncodeTest) {
+  const uint32_t test_value = 0x123;
+  const uint8_t expected_bytes[4] = { 0xa3, 0x82, 0x80, 0x00 };
+  const size_t kWriteBufferSize = 4;
+  uint8_t write_buffer[kWriteBufferSize] = { 0 };
+  size_t bytes_written = 0;
+  ASSERT_EQ(0, aom_uleb_encode_fixed_size(test_value, kWriteBufferSize,
+                                          kWriteBufferSize, &write_buffer[0],
+                                          &bytes_written));
+  ASSERT_EQ(kWriteBufferSize, bytes_written);
+  for (size_t i = 0; i < bytes_written; ++i) {
+    ASSERT_EQ(write_buffer[i], expected_bytes[i]);
+  }
+}
+
+TEST(AomLeb128, FixedSizeEncodeDecodeTest) {
+  const uint32_t value = 0x1;
+  const size_t kWriteBufferSize = 4;
+  uint8_t write_buffer[kWriteBufferSize] = { 0 };
+  size_t bytes_written = 0;
+  ASSERT_EQ(
+      aom_uleb_encode_fixed_size(value, kWriteBufferSize, kWriteBufferSize,
+                                 &write_buffer[0], &bytes_written),
+      0);
+  ASSERT_EQ(bytes_written, 4u);
+  uint64_t decoded_value;
+  size_t decoded_length;
+  aom_uleb_decode(&write_buffer[0], bytes_written, &decoded_value,
+                  &decoded_length);
+  ASSERT_EQ(value, decoded_value);
+  ASSERT_EQ(bytes_written, decoded_length);
+}
+
+TEST(AomLeb128, SizeTest) {
+  for (size_t i = 0; i < kSizeTestNumValues; ++i) {
+    ASSERT_EQ(kSizeTestExpectedSizes[i],
+              aom_uleb_size_in_bytes(kSizeTestInputs[i]));
+  }
+}
+
+TEST(AomLeb128, DecodeFailTest) {
+  // Input buffer containing what would be a valid 9 byte LEB128 encoded
+  // unsigned integer.
+  const uint8_t kAllPadBytesBuffer[kMaximumLeb128CodedSize + 1] = {
+    kLeb128PadByte, kLeb128PadByte, kLeb128PadByte,
+    kLeb128PadByte, kLeb128PadByte, kLeb128PadByte,
+    kLeb128PadByte, kLeb128PadByte, 0
+  };
+  uint64_t decoded_value;
+
+  // Test that decode fails when result would be valid 9 byte integer.
+  ASSERT_EQ(aom_uleb_decode(&kAllPadBytesBuffer[0], kMaximumLeb128CodedSize + 1,
+                            &decoded_value, NULL),
+            -1);
+
+  // Test that encoded value missing terminator byte within available buffer
+  // range causes decode error.
+  ASSERT_EQ(aom_uleb_decode(&kAllPadBytesBuffer[0], kMaximumLeb128CodedSize,
+                            &decoded_value, NULL),
+            -1);
+
+  // Test that LEB128 input that decodes to a value larger than 32-bits fails.
+  size_t value_size = 0;
+  ASSERT_EQ(aom_uleb_decode(&kOutOfRangeLeb128Value[0],
+                            sizeof(kOutOfRangeLeb128Value), &decoded_value,
+                            &value_size),
+            -1);
+}
+
+TEST(AomLeb128, EncodeFailTest) {
+  const size_t kWriteBufferSize = 4;
+  const uint32_t kValidTestValue = 1;
+  uint8_t write_buffer[kWriteBufferSize] = { 0 };
+  size_t coded_size = 0;
+  ASSERT_EQ(
+      aom_uleb_encode(kValidTestValue, kWriteBufferSize, NULL, &coded_size),
+      -1);
+  ASSERT_EQ(aom_uleb_encode(kValidTestValue, kWriteBufferSize, &write_buffer[0],
+                            NULL),
+            -1);
+
+  const uint32_t kValueOutOfRangeForBuffer = 0xFFFFFFFF;
+  ASSERT_EQ(aom_uleb_encode(kValueOutOfRangeForBuffer, kWriteBufferSize,
+                            &write_buffer[0], &coded_size),
+            -1);
+
+  const uint64_t kValueOutOfRange = kMaximumLeb128Value + 1;
+  ASSERT_EQ(aom_uleb_encode(kValueOutOfRange, kWriteBufferSize,
+                            &write_buffer[0], &coded_size),
+            -1);
+
+  const size_t kPadSizeOutOfRange = 5;
+  ASSERT_EQ(aom_uleb_encode_fixed_size(kValidTestValue, kWriteBufferSize,
+                                       kPadSizeOutOfRange, &write_buffer[0],
+                                       &coded_size),
+            -1);
+}
diff --git a/third_party/aom/test/aomdec.sh b/third_party/aom/test/aomdec.sh
index 28901ed1bd..5f54ae0aff 100755
--- a/third_party/aom/test/aomdec.sh
+++ b/third_party/aom/test/aomdec.sh
@@ -17,10 +17,12 @@
 # Environment check: Make sure input is available.
 aomdec_verify_environment() {
   if [ "$(av1_encode_available)" != "yes" ] ; then
-    if [ ! -e "${AV1_WEBM_FILE}" ] || \
-      [ ! -e "${AV1_FPM_WEBM_FILE}" ] || \
-      [ ! -e "${AV1_LT_50_FRAMES_WEBM_FILE}" ] ; then
-      elog "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
+    if [ ! -e "${AV1_IVF_FILE}" ] || \
+       [ ! -e "${AV1_OBU_ANNEXB_FILE}" ] || \
+       [ ! -e "${AV1_OBU_SEC5_FILE}" ] || \
+       [ ! -e "${AV1_WEBM_FILE}" ]; then
+      elog "Libaom test data must exist before running this test script when " \
+           " encoding is disabled. "
       return 1
     fi
   fi
@@ -38,10 +40,8 @@ aomdec_pipe() {
   local readonly input="$1"
   shift
   if [ ! -e "${input}" ]; then
-    local file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf"
-    encode_yuv_raw_input_av1 "${file}" --ivf
-  else
-    local file="${input}"
+    elog "Input file ($input) missing in aomdec_pipe()"
+    return 1
   fi
   cat "${file}" | aomdec - "$@" ${devnull}
 }
@@ -63,62 +63,85 @@ aomdec_can_decode_av1() {
   fi
 }
 
-aomdec_aom_ivf_pipe_input() {
+aomdec_av1_ivf() {
   if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
-    aomdec_pipe "${AOM_IVF_FILE}" --summary --noblit
+    local readonly file="${AV1_IVF_FILE}"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}" --ivf
+    fi
+    aomdec "${AV1_IVF_FILE}" --summary --noblit
   fi
 }
 
-aomdec_av1_webm() {
-  if [ "$(aomdec_can_decode_av1)" = "yes" ] && \
-     [ "$(webm_io_available)" = "yes" ]; then
-    if [ ! -e "${AV1_WEBM_FILE}" ]; then
-      local file="${AOM_TEST_OUTPUT_DIR}/test_encode.webm"
-      encode_yuv_raw_input_av1 "${file}"
-    else
-      aomdec "${AV1_WEBM_FILE}" --summary --noblit
+aomdec_av1_ivf_error_resilient() {
+  if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
+    local readonly file="av1.error-resilient.ivf"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}" --ivf --error-resilient=1
     fi
+    aomdec "${file}" --summary --noblit
   fi
 }
 
-aomdec_av1_webm_frame_parallel() {
-  if [ "$(aomdec_can_decode_av1)" = "yes" ] && \
-     [ "$(webm_io_available)" = "yes" ]; then
-    local file
-    if [ ! -e "${AV1_WEBM_FILE}" ]; then
-      file="${AOM_TEST_OUTPUT_DIR}/test_encode.webm"
-      encode_yuv_raw_input_av1 "${file}" "--ivf --error-resilient=1 "
-    else
-      file="${AV1_FPM_WEBM_FILE}"
+aomdec_av1_ivf_multithread() {
+  if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
+    local readonly file="${AV1_IVF_FILE}"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}" --ivf
     fi
     for threads in 2 3 4 5 6 7 8; do
-      aomdec "${file}" --summary --noblit --threads=$threads \
-        --frame-parallel
+      aomdec "${file}" --summary --noblit --threads=$threads
     done
   fi
 }
 
-# TODO(vigneshv): Enable or remove this test and associated code.
-DISABLED_aomdec_av1_webm_less_than_50_frames() {
-  # ensure that reaching eof in webm_guess_framerate doesn't result in invalid
-  # frames in actual webm_read_frame calls.
+aomdec_aom_ivf_pipe_input() {
+  if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
+    local readonly file="${AV1_IVF_FILE}"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}" --ivf
+    fi
+    aomdec_pipe "${AV1_IVF_FILE}" --summary --noblit
+  fi
+}
+
+aomdec_av1_obu_annexb() {
+  if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
+    local readonly file="${AV1_OBU_ANNEXB_FILE}"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}" --obu --annexb=1
+    fi
+    aomdec "${file}" --summary --noblit --annexb
+  fi
+}
+
+aomdec_av1_obu_section5() {
+  if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
+    local readonly file="${AV1_OBU_SEC5_FILE}"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}" --obu
+    fi
+    aomdec "${file}" --summary --noblit
+  fi
+}
+
+aomdec_av1_webm() {
   if [ "$(aomdec_can_decode_av1)" = "yes" ] && \
      [ "$(webm_io_available)" = "yes" ]; then
-    local readonly decoder="$(aom_tool_path aomdec)"
-    local readonly expected=10
-    local readonly num_frames=$(${AOM_TEST_PREFIX} "${decoder}" \
-      "${AV1_LT_50_FRAMES_WEBM_FILE}" --summary --noblit 2>&1 \
-      | awk '/^[0-9]+ decoded frames/ { print $1 }')
-    if [ "$num_frames" -ne "$expected" ]; then
-      elog "Output frames ($num_frames) != expected ($expected)"
-      return 1
+    local readonly file="${AV1_WEBM_FILE}"
+    if [ ! -e "${file}" ]; then
+      encode_yuv_raw_input_av1 "${file}"
     fi
+    aomdec "${AV1_WEBM_FILE}" --summary --noblit
   fi
 }
 
-aomdec_tests="aomdec_av1_webm
-              aomdec_av1_webm_frame_parallel
+aomdec_tests="aomdec_av1_ivf
+              aomdec_av1_ivf_error_resilient
+              aomdec_av1_ivf_multithread
               aomdec_aom_ivf_pipe_input
-              DISABLED_aomdec_av1_webm_less_than_50_frames"
+              aomdec_av1_obu_annexb
+              aomdec_av1_obu_section5
+              aomdec_av1_webm"
 
 run_tests aomdec_verify_environment "${aomdec_tests}"
diff --git a/third_party/aom/test/aomenc.sh b/third_party/aom/test/aomenc.sh
index 57a4c28a5a..a0ab8c8aa2 100755
--- a/third_party/aom/test/aomenc.sh
+++ b/third_party/aom/test/aomenc.sh
@@ -15,8 +15,6 @@
 ##
 . $(dirname $0)/tools_common.sh
 
-readonly TEST_FRAMES=5
-
 # Environment check: Make sure input is available.
 aomenc_verify_environment() {
   if [ ! -e "${YUV_RAW_INPUT}" ]; then
@@ -57,32 +55,6 @@ y4m_input_720p() {
   echo ""${Y4M_720P_INPUT}""
 }
 
-# Echo default aomenc real time encoding params. $1 is the codec, which defaults
-# to av1 if unspecified.
-aomenc_rt_params() {
-  local readonly codec="${1:-av1}"
-  echo "--codec=${codec}
-    --buf-initial-sz=500
-    --buf-optimal-sz=600
-    --buf-sz=1000
-    --cpu-used=-6
-    --end-usage=cbr
-    --error-resilient=1
-    --kf-max-dist=90000
-    --lag-in-frames=0
-    --max-intra-rate=300
-    --max-q=56
-    --min-q=2
-    --noise-sensitivity=0
-    --overshoot-pct=50
-    --passes=1
-    --profile=0
-    --resize-allowed=0
-    --rt
-    --static-thresh=0
-    --undershoot-pct=50"
-}
-
 # Wrapper function for running aomenc with pipe input. Requires that
 # LIBAOM_BIN_PATH points to the directory containing aomenc. $1 is used as the
 # input file path and shifted away. All remaining parameters are passed through
@@ -110,10 +82,12 @@ aomenc() {
 
 aomenc_av1_ivf() {
   if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
-    local readonly output="${AOM_TEST_OUTPUT_DIR}/av1.ivf"
+    local output="${AV1_IVF_FILE}"
+    if [ -e "${AV1_IVF_FILE}" ]; then
+      output="${AOM_TEST_OUTPUT_DIR}/av1_test.ivf"
+    fi
     aomenc $(yuv_raw_input) \
-      --codec=av1 \
-      --limit="${TEST_FRAMES}" \
+      $(aomenc_encode_test_fast_params) \
       --ivf \
       --output="${output}"
 
@@ -124,13 +98,52 @@ aomenc_av1_ivf() {
   fi
 }
 
+aomenc_av1_obu_annexb() {
+   if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
+    local output="${AV1_OBU_ANNEXB_FILE}"
+    if [ -e "${AV1_OBU_ANNEXB_FILE}" ]; then
+      output="${AOM_TEST_OUTPUT_DIR}/av1_test.annexb.obu"
+    fi
+    aomenc $(yuv_raw_input) \
+      $(aomenc_encode_test_fast_params) \
+      --obu \
+      --annexb=1 \
+      --output="${output}"
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
+aomenc_av1_obu_section5() {
+   if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
+    local output="${AV1_OBU_SEC5_FILE}"
+    if [ -e "${AV1_OBU_SEC5_FILE}" ]; then
+      output="${AOM_TEST_OUTPUT_DIR}/av1_test.section5.obu"
+    fi
+    aomenc $(yuv_raw_input) \
+      $(aomenc_encode_test_fast_params) \
+      --obu \
+      --output="${output}"
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
 aomenc_av1_webm() {
   if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
      [ "$(webm_io_available)" = "yes" ]; then
-    local readonly output="${AOM_TEST_OUTPUT_DIR}/av1.webm"
+    local output="${AV1_WEBM_FILE}"
+    if [ -e "${AV1_WEBM_FILE}" ]; then
+      output="${AOM_TEST_OUTPUT_DIR}/av1_test.webm"
+    fi
     aomenc $(yuv_raw_input) \
-      --codec=av1 \
-      --limit="${TEST_FRAMES}" \
+      $(aomenc_encode_test_fast_params) \
       --output="${output}"
 
     if [ ! -e "${output}" ]; then
@@ -140,15 +153,14 @@ aomenc_av1_webm() {
   fi
 }
 
-aomenc_av1_webm_2pass() {
+aomenc_av1_webm_1pass() {
   if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
      [ "$(webm_io_available)" = "yes" ]; then
-    local readonly output="${AOM_TEST_OUTPUT_DIR}/av1.webm"
+    local readonly output="${AOM_TEST_OUTPUT_DIR}/av1_test.webm"
     aomenc $(yuv_raw_input) \
-      --codec=av1 \
-      --limit="${TEST_FRAMES}" \
-      --output="${output}" \
-      --passes=2
+      $(aomenc_encode_test_fast_params) \
+      --passes=1 \
+      --output="${output}"
 
     if [ ! -e "${output}" ]; then
       elog "Output file does not exist."
@@ -161,8 +173,7 @@ aomenc_av1_ivf_lossless() {
   if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
     local readonly output="${AOM_TEST_OUTPUT_DIR}/av1_lossless.ivf"
     aomenc $(yuv_raw_input) \
-      --codec=av1 \
-      --limit="${TEST_FRAMES}" \
+      $(aomenc_encode_test_fast_params) \
       --ivf \
       --output="${output}" \
       --lossless=1
@@ -178,8 +189,7 @@ aomenc_av1_ivf_minq0_maxq0() {
   if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
     local readonly output="${AOM_TEST_OUTPUT_DIR}/av1_lossless_minq0_maxq0.ivf"
     aomenc $(yuv_raw_input) \
-      --codec=av1 \
-      --limit="${TEST_FRAMES}" \
+      $(aomenc_encode_test_fast_params) \
       --ivf \
       --output="${output}" \
       --min-q=0 \
@@ -199,12 +209,10 @@ aomenc_av1_webm_lag5_frames10() {
     local readonly lag_frames=5
     local readonly output="${AOM_TEST_OUTPUT_DIR}/av1_lag5_frames10.webm"
     aomenc $(yuv_raw_input) \
-      --codec=av1 \
-      --limit="${lag_total_frames}" \
-      --lag-in-frames="${lag_frames}" \
-      --output="${output}" \
-      --passes=2 \
-      --auto-alt-ref=1
+      $(aomenc_encode_test_fast_params) \
+      --limit=${lag_total_frames} \
+      --lag-in-frames=${lag_frames} \
+      --output="${output}"
 
     if [ ! -e "${output}" ]; then
       elog "Output file does not exist."
@@ -219,8 +227,7 @@ aomenc_av1_webm_non_square_par() {
      [ "$(webm_io_available)" = "yes" ]; then
     local readonly output="${AOM_TEST_OUTPUT_DIR}/av1_non_square_par.webm"
     aomenc $(y4m_input_non_square_par) \
-      --codec=av1 \
-      --limit="${TEST_FRAMES}" \
+      $(aomenc_encode_test_fast_params) \
       --output="${output}"
 
     if [ ! -e "${output}" ]; then
@@ -230,12 +237,33 @@ aomenc_av1_webm_non_square_par() {
   fi
 }
 
+aomenc_av1_webm_cdf_update_mode() {
+  if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    for mode in 0 1 2; do
+      local readonly output="${AOM_TEST_OUTPUT_DIR}/cdf_mode_${mode}.webm"
+      aomenc $(yuv_raw_input) \
+        $(aomenc_encode_test_fast_params) \
+        --cdf-update-mode=${mode} \
+        --output="${output}"
+
+      if [ ! -e "${output}" ]; then
+        elog "Output file does not exist."
+        return 1
+      fi
+    done
+  fi
+}
+
 aomenc_tests="aomenc_av1_ivf
+              aomenc_av1_obu_annexb
+              aomenc_av1_obu_section5
               aomenc_av1_webm
-              aomenc_av1_webm_2pass
+              aomenc_av1_webm_1pass
               aomenc_av1_ivf_lossless
               aomenc_av1_ivf_minq0_maxq0
               aomenc_av1_webm_lag5_frames10
-              aomenc_av1_webm_non_square_par"
+              aomenc_av1_webm_non_square_par
+              aomenc_av1_webm_cdf_update_mode"
 
 run_tests aomenc_verify_environment "${aomenc_tests}"
diff --git a/third_party/aom/test/aq_segment_test.cc b/third_party/aom/test/aq_segment_test.cc
index 57db0d0ffd..bbb5027d4d 100644
--- a/third_party/aom/test/aq_segment_test.cc
+++ b/third_party/aom/test/aq_segment_test.cc
@@ -7,9 +7,10 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
+
+#include "config/aom_config.h"
 
-#include "./aom_config.h"
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "test/codec_factory.h"
 #include "test/encode_test_driver.h"
@@ -37,18 +38,14 @@ class AqSegmentTest
     if (video->frame() == 1) {
       encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
       encoder->Control(AV1E_SET_AQ_MODE, aq_mode_);
-#if CONFIG_EXT_DELTA_Q
       encoder->Control(AV1E_SET_DELTAQ_MODE, deltaq_mode_);
-#endif
       encoder->Control(AOME_SET_MAX_INTRA_BITRATE_PCT, 100);
     }
   }
 
   void DoTest(int aq_mode) {
     aq_mode_ = aq_mode;
-#if CONFIG_EXT_DELTA_Q
     deltaq_mode_ = 0;
-#endif
     cfg_.kf_max_dist = 12;
     cfg_.rc_min_quantizer = 8;
     cfg_.rc_max_quantizer = 56;
@@ -65,9 +62,7 @@ class AqSegmentTest
 
   int set_cpu_used_;
   int aq_mode_;
-#if CONFIG_EXT_DELTA_Q
   int deltaq_mode_;
-#endif
 };
 
 // Validate that this AQ segmentation mode (AQ=1, variance_ap)
@@ -90,21 +85,6 @@ TEST_P(AqSegmentTestLarge, TestNoMisMatchAQ2) { DoTest(2); }
 
 TEST_P(AqSegmentTestLarge, TestNoMisMatchAQ3) { DoTest(3); }
 
-#if !CONFIG_EXT_DELTA_Q
-// Validate that this AQ mode (AQ=4, delta q)
-// encodes and decodes without a mismatch.
-TEST_P(AqSegmentTest, TestNoMisMatchAQ4) {
-  cfg_.rc_end_usage = AOM_CQ;
-  aq_mode_ = 4;
-
-  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 15);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-#endif
-
-#if CONFIG_EXT_DELTA_Q
 // Validate that this delta q mode
 // encodes and decodes without a mismatch.
 TEST_P(AqSegmentTest, TestNoMisMatchExtDeltaQ) {
@@ -116,7 +96,6 @@ TEST_P(AqSegmentTest, TestNoMisMatchExtDeltaQ) {
 
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 }
-#endif
 
 AV1_INSTANTIATE_TEST_CASE(AqSegmentTest,
                           ::testing::Values(::libaom_test::kRealTime,
diff --git a/third_party/aom/test/arf_freq_test.cc b/third_party/aom/test/arf_freq_test.cc
index b4b17c9abd..083f4022f4 100644
--- a/third_party/aom/test/arf_freq_test.cc
+++ b/third_party/aom/test/arf_freq_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
@@ -50,9 +50,7 @@ const TestVideoParam kTestVectors[] = {
   { "hantro_collage_w352h288.yuv", 352, 288, 30, 1, 8, AOM_IMG_FMT_I420,
     AOM_BITS_8, 0 },
   { "rush_hour_444.y4m", 352, 288, 30, 1, 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 },
-#if CONFIG_HIGHBITDEPTH
-// Add list of profile 2/3 test videos here ...
-#endif  // CONFIG_HIGHBITDEPTH
+  // Add list of profile 2/3 test videos here ...
 };
 
 const TestEncodeParam kEncodeVectors[] = {
@@ -208,7 +206,6 @@ TEST_P(ArfFreqTestLarge, MinArfFreqTest) {
   }
 }
 
-#if CONFIG_HIGHBITDEPTH || CONFIG_EXT_REFS
 #if CONFIG_AV1_ENCODER
 // TODO(angiebird): 25-29 fail in high bitdepth mode.
 // TODO(zoeliu): This ArfFreqTest does not work with BWDREF_FRAME, as
@@ -223,9 +220,4 @@ INSTANTIATE_TEST_CASE_P(
         ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kEncodeVectors),
         ::testing::ValuesIn(kMinArfVectors)));
 #endif  // CONFIG_AV1_ENCODER
-#else
-AV1_INSTANTIATE_TEST_CASE(ArfFreqTestLarge, ::testing::ValuesIn(kTestVectors),
-                          ::testing::ValuesIn(kEncodeVectors),
-                          ::testing::ValuesIn(kMinArfVectors));
-#endif  // CONFIG_HIGHBITDEPTH || CONFIG_EXT_REFS
 }  // namespace
diff --git a/third_party/aom/test/av1_convolve_2d_test.cc b/third_party/aom/test/av1_convolve_2d_test.cc
index 002ede4032..03286260e8 100644
--- a/third_party/aom/test/av1_convolve_2d_test.cc
+++ b/third_party/aom/test/av1_convolve_2d_test.cc
@@ -12,29 +12,238 @@
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "test/av1_convolve_2d_test_util.h"
 
-using std::tr1::tuple;
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
+using ::testing::tuple;
 using libaom_test::ACMRandom;
-using libaom_test::AV1Convolve2D::AV1Convolve2DTest;
-#if CONFIG_HIGHBITDEPTH
-using libaom_test::AV1HighbdConvolve2D::AV1HighbdConvolve2DTest;
-#endif
-
+using libaom_test::AV1Convolve2D::AV1Convolve2DSrTest;
+using libaom_test::AV1Convolve2D::AV1JntConvolve2DTest;
+using libaom_test::AV1HighbdConvolve2D::AV1HighbdConvolve2DSrTest;
+using libaom_test::AV1HighbdConvolve2D::AV1HighbdJntConvolve2DTest;
 namespace {
 
-TEST_P(AV1Convolve2DTest, CheckOutput) { RunCheckOutput(GET_PARAM(2)); }
+TEST_P(AV1Convolve2DSrTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(0)); }
+
+TEST_P(AV1Convolve2DSrTest, CheckOutput) { RunCheckOutput(GET_PARAM(0)); }
+
+INSTANTIATE_TEST_CASE_P(
+    C_COPY, AV1Convolve2DSrTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_copy_sr_c, 0, 0));
+INSTANTIATE_TEST_CASE_P(
+    C_X, AV1Convolve2DSrTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_x_sr_c, 1, 0));
+INSTANTIATE_TEST_CASE_P(
+    C_Y, AV1Convolve2DSrTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_y_sr_c, 0, 1));
+INSTANTIATE_TEST_CASE_P(
+    C, AV1Convolve2DSrTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_sr_c, 1, 1));
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2_COPY, AV1Convolve2DSrTest,
+                        libaom_test::AV1Convolve2D::BuildParams(
+                            av1_convolve_2d_copy_sr_sse2, 0, 0));
+INSTANTIATE_TEST_CASE_P(
+    SSE2_X, AV1Convolve2DSrTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_x_sr_sse2, 1, 0));
+INSTANTIATE_TEST_CASE_P(
+    SSE2_Y, AV1Convolve2DSrTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_y_sr_sse2, 0, 1));
+INSTANTIATE_TEST_CASE_P(
+    SSE2, AV1Convolve2DSrTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_sr_sse2, 1, 1));
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(AVX2_COPY, AV1Convolve2DSrTest,
+                        libaom_test::AV1Convolve2D::BuildParams(
+                            av1_convolve_2d_copy_sr_avx2, 0, 0));
+INSTANTIATE_TEST_CASE_P(
+    AVX2_X, AV1Convolve2DSrTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_x_sr_avx2, 1, 0));
+
+INSTANTIATE_TEST_CASE_P(
+    AVX2_Y, AV1Convolve2DSrTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_y_sr_avx2, 0, 1));
 
 INSTANTIATE_TEST_CASE_P(
-    SSE2, AV1Convolve2DTest,
-    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_sse2));
+    AVX2, AV1Convolve2DSrTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_sr_avx2, 1, 1));
+#endif  // HAVE_AVX2
+#endif  // HAVE_SSE2
 
-#if CONFIG_HIGHBITDEPTH && HAVE_SSSE3
-TEST_P(AV1HighbdConvolve2DTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON_X, AV1Convolve2DSrTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_x_sr_neon, 1, 0));
+
+INSTANTIATE_TEST_CASE_P(
+    NEON_Y, AV1Convolve2DSrTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_y_sr_neon, 0, 1));
+
+INSTANTIATE_TEST_CASE_P(
+    NEON, AV1Convolve2DSrTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_sr_neon, 1, 1));
 
-INSTANTIATE_TEST_CASE_P(SSSE3, AV1HighbdConvolve2DTest,
+INSTANTIATE_TEST_CASE_P(NEON_COPY, AV1Convolve2DSrTest,
+                        libaom_test::AV1Convolve2D::BuildParams(
+                            av1_convolve_2d_copy_sr_neon, 0, 0));
+#endif  // HAVE_NEON
+
+TEST_P(AV1JntConvolve2DTest, CheckOutput) { RunCheckOutput(GET_PARAM(0)); }
+TEST_P(AV1JntConvolve2DTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(0)); }
+
+INSTANTIATE_TEST_CASE_P(
+    C_COPY, AV1JntConvolve2DTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_2d_copy_c, 0, 0));
+
+INSTANTIATE_TEST_CASE_P(
+    C_X, AV1JntConvolve2DTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_x_c, 1, 0));
+
+INSTANTIATE_TEST_CASE_P(
+    C_Y, AV1JntConvolve2DTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_y_c, 0, 1));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2_COPY, AV1JntConvolve2DTest,
+                        libaom_test::AV1Convolve2D::BuildParams(
+                            av1_jnt_convolve_2d_copy_sse2, 0, 0));
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE2_X, AV1JntConvolve2DTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_x_sse2, 1, 0));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2_Y, AV1JntConvolve2DTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_y_sse2, 0, 1));
+
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, AV1JntConvolve2DTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_2d_ssse3, 1, 1));
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(AVX2_COPY, AV1JntConvolve2DTest,
+                        libaom_test::AV1Convolve2D::BuildParams(
+                            av1_jnt_convolve_2d_copy_avx2, 0, 0));
+INSTANTIATE_TEST_CASE_P(
+    AVX2_X, AV1JntConvolve2DTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_x_avx2, 1, 0));
+
+INSTANTIATE_TEST_CASE_P(
+    AVX2_Y, AV1JntConvolve2DTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_y_avx2, 0, 1));
+
+INSTANTIATE_TEST_CASE_P(
+    AVX2, AV1JntConvolve2DTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_2d_avx2, 1, 1));
+#endif  // HAVE_AVX2
+#endif  // HAVE_SSE4_1
+#endif  // HAVE_SSE2
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON_COPY, AV1JntConvolve2DTest,
+                        libaom_test::AV1Convolve2D::BuildParams(
+                            av1_jnt_convolve_2d_copy_neon, 0, 0));
+
+INSTANTIATE_TEST_CASE_P(
+    NEON, AV1JntConvolve2DTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_2d_neon, 1, 1));
+INSTANTIATE_TEST_CASE_P(
+    NEON_X, AV1JntConvolve2DTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_x_neon, 1, 0));
+
+INSTANTIATE_TEST_CASE_P(
+    NEON_Y, AV1JntConvolve2DTest,
+    libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_y_neon, 0, 1));
+#endif  // HAVE_NEON
+
+TEST_P(AV1HighbdConvolve2DSrTest, CheckOutput) { RunCheckOutput(GET_PARAM(1)); }
+TEST_P(AV1HighbdConvolve2DSrTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(1));
+}
+
+INSTANTIATE_TEST_CASE_P(C_X, AV1HighbdConvolve2DSrTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_convolve_x_sr_c, 1, 0));
+
+INSTANTIATE_TEST_CASE_P(C_Y, AV1HighbdConvolve2DSrTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_convolve_y_sr_c, 0, 1));
+
+INSTANTIATE_TEST_CASE_P(C_COPY, AV1HighbdConvolve2DSrTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_convolve_2d_copy_sr_c, 0, 0));
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2_COPY, AV1HighbdConvolve2DSrTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_convolve_2d_copy_sr_sse2, 0, 0));
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(SSSE3, AV1HighbdConvolve2DSrTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_convolve_2d_sr_ssse3, 1, 1));
+INSTANTIATE_TEST_CASE_P(SSSE3_X, AV1HighbdConvolve2DSrTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_convolve_x_sr_ssse3, 1, 0));
+INSTANTIATE_TEST_CASE_P(SSSE3_Y, AV1HighbdConvolve2DSrTest,
                         libaom_test::AV1HighbdConvolve2D::BuildParams(
-                            av1_highbd_convolve_2d_ssse3));
+                            av1_highbd_convolve_y_sr_ssse3, 0, 1));
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(AVX2, AV1HighbdConvolve2DSrTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_convolve_2d_sr_avx2, 1, 1));
+INSTANTIATE_TEST_CASE_P(AVX2_X, AV1HighbdConvolve2DSrTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_convolve_x_sr_avx2, 1, 0));
+INSTANTIATE_TEST_CASE_P(AVX2_Y, AV1HighbdConvolve2DSrTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_convolve_y_sr_avx2, 0, 1));
+INSTANTIATE_TEST_CASE_P(AVX2_COPY, AV1HighbdConvolve2DSrTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_convolve_2d_copy_sr_avx2, 0, 0));
+#endif  // HAVE_AVX2
+#endif  // HAVE_SSSE3
+#endif  // HAVE_SSE2
+TEST_P(AV1HighbdJntConvolve2DTest, CheckOutput) {
+  RunCheckOutput(GET_PARAM(1));
+}
 
-#endif
+TEST_P(AV1HighbdJntConvolve2DTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(1));
+}
 
+INSTANTIATE_TEST_CASE_P(C_X, AV1HighbdJntConvolve2DTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_jnt_convolve_x_c, 1, 0));
+
+INSTANTIATE_TEST_CASE_P(C_Y, AV1HighbdJntConvolve2DTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_jnt_convolve_y_c, 0, 1));
+
+INSTANTIATE_TEST_CASE_P(C_COPY, AV1HighbdJntConvolve2DTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_jnt_convolve_2d_copy_c, 0, 0));
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(SSE4_1_COPY, AV1HighbdJntConvolve2DTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_jnt_convolve_2d_copy_sse4_1, 0, 0));
+INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdJntConvolve2DTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_jnt_convolve_2d_sse4_1, 1, 1));
+INSTANTIATE_TEST_CASE_P(SSE4_1_X, AV1HighbdJntConvolve2DTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_jnt_convolve_x_sse4_1, 1, 0));
+INSTANTIATE_TEST_CASE_P(SSE4_1_Y, AV1HighbdJntConvolve2DTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_jnt_convolve_y_sse4_1, 0, 1));
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(AVX2_COPY, AV1HighbdJntConvolve2DTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_jnt_convolve_2d_copy_avx2, 0, 0));
+INSTANTIATE_TEST_CASE_P(AVX2, AV1HighbdJntConvolve2DTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_jnt_convolve_2d_avx2, 1, 1));
+INSTANTIATE_TEST_CASE_P(AVX2_X, AV1HighbdJntConvolve2DTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_jnt_convolve_x_avx2, 1, 0));
+INSTANTIATE_TEST_CASE_P(AVX2_Y, AV1HighbdJntConvolve2DTest,
+                        libaom_test::AV1HighbdConvolve2D::BuildParams(
+                            av1_highbd_jnt_convolve_y_avx2, 0, 1));
+#endif  // HAVE_AVX2
+#endif  // HAVE_SSE4_1
 }  // namespace
diff --git a/third_party/aom/test/av1_convolve_2d_test_util.cc b/third_party/aom/test/av1_convolve_2d_test_util.cc
index 3b61f6bb74..cbe3f8c9f9 100644
--- a/third_party/aom/test/av1_convolve_2d_test_util.cc
+++ b/third_party/aom/test/av1_convolve_2d_test_util.cc
@@ -11,183 +11,695 @@
 
 #include "test/av1_convolve_2d_test_util.h"
 
+#include "aom_ports/aom_timer.h"
+#include "av1/common/common_data.h"
 #include "av1/common/convolve.h"
 
-using std::tr1::tuple;
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
+using ::testing::tuple;
 
 namespace libaom_test {
 
+const int kMaxSize = 128 + 32;  // padding
 namespace AV1Convolve2D {
 
 ::testing::internal::ParamGenerator<Convolve2DParam> BuildParams(
-    convolve_2d_func filter) {
-  const Convolve2DParam params[] = {
-    make_tuple(4, 4, filter),   make_tuple(8, 8, filter),
-    make_tuple(64, 64, filter), make_tuple(4, 16, filter),
-    make_tuple(32, 8, filter),
-  };
-  return ::testing::ValuesIn(params);
+    convolve_2d_func filter, int has_subx, int has_suby) {
+  return ::testing::Combine(::testing::Values(filter),
+                            ::testing::Values(has_subx),
+                            ::testing::Values(has_suby),
+                            ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
 }
 
-AV1Convolve2DTest::~AV1Convolve2DTest() {}
-void AV1Convolve2DTest::SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+AV1Convolve2DSrTest::~AV1Convolve2DSrTest() {}
+void AV1Convolve2DSrTest::SetUp() {
+  rnd_.Reset(ACMRandom::DeterministicSeed());
+}
+
+void AV1Convolve2DSrTest::TearDown() { libaom_test::ClearSystemState(); }
+
+void AV1Convolve2DSrTest::RunCheckOutput(convolve_2d_func test_impl) {
+  const int w = kMaxSize, h = kMaxSize;
+  const int has_subx = GET_PARAM(1);
+  const int has_suby = GET_PARAM(2);
+  const int block_idx = GET_PARAM(3);
+  int hfilter, vfilter, subx, suby;
+  uint8_t input[kMaxSize * kMaxSize];
+  DECLARE_ALIGNED(32, uint8_t, output[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, output2[MAX_SB_SQUARE]);
+
+  for (int i = 0; i < h; ++i)
+    for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
+  for (int i = 0; i < MAX_SB_SQUARE; ++i)
+    output[i] = output2[i] = rnd_.Rand31();
+
+  // Make sure that sizes 2xN and Nx2 are also tested for chroma.
+  const int num_sizes =
+      (block_size_wide[block_idx] == 4 || block_size_high[block_idx] == 4) ? 2
+                                                                           : 1;
+  for (int shift = 0; shift < num_sizes; ++shift) {  // luma and chroma
+    const int out_w = block_size_wide[block_idx] >> shift;
+    const int out_h = block_size_high[block_idx] >> shift;
+    for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
+      for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL;
+           ++vfilter) {
+        InterpFilterParams filter_params_x =
+            av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
+                                                         out_w);
+        InterpFilterParams filter_params_y =
+            av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
+                                                         out_h);
+        for (int do_average = 0; do_average < 1; ++do_average) {
+          ConvolveParams conv_params1 =
+              get_conv_params_no_round(0, do_average, 0, NULL, 0, 0, 8);
+          ConvolveParams conv_params2 =
+              get_conv_params_no_round(0, do_average, 0, NULL, 0, 0, 8);
 
-void AV1Convolve2DTest::TearDown() { libaom_test::ClearSystemState(); }
+          const int subx_range = has_subx ? 16 : 1;
+          const int suby_range = has_suby ? 16 : 1;
+          for (subx = 0; subx < subx_range; ++subx) {
+            for (suby = 0; suby < suby_range; ++suby) {
+              // Choose random locations within the source block
+              const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
+              const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
+              av1_convolve_2d_sr_c(input + offset_r * w + offset_c, w, output,
+                                   MAX_SB_SIZE, out_w, out_h, &filter_params_x,
+                                   &filter_params_y, subx, suby, &conv_params1);
+              test_impl(input + offset_r * w + offset_c, w, output2,
+                        MAX_SB_SIZE, out_w, out_h, &filter_params_x,
+                        &filter_params_y, subx, suby, &conv_params2);
+
+              if (memcmp(output, output2, sizeof(output))) {
+                for (int i = 0; i < MAX_SB_SIZE; ++i) {
+                  for (int j = 0; j < MAX_SB_SIZE; ++j) {
+                    int idx = i * MAX_SB_SIZE + j;
+                    ASSERT_EQ(output[idx], output2[idx])
+                        << out_w << "x" << out_h << " Pixel mismatch at index "
+                        << idx << " = (" << i << ", " << j
+                        << "), sub pixel offset = (" << suby << ", " << subx
+                        << ")";
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
 
-void AV1Convolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
-  const int w = 128, h = 128;
-  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
-  int i, j, k;
+void AV1Convolve2DSrTest::RunSpeedTest(convolve_2d_func test_impl) {
+  const int w = kMaxSize, h = kMaxSize;
+  const int has_subx = GET_PARAM(1);
+  const int has_suby = GET_PARAM(2);
+  const int block_idx = GET_PARAM(3);
 
-  uint8_t *input = new uint8_t[h * w];
+  uint8_t input[kMaxSize * kMaxSize];
+  DECLARE_ALIGNED(32, uint8_t, output[MAX_SB_SQUARE]);
 
-  int output_n = out_h * MAX_SB_SIZE;
-  CONV_BUF_TYPE *output = new CONV_BUF_TYPE[output_n];
-  CONV_BUF_TYPE *output2 = new CONV_BUF_TYPE[output_n];
+  for (int i = 0; i < h; ++i)
+    for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
 
-  for (i = 0; i < h; ++i)
-    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
+  int hfilter = EIGHTTAP_REGULAR, vfilter = EIGHTTAP_REGULAR;
+  int subx = 0, suby = 0;
 
+  const int do_average = 0;
+  ConvolveParams conv_params2 =
+      get_conv_params_no_round(0, do_average, 0, NULL, 0, 0, 8);
+
+  // Make sure that sizes 2xN and Nx2 are also tested for chroma.
+  const int num_sizes =
+      (block_size_wide[block_idx] == 4 || block_size_high[block_idx] == 4) ? 2
+                                                                           : 1;
+  for (int shift = 0; shift < num_sizes; ++shift) {  // luma and chroma
+    const int out_w = block_size_wide[block_idx] >> shift;
+    const int out_h = block_size_high[block_idx] >> shift;
+    const int num_loops = 1000000000 / (out_w + out_h);
+
+    InterpFilterParams filter_params_x =
+        av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
+                                                     out_w);
+    InterpFilterParams filter_params_y =
+        av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
+                                                     out_h);
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+
+    for (int i = 0; i < num_loops; ++i)
+      test_impl(input, w, output, MAX_SB_SIZE, out_w, out_h, &filter_params_x,
+                &filter_params_y, subx, suby, &conv_params2);
+
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    printf("%d,%d convolve %3dx%-3d: %7.2f us\n", has_subx, has_suby, out_w,
+           out_h, 1000.0 * elapsed_time / num_loops);
+  }
+}
+
+AV1JntConvolve2DTest::~AV1JntConvolve2DTest() {}
+void AV1JntConvolve2DTest::SetUp() {
+  rnd_.Reset(ACMRandom::DeterministicSeed());
+}
+
+void AV1JntConvolve2DTest::TearDown() { libaom_test::ClearSystemState(); }
+
+void AV1JntConvolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
+  const int w = kMaxSize, h = kMaxSize;
+  const int has_subx = GET_PARAM(1);
+  const int has_suby = GET_PARAM(2);
+  const int block_idx = GET_PARAM(3);
   int hfilter, vfilter, subx, suby;
+  uint8_t input[kMaxSize * kMaxSize];
+  DECLARE_ALIGNED(32, CONV_BUF_TYPE, output1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, CONV_BUF_TYPE, output2[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(16, uint8_t, output8_1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(16, uint8_t, output8_2[MAX_SB_SQUARE]);
+
+  for (int i = 0; i < h; ++i)
+    for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    output1[i] = output2[i] = rnd_.Rand16();
+    output8_1[i] = output8_2[i] = rnd_.Rand8();
+  }
+
+  const int out_w = block_size_wide[block_idx];
+  const int out_h = block_size_high[block_idx];
   for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
     for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL; ++vfilter) {
       InterpFilterParams filter_params_x =
-          av1_get_interp_filter_params((InterpFilter)hfilter);
+          av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
+                                                       out_w);
       InterpFilterParams filter_params_y =
-          av1_get_interp_filter_params((InterpFilter)vfilter);
-      const int do_average = rnd_.Rand8() & 1;
-      ConvolveParams conv_params1 =
-          get_conv_params_no_round(0, do_average, 0, output, MAX_SB_SIZE);
-      ConvolveParams conv_params2 =
-          get_conv_params_no_round(0, do_average, 0, output2, MAX_SB_SIZE);
-
-      for (subx = 0; subx < 16; ++subx)
-        for (suby = 0; suby < 16; ++suby) {
-          // av1_convolve_2d is designed for accumulate two predicted blocks for
-          // compound mode, so we set num_iter to two here.
-          // A larger number may introduce overflow
-          const int num_iters = 2;
-          memset(output, 0, output_n * sizeof(*output));
-          memset(output2, 0, output_n * sizeof(*output2));
-          for (i = 0; i < num_iters; ++i) {
+          av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
+                                                       out_h);
+      for (int do_average = 0; do_average <= 1; ++do_average) {
+        ConvolveParams conv_params1 = get_conv_params_no_round(
+            0, do_average, 0, output1, MAX_SB_SIZE, 1, 8);
+        ConvolveParams conv_params2 = get_conv_params_no_round(
+            0, do_average, 0, output2, MAX_SB_SIZE, 1, 8);
+
+        // Test special case where jnt_comp_avg is not used
+        conv_params1.use_jnt_comp_avg = 0;
+        conv_params2.use_jnt_comp_avg = 0;
+
+        const int subx_range = has_subx ? 16 : 1;
+        const int suby_range = has_suby ? 16 : 1;
+        for (subx = 0; subx < subx_range; ++subx) {
+          for (suby = 0; suby < suby_range; ++suby) {
             // Choose random locations within the source block
-            int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
-            int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
-            av1_convolve_2d_c(input + offset_r * w + offset_c, w, output,
-                              MAX_SB_SIZE, out_w, out_h, &filter_params_x,
-                              &filter_params_y, subx, suby, &conv_params1);
-            test_impl(input + offset_r * w + offset_c, w, output2, MAX_SB_SIZE,
-                      out_w, out_h, &filter_params_x, &filter_params_y, subx,
-                      suby, &conv_params2);
-
-            for (j = 0; j < out_h; ++j)
-              for (k = 0; k < out_w; ++k) {
-                int idx = j * MAX_SB_SIZE + k;
-                ASSERT_EQ(output[idx], output2[idx])
-                    << "Pixel mismatch at index " << idx << " = (" << j << ", "
-                    << k << "), sub pixel offset = (" << suby << ", " << subx
-                    << ")";
+            const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
+            const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
+            av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w, output8_1,
+                                  MAX_SB_SIZE, out_w, out_h, &filter_params_x,
+                                  &filter_params_y, subx, suby, &conv_params1);
+            test_impl(input + offset_r * w + offset_c, w, output8_2,
+                      MAX_SB_SIZE, out_w, out_h, &filter_params_x,
+                      &filter_params_y, subx, suby, &conv_params2);
+
+            for (int i = 0; i < out_h; ++i) {
+              for (int j = 0; j < out_w; ++j) {
+                int idx = i * MAX_SB_SIZE + j;
+                ASSERT_EQ(output1[idx], output2[idx])
+                    << "Mismatch at unit tests for av1_jnt_convolve_2d\n"
+                    << out_w << "x" << out_h << " Pixel mismatch at index "
+                    << idx << " = (" << i << ", " << j
+                    << "), sub pixel offset = (" << suby << ", " << subx << ")";
+              }
+            }
+
+            if (memcmp(output8_1, output8_2, sizeof(output8_1))) {
+              for (int i = 0; i < MAX_SB_SIZE; ++i) {
+                for (int j = 0; j < MAX_SB_SIZE; ++j) {
+                  int idx = i * MAX_SB_SIZE + j;
+                  ASSERT_EQ(output8_1[idx], output8_2[idx])
+                      << out_w << "x" << out_h << " Pixel mismatch at index "
+                      << idx << " = (" << i << ", " << j
+                      << "), sub pixel offset = (" << suby << ", " << subx
+                      << ")";
+                }
+              }
+            }
+          }
+        }
+
+        // Test different combination of fwd and bck offset weights
+        for (int k = 0; k < 2; ++k) {
+          for (int l = 0; l < 4; ++l) {
+            conv_params1.use_jnt_comp_avg = 1;
+            conv_params2.use_jnt_comp_avg = 1;
+            conv_params1.fwd_offset = quant_dist_lookup_table[k][l][0];
+            conv_params1.bck_offset = quant_dist_lookup_table[k][l][1];
+            conv_params2.fwd_offset = quant_dist_lookup_table[k][l][0];
+            conv_params2.bck_offset = quant_dist_lookup_table[k][l][1];
+
+            for (subx = 0; subx < subx_range; ++subx) {
+              for (suby = 0; suby < suby_range; ++suby) {
+                // Choose random locations within the source block
+                const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
+                const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
+                av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w,
+                                      output8_1, MAX_SB_SIZE, out_w, out_h,
+                                      &filter_params_x, &filter_params_y, subx,
+                                      suby, &conv_params1);
+                test_impl(input + offset_r * w + offset_c, w, output8_2,
+                          MAX_SB_SIZE, out_w, out_h, &filter_params_x,
+                          &filter_params_y, subx, suby, &conv_params2);
+
+                for (int i = 0; i < out_h; ++i) {
+                  for (int j = 0; j < out_w; ++j) {
+                    int idx = i * MAX_SB_SIZE + j;
+                    ASSERT_EQ(output1[idx], output2[idx])
+                        << "Mismatch at unit tests for "
+                           "av1_jnt_convolve_2d\n"
+                        << out_w << "x" << out_h << " Pixel mismatch at index "
+                        << idx << " = (" << i << ", " << j
+                        << "), sub pixel offset = (" << suby << ", " << subx
+                        << ")";
+                  }
+                }
+                if (memcmp(output8_1, output8_2, sizeof(output8_1))) {
+                  for (int i = 0; i < MAX_SB_SIZE; ++i) {
+                    for (int j = 0; j < MAX_SB_SIZE; ++j) {
+                      int idx = i * MAX_SB_SIZE + j;
+                      ASSERT_EQ(output8_1[idx], output8_2[idx])
+                          << out_w << "x" << out_h
+                          << " Pixel mismatch at index " << idx << " = (" << i
+                          << ", " << j << "), sub pixel offset = (" << suby
+                          << ", " << subx << ")";
+                    }
+                  }
+                }
               }
+            }
           }
         }
+      }
     }
   }
-  delete[] input;
-  delete[] output;
-  delete[] output2;
+}
+
+void AV1JntConvolve2DTest::RunSpeedTest(convolve_2d_func test_impl) {
+  const int w = kMaxSize, h = kMaxSize;
+  const int has_subx = GET_PARAM(1);
+  const int has_suby = GET_PARAM(2);
+  const int block_idx = GET_PARAM(3);
+
+  int subx = 0, suby = 0;
+  uint8_t input[kMaxSize * kMaxSize];
+  DECLARE_ALIGNED(32, CONV_BUF_TYPE, output[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(16, uint8_t, output8[MAX_SB_SQUARE]);
+  int hfilter = EIGHTTAP_REGULAR, vfilter = EIGHTTAP_REGULAR;
+  for (int i = 0; i < h; ++i)
+    for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    output[i] = rnd_.Rand16();
+    output8[i] = rnd_.Rand8();
+  }
+
+  const int out_w = block_size_wide[block_idx];
+  const int out_h = block_size_high[block_idx];
+  const int num_loops = 1000000000 / (out_w + out_h);
+  const int do_average = 0;
+
+  InterpFilterParams filter_params_x =
+      av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
+                                                   out_w);
+  InterpFilterParams filter_params_y =
+      av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
+                                                   out_h);
+
+  ConvolveParams conv_params =
+      get_conv_params_no_round(0, do_average, 0, output, MAX_SB_SIZE, 1, 8);
+
+  conv_params.use_jnt_comp_avg = 0;
+
+  // Choose random locations within the source block
+  const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
+  const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
+
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+
+  for (int i = 0; i < num_loops; ++i)
+    test_impl(input + offset_r * w + offset_c, w, output8, MAX_SB_SIZE, out_w,
+              out_h, &filter_params_x, &filter_params_y, subx, suby,
+              &conv_params);
+
+  aom_usec_timer_mark(&timer);
+  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+  printf("%d,%d convolve %3dx%-3d: %7.2f us\n", has_subx, has_suby, out_w,
+         out_h, 1000.0 * elapsed_time / num_loops);
 }
 }  // namespace AV1Convolve2D
 
-#if CONFIG_HIGHBITDEPTH
 namespace AV1HighbdConvolve2D {
-
 ::testing::internal::ParamGenerator<HighbdConvolve2DParam> BuildParams(
-    highbd_convolve_2d_func filter) {
-  const HighbdConvolve2DParam params[] = {
-    make_tuple(4, 4, 8, filter),    make_tuple(8, 8, 8, filter),
-    make_tuple(64, 64, 8, filter),  make_tuple(4, 16, 8, filter),
-    make_tuple(32, 8, 8, filter),   make_tuple(4, 4, 10, filter),
-    make_tuple(8, 8, 10, filter),   make_tuple(64, 64, 10, filter),
-    make_tuple(4, 16, 10, filter),  make_tuple(32, 8, 10, filter),
-    make_tuple(4, 4, 12, filter),   make_tuple(8, 8, 12, filter),
-    make_tuple(64, 64, 12, filter), make_tuple(4, 16, 12, filter),
-    make_tuple(32, 8, 12, filter),
-  };
-  return ::testing::ValuesIn(params);
+    highbd_convolve_2d_func filter, int has_subx, int has_suby) {
+  return ::testing::Combine(
+      ::testing::Range(8, 13, 2), ::testing::Values(filter),
+      ::testing::Values(has_subx), ::testing::Values(has_suby),
+      ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
 }
 
-AV1HighbdConvolve2DTest::~AV1HighbdConvolve2DTest() {}
-void AV1HighbdConvolve2DTest::SetUp() {
+AV1HighbdConvolve2DSrTest::~AV1HighbdConvolve2DSrTest() {}
+void AV1HighbdConvolve2DSrTest::SetUp() {
   rnd_.Reset(ACMRandom::DeterministicSeed());
 }
 
-void AV1HighbdConvolve2DTest::TearDown() { libaom_test::ClearSystemState(); }
+void AV1HighbdConvolve2DSrTest::TearDown() { libaom_test::ClearSystemState(); }
 
-void AV1HighbdConvolve2DTest::RunCheckOutput(
+void AV1HighbdConvolve2DSrTest::RunSpeedTest(
     highbd_convolve_2d_func test_impl) {
-  const int w = 128, h = 128;
-  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
-  const int bd = GET_PARAM(2);
-  int i, j, k;
+  const int w = kMaxSize, h = kMaxSize;
+  const int bd = GET_PARAM(0);
+  const int has_subx = GET_PARAM(2);
+  const int has_suby = GET_PARAM(3);
+  const int block_idx = GET_PARAM(4);
+  int hfilter, vfilter, subx, suby;
+  uint16_t input[kMaxSize * kMaxSize];
+  DECLARE_ALIGNED(32, uint16_t, output[MAX_SB_SQUARE]);
+
+  for (int i = 0; i < h; ++i)
+    for (int j = 0; j < w; ++j)
+      input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+
+  hfilter = EIGHTTAP_REGULAR;
+  vfilter = EIGHTTAP_REGULAR;
+  int do_average = 0;
+
+  const int offset_r = 3;
+  const int offset_c = 3;
+  subx = 0;
+  suby = 0;
+
+  ConvolveParams conv_params =
+      get_conv_params_no_round(0, do_average, 0, NULL, 0, 0, bd);
+
+  // Make sure that sizes 2xN and Nx2 are also tested for chroma.
+  const int num_sizes =
+      (block_size_wide[block_idx] == 4 || block_size_high[block_idx] == 4) ? 2
+                                                                           : 1;
+
+  for (int shift = 0; shift < num_sizes; ++shift) {  // luma and chroma
+    const int out_w = block_size_wide[block_idx] >> shift;
+    const int out_h = block_size_high[block_idx] >> shift;
+    const int num_loops = 1000000000 / (out_w + out_h);
 
-  uint16_t *input = new uint16_t[h * w];
+    InterpFilterParams filter_params_x =
+        av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
+                                                     out_w);
+    InterpFilterParams filter_params_y =
+        av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
+                                                     out_h);
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < num_loops; ++i)
+      test_impl(input + offset_r * w + offset_c, w, output, MAX_SB_SIZE, out_w,
+                out_h, &filter_params_x, &filter_params_y, subx, suby,
+                &conv_params, bd);
+
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    printf("%d,%d convolve %3dx%-3d: %7.2f us\n", has_subx, has_suby, out_w,
+           out_h, 1000.0 * elapsed_time / num_loops);
+  }
+}
+
+void AV1HighbdConvolve2DSrTest::RunCheckOutput(
+    highbd_convolve_2d_func test_impl) {
+  const int w = kMaxSize, h = kMaxSize;
+  const int bd = GET_PARAM(0);
+  const int has_subx = GET_PARAM(2);
+  const int has_suby = GET_PARAM(3);
+  const int block_idx = GET_PARAM(4);
+  int hfilter, vfilter, subx, suby;
+  uint16_t input[kMaxSize * kMaxSize];
+  DECLARE_ALIGNED(32, uint16_t, output[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint16_t, output2[MAX_SB_SQUARE]);
 
-  int output_n = out_h * MAX_SB_SIZE;
-  CONV_BUF_TYPE *output = new CONV_BUF_TYPE[output_n];
-  CONV_BUF_TYPE *output2 = new CONV_BUF_TYPE[output_n];
+  for (int i = 0; i < h; ++i)
+    for (int j = 0; j < w; ++j)
+      input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+  for (int i = 0; i < MAX_SB_SQUARE; ++i)
+    output[i] = output2[i] = rnd_.Rand31();
 
-  for (i = 0; i < h; ++i)
-    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+  // Make sure that sizes 2xN and Nx2 are also tested for chroma.
+  const int num_sizes =
+      (block_size_wide[block_idx] == 4 || block_size_high[block_idx] == 4) ? 2
+                                                                           : 1;
+  for (int shift = 0; shift < num_sizes; ++shift) {  // luma and chroma
+    const int out_w = block_size_wide[block_idx] >> shift;
+    const int out_h = block_size_high[block_idx] >> shift;
+    for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
+      for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL;
+           ++vfilter) {
+        InterpFilterParams filter_params_x =
+            av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
+                                                         out_w);
+        InterpFilterParams filter_params_y =
+            av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
+                                                         out_h);
+        for (int do_average = 0; do_average < 1; ++do_average) {
+          ConvolveParams conv_params1 =
+              get_conv_params_no_round(0, do_average, 0, NULL, 0, 0, bd);
+          ConvolveParams conv_params2 =
+              get_conv_params_no_round(0, do_average, 0, NULL, 0, 0, bd);
 
+          const int subx_range = has_subx ? 16 : 1;
+          const int suby_range = has_suby ? 16 : 1;
+          for (subx = 0; subx < subx_range; ++subx) {
+            for (suby = 0; suby < suby_range; ++suby) {
+              // Choose random locations within the source block
+              const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
+              const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
+              av1_highbd_convolve_2d_sr_c(input + offset_r * w + offset_c, w,
+                                          output, MAX_SB_SIZE, out_w, out_h,
+                                          &filter_params_x, &filter_params_y,
+                                          subx, suby, &conv_params1, bd);
+              test_impl(input + offset_r * w + offset_c, w, output2,
+                        MAX_SB_SIZE, out_w, out_h, &filter_params_x,
+                        &filter_params_y, subx, suby, &conv_params2, bd);
+
+              if (memcmp(output, output2, sizeof(output))) {
+                for (int i = 0; i < MAX_SB_SIZE; ++i) {
+                  for (int j = 0; j < MAX_SB_SIZE; ++j) {
+                    int idx = i * MAX_SB_SIZE + j;
+                    ASSERT_EQ(output[idx], output2[idx])
+                        << out_w << "x" << out_h << " Pixel mismatch at index "
+                        << idx << " = (" << i << ", " << j
+                        << "), sub pixel offset = (" << suby << ", " << subx
+                        << ")";
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+AV1HighbdJntConvolve2DTest::~AV1HighbdJntConvolve2DTest() {}
+void AV1HighbdJntConvolve2DTest::SetUp() {
+  rnd_.Reset(ACMRandom::DeterministicSeed());
+}
+
+void AV1HighbdJntConvolve2DTest::TearDown() { libaom_test::ClearSystemState(); }
+
+void AV1HighbdJntConvolve2DTest::RunSpeedTest(
+    highbd_convolve_2d_func test_impl) {
+  const int w = kMaxSize, h = kMaxSize;
+  const int bd = GET_PARAM(0);
+  const int block_idx = GET_PARAM(4);
   int hfilter, vfilter, subx, suby;
+  uint16_t input[kMaxSize * kMaxSize];
+  DECLARE_ALIGNED(32, CONV_BUF_TYPE, output[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint16_t, output16[MAX_SB_SQUARE]);
+
+  for (int i = 0; i < h; ++i)
+    for (int j = 0; j < w; ++j)
+      input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) output[i] = rnd_.Rand16();
+  hfilter = EIGHTTAP_REGULAR;
+  vfilter = EIGHTTAP_REGULAR;
+  int do_average = 0;
+  const int out_w = block_size_wide[block_idx];
+  const int out_h = block_size_high[block_idx];
+
+  InterpFilterParams filter_params_x =
+      av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
+                                                   out_w);
+  InterpFilterParams filter_params_y =
+      av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
+                                                   out_h);
+
+  ConvolveParams conv_params =
+      get_conv_params_no_round(0, do_average, 0, output, MAX_SB_SIZE, 1, bd);
+
+  // Test special case where jnt_comp_avg is not used
+  conv_params.use_jnt_comp_avg = 0;
+
+  subx = 0;
+  suby = 0;
+  // Choose random locations within the source block
+  const int offset_r = 3;
+  const int offset_c = 3;
+
+  const int num_loops = 1000000000 / (out_w + out_h);
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < num_loops; ++i)
+    test_impl(input + offset_r * w + offset_c, w, output16, MAX_SB_SIZE, out_w,
+              out_h, &filter_params_x, &filter_params_y, subx, suby,
+              &conv_params, bd);
+
+  aom_usec_timer_mark(&timer);
+  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+  printf("convolve %3dx%-3d: %7.2f us\n", out_w, out_h,
+         1000.0 * elapsed_time / num_loops);
+}
+
+void AV1HighbdJntConvolve2DTest::RunCheckOutput(
+    highbd_convolve_2d_func test_impl) {
+  const int w = kMaxSize, h = kMaxSize;
+  const int bd = GET_PARAM(0);
+  const int has_subx = GET_PARAM(2);
+  const int has_suby = GET_PARAM(3);
+  const int block_idx = GET_PARAM(4);
+  int hfilter, vfilter, subx, suby;
+  uint16_t input[kMaxSize * kMaxSize];
+  DECLARE_ALIGNED(32, CONV_BUF_TYPE, output1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, CONV_BUF_TYPE, output2[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint16_t, output16_1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint16_t, output16_2[MAX_SB_SQUARE]);
+
+  for (int i = 0; i < h; ++i)
+    for (int j = 0; j < w; ++j)
+      input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    output1[i] = output2[i] = rnd_.Rand16();
+    output16_1[i] = output16_2[i] = rnd_.Rand16();
+  }
+
+  const int out_w = block_size_wide[block_idx];
+  const int out_h = block_size_high[block_idx];
   for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
     for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL; ++vfilter) {
       InterpFilterParams filter_params_x =
-          av1_get_interp_filter_params((InterpFilter)hfilter);
+          av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
+                                                       out_w);
       InterpFilterParams filter_params_y =
-          av1_get_interp_filter_params((InterpFilter)vfilter);
-      ConvolveParams conv_params1 =
-          get_conv_params_no_round(0, 0, 0, output, MAX_SB_SIZE);
-      ConvolveParams conv_params2 =
-          get_conv_params_no_round(0, 0, 0, output2, MAX_SB_SIZE);
-
-      for (subx = 0; subx < 16; ++subx)
-        for (suby = 0; suby < 16; ++suby) {
-          // av1_convolve_2d is designed for accumulate two predicted blocks for
-          // compound mode, so we set num_iter to two here.
-          // A larger number may introduce overflow
-          const int num_iters = 2;
-          memset(output, 0, output_n * sizeof(*output));
-          memset(output2, 0, output_n * sizeof(*output2));
-          for (i = 0; i < num_iters; ++i) {
+          av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
+                                                       out_h);
+      for (int do_average = 0; do_average <= 1; ++do_average) {
+        ConvolveParams conv_params1 = get_conv_params_no_round(
+            0, do_average, 0, output1, MAX_SB_SIZE, 1, bd);
+        ConvolveParams conv_params2 = get_conv_params_no_round(
+            0, do_average, 0, output2, MAX_SB_SIZE, 1, bd);
+
+        // Test special case where jnt_comp_avg is not used
+        conv_params1.use_jnt_comp_avg = 0;
+        conv_params2.use_jnt_comp_avg = 0;
+
+        const int subx_range = has_subx ? 16 : 1;
+        const int suby_range = has_suby ? 16 : 1;
+        for (subx = 0; subx < subx_range; ++subx) {
+          for (suby = 0; suby < suby_range; ++suby) {
             // Choose random locations within the source block
-            int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
-            int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
-            av1_highbd_convolve_2d_c(input + offset_r * w + offset_c, w, output,
-                                     MAX_SB_SIZE, out_w, out_h,
-                                     &filter_params_x, &filter_params_y, subx,
-                                     suby, &conv_params1, bd);
-            test_impl(input + offset_r * w + offset_c, w, output2, MAX_SB_SIZE,
-                      out_w, out_h, &filter_params_x, &filter_params_y, subx,
-                      suby, &conv_params2, bd);
-
-            for (j = 0; j < out_h; ++j)
-              for (k = 0; k < out_w; ++k) {
-                int idx = j * MAX_SB_SIZE + k;
-                ASSERT_EQ(output[idx], output2[idx])
-                    << "Pixel mismatch at index " << idx << " = (" << j << ", "
-                    << k << "), sub pixel offset = (" << suby << ", " << subx
-                    << ")";
+            const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
+            const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
+            av1_highbd_jnt_convolve_2d_c(input + offset_r * w + offset_c, w,
+                                         output16_1, MAX_SB_SIZE, out_w, out_h,
+                                         &filter_params_x, &filter_params_y,
+                                         subx, suby, &conv_params1, bd);
+            test_impl(input + offset_r * w + offset_c, w, output16_2,
+                      MAX_SB_SIZE, out_w, out_h, &filter_params_x,
+                      &filter_params_y, subx, suby, &conv_params2, bd);
+
+            for (int i = 0; i < out_h; ++i) {
+              for (int j = 0; j < out_w; ++j) {
+                int idx = i * MAX_SB_SIZE + j;
+                ASSERT_EQ(output1[idx], output2[idx])
+                    << out_w << "x" << out_h << " Pixel mismatch at index "
+                    << idx << " = (" << i << ", " << j
+                    << "), sub pixel offset = (" << suby << ", " << subx << ")";
+              }
+            }
+
+            if (memcmp(output16_1, output16_2, sizeof(output16_1))) {
+              for (int i = 0; i < MAX_SB_SIZE; ++i) {
+                for (int j = 0; j < MAX_SB_SIZE; ++j) {
+                  int idx = i * MAX_SB_SIZE + j;
+                  ASSERT_EQ(output16_1[idx], output16_2[idx])
+                      << out_w << "x" << out_h << " Pixel mismatch at index "
+                      << idx << " = (" << i << ", " << j
+                      << "), sub pixel offset = (" << suby << ", " << subx
+                      << ")";
+                }
+              }
+            }
+          }
+        }
+
+        // Test different combination of fwd and bck offset weights
+        for (int k = 0; k < 2; ++k) {
+          for (int l = 0; l < 4; ++l) {
+            conv_params1.use_jnt_comp_avg = 1;
+            conv_params2.use_jnt_comp_avg = 1;
+            conv_params1.fwd_offset = quant_dist_lookup_table[k][l][0];
+            conv_params1.bck_offset = quant_dist_lookup_table[k][l][1];
+            conv_params2.fwd_offset = quant_dist_lookup_table[k][l][0];
+            conv_params2.bck_offset = quant_dist_lookup_table[k][l][1];
+
+            const int subx_range = has_subx ? 16 : 1;
+            const int suby_range = has_suby ? 16 : 1;
+            for (subx = 0; subx < subx_range; ++subx) {
+              for (suby = 0; suby < suby_range; ++suby) {
+                // Choose random locations within the source block
+                const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
+                const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
+                av1_highbd_jnt_convolve_2d_c(
+                    input + offset_r * w + offset_c, w, output16_1, MAX_SB_SIZE,
+                    out_w, out_h, &filter_params_x, &filter_params_y, subx,
+                    suby, &conv_params1, bd);
+                test_impl(input + offset_r * w + offset_c, w, output16_2,
+                          MAX_SB_SIZE, out_w, out_h, &filter_params_x,
+                          &filter_params_y, subx, suby, &conv_params2, bd);
+
+                for (int i = 0; i < out_h; ++i) {
+                  for (int j = 0; j < out_w; ++j) {
+                    int idx = i * MAX_SB_SIZE + j;
+                    ASSERT_EQ(output1[idx], output2[idx])
+                        << out_w << "x" << out_h << " Pixel mismatch at index "
+                        << idx << " = (" << i << ", " << j
+                        << "), sub pixel offset = (" << suby << ", " << subx
+                        << ")";
+                  }
+                }
+
+                if (memcmp(output16_1, output16_2, sizeof(output16_1))) {
+                  for (int i = 0; i < MAX_SB_SIZE; ++i) {
+                    for (int j = 0; j < MAX_SB_SIZE; ++j) {
+                      int idx = i * MAX_SB_SIZE + j;
+                      ASSERT_EQ(output16_1[idx], output16_2[idx])
+                          << out_w << "x" << out_h
+                          << " Pixel mismatch at index " << idx << " = (" << i
+                          << ", " << j << "), sub pixel offset = (" << suby
+                          << ", " << subx << ")";
+                    }
+                  }
+                }
               }
+            }
           }
         }
+      }
     }
   }
-  delete[] input;
-  delete[] output;
-  delete[] output2;
 }
 }  // namespace AV1HighbdConvolve2D
-#endif  // CONFIG_HIGHBITDEPTH
 }  // namespace libaom_test
diff --git a/third_party/aom/test/av1_convolve_2d_test_util.h b/third_party/aom/test/av1_convolve_2d_test_util.h
index 013126b4a8..3a53dbdfe3 100644
--- a/third_party/aom/test/av1_convolve_2d_test_util.h
+++ b/third_party/aom/test/av1_convolve_2d_test_util.h
@@ -12,11 +12,13 @@
 #ifndef TEST_HIPREC_CONVOLVE_TEST_UTIL_H_
 #define TEST_HIPREC_CONVOLVE_TEST_UTIL_H_
 
+#include "config/av1_rtcd.h"
+#include "config/aom_dsp_rtcd.h"
+
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "test/acm_random.h"
 #include "test/util.h"
-#include "./av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
+
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 
@@ -25,62 +27,90 @@ namespace libaom_test {
 namespace AV1Convolve2D {
 
 typedef void (*convolve_2d_func)(const uint8_t *src, int src_stride,
-                                 CONV_BUF_TYPE *dst, int dst_stride, int w,
-                                 int h, InterpFilterParams *filter_params_x,
+                                 uint8_t *dst, int dst_stride, int w, int h,
+                                 InterpFilterParams *filter_params_x,
                                  InterpFilterParams *filter_params_y,
                                  const int subpel_x_q4, const int subpel_y_q4,
                                  ConvolveParams *conv_params);
 
-typedef std::tr1::tuple<int, int, convolve_2d_func> Convolve2DParam;
+typedef ::testing::tuple<convolve_2d_func, int, int, BLOCK_SIZE>
+    Convolve2DParam;
 
 ::testing::internal::ParamGenerator<Convolve2DParam> BuildParams(
-    convolve_2d_func filter);
+    convolve_2d_func filter, int subx_exist, int suby_exist);
 
-class AV1Convolve2DTest : public ::testing::TestWithParam<Convolve2DParam> {
+class AV1Convolve2DSrTest : public ::testing::TestWithParam<Convolve2DParam> {
  public:
-  virtual ~AV1Convolve2DTest();
+  virtual ~AV1Convolve2DSrTest();
   virtual void SetUp();
 
   virtual void TearDown();
 
  protected:
   void RunCheckOutput(convolve_2d_func test_impl);
+  void RunSpeedTest(convolve_2d_func test_impl);
 
   libaom_test::ACMRandom rnd_;
 };
 
+class AV1JntConvolve2DTest : public ::testing::TestWithParam<Convolve2DParam> {
+ public:
+  virtual ~AV1JntConvolve2DTest();
+  virtual void SetUp();
+
+  virtual void TearDown();
+
+ protected:
+  void RunCheckOutput(convolve_2d_func test_impl);
+  void RunSpeedTest(convolve_2d_func test_impl);
+
+  libaom_test::ACMRandom rnd_;
+};
 }  // namespace AV1Convolve2D
 
-#if CONFIG_HIGHBITDEPTH
 namespace AV1HighbdConvolve2D {
 typedef void (*highbd_convolve_2d_func)(
-    const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride,
-    int w, int h, InterpFilterParams *filter_params_x,
+    const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
+    int h, InterpFilterParams *filter_params_x,
     InterpFilterParams *filter_params_y, const int subpel_x_q4,
     const int subpel_y_q4, ConvolveParams *conv_params, int bd);
 
-typedef std::tr1::tuple<int, int, int, highbd_convolve_2d_func>
+typedef ::testing::tuple<int, highbd_convolve_2d_func, int, int, BLOCK_SIZE>
     HighbdConvolve2DParam;
 
 ::testing::internal::ParamGenerator<HighbdConvolve2DParam> BuildParams(
-    highbd_convolve_2d_func filter);
+    highbd_convolve_2d_func filter, int subx_exist, int suby_exist);
 
-class AV1HighbdConvolve2DTest
+class AV1HighbdConvolve2DSrTest
     : public ::testing::TestWithParam<HighbdConvolve2DParam> {
  public:
-  virtual ~AV1HighbdConvolve2DTest();
+  virtual ~AV1HighbdConvolve2DSrTest();
   virtual void SetUp();
 
   virtual void TearDown();
 
  protected:
   void RunCheckOutput(highbd_convolve_2d_func test_impl);
+  void RunSpeedTest(highbd_convolve_2d_func test_impl);
 
   libaom_test::ACMRandom rnd_;
 };
 
+class AV1HighbdJntConvolve2DTest
+    : public ::testing::TestWithParam<HighbdConvolve2DParam> {
+ public:
+  virtual ~AV1HighbdJntConvolve2DTest();
+  virtual void SetUp();
+
+  virtual void TearDown();
+
+ protected:
+  void RunCheckOutput(highbd_convolve_2d_func test_impl);
+  void RunSpeedTest(highbd_convolve_2d_func test_impl);
+
+  libaom_test::ACMRandom rnd_;
+};
 }  // namespace AV1HighbdConvolve2D
-#endif  // CONFIG_HIGHBITDEPTH
 
 }  // namespace libaom_test
 
diff --git a/third_party/aom/test/av1_convolve_optimz_test.cc b/third_party/aom/test/av1_convolve_optimz_test.cc
deleted file mode 100644
index 95bf63f44b..0000000000
--- a/third_party/aom/test/av1_convolve_optimz_test.cc
+++ /dev/null
@@ -1,405 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-namespace {
-
-using std::tr1::tuple;
-using libaom_test::ACMRandom;
-
-typedef void (*ConvInit)();
-typedef void (*conv_filter_t)(const uint8_t *, int, uint8_t *, int, int, int,
-                              const InterpFilterParams, int, int,
-                              ConvolveParams *);
-#if CONFIG_HIGHBITDEPTH
-typedef void (*hbd_conv_filter_t)(const uint16_t *, int, uint16_t *, int, int,
-                                  int, const InterpFilterParams, int, int, int,
-                                  int);
-#endif
-
-// Test parameter list:
-//  <convolve_horiz_func, convolve_vert_func,
-//  <width, height>, filter_params, subpel_x_q4, avg>
-typedef tuple<int, int> BlockDimension;
-typedef tuple<ConvInit, conv_filter_t, conv_filter_t, BlockDimension,
-              InterpFilter, int, int>
-    ConvParams;
-#if CONFIG_HIGHBITDEPTH
-// Test parameter list:
-//  <convolve_horiz_func, convolve_vert_func,
-//  <width, height>, filter_params, subpel_x_q4, avg, bit_dpeth>
-typedef tuple<ConvInit, hbd_conv_filter_t, hbd_conv_filter_t, BlockDimension,
-              InterpFilter, int, int, int>
-    HbdConvParams;
-#endif
-
-// Note:
-//  src_ and src_ref_ have special boundary requirement
-//  dst_ and dst_ref_ don't
-const size_t maxWidth = 256;
-const size_t maxHeight = 256;
-const size_t maxBlockSize = maxWidth * maxHeight;
-const int horizOffset = 32;
-const int vertiOffset = 32;
-const int stride = 128;
-const int x_step_q4 = 16;
-
-class AV1ConvolveOptimzTest : public ::testing::TestWithParam<ConvParams> {
- public:
-  virtual ~AV1ConvolveOptimzTest() {}
-  virtual void SetUp() {
-    ConvInit conv_init = GET_PARAM(0);
-    conv_init();
-    conv_horiz_ = GET_PARAM(1);
-    conv_vert_ = GET_PARAM(2);
-    BlockDimension block = GET_PARAM(3);
-    width_ = std::tr1::get<0>(block);
-    height_ = std::tr1::get<1>(block);
-    filter_ = GET_PARAM(4);
-    subpel_ = GET_PARAM(5);
-    int ref = GET_PARAM(6);
-    const int plane = 0;
-    conv_params_ = get_conv_params(ref, ref, plane);
-
-    alloc_ = new uint8_t[maxBlockSize * 4];
-    src_ = alloc_ + (vertiOffset * maxWidth);
-    src_ += horizOffset;
-    src_ref_ = src_ + maxBlockSize;
-
-    dst_ = alloc_ + 2 * maxBlockSize;
-    dst_ref_ = alloc_ + 3 * maxBlockSize;
-  }
-
-  virtual void TearDown() {
-    delete[] alloc_;
-    libaom_test::ClearSystemState();
-  }
-
- protected:
-  void RunHorizFilterBitExactCheck();
-  void RunVertFilterBitExactCheck();
-
- private:
-  void PrepFilterBuffer();
-  void DiffFilterBuffer();
-  conv_filter_t conv_horiz_;
-  conv_filter_t conv_vert_;
-  uint8_t *alloc_;
-  uint8_t *src_;
-  uint8_t *dst_;
-  uint8_t *src_ref_;
-  uint8_t *dst_ref_;
-  int width_;
-  int height_;
-  InterpFilter filter_;
-  int subpel_;
-  ConvolveParams conv_params_;
-};
-
-void AV1ConvolveOptimzTest::PrepFilterBuffer() {
-  int r, c;
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-
-  memset(alloc_, 0, 4 * maxBlockSize * sizeof(alloc_[0]));
-
-  uint8_t *src_ptr = src_;
-  uint8_t *dst_ptr = dst_;
-  uint8_t *src_ref_ptr = src_ref_;
-  uint8_t *dst_ref_ptr = dst_ref_;
-
-  for (r = 0; r < height_; ++r) {
-    for (c = 0; c < width_; ++c) {
-      src_ptr[c] = rnd.Rand8();
-      src_ref_ptr[c] = src_ptr[c];
-      dst_ptr[c] = rnd.Rand8();
-      dst_ref_ptr[c] = dst_ptr[c];
-    }
-    src_ptr += stride;
-    src_ref_ptr += stride;
-    dst_ptr += stride;
-    dst_ref_ptr += stride;
-  }
-}
-
-void AV1ConvolveOptimzTest::DiffFilterBuffer() {
-  int r, c;
-  const uint8_t *dst_ptr = dst_;
-  const uint8_t *dst_ref_ptr = dst_ref_;
-  for (r = 0; r < height_; ++r) {
-    for (c = 0; c < width_; ++c) {
-      EXPECT_EQ((uint8_t)dst_ref_ptr[c], (uint8_t)dst_ptr[c])
-          << "Error at row: " << r << " col: " << c << " "
-          << "w = " << width_ << " "
-          << "h = " << height_ << " "
-          << "filter group index = " << filter_ << " "
-          << "filter index = " << subpel_;
-    }
-    dst_ptr += stride;
-    dst_ref_ptr += stride;
-  }
-}
-
-void AV1ConvolveOptimzTest::RunHorizFilterBitExactCheck() {
-  PrepFilterBuffer();
-
-  InterpFilterParams filter_params = av1_get_interp_filter_params(filter_);
-
-  av1_convolve_horiz_c(src_ref_, stride, dst_ref_, stride, width_, height_,
-                       filter_params, subpel_, x_step_q4, &conv_params_);
-
-  conv_horiz_(src_, stride, dst_, stride, width_, height_, filter_params,
-              subpel_, x_step_q4, &conv_params_);
-
-  DiffFilterBuffer();
-
-  // Note:
-  // Here we need calculate a height which is different from the specified one
-  // and test again.
-  int intermediate_height =
-      (((height_ - 1) * 16 + subpel_) >> SUBPEL_BITS) + filter_params.taps;
-  PrepFilterBuffer();
-
-  av1_convolve_horiz_c(src_ref_, stride, dst_ref_, stride, width_,
-                       intermediate_height, filter_params, subpel_, x_step_q4,
-                       &conv_params_);
-
-  conv_horiz_(src_, stride, dst_, stride, width_, intermediate_height,
-              filter_params, subpel_, x_step_q4, &conv_params_);
-
-  DiffFilterBuffer();
-}
-
-void AV1ConvolveOptimzTest::RunVertFilterBitExactCheck() {
-  PrepFilterBuffer();
-
-  InterpFilterParams filter_params = av1_get_interp_filter_params(filter_);
-
-  av1_convolve_vert_c(src_ref_, stride, dst_ref_, stride, width_, height_,
-                      filter_params, subpel_, x_step_q4, &conv_params_);
-
-  conv_vert_(src_, stride, dst_, stride, width_, height_, filter_params,
-             subpel_, x_step_q4, &conv_params_);
-
-  DiffFilterBuffer();
-}
-
-TEST_P(AV1ConvolveOptimzTest, HorizBitExactCheck) {
-  RunHorizFilterBitExactCheck();
-}
-TEST_P(AV1ConvolveOptimzTest, VerticalBitExactCheck) {
-  RunVertFilterBitExactCheck();
-}
-
-using std::tr1::make_tuple;
-
-#if (HAVE_SSSE3 || HAVE_SSE4_1) && CONFIG_DUAL_FILTER
-const BlockDimension kBlockDim[] = {
-  make_tuple(2, 2),    make_tuple(2, 4),    make_tuple(4, 4),
-  make_tuple(4, 8),    make_tuple(8, 4),    make_tuple(8, 8),
-  make_tuple(8, 16),   make_tuple(16, 8),   make_tuple(16, 16),
-  make_tuple(16, 32),  make_tuple(32, 16),  make_tuple(32, 32),
-  make_tuple(32, 64),  make_tuple(64, 32),  make_tuple(64, 64),
-  make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128),
-};
-
-// 10/12-tap filters
-const InterpFilter kFilter[] = { EIGHTTAP_REGULAR, BILINEAR, MULTITAP_SHARP };
-
-const int kSubpelQ4[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
-
-const int kAvg[] = { 0, 1 };
-#endif
-
-#if HAVE_SSSE3 && CONFIG_DUAL_FILTER
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, AV1ConvolveOptimzTest,
-    ::testing::Combine(::testing::Values(av1_lowbd_convolve_init_ssse3),
-                       ::testing::Values(av1_convolve_horiz_ssse3),
-                       ::testing::Values(av1_convolve_vert_ssse3),
-                       ::testing::ValuesIn(kBlockDim),
-                       ::testing::ValuesIn(kFilter),
-                       ::testing::ValuesIn(kSubpelQ4),
-                       ::testing::ValuesIn(kAvg)));
-#endif  // HAVE_SSSE3 && CONFIG_DUAL_FILTER
-
-#if CONFIG_HIGHBITDEPTH
-typedef ::testing::TestWithParam<HbdConvParams> TestWithHbdConvParams;
-class AV1HbdConvolveOptimzTest : public TestWithHbdConvParams {
- public:
-  virtual ~AV1HbdConvolveOptimzTest() {}
-  virtual void SetUp() {
-    ConvInit conv_init = GET_PARAM(0);
-    conv_init();
-    conv_horiz_ = GET_PARAM(1);
-    conv_vert_ = GET_PARAM(2);
-    BlockDimension block = GET_PARAM(3);
-    width_ = std::tr1::get<0>(block);
-    height_ = std::tr1::get<1>(block);
-    filter_ = GET_PARAM(4);
-    subpel_ = GET_PARAM(5);
-    avg_ = GET_PARAM(6);
-    bit_depth_ = GET_PARAM(7);
-
-    alloc_ = new uint16_t[maxBlockSize * 4];
-    src_ = alloc_ + (vertiOffset * maxWidth);
-    src_ += horizOffset;
-    src_ref_ = src_ + maxBlockSize;
-
-    dst_ = alloc_ + 2 * maxBlockSize;
-    dst_ref_ = alloc_ + 3 * maxBlockSize;
-  }
-
-  virtual void TearDown() {
-    delete[] alloc_;
-    libaom_test::ClearSystemState();
-  }
-
- protected:
-  void RunHorizFilterBitExactCheck();
-  void RunVertFilterBitExactCheck();
-
- private:
-  void PrepFilterBuffer();
-  void DiffFilterBuffer();
-  hbd_conv_filter_t conv_horiz_;
-  hbd_conv_filter_t conv_vert_;
-  uint16_t *alloc_;
-  uint16_t *src_;
-  uint16_t *dst_;
-  uint16_t *src_ref_;
-  uint16_t *dst_ref_;
-  int width_;
-  int height_;
-  InterpFilter filter_;
-  int subpel_;
-  int avg_;
-  int bit_depth_;
-};
-
-void AV1HbdConvolveOptimzTest::PrepFilterBuffer() {
-  int r, c;
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-
-  memset(alloc_, 0, 4 * maxBlockSize * sizeof(alloc_[0]));
-
-  uint16_t *src_ptr = src_;
-  uint16_t *dst_ptr = dst_;
-  uint16_t *dst_ref_ptr = dst_ref_;
-  uint16_t hbd_mask = (1 << bit_depth_) - 1;
-
-  for (r = 0; r < height_; ++r) {
-    for (c = 0; c < width_; ++c) {
-      src_ptr[c] = rnd.Rand16() & hbd_mask;
-      dst_ptr[c] = rnd.Rand16() & hbd_mask;
-      dst_ref_ptr[c] = dst_ptr[c];
-    }
-    src_ptr += stride;
-    dst_ptr += stride;
-    dst_ref_ptr += stride;
-  }
-}
-
-void AV1HbdConvolveOptimzTest::DiffFilterBuffer() {
-  int r, c;
-  const uint16_t *dst_ptr = dst_;
-  const uint16_t *dst_ref_ptr = dst_ref_;
-  for (r = 0; r < height_; ++r) {
-    for (c = 0; c < width_; ++c) {
-      EXPECT_EQ((uint16_t)dst_ref_ptr[c], (uint16_t)dst_ptr[c])
-          << "Error at row: " << r << " col: " << c << " "
-          << "w = " << width_ << " "
-          << "h = " << height_ << " "
-          << "filter group index = " << filter_ << " "
-          << "filter index = " << subpel_ << " "
-          << "bit depth = " << bit_depth_;
-    }
-    dst_ptr += stride;
-    dst_ref_ptr += stride;
-  }
-}
-
-void AV1HbdConvolveOptimzTest::RunHorizFilterBitExactCheck() {
-  PrepFilterBuffer();
-
-  InterpFilterParams filter_params = av1_get_interp_filter_params(filter_);
-
-  av1_highbd_convolve_horiz_c(src_, stride, dst_ref_, stride, width_, height_,
-                              filter_params, subpel_, x_step_q4, avg_,
-                              bit_depth_);
-
-  conv_horiz_(src_, stride, dst_, stride, width_, height_, filter_params,
-              subpel_, x_step_q4, avg_, bit_depth_);
-
-  DiffFilterBuffer();
-
-  // Note:
-  // Here we need calculate a height which is different from the specified one
-  // and test again.
-  int intermediate_height =
-      (((height_ - 1) * 16 + subpel_) >> SUBPEL_BITS) + filter_params.taps;
-  PrepFilterBuffer();
-
-  av1_highbd_convolve_horiz_c(src_, stride, dst_ref_, stride, width_,
-                              intermediate_height, filter_params, subpel_,
-                              x_step_q4, avg_, bit_depth_);
-
-  conv_horiz_(src_, stride, dst_, stride, width_, intermediate_height,
-              filter_params, subpel_, x_step_q4, avg_, bit_depth_);
-
-  DiffFilterBuffer();
-}
-
-void AV1HbdConvolveOptimzTest::RunVertFilterBitExactCheck() {
-  PrepFilterBuffer();
-
-  InterpFilterParams filter_params = av1_get_interp_filter_params(filter_);
-
-  av1_highbd_convolve_vert_c(src_, stride, dst_ref_, stride, width_, height_,
-                             filter_params, subpel_, x_step_q4, avg_,
-                             bit_depth_);
-
-  conv_vert_(src_, stride, dst_, stride, width_, height_, filter_params,
-             subpel_, x_step_q4, avg_, bit_depth_);
-
-  DiffFilterBuffer();
-}
-
-TEST_P(AV1HbdConvolveOptimzTest, HorizBitExactCheck) {
-  RunHorizFilterBitExactCheck();
-}
-TEST_P(AV1HbdConvolveOptimzTest, VertBitExactCheck) {
-  RunVertFilterBitExactCheck();
-}
-
-#if HAVE_SSE4_1 && CONFIG_DUAL_FILTER
-
-const int kBitdepth[] = { 10, 12 };
-
-INSTANTIATE_TEST_CASE_P(
-    SSE4_1, AV1HbdConvolveOptimzTest,
-    ::testing::Combine(::testing::Values(av1_highbd_convolve_init_sse4_1),
-                       ::testing::Values(av1_highbd_convolve_horiz_sse4_1),
-                       ::testing::Values(av1_highbd_convolve_vert_sse4_1),
-                       ::testing::ValuesIn(kBlockDim),
-                       ::testing::ValuesIn(kFilter),
-                       ::testing::ValuesIn(kSubpelQ4),
-                       ::testing::ValuesIn(kAvg),
-                       ::testing::ValuesIn(kBitdepth)));
-#endif  // HAVE_SSE4_1 && CONFIG_DUAL_FILTER
-#endif  // CONFIG_HIGHBITDEPTH
-}  // namespace
diff --git a/third_party/aom/test/av1_convolve_scale_test.cc b/third_party/aom/test/av1_convolve_scale_test.cc
index 9d8be888de..e0571423c0 100644
--- a/third_party/aom/test/av1_convolve_scale_test.cc
+++ b/third_party/aom/test/av1_convolve_scale_test.cc
@@ -13,13 +13,16 @@
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./av1_rtcd.h"
+#include "config/av1_rtcd.h"
+
 #include "aom_ports/aom_timer.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
 
+#include "av1/common/common_data.h"
+
 namespace {
 const int kTestIters = 10;
 const int kPerfIters = 1000;
@@ -29,8 +32,8 @@ const int kHPad = 32;
 const int kXStepQn = 16;
 const int kYStepQn = 20;
 
-using std::tr1::tuple;
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
+using ::testing::tuple;
 using libaom_test::ACMRandom;
 
 enum NTaps { EIGHT_TAP, TEN_TAP, TWELVE_TAP };
@@ -120,6 +123,7 @@ class TestImage {
     // Allocate image data
     src_data_.resize(2 * src_block_size());
     dst_data_.resize(2 * dst_block_size());
+    dst_16_data_.resize(2 * dst_block_size());
   }
 
   void Initialize(ACMRandom *rnd);
@@ -136,8 +140,13 @@ class TestImage {
     return borders ? block : block + kHPad + src_stride_ * kVPad;
   }
 
-  int32_t *GetDstData(bool ref, bool borders) {
-    int32_t *block = &dst_data_[ref ? 0 : dst_block_size()];
+  SrcPixel *GetDstData(bool ref, bool borders) {
+    SrcPixel *block = &dst_data_[ref ? 0 : dst_block_size()];
+    return borders ? block : block + kHPad + dst_stride_ * kVPad;
+  }
+
+  CONV_BUF_TYPE *GetDst16Data(bool ref, bool borders) {
+    CONV_BUF_TYPE *block = &dst_16_data_[ref ? 0 : dst_block_size()];
     return borders ? block : block + kHPad + dst_stride_ * kVPad;
   }
 
@@ -146,7 +155,8 @@ class TestImage {
   int src_stride_, dst_stride_;
 
   std::vector<SrcPixel> src_data_;
-  std::vector<int32_t> dst_data_;
+  std::vector<SrcPixel> dst_data_;
+  std::vector<CONV_BUF_TYPE> dst_16_data_;
 };
 
 template <typename Pixel>
@@ -190,17 +200,23 @@ template <typename SrcPixel>
 void TestImage<SrcPixel>::Initialize(ACMRandom *rnd) {
   PrepBuffers(rnd, w_, h_, src_stride_, bd_, false, &src_data_[0]);
   PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_data_[0]);
+  PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_16_data_[0]);
 }
 
 template <typename SrcPixel>
 void TestImage<SrcPixel>::Check() const {
   // If memcmp returns 0, there's nothing to do.
   const int num_pixels = dst_block_size();
-  const int32_t *ref_dst = &dst_data_[0];
-  const int32_t *tst_dst = &dst_data_[num_pixels];
+  const SrcPixel *ref_dst = &dst_data_[0];
+  const SrcPixel *tst_dst = &dst_data_[num_pixels];
 
-  if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) return;
+  const CONV_BUF_TYPE *ref_16_dst = &dst_16_data_[0];
+  const CONV_BUF_TYPE *tst_16_dst = &dst_16_data_[num_pixels];
 
+  if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) {
+    if (0 == memcmp(ref_16_dst, tst_16_dst, sizeof(*ref_16_dst) * num_pixels))
+      return;
+  }
   // Otherwise, iterate through the buffer looking for differences (including
   // the edges)
   const int stride = dst_stride_;
@@ -213,6 +229,17 @@ void TestImage<SrcPixel>::Check() const {
           << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad);
     }
   }
+
+  for (int r = 0; r < h_ + 2 * kVPad; ++r) {
+    for (int c = 0; c < w_ + 2 * kHPad; ++c) {
+      const int32_t ref_value = ref_16_dst[r * stride + c];
+      const int32_t tst_value = tst_16_dst[r * stride + c];
+
+      EXPECT_EQ(tst_value, ref_value)
+          << "Error in 16 bit buffer "
+          << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad);
+    }
+  }
 }
 
 typedef tuple<int, int> BlockDimension;
@@ -242,8 +269,8 @@ class ConvolveScaleTestBase : public ::testing::Test {
 
  protected:
   void SetParams(const BaseParams &params, int bd) {
-    width_ = std::tr1::get<0>(params.dims);
-    height_ = std::tr1::get<1>(params.dims);
+    width_ = ::testing::get<0>(params.dims);
+    height_ = ::testing::get<1>(params.dims);
     ntaps_x_ = params.ntaps_x;
     ntaps_y_ = params.ntaps_y;
     bd_ = bd;
@@ -251,19 +278,54 @@ class ConvolveScaleTestBase : public ::testing::Test {
 
     filter_x_.set(ntaps_x_, false);
     filter_y_.set(ntaps_y_, true);
-    convolve_params_ = get_conv_params_no_round(0, avg_ != false, 0, NULL, 0);
+    convolve_params_ =
+        get_conv_params_no_round(0, avg_ != false, 0, NULL, 0, 1, bd);
 
     delete image_;
     image_ = new TestImage<SrcPixel>(width_, height_, bd_);
   }
 
+  void SetConvParamOffset(int i, int j, int is_compound, int do_average,
+                          int use_jnt_comp_avg) {
+    if (i == -1 && j == -1) {
+      convolve_params_.use_jnt_comp_avg = use_jnt_comp_avg;
+      convolve_params_.is_compound = is_compound;
+      convolve_params_.do_average = do_average;
+    } else {
+      convolve_params_.use_jnt_comp_avg = use_jnt_comp_avg;
+      convolve_params_.fwd_offset = quant_dist_lookup_table[i][j][0];
+      convolve_params_.bck_offset = quant_dist_lookup_table[i][j][1];
+      convolve_params_.is_compound = is_compound;
+      convolve_params_.do_average = do_average;
+    }
+  }
+
   void Run() {
     ACMRandom rnd(ACMRandom::DeterministicSeed());
     for (int i = 0; i < kTestIters; ++i) {
+      int is_compound = 0;
+      SetConvParamOffset(-1, -1, is_compound, 0, 0);
       Prep(&rnd);
       RunOne(true);
       RunOne(false);
       image_->Check();
+
+      is_compound = 1;
+      for (int do_average = 0; do_average < 2; do_average++) {
+        for (int use_jnt_comp_avg = 0; use_jnt_comp_avg < 2;
+             use_jnt_comp_avg++) {
+          for (int j = 0; j < 2; ++j) {
+            for (int k = 0; k < 4; ++k) {
+              SetConvParamOffset(j, k, is_compound, do_average,
+                                 use_jnt_comp_avg);
+              Prep(&rnd);
+              RunOne(true);
+              RunOne(false);
+              image_->Check();
+            }
+          }
+        }
+      }
     }
   }
 
@@ -327,7 +389,7 @@ class ConvolveScaleTestBase : public ::testing::Test {
 typedef tuple<int, int> BlockDimension;
 
 typedef void (*LowbdConvolveFunc)(const uint8_t *src, int src_stride,
-                                  int32_t *dst, int dst_stride, int w, int h,
+                                  uint8_t *dst, int dst_stride, int w, int h,
                                   InterpFilterParams *filter_params_x,
                                   InterpFilterParams *filter_params_y,
                                   const int subpel_x_qn, const int x_step_qn,
@@ -359,10 +421,10 @@ class LowBDConvolveScaleTest
 
   void RunOne(bool ref) {
     const uint8_t *src = image_->GetSrcData(ref, false);
-    CONV_BUF_TYPE *dst = image_->GetDstData(ref, false);
+    uint8_t *dst = image_->GetDstData(ref, false);
+    convolve_params_.dst = image_->GetDst16Data(ref, false);
     const int src_stride = image_->src_stride();
     const int dst_stride = image_->dst_stride();
-
     if (ref) {
       av1_convolve_2d_scale_c(src, src_stride, dst, dst_stride, width_, height_,
                               &filter_x_.params_, &filter_y_.params_, subpel_x_,
@@ -387,7 +449,7 @@ const BlockDimension kBlockDim[] = {
   make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128),
 };
 
-const NTaps kNTaps[] = { EIGHT_TAP, TEN_TAP, TWELVE_TAP };
+const NTaps kNTaps[] = { EIGHT_TAP };
 
 TEST_P(LowBDConvolveScaleTest, Check) { Run(); }
 TEST_P(LowBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); }
@@ -399,9 +461,8 @@ INSTANTIATE_TEST_CASE_P(
                        ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
                        ::testing::Bool()));
 
-#if CONFIG_HIGHBITDEPTH
 typedef void (*HighbdConvolveFunc)(const uint16_t *src, int src_stride,
-                                   int32_t *dst, int dst_stride, int w, int h,
+                                   uint16_t *dst, int dst_stride, int w, int h,
                                    InterpFilterParams *filter_params_x,
                                    InterpFilterParams *filter_params_y,
                                    const int subpel_x_qn, const int x_step_qn,
@@ -433,7 +494,8 @@ class HighBDConvolveScaleTest
 
   void RunOne(bool ref) {
     const uint16_t *src = image_->GetSrcData(ref, false);
-    CONV_BUF_TYPE *dst = image_->GetDstData(ref, false);
+    uint16_t *dst = image_->GetDstData(ref, false);
+    convolve_params_.dst = image_->GetDst16Data(ref, false);
     const int src_stride = image_->src_stride();
     const int dst_stride = image_->dst_stride();
 
@@ -464,6 +526,4 @@ INSTANTIATE_TEST_CASE_P(
                        ::testing::ValuesIn(kBlockDim),
                        ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
                        ::testing::Bool(), ::testing::ValuesIn(kBDs)));
-
-#endif  // CONFIG_HIGHBITDEPTH
 }  // namespace
diff --git a/third_party/aom/test/av1_convolve_test.cc b/third_party/aom/test/av1_convolve_test.cc
deleted file mode 100644
index aaef7cfe07..0000000000
--- a/third_party/aom/test/av1_convolve_test.cc
+++ /dev/null
@@ -1,514 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <algorithm>
-#include <vector>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/mem.h"
-#include "av1/common/filter.h"
-#include "av1/common/convolve.h"
-#include "test/acm_random.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-using std::tr1::tuple;
-static void filter_block1d_horiz_c(const uint8_t *src_ptr, int src_stride,
-                                   const int16_t *filter, int tap,
-                                   uint8_t *dst_ptr, int dst_stride, int w,
-                                   int h) {
-  src_ptr -= tap / 2 - 1;
-  for (int r = 0; r < h; ++r) {
-    for (int c = 0; c < w; ++c) {
-      int sum = 0;
-      for (int i = 0; i < tap; ++i) {
-        sum += src_ptr[c + i] * filter[i];
-      }
-      dst_ptr[c] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
-    }
-    src_ptr += src_stride;
-    dst_ptr += dst_stride;
-  }
-}
-
-static void filter_block1d_vert_c(const uint8_t *src_ptr, int src_stride,
-                                  const int16_t *filter, int tap,
-                                  uint8_t *dst_ptr, int dst_stride, int w,
-                                  int h) {
-  src_ptr -= (tap / 2 - 1) * src_stride;
-  for (int r = 0; r < h; ++r) {
-    for (int c = 0; c < w; ++c) {
-      int sum = 0;
-      for (int i = 0; i < tap; ++i) {
-        sum += src_ptr[c + i * src_stride] * filter[i];
-      }
-      dst_ptr[c] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
-    }
-    src_ptr += src_stride;
-    dst_ptr += dst_stride;
-  }
-}
-
-static int match(const uint8_t *out, int out_stride, const uint8_t *ref_out,
-                 int ref_out_stride, int w, int h) {
-  for (int r = 0; r < h; ++r) {
-    for (int c = 0; c < w; ++c) {
-      if (out[r * out_stride + c] != ref_out[r * ref_out_stride + c]) return 0;
-    }
-  }
-  return 1;
-}
-
-typedef void (*ConvolveFunc)(const uint8_t *src, int src_stride, uint8_t *dst,
-                             int dst_stride, int w, int h,
-                             const InterpFilterParams filter_params,
-                             const int subpel_q4, int step_q4,
-                             ConvolveParams *conv_params);
-
-struct ConvolveFunctions {
-  ConvolveFunctions(ConvolveFunc hf, ConvolveFunc vf) : hf_(hf), vf_(vf) {}
-  ConvolveFunc hf_;
-  ConvolveFunc vf_;
-};
-
-typedef tuple<ConvolveFunctions *, InterpFilter /*filter_x*/,
-              InterpFilter /*filter_y*/>
-    ConvolveParam;
-
-class Av1ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
- public:
-  virtual void SetUp() {
-    rnd_(ACMRandom::DeterministicSeed());
-    cfs_ = GET_PARAM(0);
-    interp_filter_ls_[0] = GET_PARAM(2);
-    interp_filter_ls_[2] = interp_filter_ls_[0];
-    interp_filter_ls_[1] = GET_PARAM(1);
-    interp_filter_ls_[3] = interp_filter_ls_[1];
-  }
-  virtual void TearDown() {
-    while (buf_ls_.size() > 0) {
-      uint8_t *buf = buf_ls_.back();
-      aom_free(buf);
-      buf_ls_.pop_back();
-    }
-  }
-  virtual uint8_t *add_input(int w, int h, int *stride) {
-    uint8_t *buf =
-        reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, kBufferSize));
-    buf_ls_.push_back(buf);
-    *stride = w + MAX_FILTER_TAP - 1;
-    int offset = MAX_FILTER_TAP / 2 - 1;
-    for (int r = 0; r < h + MAX_FILTER_TAP - 1; ++r) {
-      for (int c = 0; c < w + MAX_FILTER_TAP - 1; ++c) {
-        buf[r * (*stride) + c] = rnd_.Rand8();
-      }
-    }
-    return buf + offset * (*stride) + offset;
-  }
-  virtual uint8_t *add_output(int w, int /*h*/, int *stride) {
-    uint8_t *buf =
-        reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, kBufferSize));
-    buf_ls_.push_back(buf);
-    *stride = w;
-    return buf;
-  }
-  virtual void random_init_buf(uint8_t *buf, int w, int h, int stride) {
-    for (int r = 0; r < h; ++r) {
-      for (int c = 0; c < w; ++c) {
-        buf[r * stride + c] = rnd_.Rand8();
-      }
-    }
-  }
-
- protected:
-  static const int kDataAlignment = 16;
-  static const int kOuterBlockSize = MAX_SB_SIZE + MAX_FILTER_TAP - 1;
-  static const int kBufferSize = kOuterBlockSize * kOuterBlockSize;
-  std::vector<uint8_t *> buf_ls_;
-  InterpFilter interp_filter_ls_[4];
-  ConvolveFunctions *cfs_;
-  ACMRandom rnd_;
-};
-
-int bsize_ls[] = { 1, 2, 4, 8, 16, 32, 64, 3, 7, 15, 31, 63 };
-int bsize_num = NELEMENTS(bsize_ls);
-
-TEST_P(Av1ConvolveTest, av1_convolve_vert) {
-  const int y_step_q4 = 16;
-  ConvolveParams conv_params = get_conv_params(0, 0, 0);
-
-  int in_stride, out_stride, ref_out_stride, avg_out_stride, ref_avg_out_stride;
-  uint8_t *in = add_input(MAX_SB_SIZE, MAX_SB_SIZE, &in_stride);
-  uint8_t *out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &out_stride);
-  uint8_t *ref_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_out_stride);
-  uint8_t *avg_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &avg_out_stride);
-  uint8_t *ref_avg_out =
-      add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_avg_out_stride);
-  for (int hb_idx = 0; hb_idx < bsize_num; ++hb_idx) {
-    for (int vb_idx = 0; vb_idx < bsize_num; ++vb_idx) {
-      int w = bsize_ls[hb_idx];
-      int h = bsize_ls[vb_idx];
-      for (int subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; ++subpel_y_q4) {
-        InterpFilter filter_y = interp_filter_ls_[0];
-        InterpFilterParams param_vert = av1_get_interp_filter_params(filter_y);
-        const int16_t *filter_vert =
-            av1_get_interp_filter_subpel_kernel(param_vert, subpel_y_q4);
-
-        filter_block1d_vert_c(in, in_stride, filter_vert, param_vert.taps,
-                              ref_out, ref_out_stride, w, h);
-
-        conv_params.ref = 0;
-        conv_params.do_average = 0;
-        cfs_->vf_(in, in_stride, out, out_stride, w, h, param_vert, subpel_y_q4,
-                  y_step_q4, &conv_params);
-        EXPECT_EQ(match(out, out_stride, ref_out, ref_out_stride, w, h), 1)
-            << " hb_idx " << hb_idx << " vb_idx " << vb_idx << " filter_y "
-            << filter_y << " subpel_y_q4 " << subpel_y_q4;
-
-        random_init_buf(avg_out, w, h, avg_out_stride);
-        for (int r = 0; r < h; ++r) {
-          for (int c = 0; c < w; ++c) {
-            ref_avg_out[r * ref_avg_out_stride + c] = ROUND_POWER_OF_TWO(
-                avg_out[r * avg_out_stride + c] + out[r * out_stride + c], 1);
-          }
-        }
-        conv_params.ref = 1;
-        conv_params.do_average = 1;
-        cfs_->vf_(in, in_stride, avg_out, avg_out_stride, w, h, param_vert,
-                  subpel_y_q4, y_step_q4, &conv_params);
-        EXPECT_EQ(match(avg_out, avg_out_stride, ref_avg_out,
-                        ref_avg_out_stride, w, h),
-                  1)
-            << " hb_idx " << hb_idx << " vb_idx " << vb_idx << " filter_y "
-            << filter_y << " subpel_y_q4 " << subpel_y_q4;
-      }
-    }
-  }
-};
-
-TEST_P(Av1ConvolveTest, av1_convolve_horiz) {
-  const int x_step_q4 = 16;
-  ConvolveParams conv_params = get_conv_params(0, 0, 0);
-
-  int in_stride, out_stride, ref_out_stride, avg_out_stride, ref_avg_out_stride;
-  uint8_t *in = add_input(MAX_SB_SIZE, MAX_SB_SIZE, &in_stride);
-  uint8_t *out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &out_stride);
-  uint8_t *ref_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_out_stride);
-  uint8_t *avg_out = add_output(MAX_SB_SIZE, MAX_SB_SIZE, &avg_out_stride);
-  uint8_t *ref_avg_out =
-      add_output(MAX_SB_SIZE, MAX_SB_SIZE, &ref_avg_out_stride);
-  for (int hb_idx = 0; hb_idx < bsize_num; ++hb_idx) {
-    for (int vb_idx = 0; vb_idx < bsize_num; ++vb_idx) {
-      int w = bsize_ls[hb_idx];
-      int h = bsize_ls[vb_idx];
-      for (int subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; ++subpel_x_q4) {
-        InterpFilter filter_x = interp_filter_ls_[1];
-        InterpFilterParams param_horiz = av1_get_interp_filter_params(filter_x);
-        const int16_t *filter_horiz =
-            av1_get_interp_filter_subpel_kernel(param_horiz, subpel_x_q4);
-
-        filter_block1d_horiz_c(in, in_stride, filter_horiz, param_horiz.taps,
-                               ref_out, ref_out_stride, w, h);
-
-        conv_params.ref = 0;
-        conv_params.do_average = 0;
-        cfs_->hf_(in, in_stride, out, out_stride, w, h, param_horiz,
-                  subpel_x_q4, x_step_q4, &conv_params);
-        EXPECT_EQ(match(out, out_stride, ref_out, ref_out_stride, w, h), 1)
-            << " hb_idx " << hb_idx << " vb_idx " << vb_idx << " filter_x "
-            << filter_x << " subpel_x_q4 " << subpel_x_q4;
-
-        random_init_buf(avg_out, w, h, avg_out_stride);
-        for (int r = 0; r < h; ++r) {
-          for (int c = 0; c < w; ++c) {
-            ref_avg_out[r * ref_avg_out_stride + c] = ROUND_POWER_OF_TWO(
-                avg_out[r * avg_out_stride + c] + out[r * out_stride + c], 1);
-          }
-        }
-        conv_params.ref = 1;
-        conv_params.do_average = 1;
-        cfs_->hf_(in, in_stride, avg_out, avg_out_stride, w, h, param_horiz,
-                  subpel_x_q4, x_step_q4, &conv_params);
-        EXPECT_EQ(match(avg_out, avg_out_stride, ref_avg_out,
-                        ref_avg_out_stride, w, h),
-                  1)
-            << "hb_idx " << hb_idx << "vb_idx" << vb_idx << " filter_x "
-            << filter_x << "subpel_x_q4 " << subpel_x_q4;
-      }
-    }
-  }
-};
-
-ConvolveFunctions convolve_functions_c(av1_convolve_horiz_c,
-                                       av1_convolve_vert_c);
-
-InterpFilter filter_ls[] = { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH,
-                             MULTITAP_SHARP };
-
-INSTANTIATE_TEST_CASE_P(
-    C, Av1ConvolveTest,
-    ::testing::Combine(::testing::Values(&convolve_functions_c),
-                       ::testing::ValuesIn(filter_ls),
-                       ::testing::ValuesIn(filter_ls)));
-
-#if CONFIG_HIGHBITDEPTH
-#ifndef __clang_analyzer__
-TEST(AV1ConvolveTest, av1_highbd_convolve) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  InterpFilters interp_filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
-  InterpFilterParams filter_params =
-      av1_get_interp_filter_params(EIGHTTAP_REGULAR);
-  int filter_size = filter_params.taps;
-  int filter_center = filter_size / 2 - 1;
-  uint16_t src[12 * 12];
-  int src_stride = filter_size;
-  uint16_t dst[1] = { 0 };
-  int dst_stride = 1;
-  int x_step_q4 = 16;
-  int y_step_q4 = 16;
-  int avg = 0;
-  int bd = 10;
-  int w = 1;
-  int h = 1;
-
-  int subpel_x_q4;
-  int subpel_y_q4;
-
-  for (int i = 0; i < filter_size * filter_size; i++) {
-    src[i] = rnd.Rand16() % (1 << bd);
-  }
-
-  for (subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) {
-    for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
-      av1_highbd_convolve(
-          CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
-          src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, interp_filters,
-          subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
-
-      const int16_t *x_filter =
-          av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
-      const int16_t *y_filter =
-          av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
-
-      int temp[12];
-      int dst_ref = 0;
-      for (int r = 0; r < filter_size; r++) {
-        temp[r] = 0;
-        for (int c = 0; c < filter_size; c++) {
-          temp[r] += x_filter[c] * src[r * filter_size + c];
-        }
-        temp[r] =
-            clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
-        dst_ref += temp[r] * y_filter[r];
-      }
-      dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
-      EXPECT_EQ(dst[0], dst_ref);
-    }
-  }
-}
-#endif
-
-TEST(AV1ConvolveTest, av1_highbd_convolve_avg) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  InterpFilters interp_filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
-  InterpFilterParams filter_params =
-      av1_get_interp_filter_params(EIGHTTAP_REGULAR);
-  int filter_size = filter_params.taps;
-  int filter_center = filter_size / 2 - 1;
-  uint16_t src0[12 * 12];
-  uint16_t src1[12 * 12];
-  int src_stride = filter_size;
-  uint16_t dst0[1] = { 0 };
-  uint16_t dst1[1] = { 0 };
-  uint16_t dst[1] = { 0 };
-  int dst_stride = 1;
-  int x_step_q4 = 16;
-  int y_step_q4 = 16;
-  int avg = 0;
-  int bd = 10;
-
-  int w = 1;
-  int h = 1;
-
-  int subpel_x_q4;
-  int subpel_y_q4;
-
-  for (int i = 0; i < filter_size * filter_size; i++) {
-    src0[i] = rnd.Rand16() % (1 << bd);
-    src1[i] = rnd.Rand16() % (1 << bd);
-  }
-
-  for (subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) {
-    for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
-      int offset = filter_size * filter_center + filter_center;
-
-      avg = 0;
-      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
-                          CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
-                          interp_filters, subpel_x_q4, x_step_q4, subpel_y_q4,
-                          y_step_q4, avg, bd);
-      avg = 0;
-      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
-                          CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
-                          interp_filters, subpel_x_q4, x_step_q4, subpel_y_q4,
-                          y_step_q4, avg, bd);
-
-      avg = 0;
-      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
-                          CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
-                          interp_filters, subpel_x_q4, x_step_q4, subpel_y_q4,
-                          y_step_q4, avg, bd);
-      avg = 1;
-      av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
-                          CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
-                          interp_filters, subpel_x_q4, x_step_q4, subpel_y_q4,
-                          y_step_q4, avg, bd);
-
-      EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
-    }
-  }
-}
-#endif  // CONFIG_HIGHBITDEPTH
-
-#define CONVOLVE_SPEED_TEST 0
-#if CONVOLVE_SPEED_TEST
-#define highbd_convolve_speed(func, block_size, frame_size)                  \
-  TEST(AV1ConvolveTest, func##_speed_##block_size##_##frame_size) {          \
-    ACMRandom rnd(ACMRandom::DeterministicSeed());                           \
-    InterpFilter interp_filter = EIGHTTAP;                                   \
-    InterpFilterParams filter_params =                                       \
-        av1_get_interp_filter_params(interp_filter);                         \
-    int filter_size = filter_params.tap;                                     \
-    int filter_center = filter_size / 2 - 1;                                 \
-    DECLARE_ALIGNED(16, uint16_t,                                            \
-                    src[(frame_size + 7) * (frame_size + 7)]) = { 0 };       \
-    int src_stride = frame_size + 7;                                         \
-    DECLARE_ALIGNED(16, uint16_t, dst[frame_size * frame_size]) = { 0 };     \
-    int dst_stride = frame_size;                                             \
-    int x_step_q4 = 16;                                                      \
-    int y_step_q4 = 16;                                                      \
-    int subpel_x_q4 = 8;                                                     \
-    int subpel_y_q4 = 6;                                                     \
-    int bd = 10;                                                             \
-                                                                             \
-    int w = block_size;                                                      \
-    int h = block_size;                                                      \
-                                                                             \
-    const int16_t *filter_x =                                                \
-        av1_get_interp_filter_kernel(filter_params, subpel_x_q4);            \
-    const int16_t *filter_y =                                                \
-        av1_get_interp_filter_kernel(filter_params, subpel_y_q4);            \
-                                                                             \
-    for (int i = 0; i < src_stride * src_stride; i++) {                      \
-      src[i] = rnd.Rand16() % (1 << bd);                                     \
-    }                                                                        \
-                                                                             \
-    int offset = filter_center * src_stride + filter_center;                 \
-    int row_offset = 0;                                                      \
-    int col_offset = 0;                                                      \
-    for (int i = 0; i < 100000; i++) {                                       \
-      int src_total_offset = offset + col_offset * src_stride + row_offset;  \
-      int dst_total_offset = col_offset * dst_stride + row_offset;           \
-      func(CONVERT_TO_BYTEPTR(src + src_total_offset), src_stride,           \
-           CONVERT_TO_BYTEPTR(dst + dst_total_offset), dst_stride, filter_x, \
-           x_step_q4, filter_y, y_step_q4, w, h, bd);                        \
-      if (offset + w + w < frame_size) {                                     \
-        row_offset += w;                                                     \
-      } else {                                                               \
-        row_offset = 0;                                                      \
-        col_offset += h;                                                     \
-      }                                                                      \
-      if (col_offset + h >= frame_size) {                                    \
-        col_offset = 0;                                                      \
-      }                                                                      \
-    }                                                                        \
-  }
-
-#define lowbd_convolve_speed(func, block_size, frame_size)                  \
-  TEST(AV1ConvolveTest, func##_speed_l_##block_size##_##frame_size) {       \
-    ACMRandom rnd(ACMRandom::DeterministicSeed());                          \
-    InterpFilter interp_filter = EIGHTTAP;                                  \
-    InterpFilterParams filter_params =                                      \
-        av1_get_interp_filter_params(interp_filter);                        \
-    int filter_size = filter_params.tap;                                    \
-    int filter_center = filter_size / 2 - 1;                                \
-    DECLARE_ALIGNED(16, uint8_t, src[(frame_size + 7) * (frame_size + 7)]); \
-    int src_stride = frame_size + 7;                                        \
-    DECLARE_ALIGNED(16, uint8_t, dst[frame_size * frame_size]);             \
-    int dst_stride = frame_size;                                            \
-    int x_step_q4 = 16;                                                     \
-    int y_step_q4 = 16;                                                     \
-    int subpel_x_q4 = 8;                                                    \
-    int subpel_y_q4 = 6;                                                    \
-    int bd = 8;                                                             \
-                                                                            \
-    int w = block_size;                                                     \
-    int h = block_size;                                                     \
-                                                                            \
-    const int16_t *filter_x =                                               \
-        av1_get_interp_filter_kernel(filter_params, subpel_x_q4);           \
-    const int16_t *filter_y =                                               \
-        av1_get_interp_filter_kernel(filter_params, subpel_y_q4);           \
-                                                                            \
-    for (int i = 0; i < src_stride * src_stride; i++) {                     \
-      src[i] = rnd.Rand16() % (1 << bd);                                    \
-    }                                                                       \
-                                                                            \
-    int offset = filter_center * src_stride + filter_center;                \
-    int row_offset = 0;                                                     \
-    int col_offset = 0;                                                     \
-    for (int i = 0; i < 100000; i++) {                                      \
-      func(src + offset, src_stride, dst, dst_stride, filter_x, x_step_q4,  \
-           filter_y, y_step_q4, w, h);                                      \
-      if (offset + w + w < frame_size) {                                    \
-        row_offset += w;                                                    \
-      } else {                                                              \
-        row_offset = 0;                                                     \
-        col_offset += h;                                                    \
-      }                                                                     \
-      if (col_offset + h >= frame_size) {                                   \
-        col_offset = 0;                                                     \
-      }                                                                     \
-    }                                                                       \
-  }
-
-// This experiment shows that when frame size is 64x64
-// aom_highbd_convolve8_sse2 and aom_convolve8_sse2's speed are similar.
-// However when frame size becomes 1024x1024
-// aom_highbd_convolve8_sse2 is around 50% slower than aom_convolve8_sse2
-// we think the bottleneck is from memory IO
-highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 64);
-highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 64);
-highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 64);
-highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 64);
-
-lowbd_convolve_speed(aom_convolve8_sse2, 8, 64);
-lowbd_convolve_speed(aom_convolve8_sse2, 16, 64);
-lowbd_convolve_speed(aom_convolve8_sse2, 32, 64);
-lowbd_convolve_speed(aom_convolve8_sse2, 64, 64);
-
-highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 1024);
-highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 1024);
-highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 1024);
-highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 1024);
-
-lowbd_convolve_speed(aom_convolve8_sse2, 8, 1024);
-lowbd_convolve_speed(aom_convolve8_sse2, 16, 1024);
-lowbd_convolve_speed(aom_convolve8_sse2, 32, 1024);
-lowbd_convolve_speed(aom_convolve8_sse2, 64, 1024);
-#endif  // CONVOLVE_SPEED_TEST
-}  // namespace
diff --git a/third_party/aom/test/av1_dct_test.cc b/third_party/aom/test/av1_dct_test.cc
deleted file mode 100644
index fdaf9abb98..0000000000
--- a/third_party/aom/test/av1_dct_test.cc
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include <new>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/util.h"
-#include "./aom_config.h"
-#include "aom_ports/msvc.h"
-
-#undef CONFIG_COEFFICIENT_RANGE_CHECKING
-#define CONFIG_COEFFICIENT_RANGE_CHECKING 1
-#define AV1_DCT_GTEST
-#include "av1/encoder/dct.c"
-#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \
-    CONFIG_DAALA_DCT32
-#include "av1/common/daala_tx.c"
-#endif
-
-using libaom_test::ACMRandom;
-
-namespace {
-void reference_dct_1d(const double *in, double *out, int size) {
-  const double kInvSqrt2 = 0.707106781186547524400844362104;
-  for (int k = 0; k < size; ++k) {
-    out[k] = 0;
-    for (int n = 0; n < size; ++n) {
-      out[k] += in[n] * cos(PI * (2 * n + 1) * k / (2 * size));
-    }
-    if (k == 0) out[k] = out[k] * kInvSqrt2;
-  }
-}
-
-typedef void (*FdctFuncRef)(const double *in, double *out, int size);
-typedef void (*IdctFuncRef)(const double *in, double *out, int size);
-typedef void (*FdctFunc)(const tran_low_t *in, tran_low_t *out);
-typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out);
-
-class TransTestBase {
- public:
-  virtual ~TransTestBase() {}
-
- protected:
-  void RunFwdAccuracyCheck() {
-    tran_low_t *input = new tran_low_t[txfm_size_];
-    tran_low_t *output = new tran_low_t[txfm_size_];
-    double *ref_input = new double[txfm_size_];
-    double *ref_output = new double[txfm_size_];
-
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 5000;
-    for (int ti = 0; ti < count_test_block; ++ti) {
-      for (int ni = 0; ni < txfm_size_; ++ni) {
-        input[ni] = rnd.Rand8() - rnd.Rand8();
-        ref_input[ni] = static_cast<double>(input[ni]);
-      }
-
-      fwd_txfm_(input, output);
-      fwd_txfm_ref_(ref_input, ref_output, txfm_size_);
-
-      for (int ni = 0; ni < txfm_size_; ++ni) {
-        EXPECT_LE(
-            abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))),
-            max_error_);
-      }
-    }
-
-    delete[] input;
-    delete[] output;
-    delete[] ref_input;
-    delete[] ref_output;
-  }
-
-  double max_error_;
-  int txfm_size_;
-  FdctFunc fwd_txfm_;
-  FdctFuncRef fwd_txfm_ref_;
-};
-
-typedef std::tr1::tuple<FdctFunc, FdctFuncRef, int, int> FdctParam;
-class AV1FwdTxfm : public TransTestBase,
-                   public ::testing::TestWithParam<FdctParam> {
- public:
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    fwd_txfm_ref_ = GET_PARAM(1);
-    txfm_size_ = GET_PARAM(2);
-    max_error_ = GET_PARAM(3);
-  }
-  virtual void TearDown() {}
-};
-
-TEST_P(AV1FwdTxfm, RunFwdAccuracyCheck) { RunFwdAccuracyCheck(); }
-
-INSTANTIATE_TEST_CASE_P(
-    C, AV1FwdTxfm,
-    ::testing::Values(FdctParam(&fdct4, &reference_dct_1d, 4, 1),
-                      FdctParam(&fdct8, &reference_dct_1d, 8, 1),
-                      FdctParam(&fdct16, &reference_dct_1d, 16, 2),
-                      FdctParam(&fdct32, &reference_dct_1d, 32, 3)));
-}  // namespace
diff --git a/third_party/aom/test/av1_ext_tile_test.cc b/third_party/aom/test/av1_ext_tile_test.cc
index 034b071678..d2abbab7f1 100644
--- a/third_party/aom/test/av1_ext_tile_test.cc
+++ b/third_party/aom/test/av1_ext_tile_test.cc
@@ -46,6 +46,7 @@ class AV1ExtTileTest
     cfg.allow_lowbitdepth = 1;
 
     decoder_ = codec_->CreateDecoder(cfg, 0);
+    decoder_->Control(AV1_SET_TILE_MODE, 1);
     decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1);
     decoder_->Control(AV1_SET_DECODE_TILE_COL, -1);
 
@@ -86,13 +87,8 @@ class AV1ExtTileTest
       encoder->Control(AV1E_SET_TILE_ROWS, kTileSize);
       // TODO(yunqingwang): test single_tile_decoding = 0.
       encoder->Control(AV1E_SET_SINGLE_TILE_DECODING, 1);
-#if CONFIG_EXT_PARTITION
       // Always use 64x64 max partition.
       encoder->Control(AV1E_SET_SUPERBLOCK_SIZE, AOM_SUPERBLOCK_SIZE_64X64);
-#endif
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
-      encoder->Control(AV1E_SET_TILE_LOOPFILTER, 0);
-#endif
     }
 
     if (video->frame() == 1) {
@@ -174,6 +170,23 @@ class AV1ExtTileTest
     }
   }
 
+  void TestRoundTrip() {
+    ::libaom_test::I420VideoSource video(
+        "hantro_collage_w352h288.yuv", kImgWidth, kImgHeight, 30, 1, 0, kLimit);
+    cfg_.rc_target_bitrate = 500;
+    cfg_.g_error_resilient = AOM_ERROR_RESILIENT_DEFAULT;
+    cfg_.large_scale_tile = 1;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_threads = 1;
+
+    // Tile encoding
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+    // Compare to check if two vectors are equal.
+    ASSERT_EQ(md5_, tile_md5_);
+  }
+
   ::libaom_test::TestMode encoding_mode_;
   int set_cpu_used_;
   ::libaom_test::Decoder *decoder_;
@@ -182,25 +195,19 @@ class AV1ExtTileTest
   std::vector<std::string> tile_md5_;
 };
 
-TEST_P(AV1ExtTileTest, DecoderResultTest) {
-  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", kImgWidth,
-                                       kImgHeight, 30, 1, 0, kLimit);
-  cfg_.rc_target_bitrate = 500;
-  cfg_.g_error_resilient = AOM_ERROR_RESILIENT_DEFAULT;
-  cfg_.large_scale_tile = 1;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.g_threads = 1;
+TEST_P(AV1ExtTileTest, DISABLED_DecoderResultTest) { TestRoundTrip(); }
 
-  // Tile encoding
-  init_flags_ = AOM_CODEC_USE_PSNR;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+AV1_INSTANTIATE_TEST_CASE(
+    // Now only test 2-pass mode.
+    AV1ExtTileTest, ::testing::Values(::libaom_test::kTwoPassGood),
+    ::testing::Range(1, 4));
 
-  // Compare to check if two vectors are equal.
-  ASSERT_EQ(md5_, tile_md5_);
-}
+class AV1ExtTileTestLarge : public AV1ExtTileTest {};
+
+TEST_P(AV1ExtTileTestLarge, DISABLED_DecoderResultTest) { TestRoundTrip(); }
 
 AV1_INSTANTIATE_TEST_CASE(
     // Now only test 2-pass mode.
-    AV1ExtTileTest, ::testing::Values(::libaom_test::kTwoPassGood),
-    ::testing::Range(0, 4));
+    AV1ExtTileTestLarge, ::testing::Values(::libaom_test::kTwoPassGood),
+    ::testing::Range(0, 1));
 }  // namespace
diff --git a/third_party/aom/test/av1_fht16x16_test.cc b/third_party/aom/test/av1_fht16x16_test.cc
deleted file mode 100644
index 21235a837c..0000000000
--- a/third_party/aom/test/av1_fht16x16_test.cc
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-#include "aom_ports/mem.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-using std::tr1::tuple;
-using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht16x16Param;
-
-void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,
-                  TxfmParam *txfm_param) {
-  av1_fht16x16_c(in, out, stride, txfm_param);
-}
-
-void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
-                  const TxfmParam *txfm_param) {
-  av1_iht16x16_256_add_c(in, dest, stride, txfm_param);
-}
-
-#if CONFIG_HIGHBITDEPTH
-typedef void (*IHbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                           TX_TYPE tx_type, int bd);
-typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
-                          TX_TYPE tx_type, int bd);
-
-// Target optimized function, tx_type, bit depth
-typedef tuple<HbdHtFunc, TX_TYPE, int> HighbdHt16x16Param;
-
-void highbd_fht16x16_ref(const int16_t *in, int32_t *out, int stride,
-                         TX_TYPE tx_type, int bd) {
-  av1_fwd_txfm2d_16x16_c(in, out, stride, tx_type, bd);
-}
-#endif  // CONFIG_HIGHBITDEPTH
-
-class AV1Trans16x16HT : public libaom_test::TransformTestBase,
-                        public ::testing::TestWithParam<Ht16x16Param> {
- public:
-  virtual ~AV1Trans16x16HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 16;
-    height_ = 16;
-    fwd_txfm_ref = fht16x16_ref;
-    inv_txfm_ref = iht16x16_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(AV1Trans16x16HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans16x16HT, AccuracyCheck) { RunAccuracyCheck(1, 0.001); }
-TEST_P(AV1Trans16x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
-TEST_P(AV1Trans16x16HT, CoeffCheck) { RunCoeffCheck(); }
-TEST_P(AV1Trans16x16HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-
-#if CONFIG_HIGHBITDEPTH
-class AV1HighbdTrans16x16HT
-    : public ::testing::TestWithParam<HighbdHt16x16Param> {
- public:
-  virtual ~AV1HighbdTrans16x16HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    fwd_txfm_ref_ = highbd_fht16x16_ref;
-    tx_type_ = GET_PARAM(1);
-    bit_depth_ = GET_PARAM(2);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = 256;
-
-    input_ = reinterpret_cast<int16_t *>(
-        aom_memalign(32, sizeof(int16_t) * num_coeffs_));
-    output_ = reinterpret_cast<int32_t *>(
-        aom_memalign(32, sizeof(int32_t) * num_coeffs_));
-    output_ref_ = reinterpret_cast<int32_t *>(
-        aom_memalign(32, sizeof(int32_t) * num_coeffs_));
-  }
-
-  virtual void TearDown() {
-    aom_free(input_);
-    aom_free(output_);
-    aom_free(output_ref_);
-    libaom_test::ClearSystemState();
-  }
-
- protected:
-  void RunBitexactCheck();
-
- private:
-  HbdHtFunc fwd_txfm_;
-  HbdHtFunc fwd_txfm_ref_;
-  TX_TYPE tx_type_;
-  int bit_depth_;
-  int mask_;
-  int num_coeffs_;
-  int16_t *input_;
-  int32_t *output_;
-  int32_t *output_ref_;
-};
-
-void AV1HighbdTrans16x16HT::RunBitexactCheck() {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int i, j;
-  const int stride = 16;
-  const int num_tests = 1000;
-
-  for (i = 0; i < num_tests; ++i) {
-    for (j = 0; j < num_coeffs_; ++j) {
-      input_[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
-    }
-
-    fwd_txfm_ref_(input_, output_ref_, stride, tx_type_, bit_depth_);
-    ASM_REGISTER_STATE_CHECK(
-        fwd_txfm_(input_, output_, stride, tx_type_, bit_depth_));
-
-    for (j = 0; j < num_coeffs_; ++j) {
-      EXPECT_EQ(output_ref_[j], output_[j])
-          << "Not bit-exact result at index: " << j << " at test block: " << i;
-    }
-  }
-}
-
-TEST_P(AV1HighbdTrans16x16HT, HighbdCoeffCheck) { RunBitexactCheck(); }
-#endif  // CONFIG_HIGHBITDEPTH
-
-using std::tr1::make_tuple;
-
-#if HAVE_SSE2 && !CONFIG_DAALA_DCT16
-const Ht16x16Param kArrayHt16x16Param_sse2[] = {
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, DCT_DCT,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, ADST_DCT,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, DCT_ADST,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, ADST_ADST,
-             AOM_BITS_8, 256),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, FLIPADST_DCT,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, DCT_FLIPADST,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, FLIPADST_FLIPADST,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, ADST_FLIPADST,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, FLIPADST_ADST,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, IDTX, AOM_BITS_8,
-             256),
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, V_DCT, AOM_BITS_8,
-             256),
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, H_DCT, AOM_BITS_8,
-             256),
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, V_ADST, AOM_BITS_8,
-             256),
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, H_ADST, AOM_BITS_8,
-             256),
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, V_FLIPADST,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, H_FLIPADST,
-             AOM_BITS_8, 256)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans16x16HT,
-                        ::testing::ValuesIn(kArrayHt16x16Param_sse2));
-#endif  // HAVE_SSE2
-
-#if HAVE_AVX2 && !CONFIG_DAALA_DCT16
-const Ht16x16Param kArrayHt16x16Param_avx2[] = {
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, DCT_DCT,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, ADST_DCT,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, DCT_ADST,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, ADST_ADST,
-             AOM_BITS_8, 256),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, FLIPADST_DCT,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, DCT_FLIPADST,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, FLIPADST_FLIPADST,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, ADST_FLIPADST,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, FLIPADST_ADST,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, IDTX, AOM_BITS_8,
-             256),
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, V_DCT, AOM_BITS_8,
-             256),
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, H_DCT, AOM_BITS_8,
-             256),
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, V_ADST, AOM_BITS_8,
-             256),
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, H_ADST, AOM_BITS_8,
-             256),
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, V_FLIPADST,
-             AOM_BITS_8, 256),
-  make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, H_FLIPADST,
-             AOM_BITS_8, 256)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(AVX2, AV1Trans16x16HT,
-                        ::testing::ValuesIn(kArrayHt16x16Param_avx2));
-#endif  // HAVE_AVX2
-
-#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT16
-const HighbdHt16x16Param kArrayHBDHt16x16Param_sse4_1[] = {
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, DCT_DCT, 10),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, DCT_DCT, 12),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, ADST_DCT, 10),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, ADST_DCT, 12),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, DCT_ADST, 10),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, DCT_ADST, 12),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, ADST_ADST, 10),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, ADST_ADST, 12),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, FLIPADST_DCT, 10),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, FLIPADST_DCT, 12),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, DCT_FLIPADST, 10),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, DCT_FLIPADST, 12),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, FLIPADST_FLIPADST, 10),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, FLIPADST_FLIPADST, 12),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, ADST_FLIPADST, 10),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, ADST_FLIPADST, 12),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, FLIPADST_ADST, 10),
-  make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, FLIPADST_ADST, 12),
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdTrans16x16HT,
-                        ::testing::ValuesIn(kArrayHBDHt16x16Param_sse4_1));
-#endif  // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT16
-
-}  // namespace
diff --git a/third_party/aom/test/av1_fht16x32_test.cc b/third_party/aom/test/av1_fht16x32_test.cc
deleted file mode 100644
index 0b3928f64b..0000000000
--- a/third_party/aom/test/av1_fht16x32_test.cc
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_dsp_rtcd.h"
-#include "./av1_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-using std::tr1::tuple;
-using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht16x32Param;
-
-void fht16x32_ref(const int16_t *in, tran_low_t *out, int stride,
-                  TxfmParam *txfm_param) {
-  av1_fht16x32_c(in, out, stride, txfm_param);
-}
-
-void iht16x32_ref(const tran_low_t *in, uint8_t *out, int stride,
-                  const TxfmParam *txfm_param) {
-  av1_iht16x32_512_add_c(in, out, stride, txfm_param);
-}
-
-class AV1Trans16x32HT : public libaom_test::TransformTestBase,
-                        public ::testing::TestWithParam<Ht16x32Param> {
- public:
-  virtual ~AV1Trans16x32HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 16;
-    height_ = 32;
-    fwd_txfm_ref = fht16x32_ref;
-    inv_txfm_ref = iht16x32_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(AV1Trans16x32HT, AccuracyCheck) { RunAccuracyCheck(4, 0.2); }
-TEST_P(AV1Trans16x32HT, CoeffCheck) { RunCoeffCheck(); }
-TEST_P(AV1Trans16x32HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans16x32HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-TEST_P(AV1Trans16x32HT, InvAccuracyCheck) { RunInvAccuracyCheck(4); }
-
-using std::tr1::make_tuple;
-const Ht16x32Param kArrayHt16x32Param_c[] = {
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, DCT_DCT, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, ADST_DCT, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, DCT_ADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, ADST_ADST, AOM_BITS_8,
-             512),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, FLIPADST_DCT, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, DCT_FLIPADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, FLIPADST_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, ADST_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, FLIPADST_ADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, IDTX, AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, V_DCT, AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, H_DCT, AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, V_ADST, AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, H_ADST, AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, V_FLIPADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, H_FLIPADST, AOM_BITS_8,
-             512)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(C, AV1Trans16x32HT,
-                        ::testing::ValuesIn(kArrayHt16x32Param_c));
-
-#if HAVE_SSE2
-const Ht16x32Param kArrayHt16x32Param_sse2[] = {
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, DCT_DCT,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, ADST_DCT,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, DCT_ADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, ADST_ADST,
-             AOM_BITS_8, 512),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, FLIPADST_DCT,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, DCT_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, FLIPADST_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, ADST_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, FLIPADST_ADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, IDTX, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, V_DCT, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, H_DCT, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, V_ADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, H_ADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, V_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, H_FLIPADST,
-             AOM_BITS_8, 512)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans16x32HT,
-                        ::testing::ValuesIn(kArrayHt16x32Param_sse2));
-#endif  // HAVE_SSE2
-
-}  // namespace
diff --git a/third_party/aom/test/av1_fht16x8_test.cc b/third_party/aom/test/av1_fht16x8_test.cc
deleted file mode 100644
index 3ee1a0830f..0000000000
--- a/third_party/aom/test/av1_fht16x8_test.cc
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_dsp_rtcd.h"
-#include "./av1_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-using std::tr1::tuple;
-using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht16x8Param;
-
-void fht16x8_ref(const int16_t *in, tran_low_t *out, int stride,
-                 TxfmParam *txfm_param) {
-  av1_fht16x8_c(in, out, stride, txfm_param);
-}
-
-void iht16x8_ref(const tran_low_t *in, uint8_t *out, int stride,
-                 const TxfmParam *txfm_param) {
-  av1_iht16x8_128_add_c(in, out, stride, txfm_param);
-}
-
-class AV1Trans16x8HT : public libaom_test::TransformTestBase,
-                       public ::testing::TestWithParam<Ht16x8Param> {
- public:
-  virtual ~AV1Trans16x8HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 16;
-    height_ = 8;
-    inv_txfm_ref = iht16x8_ref;
-    fwd_txfm_ref = fht16x8_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(AV1Trans16x8HT, AccuracyCheck) { RunAccuracyCheck(1, 0.001); }
-TEST_P(AV1Trans16x8HT, CoeffCheck) { RunCoeffCheck(); }
-TEST_P(AV1Trans16x8HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans16x8HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-TEST_P(AV1Trans16x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
-
-using std::tr1::make_tuple;
-
-const Ht16x8Param kArrayHt16x8Param_c[] = {
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, DCT_DCT, AOM_BITS_8, 128),
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, ADST_DCT, AOM_BITS_8, 128),
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, DCT_ADST, AOM_BITS_8, 128),
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, ADST_ADST, AOM_BITS_8,
-             128),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, FLIPADST_DCT, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, DCT_FLIPADST, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, FLIPADST_FLIPADST,
-             AOM_BITS_8, 128),
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, ADST_FLIPADST, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, FLIPADST_ADST, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, IDTX, AOM_BITS_8, 128),
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, V_DCT, AOM_BITS_8, 128),
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, H_DCT, AOM_BITS_8, 128),
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, V_ADST, AOM_BITS_8, 128),
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, H_ADST, AOM_BITS_8, 128),
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, V_FLIPADST, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, H_FLIPADST, AOM_BITS_8,
-             128)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(C, AV1Trans16x8HT,
-                        ::testing::ValuesIn(kArrayHt16x8Param_c));
-
-#if HAVE_SSE2
-const Ht16x8Param kArrayHt16x8Param_sse2[] = {
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, DCT_DCT, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, ADST_DCT, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, DCT_ADST, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, ADST_ADST,
-             AOM_BITS_8, 128),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, FLIPADST_DCT,
-             AOM_BITS_8, 128),
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, DCT_FLIPADST,
-             AOM_BITS_8, 128),
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, FLIPADST_FLIPADST,
-             AOM_BITS_8, 128),
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, ADST_FLIPADST,
-             AOM_BITS_8, 128),
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, FLIPADST_ADST,
-             AOM_BITS_8, 128),
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, IDTX, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, V_DCT, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, H_DCT, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, V_ADST, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, H_ADST, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, V_FLIPADST,
-             AOM_BITS_8, 128),
-  make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, H_FLIPADST,
-             AOM_BITS_8, 128)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans16x8HT,
-                        ::testing::ValuesIn(kArrayHt16x8Param_sse2));
-#endif  // HAVE_SSE2
-
-}  // namespace
diff --git a/third_party/aom/test/av1_fht32x16_test.cc b/third_party/aom/test/av1_fht32x16_test.cc
deleted file mode 100644
index cbce074e50..0000000000
--- a/third_party/aom/test/av1_fht32x16_test.cc
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_dsp_rtcd.h"
-#include "./av1_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-using std::tr1::tuple;
-using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht32x16Param;
-
-void fht32x16_ref(const int16_t *in, tran_low_t *out, int stride,
-                  TxfmParam *txfm_param) {
-  av1_fht32x16_c(in, out, stride, txfm_param);
-}
-
-void iht32x16_ref(const tran_low_t *in, uint8_t *out, int stride,
-                  const TxfmParam *txfm_param) {
-  av1_iht32x16_512_add_c(in, out, stride, txfm_param);
-}
-
-class AV1Trans32x16HT : public libaom_test::TransformTestBase,
-                        public ::testing::TestWithParam<Ht32x16Param> {
- public:
-  virtual ~AV1Trans32x16HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 32;
-    height_ = 16;
-    fwd_txfm_ref = fht32x16_ref;
-    inv_txfm_ref = iht32x16_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(AV1Trans32x16HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans32x16HT, AccuracyCheck) { RunAccuracyCheck(4, 0.2); }
-TEST_P(AV1Trans32x16HT, CoeffCheck) { RunCoeffCheck(); }
-TEST_P(AV1Trans32x16HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-TEST_P(AV1Trans32x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(4); }
-
-using std::tr1::make_tuple;
-const Ht32x16Param kArrayHt32x16Param_c[] = {
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, DCT_DCT, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, ADST_DCT, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, DCT_ADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, ADST_ADST, AOM_BITS_8,
-             512),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, FLIPADST_DCT, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, DCT_FLIPADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, FLIPADST_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, ADST_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, FLIPADST_ADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, IDTX, AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, V_DCT, AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, H_DCT, AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, V_ADST, AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, H_ADST, AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, V_FLIPADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, H_FLIPADST, AOM_BITS_8,
-             512)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(C, AV1Trans32x16HT,
-                        ::testing::ValuesIn(kArrayHt32x16Param_c));
-
-#if HAVE_SSE2
-const Ht32x16Param kArrayHt32x16Param_sse2[] = {
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, DCT_DCT,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, ADST_DCT,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, DCT_ADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, ADST_ADST,
-             AOM_BITS_8, 512),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, FLIPADST_DCT,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, DCT_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, FLIPADST_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, ADST_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, FLIPADST_ADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, IDTX, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, V_DCT, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, H_DCT, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, V_ADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, H_ADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, V_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, H_FLIPADST,
-             AOM_BITS_8, 512)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans32x16HT,
-                        ::testing::ValuesIn(kArrayHt32x16Param_sse2));
-#endif  // HAVE_SSE2
-
-}  // namespace
diff --git a/third_party/aom/test/av1_fht32x32_test.cc b/third_party/aom/test/av1_fht32x32_test.cc
deleted file mode 100644
index 613bc91838..0000000000
--- a/third_party/aom/test/av1_fht32x32_test.cc
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-#include "aom_ports/mem.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-using std::tr1::tuple;
-using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht32x32Param;
-
-void fht32x32_ref(const int16_t *in, tran_low_t *out, int stride,
-                  TxfmParam *txfm_param) {
-  av1_fht32x32_c(in, out, stride, txfm_param);
-}
-
-#if CONFIG_HIGHBITDEPTH
-typedef void (*IHbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                           TX_TYPE tx_type, int bd);
-typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
-                          TX_TYPE tx_type, int bd);
-
-// Target optimized function, tx_type, bit depth
-typedef tuple<HbdHtFunc, TX_TYPE, int> HighbdHt32x32Param;
-
-void highbd_fht32x32_ref(const int16_t *in, int32_t *out, int stride,
-                         TX_TYPE tx_type, int bd) {
-  av1_fwd_txfm2d_32x32_c(in, out, stride, tx_type, bd);
-}
-#endif  // CONFIG_HIGHBITDEPTH
-
-#if (HAVE_SSE2 || HAVE_AVX2) && !CONFIG_DAALA_DCT32
-void dummy_inv_txfm(const tran_low_t *in, uint8_t *out, int stride,
-                    const TxfmParam *txfm_param) {
-  (void)in;
-  (void)out;
-  (void)stride;
-  (void)txfm_param;
-}
-#endif
-
-class AV1Trans32x32HT : public libaom_test::TransformTestBase,
-                        public ::testing::TestWithParam<Ht32x32Param> {
- public:
-  virtual ~AV1Trans32x32HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 32;
-    height_ = 32;
-    fwd_txfm_ref = fht32x32_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(AV1Trans32x32HT, CoeffCheck) { RunCoeffCheck(); }
-TEST_P(AV1Trans32x32HT, MemCheck) { RunMemCheck(); }
-
-#if CONFIG_HIGHBITDEPTH
-class AV1HighbdTrans32x32HT
-    : public ::testing::TestWithParam<HighbdHt32x32Param> {
- public:
-  virtual ~AV1HighbdTrans32x32HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    fwd_txfm_ref_ = highbd_fht32x32_ref;
-    tx_type_ = GET_PARAM(1);
-    bit_depth_ = GET_PARAM(2);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = 1024;
-
-    input_ = reinterpret_cast<int16_t *>(
-        aom_memalign(32, sizeof(int16_t) * num_coeffs_));
-    output_ = reinterpret_cast<int32_t *>(
-        aom_memalign(32, sizeof(int32_t) * num_coeffs_));
-    output_ref_ = reinterpret_cast<int32_t *>(
-        aom_memalign(32, sizeof(int32_t) * num_coeffs_));
-  }
-
-  virtual void TearDown() {
-    aom_free(input_);
-    aom_free(output_);
-    aom_free(output_ref_);
-    libaom_test::ClearSystemState();
-  }
-
- protected:
-  void RunBitexactCheck();
-
- private:
-  HbdHtFunc fwd_txfm_;
-  HbdHtFunc fwd_txfm_ref_;
-  TX_TYPE tx_type_;
-  int bit_depth_;
-  int mask_;
-  int num_coeffs_;
-  int16_t *input_;
-  int32_t *output_;
-  int32_t *output_ref_;
-};
-
-void AV1HighbdTrans32x32HT::RunBitexactCheck() {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int i, j;
-  const int stride = 32;
-  const int num_tests = 1000;
-
-  for (i = 0; i < num_tests; ++i) {
-    for (j = 0; j < num_coeffs_; ++j) {
-      input_[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
-    }
-
-    fwd_txfm_ref_(input_, output_ref_, stride, tx_type_, bit_depth_);
-    ASM_REGISTER_STATE_CHECK(
-        fwd_txfm_(input_, output_, stride, tx_type_, bit_depth_));
-
-    for (j = 0; j < num_coeffs_; ++j) {
-      EXPECT_EQ(output_ref_[j], output_[j])
-          << "Not bit-exact result at index: " << j << " at test block: " << i;
-    }
-  }
-}
-
-TEST_P(AV1HighbdTrans32x32HT, HighbdCoeffCheck) { RunBitexactCheck(); }
-#endif  // CONFIG_HIGHBITDEPTH
-
-using std::tr1::make_tuple;
-
-#if HAVE_SSE2 && !CONFIG_DAALA_DCT32
-const Ht32x32Param kArrayHt32x32Param_sse2[] = {
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, DCT_DCT, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, ADST_DCT, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, DCT_ADST, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, ADST_ADST, AOM_BITS_8, 1024),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, FLIPADST_DCT, AOM_BITS_8,
-             1024),
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, DCT_FLIPADST, AOM_BITS_8,
-             1024),
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, FLIPADST_FLIPADST, AOM_BITS_8,
-             1024),
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, ADST_FLIPADST, AOM_BITS_8,
-             1024),
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, FLIPADST_ADST, AOM_BITS_8,
-             1024),
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, IDTX, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, V_DCT, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, H_DCT, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, V_ADST, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, H_ADST, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, V_FLIPADST, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, H_FLIPADST, AOM_BITS_8, 1024)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans32x32HT,
-                        ::testing::ValuesIn(kArrayHt32x32Param_sse2));
-#endif  // HAVE_SSE2 && !CONFIG_DAALA_DCT32
-
-#if HAVE_AVX2 && !CONFIG_DAALA_DCT32
-const Ht32x32Param kArrayHt32x32Param_avx2[] = {
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, DCT_DCT, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, ADST_DCT, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, DCT_ADST, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, ADST_ADST, AOM_BITS_8, 1024),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, FLIPADST_DCT, AOM_BITS_8,
-             1024),
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, DCT_FLIPADST, AOM_BITS_8,
-             1024),
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, FLIPADST_FLIPADST, AOM_BITS_8,
-             1024),
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, ADST_FLIPADST, AOM_BITS_8,
-             1024),
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, FLIPADST_ADST, AOM_BITS_8,
-             1024),
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, IDTX, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, V_DCT, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, H_DCT, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, V_ADST, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, H_ADST, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, V_FLIPADST, AOM_BITS_8, 1024),
-  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, H_FLIPADST, AOM_BITS_8, 1024)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(AVX2, AV1Trans32x32HT,
-                        ::testing::ValuesIn(kArrayHt32x32Param_avx2));
-#endif  // HAVE_AVX2 && !CONFIG_DAALA_DCT32
-}  // namespace
diff --git a/third_party/aom/test/av1_fht4x4_test.cc b/third_party/aom/test/av1_fht4x4_test.cc
deleted file mode 100644
index 1d4fc1352e..0000000000
--- a/third_party/aom/test/av1_fht4x4_test.cc
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-#include "aom_ports/mem.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-using std::tr1::tuple;
-using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht4x4Param;
-
-void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
-                TxfmParam *txfm_param) {
-  av1_fht4x4_c(in, out, stride, txfm_param);
-}
-
-void iht4x4_ref(const tran_low_t *in, uint8_t *out, int stride,
-                const TxfmParam *txfm_param) {
-  av1_iht4x4_16_add_c(in, out, stride, txfm_param);
-}
-
-#if CONFIG_HIGHBITDEPTH
-typedef void (*IhighbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                              TX_TYPE tx_type, int bd);
-typedef void (*HBDFhtFunc)(const int16_t *input, int32_t *output, int stride,
-                           TX_TYPE tx_type, int bd);
-
-// HighbdHt4x4Param argument list:
-// <Target optimized function, tx_type, bit depth>
-typedef tuple<HBDFhtFunc, TX_TYPE, int> HighbdHt4x4Param;
-
-void highbe_fht4x4_ref(const int16_t *in, int32_t *out, int stride,
-                       TX_TYPE tx_type, int bd) {
-  av1_fwd_txfm2d_4x4_c(in, out, stride, tx_type, bd);
-}
-#endif  // CONFIG_HIGHBITDEPTH
-
-class AV1Trans4x4HT : public libaom_test::TransformTestBase,
-                      public ::testing::TestWithParam<Ht4x4Param> {
- public:
-  virtual ~AV1Trans4x4HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 4;
-    height_ = 4;
-    fwd_txfm_ref = fht4x4_ref;
-    inv_txfm_ref = iht4x4_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(AV1Trans4x4HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans4x4HT, CoeffCheck) { RunCoeffCheck(); }
-// Note:
-//  TODO(luoyi): Add tx_type, 9-15 for inverse transform.
-//  Need cleanup since same tests may be done in fdct4x4_test.cc
-// TEST_P(AV1Trans4x4HT, AccuracyCheck) { RunAccuracyCheck(0); }
-// TEST_P(AV1Trans4x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
-// TEST_P(AV1Trans4x4HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-
-#if CONFIG_HIGHBITDEPTH
-class AV1HighbdTrans4x4HT : public ::testing::TestWithParam<HighbdHt4x4Param> {
- public:
-  virtual ~AV1HighbdTrans4x4HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    fwd_txfm_ref_ = highbe_fht4x4_ref;
-    tx_type_ = GET_PARAM(1);
-    bit_depth_ = GET_PARAM(2);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = 16;
-
-    input_ = reinterpret_cast<int16_t *>(
-        aom_memalign(16, sizeof(int16_t) * num_coeffs_));
-    output_ = reinterpret_cast<int32_t *>(
-        aom_memalign(16, sizeof(int32_t) * num_coeffs_));
-    output_ref_ = reinterpret_cast<int32_t *>(
-        aom_memalign(16, sizeof(int32_t) * num_coeffs_));
-  }
-
-  virtual void TearDown() {
-    aom_free(input_);
-    aom_free(output_);
-    aom_free(output_ref_);
-    libaom_test::ClearSystemState();
-  }
-
- protected:
-  void RunBitexactCheck();
-
- private:
-  HBDFhtFunc fwd_txfm_;
-  HBDFhtFunc fwd_txfm_ref_;
-  TX_TYPE tx_type_;
-  int bit_depth_;
-  int mask_;
-  int num_coeffs_;
-  int16_t *input_;
-  int32_t *output_;
-  int32_t *output_ref_;
-};
-
-void AV1HighbdTrans4x4HT::RunBitexactCheck() {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int i, j;
-  const int stride = 4;
-  const int num_tests = 1000;
-  const int num_coeffs = 16;
-
-  for (i = 0; i < num_tests; ++i) {
-    for (j = 0; j < num_coeffs; ++j) {
-      input_[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
-    }
-
-    fwd_txfm_ref_(input_, output_ref_, stride, tx_type_, bit_depth_);
-    fwd_txfm_(input_, output_, stride, tx_type_, bit_depth_);
-
-    for (j = 0; j < num_coeffs; ++j) {
-      EXPECT_EQ(output_[j], output_ref_[j])
-          << "Not bit-exact result at index: " << j << " at test block: " << i;
-    }
-  }
-}
-
-TEST_P(AV1HighbdTrans4x4HT, HighbdCoeffCheck) { RunBitexactCheck(); }
-#endif  // CONFIG_HIGHBITDEPTH
-
-using std::tr1::make_tuple;
-
-#if HAVE_SSE2 && !CONFIG_DAALA_DCT4
-const Ht4x4Param kArrayHt4x4Param_sse2[] = {
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, DCT_DCT, AOM_BITS_8,
-             16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, ADST_DCT, AOM_BITS_8,
-             16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, DCT_ADST, AOM_BITS_8,
-             16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, ADST_ADST, AOM_BITS_8,
-             16),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, FLIPADST_DCT,
-             AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, DCT_FLIPADST,
-             AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, FLIPADST_FLIPADST,
-             AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, ADST_FLIPADST,
-             AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, FLIPADST_ADST,
-             AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, IDTX, AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, V_DCT, AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, H_DCT, AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, V_ADST, AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, H_ADST, AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, V_FLIPADST, AOM_BITS_8,
-             16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, H_FLIPADST, AOM_BITS_8,
-             16)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans4x4HT,
-                        ::testing::ValuesIn(kArrayHt4x4Param_sse2));
-#endif  // HAVE_SSE2
-
-#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT4
-const HighbdHt4x4Param kArrayHighbdHt4x4Param[] = {
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_DCT, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_DCT, 12),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_DCT, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_DCT, 12),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_ADST, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_ADST, 12),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_ADST, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_ADST, 12),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_DCT, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_DCT, 12),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_FLIPADST, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_FLIPADST, 12),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_FLIPADST, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_FLIPADST, 12),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_FLIPADST, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_FLIPADST, 12),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_ADST, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_ADST, 12),
-#endif  // CONFIG_EXT_TX
-};
-
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdTrans4x4HT,
-                        ::testing::ValuesIn(kArrayHighbdHt4x4Param));
-
-#endif  // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT4
-
-}  // namespace
diff --git a/third_party/aom/test/av1_fht4x8_test.cc b/third_party/aom/test/av1_fht4x8_test.cc
deleted file mode 100644
index f9d2120e08..0000000000
--- a/third_party/aom/test/av1_fht4x8_test.cc
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_dsp_rtcd.h"
-#include "./av1_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-using std::tr1::tuple;
-using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht4x8Param;
-
-void fht4x8_ref(const int16_t *in, tran_low_t *out, int stride,
-                TxfmParam *txfm_param) {
-  av1_fht4x8_c(in, out, stride, txfm_param);
-}
-
-void iht4x8_ref(const tran_low_t *in, uint8_t *out, int stride,
-                const TxfmParam *txfm_param) {
-  av1_iht4x8_32_add_c(in, out, stride, txfm_param);
-}
-
-class AV1Trans4x8HT : public libaom_test::TransformTestBase,
-                      public ::testing::TestWithParam<Ht4x8Param> {
- public:
-  virtual ~AV1Trans4x8HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 4;
-    height_ = 8;
-    fwd_txfm_ref = fht4x8_ref;
-    inv_txfm_ref = iht4x8_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(AV1Trans4x8HT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); }
-TEST_P(AV1Trans4x8HT, CoeffCheck) { RunCoeffCheck(); }
-TEST_P(AV1Trans4x8HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans4x8HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-TEST_P(AV1Trans4x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
-
-using std::tr1::make_tuple;
-
-const Ht4x8Param kArrayHt4x8Param_c[] = {
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, DCT_DCT, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, ADST_DCT, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, DCT_ADST, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, ADST_ADST, AOM_BITS_8, 32),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, FLIPADST_DCT, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, DCT_FLIPADST, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, FLIPADST_FLIPADST, AOM_BITS_8,
-             32),
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, ADST_FLIPADST, AOM_BITS_8,
-             32),
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, FLIPADST_ADST, AOM_BITS_8,
-             32),
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, IDTX, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, V_DCT, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, H_DCT, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, V_ADST, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, H_ADST, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, V_FLIPADST, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, H_FLIPADST, AOM_BITS_8, 32)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(C, AV1Trans4x8HT,
-                        ::testing::ValuesIn(kArrayHt4x8Param_c));
-
-#if HAVE_SSE2
-const Ht4x8Param kArrayHt4x8Param_sse2[] = {
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, DCT_DCT, AOM_BITS_8,
-             32),
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, ADST_DCT, AOM_BITS_8,
-             32),
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, DCT_ADST, AOM_BITS_8,
-             32),
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, ADST_ADST, AOM_BITS_8,
-             32),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, FLIPADST_DCT,
-             AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, DCT_FLIPADST,
-             AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, FLIPADST_FLIPADST,
-             AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, ADST_FLIPADST,
-             AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, FLIPADST_ADST,
-             AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, IDTX, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, V_DCT, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, H_DCT, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, V_ADST, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, H_ADST, AOM_BITS_8, 32),
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, V_FLIPADST, AOM_BITS_8,
-             32),
-  make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, H_FLIPADST, AOM_BITS_8,
-             32)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans4x8HT,
-                        ::testing::ValuesIn(kArrayHt4x8Param_sse2));
-#endif  // HAVE_SSE2
-
-}  // namespace
diff --git a/third_party/aom/test/av1_fht64x64_test.cc b/third_party/aom/test/av1_fht64x64_test.cc
deleted file mode 100644
index f2a03e7ee9..0000000000
--- a/third_party/aom/test/av1_fht64x64_test.cc
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-
-#if CONFIG_TX64X64
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-using std::tr1::tuple;
-using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht64x64Param;
-
-void fht64x64_ref(const int16_t *in, tran_low_t *out, int stride,
-                  TxfmParam *txfm_param) {
-  av1_fht64x64_c(in, out, stride, txfm_param);
-}
-
-void iht64x64_ref(const tran_low_t *in, uint8_t *dest, int stride,
-                  const TxfmParam *txfm_param) {
-  av1_iht64x64_4096_add_c(in, dest, stride, txfm_param);
-}
-
-class AV1Trans64x64HT : public libaom_test::TransformTestBase,
-                        public ::testing::TestWithParam<Ht64x64Param> {
- public:
-  virtual ~AV1Trans64x64HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 64;
-    height_ = 64;
-    fwd_txfm_ref = fht64x64_ref;
-    inv_txfm_ref = iht64x64_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(AV1Trans64x64HT, AccuracyCheck) { RunAccuracyCheck(4, 0.2); }
-TEST_P(AV1Trans64x64HT, CoeffCheck) { RunCoeffCheck(); }
-TEST_P(AV1Trans64x64HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans64x64HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-TEST_P(AV1Trans64x64HT, InvAccuracyCheck) { RunInvAccuracyCheck(4); }
-
-using std::tr1::make_tuple;
-
-const Ht64x64Param kArrayHt64x64Param_c[] = {
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, DCT_DCT, AOM_BITS_8,
-             4096),
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, ADST_DCT, AOM_BITS_8,
-             4096),
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, DCT_ADST, AOM_BITS_8,
-             4096),
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, ADST_ADST, AOM_BITS_8,
-             4096),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, FLIPADST_DCT,
-             AOM_BITS_8, 4096),
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, DCT_FLIPADST,
-             AOM_BITS_8, 4096),
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, FLIPADST_FLIPADST,
-             AOM_BITS_8, 4096),
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, ADST_FLIPADST,
-             AOM_BITS_8, 4096),
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, FLIPADST_ADST,
-             AOM_BITS_8, 4096),
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, IDTX, AOM_BITS_8, 4096),
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, V_DCT, AOM_BITS_8,
-             4096),
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, H_DCT, AOM_BITS_8,
-             4096),
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, V_ADST, AOM_BITS_8,
-             4096),
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, H_ADST, AOM_BITS_8,
-             4096),
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, V_FLIPADST, AOM_BITS_8,
-             4096),
-  make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, H_FLIPADST, AOM_BITS_8,
-             4096)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(C, AV1Trans64x64HT,
-                        ::testing::ValuesIn(kArrayHt64x64Param_c));
-
-}  // namespace
-
-#endif  // CONFIG_TX64X64
diff --git a/third_party/aom/test/av1_fht8x16_test.cc b/third_party/aom/test/av1_fht8x16_test.cc
deleted file mode 100644
index 689cb0b90a..0000000000
--- a/third_party/aom/test/av1_fht8x16_test.cc
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_dsp_rtcd.h"
-#include "./av1_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-using std::tr1::tuple;
-using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht8x16Param;
-
-void fht8x16_ref(const int16_t *in, tran_low_t *out, int stride,
-                 TxfmParam *txfm_param) {
-  av1_fht8x16_c(in, out, stride, txfm_param);
-}
-
-void iht8x16_ref(const tran_low_t *in, uint8_t *out, int stride,
-                 const TxfmParam *txfm_param) {
-  av1_iht8x16_128_add_c(in, out, stride, txfm_param);
-}
-
-class AV1Trans8x16HT : public libaom_test::TransformTestBase,
-                       public ::testing::TestWithParam<Ht8x16Param> {
- public:
-  virtual ~AV1Trans8x16HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 8;
-    height_ = 16;
-    inv_txfm_ref = iht8x16_ref;
-    fwd_txfm_ref = fht8x16_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(AV1Trans8x16HT, AccuracyCheck) { RunAccuracyCheck(1, 0.001); }
-TEST_P(AV1Trans8x16HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans8x16HT, CoeffCheck) { RunCoeffCheck(); }
-TEST_P(AV1Trans8x16HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-TEST_P(AV1Trans8x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
-
-using std::tr1::make_tuple;
-
-const Ht8x16Param kArrayHt8x16Param_c[] = {
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, DCT_DCT, AOM_BITS_8, 128),
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, ADST_DCT, AOM_BITS_8, 128),
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, DCT_ADST, AOM_BITS_8, 128),
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, ADST_ADST, AOM_BITS_8,
-             128),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, FLIPADST_DCT, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, DCT_FLIPADST, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, FLIPADST_FLIPADST,
-             AOM_BITS_8, 128),
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, ADST_FLIPADST, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, FLIPADST_ADST, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, IDTX, AOM_BITS_8, 128),
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, V_DCT, AOM_BITS_8, 128),
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, H_DCT, AOM_BITS_8, 128),
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, V_ADST, AOM_BITS_8, 128),
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, H_ADST, AOM_BITS_8, 128),
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, V_FLIPADST, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, H_FLIPADST, AOM_BITS_8,
-             128)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(C, AV1Trans8x16HT,
-                        ::testing::ValuesIn(kArrayHt8x16Param_c));
-
-#if HAVE_SSE2
-const Ht8x16Param kArrayHt8x16Param_sse2[] = {
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, DCT_DCT, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, ADST_DCT, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, DCT_ADST, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, ADST_ADST,
-             AOM_BITS_8, 128),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, FLIPADST_DCT,
-             AOM_BITS_8, 128),
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, DCT_FLIPADST,
-             AOM_BITS_8, 128),
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, FLIPADST_FLIPADST,
-             AOM_BITS_8, 128),
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, ADST_FLIPADST,
-             AOM_BITS_8, 128),
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, FLIPADST_ADST,
-             AOM_BITS_8, 128),
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, IDTX, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, V_DCT, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, H_DCT, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, V_ADST, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, H_ADST, AOM_BITS_8,
-             128),
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, V_FLIPADST,
-             AOM_BITS_8, 128),
-  make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, H_FLIPADST,
-             AOM_BITS_8, 128)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans8x16HT,
-                        ::testing::ValuesIn(kArrayHt8x16Param_sse2));
-#endif  // HAVE_SSE2
-
-}  // namespace
diff --git a/third_party/aom/test/av1_fht8x4_test.cc b/third_party/aom/test/av1_fht8x4_test.cc
deleted file mode 100644
index e50a69457d..0000000000
--- a/third_party/aom/test/av1_fht8x4_test.cc
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_dsp_rtcd.h"
-#include "./av1_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-using std::tr1::tuple;
-using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht8x4Param;
-
-void fht8x4_ref(const int16_t *in, tran_low_t *out, int stride,
-                TxfmParam *txfm_param) {
-  av1_fht8x4_c(in, out, stride, txfm_param);
-}
-
-void iht8x4_ref(const tran_low_t *in, uint8_t *out, int stride,
-                const TxfmParam *txfm_param) {
-  av1_iht8x4_32_add_c(in, out, stride, txfm_param);
-}
-
-class AV1Trans8x4HT : public libaom_test::TransformTestBase,
-                      public ::testing::TestWithParam<Ht8x4Param> {
- public:
-  virtual ~AV1Trans8x4HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 8;
-    height_ = 4;
-    fwd_txfm_ref = fht8x4_ref;
-    inv_txfm_ref = iht8x4_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(AV1Trans8x4HT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); }
-TEST_P(AV1Trans8x4HT, CoeffCheck) { RunCoeffCheck(); }
-TEST_P(AV1Trans8x4HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans8x4HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-TEST_P(AV1Trans8x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
-
-using std::tr1::make_tuple;
-
-const Ht8x4Param kArrayHt8x4Param_c[] = {
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, DCT_DCT, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, ADST_DCT, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, DCT_ADST, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, ADST_ADST, AOM_BITS_8, 32),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, FLIPADST_DCT, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, DCT_FLIPADST, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, FLIPADST_FLIPADST, AOM_BITS_8,
-             32),
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, ADST_FLIPADST, AOM_BITS_8,
-             32),
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, FLIPADST_ADST, AOM_BITS_8,
-             32),
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, IDTX, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, V_DCT, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, H_DCT, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, V_ADST, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, H_ADST, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, V_FLIPADST, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, H_FLIPADST, AOM_BITS_8, 32)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(C, AV1Trans8x4HT,
-                        ::testing::ValuesIn(kArrayHt8x4Param_c));
-
-#if HAVE_SSE2
-const Ht8x4Param kArrayHt8x4Param_sse2[] = {
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, DCT_DCT, AOM_BITS_8,
-             32),
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, ADST_DCT, AOM_BITS_8,
-             32),
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, DCT_ADST, AOM_BITS_8,
-             32),
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, ADST_ADST, AOM_BITS_8,
-             32),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, FLIPADST_DCT,
-             AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, DCT_FLIPADST,
-             AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, FLIPADST_FLIPADST,
-             AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, ADST_FLIPADST,
-             AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, FLIPADST_ADST,
-             AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, IDTX, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, V_DCT, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, H_DCT, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, V_ADST, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, H_ADST, AOM_BITS_8, 32),
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, V_FLIPADST, AOM_BITS_8,
-             32),
-  make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, H_FLIPADST, AOM_BITS_8,
-             32)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans8x4HT,
-                        ::testing::ValuesIn(kArrayHt8x4Param_sse2));
-#endif  // HAVE_SSE2
-
-}  // namespace
diff --git a/third_party/aom/test/av1_fht8x8_test.cc b/third_party/aom/test/av1_fht8x8_test.cc
deleted file mode 100644
index 499fcc3381..0000000000
--- a/third_party/aom/test/av1_fht8x8_test.cc
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-#include "aom_ports/mem.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-
-using libaom_test::FhtFunc;
-using std::tr1::tuple;
-typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht8x8Param;
-
-void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride,
-                TxfmParam *txfm_param) {
-  av1_fht8x8_c(in, out, stride, txfm_param);
-}
-
-void iht8x8_ref(const tran_low_t *in, uint8_t *out, int stride,
-                const TxfmParam *txfm_param) {
-  av1_iht8x8_64_add_c(in, out, stride, txfm_param);
-}
-
-#if CONFIG_HIGHBITDEPTH
-typedef void (*IHbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                           TX_TYPE tx_type, int bd);
-typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
-                          TX_TYPE tx_type, int bd);
-// Target optimized function, tx_type, bit depth
-typedef tuple<HbdHtFunc, TX_TYPE, int> HighbdHt8x8Param;
-
-void highbd_fht8x8_ref(const int16_t *in, int32_t *out, int stride,
-                       TX_TYPE tx_type, int bd) {
-  av1_fwd_txfm2d_8x8_c(in, out, stride, tx_type, bd);
-}
-#endif  // CONFIG_HIGHBITDEPTH
-
-class AV1Trans8x8HT : public libaom_test::TransformTestBase,
-                      public ::testing::TestWithParam<Ht8x8Param> {
- public:
-  virtual ~AV1Trans8x8HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 8;
-    height_ = 8;
-    fwd_txfm_ref = fht8x8_ref;
-    inv_txfm_ref = iht8x8_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(AV1Trans8x8HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans8x8HT, CoeffCheck) { RunCoeffCheck(); }
-// Note:
-//  TODO(luoyi): Add tx_type, 9-15 for inverse transform.
-//  Need cleanup since same tests may be done in fdct8x8_test.cc
-// TEST_P(AV1Trans8x8HT, AccuracyCheck) { RunAccuracyCheck(0); }
-// TEST_P(AV1Trans8x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
-// TEST_P(AV1Trans8x8HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-
-#if CONFIG_HIGHBITDEPTH
-class AV1HighbdTrans8x8HT : public ::testing::TestWithParam<HighbdHt8x8Param> {
- public:
-  virtual ~AV1HighbdTrans8x8HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    fwd_txfm_ref_ = highbd_fht8x8_ref;
-    tx_type_ = GET_PARAM(1);
-    bit_depth_ = GET_PARAM(2);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = 64;
-
-    input_ = reinterpret_cast<int16_t *>(
-        aom_memalign(16, sizeof(int16_t) * num_coeffs_));
-    output_ = reinterpret_cast<int32_t *>(
-        aom_memalign(16, sizeof(int32_t) * num_coeffs_));
-    output_ref_ = reinterpret_cast<int32_t *>(
-        aom_memalign(16, sizeof(int32_t) * num_coeffs_));
-  }
-
-  virtual void TearDown() {
-    aom_free(input_);
-    aom_free(output_);
-    aom_free(output_ref_);
-    libaom_test::ClearSystemState();
-  }
-
- protected:
-  void RunBitexactCheck();
-
- private:
-  HbdHtFunc fwd_txfm_;
-  HbdHtFunc fwd_txfm_ref_;
-  TX_TYPE tx_type_;
-  int bit_depth_;
-  int mask_;
-  int num_coeffs_;
-  int16_t *input_;
-  int32_t *output_;
-  int32_t *output_ref_;
-};
-
-void AV1HighbdTrans8x8HT::RunBitexactCheck() {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int i, j;
-  const int stride = 8;
-  const int num_tests = 1000;
-  const int num_coeffs = 64;
-
-  for (i = 0; i < num_tests; ++i) {
-    for (j = 0; j < num_coeffs; ++j) {
-      input_[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
-    }
-
-    fwd_txfm_ref_(input_, output_ref_, stride, tx_type_, bit_depth_);
-    ASM_REGISTER_STATE_CHECK(
-        fwd_txfm_(input_, output_, stride, tx_type_, bit_depth_));
-
-    for (j = 0; j < num_coeffs; ++j) {
-      EXPECT_EQ(output_ref_[j], output_[j])
-          << "Not bit-exact result at index: " << j << " at test block: " << i;
-    }
-  }
-}
-
-TEST_P(AV1HighbdTrans8x8HT, HighbdCoeffCheck) { RunBitexactCheck(); }
-#endif  // CONFIG_HIGHBITDEPTH
-
-using std::tr1::make_tuple;
-
-#if HAVE_SSE2 && !CONFIG_DAALA_DCT8
-const Ht8x8Param kArrayHt8x8Param_sse2[] = {
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, DCT_DCT, AOM_BITS_8,
-             64),
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, ADST_DCT, AOM_BITS_8,
-             64),
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, DCT_ADST, AOM_BITS_8,
-             64),
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, ADST_ADST, AOM_BITS_8,
-             64),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, FLIPADST_DCT,
-             AOM_BITS_8, 64),
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, DCT_FLIPADST,
-             AOM_BITS_8, 64),
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, FLIPADST_FLIPADST,
-             AOM_BITS_8, 64),
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, ADST_FLIPADST,
-             AOM_BITS_8, 64),
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, FLIPADST_ADST,
-             AOM_BITS_8, 64),
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, IDTX, AOM_BITS_8, 64),
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, V_DCT, AOM_BITS_8, 64),
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, H_DCT, AOM_BITS_8, 64),
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, V_ADST, AOM_BITS_8, 64),
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, H_ADST, AOM_BITS_8, 64),
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, V_FLIPADST, AOM_BITS_8,
-             64),
-  make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, H_FLIPADST, AOM_BITS_8,
-             64)
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans8x8HT,
-                        ::testing::ValuesIn(kArrayHt8x8Param_sse2));
-#endif  // HAVE_SSE2
-
-#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT8
-const HighbdHt8x8Param kArrayHBDHt8x8Param_sse4_1[] = {
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, DCT_DCT, 10),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, DCT_DCT, 12),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, ADST_DCT, 10),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, ADST_DCT, 12),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, DCT_ADST, 10),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, DCT_ADST, 12),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, ADST_ADST, 10),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, ADST_ADST, 12),
-#if CONFIG_EXT_TX
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, FLIPADST_DCT, 10),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, FLIPADST_DCT, 12),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, DCT_FLIPADST, 10),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, DCT_FLIPADST, 12),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, FLIPADST_FLIPADST, 10),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, FLIPADST_FLIPADST, 12),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, ADST_FLIPADST, 10),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, ADST_FLIPADST, 12),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, FLIPADST_ADST, 10),
-  make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, FLIPADST_ADST, 12),
-#endif  // CONFIG_EXT_TX
-};
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdTrans8x8HT,
-                        ::testing::ValuesIn(kArrayHBDHt8x8Param_sse4_1));
-#endif  // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT8
-
-}  // namespace
diff --git a/third_party/aom/test/av1_fwd_txfm1d_test.cc b/third_party/aom/test/av1_fwd_txfm1d_test.cc
index 9deef3c954..49a6668793 100644
--- a/third_party/aom/test/av1_fwd_txfm1d_test.cc
+++ b/third_party/aom/test/av1_fwd_txfm1d_test.cc
@@ -9,36 +9,37 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
-#include "av1/common/av1_fwd_txfm1d.h"
+#include "av1/encoder/av1_fwd_txfm1d.h"
 #include "test/av1_txfm_test.h"
 
 using libaom_test::ACMRandom;
+using libaom_test::TYPE_ADST;
+using libaom_test::TYPE_DCT;
+using libaom_test::TYPE_IDTX;
+using libaom_test::TYPE_TXFM;
 using libaom_test::input_base;
 using libaom_test::reference_hybrid_1d;
-using libaom_test::TYPE_TXFM;
-using libaom_test::TYPE_DCT;
-using libaom_test::TYPE_ADST;
 
 namespace {
-const int txfm_type_num = 2;
-const TYPE_TXFM txfm_type_ls[2] = { TYPE_DCT, TYPE_ADST };
+const int txfm_type_num = 3;
+const TYPE_TXFM txfm_type_ls[txfm_type_num] = { TYPE_DCT, TYPE_ADST,
+                                                TYPE_IDTX };
 
 const int txfm_size_num = 5;
-const int txfm_size_ls[5] = { 4, 8, 16, 32, 64 };
-
-const TxfmFunc fwd_txfm_func_ls[2][5] = {
-#if CONFIG_TX64X64
-  { av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new,
-    av1_fdct64_new },
-#else
-  { av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new, NULL },
-#endif
-  { av1_fadst4_new, av1_fadst8_new, av1_fadst16_new, av1_fadst32_new, NULL }
+
+const int txfm_size_ls[] = { 4, 8, 16, 32, 64 };
+
+const TxfmFunc fwd_txfm_func_ls[][txfm_type_num] = {
+  { av1_fdct4_new, av1_fadst4_new, av1_fidentity4_c },
+  { av1_fdct8_new, av1_fadst8_new, av1_fidentity8_c },
+  { av1_fdct16_new, av1_fadst16_new, av1_fidentity16_c },
+  { av1_fdct32_new, NULL, av1_fidentity32_c },
+  { av1_fdct64_new, NULL, NULL },
 };
 
 // the maximum stage number of fwd/inv 1d dct/adst txfm is 12
-const int8_t cos_bit[12] = { 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14 };
-const int8_t range_bit[12] = { 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 };
+const int8_t cos_bit = 14;
+const int8_t range_bit[12] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 };
 
 TEST(av1_fwd_txfm1d, round_shift) {
   EXPECT_EQ(round_shift(7, 1), 4);
@@ -51,10 +52,10 @@ TEST(av1_fwd_txfm1d, round_shift) {
   EXPECT_EQ(round_shift(-8, 2), -2);
 }
 
-TEST(av1_fwd_txfm1d, cospi_arr_data) {
+TEST(av1_fwd_txfm1d, av1_cospi_arr_data) {
   for (int i = 0; i < 7; i++) {
     for (int j = 0; j < 64; j++) {
-      EXPECT_EQ(cospi_arr_data[i][j],
+      EXPECT_EQ(av1_cospi_arr_data[i][j],
                 (int32_t)round(cos(M_PI * j / 128) * (1 << (cos_bit_min + i))));
     }
   }
@@ -71,7 +72,7 @@ TEST(av1_fwd_txfm1d, accuracy) {
 
     for (int ti = 0; ti < txfm_type_num; ++ti) {
       TYPE_TXFM txfm_type = txfm_type_ls[ti];
-      TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[ti][si];
+      TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[si][ti];
       int max_error = 7;
 
       const int count_test_block = 5000;
@@ -86,9 +87,10 @@ TEST(av1_fwd_txfm1d, accuracy) {
           reference_hybrid_1d(ref_input, ref_output, txfm_size, txfm_type);
 
           for (int ni = 0; ni < txfm_size; ++ni) {
-            EXPECT_LE(
+            ASSERT_LE(
                 abs(output[ni] - static_cast<int32_t>(round(ref_output[ni]))),
-                max_error);
+                max_error)
+                << "tx size = " << txfm_size << ", tx type = " << txfm_type;
           }
         }
       }
diff --git a/third_party/aom/test/av1_fwd_txfm2d_test.cc b/third_party/aom/test/av1_fwd_txfm2d_test.cc
index adf9a803cc..e0294be4eb 100644
--- a/third_party/aom/test/av1_fwd_txfm2d_test.cc
+++ b/third_party/aom/test/av1_fwd_txfm2d_test.cc
@@ -12,24 +12,26 @@
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <vector>
+
+#include "config/av1_rtcd.h"
 
 #include "test/acm_random.h"
 #include "test/util.h"
 #include "test/av1_txfm_test.h"
 #include "av1/common/av1_txfm.h"
-#include "./av1_rtcd.h"
 
 using libaom_test::ACMRandom;
-using libaom_test::input_base;
+using libaom_test::TYPE_TXFM;
 using libaom_test::bd;
 using libaom_test::compute_avg_abs_error;
-using libaom_test::Fwd_Txfm2d_Func;
-using libaom_test::TYPE_TXFM;
+using libaom_test::input_base;
+
+using std::vector;
 
 namespace {
-#if CONFIG_HIGHBITDEPTH
 // tx_type_, tx_size_, max_error_, max_avg_error_
-typedef std::tr1::tuple<TX_TYPE, TX_SIZE, double, double> AV1FwdTxfm2dParam;
+typedef ::testing::tuple<TX_TYPE, TX_SIZE, double, double> AV1FwdTxfm2dParam;
 
 class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> {
  public:
@@ -39,22 +41,16 @@ class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> {
     max_error_ = GET_PARAM(2);
     max_avg_error_ = GET_PARAM(3);
     count_ = 500;
-    TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg =
-        av1_get_fwd_txfm_cfg(tx_type_, tx_size_);
-    // TODO(sarahparker) this test will need to be updated when these
-    // functions are extended to support rectangular transforms
-    int amplify_bit = fwd_txfm_flip_cfg.row_cfg->shift[0] +
-                      fwd_txfm_flip_cfg.row_cfg->shift[1] +
-                      fwd_txfm_flip_cfg.row_cfg->shift[2];
+    TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg;
+    av1_get_fwd_txfm_cfg(tx_type_, tx_size_, &fwd_txfm_flip_cfg);
+    amplify_factor_ = libaom_test::get_amplification_factor(tx_type_, tx_size_);
+    tx_width_ = tx_size_wide[fwd_txfm_flip_cfg.tx_size];
+    tx_height_ = tx_size_high[fwd_txfm_flip_cfg.tx_size];
     ud_flip_ = fwd_txfm_flip_cfg.ud_flip;
     lr_flip_ = fwd_txfm_flip_cfg.lr_flip;
-    amplify_factor_ =
-        amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));
 
     fwd_txfm_ = libaom_test::fwd_txfm_func_ls[tx_size_];
-    txfm1d_size_ = libaom_test::get_txfm1d_size(tx_size_);
-    txfm2d_size_ = txfm1d_size_ * txfm1d_size_;
-    get_txfm1d_type(tx_type_, &type0_, &type1_);
+    txfm2d_size_ = tx_width_ * tx_height_;
     input_ = reinterpret_cast<int16_t *>(
         aom_memalign(16, sizeof(input_[0]) * txfm2d_size_));
     output_ = reinterpret_cast<int32_t *>(
@@ -76,33 +72,40 @@ class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> {
         ref_output_[ni] = 0;
       }
 
-      fwd_txfm_(input_, output_, txfm1d_size_, tx_type_, bd);
+      fwd_txfm_(input_, output_, tx_width_, tx_type_, bd);
 
-      if (lr_flip_ && ud_flip_)
-        libaom_test::fliplrud(ref_input_, txfm1d_size_, txfm1d_size_);
-      else if (lr_flip_)
-        libaom_test::fliplr(ref_input_, txfm1d_size_, txfm1d_size_);
-      else if (ud_flip_)
-        libaom_test::flipud(ref_input_, txfm1d_size_, txfm1d_size_);
+      if (lr_flip_ && ud_flip_) {
+        libaom_test::fliplrud(ref_input_, tx_width_, tx_height_, tx_width_);
+      } else if (lr_flip_) {
+        libaom_test::fliplr(ref_input_, tx_width_, tx_height_, tx_width_);
+      } else if (ud_flip_) {
+        libaom_test::flipud(ref_input_, tx_width_, tx_height_, tx_width_);
+      }
 
-      reference_hybrid_2d(ref_input_, ref_output_, txfm1d_size_, type0_,
-                          type1_);
+      libaom_test::reference_hybrid_2d(ref_input_, ref_output_, tx_type_,
+                                       tx_size_);
 
+      double actual_max_error = 0;
       for (int ni = 0; ni < txfm2d_size_; ++ni) {
-        ref_output_[ni] = round(ref_output_[ni] * amplify_factor_);
-        EXPECT_GE(max_error_,
-                  fabs(output_[ni] - ref_output_[ni]) / amplify_factor_);
+        ref_output_[ni] = round(ref_output_[ni]);
+        const double this_error =
+            fabs(output_[ni] - ref_output_[ni]) / amplify_factor_;
+        actual_max_error = AOMMAX(actual_max_error, this_error);
+      }
+      EXPECT_GE(max_error_, actual_max_error)
+          << "tx_size = " << tx_size_ << ", tx_type = " << tx_type_;
+      if (actual_max_error > max_error_) {  // exit early.
+        break;
       }
+
       avg_abs_error += compute_avg_abs_error<int32_t, double>(
           output_, ref_output_, txfm2d_size_);
     }
 
     avg_abs_error /= amplify_factor_;
     avg_abs_error /= count_;
-    // max_abs_avg_error comes from upper bound of avg_abs_error
-    // printf("type0: %d type1: %d txfm_size: %d accuracy_avg_abs_error:
-    // %f\n", type0_, type1_, txfm1d_size_, avg_abs_error);
-    EXPECT_GE(max_avg_error_, avg_abs_error);
+    EXPECT_GE(max_avg_error_, avg_abs_error)
+        << "tx_size = " << tx_size_ << ", tx_type = " << tx_type_;
   }
 
   virtual void TearDown() {
@@ -119,11 +122,10 @@ class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> {
   double amplify_factor_;
   TX_TYPE tx_type_;
   TX_SIZE tx_size_;
-  int txfm1d_size_;
+  int tx_width_;
+  int tx_height_;
   int txfm2d_size_;
-  Fwd_Txfm2d_Func fwd_txfm_;
-  TYPE_TXFM type0_;
-  TYPE_TXFM type1_;
+  FwdTxfm2dFunc fwd_txfm_;
   int16_t *input_;
   int32_t *output_;
   double *ref_input_;
@@ -132,76 +134,209 @@ class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> {
   int lr_flip_;  // flip left to right
 };
 
-TEST_P(AV1FwdTxfm2d, RunFwdAccuracyCheck) { RunFwdAccuracyCheck(); }
-const AV1FwdTxfm2dParam av1_fwd_txfm2d_param_c[] = {
-#if CONFIG_EXT_TX
-  AV1FwdTxfm2dParam(FLIPADST_DCT, TX_4X4, 2, 0.2),
-  AV1FwdTxfm2dParam(DCT_FLIPADST, TX_4X4, 2, 0.2),
-  AV1FwdTxfm2dParam(FLIPADST_FLIPADST, TX_4X4, 2, 0.2),
-  AV1FwdTxfm2dParam(ADST_FLIPADST, TX_4X4, 2, 0.2),
-  AV1FwdTxfm2dParam(FLIPADST_ADST, TX_4X4, 2, 0.2),
-  AV1FwdTxfm2dParam(FLIPADST_DCT, TX_8X8, 5, 0.6),
-  AV1FwdTxfm2dParam(DCT_FLIPADST, TX_8X8, 5, 0.6),
-  AV1FwdTxfm2dParam(FLIPADST_FLIPADST, TX_8X8, 5, 0.6),
-  AV1FwdTxfm2dParam(ADST_FLIPADST, TX_8X8, 5, 0.6),
-  AV1FwdTxfm2dParam(FLIPADST_ADST, TX_8X8, 5, 0.6),
-  AV1FwdTxfm2dParam(FLIPADST_DCT, TX_16X16, 11, 1.5),
-  AV1FwdTxfm2dParam(DCT_FLIPADST, TX_16X16, 11, 1.5),
-  AV1FwdTxfm2dParam(FLIPADST_FLIPADST, TX_16X16, 11, 1.5),
-  AV1FwdTxfm2dParam(ADST_FLIPADST, TX_16X16, 11, 1.5),
-  AV1FwdTxfm2dParam(FLIPADST_ADST, TX_16X16, 11, 1.5),
-  AV1FwdTxfm2dParam(FLIPADST_DCT, TX_32X32, 70, 7),
-  AV1FwdTxfm2dParam(DCT_FLIPADST, TX_32X32, 70, 7),
-  AV1FwdTxfm2dParam(FLIPADST_FLIPADST, TX_32X32, 70, 7),
-  AV1FwdTxfm2dParam(ADST_FLIPADST, TX_32X32, 70, 7),
-  AV1FwdTxfm2dParam(FLIPADST_ADST, TX_32X32, 70, 7),
-#endif
-  AV1FwdTxfm2dParam(DCT_DCT, TX_4X4, 2, 0.2),
-  AV1FwdTxfm2dParam(ADST_DCT, TX_4X4, 2, 0.2),
-  AV1FwdTxfm2dParam(DCT_ADST, TX_4X4, 2, 0.2),
-  AV1FwdTxfm2dParam(ADST_ADST, TX_4X4, 2, 0.2),
-  AV1FwdTxfm2dParam(DCT_DCT, TX_8X8, 5, 0.6),
-  AV1FwdTxfm2dParam(ADST_DCT, TX_8X8, 5, 0.6),
-  AV1FwdTxfm2dParam(DCT_ADST, TX_8X8, 5, 0.6),
-  AV1FwdTxfm2dParam(ADST_ADST, TX_8X8, 5, 0.6),
-  AV1FwdTxfm2dParam(DCT_DCT, TX_16X16, 11, 1.5),
-  AV1FwdTxfm2dParam(ADST_DCT, TX_16X16, 11, 1.5),
-  AV1FwdTxfm2dParam(DCT_ADST, TX_16X16, 11, 1.5),
-  AV1FwdTxfm2dParam(ADST_ADST, TX_16X16, 11, 1.5),
-  AV1FwdTxfm2dParam(DCT_DCT, TX_32X32, 70, 7),
-  AV1FwdTxfm2dParam(ADST_DCT, TX_32X32, 70, 7),
-  AV1FwdTxfm2dParam(DCT_ADST, TX_32X32, 70, 7),
-  AV1FwdTxfm2dParam(ADST_ADST, TX_32X32, 70, 7)
+static double avg_error_ls[TX_SIZES_ALL] = {
+  0.5,   // 4x4 transform
+  0.5,   // 8x8 transform
+  1.2,   // 16x16 transform
+  6.1,   // 32x32 transform
+  3.4,   // 64x64 transform
+  0.57,  // 4x8 transform
+  0.68,  // 8x4 transform
+  0.92,  // 8x16 transform
+  1.1,   // 16x8 transform
+  4.1,   // 16x32 transform
+  6,     // 32x16 transform
+  3.5,   // 32x64 transform
+  5.7,   // 64x32 transform
+  0.6,   // 4x16 transform
+  0.9,   // 16x4 transform
+  1.2,   // 8x32 transform
+  1.7,   // 32x8 transform
+  2.0,   // 16x64 transform
+  4.7,   // 64x16 transform
 };
 
+static double max_error_ls[TX_SIZES_ALL] = {
+  3,    // 4x4 transform
+  5,    // 8x8 transform
+  11,   // 16x16 transform
+  70,   // 32x32 transform
+  64,   // 64x64 transform
+  3.9,  // 4x8 transform
+  4.3,  // 8x4 transform
+  12,   // 8x16 transform
+  12,   // 16x8 transform
+  32,   // 16x32 transform
+  46,   // 32x16 transform
+  136,  // 32x64 transform
+  136,  // 64x32 transform
+  5,    // 4x16 transform
+  6,    // 16x4 transform
+  21,   // 8x32 transform
+  13,   // 32x8 transform
+  30,   // 16x64 transform
+  36,   // 64x16 transform
+};
+
+vector<AV1FwdTxfm2dParam> GetTxfm2dParamList() {
+  vector<AV1FwdTxfm2dParam> param_list;
+  for (int s = 0; s < TX_SIZES; ++s) {
+    const double max_error = max_error_ls[s];
+    const double avg_error = avg_error_ls[s];
+    for (int t = 0; t < TX_TYPES; ++t) {
+      const TX_TYPE tx_type = static_cast<TX_TYPE>(t);
+      const TX_SIZE tx_size = static_cast<TX_SIZE>(s);
+      if (libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) {
+        param_list.push_back(
+            AV1FwdTxfm2dParam(tx_type, tx_size, max_error, avg_error));
+      }
+    }
+  }
+  return param_list;
+}
+
 INSTANTIATE_TEST_CASE_P(C, AV1FwdTxfm2d,
-                        ::testing::ValuesIn(av1_fwd_txfm2d_param_c));
+                        ::testing::ValuesIn(GetTxfm2dParamList()));
+
+TEST_P(AV1FwdTxfm2d, RunFwdAccuracyCheck) { RunFwdAccuracyCheck(); }
 
 TEST(AV1FwdTxfm2d, CfgTest) {
   for (int bd_idx = 0; bd_idx < BD_NUM; ++bd_idx) {
     int bd = libaom_test::bd_arr[bd_idx];
     int8_t low_range = libaom_test::low_range_arr[bd_idx];
     int8_t high_range = libaom_test::high_range_arr[bd_idx];
-    // TODO(angiebird): include rect txfm in this test
-    for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+    for (int tx_size = 0; tx_size < TX_SIZES_ALL; ++tx_size) {
       for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
-        TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(
-            static_cast<TX_TYPE>(tx_type), static_cast<TX_SIZE>(tx_size));
+        if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(tx_size),
+                                           static_cast<TX_TYPE>(tx_type)) ==
+            false) {
+          continue;
+        }
+        TXFM_2D_FLIP_CFG cfg;
+        av1_get_fwd_txfm_cfg(static_cast<TX_TYPE>(tx_type),
+                             static_cast<TX_SIZE>(tx_size), &cfg);
         int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
         int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
         av1_gen_fwd_stage_range(stage_range_col, stage_range_row, &cfg, bd);
-        const TXFM_1D_CFG *col_cfg = cfg.col_cfg;
-        const TXFM_1D_CFG *row_cfg = cfg.row_cfg;
-        libaom_test::txfm_stage_range_check(stage_range_col, col_cfg->stage_num,
-                                            col_cfg->cos_bit, low_range,
+        libaom_test::txfm_stage_range_check(stage_range_col, cfg.stage_num_col,
+                                            cfg.cos_bit_col, low_range,
                                             high_range);
-        libaom_test::txfm_stage_range_check(stage_range_row, row_cfg->stage_num,
-                                            row_cfg->cos_bit, low_range,
+        libaom_test::txfm_stage_range_check(stage_range_row, cfg.stage_num_row,
+                                            cfg.cos_bit_row, low_range,
                                             high_range);
       }
     }
   }
 }
 
-#endif  // CONFIG_HIGHBITDEPTH
+typedef void (*lowbd_fwd_txfm_func)(const int16_t *src_diff, tran_low_t *coeff,
+                                    int diff_stride, TxfmParam *txfm_param);
+
+void AV1FwdTxfm2dMatchTest(TX_SIZE tx_size, lowbd_fwd_txfm_func target_func) {
+  const int bd = 8;
+  TxfmParam param;
+  memset(&param, 0, sizeof(param));
+  const int rows = tx_size_high[tx_size];
+  const int cols = tx_size_wide[tx_size];
+  // printf("%d x %d\n", cols, rows);
+  for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+    if (libaom_test::IsTxSizeTypeValid(
+            tx_size, static_cast<TX_TYPE>(tx_type)) == false) {
+      continue;
+    }
+
+    FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size];
+    if (ref_func != NULL) {
+      DECLARE_ALIGNED(16, int16_t, input[64 * 64]) = { 0 };
+      DECLARE_ALIGNED(16, int32_t, output[64 * 64]);
+      DECLARE_ALIGNED(16, int32_t, ref_output[64 * 64]);
+      int input_stride = 64;
+      ACMRandom rnd(ACMRandom::DeterministicSeed());
+      for (int cnt = 0; cnt < 500; ++cnt) {
+        if (cnt == 0) {
+          for (int r = 0; r < rows; ++r) {
+            for (int c = 0; c < cols; ++c) {
+              input[r * input_stride + c] = (1 << bd) - 1;
+            }
+          }
+        } else {
+          for (int r = 0; r < rows; ++r) {
+            for (int c = 0; c < cols; ++c) {
+              input[r * input_stride + c] = rnd.Rand16() % (1 << bd);
+            }
+          }
+        }
+        param.tx_type = (TX_TYPE)tx_type;
+        param.tx_size = (TX_SIZE)tx_size;
+        param.tx_set_type = EXT_TX_SET_ALL16;
+        param.bd = bd;
+        ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd);
+        target_func(input, output, input_stride, &param);
+        const int check_rows = AOMMIN(32, rows);
+        const int check_cols = AOMMIN(32, rows * cols / check_rows);
+        for (int r = 0; r < check_rows; ++r) {
+          for (int c = 0; c < check_cols; ++c) {
+            ASSERT_EQ(ref_output[r * check_cols + c],
+                      output[r * check_cols + c])
+                << "[" << r << "," << c << "] cnt:" << cnt
+                << " tx_size: " << tx_size << " tx_type: " << tx_type;
+          }
+        }
+      }
+    }
+  }
+}
+
+typedef ::testing::tuple<TX_SIZE, lowbd_fwd_txfm_func> LbdFwdTxfm2dParam;
+
+class AV1FwdTxfm2dTest : public ::testing::TestWithParam<LbdFwdTxfm2dParam> {};
+
+TEST_P(AV1FwdTxfm2dTest, match) {
+  AV1FwdTxfm2dMatchTest(GET_PARAM(0), GET_PARAM(1));
+}
+
+using ::testing::Combine;
+using ::testing::Values;
+using ::testing::ValuesIn;
+
+#if HAVE_SSE2
+static TX_SIZE fwd_txfm_for_sse2[] = {
+  TX_4X4,
+  TX_8X8,
+  TX_16X16,
+  TX_32X32,
+  // TX_64X64,
+  TX_4X8,
+  TX_8X4,
+  TX_8X16,
+  TX_16X8,
+  TX_16X32,
+  TX_32X16,
+  // TX_32X64,
+  // TX_64X32,
+  TX_4X16,
+  TX_16X4,
+  TX_8X32,
+  TX_32X8,
+  TX_16X64,
+  TX_64X16,
+};
+
+INSTANTIATE_TEST_CASE_P(SSE2, AV1FwdTxfm2dTest,
+                        Combine(ValuesIn(fwd_txfm_for_sse2),
+                                Values(av1_lowbd_fwd_txfm_sse2)));
+#endif  // HAVE_SSE2
+
+#if HAVE_SSE4_1
+static TX_SIZE fwd_txfm_for_sse41[] = {
+  TX_4X4,
+  TX_64X64,
+  TX_32X64,
+  TX_64X32,
+};
+
+INSTANTIATE_TEST_CASE_P(SSE4_1, AV1FwdTxfm2dTest,
+                        Combine(ValuesIn(fwd_txfm_for_sse41),
+                                Values(av1_lowbd_fwd_txfm_sse4_1)));
+#endif  // HAVE_SSE4_1
 }  // namespace
diff --git a/third_party/aom/test/av1_highbd_iht_test.cc b/third_party/aom/test/av1_highbd_iht_test.cc
index 45df5ed844..8cadc85e79 100644
--- a/third_party/aom/test/av1_highbd_iht_test.cc
+++ b/third_party/aom/test/av1_highbd_iht_test.cc
@@ -11,7 +11,8 @@
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./av1_rtcd.h"
+#include "config/av1_rtcd.h"
+
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
@@ -22,7 +23,7 @@
 
 namespace {
 
-using std::tr1::tuple;
+using ::testing::tuple;
 using libaom_test::ACMRandom;
 
 typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
@@ -88,6 +89,8 @@ class AV1HighbdInvHTNxN : public ::testing::TestWithParam<IHbdHtParam> {
       return 16;
     } else if (1024 == num_coeffs_) {
       return 32;
+    } else if (4096 == num_coeffs_) {
+      return 64;
     } else {
       return 0;
     }
@@ -133,28 +136,24 @@ void AV1HighbdInvHTNxN::RunBitexactCheck() {
 
 TEST_P(AV1HighbdInvHTNxN, InvTransResultCheck) { RunBitexactCheck(); }
 
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
 
-#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && \
-    !(CONFIG_DAALA_DCT4 && CONFIG_DAALA_DCT8 && CONFIG_DAALA_DCT16)
-#if !CONFIG_DAALA_DCT4
+#if HAVE_SSE4_1
 #define PARAM_LIST_4X4                                   \
   &av1_fwd_txfm2d_4x4_c, &av1_inv_txfm2d_add_4x4_sse4_1, \
       &av1_inv_txfm2d_add_4x4_c, 16
-#endif
-#if !CONFIG_DAALA_DCT8
 #define PARAM_LIST_8X8                                   \
   &av1_fwd_txfm2d_8x8_c, &av1_inv_txfm2d_add_8x8_sse4_1, \
       &av1_inv_txfm2d_add_8x8_c, 64
-#endif
-#if !CONFIG_DAALA_DCT16
 #define PARAM_LIST_16X16                                     \
   &av1_fwd_txfm2d_16x16_c, &av1_inv_txfm2d_add_16x16_sse4_1, \
       &av1_inv_txfm2d_add_16x16_c, 256
-#endif
+#define PARAM_LIST_64X64                                     \
+  &av1_fwd_txfm2d_64x64_c, &av1_inv_txfm2d_add_64x64_sse4_1, \
+      &av1_inv_txfm2d_add_64x64_c, 4096
+
 const IHbdHtParam kArrayIhtParam[] = {
-// 16x16
-#if !CONFIG_DAALA_DCT16
+  // 16x16
   make_tuple(PARAM_LIST_16X16, DCT_DCT, 10),
   make_tuple(PARAM_LIST_16X16, DCT_DCT, 12),
   make_tuple(PARAM_LIST_16X16, ADST_DCT, 10),
@@ -163,7 +162,6 @@ const IHbdHtParam kArrayIhtParam[] = {
   make_tuple(PARAM_LIST_16X16, DCT_ADST, 12),
   make_tuple(PARAM_LIST_16X16, ADST_ADST, 10),
   make_tuple(PARAM_LIST_16X16, ADST_ADST, 12),
-#if CONFIG_EXT_TX
   make_tuple(PARAM_LIST_16X16, FLIPADST_DCT, 10),
   make_tuple(PARAM_LIST_16X16, FLIPADST_DCT, 12),
   make_tuple(PARAM_LIST_16X16, DCT_FLIPADST, 10),
@@ -174,10 +172,7 @@ const IHbdHtParam kArrayIhtParam[] = {
   make_tuple(PARAM_LIST_16X16, ADST_FLIPADST, 12),
   make_tuple(PARAM_LIST_16X16, FLIPADST_ADST, 10),
   make_tuple(PARAM_LIST_16X16, FLIPADST_ADST, 12),
-#endif
-#endif
-// 8x8
-#if !CONFIG_DAALA_DCT8
+  // 8x8
   make_tuple(PARAM_LIST_8X8, DCT_DCT, 10),
   make_tuple(PARAM_LIST_8X8, DCT_DCT, 12),
   make_tuple(PARAM_LIST_8X8, ADST_DCT, 10),
@@ -186,7 +181,6 @@ const IHbdHtParam kArrayIhtParam[] = {
   make_tuple(PARAM_LIST_8X8, DCT_ADST, 12),
   make_tuple(PARAM_LIST_8X8, ADST_ADST, 10),
   make_tuple(PARAM_LIST_8X8, ADST_ADST, 12),
-#if CONFIG_EXT_TX
   make_tuple(PARAM_LIST_8X8, FLIPADST_DCT, 10),
   make_tuple(PARAM_LIST_8X8, FLIPADST_DCT, 12),
   make_tuple(PARAM_LIST_8X8, DCT_FLIPADST, 10),
@@ -197,10 +191,7 @@ const IHbdHtParam kArrayIhtParam[] = {
   make_tuple(PARAM_LIST_8X8, ADST_FLIPADST, 12),
   make_tuple(PARAM_LIST_8X8, FLIPADST_ADST, 10),
   make_tuple(PARAM_LIST_8X8, FLIPADST_ADST, 12),
-#endif
-#endif
-// 4x4
-#if !CONFIG_DAALA_DCT4
+  // 4x4
   make_tuple(PARAM_LIST_4X4, DCT_DCT, 10),
   make_tuple(PARAM_LIST_4X4, DCT_DCT, 12),
   make_tuple(PARAM_LIST_4X4, ADST_DCT, 10),
@@ -209,7 +200,6 @@ const IHbdHtParam kArrayIhtParam[] = {
   make_tuple(PARAM_LIST_4X4, DCT_ADST, 12),
   make_tuple(PARAM_LIST_4X4, ADST_ADST, 10),
   make_tuple(PARAM_LIST_4X4, ADST_ADST, 12),
-#if CONFIG_EXT_TX
   make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 10),
   make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 12),
   make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 10),
@@ -220,16 +210,15 @@ const IHbdHtParam kArrayIhtParam[] = {
   make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 12),
   make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 10),
   make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 12),
-#endif
-#endif
+  make_tuple(PARAM_LIST_64X64, DCT_DCT, 10),
+  make_tuple(PARAM_LIST_64X64, DCT_DCT, 12),
 };
 
 INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdInvHTNxN,
                         ::testing::ValuesIn(kArrayIhtParam));
-#endif  // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH &&
-        //  !(CONFIG_DAALA_DCT4 && CONFIG_DAALA_DCT8 && CONFIG_DAALA_DCT16)
+#endif  // HAVE_SSE4_1
 
-#if HAVE_AVX2 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT32
+#if HAVE_AVX2
 #define PARAM_LIST_32X32                                   \
   &av1_fwd_txfm2d_32x32_c, &av1_inv_txfm2d_add_32x32_avx2, \
       &av1_inv_txfm2d_add_32x32_c, 1024
@@ -243,5 +232,5 @@ const IHbdHtParam kArrayIhtParam32x32[] = {
 INSTANTIATE_TEST_CASE_P(AVX2, AV1HighbdInvHTNxN,
                         ::testing::ValuesIn(kArrayIhtParam32x32));
 
-#endif  // HAVE_AVX2 && CONFIG_HIGHBITDEPTH
+#endif  // HAVE_AVX2
 }  // namespace
diff --git a/third_party/aom/test/av1_horz_only_frame_superres_test.cc b/third_party/aom/test/av1_horz_only_frame_superres_test.cc
new file mode 100644
index 0000000000..fd77ef35d7
--- /dev/null
+++ b/third_party/aom/test/av1_horz_only_frame_superres_test.cc
@@ -0,0 +1,362 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <vector>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/aom_timer.h"
+#include "av1/common/convolve.h"
+#include "av1/common/resize.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+const int kTestIters = 10;
+const int kPerfIters = 1000;
+
+const int kVPad = 32;
+const int kHPad = 32;
+
+using ::testing::make_tuple;
+using ::testing::tuple;
+using libaom_test::ACMRandom;
+
+template <typename Pixel>
+class TestImage {
+ public:
+  TestImage(int w_src, int h, int superres_denom, int x0, int bd)
+      : w_src_(w_src), h_(h), superres_denom_(superres_denom), x0_(x0),
+        bd_(bd) {
+    assert(bd < 16);
+    assert(bd <= 8 * static_cast<int>(sizeof(Pixel)));
+    assert(9 <= superres_denom && superres_denom <= 16);
+    assert(SCALE_NUMERATOR == 8);
+    assert(0 <= x0_ && x0_ <= RS_SCALE_SUBPEL_MASK);
+
+    w_dst_ = w_src_;
+    av1_calculate_unscaled_superres_size(&w_dst_, NULL, superres_denom);
+
+    src_stride_ = ALIGN_POWER_OF_TWO(w_src_ + 2 * kHPad, 4);
+    dst_stride_ = ALIGN_POWER_OF_TWO(w_dst_ + 2 * kHPad, 4);
+
+    // Allocate image data
+    src_data_.resize(2 * src_block_size());
+    dst_data_.resize(2 * dst_block_size());
+  }
+
+  void Initialize(ACMRandom *rnd);
+  void Check() const;
+
+  int src_stride() const { return src_stride_; }
+  int dst_stride() const { return dst_stride_; }
+
+  int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); }
+  int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); }
+
+  int src_width() const { return w_src_; }
+  int dst_width() const { return w_dst_; }
+  int height() const { return h_; }
+  int x0() const { return x0_; }
+
+  const Pixel *GetSrcData(bool ref, bool borders) const {
+    const Pixel *block = &src_data_[ref ? 0 : src_block_size()];
+    return borders ? block : block + kHPad + src_stride_ * kVPad;
+  }
+
+  Pixel *GetDstData(bool ref, bool borders) {
+    Pixel *block = &dst_data_[ref ? 0 : dst_block_size()];
+    return borders ? block : block + kHPad + dst_stride_ * kVPad;
+  }
+
+ private:
+  int w_src_, w_dst_, h_, superres_denom_, x0_, bd_;
+  int src_stride_, dst_stride_;
+
+  std::vector<Pixel> src_data_;
+  std::vector<Pixel> dst_data_;
+};
+
+template <typename Pixel>
+void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) {
+  if (!trash) {
+    memset(data, 0, sizeof(*data) * num_pixels);
+    return;
+  }
+  const Pixel mask = (1 << bd) - 1;
+  for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask;
+}
+
+template <typename Pixel>
+void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd,
+                 bool trash_edges, Pixel *data) {
+  assert(rnd);
+  const Pixel mask = (1 << bd) - 1;
+
+  // Fill in the first buffer with random data
+  // Top border
+  FillEdge(rnd, stride * kVPad, bd, trash_edges, data);
+  for (int r = 0; r < h; ++r) {
+    Pixel *row_data = data + (kVPad + r) * stride;
+    // Left border, contents, right border
+    FillEdge(rnd, kHPad, bd, trash_edges, row_data);
+    for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask;
+    FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w);
+  }
+  // Bottom border
+  FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h));
+
+  const int bpp = sizeof(*data);
+  const int block_elts = stride * (h + 2 * kVPad);
+  const int block_size = bpp * block_elts;
+
+  // Now copy that to the second buffer
+  memcpy(data + block_elts, data, block_size);
+}
+
+template <typename Pixel>
+void TestImage<Pixel>::Initialize(ACMRandom *rnd) {
+  PrepBuffers(rnd, w_src_, h_, src_stride_, bd_, false, &src_data_[0]);
+  PrepBuffers(rnd, w_dst_, h_, dst_stride_, bd_, true, &dst_data_[0]);
+}
+
+template <typename Pixel>
+void TestImage<Pixel>::Check() const {
+  const int num_pixels = dst_block_size();
+  const Pixel *ref_dst = &dst_data_[0];
+  const Pixel *tst_dst = &dst_data_[num_pixels];
+
+  // If memcmp returns 0, there's nothing to do.
+  if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) return;
+
+  // Otherwise, iterate through the buffer looking for differences, *ignoring
+  // the edges*
+  const int stride = dst_stride_;
+  for (int r = kVPad; r < h_ + kVPad; ++r) {
+    for (int c = kVPad; c < w_dst_ + kHPad; ++c) {
+      const int32_t ref_value = ref_dst[r * stride + c];
+      const int32_t tst_value = tst_dst[r * stride + c];
+
+      EXPECT_EQ(tst_value, ref_value)
+          << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad)
+          << ", superres_denom: " << superres_denom_ << ", height: " << h_
+          << ", src_width: " << w_src_ << ", dst_width: " << w_dst_
+          << ", x0: " << x0_;
+    }
+  }
+}
+
+template <typename Pixel>
+class ConvolveHorizRSTestBase : public ::testing::Test {
+ public:
+  ConvolveHorizRSTestBase() : image_(NULL) {}
+  virtual ~ConvolveHorizRSTestBase() {}
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+  // Implemented by subclasses (SetUp depends on the parameters passed
+  // in and RunOne depends on the function to be tested. These can't
+  // be templated for low/high bit depths because they have different
+  // numbers of parameters)
+  virtual void SetUp() = 0;
+  virtual void RunOne(bool ref) = 0;
+
+ protected:
+  void SetBitDepth(int bd) { bd_ = bd; }
+
+  void CorrectnessTest() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    for (int i = 0; i < kTestIters; ++i) {
+      for (int superres_denom = 9; superres_denom <= 16; superres_denom++) {
+        // Get a random height between 512 and 767
+        int height = rnd.Rand8() + 512;
+
+        // Get a random src width between 128 and 383
+        int width_src = rnd.Rand8() + 128;
+
+        // x0 is normally calculated by get_upscale_convolve_x0 in
+        // av1/common/resize.c. However, this test should work for
+        // any value of x0 between 0 and RS_SCALE_SUBPEL_MASK
+        // (inclusive), so we choose one at random.
+        int x0 = rnd.Rand16() % (RS_SCALE_SUBPEL_MASK + 1);
+
+        image_ =
+            new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_);
+
+        Prep(&rnd);
+        RunOne(true);
+        RunOne(false);
+        image_->Check();
+
+        delete image_;
+      }
+    }
+  }
+
+  void SpeedTest() {
+    // Pick some specific parameters to test
+    int height = 767;
+    int width_src = 129;
+    int superres_denom = 13;
+    int x0 = RS_SCALE_SUBPEL_MASK >> 1;
+
+    image_ = new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_);
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    Prep(&rnd);
+
+    aom_usec_timer ref_timer;
+    aom_usec_timer_start(&ref_timer);
+    for (int i = 0; i < kPerfIters; ++i) RunOne(true);
+    aom_usec_timer_mark(&ref_timer);
+    const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
+
+    aom_usec_timer tst_timer;
+    aom_usec_timer_start(&tst_timer);
+    for (int i = 0; i < kPerfIters; ++i) RunOne(false);
+    aom_usec_timer_mark(&tst_timer);
+    const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+    std::cout << "[          ] C time = " << ref_time / 1000
+              << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+    EXPECT_GT(ref_time, tst_time)
+        << "Error: ConvolveHorizRSTest (Speed Test), SIMD slower than C.\n"
+        << "C time: " << ref_time << " us\n"
+        << "SIMD time: " << tst_time << " us\n";
+  }
+
+  void Prep(ACMRandom *rnd) {
+    assert(rnd);
+    image_->Initialize(rnd);
+  }
+
+  int bd_;
+  TestImage<Pixel> *image_;
+};
+
+typedef void (*LowBDConvolveHorizRsFunc)(const uint8_t *src, int src_stride,
+                                         uint8_t *dst, int dst_stride, int w,
+                                         int h, const int16_t *x_filters,
+                                         const int x0_qn, const int x_step_qn);
+
+// Test parameter list:
+//  <tst_fun_>
+typedef tuple<LowBDConvolveHorizRsFunc> LowBDParams;
+
+class LowBDConvolveHorizRSTest
+    : public ConvolveHorizRSTestBase<uint8_t>,
+      public ::testing::WithParamInterface<LowBDParams> {
+ public:
+  virtual ~LowBDConvolveHorizRSTest() {}
+
+  void SetUp() {
+    tst_fun_ = GET_PARAM(0);
+    const int bd = 8;
+    SetBitDepth(bd);
+  }
+
+  void RunOne(bool ref) {
+    const uint8_t *src = image_->GetSrcData(ref, false);
+    uint8_t *dst = image_->GetDstData(ref, false);
+    const int src_stride = image_->src_stride();
+    const int dst_stride = image_->dst_stride();
+    const int width_src = image_->src_width();
+    const int width_dst = image_->dst_width();
+    const int height = image_->height();
+    const int x0_qn = image_->x0();
+
+    const int32_t x_step_qn =
+        av1_get_upscale_convolve_step(width_src, width_dst);
+
+    if (ref) {
+      av1_convolve_horiz_rs_c(src, src_stride, dst, dst_stride, width_dst,
+                              height, &av1_resize_filter_normative[0][0], x0_qn,
+                              x_step_qn);
+    } else {
+      tst_fun_(src, src_stride, dst, dst_stride, width_dst, height,
+               &av1_resize_filter_normative[0][0], x0_qn, x_step_qn);
+    }
+  }
+
+ private:
+  LowBDConvolveHorizRsFunc tst_fun_;
+};
+
+TEST_P(LowBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); }
+TEST_P(LowBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); }
+
+INSTANTIATE_TEST_CASE_P(SSE4_1, LowBDConvolveHorizRSTest,
+                        ::testing::Values(av1_convolve_horiz_rs_sse4_1));
+
+typedef void (*HighBDConvolveHorizRsFunc)(const uint16_t *src, int src_stride,
+                                          uint16_t *dst, int dst_stride, int w,
+                                          int h, const int16_t *x_filters,
+                                          const int x0_qn, const int x_step_qn,
+                                          int bd);
+
+// Test parameter list:
+//  <tst_fun_, bd_>
+typedef tuple<HighBDConvolveHorizRsFunc, int> HighBDParams;
+
+class HighBDConvolveHorizRSTest
+    : public ConvolveHorizRSTestBase<uint16_t>,
+      public ::testing::WithParamInterface<HighBDParams> {
+ public:
+  virtual ~HighBDConvolveHorizRSTest() {}
+
+  void SetUp() {
+    tst_fun_ = GET_PARAM(0);
+    const int bd = GET_PARAM(1);
+    SetBitDepth(bd);
+  }
+
+  void RunOne(bool ref) {
+    const uint16_t *src = image_->GetSrcData(ref, false);
+    uint16_t *dst = image_->GetDstData(ref, false);
+    const int src_stride = image_->src_stride();
+    const int dst_stride = image_->dst_stride();
+    const int width_src = image_->src_width();
+    const int width_dst = image_->dst_width();
+    const int height = image_->height();
+    const int x0_qn = image_->x0();
+
+    const int32_t x_step_qn =
+        av1_get_upscale_convolve_step(width_src, width_dst);
+
+    if (ref) {
+      av1_highbd_convolve_horiz_rs_c(
+          src, src_stride, dst, dst_stride, width_dst, height,
+          &av1_resize_filter_normative[0][0], x0_qn, x_step_qn, bd_);
+    } else {
+      tst_fun_(src, src_stride, dst, dst_stride, width_dst, height,
+               &av1_resize_filter_normative[0][0], x0_qn, x_step_qn, bd_);
+    }
+  }
+
+ private:
+  HighBDConvolveHorizRsFunc tst_fun_;
+};
+
+const int kBDs[] = { 8, 10, 12 };
+
+TEST_P(HighBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); }
+TEST_P(HighBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); }
+
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, HighBDConvolveHorizRSTest,
+    ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_sse4_1),
+                       ::testing::ValuesIn(kBDs)));
+
+}  // namespace
diff --git a/third_party/aom/test/av1_inv_txfm1d_test.cc b/third_party/aom/test/av1_inv_txfm1d_test.cc
index b44c04116f..bf3a44ed14 100644
--- a/third_party/aom/test/av1_inv_txfm1d_test.cc
+++ b/third_party/aom/test/av1_inv_txfm1d_test.cc
@@ -13,39 +13,35 @@
 
 #include "test/av1_txfm_test.h"
 #include "test/util.h"
-#include "av1/common/av1_fwd_txfm1d.h"
 #include "av1/common/av1_inv_txfm1d.h"
+#include "av1/encoder/av1_fwd_txfm1d.h"
 
 using libaom_test::ACMRandom;
 using libaom_test::input_base;
 
 namespace {
 const int txfm_type_num = 2;
-const int txfm_size_ls[5] = { 4, 8, 16, 32, 64 };
+const int txfm_size_ls[] = { 4, 8, 16, 32, 64 };
 
-const TxfmFunc fwd_txfm_func_ls[][2] = {
+const TxfmFunc fwd_txfm_func_ls[][txfm_type_num] = {
   { av1_fdct4_new, av1_fadst4_new },
   { av1_fdct8_new, av1_fadst8_new },
   { av1_fdct16_new, av1_fadst16_new },
-  { av1_fdct32_new, av1_fadst32_new },
-#if CONFIG_TX64X64
+  { av1_fdct32_new, NULL },
   { av1_fdct64_new, NULL },
-#endif
 };
 
-const TxfmFunc inv_txfm_func_ls[][2] = {
+const TxfmFunc inv_txfm_func_ls[][txfm_type_num] = {
   { av1_idct4_new, av1_iadst4_new },
   { av1_idct8_new, av1_iadst8_new },
   { av1_idct16_new, av1_iadst16_new },
-  { av1_idct32_new, av1_iadst32_new },
-#if CONFIG_TX64X64
+  { av1_idct32_new, NULL },
   { av1_idct64_new, NULL },
-#endif
 };
 
 // the maximum stage number of fwd/inv 1d dct/adst txfm is 12
-const int8_t cos_bit[12] = { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 };
-const int8_t range_bit[12] = { 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 };
+const int8_t cos_bit = 13;
+const int8_t range_bit[12] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 };
 
 void reference_idct_1d_int(const int32_t *in, int32_t *out, int size) {
   double input[64];
@@ -54,8 +50,11 @@ void reference_idct_1d_int(const int32_t *in, int32_t *out, int size) {
   double output[64];
   libaom_test::reference_idct_1d(input, output, size);
 
-  for (int i = 0; i < size; ++i)
+  for (int i = 0; i < size; ++i) {
+    ASSERT_GE(output[i], INT32_MIN);
+    ASSERT_LE(output[i], INT32_MAX);
     out[i] = static_cast<int32_t>(round(output[i]));
+  }
 }
 
 void random_matrix(int32_t *dst, int len, ACMRandom *rnd) {
@@ -73,24 +72,32 @@ void random_matrix(int32_t *dst, int len, ACMRandom *rnd) {
 TEST(av1_inv_txfm1d, InvAccuracyCheck) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
   const int count_test_block = 20000;
-  const int max_error[] = { 6, 10, 19, 28 };
+  const int max_error[] = { 6, 10, 19, 31, 40 };
+  ASSERT_EQ(NELEMENTS(max_error), TX_SIZES);
+  ASSERT_EQ(NELEMENTS(inv_txfm_func_ls), TX_SIZES);
   for (int k = 0; k < count_test_block; ++k) {
     // choose a random transform to test
-    const int txfm_type = rnd.Rand8() % NELEMENTS(inv_txfm_func_ls);
-    const int txfm_size = txfm_size_ls[txfm_type];
-    const TxfmFunc txfm_func = inv_txfm_func_ls[txfm_type][0];
+    const TX_SIZE tx_size = static_cast<TX_SIZE>(rnd.Rand8() % TX_SIZES);
+    const int tx_size_pix = txfm_size_ls[tx_size];
+    const TxfmFunc inv_txfm_func = inv_txfm_func_ls[tx_size][0];
 
     int32_t input[64];
-    random_matrix(input, txfm_size, &rnd);
+    random_matrix(input, tx_size_pix, &rnd);
+
+    // 64x64 transform assumes last 32 values are zero.
+    memset(input + 32, 0, 32 * sizeof(input[0]));
 
     int32_t ref_output[64];
-    reference_idct_1d_int(input, ref_output, txfm_size);
+    reference_idct_1d_int(input, ref_output, tx_size_pix);
 
     int32_t output[64];
-    txfm_func(input, output, cos_bit, range_bit);
+    inv_txfm_func(input, output, cos_bit, range_bit);
 
-    for (int i = 0; i < txfm_size; ++i) {
-      EXPECT_LE(abs(output[i] - ref_output[i]), max_error[txfm_type]);
+    for (int i = 0; i < tx_size_pix; ++i) {
+      EXPECT_LE(abs(output[i] - ref_output[i]), max_error[tx_size])
+          << "tx_size = " << tx_size << ", i = " << i
+          << ", output[i] = " << output[i]
+          << ", ref_output[i] = " << ref_output[i];
     }
   }
 }
diff --git a/third_party/aom/test/av1_inv_txfm2d_test.cc b/third_party/aom/test/av1_inv_txfm2d_test.cc
index bccbdeebf3..461e7ebcdc 100644
--- a/third_party/aom/test/av1_inv_txfm2d_test.cc
+++ b/third_party/aom/test/av1_inv_txfm2d_test.cc
@@ -12,26 +12,35 @@
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <vector>
 
-#include "./av1_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/aom_timer.h"
+#include "av1/common/av1_inv_txfm1d_cfg.h"
+#include "av1/common/scan.h"
 #include "test/acm_random.h"
-#include "test/util.h"
 #include "test/av1_txfm_test.h"
-#include "av1/common/av1_inv_txfm1d_cfg.h"
+#include "test/util.h"
 
 using libaom_test::ACMRandom;
-using libaom_test::input_base;
+using libaom_test::InvTxfm2dFunc;
+using libaom_test::LbdInvTxfm2dFunc;
 using libaom_test::bd;
 using libaom_test::compute_avg_abs_error;
-using libaom_test::Fwd_Txfm2d_Func;
-using libaom_test::Inv_Txfm2d_Func;
+using libaom_test::input_base;
+
+using ::testing::Combine;
+using ::testing::Range;
+using ::testing::Values;
+
+using std::vector;
 
 namespace {
 
-#if CONFIG_HIGHBITDEPTH
 // AV1InvTxfm2dParam argument list:
 // tx_type_, tx_size_, max_error_, max_avg_error_
-typedef std::tr1::tuple<TX_TYPE, TX_SIZE, int, double> AV1InvTxfm2dParam;
+typedef ::testing::tuple<TX_TYPE, TX_SIZE, int, double> AV1InvTxfm2dParam;
 
 class AV1InvTxfm2d : public ::testing::TestWithParam<AV1InvTxfm2dParam> {
  public:
@@ -46,171 +55,313 @@ class AV1InvTxfm2d : public ::testing::TestWithParam<AV1InvTxfm2dParam> {
     int tx_w = tx_size_wide[tx_size_];
     int tx_h = tx_size_high[tx_size_];
     int txfm2d_size = tx_w * tx_h;
-    const Fwd_Txfm2d_Func fwd_txfm_func =
-        libaom_test::fwd_txfm_func_ls[tx_size_];
-    const Inv_Txfm2d_Func inv_txfm_func =
-        libaom_test::inv_txfm_func_ls[tx_size_];
+    const FwdTxfm2dFunc fwd_txfm_func = libaom_test::fwd_txfm_func_ls[tx_size_];
+    const InvTxfm2dFunc inv_txfm_func = libaom_test::inv_txfm_func_ls[tx_size_];
     double avg_abs_error = 0;
     ACMRandom rnd(ACMRandom::DeterministicSeed());
 
     const int count = 500;
 
     for (int ci = 0; ci < count; ci++) {
-      int16_t expected[64 * 64] = { 0 };
-      ASSERT_LT(txfm2d_size, NELEMENTS(expected));
+      DECLARE_ALIGNED(16, int16_t, input[64 * 64]) = { 0 };
+      ASSERT_LE(txfm2d_size, NELEMENTS(input));
 
       for (int ni = 0; ni < txfm2d_size; ++ni) {
         if (ci == 0) {
           int extreme_input = input_base - 1;
-          expected[ni] = extreme_input;  // extreme case
+          input[ni] = extreme_input;  // extreme case
         } else {
-          expected[ni] = rnd.Rand16() % input_base;
+          input[ni] = rnd.Rand16() % input_base;
         }
       }
 
-      int32_t coeffs[64 * 64] = { 0 };
-      ASSERT_LT(txfm2d_size, NELEMENTS(coeffs));
-      fwd_txfm_func(expected, coeffs, tx_w, tx_type_, bd);
+      DECLARE_ALIGNED(16, uint16_t, expected[64 * 64]) = { 0 };
+      ASSERT_LE(txfm2d_size, NELEMENTS(expected));
+      if (TxfmUsesApproximation()) {
+        // Compare reference forward HT + inverse HT vs forward HT + inverse HT.
+        double ref_input[64 * 64];
+        ASSERT_LE(txfm2d_size, NELEMENTS(ref_input));
+        for (int ni = 0; ni < txfm2d_size; ++ni) {
+          ref_input[ni] = input[ni];
+        }
+        double ref_coeffs[64 * 64] = { 0 };
+        ASSERT_LE(txfm2d_size, NELEMENTS(ref_coeffs));
+        ASSERT_EQ(tx_type_, DCT_DCT);
+        libaom_test::reference_hybrid_2d(ref_input, ref_coeffs, tx_type_,
+                                         tx_size_);
+        DECLARE_ALIGNED(16, int32_t, ref_coeffs_int[64 * 64]) = { 0 };
+        ASSERT_LE(txfm2d_size, NELEMENTS(ref_coeffs_int));
+        for (int ni = 0; ni < txfm2d_size; ++ni) {
+          ref_coeffs_int[ni] = (int32_t)round(ref_coeffs[ni]);
+        }
+        inv_txfm_func(ref_coeffs_int, expected, tx_w, tx_type_, bd);
+      } else {
+        // Compare original input vs forward HT + inverse HT.
+        for (int ni = 0; ni < txfm2d_size; ++ni) {
+          expected[ni] = input[ni];
+        }
+      }
+
+      DECLARE_ALIGNED(16, int32_t, coeffs[64 * 64]) = { 0 };
+      ASSERT_LE(txfm2d_size, NELEMENTS(coeffs));
+      fwd_txfm_func(input, coeffs, tx_w, tx_type_, bd);
 
-      uint16_t actual[64 * 64] = { 0 };
-      ASSERT_LT(txfm2d_size, NELEMENTS(actual));
+      DECLARE_ALIGNED(16, uint16_t, actual[64 * 64]) = { 0 };
+      ASSERT_LE(txfm2d_size, NELEMENTS(actual));
       inv_txfm_func(coeffs, actual, tx_w, tx_type_, bd);
 
+      double actual_max_error = 0;
       for (int ni = 0; ni < txfm2d_size; ++ni) {
-        EXPECT_GE(max_error_, abs(expected[ni] - actual[ni]));
+        const double this_error = abs(expected[ni] - actual[ni]);
+        actual_max_error = AOMMAX(actual_max_error, this_error);
+      }
+      EXPECT_GE(max_error_, actual_max_error)
+          << " tx_w: " << tx_w << " tx_h " << tx_h << " tx_type: " << tx_type_;
+      if (actual_max_error > max_error_) {  // exit early.
+        break;
       }
-      avg_abs_error += compute_avg_abs_error<int16_t, uint16_t>(
+      avg_abs_error += compute_avg_abs_error<uint16_t, uint16_t>(
           expected, actual, txfm2d_size);
     }
 
     avg_abs_error /= count;
-    // max_abs_avg_error comes from upper bound of
-    // printf("txfm1d_size: %d accuracy_avg_abs_error: %f\n",
-    // txfm1d_size_, avg_abs_error);
     EXPECT_GE(max_avg_error_, avg_abs_error)
         << " tx_w: " << tx_w << " tx_h " << tx_h << " tx_type: " << tx_type_;
   }
 
  private:
+  bool TxfmUsesApproximation() {
+    if (tx_size_wide[tx_size_] == 64 || tx_size_high[tx_size_] == 64) {
+      return true;
+    }
+    return false;
+  }
+
   int max_error_;
   double max_avg_error_;
   TX_TYPE tx_type_;
   TX_SIZE tx_size_;
 };
 
-TEST_P(AV1InvTxfm2d, RunRoundtripCheck) { RunRoundtripCheck(); }
+static int max_error_ls[TX_SIZES_ALL] = {
+  2,  // 4x4 transform
+  2,  // 8x8 transform
+  2,  // 16x16 transform
+  4,  // 32x32 transform
+  3,  // 64x64 transform
+  2,  // 4x8 transform
+  2,  // 8x4 transform
+  2,  // 8x16 transform
+  2,  // 16x8 transform
+  3,  // 16x32 transform
+  3,  // 32x16 transform
+  5,  // 32x64 transform
+  5,  // 64x32 transform
+  2,  // 4x16 transform
+  2,  // 16x4 transform
+  2,  // 8x32 transform
+  2,  // 32x8 transform
+  3,  // 16x64 transform
+  3,  // 64x16 transform
+};
 
-const AV1InvTxfm2dParam av1_inv_txfm2d_param[] = {
-#if CONFIG_EXT_TX
-#if CONFIG_RECT_TX
-  AV1InvTxfm2dParam(DCT_DCT, TX_4X8, 2, 0.007),
-  AV1InvTxfm2dParam(ADST_DCT, TX_4X8, 2, 0.012),
-  AV1InvTxfm2dParam(DCT_ADST, TX_4X8, 2, 0.012),
-  AV1InvTxfm2dParam(ADST_ADST, TX_4X8, 2, 0.012),
-  AV1InvTxfm2dParam(FLIPADST_DCT, TX_4X8, 2, 0.012),
-  AV1InvTxfm2dParam(DCT_FLIPADST, TX_4X8, 2, 0.012),
-  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_4X8, 2, 0.012),
-  AV1InvTxfm2dParam(ADST_FLIPADST, TX_4X8, 2, 0.012),
-  AV1InvTxfm2dParam(FLIPADST_ADST, TX_4X8, 2, 0.012),
-
-  AV1InvTxfm2dParam(DCT_DCT, TX_8X4, 2, 0.007),
-  AV1InvTxfm2dParam(ADST_DCT, TX_8X4, 2, 0.012),
-  AV1InvTxfm2dParam(DCT_ADST, TX_8X4, 2, 0.012),
-  AV1InvTxfm2dParam(ADST_ADST, TX_8X4, 2, 0.012),
-  AV1InvTxfm2dParam(FLIPADST_DCT, TX_8X4, 2, 0.007),
-  AV1InvTxfm2dParam(DCT_FLIPADST, TX_8X4, 2, 0.012),
-  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_8X4, 2, 0.012),
-  AV1InvTxfm2dParam(ADST_FLIPADST, TX_8X4, 2, 0.012),
-  AV1InvTxfm2dParam(FLIPADST_ADST, TX_8X4, 2, 0.012),
-
-  AV1InvTxfm2dParam(DCT_DCT, TX_8X16, 2, 0.025),
-  AV1InvTxfm2dParam(ADST_DCT, TX_8X16, 2, 0.020),
-  AV1InvTxfm2dParam(DCT_ADST, TX_8X16, 2, 0.027),
-  AV1InvTxfm2dParam(ADST_ADST, TX_8X16, 2, 0.023),
-  AV1InvTxfm2dParam(FLIPADST_DCT, TX_8X16, 2, 0.020),
-  AV1InvTxfm2dParam(DCT_FLIPADST, TX_8X16, 2, 0.027),
-  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_8X16, 2, 0.032),
-  AV1InvTxfm2dParam(ADST_FLIPADST, TX_8X16, 2, 0.023),
-  AV1InvTxfm2dParam(FLIPADST_ADST, TX_8X16, 2, 0.023),
-
-  AV1InvTxfm2dParam(DCT_DCT, TX_16X8, 2, 0.007),
-  AV1InvTxfm2dParam(ADST_DCT, TX_16X8, 2, 0.012),
-  AV1InvTxfm2dParam(DCT_ADST, TX_16X8, 2, 0.024),
-  AV1InvTxfm2dParam(ADST_ADST, TX_16X8, 2, 0.033),
-  AV1InvTxfm2dParam(FLIPADST_DCT, TX_16X8, 2, 0.015),
-  AV1InvTxfm2dParam(DCT_FLIPADST, TX_16X8, 2, 0.032),
-  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_16X8, 2, 0.032),
-  AV1InvTxfm2dParam(ADST_FLIPADST, TX_16X8, 2, 0.033),
-  AV1InvTxfm2dParam(FLIPADST_ADST, TX_16X8, 2, 0.032),
-#endif
-  AV1InvTxfm2dParam(FLIPADST_DCT, TX_4X4, 2, 0.002),
-  AV1InvTxfm2dParam(DCT_FLIPADST, TX_4X4, 2, 0.002),
-  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_4X4, 2, 0.002),
-  AV1InvTxfm2dParam(ADST_FLIPADST, TX_4X4, 2, 0.002),
-  AV1InvTxfm2dParam(FLIPADST_ADST, TX_4X4, 2, 0.002),
-  AV1InvTxfm2dParam(FLIPADST_DCT, TX_8X8, 2, 0.02),
-  AV1InvTxfm2dParam(DCT_FLIPADST, TX_8X8, 2, 0.02),
-  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_8X8, 2, 0.02),
-  AV1InvTxfm2dParam(ADST_FLIPADST, TX_8X8, 2, 0.02),
-  AV1InvTxfm2dParam(FLIPADST_ADST, TX_8X8, 2, 0.02),
-  AV1InvTxfm2dParam(FLIPADST_DCT, TX_16X16, 2, 0.04),
-  AV1InvTxfm2dParam(DCT_FLIPADST, TX_16X16, 2, 0.04),
-  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_16X16, 11, 0.04),
-  AV1InvTxfm2dParam(ADST_FLIPADST, TX_16X16, 2, 0.04),
-  AV1InvTxfm2dParam(FLIPADST_ADST, TX_16X16, 2, 0.04),
-  AV1InvTxfm2dParam(FLIPADST_DCT, TX_32X32, 4, 0.4),
-  AV1InvTxfm2dParam(DCT_FLIPADST, TX_32X32, 4, 0.4),
-  AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_32X32, 4, 0.4),
-  AV1InvTxfm2dParam(ADST_FLIPADST, TX_32X32, 4, 0.4),
-  AV1InvTxfm2dParam(FLIPADST_ADST, TX_32X32, 4, 0.4),
-#endif
-  AV1InvTxfm2dParam(DCT_DCT, TX_4X4, 2, 0.002),
-  AV1InvTxfm2dParam(ADST_DCT, TX_4X4, 2, 0.002),
-  AV1InvTxfm2dParam(DCT_ADST, TX_4X4, 2, 0.002),
-  AV1InvTxfm2dParam(ADST_ADST, TX_4X4, 2, 0.002),
-  AV1InvTxfm2dParam(DCT_DCT, TX_8X8, 2, 0.02),
-  AV1InvTxfm2dParam(ADST_DCT, TX_8X8, 2, 0.02),
-  AV1InvTxfm2dParam(DCT_ADST, TX_8X8, 2, 0.02),
-  AV1InvTxfm2dParam(ADST_ADST, TX_8X8, 2, 0.02),
-  AV1InvTxfm2dParam(DCT_DCT, TX_16X16, 2, 0.04),
-  AV1InvTxfm2dParam(ADST_DCT, TX_16X16, 2, 0.04),
-  AV1InvTxfm2dParam(DCT_ADST, TX_16X16, 2, 0.04),
-  AV1InvTxfm2dParam(ADST_ADST, TX_16X16, 2, 0.04),
-  AV1InvTxfm2dParam(DCT_DCT, TX_32X32, 4, 0.4),
-  AV1InvTxfm2dParam(ADST_DCT, TX_32X32, 4, 0.4),
-  AV1InvTxfm2dParam(DCT_ADST, TX_32X32, 4, 0.4),
-  AV1InvTxfm2dParam(ADST_ADST, TX_32X32, 4, 0.4)
+static double avg_error_ls[TX_SIZES_ALL] = {
+  0.002,  // 4x4 transform
+  0.05,   // 8x8 transform
+  0.07,   // 16x16 transform
+  0.4,    // 32x32 transform
+  0.3,    // 64x64 transform
+  0.02,   // 4x8 transform
+  0.02,   // 8x4 transform
+  0.04,   // 8x16 transform
+  0.07,   // 16x8 transform
+  0.4,    // 16x32 transform
+  0.5,    // 32x16 transform
+  0.38,   // 32x64 transform
+  0.39,   // 64x32 transform
+  0.2,    // 4x16 transform
+  0.2,    // 16x4 transform
+  0.2,    // 8x32 transform
+  0.2,    // 32x8 transform
+  0.38,   // 16x64 transform
+  0.38,   // 64x16 transform
 };
 
+vector<AV1InvTxfm2dParam> GetInvTxfm2dParamList() {
+  vector<AV1InvTxfm2dParam> param_list;
+  for (int s = 0; s < TX_SIZES; ++s) {
+    const int max_error = max_error_ls[s];
+    const double avg_error = avg_error_ls[s];
+    for (int t = 0; t < TX_TYPES; ++t) {
+      const TX_TYPE tx_type = static_cast<TX_TYPE>(t);
+      const TX_SIZE tx_size = static_cast<TX_SIZE>(s);
+      if (libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) {
+        param_list.push_back(
+            AV1InvTxfm2dParam(tx_type, tx_size, max_error, avg_error));
+      }
+    }
+  }
+  return param_list;
+}
+
 INSTANTIATE_TEST_CASE_P(C, AV1InvTxfm2d,
-                        ::testing::ValuesIn(av1_inv_txfm2d_param));
+                        ::testing::ValuesIn(GetInvTxfm2dParamList()));
+
+TEST_P(AV1InvTxfm2d, RunRoundtripCheck) { RunRoundtripCheck(); }
 
 TEST(AV1InvTxfm2d, CfgTest) {
   for (int bd_idx = 0; bd_idx < BD_NUM; ++bd_idx) {
     int bd = libaom_test::bd_arr[bd_idx];
     int8_t low_range = libaom_test::low_range_arr[bd_idx];
     int8_t high_range = libaom_test::high_range_arr[bd_idx];
-    // TODO(angiebird): include rect txfm in this test
-    for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+    for (int tx_size = 0; tx_size < TX_SIZES_ALL; ++tx_size) {
       for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
-        TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_cfg(
-            static_cast<TX_TYPE>(tx_type), static_cast<TX_SIZE>(tx_size));
+        if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(tx_size),
+                                           static_cast<TX_TYPE>(tx_type)) ==
+            false) {
+          continue;
+        }
+        TXFM_2D_FLIP_CFG cfg;
+        av1_get_inv_txfm_cfg(static_cast<TX_TYPE>(tx_type),
+                             static_cast<TX_SIZE>(tx_size), &cfg);
         int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
         int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
         av1_gen_inv_stage_range(stage_range_col, stage_range_row, &cfg,
-                                fwd_shift_sum[tx_size], bd);
-        const TXFM_1D_CFG *col_cfg = cfg.col_cfg;
-        const TXFM_1D_CFG *row_cfg = cfg.row_cfg;
-        libaom_test::txfm_stage_range_check(stage_range_col, col_cfg->stage_num,
-                                            col_cfg->cos_bit, low_range,
+                                (TX_SIZE)tx_size, bd);
+        libaom_test::txfm_stage_range_check(stage_range_col, cfg.stage_num_col,
+                                            cfg.cos_bit_col, low_range,
                                             high_range);
-        libaom_test::txfm_stage_range_check(stage_range_row, row_cfg->stage_num,
-                                            row_cfg->cos_bit, low_range,
+        libaom_test::txfm_stage_range_check(stage_range_row, cfg.stage_num_row,
+                                            cfg.cos_bit_row, low_range,
                                             high_range);
       }
     }
   }
 }
-#endif  // CONFIG_HIGHBITDEPTH
+
+typedef ::testing::tuple<const LbdInvTxfm2dFunc> AV1LbdInvTxfm2dParam;
+class AV1LbdInvTxfm2d : public ::testing::TestWithParam<AV1LbdInvTxfm2dParam> {
+ public:
+  virtual void SetUp() { target_func_ = GET_PARAM(0); }
+  void RunAV1InvTxfm2dTest(TX_TYPE tx_type, TX_SIZE tx_size, int run_times);
+
+ private:
+  LbdInvTxfm2dFunc target_func_;
+};
+
+void AV1LbdInvTxfm2d::RunAV1InvTxfm2dTest(TX_TYPE tx_type, TX_SIZE tx_size,
+                                          int run_times) {
+  FwdTxfm2dFunc fwd_func_ = libaom_test::fwd_txfm_func_ls[tx_size];
+  InvTxfm2dFunc ref_func_ = libaom_test::inv_txfm_func_ls[tx_size];
+  if (fwd_func_ == NULL || ref_func_ == NULL || target_func_ == NULL) {
+    return;
+  }
+  const int bd = 8;
+  const int BLK_WIDTH = 64;
+  const int BLK_SIZE = BLK_WIDTH * BLK_WIDTH;
+  DECLARE_ALIGNED(16, int16_t, input[BLK_SIZE]) = { 0 };
+  DECLARE_ALIGNED(32, int32_t, inv_input[BLK_SIZE]) = { 0 };
+  DECLARE_ALIGNED(16, uint8_t, output[BLK_SIZE]) = { 0 };
+  DECLARE_ALIGNED(16, uint16_t, ref_output[BLK_SIZE]) = { 0 };
+  int stride = BLK_WIDTH;
+  int rows = tx_size_high[tx_size];
+  int cols = tx_size_wide[tx_size];
+  const int rows_nonezero = AOMMIN(32, rows);
+  const int cols_nonezero = AOMMIN(32, cols);
+  run_times /= (rows * cols);
+  run_times = AOMMAX(1, run_times);
+  const SCAN_ORDER *scan_order = get_default_scan(tx_size, tx_type);
+  const int16_t *scan = scan_order->scan;
+  const int16_t eobmax = rows_nonezero * cols_nonezero;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int randTimes = run_times == 1 ? (eobmax + 500) : 1;
+  for (int cnt = 0; cnt < randTimes; ++cnt) {
+    const int16_t max_in = (1 << (bd)) - 1;
+    for (int r = 0; r < BLK_WIDTH; ++r) {
+      for (int c = 0; c < BLK_WIDTH; ++c) {
+        input[r * cols + c] = (cnt == 0) ? max_in : rnd.Rand8Extremes();
+        output[r * stride + c] = (cnt == 0) ? 128 : rnd.Rand8();
+        ref_output[r * stride + c] = output[r * stride + c];
+      }
+    }
+    fwd_func_(input, inv_input, stride, tx_type, bd);
+
+    // produce eob input by setting high freq coeffs to zero
+    const int eob = AOMMIN(cnt + 1, eobmax);
+    for (int i = eob; i < eobmax; i++) {
+      inv_input[scan[i]] = 0;
+    }
+
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      ref_func_(inv_input, ref_output, stride, tx_type, bd);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    aom_usec_timer_start(&timer);
+    for (int i = 0; i < run_times; ++i) {
+      target_func_(inv_input, output, stride, tx_type, tx_size, eob);
+    }
+    aom_usec_timer_mark(&timer);
+    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    if (run_times > 10) {
+      printf("txfm[%d] %3dx%-3d:%7.2f/%7.2fns", tx_type, cols, rows, time1,
+             time2);
+      printf("(%3.2f)\n", time1 / time2);
+    }
+    for (int r = 0; r < rows; ++r) {
+      for (int c = 0; c < cols; ++c) {
+        uint8_t ref_value = static_cast<uint8_t>(ref_output[r * stride + c]);
+        ASSERT_EQ(ref_value, output[r * stride + c])
+            << "[" << r << "," << c << "] " << cnt
+            << " tx_size: " << static_cast<int>(tx_size)
+            << " tx_type: " << tx_type << " eob " << eob;
+      }
+    }
+  }
+}
+
+TEST_P(AV1LbdInvTxfm2d, match) {
+  for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) {
+    for (int i = 0; i < (int)TX_TYPES; ++i) {
+      if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(j),
+                                         static_cast<TX_TYPE>(i))) {
+        RunAV1InvTxfm2dTest(static_cast<TX_TYPE>(i), static_cast<TX_SIZE>(j),
+                            1);
+      }
+    }
+  }
+}
+
+TEST_P(AV1LbdInvTxfm2d, DISABLED_Speed) {
+  for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) {
+    for (int i = 0; i < (int)TX_TYPES; ++i) {
+      if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(j),
+                                         static_cast<TX_TYPE>(i))) {
+        RunAV1InvTxfm2dTest(static_cast<TX_TYPE>(i), static_cast<TX_SIZE>(j),
+                            10000000);
+      }
+    }
+  }
+}
+
+#if HAVE_SSSE3
+#if defined(_MSC_VER) || defined(__SSSE3__)
+#include "av1/common/x86/av1_inv_txfm_ssse3.h"
+INSTANTIATE_TEST_CASE_P(SSSE3, AV1LbdInvTxfm2d,
+                        ::testing::Values(av1_lowbd_inv_txfm2d_add_ssse3));
+#endif  // _MSC_VER || __SSSE3__
+#endif  // HAVE_SSSE3
+
+#if HAVE_AVX2
+extern "C" void av1_lowbd_inv_txfm2d_add_avx2(const int32_t *input,
+                                              uint8_t *output, int stride,
+                                              TX_TYPE tx_type, TX_SIZE tx_size,
+                                              int eob);
+
+INSTANTIATE_TEST_CASE_P(AVX2, AV1LbdInvTxfm2d,
+                        ::testing::Values(av1_lowbd_inv_txfm2d_add_avx2));
+#endif  // HAVE_AVX2
 
 }  // namespace
diff --git a/third_party/aom/test/av1_inv_txfm_test.cc b/third_party/aom/test/av1_inv_txfm_test.cc
deleted file mode 100644
index 873e806852..0000000000
--- a/third_party/aom/test/av1_inv_txfm_test.cc
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "test/av1_txfm_test.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "av1/common/blockd.h"
-#include "av1/common/scan.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/inv_txfm.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-
-typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out);
-
-class TransTestBase {
- public:
-  virtual ~TransTestBase() {}
-
- protected:
-  void RunInvAccuracyCheck() {
-    tran_low_t input[64];
-    tran_low_t output[64];
-    double ref_input[64];
-    double ref_output[64];
-
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 5000;
-    for (int ti = 0; ti < count_test_block; ++ti) {
-      for (int ni = 0; ni < txfm_size_; ++ni) {
-        input[ni] = rnd.Rand8() - rnd.Rand8();
-        ref_input[ni] = static_cast<double>(input[ni]);
-      }
-
-      inv_txfm_(input, output);
-      libaom_test::reference_idct_1d(ref_input, ref_output, txfm_size_);
-
-      for (int ni = 0; ni < txfm_size_; ++ni) {
-        EXPECT_LE(
-            abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))),
-            max_error_);
-      }
-    }
-  }
-
-  double max_error_;
-  int txfm_size_;
-  IdctFunc inv_txfm_;
-};
-
-typedef std::tr1::tuple<IdctFunc, int, int> IdctParam;
-class AV1InvTxfm : public TransTestBase,
-                   public ::testing::TestWithParam<IdctParam> {
- public:
-  virtual void SetUp() {
-    inv_txfm_ = GET_PARAM(0);
-    txfm_size_ = GET_PARAM(1);
-    max_error_ = GET_PARAM(2);
-  }
-  virtual void TearDown() {}
-};
-
-TEST_P(AV1InvTxfm, RunInvAccuracyCheck) { RunInvAccuracyCheck(); }
-
-INSTANTIATE_TEST_CASE_P(C, AV1InvTxfm,
-                        ::testing::Values(IdctParam(&aom_idct4_c, 4, 1),
-                                          IdctParam(&aom_idct8_c, 8, 2),
-                                          IdctParam(&aom_idct16_c, 16, 4),
-                                          IdctParam(&aom_idct32_c, 32, 6)));
-
-#if CONFIG_AV1_ENCODER
-typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
-typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
-typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, InvTxfmFunc, TX_SIZE, int>
-    PartialInvTxfmParam;
-#if !CONFIG_ADAPT_SCAN
-const int kMaxNumCoeffs = 1024;
-#endif
-class AV1PartialIDctTest
-    : public ::testing::TestWithParam<PartialInvTxfmParam> {
- public:
-  virtual ~AV1PartialIDctTest() {}
-  virtual void SetUp() {
-    ftxfm_ = GET_PARAM(0);
-    full_itxfm_ = GET_PARAM(1);
-    partial_itxfm_ = GET_PARAM(2);
-    tx_size_ = GET_PARAM(3);
-    last_nonzero_ = GET_PARAM(4);
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  int last_nonzero_;
-  TX_SIZE tx_size_;
-  FwdTxfmFunc ftxfm_;
-  InvTxfmFunc full_itxfm_;
-  InvTxfmFunc partial_itxfm_;
-};
-
-#if !CONFIG_ADAPT_SCAN
-static MB_MODE_INFO get_mbmi() {
-  MB_MODE_INFO mbmi;
-  mbmi.ref_frame[0] = LAST_FRAME;
-  assert(is_inter_block(&mbmi));
-  return mbmi;
-}
-
-TEST_P(AV1PartialIDctTest, RunQuantCheck) {
-  int size;
-  switch (tx_size_) {
-    case TX_4X4: size = 4; break;
-    case TX_8X8: size = 8; break;
-    case TX_16X16: size = 16; break;
-    case TX_32X32: size = 32; break;
-    default: FAIL() << "Wrong Size!"; break;
-  }
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
-
-  const int count_test_block = 1000;
-  const int block_size = size * size;
-
-  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
-
-  int max_error = 0;
-  for (int m = 0; m < count_test_block; ++m) {
-    // clear out destination buffer
-    memset(dst1, 0, sizeof(*dst1) * block_size);
-    memset(dst2, 0, sizeof(*dst2) * block_size);
-    memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
-    memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
-
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-
-    for (int n = 0; n < count_test_block; ++n) {
-      // Initialize a test block with input range [-255, 255].
-      if (n == 0) {
-        for (int j = 0; j < block_size; ++j) input_extreme_block[j] = 255;
-      } else if (n == 1) {
-        for (int j = 0; j < block_size; ++j) input_extreme_block[j] = -255;
-      } else {
-        for (int j = 0; j < block_size; ++j) {
-          input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
-        }
-      }
-
-      ftxfm_(input_extreme_block, output_ref_block, size);
-
-      // quantization with maximum allowed step sizes
-      test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336;
-      MB_MODE_INFO mbmi = get_mbmi();
-      for (int j = 1; j < last_nonzero_; ++j)
-        test_coef_block1[get_scan((const AV1_COMMON *)NULL, tx_size_, DCT_DCT,
-                                  &mbmi)
-                             ->scan[j]] = (output_ref_block[j] / 1828) * 1828;
-    }
-
-    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
-    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size));
-
-    for (int j = 0; j < block_size; ++j) {
-      const int diff = dst1[j] - dst2[j];
-      const int error = diff * diff;
-      if (max_error < error) max_error = error;
-    }
-  }
-
-  EXPECT_EQ(0, max_error)
-      << "Error: partial inverse transform produces different results";
-}
-
-TEST_P(AV1PartialIDctTest, ResultsMatch) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int size;
-  switch (tx_size_) {
-    case TX_4X4: size = 4; break;
-    case TX_8X8: size = 8; break;
-    case TX_16X16: size = 16; break;
-    case TX_32X32: size = 32; break;
-    default: FAIL() << "Wrong Size!"; break;
-  }
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
-  const int count_test_block = 1000;
-  const int max_coeff = 32766 / 4;
-  const int block_size = size * size;
-  int max_error = 0;
-  for (int i = 0; i < count_test_block; ++i) {
-    // clear out destination buffer
-    memset(dst1, 0, sizeof(*dst1) * block_size);
-    memset(dst2, 0, sizeof(*dst2) * block_size);
-    memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
-    memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
-    int max_energy_leftover = max_coeff * max_coeff;
-    for (int j = 0; j < last_nonzero_; ++j) {
-      int16_t coef = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
-                                          (rnd.Rand16() - 32768) / 65536);
-      max_energy_leftover -= coef * coef;
-      if (max_energy_leftover < 0) {
-        max_energy_leftover = 0;
-        coef = 0;
-      }
-      MB_MODE_INFO mbmi = get_mbmi();
-      test_coef_block1[get_scan((const AV1_COMMON *)NULL, tx_size_, DCT_DCT,
-                                &mbmi)
-                           ->scan[j]] = coef;
-    }
-
-    memcpy(test_coef_block2, test_coef_block1,
-           sizeof(*test_coef_block2) * block_size);
-
-    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
-    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size));
-
-    for (int j = 0; j < block_size; ++j) {
-      const int diff = dst1[j] - dst2[j];
-      const int error = diff * diff;
-      if (max_error < error) max_error = error;
-    }
-  }
-
-  EXPECT_EQ(0, max_error)
-      << "Error: partial inverse transform produces different results";
-}
-#endif
-using std::tr1::make_tuple;
-
-INSTANTIATE_TEST_CASE_P(
-    C, AV1PartialIDctTest,
-    ::testing::Values(make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_c,
-                                 &aom_idct32x32_34_add_c, TX_32X32, 34),
-                      make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_c,
-                                 &aom_idct32x32_1_add_c, TX_32X32, 1),
-                      make_tuple(&aom_fdct16x16_c, &aom_idct16x16_256_add_c,
-                                 &aom_idct16x16_10_add_c, TX_16X16, 10),
-                      make_tuple(&aom_fdct16x16_c, &aom_idct16x16_256_add_c,
-                                 &aom_idct16x16_1_add_c, TX_16X16, 1),
-                      make_tuple(&aom_fdct8x8_c, &aom_idct8x8_64_add_c,
-                                 &aom_idct8x8_12_add_c, TX_8X8, 12),
-                      make_tuple(&aom_fdct8x8_c, &aom_idct8x8_64_add_c,
-                                 &aom_idct8x8_1_add_c, TX_8X8, 1),
-                      make_tuple(&aom_fdct4x4_c, &aom_idct4x4_16_add_c,
-                                 &aom_idct4x4_1_add_c, TX_4X4, 1)));
-#endif  // CONFIG_AV1_ENCODER
-}  // namespace
diff --git a/third_party/aom/test/av1_quantize_test.cc b/third_party/aom/test/av1_quantize_test.cc
index 36ac8c4adf..aaf0939181 100644
--- a/third_party/aom/test/av1_quantize_test.cc
+++ b/third_party/aom/test/av1_quantize_test.cc
@@ -12,8 +12,9 @@
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
@@ -22,8 +23,8 @@
 namespace {
 
 typedef void (*QuantizeFpFunc)(
-    const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
-    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr,
+    const int16_t *round_ptr, const int16_t *quant_ptr,
     const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
     tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
     const int16_t *scan, const int16_t *iscan, int log_scale);
@@ -50,20 +51,19 @@ class AV1QuantizeTest : public ::testing::TestWithParam<QuantizeFuncParams> {
   void RunQuantizeTest() {
     ACMRandom rnd(ACMRandom::DeterministicSeed());
     DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]);
-    DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
-    DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
-    DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
-    DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
+    DECLARE_ALIGNED(16, int16_t, zbin_ptr[8]);
+    DECLARE_ALIGNED(16, int16_t, round_ptr[8]);
+    DECLARE_ALIGNED(16, int16_t, quant_ptr[8]);
+    DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[8]);
     DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]);
     DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]);
     DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]);
     DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]);
-    DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
+    DECLARE_ALIGNED(16, int16_t, dequant_ptr[8]);
     uint16_t eob;
     uint16_t ref_eob;
     int err_count_total = 0;
     int first_failure = -1;
-    int skip_block = 0;
     int count = params_.coeffCount;
     const TX_SIZE txSize = getTxSize(count);
     int log_scale = (txSize == TX_32X32);
@@ -86,20 +86,26 @@ class AV1QuantizeTest : public ::testing::TestWithParam<QuantizeFuncParams> {
         quant_ptr[j] = (1 << 16) / dequant_ptr[j];
         round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >> 7;
       }
-
-      quanFuncRef(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+      for (int j = 2; j < 8; ++j) {
+        zbin_ptr[j] = zbin_ptr[1];
+        quant_shift_ptr[j] = quant_shift_ptr[1];
+        dequant_ptr[j] = dequant_ptr[1];
+        quant_ptr[j] = quant_ptr[1];
+        round_ptr[j] = round_ptr[1];
+      }
+      quanFuncRef(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
                   quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr, dequant_ptr,
                   &ref_eob, scanOrder.scan, scanOrder.iscan, log_scale);
 
       ASM_REGISTER_STATE_CHECK(
-          quanFunc(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+          quanFunc(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
                    quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob,
                    scanOrder.scan, scanOrder.iscan, log_scale));
 
       for (int j = 0; j < count; ++j) {
         err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
                      (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
-        EXPECT_EQ(ref_qcoeff_ptr[j], qcoeff_ptr[j])
+        ASSERT_EQ(ref_qcoeff_ptr[j], qcoeff_ptr[j])
             << "qcoeff error: i = " << i << " j = " << j << "\n";
         EXPECT_EQ(ref_dqcoeff_ptr[j], dqcoeff_ptr[j])
             << "dqcoeff error: i = " << i << " j = " << j << "\n";
@@ -120,18 +126,17 @@ class AV1QuantizeTest : public ::testing::TestWithParam<QuantizeFuncParams> {
   void RunEobTest() {
     ACMRandom rnd(ACMRandom::DeterministicSeed());
     DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]);
-    DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
-    DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
-    DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
-    DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
+    DECLARE_ALIGNED(16, int16_t, zbin_ptr[8]);
+    DECLARE_ALIGNED(16, int16_t, round_ptr[8]);
+    DECLARE_ALIGNED(16, int16_t, quant_ptr[8]);
+    DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[8]);
     DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]);
     DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]);
     DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]);
     DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]);
-    DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
+    DECLARE_ALIGNED(16, int16_t, dequant_ptr[8]);
     uint16_t eob;
     uint16_t ref_eob;
-    int skip_block = 0;
     int count = params_.coeffCount;
     const TX_SIZE txSize = getTxSize(count);
     int log_scale = (txSize == TX_32X32);
@@ -157,13 +162,20 @@ class AV1QuantizeTest : public ::testing::TestWithParam<QuantizeFuncParams> {
         quant_ptr[j] = (1 << 16) / dequant_ptr[j];
         round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >> 7;
       }
+      for (int j = 2; j < 8; ++j) {
+        zbin_ptr[j] = zbin_ptr[1];
+        quant_shift_ptr[j] = quant_shift_ptr[1];
+        dequant_ptr[j] = dequant_ptr[1];
+        quant_ptr[j] = quant_ptr[1];
+        round_ptr[j] = round_ptr[1];
+      }
 
-      quanFuncRef(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+      quanFuncRef(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
                   quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr, dequant_ptr,
                   &ref_eob, scanOrder.scan, scanOrder.iscan, log_scale);
 
       ASM_REGISTER_STATE_CHECK(
-          quanFunc(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+          quanFunc(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
                    quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob,
                    scanOrder.scan, scanOrder.iscan, log_scale));
       EXPECT_EQ(ref_eob, eob) << "eob error: "
@@ -196,7 +208,7 @@ TEST_P(AV1QuantizeTest, EobVerify) { RunEobTest(); }
 
 #if HAVE_SSE4_1
 const QuantizeFuncParams qfps[4] = {
-  QuantizeFuncParams(av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
+  QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
                      16),
   QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
                      64),
@@ -208,4 +220,20 @@ const QuantizeFuncParams qfps[4] = {
 
 INSTANTIATE_TEST_CASE_P(SSE4_1, AV1QuantizeTest, ::testing::ValuesIn(qfps));
 #endif  // HAVE_SSE4_1
+
+#if HAVE_AVX2
+const QuantizeFuncParams qfps_avx2[4] = {
+  QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c,
+                     16),
+  QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c,
+                     64),
+  QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c,
+                     256),
+  QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c,
+                     1024),
+};
+
+INSTANTIATE_TEST_CASE_P(AVX2, AV1QuantizeTest, ::testing::ValuesIn(qfps_avx2));
+#endif  // HAVE_AVX2
+
 }  // namespace
diff --git a/third_party/aom/test/av1_round_shift_array_test.cc b/third_party/aom/test/av1_round_shift_array_test.cc
new file mode 100644
index 0000000000..825d1348ed
--- /dev/null
+++ b/third_party/aom/test/av1_round_shift_array_test.cc
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/aom_timer.h"
+#include "aom_ports/mem.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/util.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace AV1CompRoundShift {
+
+typedef void (*comp_round_shift_array_func)(int32_t *arr, int size, int bit);
+
+const int kValidBitCheck[] = {
+  -4, -3, -2, -1, 0, 1, 2, 3, 4,
+};
+
+typedef ::testing::tuple<comp_round_shift_array_func, BLOCK_SIZE, int>
+    CompRoundShiftParam;
+
+class AV1CompRoundShiftTest
+    : public ::testing::TestWithParam<CompRoundShiftParam> {
+ public:
+  ~AV1CompRoundShiftTest();
+
+  void SetUp() { rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); }
+  void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunCheckOutput(comp_round_shift_array_func test_impl, BLOCK_SIZE bsize,
+                      int bit);
+  void RunSpeedTest(comp_round_shift_array_func test_impl, BLOCK_SIZE bsize,
+                    int bit);
+
+  libaom_test::ACMRandom rnd_;
+};
+
+AV1CompRoundShiftTest::~AV1CompRoundShiftTest() { ; }
+
+void AV1CompRoundShiftTest::RunCheckOutput(
+    comp_round_shift_array_func test_impl, BLOCK_SIZE bsize, int bit) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int blk_wd = 64;
+  DECLARE_ALIGNED(32, int32_t, pred_[blk_wd]);
+  DECLARE_ALIGNED(32, int32_t, ref_buffer_[blk_wd]);
+  for (int i = 0; i < (blk_wd); ++i) {
+    ref_buffer_[i] = pred_[i] = rnd_.Rand31() / 16;
+  }
+  av1_round_shift_array_c(ref_buffer_, w, bit);
+  test_impl(pred_, w, bit);
+  for (int x = 0; x < w; ++x) {
+    ASSERT_EQ(ref_buffer_[x], pred_[x]) << w << "x" << h << "mismatch @"
+                                        << "(" << x << ")";
+  }
+}
+
+void AV1CompRoundShiftTest::RunSpeedTest(comp_round_shift_array_func test_impl,
+                                         BLOCK_SIZE bsize, int bit) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int blk_wd = 64;
+  DECLARE_ALIGNED(32, int32_t, ref_buffer_[blk_wd]);
+  for (int i = 0; i < (blk_wd); ++i) {
+    ref_buffer_[i] = rnd_.Rand31();
+  }
+
+  const int num_loops = 1000000000 / (w + h);
+  comp_round_shift_array_func funcs[2] = { av1_round_shift_array_c, test_impl };
+  double elapsed_time[2] = { 0 };
+  for (int i = 0; i < 2; ++i) {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    comp_round_shift_array_func func = funcs[i];
+    for (int j = 0; j < num_loops; ++j) {
+      func(ref_buffer_, w, bit);
+    }
+    aom_usec_timer_mark(&timer);
+    double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    elapsed_time[i] = 1000.0 * time / num_loops;
+  }
+  printf("av1_round_shift_array %3dx%-3d: bit : %d %7.2f/%7.2fns", w, h, bit,
+         elapsed_time[0], elapsed_time[1]);
+  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
+}
+
+TEST_P(AV1CompRoundShiftTest, CheckOutput) {
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2));
+}
+
+TEST_P(AV1CompRoundShiftTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2));
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, AV1CompRoundShiftTest,
+    ::testing::Combine(::testing::Values(&av1_round_shift_array_sse4_1),
+                       ::testing::ValuesIn(txsize_to_bsize),
+                       ::testing::ValuesIn(kValidBitCheck)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, AV1CompRoundShiftTest,
+    ::testing::Combine(::testing::Values(&av1_round_shift_array_neon),
+                       ::testing::ValuesIn(txsize_to_bsize),
+                       ::testing::ValuesIn(kValidBitCheck)));
+#endif
+
+};  // namespace AV1CompRoundShift
diff --git a/third_party/aom/test/av1_txfm_test.cc b/third_party/aom/test/av1_txfm_test.cc
index 4545de1008..d5b0ce3255 100644
--- a/third_party/aom/test/av1_txfm_test.cc
+++ b/third_party/aom/test/av1_txfm_test.cc
@@ -34,7 +34,6 @@ void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1) {
       *type0 = TYPE_ADST;
       *type1 = TYPE_ADST;
       break;
-#if CONFIG_EXT_TX
     case FLIPADST_DCT:
       *type0 = TYPE_ADST;
       *type1 = TYPE_DCT;
@@ -55,7 +54,34 @@ void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1) {
       *type0 = TYPE_ADST;
       *type1 = TYPE_ADST;
       break;
-#endif  // CONFIG_EXT_TX
+    case IDTX:
+      *type0 = TYPE_IDTX;
+      *type1 = TYPE_IDTX;
+      break;
+    case H_DCT:
+      *type0 = TYPE_IDTX;
+      *type1 = TYPE_DCT;
+      break;
+    case V_DCT:
+      *type0 = TYPE_DCT;
+      *type1 = TYPE_IDTX;
+      break;
+    case H_ADST:
+      *type0 = TYPE_IDTX;
+      *type1 = TYPE_ADST;
+      break;
+    case V_ADST:
+      *type0 = TYPE_ADST;
+      *type1 = TYPE_IDTX;
+      break;
+    case H_FLIPADST:
+      *type0 = TYPE_IDTX;
+      *type1 = TYPE_ADST;
+      break;
+    case V_FLIPADST:
+      *type0 = TYPE_ADST;
+      *type1 = TYPE_IDTX;
+      break;
     default:
       *type0 = TYPE_DCT;
       *type1 = TYPE_DCT;
@@ -64,6 +90,7 @@ void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1) {
   }
 }
 
+double Sqrt2 = pow(2, 0.5);
 double invSqrt2 = 1 / pow(2, 0.5);
 
 double dct_matrix(double n, double k, int size) {
@@ -92,7 +119,63 @@ void reference_idct_1d(const double *in, double *out, int size) {
   }
 }
 
+// TODO(any): Copied from the old 'fadst4' (same as the new 'av1_fadst4_new'
+// function). Should be replaced by a proper reference function that takes
+// 'double' input & output.
+static void fadst4_new(const tran_low_t *input, tran_low_t *output) {
+  tran_high_t x0, x1, x2, x3;
+  tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
+
+  x0 = input[0];
+  x1 = input[1];
+  x2 = input[2];
+  x3 = input[3];
+
+  if (!(x0 | x1 | x2 | x3)) {
+    output[0] = output[1] = output[2] = output[3] = 0;
+    return;
+  }
+
+  s0 = sinpi_1_9 * x0;
+  s1 = sinpi_4_9 * x0;
+  s2 = sinpi_2_9 * x1;
+  s3 = sinpi_1_9 * x1;
+  s4 = sinpi_3_9 * x2;
+  s5 = sinpi_4_9 * x3;
+  s6 = sinpi_2_9 * x3;
+  s7 = x0 + x1 - x3;
+
+  x0 = s0 + s2 + s5;
+  x1 = sinpi_3_9 * s7;
+  x2 = s1 - s3 + s6;
+  x3 = s4;
+
+  s0 = x0 + x3;
+  s1 = x1;
+  s2 = x2 - x3;
+  s3 = x2 - x0 + x3;
+
+  // 1-D transform scaling factor is sqrt(2).
+  output[0] = (tran_low_t)fdct_round_shift(s0);
+  output[1] = (tran_low_t)fdct_round_shift(s1);
+  output[2] = (tran_low_t)fdct_round_shift(s2);
+  output[3] = (tran_low_t)fdct_round_shift(s3);
+}
+
 void reference_adst_1d(const double *in, double *out, int size) {
+  if (size == 4) {  // Special case.
+    tran_low_t int_input[4];
+    for (int i = 0; i < 4; ++i) {
+      int_input[i] = static_cast<tran_low_t>(round(in[i]));
+    }
+    tran_low_t int_output[4];
+    fadst4_new(int_input, int_output);
+    for (int i = 0; i < 4; ++i) {
+      out[i] = int_output[i];
+    }
+    return;
+  }
+
   for (int k = 0; k < size; ++k) {
     out[k] = 0;
     for (int n = 0; n < size; ++n) {
@@ -101,96 +184,188 @@ void reference_adst_1d(const double *in, double *out, int size) {
   }
 }
 
+void reference_idtx_1d(const double *in, double *out, int size) {
+  double scale = 0;
+  if (size == 4)
+    scale = Sqrt2;
+  else if (size == 8)
+    scale = 2;
+  else if (size == 16)
+    scale = 2 * Sqrt2;
+  else if (size == 32)
+    scale = 4;
+  else if (size == 64)
+    scale = 4 * Sqrt2;
+  for (int k = 0; k < size; ++k) {
+    out[k] = in[k] * scale;
+  }
+}
+
 void reference_hybrid_1d(double *in, double *out, int size, int type) {
   if (type == TYPE_DCT)
     reference_dct_1d(in, out, size);
-  else
+  else if (type == TYPE_ADST)
     reference_adst_1d(in, out, size);
+  else
+    reference_idtx_1d(in, out, size);
 }
 
-void reference_hybrid_2d(double *in, double *out, int size, int type0,
-                         int type1) {
-  double *tempOut = new double[size * size];
+double get_amplification_factor(TX_TYPE tx_type, TX_SIZE tx_size) {
+  TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg;
+  av1_get_fwd_txfm_cfg(tx_type, tx_size, &fwd_txfm_flip_cfg);
+  const int tx_width = tx_size_wide[fwd_txfm_flip_cfg.tx_size];
+  const int tx_height = tx_size_high[fwd_txfm_flip_cfg.tx_size];
+  const int8_t *shift = fwd_txfm_flip_cfg.shift;
+  const int amplify_bit = shift[0] + shift[1] + shift[2];
+  double amplify_factor =
+      amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));
+
+  // For rectangular transforms, we need to multiply by an extra factor.
+  const int rect_type = get_rect_tx_log_ratio(tx_width, tx_height);
+  if (abs(rect_type) == 1) {
+    amplify_factor *= pow(2, 0.5);
+  }
+  return amplify_factor;
+}
 
-  for (int r = 0; r < size; r++) {
-    // out ->tempOut
-    for (int c = 0; c < size; c++) {
-      tempOut[r * size + c] = in[c * size + r];
+void reference_hybrid_2d(double *in, double *out, TX_TYPE tx_type,
+                         TX_SIZE tx_size) {
+  // Get transform type and size of each dimension.
+  TYPE_TXFM type0;
+  TYPE_TXFM type1;
+  get_txfm1d_type(tx_type, &type0, &type1);
+  const int tx_width = tx_size_wide[tx_size];
+  const int tx_height = tx_size_high[tx_size];
+
+  double *const temp_in = new double[AOMMAX(tx_width, tx_height)];
+  double *const temp_out = new double[AOMMAX(tx_width, tx_height)];
+  double *const out_interm = new double[tx_width * tx_height];
+  const int stride = tx_width;
+
+  // Transform columns.
+  for (int c = 0; c < tx_width; ++c) {
+    for (int r = 0; r < tx_height; ++r) {
+      temp_in[r] = in[r * stride + c];
+    }
+    reference_hybrid_1d(temp_in, temp_out, tx_height, type0);
+    for (int r = 0; r < tx_height; ++r) {
+      out_interm[r * stride + c] = temp_out[r];
     }
   }
 
-  // dct each row: in -> out
-  for (int r = 0; r < size; r++) {
-    reference_hybrid_1d(tempOut + r * size, out + r * size, size, type0);
+  // Transform rows.
+  for (int r = 0; r < tx_height; ++r) {
+    reference_hybrid_1d(out_interm + r * stride, out + r * stride, tx_width,
+                        type1);
   }
 
-  for (int r = 0; r < size; r++) {
-    // out ->tempOut
-    for (int c = 0; c < size; c++) {
-      tempOut[r * size + c] = out[c * size + r];
+  delete[] temp_in;
+  delete[] temp_out;
+  delete[] out_interm;
+
+  // These transforms use an approximate 2D DCT transform, by only keeping the
+  // top-left quarter of the coefficients, and repacking them in the first
+  // quarter indices.
+  // TODO(urvang): Refactor this code.
+  if (tx_width == 64 && tx_height == 64) {  // tx_size == TX_64X64
+    // Zero out top-right 32x32 area.
+    for (int row = 0; row < 32; ++row) {
+      memset(out + row * 64 + 32, 0, 32 * sizeof(*out));
+    }
+    // Zero out the bottom 64x32 area.
+    memset(out + 32 * 64, 0, 32 * 64 * sizeof(*out));
+    // Re-pack non-zero coeffs in the first 32x32 indices.
+    for (int row = 1; row < 32; ++row) {
+      memcpy(out + row * 32, out + row * 64, 32 * sizeof(*out));
+    }
+  } else if (tx_width == 32 && tx_height == 64) {  // tx_size == TX_32X64
+    // Zero out the bottom 32x32 area.
+    memset(out + 32 * 32, 0, 32 * 32 * sizeof(*out));
+    // Note: no repacking needed here.
+  } else if (tx_width == 64 && tx_height == 32) {  // tx_size == TX_64X32
+    // Zero out right 32x32 area.
+    for (int row = 0; row < 32; ++row) {
+      memset(out + row * 64 + 32, 0, 32 * sizeof(*out));
+    }
+    // Re-pack non-zero coeffs in the first 32x32 indices.
+    for (int row = 1; row < 32; ++row) {
+      memcpy(out + row * 32, out + row * 64, 32 * sizeof(*out));
+    }
+  } else if (tx_width == 16 && tx_height == 64) {  // tx_size == TX_16X64
+    // Zero out the bottom 16x32 area.
+    memset(out + 16 * 32, 0, 16 * 32 * sizeof(*out));
+    // Note: no repacking needed here.
+  } else if (tx_width == 64 && tx_height == 16) {  // tx_size == TX_64X16
+    // Zero out right 32x16 area.
+    for (int row = 0; row < 16; ++row) {
+      memset(out + row * 64 + 32, 0, 32 * sizeof(*out));
+    }
+    // Re-pack non-zero coeffs in the first 32x16 indices.
+    for (int row = 1; row < 16; ++row) {
+      memcpy(out + row * 32, out + row * 64, 32 * sizeof(*out));
     }
   }
 
-  for (int r = 0; r < size; r++) {
-    reference_hybrid_1d(tempOut + r * size, out + r * size, size, type1);
+  // Apply appropriate scale.
+  const double amplify_factor = get_amplification_factor(tx_type, tx_size);
+  for (int c = 0; c < tx_width; ++c) {
+    for (int r = 0; r < tx_height; ++r) {
+      out[r * stride + c] *= amplify_factor;
+    }
   }
-  delete[] tempOut;
 }
 
 template <typename Type>
-void fliplr(Type *dest, int stride, int length) {
-  int i, j;
-  for (i = 0; i < length; ++i) {
-    for (j = 0; j < length / 2; ++j) {
-      const Type tmp = dest[i * stride + j];
-      dest[i * stride + j] = dest[i * stride + length - 1 - j];
-      dest[i * stride + length - 1 - j] = tmp;
+void fliplr(Type *dest, int width, int height, int stride) {
+  for (int r = 0; r < height; ++r) {
+    for (int c = 0; c < width / 2; ++c) {
+      const Type tmp = dest[r * stride + c];
+      dest[r * stride + c] = dest[r * stride + width - 1 - c];
+      dest[r * stride + width - 1 - c] = tmp;
     }
   }
 }
 
 template <typename Type>
-void flipud(Type *dest, int stride, int length) {
-  int i, j;
-  for (j = 0; j < length; ++j) {
-    for (i = 0; i < length / 2; ++i) {
-      const Type tmp = dest[i * stride + j];
-      dest[i * stride + j] = dest[(length - 1 - i) * stride + j];
-      dest[(length - 1 - i) * stride + j] = tmp;
+void flipud(Type *dest, int width, int height, int stride) {
+  for (int c = 0; c < width; ++c) {
+    for (int r = 0; r < height / 2; ++r) {
+      const Type tmp = dest[r * stride + c];
+      dest[r * stride + c] = dest[(height - 1 - r) * stride + c];
+      dest[(height - 1 - r) * stride + c] = tmp;
     }
   }
 }
 
 template <typename Type>
-void fliplrud(Type *dest, int stride, int length) {
-  int i, j;
-  for (i = 0; i < length / 2; ++i) {
-    for (j = 0; j < length; ++j) {
-      const Type tmp = dest[i * stride + j];
-      dest[i * stride + j] = dest[(length - 1 - i) * stride + length - 1 - j];
-      dest[(length - 1 - i) * stride + length - 1 - j] = tmp;
+void fliplrud(Type *dest, int width, int height, int stride) {
+  for (int r = 0; r < height / 2; ++r) {
+    for (int c = 0; c < width; ++c) {
+      const Type tmp = dest[r * stride + c];
+      dest[r * stride + c] = dest[(height - 1 - r) * stride + width - 1 - c];
+      dest[(height - 1 - r) * stride + width - 1 - c] = tmp;
     }
   }
 }
 
-template void fliplr<double>(double *dest, int stride, int length);
-template void flipud<double>(double *dest, int stride, int length);
-template void fliplrud<double>(double *dest, int stride, int length);
+template void fliplr<double>(double *dest, int width, int height, int stride);
+template void flipud<double>(double *dest, int width, int height, int stride);
+template void fliplrud<double>(double *dest, int width, int height, int stride);
 
 int bd_arr[BD_NUM] = { 8, 10, 12 };
-int8_t low_range_arr[BD_NUM] = { 16, 32, 32 };
+
+int8_t low_range_arr[BD_NUM] = { 18, 32, 32 };
 int8_t high_range_arr[BD_NUM] = { 32, 32, 32 };
 
 void txfm_stage_range_check(const int8_t *stage_range, int stage_num,
-                            const int8_t *cos_bit, int low_range,
-                            int high_range) {
+                            int8_t cos_bit, int low_range, int high_range) {
   for (int i = 0; i < stage_num; ++i) {
     EXPECT_LE(stage_range[i], low_range);
+    ASSERT_LE(stage_range[i] + cos_bit, high_range) << "stage = " << i;
   }
   for (int i = 0; i < stage_num - 1; ++i) {
     // make sure there is no overflow while doing half_btf()
-    EXPECT_LE(stage_range[i] + cos_bit[i], high_range);
-    EXPECT_LE(stage_range[i + 1] + cos_bit[i], high_range);
+    ASSERT_LE(stage_range[i + 1] + cos_bit, high_range) << "stage = " << i;
   }
 }
 }  // namespace libaom_test
diff --git a/third_party/aom/test/av1_txfm_test.h b/third_party/aom/test/av1_txfm_test.h
index 3e64e36ad3..70d1a894fb 100644
--- a/third_party/aom/test/av1_txfm_test.h
+++ b/third_party/aom/test/av1_txfm_test.h
@@ -19,17 +19,20 @@
 #endif
 #include <math.h>
 
+#include "config/av1_rtcd.h"
+
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
 #include "test/acm_random.h"
-#include "av1/common/enums.h"
 #include "av1/common/av1_txfm.h"
-#include "./av1_rtcd.h"
+#include "av1/common/blockd.h"
+#include "av1/common/enums.h"
 
 namespace libaom_test {
 typedef enum {
   TYPE_DCT = 0,
   TYPE_ADST,
+  TYPE_IDTX,
   TYPE_IDCT,
   TYPE_IADST,
   TYPE_LAST
@@ -46,8 +49,10 @@ void reference_adst_1d(const double *in, double *out, int size);
 
 void reference_hybrid_1d(double *in, double *out, int size, int type);
 
-void reference_hybrid_2d(double *in, double *out, int size, int type0,
-                         int type1);
+double get_amplification_factor(TX_TYPE tx_type, TX_SIZE tx_size);
+
+void reference_hybrid_2d(double *in, double *out, TX_TYPE tx_type,
+                         TX_SIZE tx_size);
 template <typename Type1, typename Type2>
 static double compute_avg_abs_error(const Type1 *a, const Type2 *b,
                                     const int size) {
@@ -60,81 +65,62 @@ static double compute_avg_abs_error(const Type1 *a, const Type2 *b,
 }
 
 template <typename Type>
-void fliplr(Type *dest, int stride, int length);
+void fliplr(Type *dest, int width, int height, int stride);
 
 template <typename Type>
-void flipud(Type *dest, int stride, int length);
+void flipud(Type *dest, int width, int height, int stride);
 
 template <typename Type>
-void fliplrud(Type *dest, int stride, int length);
+void fliplrud(Type *dest, int width, int height, int stride);
 
-typedef void (*TxfmFunc)(const int32_t *in, int32_t *out, const int8_t *cos_bit,
+typedef void (*TxfmFunc)(const int32_t *in, int32_t *out, const int8_t cos_bit,
                          const int8_t *range_bit);
 
-typedef void (*Fwd_Txfm2d_Func)(const int16_t *, int32_t *, int, TX_TYPE, int);
-typedef void (*Inv_Txfm2d_Func)(const int32_t *, uint16_t *, int, TX_TYPE, int);
+typedef void (*InvTxfm2dFunc)(const int32_t *, uint16_t *, int, TX_TYPE, int);
+typedef void (*LbdInvTxfm2dFunc)(const int32_t *, uint8_t *, int, TX_TYPE,
+                                 TX_SIZE, int);
 
 static const int bd = 10;
 static const int input_base = (1 << bd);
 
-#if CONFIG_HIGHBITDEPTH
+static INLINE bool IsTxSizeTypeValid(TX_SIZE tx_size, TX_TYPE tx_type) {
+  const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size];
+  TxSetType tx_set_type;
+  if (tx_size_sqr_up > TX_32X32) {
+    tx_set_type = EXT_TX_SET_DCTONLY;
+  } else if (tx_size_sqr_up == TX_32X32) {
+    tx_set_type = EXT_TX_SET_DCT_IDTX;
+  } else {
+    tx_set_type = EXT_TX_SET_ALL16;
+  }
+  return av1_ext_tx_used[tx_set_type][tx_type] != 0;
+}
+
 #if CONFIG_AV1_ENCODER
 
-static const Fwd_Txfm2d_Func fwd_txfm_func_ls[TX_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2
-  NULL,
-#endif
-  av1_fwd_txfm2d_4x4_c,
-  av1_fwd_txfm2d_8x8_c,
-  av1_fwd_txfm2d_16x16_c,
-  av1_fwd_txfm2d_32x32_c,
-#if CONFIG_TX64X64
-  av1_fwd_txfm2d_64x64_c,
-#endif  // CONFIG_TX64X64
-  av1_fwd_txfm2d_4x8_c,
-  av1_fwd_txfm2d_8x4_c,
-  av1_fwd_txfm2d_8x16_c,
-  av1_fwd_txfm2d_16x8_c,
-  av1_fwd_txfm2d_16x32_c,
-  av1_fwd_txfm2d_32x16_c,
-#if CONFIG_TX64X64
-  av1_fwd_txfm2d_32x64_c,
-  av1_fwd_txfm2d_64x32_c,
-#endif  // CONFIG_TX64X64
-  NULL,
-  NULL,
-  NULL,
-  NULL,
+static const FwdTxfm2dFunc fwd_txfm_func_ls[TX_SIZES_ALL] = {
+  av1_fwd_txfm2d_4x4_c,   av1_fwd_txfm2d_8x8_c,   av1_fwd_txfm2d_16x16_c,
+  av1_fwd_txfm2d_32x32_c, av1_fwd_txfm2d_64x64_c, av1_fwd_txfm2d_4x8_c,
+  av1_fwd_txfm2d_8x4_c,   av1_fwd_txfm2d_8x16_c,  av1_fwd_txfm2d_16x8_c,
+  av1_fwd_txfm2d_16x32_c, av1_fwd_txfm2d_32x16_c, av1_fwd_txfm2d_32x64_c,
+  av1_fwd_txfm2d_64x32_c, av1_fwd_txfm2d_4x16_c,  av1_fwd_txfm2d_16x4_c,
+  av1_fwd_txfm2d_8x32_c,  av1_fwd_txfm2d_32x8_c,  av1_fwd_txfm2d_16x64_c,
+  av1_fwd_txfm2d_64x16_c,
 };
 #endif
 
-static const Inv_Txfm2d_Func inv_txfm_func_ls[TX_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2
-  NULL,
-#endif
-  av1_inv_txfm2d_add_4x4_c,
-  av1_inv_txfm2d_add_8x8_c,
-  av1_inv_txfm2d_add_16x16_c,
-  av1_inv_txfm2d_add_32x32_c,
-#if CONFIG_TX64X64
-  av1_inv_txfm2d_add_64x64_c,
-#endif  // CONFIG_TX64X64
-  av1_inv_txfm2d_add_4x8_c,
-  av1_inv_txfm2d_add_8x4_c,
-  av1_inv_txfm2d_add_8x16_c,
-  av1_inv_txfm2d_add_16x8_c,
-  av1_inv_txfm2d_add_16x32_c,
-  av1_inv_txfm2d_add_32x16_c,
-#if CONFIG_TX64X64
-  av1_inv_txfm2d_add_32x64_c,
-  av1_inv_txfm2d_add_64x32_c,
-#endif  // CONFIG_TX64X64
-  NULL,
-  NULL,
-  NULL,
-  NULL,
+static const InvTxfm2dFunc inv_txfm_func_ls[TX_SIZES_ALL] = {
+  av1_inv_txfm2d_add_4x4_c,   av1_inv_txfm2d_add_8x8_c,
+  av1_inv_txfm2d_add_16x16_c, av1_inv_txfm2d_add_32x32_c,
+  av1_inv_txfm2d_add_64x64_c, av1_inv_txfm2d_add_4x8_c,
+  av1_inv_txfm2d_add_8x4_c,   av1_inv_txfm2d_add_8x16_c,
+  av1_inv_txfm2d_add_16x8_c,  av1_inv_txfm2d_add_16x32_c,
+  av1_inv_txfm2d_add_32x16_c, av1_inv_txfm2d_add_32x64_c,
+  av1_inv_txfm2d_add_64x32_c, av1_inv_txfm2d_add_4x16_c,
+  av1_inv_txfm2d_add_16x4_c,  av1_inv_txfm2d_add_8x32_c,
+  av1_inv_txfm2d_add_32x8_c,  av1_inv_txfm2d_add_16x64_c,
+  av1_inv_txfm2d_add_64x16_c,
 };
-#endif  // CONFIG_HIGHBITDEPTH
 
 #define BD_NUM 3
 
@@ -143,7 +129,7 @@ extern int8_t low_range_arr[];
 extern int8_t high_range_arr[];
 
 void txfm_stage_range_check(const int8_t *stage_range, int stage_num,
-                            const int8_t *cos_bit, int low_range,
+                            const int8_t cos_bit, int low_range,
                             int high_range);
 }  // namespace libaom_test
 #endif  // AV1_TXFM_TEST_H_
diff --git a/third_party/aom/test/av1_wedge_utils_test.cc b/third_party/aom/test/av1_wedge_utils_test.cc
index d4b560fc10..cfdf2d36c1 100644
--- a/third_party/aom/test/av1_wedge_utils_test.cc
+++ b/third_party/aom/test/av1_wedge_utils_test.cc
@@ -11,10 +11,9 @@
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
-
-#include "./aom_dsp_rtcd.h"
-#include "./av1_rtcd.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
 
 #include "aom_dsp/aom_dsp_common.h"
 
@@ -100,7 +99,7 @@ TEST_F(WedgeUtilsSSEFuncTest, ResidualBlendingEquiv) {
       p1[j] = clamp(s[j] + rng_(33) - 16, 0, UINT8_MAX);
     }
 
-    aom_blend_a64_mask(p, w, p0, w, p1, w, m, w, h, w, 0, 0);
+    aom_blend_a64_mask(p, w, p0, w, p1, w, m, w, w, h, 0, 0);
 
     aom_subtract_block(h, w, r0, w, s, w, p0, w);
     aom_subtract_block(h, w, r1, w, s, w, p1, w);
diff --git a/third_party/aom/test/avg_test.cc b/third_party/aom/test/avg_test.cc
deleted file mode 100644
index e83a75c1c1..0000000000
--- a/third_party/aom/test/avg_test.cc
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <limits.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "aom_mem/aom_mem.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-class AverageTestBase : public ::testing::Test {
- public:
-  AverageTestBase(int width, int height) : width_(width), height_(height) {}
-
-  static void SetUpTestCase() {
-    source_data_ = reinterpret_cast<uint8_t *>(
-        aom_memalign(kDataAlignment, kDataBlockSize));
-  }
-
-  static void TearDownTestCase() {
-    aom_free(source_data_);
-    source_data_ = NULL;
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  // Handle blocks up to 4 blocks 64x64 with stride up to 128
-  static const int kDataAlignment = 16;
-  static const int kDataBlockSize = 64 * 128;
-
-  virtual void SetUp() {
-    source_stride_ = (width_ + 31) & ~31;
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-  }
-
-  void FillConstant(uint8_t fill_constant) {
-    for (int i = 0; i < width_ * height_; ++i) {
-      source_data_[i] = fill_constant;
-    }
-  }
-
-  void FillRandom() {
-    for (int i = 0; i < width_ * height_; ++i) {
-      source_data_[i] = rnd_.Rand8();
-    }
-  }
-
-  int width_, height_;
-  static uint8_t *source_data_;
-  int source_stride_;
-
-  ACMRandom rnd_;
-};
-
-typedef void (*IntProRowFunc)(int16_t hbuf[16], uint8_t const *ref,
-                              const int ref_stride, const int height);
-
-typedef std::tr1::tuple<int, IntProRowFunc, IntProRowFunc> IntProRowParam;
-
-class IntProRowTest : public AverageTestBase,
-                      public ::testing::WithParamInterface<IntProRowParam> {
- public:
-  IntProRowTest()
-      : AverageTestBase(16, GET_PARAM(0)), hbuf_asm_(NULL), hbuf_c_(NULL) {
-    asm_func_ = GET_PARAM(1);
-    c_func_ = GET_PARAM(2);
-  }
-
- protected:
-  virtual void SetUp() {
-    hbuf_asm_ = reinterpret_cast<int16_t *>(
-        aom_memalign(kDataAlignment, sizeof(*hbuf_asm_) * 16));
-    hbuf_c_ = reinterpret_cast<int16_t *>(
-        aom_memalign(kDataAlignment, sizeof(*hbuf_c_) * 16));
-  }
-
-  virtual void TearDown() {
-    aom_free(hbuf_c_);
-    hbuf_c_ = NULL;
-    aom_free(hbuf_asm_);
-    hbuf_asm_ = NULL;
-  }
-
-  void RunComparison() {
-    ASM_REGISTER_STATE_CHECK(c_func_(hbuf_c_, source_data_, 0, height_));
-    ASM_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, 0, height_));
-    EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * 16))
-        << "Output mismatch";
-  }
-
- private:
-  IntProRowFunc asm_func_;
-  IntProRowFunc c_func_;
-  int16_t *hbuf_asm_;
-  int16_t *hbuf_c_;
-};
-
-typedef int16_t (*IntProColFunc)(uint8_t const *ref, const int width);
-
-typedef std::tr1::tuple<int, IntProColFunc, IntProColFunc> IntProColParam;
-
-class IntProColTest : public AverageTestBase,
-                      public ::testing::WithParamInterface<IntProColParam> {
- public:
-  IntProColTest() : AverageTestBase(GET_PARAM(0), 1), sum_asm_(0), sum_c_(0) {
-    asm_func_ = GET_PARAM(1);
-    c_func_ = GET_PARAM(2);
-  }
-
- protected:
-  void RunComparison() {
-    ASM_REGISTER_STATE_CHECK(sum_c_ = c_func_(source_data_, width_));
-    ASM_REGISTER_STATE_CHECK(sum_asm_ = asm_func_(source_data_, width_));
-    EXPECT_EQ(sum_c_, sum_asm_) << "Output mismatch";
-  }
-
- private:
-  IntProColFunc asm_func_;
-  IntProColFunc c_func_;
-  int16_t sum_asm_;
-  int16_t sum_c_;
-};
-
-typedef int (*SatdFunc)(const int16_t *coeffs, int length);
-typedef std::tr1::tuple<int, SatdFunc> SatdTestParam;
-
-class SatdTest : public ::testing::Test,
-                 public ::testing::WithParamInterface<SatdTestParam> {
- protected:
-  virtual void SetUp() {
-    satd_size_ = GET_PARAM(0);
-    satd_func_ = GET_PARAM(1);
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-    src_ = reinterpret_cast<int16_t *>(
-        aom_memalign(16, sizeof(*src_) * satd_size_));
-    ASSERT_TRUE(src_ != NULL);
-  }
-
-  virtual void TearDown() {
-    libaom_test::ClearSystemState();
-    aom_free(src_);
-  }
-
-  void FillConstant(const int16_t val) {
-    for (int i = 0; i < satd_size_; ++i) src_[i] = val;
-  }
-
-  void FillRandom() {
-    for (int i = 0; i < satd_size_; ++i) src_[i] = rnd_.Rand16();
-  }
-
-  void Check(int expected) {
-    int total;
-    ASM_REGISTER_STATE_CHECK(total = satd_func_(src_, satd_size_));
-    EXPECT_EQ(expected, total);
-  }
-
-  int satd_size_;
-
- private:
-  int16_t *src_;
-  SatdFunc satd_func_;
-  ACMRandom rnd_;
-};
-
-uint8_t *AverageTestBase::source_data_ = NULL;
-
-TEST_P(IntProRowTest, MinValue) {
-  FillConstant(0);
-  RunComparison();
-}
-
-TEST_P(IntProRowTest, MaxValue) {
-  FillConstant(255);
-  RunComparison();
-}
-
-TEST_P(IntProRowTest, Random) {
-  FillRandom();
-  RunComparison();
-}
-
-TEST_P(IntProColTest, MinValue) {
-  FillConstant(0);
-  RunComparison();
-}
-
-TEST_P(IntProColTest, MaxValue) {
-  FillConstant(255);
-  RunComparison();
-}
-
-TEST_P(IntProColTest, Random) {
-  FillRandom();
-  RunComparison();
-}
-
-TEST_P(SatdTest, MinValue) {
-  const int kMin = -32640;
-  const int expected = -kMin * satd_size_;
-  FillConstant(kMin);
-  Check(expected);
-}
-
-TEST_P(SatdTest, MaxValue) {
-  const int kMax = 32640;
-  const int expected = kMax * satd_size_;
-  FillConstant(kMax);
-  Check(expected);
-}
-
-TEST_P(SatdTest, Random) {
-  int expected;
-  switch (satd_size_) {
-    case 16: expected = 205298; break;
-    case 64: expected = 1113950; break;
-    case 256: expected = 4268415; break;
-    case 1024: expected = 16954082; break;
-    default:
-      FAIL() << "Invalid satd size (" << satd_size_
-             << ") valid: 16/64/256/1024";
-  }
-  FillRandom();
-  Check(expected);
-}
-
-using std::tr1::make_tuple;
-
-INSTANTIATE_TEST_CASE_P(C, SatdTest,
-                        ::testing::Values(make_tuple(16, &aom_satd_c),
-                                          make_tuple(64, &aom_satd_c),
-                                          make_tuple(256, &aom_satd_c),
-                                          make_tuple(1024, &aom_satd_c)));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, IntProRowTest,
-    ::testing::Values(make_tuple(16, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
-                      make_tuple(32, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
-                      make_tuple(64, &aom_int_pro_row_sse2,
-                                 &aom_int_pro_row_c)));
-
-INSTANTIATE_TEST_CASE_P(
-    SSE2, IntProColTest,
-    ::testing::Values(make_tuple(16, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
-                      make_tuple(32, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
-                      make_tuple(64, &aom_int_pro_col_sse2,
-                                 &aom_int_pro_col_c)));
-
-INSTANTIATE_TEST_CASE_P(SSE2, SatdTest,
-                        ::testing::Values(make_tuple(16, &aom_satd_sse2),
-                                          make_tuple(64, &aom_satd_sse2),
-                                          make_tuple(256, &aom_satd_sse2),
-                                          make_tuple(1024, &aom_satd_sse2)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, IntProRowTest,
-    ::testing::Values(make_tuple(16, &aom_int_pro_row_neon, &aom_int_pro_row_c),
-                      make_tuple(32, &aom_int_pro_row_neon, &aom_int_pro_row_c),
-                      make_tuple(64, &aom_int_pro_row_neon,
-                                 &aom_int_pro_row_c)));
-
-INSTANTIATE_TEST_CASE_P(
-    NEON, IntProColTest,
-    ::testing::Values(make_tuple(16, &aom_int_pro_col_neon, &aom_int_pro_col_c),
-                      make_tuple(32, &aom_int_pro_col_neon, &aom_int_pro_col_c),
-                      make_tuple(64, &aom_int_pro_col_neon,
-                                 &aom_int_pro_col_c)));
-
-INSTANTIATE_TEST_CASE_P(NEON, SatdTest,
-                        ::testing::Values(make_tuple(16, &aom_satd_neon),
-                                          make_tuple(64, &aom_satd_neon),
-                                          make_tuple(256, &aom_satd_neon),
-                                          make_tuple(1024, &aom_satd_neon)));
-#endif
-
-}  // namespace
diff --git a/third_party/aom/test/best_encode.sh b/third_party/aom/test/best_encode.sh
new file mode 100755
index 0000000000..fe31a01cb9
--- /dev/null
+++ b/third_party/aom/test/best_encode.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+#
+# Copyright (c) 2016, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and
+# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+# was not distributed with this source code in the LICENSE file, you can
+# obtain it at www.aomedia.org/license/software. If the Alliance for Open
+# Media Patent License 1.0 was not distributed with this source code in the
+# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+#
+# Author: jimbankoski@google.com (Jim Bankoski)
+
+if [[ $# -ne 2 ]]; then
+  echo "Encodes a file using best known settings (slow!)"
+  echo "  Usage:    be [FILE] [BITRATE]"
+  echo "  Example:  be akiyo_cif.y4m 200"
+  exit
+fi
+
+f=$1  # file is first parameter
+b=$2  # bitrate is second parameter
+
+if [[ -e $f.fpf ]]; then
+  # First-pass file found, do second pass only
+  aomenc \
+    $f \
+    -o $f-$b.av1.webm \
+    -p 2 \
+    --pass=2 \
+    --fpf=$f.fpf \
+    --best \
+    --cpu-used=0 \
+    --target-bitrate=$b \
+    --auto-alt-ref=1 \
+    -v \
+    --minsection-pct=0 \
+    --maxsection-pct=800 \
+    --lag-in-frames=25 \
+    --kf-min-dist=0 \
+    --kf-max-dist=99999 \
+    --static-thresh=0 \
+    --min-q=0 \
+    --max-q=63 \
+    --drop-frame=0 \
+    --bias-pct=50 \
+    --minsection-pct=0 \
+    --maxsection-pct=800 \
+    --psnr \
+    --arnr-maxframes=7 \
+    --arnr-strength=3 \
+    --arnr-type=3
+else
+  # No first-pass file found, do 2-pass encode
+  aomenc \
+    $f \
+    -o $f-$b.av1.webm \
+    -p 2 \
+    --pass=1 \
+    --fpf=$f.fpf \
+    --best \
+    --cpu-used=0 \
+    --target-bitrate=$b \
+    --auto-alt-ref=1 \
+    -v \
+    --minsection-pct=0 \
+    --maxsection-pct=800 \
+    --lag-in-frames=25 \
+    --kf-min-dist=0 \
+    --kf-max-dist=99999 \
+    --static-thresh=0 \
+    --min-q=0 \
+    --max-q=63 \
+    --drop-frame=0
+
+  aomenc \
+    $f \
+    -o $f-$b.av1.webm \
+    -p 2 \
+    --pass=2 \
+    --fpf=$f.fpf \
+    --best \
+    --cpu-used=0 \
+    --target-bitrate=$b \
+    --auto-alt-ref=1 \
+    -v \
+    --minsection-pct=0 \
+    --maxsection-pct=800 \
+    --lag-in-frames=25 \
+    --kf-min-dist=0 \
+    --kf-max-dist=99999 \
+    --static-thresh=0 \
+    --min-q=0 \
+    --max-q=63 \
+    --drop-frame=0 \
+    --bias-pct=50 \
+    --minsection-pct=0 \
+    --maxsection-pct=800 \
+    --psnr \
+    --arnr-maxframes=7 \
+    --arnr-strength=3 \
+    --arnr-type=3
+fi
diff --git a/third_party/aom/test/binary_codes_test.cc b/third_party/aom/test/binary_codes_test.cc
index 41efec7813..45660cf853 100644
--- a/third_party/aom/test/binary_codes_test.cc
+++ b/third_party/aom/test/binary_codes_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <math.h>
 #include <stdlib.h>
@@ -15,7 +15,8 @@
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
 #include "test/acm_random.h"
 #include "aom/aom_integer.h"
 #include "aom_dsp/bitreader.h"
@@ -29,57 +30,6 @@ using libaom_test::ACMRandom;
 
 namespace {
 
-// Test for Bilevel code with reference
-TEST(AV1, TestPrimitiveRefbilivel) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int kBufferSize = 65536;
-  aom_writer bw;
-  uint8_t bw_buffer[kBufferSize];
-  const uint16_t kRanges = 8;
-  const uint16_t kNearRanges = 8;
-  const uint16_t kReferences = 8;
-  const uint16_t kValues = 16;
-  const uint16_t range_vals[kRanges] = { 1, 13, 64, 120, 230, 420, 1100, 8000 };
-  uint16_t enc_values[kRanges][kNearRanges][kReferences][kValues][4];
-  aom_start_encode(&bw, bw_buffer);
-  for (int n = 0; n < kRanges; ++n) {
-    const uint16_t range = range_vals[n];
-    for (int p = 0; p < kNearRanges; ++p) {
-      const uint16_t near_range = 1 + rnd(range);
-      for (int r = 0; r < kReferences; ++r) {
-        const uint16_t ref = rnd(range);
-        for (int v = 0; v < kValues; ++v) {
-          const uint16_t value = rnd(range);
-          enc_values[n][p][r][v][0] = range;
-          enc_values[n][p][r][v][1] = near_range;
-          enc_values[n][p][r][v][2] = ref;
-          enc_values[n][p][r][v][3] = value;
-          aom_write_primitive_refbilevel(&bw, range, near_range, ref, value);
-        }
-      }
-    }
-  }
-  aom_stop_encode(&bw);
-  aom_reader br;
-  aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
-  GTEST_ASSERT_GE(aom_reader_tell(&br), 0u);
-  GTEST_ASSERT_LE(aom_reader_tell(&br), 1u);
-  for (int n = 0; n < kRanges; ++n) {
-    for (int p = 0; p < kNearRanges; ++p) {
-      for (int r = 0; r < kReferences; ++r) {
-        for (int v = 0; v < kValues; ++v) {
-          const uint16_t range = enc_values[n][p][r][v][0];
-          const uint16_t near_range = enc_values[n][p][r][v][1];
-          const uint16_t ref = enc_values[n][p][r][v][2];
-          const uint16_t value = aom_read_primitive_refbilevel(
-              &br, range, near_range, ref, ACCT_STR);
-          GTEST_ASSERT_EQ(value, enc_values[n][p][r][v][3]);
-        }
-      }
-    }
-  }
-}
-
 // Test for Finite subexponential code with reference
 TEST(AV1, TestPrimitiveRefsubexpfin) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
@@ -111,7 +61,7 @@ TEST(AV1, TestPrimitiveRefsubexpfin) {
   }
   aom_stop_encode(&bw);
   aom_reader br;
-  aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
+  aom_reader_init(&br, bw_buffer, bw.pos);
   GTEST_ASSERT_GE(aom_reader_tell(&br), 0u);
   GTEST_ASSERT_LE(aom_reader_tell(&br), 1u);
   for (int n = 0; n < kRanges; ++n) {
diff --git a/third_party/aom/test/blend_a64_mask_1d_test.cc b/third_party/aom/test/blend_a64_mask_1d_test.cc
index 66e741a74d..f8844eef8b 100644
--- a/third_party/aom/test/blend_a64_mask_1d_test.cc
+++ b/third_party/aom/test/blend_a64_mask_1d_test.cc
@@ -17,11 +17,11 @@
 #include "test/register_state_check.h"
 #include "test/function_equivalence_test.h"
 
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
-#include "aom/aom_integer.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
 
-#include "./av1_rtcd.h"
+#include "aom/aom_integer.h"
 
 #include "av1/common/enums.h"
 
@@ -46,8 +46,8 @@ class BlendA64Mask1DTest : public FunctionEquivalenceTest<F> {
   virtual void Execute(const T *p_src0, const T *p_src1) = 0;
 
   void Common() {
-    w_ = 1 << this->rng_(MAX_SB_SIZE_LOG2 + 1);
-    h_ = 1 << this->rng_(MAX_SB_SIZE_LOG2 + 1);
+    w_ = 2 << this->rng_(MAX_SB_SIZE_LOG2);
+    h_ = 2 << this->rng_(MAX_SB_SIZE_LOG2);
 
     dst_offset_ = this->rng_(33);
     dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
@@ -116,7 +116,7 @@ class BlendA64Mask1DTest : public FunctionEquivalenceTest<F> {
 
 typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
                     uint32_t src0_stride, const uint8_t *src1,
-                    uint32_t src1_stride, const uint8_t *mask, int h, int w);
+                    uint32_t src1_stride, const uint8_t *mask, int w, int h);
 typedef libaom_test::FuncParam<F8B> TestFuncs;
 
 class BlendA64Mask1DTest8B : public BlendA64Mask1DTest<F8B, uint8_t> {
@@ -124,10 +124,10 @@ class BlendA64Mask1DTest8B : public BlendA64Mask1DTest<F8B, uint8_t> {
   void Execute(const uint8_t *p_src0, const uint8_t *p_src1) {
     params_.ref_func(dst_ref_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
                      src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_,
-                     h_, w_);
+                     w_, h_);
     ASM_REGISTER_STATE_CHECK(params_.tst_func(
         dst_tst_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
-        src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_, h_, w_));
+        src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_, w_, h_));
   }
 };
 
@@ -167,7 +167,7 @@ TEST_P(BlendA64Mask1DTest8B, ExtremeValues) {
 static void blend_a64_hmask_ref(uint8_t *dst, uint32_t dst_stride,
                                 const uint8_t *src0, uint32_t src0_stride,
                                 const uint8_t *src1, uint32_t src1_stride,
-                                const uint8_t *mask, int h, int w) {
+                                const uint8_t *mask, int w, int h) {
   uint8_t mask2d[BlendA64Mask1DTest8B::kMaxMaskSize]
                 [BlendA64Mask1DTest8B::kMaxMaskSize];
 
@@ -175,14 +175,14 @@ static void blend_a64_hmask_ref(uint8_t *dst, uint32_t dst_stride,
     for (int col = 0; col < w; ++col) mask2d[row][col] = mask[col];
 
   aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride,
-                       &mask2d[0][0], BlendA64Mask1DTest8B::kMaxMaskSize, h, w,
+                       &mask2d[0][0], BlendA64Mask1DTest8B::kMaxMaskSize, w, h,
                        0, 0);
 }
 
 static void blend_a64_vmask_ref(uint8_t *dst, uint32_t dst_stride,
                                 const uint8_t *src0, uint32_t src0_stride,
                                 const uint8_t *src1, uint32_t src1_stride,
-                                const uint8_t *mask, int h, int w) {
+                                const uint8_t *mask, int w, int h) {
   uint8_t mask2d[BlendA64Mask1DTest8B::kMaxMaskSize]
                 [BlendA64Mask1DTest8B::kMaxMaskSize];
 
@@ -190,7 +190,7 @@ static void blend_a64_vmask_ref(uint8_t *dst, uint32_t dst_stride,
     for (int col = 0; col < w; ++col) mask2d[row][col] = mask[row];
 
   aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride,
-                       &mask2d[0][0], BlendA64Mask1DTest8B::kMaxMaskSize, h, w,
+                       &mask2d[0][0], BlendA64Mask1DTest8B::kMaxMaskSize, w, h,
                        0, 0);
 }
 
@@ -207,14 +207,21 @@ INSTANTIATE_TEST_CASE_P(
         TestFuncs(blend_a64_vmask_ref, aom_blend_a64_vmask_sse4_1)));
 #endif  // HAVE_SSE4_1
 
-#if CONFIG_HIGHBITDEPTH
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, BlendA64Mask1DTest8B,
+                        ::testing::Values(TestFuncs(blend_a64_hmask_ref,
+                                                    aom_blend_a64_hmask_neon),
+                                          TestFuncs(blend_a64_vmask_ref,
+                                                    aom_blend_a64_vmask_neon)));
+#endif  // HAVE_NEON
+
 //////////////////////////////////////////////////////////////////////////////
 // High bit-depth version
 //////////////////////////////////////////////////////////////////////////////
 
 typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
                      uint32_t src0_stride, const uint8_t *src1,
-                     uint32_t src1_stride, const uint8_t *mask, int h, int w,
+                     uint32_t src1_stride, const uint8_t *mask, int w, int h,
                      int bd);
 typedef libaom_test::FuncParam<FHBD> TestFuncsHBD;
 
@@ -224,11 +231,11 @@ class BlendA64Mask1DTestHBD : public BlendA64Mask1DTest<FHBD, uint16_t> {
     params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
                      CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
                      CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
-                     mask_, h_, w_, bit_depth_);
+                     mask_, w_, h_, bit_depth_);
     ASM_REGISTER_STATE_CHECK(params_.tst_func(
         CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_,
         CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
-        CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, mask_, h_, w_,
+        CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, mask_, w_, h_,
         bit_depth_));
   }
 
@@ -287,7 +294,7 @@ TEST_P(BlendA64Mask1DTestHBD, ExtremeValues) {
 static void highbd_blend_a64_hmask_ref(
     uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
     uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
-    const uint8_t *mask, int h, int w, int bd) {
+    const uint8_t *mask, int w, int h, int bd) {
   uint8_t mask2d[BlendA64Mask1DTestHBD::kMaxMaskSize]
                 [BlendA64Mask1DTestHBD::kMaxMaskSize];
 
@@ -296,13 +303,13 @@ static void highbd_blend_a64_hmask_ref(
 
   aom_highbd_blend_a64_mask_c(
       dst, dst_stride, src0, src0_stride, src1, src1_stride, &mask2d[0][0],
-      BlendA64Mask1DTestHBD::kMaxMaskSize, h, w, 0, 0, bd);
+      BlendA64Mask1DTestHBD::kMaxMaskSize, w, h, 0, 0, bd);
 }
 
 static void highbd_blend_a64_vmask_ref(
     uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
     uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
-    const uint8_t *mask, int h, int w, int bd) {
+    const uint8_t *mask, int w, int h, int bd) {
   uint8_t mask2d[BlendA64Mask1DTestHBD::kMaxMaskSize]
                 [BlendA64Mask1DTestHBD::kMaxMaskSize];
 
@@ -311,7 +318,7 @@ static void highbd_blend_a64_vmask_ref(
 
   aom_highbd_blend_a64_mask_c(
       dst, dst_stride, src0, src0_stride, src1, src1_stride, &mask2d[0][0],
-      BlendA64Mask1DTestHBD::kMaxMaskSize, h, w, 0, 0, bd);
+      BlendA64Mask1DTestHBD::kMaxMaskSize, w, h, 0, 0, bd);
 }
 
 INSTANTIATE_TEST_CASE_P(
@@ -329,6 +336,4 @@ INSTANTIATE_TEST_CASE_P(
                       TestFuncsHBD(highbd_blend_a64_vmask_ref,
                                    aom_highbd_blend_a64_vmask_sse4_1)));
 #endif  // HAVE_SSE4_1
-
-#endif  // CONFIG_HIGHBITDEPTH
 }  // namespace
diff --git a/third_party/aom/test/blend_a64_mask_test.cc b/third_party/aom/test/blend_a64_mask_test.cc
index fef124d347..c9c6795eef 100644
--- a/third_party/aom/test/blend_a64_mask_test.cc
+++ b/third_party/aom/test/blend_a64_mask_test.cc
@@ -17,11 +17,11 @@
 #include "test/register_state_check.h"
 #include "test/function_equivalence_test.h"
 
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
-#include "aom/aom_integer.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
 
-#include "./av1_rtcd.h"
+#include "aom/aom_integer.h"
 
 #include "av1/common/enums.h"
 
@@ -31,8 +31,8 @@ using libaom_test::FunctionEquivalenceTest;
 
 namespace {
 
-template <typename F, typename T>
-class BlendA64MaskTest : public FunctionEquivalenceTest<F> {
+template <typename BlendA64Func, typename SrcPixel, typename DstPixel>
+class BlendA64MaskTest : public FunctionEquivalenceTest<BlendA64Func> {
  protected:
   static const int kIterations = 10000;
   static const int kMaxWidth = MAX_SB_SIZE * 5;  // * 5 to cover longer strides
@@ -43,14 +43,44 @@ class BlendA64MaskTest : public FunctionEquivalenceTest<F> {
 
   virtual ~BlendA64MaskTest() {}
 
-  virtual void Execute(const T *p_src0, const T *p_src1) = 0;
+  virtual void Execute(const SrcPixel *p_src0, const SrcPixel *p_src1) = 0;
+
+  template <typename Pixel>
+  void GetSources(Pixel **src0, Pixel **src1, Pixel * /*dst*/) {
+    switch (this->rng_(3)) {
+      case 0:  // Separate sources
+        *src0 = src0_;
+        *src1 = src1_;
+        break;
+      case 1:  // src0 == dst
+        *src0 = dst_tst_;
+        src0_stride_ = dst_stride_;
+        src0_offset_ = dst_offset_;
+        *src1 = src1_;
+        break;
+      case 2:  // src1 == dst
+        *src0 = src0_;
+        *src1 = dst_tst_;
+        src1_stride_ = dst_stride_;
+        src1_offset_ = dst_offset_;
+        break;
+      default: FAIL();
+    }
+  }
+
+  void GetSources(uint16_t **src0, uint16_t **src1, uint8_t * /*dst*/) {
+    *src0 = src0_;
+    *src1 = src1_;
+  }
+
+  uint8_t Rand1() { return this->rng_.Rand8() & 1; }
 
-  void Common() {
-    w_ = 1 << this->rng_(MAX_SB_SIZE_LOG2 + 1);
-    h_ = 1 << this->rng_(MAX_SB_SIZE_LOG2 + 1);
+  void RunTest() {
+    w_ = 4 << this->rng_(MAX_SB_SIZE_LOG2 - 1);
+    h_ = 4 << this->rng_(MAX_SB_SIZE_LOG2 - 1);
 
-    subx_ = this->rng_(2);
-    suby_ = this->rng_(2);
+    subx_ = Rand1();
+    suby_ = Rand1();
 
     dst_offset_ = this->rng_(33);
     dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
@@ -64,49 +94,35 @@ class BlendA64MaskTest : public FunctionEquivalenceTest<F> {
     mask_stride_ =
         this->rng_(kMaxWidth + 1 - w_ * (subx_ ? 2 : 1)) + w_ * (subx_ ? 2 : 1);
 
-    T *p_src0;
-    T *p_src1;
+    SrcPixel *p_src0;
+    SrcPixel *p_src1;
 
-    switch (this->rng_(3)) {
-      case 0:  // Separate sources
-        p_src0 = src0_;
-        p_src1 = src1_;
-        break;
-      case 1:  // src0 == dst
-        p_src0 = dst_tst_;
-        src0_stride_ = dst_stride_;
-        src0_offset_ = dst_offset_;
-        p_src1 = src1_;
-        break;
-      case 2:  // src1 == dst
-        p_src0 = src0_;
-        p_src1 = dst_tst_;
-        src1_stride_ = dst_stride_;
-        src1_offset_ = dst_offset_;
-        break;
-      default: FAIL();
-    }
+    p_src0 = src0_;
+    p_src1 = src1_;
+
+    GetSources(&p_src0, &p_src1, &dst_ref_[0]);
 
     Execute(p_src0, p_src1);
 
     for (int r = 0; r < h_; ++r) {
       for (int c = 0; c < w_; ++c) {
         ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c],
-                  dst_tst_[dst_offset_ + r * dst_stride_ + c]);
+                  dst_tst_[dst_offset_ + r * dst_stride_ + c])
+            << w_ << "x" << h_ << " r: " << r << " c: " << c;
       }
     }
   }
 
-  T dst_ref_[kBufSize];
-  T dst_tst_[kBufSize];
+  DstPixel dst_ref_[kBufSize];
+  DstPixel dst_tst_[kBufSize];
   uint32_t dst_stride_;
   uint32_t dst_offset_;
 
-  T src0_[kBufSize];
+  SrcPixel src0_[kBufSize];
   uint32_t src0_stride_;
   uint32_t src0_offset_;
 
-  T src1_[kBufSize];
+  SrcPixel src1_[kBufSize];
   uint32_t src1_stride_;
   uint32_t src1_offset_;
 
@@ -127,19 +143,19 @@ class BlendA64MaskTest : public FunctionEquivalenceTest<F> {
 typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
                     uint32_t src0_stride, const uint8_t *src1,
                     uint32_t src1_stride, const uint8_t *mask,
-                    uint32_t mask_stride, int h, int w, int suby, int subx);
+                    uint32_t mask_stride, int w, int h, int subx, int suby);
 typedef libaom_test::FuncParam<F8B> TestFuncs;
 
-class BlendA64MaskTest8B : public BlendA64MaskTest<F8B, uint8_t> {
+class BlendA64MaskTest8B : public BlendA64MaskTest<F8B, uint8_t, uint8_t> {
  protected:
   void Execute(const uint8_t *p_src0, const uint8_t *p_src1) {
     params_.ref_func(dst_ref_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
                      src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_,
-                     kMaxMaskWidth, h_, w_, suby_, subx_);
+                     kMaxMaskWidth, w_, h_, subx_, suby_);
     ASM_REGISTER_STATE_CHECK(params_.tst_func(
         dst_tst_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
         src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_, kMaxMaskWidth,
-        h_, w_, suby_, subx_));
+        w_, h_, subx_, suby_));
   }
 };
 
@@ -156,7 +172,7 @@ TEST_P(BlendA64MaskTest8B, RandomValues) {
     for (int i = 0; i < kMaxMaskSize; ++i)
       mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
 
-    Common();
+    RunTest();
   }
 }
 
@@ -172,7 +188,7 @@ TEST_P(BlendA64MaskTest8B, ExtremeValues) {
     for (int i = 0; i < kMaxMaskSize; ++i)
       mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
 
-    Common();
+    RunTest();
   }
 }
 
@@ -182,7 +198,85 @@ INSTANTIATE_TEST_CASE_P(SSE4_1, BlendA64MaskTest8B,
                             aom_blend_a64_mask_c, aom_blend_a64_mask_sse4_1)));
 #endif  // HAVE_SSE4_1
 
-#if CONFIG_HIGHBITDEPTH
+//////////////////////////////////////////////////////////////////////////////
+// 8 bit _d16 version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*F8B_D16)(uint8_t *dst, uint32_t dst_stride, const uint16_t *src0,
+                        uint32_t src0_stride, const uint16_t *src1,
+                        uint32_t src1_stride, const uint8_t *mask,
+                        uint32_t mask_stride, int w, int h, int subx, int suby,
+                        ConvolveParams *conv_params);
+typedef libaom_test::FuncParam<F8B_D16> TestFuncs_d16;
+
+class BlendA64MaskTest8B_d16
+    : public BlendA64MaskTest<F8B_D16, uint16_t, uint8_t> {
+ protected:
+  // max number of bits used by the source
+  static const int kSrcMaxBitsMask = 0x3fff;
+
+  void Execute(const uint16_t *p_src0, const uint16_t *p_src1) {
+    ConvolveParams conv_params;
+    conv_params.round_0 = ROUND0_BITS;
+    conv_params.round_1 = COMPOUND_ROUND1_BITS;
+    params_.ref_func(dst_ref_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
+                     src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_,
+                     kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params);
+    ASM_REGISTER_STATE_CHECK(params_.tst_func(
+        dst_tst_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
+        src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_, kMaxMaskWidth,
+        w_, h_, subx_, suby_, &conv_params));
+  }
+};
+
+TEST_P(BlendA64MaskTest8B_d16, RandomValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_.Rand8();
+      dst_tst_[i] = rng_.Rand8();
+
+      src0_[i] = rng_.Rand16() & kSrcMaxBitsMask;
+      src1_[i] = rng_.Rand16() & kSrcMaxBitsMask;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    RunTest();
+  }
+}
+
+TEST_P(BlendA64MaskTest8B_d16, ExtremeValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = 255;
+      dst_tst_[i] = 255;
+
+      src0_[i] = kSrcMaxBitsMask;
+      src1_[i] = kSrcMaxBitsMask;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = AOM_BLEND_A64_MAX_ALPHA - 1;
+
+    RunTest();
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, BlendA64MaskTest8B_d16,
+    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
+                                    aom_lowbd_blend_a64_d16_mask_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, BlendA64MaskTest8B_d16,
+    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
+                                    aom_lowbd_blend_a64_d16_mask_neon)));
+#endif  // HAVE_NEON
+
 //////////////////////////////////////////////////////////////////////////////
 // High bit-depth version
 //////////////////////////////////////////////////////////////////////////////
@@ -190,22 +284,22 @@ INSTANTIATE_TEST_CASE_P(SSE4_1, BlendA64MaskTest8B,
 typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
                      uint32_t src0_stride, const uint8_t *src1,
                      uint32_t src1_stride, const uint8_t *mask,
-                     uint32_t mask_stride, int h, int w, int suby, int subx,
+                     uint32_t mask_stride, int w, int h, int subx, int suby,
                      int bd);
 typedef libaom_test::FuncParam<FHBD> TestFuncsHBD;
 
-class BlendA64MaskTestHBD : public BlendA64MaskTest<FHBD, uint16_t> {
+class BlendA64MaskTestHBD : public BlendA64MaskTest<FHBD, uint16_t, uint16_t> {
  protected:
   void Execute(const uint16_t *p_src0, const uint16_t *p_src1) {
     params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
                      CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
                      CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
-                     mask_, kMaxMaskWidth, h_, w_, suby_, subx_, bit_depth_);
+                     mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_);
     ASM_REGISTER_STATE_CHECK(params_.tst_func(
         CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_,
         CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
         CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, mask_,
-        kMaxMaskWidth, h_, w_, suby_, subx_, bit_depth_));
+        kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_));
   }
 
   int bit_depth_;
@@ -231,7 +325,7 @@ TEST_P(BlendA64MaskTestHBD, RandomValues) {
     for (int i = 0; i < kMaxMaskSize; ++i)
       mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
 
-    Common();
+    RunTest();
   }
 }
 
@@ -256,7 +350,7 @@ TEST_P(BlendA64MaskTestHBD, ExtremeValues) {
     for (int i = 0; i < kMaxMaskSize; ++i)
       mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
 
-    Common();
+    RunTest();
   }
 }
 
@@ -266,5 +360,104 @@ INSTANTIATE_TEST_CASE_P(
     ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
                                    aom_highbd_blend_a64_mask_sse4_1)));
 #endif  // HAVE_SSE4_1
-#endif  // CONFIG_HIGHBITDEPTH
+
+//////////////////////////////////////////////////////////////////////////////
+// HBD _d16 version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*FHBD_D16)(uint8_t *dst, uint32_t dst_stride,
+                         const CONV_BUF_TYPE *src0, uint32_t src0_stride,
+                         const CONV_BUF_TYPE *src1, uint32_t src1_stride,
+                         const uint8_t *mask, uint32_t mask_stride, int w,
+                         int h, int subx, int suby, ConvolveParams *conv_params,
+                         const int bd);
+typedef libaom_test::FuncParam<FHBD_D16> TestFuncsHBD_d16;
+
+class BlendA64MaskTestHBD_d16
+    : public BlendA64MaskTest<FHBD_D16, uint16_t, uint16_t> {
+ protected:
+  // max number of bits used by the source
+  static const int kSrcMaxBitsMask = (1 << 14) - 1;
+  static const int kSrcMaxBitsMaskHBD = (1 << 16) - 1;
+
+  void Execute(const uint16_t *p_src0, const uint16_t *p_src1) {
+    ConvolveParams conv_params;
+    conv_params.round_0 = (bit_depth_ == 12) ? ROUND0_BITS + 2 : ROUND0_BITS;
+    conv_params.round_1 = COMPOUND_ROUND1_BITS;
+
+    params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
+                     p_src0 + src0_offset_, src0_stride_, p_src1 + src1_offset_,
+                     src1_stride_, mask_, kMaxMaskWidth, w_, h_, subx_, suby_,
+                     &conv_params, bit_depth_);
+    if (params_.tst_func) {
+      ASM_REGISTER_STATE_CHECK(params_.tst_func(
+          CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_,
+          p_src0 + src0_offset_, src0_stride_, p_src1 + src1_offset_,
+          src1_stride_, mask_, kMaxMaskWidth, w_, h_, subx_, suby_,
+          &conv_params, bit_depth_));
+    }
+  }
+
+  int bit_depth_;
+  int src_max_bits_mask_;
+};
+
+TEST_P(BlendA64MaskTestHBD_d16, RandomValues) {
+  if (params_.tst_func == NULL) return;
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    switch (rng_(3)) {
+      case 0: bit_depth_ = 8; break;
+      case 1: bit_depth_ = 10; break;
+      default: bit_depth_ = 12; break;
+    }
+    src_max_bits_mask_ =
+        (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD;
+
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = rng_.Rand8();
+      dst_tst_[i] = rng_.Rand8();
+
+      src0_[i] = rng_.Rand16() & src_max_bits_mask_;
+      src1_[i] = rng_.Rand16() & src_max_bits_mask_;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i)
+      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
+
+    RunTest();
+  }
+}
+
+TEST_P(BlendA64MaskTestHBD_d16, SaturatedValues) {
+  for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) {
+    src_max_bits_mask_ =
+        (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD;
+
+    for (int i = 0; i < kBufSize; ++i) {
+      dst_ref_[i] = 0;
+      dst_tst_[i] = (1 << bit_depth_) - 1;
+
+      src0_[i] = src_max_bits_mask_;
+      src1_[i] = src_max_bits_mask_;
+    }
+
+    for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA;
+
+    RunTest();
+  }
+}
+
+INSTANTIATE_TEST_CASE_P(
+    C, BlendA64MaskTestHBD_d16,
+    ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c, NULL)));
+
+// TODO(slavarnway): Enable the following in the avx2 commit. (56501)
+#if 0
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, BlendA64MaskTestHBD,
+    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
+                                   aom_highbd_blend_a64_mask_avx2)));
+#endif  // HAVE_AVX2
+#endif
 }  // namespace
diff --git a/third_party/aom/test/block_error_test.cc b/third_party/aom/test/block_error_test.cc
deleted file mode 100644
index 4364af422e..0000000000
--- a/third_party/aom/test/block_error_test.cc
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-namespace {
-using libaom_test::ACMRandom;
-
-typedef int64_t (*BlockErrorFunc)(const tran_low_t *coeff,
-                                  const tran_low_t *dqcoeff, intptr_t size,
-                                  int64_t *ssz);
-#if CONFIG_HIGHBITDEPTH
-typedef int64_t (*HbdBlockErrorFunc)(const tran_low_t *coeff,
-                                     const tran_low_t *dqcoeff, intptr_t size,
-                                     int64_t *ssz, int bd);
-#endif
-
-typedef std::tr1::tuple<BlockErrorFunc, BlockErrorFunc, TX_SIZE,
-                        aom_bit_depth_t>
-    BlockErrorParam;
-
-const int kTestNum = 10000;
-
-class BlockErrorTest : public ::testing::TestWithParam<BlockErrorParam> {
- public:
-  BlockErrorTest()
-      : blk_err_ref_(GET_PARAM(0)), blk_err_(GET_PARAM(1)),
-        tx_size_(GET_PARAM(2)), bd_(GET_PARAM(3)) {}
-
-  virtual ~BlockErrorTest() {}
-
-  virtual void SetUp() {
-    const intptr_t block_size = getCoeffNum();
-    coeff_ = reinterpret_cast<tran_low_t *>(
-        aom_memalign(16, 2 * block_size * sizeof(tran_low_t)));
-  }
-
-  virtual void TearDown() {
-    aom_free(coeff_);
-    coeff_ = NULL;
-    libaom_test::ClearSystemState();
-  }
-
-  void BlockErrorRun(int testNum) {
-    int i;
-    int64_t error_ref, error;
-    int64_t sse_ref, sse;
-    const intptr_t block_size = getCoeffNum();
-    tran_low_t *dqcoeff = coeff_ + block_size;
-    for (i = 0; i < testNum; ++i) {
-      FillRandomData();
-
-      error_ref = blk_err_ref_(coeff_, dqcoeff, block_size, &sse_ref);
-      ASM_REGISTER_STATE_CHECK(error =
-                                   blk_err_(coeff_, dqcoeff, block_size, &sse));
-
-      EXPECT_EQ(error_ref, error) << "Error doesn't match on test: " << i;
-      EXPECT_EQ(sse_ref, sse) << "SSE doesn't match on test: " << i;
-    }
-  }
-
-  intptr_t getCoeffNum() { return tx_size_2d[tx_size_]; }
-
-  void FillRandomData() {
-    const intptr_t block_size = getCoeffNum();
-    tran_low_t *dqcoeff = coeff_ + block_size;
-    intptr_t i;
-    int16_t margin = 512;
-    for (i = 0; i < block_size; ++i) {
-      coeff_[i] = GetRandomNumWithRange(INT16_MIN + margin, INT16_MAX - margin);
-      dqcoeff[i] = coeff_[i] + GetRandomDeltaWithRange(margin);
-    }
-  }
-
-  void FillConstantData() {
-    const intptr_t block_size = getCoeffNum();
-    tran_low_t *dqcoeff = coeff_ + block_size;
-    intptr_t i;
-    for (i = 0; i < block_size; ++i) {
-      coeff_[i] = 5;
-      dqcoeff[i] = 7;
-    }
-  }
-
-  tran_low_t GetRandomNumWithRange(int16_t min, int16_t max) {
-    return clamp((int16_t)rnd_.Rand16(), min, max);
-  }
-
-  tran_low_t GetRandomDeltaWithRange(int16_t delta) {
-    tran_low_t value = (int16_t)rnd_.Rand16();
-    value %= delta;
-    return value;
-  }
-
-  BlockErrorFunc blk_err_ref_;
-  BlockErrorFunc blk_err_;
-  TX_SIZE tx_size_;
-  aom_bit_depth_t bd_;
-  ACMRandom rnd_;
-  tran_low_t *coeff_;
-};
-
-TEST_P(BlockErrorTest, BitExact) { BlockErrorRun(kTestNum); }
-
-using std::tr1::make_tuple;
-
-#if !CONFIG_HIGHBITDEPTH && HAVE_SSE2
-const BlockErrorParam kBlkErrParamArraySse2[] = { make_tuple(
-    &av1_block_error_c, &av1_block_error_sse2, TX_32X32, AOM_BITS_8) };
-INSTANTIATE_TEST_CASE_P(SSE2, BlockErrorTest,
-                        ::testing::ValuesIn(kBlkErrParamArraySse2));
-#endif
-
-#if HAVE_AVX2
-const BlockErrorParam kBlkErrParamArrayAvx2[] = { make_tuple(
-    &av1_block_error_c, &av1_block_error_avx2, TX_32X32, AOM_BITS_8) };
-INSTANTIATE_TEST_CASE_P(AVX2, BlockErrorTest,
-                        ::testing::ValuesIn(kBlkErrParamArrayAvx2));
-#endif
-}  // namespace
diff --git a/third_party/aom/test/boolcoder_test.cc b/third_party/aom/test/boolcoder_test.cc
index 916a544273..72182de109 100644
--- a/third_party/aom/test/boolcoder_test.cc
+++ b/third_party/aom/test/boolcoder_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <math.h>
 #include <stdlib.h>
@@ -69,7 +69,7 @@ TEST(AV1, TestBitIO) {
         aom_stop_encode(&bw);
 
         aom_reader br;
-        aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
+        aom_reader_init(&br, bw_buffer, bw.pos);
         bit_rnd.Reset(random_seed);
         for (int i = 0; i < kBitsToTest; ++i) {
           if (bit_method == 2) {
@@ -86,7 +86,7 @@ TEST(AV1, TestBitIO) {
   }
 }
 
-#define FRAC_DIFF_TOTAL_ERROR 0.16
+#define FRAC_DIFF_TOTAL_ERROR 0.18
 
 TEST(AV1, TestTell) {
   const int kBufferSize = 10000;
@@ -102,7 +102,7 @@ TEST(AV1, TestTell) {
     }
     aom_stop_encode(&bw);
     aom_reader br;
-    aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
+    aom_reader_init(&br, bw_buffer, bw.pos);
     uint32_t last_tell = aom_reader_tell(&br);
     uint32_t last_tell_frac = aom_reader_tell_frac(&br);
     double frac_diff_total = 0;
diff --git a/third_party/aom/test/borders_test.cc b/third_party/aom/test/borders_test.cc
index ee771707c2..893237ef30 100644
--- a/third_party/aom/test/borders_test.cc
+++ b/third_party/aom/test/borders_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <climits>
 #include <vector>
@@ -19,12 +19,12 @@
 
 namespace {
 
-class BordersTest
+class BordersTestLarge
     : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
       public ::libaom_test::EncoderTest {
  protected:
-  BordersTest() : EncoderTest(GET_PARAM(0)) {}
-  virtual ~BordersTest() {}
+  BordersTestLarge() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~BordersTestLarge() {}
 
   virtual void SetUp() {
     InitializeConfig();
@@ -47,7 +47,7 @@ class BordersTest
   }
 };
 
-TEST_P(BordersTest, TestEncodeHighBitrate) {
+TEST_P(BordersTestLarge, TestEncodeHighBitrate) {
   // Validate that this non multiple of 64 wide clip encodes and decodes
   // without a mismatch when passing in a very low max q.  This pushes
   // the encoder to producing lots of big partitions which will likely
@@ -63,7 +63,7 @@ TEST_P(BordersTest, TestEncodeHighBitrate) {
 
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 }
-TEST_P(BordersTest, TestLowBitrate) {
+TEST_P(BordersTestLarge, TestLowBitrate) {
   // Validate that this clip encodes and decodes without a mismatch
   // when passing in a very high min q.  This pushes the encoder to producing
   // lots of small partitions which might will test the other condition.
@@ -80,6 +80,6 @@ TEST_P(BordersTest, TestLowBitrate) {
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 }
 
-AV1_INSTANTIATE_TEST_CASE(BordersTest,
+AV1_INSTANTIATE_TEST_CASE(BordersTestLarge,
                           ::testing::Values(::libaom_test::kTwoPassGood));
 }  // namespace
diff --git a/third_party/aom/test/cdef_test.cc b/third_party/aom/test/cdef_test.cc
index b6250b6e98..becc072918 100644
--- a/third_party/aom/test/cdef_test.cc
+++ b/third_party/aom/test/cdef_test.cc
@@ -7,15 +7,16 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <cstdlib>
 #include <string>
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
 #include "aom_ports/aom_timer.h"
 #include "av1/common/cdef_block.h"
 #include "test/acm_random.h"
@@ -27,7 +28,8 @@ using libaom_test::ACMRandom;
 
 namespace {
 
-typedef std::tr1::tuple<cdef_filter_block_func, cdef_filter_block_func, int>
+typedef ::testing::tuple<cdef_filter_block_func, cdef_filter_block_func,
+                         BLOCK_SIZE, int, int>
     cdef_dir_param_t;
 
 class CDEFBlockTest : public ::testing::TestWithParam<cdef_dir_param_t> {
@@ -37,12 +39,16 @@ class CDEFBlockTest : public ::testing::TestWithParam<cdef_dir_param_t> {
     cdef = GET_PARAM(0);
     ref_cdef = GET_PARAM(1);
     bsize = GET_PARAM(2);
+    boundary = GET_PARAM(3);
+    depth = GET_PARAM(4);
   }
 
   virtual void TearDown() { libaom_test::ClearSystemState(); }
 
  protected:
   int bsize;
+  int boundary;
+  int depth;
   cdef_filter_block_func cdef;
   cdef_filter_block_func ref_cdef;
 };
@@ -50,7 +56,7 @@ class CDEFBlockTest : public ::testing::TestWithParam<cdef_dir_param_t> {
 typedef CDEFBlockTest CDEFSpeedTest;
 
 void test_cdef(int bsize, int iterations, cdef_filter_block_func cdef,
-               cdef_filter_block_func ref_cdef) {
+               cdef_filter_block_func ref_cdef, int boundary, int depth) {
   const int size = 8;
   const int ysize = size + 2 * CDEF_VBORDER;
   ACMRandom rnd(ACMRandom::DeterministicSeed());
@@ -61,80 +67,73 @@ void test_cdef(int bsize, int iterations, cdef_filter_block_func cdef,
   memset(d, 0, sizeof(d));
 
   int error = 0, pristrength = 0, secstrength, dir;
-  int boundary, pridamping, secdamping, depth, bits, level, count,
+  int pridamping, secdamping, bits, level, count,
       errdepth = 0, errpristrength = 0, errsecstrength = 0, errboundary = 0,
       errpridamping = 0, errsecdamping = 0;
   unsigned int pos = 0;
 
-  for (boundary = 0; boundary < 16; boundary++) {
-    for (depth = 8; depth <= 12; depth += 2) {
-      const unsigned int max_pos = size * size >> (depth == 8);
-      for (pridamping = 3 + depth - 8;
-           pridamping < 7 - 3 * !!boundary + depth - 8; pridamping++) {
-        for (secdamping = 3 + depth - 8;
-             secdamping < 7 - 3 * !!boundary + depth - 8; secdamping++) {
-          for (count = 0; count < iterations; count++) {
-            for (level = 0; level < (1 << depth) && !error;
-                 level += (2 + 6 * !!boundary) << (depth - 8)) {
-              for (bits = 1; bits <= depth && !error;
-                   bits += 1 + 3 * !!boundary) {
-                for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
-                  s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
-                               (1 << depth) - 1);
-                if (boundary) {
-                  if (boundary & 1) {  // Left
-                    for (int i = 0; i < ysize; i++)
-                      for (int j = 0; j < CDEF_HBORDER; j++)
-                        s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
-                  }
-                  if (boundary & 2) {  // Right
-                    for (int i = 0; i < ysize; i++)
-                      for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++)
-                        s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
-                  }
-                  if (boundary & 4) {  // Above
-                    for (int i = 0; i < CDEF_VBORDER; i++)
-                      for (int j = 0; j < CDEF_BSTRIDE; j++)
-                        s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
-                  }
-                  if (boundary & 8) {  // Below
-                    for (int i = CDEF_VBORDER + size; i < ysize; i++)
-                      for (int j = 0; j < CDEF_BSTRIDE; j++)
-                        s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
-                  }
-                }
-                for (dir = 0; dir < 8; dir++) {
-                  for (pristrength = 0;
-                       pristrength <= 19 << (depth - 8) && !error;
-                       pristrength += (1 + 4 * !!boundary) << (depth - 8)) {
-                    if (pristrength == 16) pristrength = 19;
-                    for (secstrength = 0;
-                         secstrength <= 4 << (depth - 8) && !error;
-                         secstrength += 1 << (depth - 8)) {
-                      if (secstrength == 3 << (depth - 8)) continue;
-                      ref_cdef(depth == 8 ? (uint8_t *)ref_d : 0, ref_d, size,
-                               s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
-                               pristrength, secstrength, dir, pridamping,
-                               secdamping, bsize, (1 << depth) - 1);
-                      // If cdef and ref_cdef are the same, we're just testing
-                      // speed
-                      if (cdef != ref_cdef)
-                        ASM_REGISTER_STATE_CHECK(
-                            cdef(depth == 8 ? (uint8_t *)d : 0, d, size,
-                                 s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
-                                 pristrength, secstrength, dir, pridamping,
-                                 secdamping, bsize, (1 << depth) - 1));
-                      if (ref_cdef != cdef) {
-                        for (pos = 0; pos < max_pos && !error; pos++) {
-                          error = ref_d[pos] != d[pos];
-                          errdepth = depth;
-                          errpristrength = pristrength;
-                          errsecstrength = secstrength;
-                          errboundary = boundary;
-                          errpridamping = pridamping;
-                          errsecdamping = secdamping;
-                        }
-                      }
+  const unsigned int max_pos = size * size >> static_cast<int>(depth == 8);
+  for (pridamping = 3 + depth - 8; pridamping < 7 - 3 * !!boundary + depth - 8;
+       pridamping++) {
+    for (secdamping = 3 + depth - 8;
+         secdamping < 7 - 3 * !!boundary + depth - 8; secdamping++) {
+      for (count = 0; count < iterations; count++) {
+        for (level = 0; level < (1 << depth) && !error;
+             level += (2 + 6 * !!boundary) << (depth - 8)) {
+          for (bits = 1; bits <= depth && !error; bits += 1 + 3 * !!boundary) {
+            for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
+              s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
+                           (1 << depth) - 1);
+            if (boundary) {
+              if (boundary & 1) {  // Left
+                for (int i = 0; i < ysize; i++)
+                  for (int j = 0; j < CDEF_HBORDER; j++)
+                    s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
+              }
+              if (boundary & 2) {  // Right
+                for (int i = 0; i < ysize; i++)
+                  for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++)
+                    s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
+              }
+              if (boundary & 4) {  // Above
+                for (int i = 0; i < CDEF_VBORDER; i++)
+                  for (int j = 0; j < CDEF_BSTRIDE; j++)
+                    s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
+              }
+              if (boundary & 8) {  // Below
+                for (int i = CDEF_VBORDER + size; i < ysize; i++)
+                  for (int j = 0; j < CDEF_BSTRIDE; j++)
+                    s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
+              }
+            }
+            for (dir = 0; dir < 8; dir++) {
+              for (pristrength = 0; pristrength <= 19 << (depth - 8) && !error;
+                   pristrength += (1 + 4 * !!boundary) << (depth - 8)) {
+                if (pristrength == 16) pristrength = 19;
+                for (secstrength = 0; secstrength <= 4 << (depth - 8) && !error;
+                     secstrength += 1 << (depth - 8)) {
+                  if (secstrength == 3 << (depth - 8)) continue;
+                  ref_cdef(depth == 8 ? (uint8_t *)ref_d : 0, ref_d, size,
+                           s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
+                           pristrength, secstrength, dir, pridamping,
+                           secdamping, bsize, (1 << depth) - 1, depth - 8);
+                  // If cdef and ref_cdef are the same, we're just testing
+                  // speed
+                  if (cdef != ref_cdef)
+                    ASM_REGISTER_STATE_CHECK(
+                        cdef(depth == 8 ? (uint8_t *)d : 0, d, size,
+                             s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
+                             pristrength, secstrength, dir, pridamping,
+                             secdamping, bsize, (1 << depth) - 1, depth - 8));
+                  if (ref_cdef != cdef) {
+                    for (pos = 0; pos < max_pos && !error; pos++) {
+                      error = ref_d[pos] != d[pos];
+                      errdepth = depth;
+                      errpristrength = pristrength;
+                      errsecstrength = secstrength;
+                      errboundary = boundary;
+                      errpridamping = pridamping;
+                      errsecdamping = secdamping;
                     }
                   }
                 }
@@ -145,6 +144,7 @@ void test_cdef(int bsize, int iterations, cdef_filter_block_func cdef,
       }
     }
   }
+
   pos--;
   EXPECT_EQ(0, error) << "Error: CDEFBlockTest, SIMD and C mismatch."
                       << std::endl
@@ -162,25 +162,20 @@ void test_cdef(int bsize, int iterations, cdef_filter_block_func cdef,
 }
 
 void test_cdef_speed(int bsize, int iterations, cdef_filter_block_func cdef,
-                     cdef_filter_block_func ref_cdef) {
+                     cdef_filter_block_func ref_cdef, int boundary, int depth) {
   aom_usec_timer ref_timer;
   aom_usec_timer timer;
 
   aom_usec_timer_start(&ref_timer);
-  test_cdef(bsize, iterations, ref_cdef, ref_cdef);
+  test_cdef(bsize, iterations, ref_cdef, ref_cdef, boundary, depth);
   aom_usec_timer_mark(&ref_timer);
   int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
 
   aom_usec_timer_start(&timer);
-  test_cdef(bsize, iterations, cdef, cdef);
+  test_cdef(bsize, iterations, cdef, cdef, boundary, depth);
   aom_usec_timer_mark(&timer);
   int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
 
-#if 0
-  std::cout << "[          ] C time = " << ref_elapsed_time / 1000
-            << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
-#endif
-
   EXPECT_GT(ref_elapsed_time, elapsed_time)
       << "Error: CDEFSpeedTest, SIMD slower than C." << std::endl
       << "C time: " << ref_elapsed_time << " us" << std::endl
@@ -190,7 +185,7 @@ void test_cdef_speed(int bsize, int iterations, cdef_filter_block_func cdef,
 typedef int (*find_dir_t)(const uint16_t *img, int stride, int32_t *var,
                           int coeff_shift);
 
-typedef std::tr1::tuple<find_dir_t, find_dir_t> find_dir_param_t;
+typedef ::testing::tuple<find_dir_t, find_dir_t> find_dir_param_t;
 
 class CDEFFindDirTest : public ::testing::TestWithParam<find_dir_param_t> {
  public:
@@ -268,11 +263,6 @@ void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride,
   aom_usec_timer_mark(&timer);
   int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
 
-#if 0
-  std::cout << "[          ] C time = " << ref_elapsed_time / 1000
-            << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
-#endif
-
   EXPECT_GT(ref_elapsed_time, elapsed_time)
       << "Error: CDEFFindDirSpeedTest, SIMD slower than C." << std::endl
       << "C time: " << ref_elapsed_time << " us" << std::endl
@@ -280,11 +270,11 @@ void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride,
 }
 
 TEST_P(CDEFBlockTest, TestSIMDNoMismatch) {
-  test_cdef(bsize, 1, cdef, ref_cdef);
+  test_cdef(bsize, 1, cdef, ref_cdef, boundary, depth);
 }
 
 TEST_P(CDEFSpeedTest, DISABLED_TestSpeed) {
-  test_cdef_speed(bsize, 4, cdef, ref_cdef);
+  test_cdef_speed(bsize, 4, cdef, ref_cdef, boundary, depth);
 }
 
 TEST_P(CDEFFindDirTest, TestSIMDNoMismatch) {
@@ -295,7 +285,7 @@ TEST_P(CDEFFindDirSpeedTest, DISABLED_TestSpeed) {
   test_finddir_speed(finddir, ref_finddir);
 }
 
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
 
 // VS compiling for 32 bit targets does not support vector types in
 // structs as arguments, which makes the v256 type of the intrinsics
@@ -304,9 +294,11 @@ using std::tr1::make_tuple;
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
     SSE2, CDEFBlockTest,
-    ::testing::Values(
-        make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_4X4),
-        make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_8X8)));
+    ::testing::Combine(::testing::Values(&cdef_filter_block_sse2),
+                       ::testing::Values(&cdef_filter_block_c),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
 INSTANTIATE_TEST_CASE_P(SSE2, CDEFFindDirTest,
                         ::testing::Values(make_tuple(&cdef_find_dir_sse2,
                                                      &cdef_find_dir_c)));
@@ -314,9 +306,11 @@ INSTANTIATE_TEST_CASE_P(SSE2, CDEFFindDirTest,
 #if HAVE_SSSE3
 INSTANTIATE_TEST_CASE_P(
     SSSE3, CDEFBlockTest,
-    ::testing::Values(
-        make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c, BLOCK_4X4),
-        make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c, BLOCK_8X8)));
+    ::testing::Combine(::testing::Values(&cdef_filter_block_ssse3),
+                       ::testing::Values(&cdef_filter_block_c),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
 INSTANTIATE_TEST_CASE_P(SSSE3, CDEFFindDirTest,
                         ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
                                                      &cdef_find_dir_c)));
@@ -325,10 +319,11 @@ INSTANTIATE_TEST_CASE_P(SSSE3, CDEFFindDirTest,
 #if HAVE_SSE4_1
 INSTANTIATE_TEST_CASE_P(
     SSE4_1, CDEFBlockTest,
-    ::testing::Values(make_tuple(&cdef_filter_block_sse4_1,
-                                 &cdef_filter_block_c, BLOCK_4X4),
-                      make_tuple(&cdef_filter_block_sse4_1,
-                                 &cdef_filter_block_c, BLOCK_8X8)));
+    ::testing::Combine(::testing::Values(&cdef_filter_block_sse4_1),
+                       ::testing::Values(&cdef_filter_block_c),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
 INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFFindDirTest,
                         ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
                                                      &cdef_find_dir_c)));
@@ -337,9 +332,11 @@ INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFFindDirTest,
 #if HAVE_AVX2
 INSTANTIATE_TEST_CASE_P(
     AVX2, CDEFBlockTest,
-    ::testing::Values(
-        make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_4X4),
-        make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_8X8)));
+    ::testing::Combine(::testing::Values(&cdef_filter_block_avx2),
+                       ::testing::Values(&cdef_filter_block_c),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
 INSTANTIATE_TEST_CASE_P(AVX2, CDEFFindDirTest,
                         ::testing::Values(make_tuple(&cdef_find_dir_avx2,
                                                      &cdef_find_dir_c)));
@@ -348,9 +345,11 @@ INSTANTIATE_TEST_CASE_P(AVX2, CDEFFindDirTest,
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(
     NEON, CDEFBlockTest,
-    ::testing::Values(
-        make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_4X4),
-        make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_8X8)));
+    ::testing::Combine(::testing::Values(&cdef_filter_block_neon),
+                       ::testing::Values(&cdef_filter_block_c),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
 INSTANTIATE_TEST_CASE_P(NEON, CDEFFindDirTest,
                         ::testing::Values(make_tuple(&cdef_find_dir_neon,
                                                      &cdef_find_dir_c)));
@@ -360,9 +359,11 @@ INSTANTIATE_TEST_CASE_P(NEON, CDEFFindDirTest,
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
     SSE2, CDEFSpeedTest,
-    ::testing::Values(
-        make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_4X4),
-        make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_8X8)));
+    ::testing::Combine(::testing::Values(&cdef_filter_block_sse2),
+                       ::testing::Values(&cdef_filter_block_c),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
 INSTANTIATE_TEST_CASE_P(SSE2, CDEFFindDirSpeedTest,
                         ::testing::Values(make_tuple(&cdef_find_dir_sse2,
                                                      &cdef_find_dir_c)));
@@ -371,9 +372,11 @@ INSTANTIATE_TEST_CASE_P(SSE2, CDEFFindDirSpeedTest,
 #if HAVE_SSSE3
 INSTANTIATE_TEST_CASE_P(
     SSSE3, CDEFSpeedTest,
-    ::testing::Values(
-        make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c, BLOCK_4X4),
-        make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c, BLOCK_8X8)));
+    ::testing::Combine(::testing::Values(&cdef_filter_block_ssse3),
+                       ::testing::Values(&cdef_filter_block_c),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
 INSTANTIATE_TEST_CASE_P(SSSE3, CDEFFindDirSpeedTest,
                         ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
                                                      &cdef_find_dir_c)));
@@ -382,10 +385,11 @@ INSTANTIATE_TEST_CASE_P(SSSE3, CDEFFindDirSpeedTest,
 #if HAVE_SSE4_1
 INSTANTIATE_TEST_CASE_P(
     SSE4_1, CDEFSpeedTest,
-    ::testing::Values(make_tuple(&cdef_filter_block_sse4_1,
-                                 &cdef_filter_block_c, BLOCK_4X4),
-                      make_tuple(&cdef_filter_block_sse4_1,
-                                 &cdef_filter_block_c, BLOCK_8X8)));
+    ::testing::Combine(::testing::Values(&cdef_filter_block_sse4_1),
+                       ::testing::Values(&cdef_filter_block_c),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
 INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFFindDirSpeedTest,
                         ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
                                                      &cdef_find_dir_c)));
@@ -394,9 +398,11 @@ INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFFindDirSpeedTest,
 #if HAVE_AVX2
 INSTANTIATE_TEST_CASE_P(
     AVX2, CDEFSpeedTest,
-    ::testing::Values(
-        make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_4X4),
-        make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_8X8)));
+    ::testing::Combine(::testing::Values(&cdef_filter_block_avx2),
+                       ::testing::Values(&cdef_filter_block_c),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
 INSTANTIATE_TEST_CASE_P(AVX2, CDEFFindDirSpeedTest,
                         ::testing::Values(make_tuple(&cdef_find_dir_avx2,
                                                      &cdef_find_dir_c)));
@@ -405,9 +411,11 @@ INSTANTIATE_TEST_CASE_P(AVX2, CDEFFindDirSpeedTest,
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(
     NEON, CDEFSpeedTest,
-    ::testing::Values(
-        make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_4X4),
-        make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_8X8)));
+    ::testing::Combine(::testing::Values(&cdef_filter_block_neon),
+                       ::testing::Values(&cdef_filter_block_c),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
 INSTANTIATE_TEST_CASE_P(NEON, CDEFFindDirSpeedTest,
                         ::testing::Values(make_tuple(&cdef_find_dir_neon,
                                                      &cdef_find_dir_c)));
diff --git a/third_party/aom/test/cfl_test.cc b/third_party/aom/test/cfl_test.cc
new file mode 100644
index 0000000000..e4d438d6ab
--- /dev/null
+++ b/third_party/aom/test/cfl_test.cc
@@ -0,0 +1,567 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/aom_timer.h"
+#include "test/util.h"
+#include "test/acm_random.h"
+
+using ::testing::make_tuple;
+
+using libaom_test::ACMRandom;
+
+#define NUM_ITERATIONS (100)
+#define NUM_ITERATIONS_SPEED (INT16_MAX)
+
+#define ALL_CFL_TX_SIZES(function)                                     \
+  make_tuple(TX_4X4, &function), make_tuple(TX_4X8, &function),        \
+      make_tuple(TX_4X16, &function), make_tuple(TX_8X4, &function),   \
+      make_tuple(TX_8X8, &function), make_tuple(TX_8X16, &function),   \
+      make_tuple(TX_8X32, &function), make_tuple(TX_16X4, &function),  \
+      make_tuple(TX_16X8, &function), make_tuple(TX_16X16, &function), \
+      make_tuple(TX_16X32, &function), make_tuple(TX_32X8, &function), \
+      make_tuple(TX_32X16, &function), make_tuple(TX_32X32, &function)
+
+#define ALL_CFL_TX_SIZES_SUBSAMPLE(fun420, fun422, fun444) \
+  make_tuple(TX_4X4, &fun420, &fun422, &fun444),           \
+      make_tuple(TX_4X8, &fun420, &fun422, &fun444),       \
+      make_tuple(TX_4X16, &fun420, &fun422, &fun444),      \
+      make_tuple(TX_8X4, &fun420, &fun422, &fun444),       \
+      make_tuple(TX_8X8, &fun420, &fun422, &fun444),       \
+      make_tuple(TX_8X16, &fun420, &fun422, &fun444),      \
+      make_tuple(TX_8X32, &fun420, &fun422, &fun444),      \
+      make_tuple(TX_16X4, &fun420, &fun422, &fun444),      \
+      make_tuple(TX_16X8, &fun420, &fun422, &fun444),      \
+      make_tuple(TX_16X16, &fun420, &fun422, &fun444),     \
+      make_tuple(TX_16X32, &fun420, &fun422, &fun444),     \
+      make_tuple(TX_32X8, &fun420, &fun422, &fun444),      \
+      make_tuple(TX_32X16, &fun420, &fun422, &fun444),     \
+      make_tuple(TX_32X32, &fun420, &fun422, &fun444)
+
+namespace {
+
+template <typename A>
+static void assert_eq(const A *a, const A *b, int width, int height) {
+  for (int j = 0; j < height; j++) {
+    for (int i = 0; i < width; i++) {
+      ASSERT_EQ(a[j * CFL_BUF_LINE + i], b[j * CFL_BUF_LINE + i]);
+    }
+  }
+}
+
+static void assertFaster(int ref_elapsed_time, int elapsed_time) {
+  EXPECT_GT(ref_elapsed_time, elapsed_time)
+      << "Error: CFLSubtractSpeedTest, SIMD slower than C." << std::endl
+      << "C time: " << ref_elapsed_time << " us" << std::endl
+      << "SIMD time: " << elapsed_time << " us" << std::endl;
+}
+
+static void printSpeed(int ref_elapsed_time, int elapsed_time, int width,
+                       int height) {
+  std::cout.precision(2);
+  std::cout << "[          ] " << width << "x" << height
+            << ": C time = " << ref_elapsed_time
+            << " us, SIMD time = " << elapsed_time << " us"
+            << " (~" << ref_elapsed_time / (double)elapsed_time << "x) "
+            << std::endl;
+}
+
+class CFLTest {
+ public:
+  virtual ~CFLTest() {}
+  void init(TX_SIZE tx) {
+    tx_size = tx;
+    width = tx_size_wide[tx_size];
+    height = tx_size_high[tx_size];
+    rnd(ACMRandom::DeterministicSeed());
+  }
+
+ protected:
+  TX_SIZE tx_size;
+  int width;
+  int height;
+  ACMRandom rnd;
+};
+
+template <typename I>
+class CFLTestWithData : public CFLTest {
+ public:
+  virtual ~CFLTestWithData() {}
+
+ protected:
+  I data[CFL_BUF_SQUARE];
+  I data_ref[CFL_BUF_SQUARE];
+  void randData(I (ACMRandom::*random)()) {
+    for (int j = 0; j < this->height; j++) {
+      for (int i = 0; i < this->width; i++) {
+        const I d = (this->rnd.*random)();
+        data[j * CFL_BUF_LINE + i] = d;
+        data_ref[j * CFL_BUF_LINE + i] = d;
+      }
+    }
+  }
+};
+
+template <typename I>
+class CFLTestWithAlignedData : public CFLTest {
+ public:
+  CFLTestWithAlignedData() {
+    chroma_pels_ref =
+        reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
+    chroma_pels =
+        reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
+    sub_luma_pels_ref = reinterpret_cast<int16_t *>(
+        aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
+    sub_luma_pels = reinterpret_cast<int16_t *>(
+        aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
+    memset(chroma_pels_ref, 0, sizeof(I) * CFL_BUF_SQUARE);
+    memset(chroma_pels, 0, sizeof(I) * CFL_BUF_SQUARE);
+    memset(sub_luma_pels_ref, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
+    memset(sub_luma_pels, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
+  }
+  ~CFLTestWithAlignedData() {
+    aom_free(chroma_pels_ref);
+    aom_free(sub_luma_pels_ref);
+    aom_free(chroma_pels);
+    aom_free(sub_luma_pels);
+  }
+
+ protected:
+  I *chroma_pels_ref;
+  I *chroma_pels;
+  int16_t *sub_luma_pels_ref;
+  int16_t *sub_luma_pels;
+  int alpha_q3;
+  I dc;
+  void randData(int bd) {
+    alpha_q3 = this->rnd(33) - 16;
+    dc = this->rnd(1 << bd);
+    for (int j = 0; j < this->height; j++) {
+      for (int i = 0; i < this->width; i++) {
+        chroma_pels[j * CFL_BUF_LINE + i] = dc;
+        chroma_pels_ref[j * CFL_BUF_LINE + i] = dc;
+        sub_luma_pels_ref[j * CFL_BUF_LINE + i] =
+            sub_luma_pels[j * CFL_BUF_LINE + i] = this->rnd(1 << (bd + 3));
+      }
+    }
+  }
+};
+
+typedef cfl_subtract_average_fn (*sub_avg_fn)(TX_SIZE tx_size);
+typedef ::testing::tuple<TX_SIZE, sub_avg_fn> sub_avg_param;
+class CFLSubAvgTest : public ::testing::TestWithParam<sub_avg_param>,
+                      public CFLTestWithData<int16_t> {
+ public:
+  virtual void SetUp() {
+    CFLTest::init(::testing::get<0>(this->GetParam()));
+    sub_avg = ::testing::get<1>(this->GetParam())(tx_size);
+    sub_avg_ref = get_subtract_average_fn_c(tx_size);
+  }
+  virtual ~CFLSubAvgTest() {}
+
+ protected:
+  cfl_subtract_average_fn sub_avg;
+  cfl_subtract_average_fn sub_avg_ref;
+};
+
+TEST_P(CFLSubAvgTest, SubAvgTest) {
+  for (int it = 0; it < NUM_ITERATIONS; it++) {
+    randData(&ACMRandom::Rand15Signed);
+    sub_avg((uint16_t *)data, data);
+    sub_avg_ref((uint16_t *)data_ref, data_ref);
+    assert_eq<int16_t>(data, data_ref, width, height);
+  }
+}
+
+TEST_P(CFLSubAvgTest, DISABLED_SubAvgSpeedTest) {
+  aom_usec_timer ref_timer;
+  aom_usec_timer timer;
+  randData(&ACMRandom::Rand15Signed);
+  aom_usec_timer_start(&ref_timer);
+  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+    sub_avg_ref((uint16_t *)data_ref, data_ref);
+  }
+  aom_usec_timer_mark(&ref_timer);
+  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
+  aom_usec_timer_start(&timer);
+  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+    sub_avg((uint16_t *)data, data);
+  }
+  aom_usec_timer_mark(&timer);
+  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
+  printSpeed(ref_elapsed_time, elapsed_time, width, height);
+  assertFaster(ref_elapsed_time, elapsed_time);
+}
+
+template <typename S, typename T, typename I>
+class CFLSubsampleTest : public ::testing::TestWithParam<S>,
+                         public CFLTestWithData<I> {
+ public:
+  virtual void SetUp() {
+    CFLTest::init(::testing::get<0>(this->GetParam()));
+    fun_420 = ::testing::get<1>(this->GetParam())(this->tx_size);
+    fun_422 = ::testing::get<2>(this->GetParam())(this->tx_size);
+    fun_444 = ::testing::get<3>(this->GetParam())(this->tx_size);
+  }
+
+ protected:
+  T fun_420;
+  T fun_422;
+  T fun_444;
+  T fun_420_ref;
+  T fun_422_ref;
+  T fun_444_ref;
+
+  void subsampleTest(T fun, T fun_ref, int sub_width, int sub_height,
+                     I (ACMRandom::*random)()) {
+    uint16_t sub_luma_pels[CFL_BUF_SQUARE];
+    uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
+
+    for (int it = 0; it < NUM_ITERATIONS; it++) {
+      CFLTestWithData<I>::randData(random);
+      fun(this->data, CFL_BUF_LINE, sub_luma_pels);
+      fun_ref(this->data_ref, CFL_BUF_LINE, sub_luma_pels_ref);
+      assert_eq<uint16_t>(sub_luma_pels, sub_luma_pels_ref, sub_width,
+                          sub_height);
+    }
+  }
+
+  void subsampleSpeedTest(T fun, T fun_ref, I (ACMRandom::*random)()) {
+    uint16_t sub_luma_pels[CFL_BUF_SQUARE];
+    uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
+    aom_usec_timer ref_timer;
+    aom_usec_timer timer;
+
+    CFLTestWithData<I>::randData(random);
+    aom_usec_timer_start(&ref_timer);
+    for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+      fun_ref(this->data_ref, CFL_BUF_LINE, sub_luma_pels);
+    }
+    aom_usec_timer_mark(&ref_timer);
+    int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
+    aom_usec_timer_start(&timer);
+    for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+      fun(this->data, CFL_BUF_LINE, sub_luma_pels_ref);
+    }
+    aom_usec_timer_mark(&timer);
+    int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
+    printSpeed(ref_elapsed_time, elapsed_time, this->width, this->height);
+    assertFaster(ref_elapsed_time, elapsed_time);
+  }
+};
+
+typedef cfl_subsample_lbd_fn (*get_subsample_lbd_fn)(TX_SIZE tx_size);
+typedef ::testing::tuple<TX_SIZE, get_subsample_lbd_fn, get_subsample_lbd_fn,
+                         get_subsample_lbd_fn>
+    subsample_lbd_param;
+class CFLSubsampleLBDTest
+    : public CFLSubsampleTest<subsample_lbd_param, cfl_subsample_lbd_fn,
+                              uint8_t> {
+ public:
+  virtual ~CFLSubsampleLBDTest() {}
+  virtual void SetUp() {
+    CFLSubsampleTest::SetUp();
+    fun_420_ref = cfl_get_luma_subsampling_420_lbd_c(tx_size);
+    fun_422_ref = cfl_get_luma_subsampling_422_lbd_c(tx_size);
+    fun_444_ref = cfl_get_luma_subsampling_444_lbd_c(tx_size);
+  }
+};
+
+TEST_P(CFLSubsampleLBDTest, SubsampleLBD420Test) {
+  subsampleTest(fun_420, fun_420_ref, width >> 1, height >> 1,
+                &ACMRandom::Rand8);
+}
+
+TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD420SpeedTest) {
+  subsampleSpeedTest(fun_420, fun_420_ref, &ACMRandom::Rand8);
+}
+
+TEST_P(CFLSubsampleLBDTest, SubsampleLBD422Test) {
+  subsampleTest(fun_422, fun_422_ref, width >> 1, height, &ACMRandom::Rand8);
+}
+
+TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD422SpeedTest) {
+  subsampleSpeedTest(fun_422, fun_422_ref, &ACMRandom::Rand8);
+}
+
+TEST_P(CFLSubsampleLBDTest, SubsampleLBD444Test) {
+  subsampleTest(fun_444, fun_444_ref, width, height, &ACMRandom::Rand8);
+}
+
+TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD444SpeedTest) {
+  subsampleSpeedTest(fun_444, fun_444_ref, &ACMRandom::Rand8);
+}
+
+typedef cfl_subsample_hbd_fn (*get_subsample_hbd_fn)(TX_SIZE tx_size);
+typedef ::testing::tuple<TX_SIZE, get_subsample_hbd_fn, get_subsample_hbd_fn,
+                         get_subsample_hbd_fn>
+    subsample_hbd_param;
+class CFLSubsampleHBDTest
+    : public CFLSubsampleTest<subsample_hbd_param, cfl_subsample_hbd_fn,
+                              uint16_t> {
+ public:
+  virtual ~CFLSubsampleHBDTest() {}
+  virtual void SetUp() {
+    CFLSubsampleTest::SetUp();
+    fun_420_ref = cfl_get_luma_subsampling_420_hbd_c(tx_size);
+    fun_422_ref = cfl_get_luma_subsampling_422_hbd_c(tx_size);
+    fun_444_ref = cfl_get_luma_subsampling_444_hbd_c(tx_size);
+  }
+};
+
+TEST_P(CFLSubsampleHBDTest, SubsampleHBD420Test) {
+  subsampleTest(fun_420, fun_420_ref, width >> 1, height >> 1,
+                &ACMRandom::Rand12);
+}
+
+TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD420SpeedTest) {
+  subsampleSpeedTest(fun_420, fun_420_ref, &ACMRandom::Rand12);
+}
+
+TEST_P(CFLSubsampleHBDTest, SubsampleHBD422Test) {
+  subsampleTest(fun_422, fun_422_ref, width >> 1, height, &ACMRandom::Rand12);
+}
+
+TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD422SpeedTest) {
+  subsampleSpeedTest(fun_422, fun_422_ref, &ACMRandom::Rand12);
+}
+
+TEST_P(CFLSubsampleHBDTest, SubsampleHBD444Test) {
+  subsampleTest(fun_444, fun_444_ref, width, height, &ACMRandom::Rand12);
+}
+
+TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD444SpeedTest) {
+  subsampleSpeedTest(fun_444, fun_444_ref, &ACMRandom::Rand12);
+}
+
+typedef cfl_predict_lbd_fn (*get_predict_fn)(TX_SIZE tx_size);
+typedef ::testing::tuple<TX_SIZE, get_predict_fn> predict_param;
+class CFLPredictTest : public ::testing::TestWithParam<predict_param>,
+                       public CFLTestWithAlignedData<uint8_t> {
+ public:
+  virtual void SetUp() {
+    CFLTest::init(::testing::get<0>(this->GetParam()));
+    predict = ::testing::get<1>(this->GetParam())(tx_size);
+    predict_ref = get_predict_lbd_fn_c(tx_size);
+  }
+  virtual ~CFLPredictTest() {}
+
+ protected:
+  cfl_predict_lbd_fn predict;
+  cfl_predict_lbd_fn predict_ref;
+};
+
+TEST_P(CFLPredictTest, PredictTest) {
+  for (int it = 0; it < NUM_ITERATIONS; it++) {
+    randData(8);
+    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
+    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3);
+    assert_eq<uint8_t>(chroma_pels, chroma_pels_ref, width, height);
+  }
+}
+TEST_P(CFLPredictTest, DISABLED_PredictSpeedTest) {
+  aom_usec_timer ref_timer;
+  aom_usec_timer timer;
+  randData(8);
+  aom_usec_timer_start(&ref_timer);
+  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3);
+  }
+  aom_usec_timer_mark(&ref_timer);
+  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
+
+  aom_usec_timer_start(&timer);
+  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
+  }
+  aom_usec_timer_mark(&timer);
+  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
+  printSpeed(ref_elapsed_time, elapsed_time, width, height);
+  assertFaster(ref_elapsed_time, elapsed_time);
+}
+
+typedef cfl_predict_hbd_fn (*get_predict_fn_hbd)(TX_SIZE tx_size);
+typedef ::testing::tuple<TX_SIZE, get_predict_fn_hbd> predict_param_hbd;
+class CFLPredictHBDTest : public ::testing::TestWithParam<predict_param_hbd>,
+                          public CFLTestWithAlignedData<uint16_t> {
+ public:
+  virtual void SetUp() {
+    CFLTest::init(::testing::get<0>(this->GetParam()));
+    predict = ::testing::get<1>(this->GetParam())(tx_size);
+    predict_ref = get_predict_hbd_fn_c(tx_size);
+  }
+  virtual ~CFLPredictHBDTest() {}
+
+ protected:
+  cfl_predict_hbd_fn predict;
+  cfl_predict_hbd_fn predict_ref;
+};
+
+TEST_P(CFLPredictHBDTest, PredictHBDTest) {
+  int bd = 12;
+  for (int it = 0; it < NUM_ITERATIONS; it++) {
+    randData(bd);
+    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
+    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3, bd);
+    assert_eq<uint16_t>(chroma_pels, chroma_pels_ref, width, height);
+  }
+}
+TEST_P(CFLPredictHBDTest, DISABLED_PredictHBDSpeedTest) {
+  aom_usec_timer ref_timer;
+  aom_usec_timer timer;
+  const int bd = 12;
+  randData(bd);
+  aom_usec_timer_start(&ref_timer);
+  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3, bd);
+  }
+  aom_usec_timer_mark(&ref_timer);
+  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
+
+  aom_usec_timer_start(&timer);
+  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
+    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
+  }
+  aom_usec_timer_mark(&timer);
+  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
+  printSpeed(ref_elapsed_time, elapsed_time, width, height);
+  assertFaster(ref_elapsed_time, elapsed_time);
+}
+
+#if HAVE_SSE2
+const sub_avg_param sub_avg_sizes_sse2[] = { ALL_CFL_TX_SIZES(
+    get_subtract_average_fn_sse2) };
+
+INSTANTIATE_TEST_CASE_P(SSE2, CFLSubAvgTest,
+                        ::testing::ValuesIn(sub_avg_sizes_sse2));
+
+#endif
+
+#if HAVE_SSSE3
+const subsample_lbd_param subsample_lbd_sizes_ssse3[] = {
+  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_ssse3,
+                             cfl_get_luma_subsampling_422_lbd_ssse3,
+                             cfl_get_luma_subsampling_444_lbd_ssse3)
+};
+
+const subsample_hbd_param subsample_hbd_sizes_ssse3[] = {
+  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_ssse3,
+                             cfl_get_luma_subsampling_422_hbd_ssse3,
+                             cfl_get_luma_subsampling_444_hbd_ssse3)
+};
+
+const predict_param predict_sizes_ssse3[] = { ALL_CFL_TX_SIZES(
+    get_predict_lbd_fn_ssse3) };
+
+const predict_param_hbd predict_sizes_hbd_ssse3[] = { ALL_CFL_TX_SIZES(
+    get_predict_hbd_fn_ssse3) };
+
+INSTANTIATE_TEST_CASE_P(SSSE3, CFLSubsampleLBDTest,
+                        ::testing::ValuesIn(subsample_lbd_sizes_ssse3));
+
+INSTANTIATE_TEST_CASE_P(SSSE3, CFLSubsampleHBDTest,
+                        ::testing::ValuesIn(subsample_hbd_sizes_ssse3));
+
+INSTANTIATE_TEST_CASE_P(SSSE3, CFLPredictTest,
+                        ::testing::ValuesIn(predict_sizes_ssse3));
+
+INSTANTIATE_TEST_CASE_P(SSSE3, CFLPredictHBDTest,
+                        ::testing::ValuesIn(predict_sizes_hbd_ssse3));
+#endif
+
+#if HAVE_AVX2
+const sub_avg_param sub_avg_sizes_avx2[] = { ALL_CFL_TX_SIZES(
+    get_subtract_average_fn_avx2) };
+
+const subsample_lbd_param subsample_lbd_sizes_avx2[] = {
+  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_avx2,
+                             cfl_get_luma_subsampling_422_lbd_avx2,
+                             cfl_get_luma_subsampling_444_lbd_avx2)
+};
+
+const subsample_hbd_param subsample_hbd_sizes_avx2[] = {
+  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_avx2,
+                             cfl_get_luma_subsampling_422_hbd_avx2,
+                             cfl_get_luma_subsampling_444_hbd_avx2)
+};
+
+const predict_param predict_sizes_avx2[] = { ALL_CFL_TX_SIZES(
+    get_predict_lbd_fn_avx2) };
+
+const predict_param_hbd predict_sizes_hbd_avx2[] = { ALL_CFL_TX_SIZES(
+    get_predict_hbd_fn_avx2) };
+
+INSTANTIATE_TEST_CASE_P(AVX2, CFLSubAvgTest,
+                        ::testing::ValuesIn(sub_avg_sizes_avx2));
+
+INSTANTIATE_TEST_CASE_P(AVX2, CFLSubsampleLBDTest,
+                        ::testing::ValuesIn(subsample_lbd_sizes_avx2));
+
+INSTANTIATE_TEST_CASE_P(AVX2, CFLSubsampleHBDTest,
+                        ::testing::ValuesIn(subsample_hbd_sizes_avx2));
+
+INSTANTIATE_TEST_CASE_P(AVX2, CFLPredictTest,
+                        ::testing::ValuesIn(predict_sizes_avx2));
+
+INSTANTIATE_TEST_CASE_P(AVX2, CFLPredictHBDTest,
+                        ::testing::ValuesIn(predict_sizes_hbd_avx2));
+#endif
+
+#if HAVE_NEON
+
+const sub_avg_param sub_avg_sizes_neon[] = { ALL_CFL_TX_SIZES(
+    get_subtract_average_fn_neon) };
+
+const subsample_lbd_param subsample_lbd_sizes_neon[] = {
+  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_neon,
+                             cfl_get_luma_subsampling_422_lbd_neon,
+                             cfl_get_luma_subsampling_444_lbd_neon)
+};
+
+const subsample_hbd_param subsample_hbd_sizes_neon[] = {
+  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_neon,
+                             cfl_get_luma_subsampling_422_hbd_neon,
+                             cfl_get_luma_subsampling_444_hbd_neon)
+};
+
+const predict_param predict_sizes_neon[] = { ALL_CFL_TX_SIZES(
+    get_predict_lbd_fn_neon) };
+
+const predict_param_hbd predict_sizes_hbd_neon[] = { ALL_CFL_TX_SIZES(
+    get_predict_hbd_fn_neon) };
+
+INSTANTIATE_TEST_CASE_P(NEON, CFLSubAvgTest,
+                        ::testing::ValuesIn(sub_avg_sizes_neon));
+
+INSTANTIATE_TEST_CASE_P(NEON, CFLSubsampleLBDTest,
+                        ::testing::ValuesIn(subsample_lbd_sizes_neon));
+
+INSTANTIATE_TEST_CASE_P(NEON, CFLSubsampleHBDTest,
+                        ::testing::ValuesIn(subsample_hbd_sizes_neon));
+
+INSTANTIATE_TEST_CASE_P(NEON, CFLPredictTest,
+                        ::testing::ValuesIn(predict_sizes_neon));
+
+INSTANTIATE_TEST_CASE_P(NEON, CFLPredictHBDTest,
+                        ::testing::ValuesIn(predict_sizes_hbd_neon));
+#endif
+
+#if HAVE_VSX
+const sub_avg_param sub_avg_sizes_vsx[] = { ALL_CFL_TX_SIZES(
+    get_subtract_average_fn_vsx) };
+
+INSTANTIATE_TEST_CASE_P(VSX, CFLSubAvgTest,
+                        ::testing::ValuesIn(sub_avg_sizes_vsx));
+#endif
+}  // namespace
diff --git a/third_party/aom/test/clear_system_state.h b/third_party/aom/test/clear_system_state.h
index 4f3c1eed07..7aa78243bb 100644
--- a/third_party/aom/test/clear_system_state.h
+++ b/third_party/aom/test/clear_system_state.h
@@ -11,7 +11,8 @@
 #ifndef TEST_CLEAR_SYSTEM_STATE_H_
 #define TEST_CLEAR_SYSTEM_STATE_H_
 
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
 #if ARCH_X86 || ARCH_X86_64
 #include "aom_ports/x86.h"
 #endif
diff --git a/third_party/aom/test/clpf_test.cc b/third_party/aom/test/clpf_test.cc
deleted file mode 100644
index ecb0428769..0000000000
--- a/third_party/aom/test/clpf_test.cc
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <cstdlib>
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
-#include "aom_ports/aom_timer.h"
-#include "av1/common/cdef_block.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-
-typedef void (*clpf_block_t)(uint8_t *dst, const uint16_t *src, int dstride,
-                             int sstride, int sizex, int sizey,
-                             unsigned int strength, unsigned int bitdepth);
-
-typedef std::tr1::tuple<clpf_block_t, clpf_block_t, int, int>
-    clpf_block_param_t;
-
-class CDEFClpfBlockTest : public ::testing::TestWithParam<clpf_block_param_t> {
- public:
-  virtual ~CDEFClpfBlockTest() {}
-  virtual void SetUp() {
-    clpf = GET_PARAM(0);
-    ref_clpf = GET_PARAM(1);
-    sizex = GET_PARAM(2);
-    sizey = GET_PARAM(3);
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  int sizex;
-  int sizey;
-  clpf_block_t clpf;
-  clpf_block_t ref_clpf;
-};
-
-typedef CDEFClpfBlockTest CDEFClpfSpeedTest;
-
-#if CONFIG_HIGHBITDEPTH
-typedef void (*clpf_block_hbd_t)(uint16_t *dst, const uint16_t *src,
-                                 int dstride, int sstride, int sizex, int sizey,
-                                 unsigned int strength, unsigned int bitdepth);
-
-typedef std::tr1::tuple<clpf_block_hbd_t, clpf_block_hbd_t, int, int>
-    clpf_block_hbd_param_t;
-
-class CDEFClpfBlockHbdTest
-    : public ::testing::TestWithParam<clpf_block_hbd_param_t> {
- public:
-  virtual ~CDEFClpfBlockHbdTest() {}
-  virtual void SetUp() {
-    clpf = GET_PARAM(0);
-    ref_clpf = GET_PARAM(1);
-    sizex = GET_PARAM(2);
-    sizey = GET_PARAM(3);
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  int sizex;
-  int sizey;
-  clpf_block_hbd_t clpf;
-  clpf_block_hbd_t ref_clpf;
-};
-
-typedef CDEFClpfBlockHbdTest ClpfHbdSpeedTest;
-#endif
-
-template <typename pixel>
-void test_clpf(int w, int h, unsigned int depth, unsigned int iterations,
-               void (*clpf)(pixel *dst, const uint16_t *src, int dstride,
-                            int sstride, int sizex, int sizey,
-                            unsigned int strength, unsigned int bitdepth),
-               void (*ref_clpf)(pixel *dst, const uint16_t *src, int dstride,
-                                int sstride, int sizex, int sizey,
-                                unsigned int strength, unsigned int bitdepth)) {
-  const int size = 24;
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, uint16_t, s[size * size]);
-  DECLARE_ALIGNED(16, pixel, d[size * size]);
-  DECLARE_ALIGNED(16, pixel, ref_d[size * size]);
-  memset(ref_d, 0, size * size * sizeof(*ref_d));
-  memset(d, 0, size * size * sizeof(*d));
-
-  int error = 0, pos = 0, xpos = 8, ypos = 8;
-  unsigned int strength = 0, bits, level, count, damp = 0, boundary = 0;
-
-  assert(size >= w + 16 && size >= h + 16);
-  assert(depth >= 8);
-
-  // Test every combination of:
-  // * Input with up to <depth> bits of noise
-  // * Noise level around every value from 0 to (1<<depth)-1
-  // * All strengths
-  // * All dampings
-  // * Boundaries
-  // If clpf and ref_clpf are the same, we're just testing speed
-  for (boundary = 0; boundary < 16; boundary++) {
-    for (count = 0; count < iterations; count++) {
-      for (level = 0; level < (1U << depth) && !error;
-           level += (1 + 4 * !!boundary) << (depth - 8)) {
-        for (bits = 1; bits <= depth && !error; bits++) {
-          for (damp = 4 + depth - 8; damp < depth - 1 && !error; damp++) {
-            for (int i = 0; i < size * size; i++)
-              s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
-                           (1 << depth) - 1);
-            if (boundary) {
-              if (boundary & 1) {  // Left
-                for (int i = 0; i < size; i++)
-                  for (int j = 0; j < xpos; j++)
-                    s[i * size + j] = CDEF_VERY_LARGE;
-              }
-              if (boundary & 2) {  // Right
-                for (int i = 0; i < size; i++)
-                  for (int j = xpos + w; j < size; j++)
-                    s[i * size + j] = CDEF_VERY_LARGE;
-              }
-              if (boundary & 4) {  // Above
-                for (int i = 0; i < ypos; i++)
-                  for (int j = 0; j < size; j++)
-                    s[i * size + j] = CDEF_VERY_LARGE;
-              }
-              if (boundary & 8) {  // Below
-                for (int i = ypos + h; i < size; i++)
-                  for (int j = 0; j < size; j++)
-                    s[i * size + j] = CDEF_VERY_LARGE;
-              }
-            }
-            for (strength = depth - 8; strength < depth - 5 && !error;
-                 strength += !error) {
-              ref_clpf(ref_d + ypos * size + xpos, s + ypos * size + xpos, size,
-                       size, w, h, 1 << strength, damp);
-              if (clpf != ref_clpf)
-                ASM_REGISTER_STATE_CHECK(clpf(d + ypos * size + xpos,
-                                              s + ypos * size + xpos, size,
-                                              size, w, h, 1 << strength, damp));
-              if (ref_clpf != clpf) {
-                for (pos = 0; pos < size * size && !error; pos++) {
-                  error = ref_d[pos] != d[pos];
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-
-  pos--;
-  EXPECT_EQ(0, error)
-      << "Error: CDEFClpfBlockTest, SIMD and C mismatch." << std::endl
-      << "First error at " << pos % size << "," << pos / size << " ("
-      << (int16_t)ref_d[pos] << " != " << (int16_t)d[pos] << ") " << std::endl
-      << "strength: " << (1 << strength) << std::endl
-      << "damping: " << damp << std::endl
-      << "depth: " << depth << std::endl
-      << "boundary: " << boundary << std::endl
-      << "w: " << w << std::endl
-      << "h: " << h << std::endl
-      << "A=" << (pos > 2 * size ? (int16_t)s[pos - 2 * size] : -1) << std::endl
-      << "B=" << (pos > size ? (int16_t)s[pos - size] : -1) << std::endl
-      << "C=" << (pos % size - 2 >= 0 ? (int16_t)s[pos - 2] : -1) << std::endl
-      << "D=" << (pos % size - 1 >= 0 ? (int16_t)s[pos - 1] : -1) << std::endl
-      << "X=" << (int16_t)s[pos] << std::endl
-      << "E=" << (pos % size + 1 < size ? (int16_t)s[pos + 1] : -1) << std::endl
-      << "F=" << (pos % size + 2 < size ? (int16_t)s[pos + 2] : -1) << std::endl
-      << "G=" << (pos + size < size * size ? (int16_t)s[pos + size] : -1)
-      << std::endl
-      << "H="
-      << (pos + 2 * size < size * size ? (int16_t)s[pos + 2 * size] : -1)
-      << std::endl;
-}
-
-template <typename pixel>
-void test_clpf_speed(int w, int h, unsigned int depth, unsigned int iterations,
-                     void (*clpf)(pixel *dst, const uint16_t *src, int dstride,
-                                  int sstride, int sizex, int sizey,
-                                  unsigned int strength, unsigned int bitdepth),
-                     void (*ref_clpf)(pixel *dst, const uint16_t *src,
-                                      int dstride, int sstride, int sizex,
-                                      int sizey, unsigned int strength,
-                                      unsigned int bitdepth)) {
-  aom_usec_timer ref_timer;
-  aom_usec_timer timer;
-
-  aom_usec_timer_start(&ref_timer);
-  test_clpf(w, h, depth, iterations, ref_clpf, ref_clpf);
-  aom_usec_timer_mark(&ref_timer);
-  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
-
-  aom_usec_timer_start(&timer);
-  test_clpf(w, h, depth, iterations, clpf, clpf);
-  aom_usec_timer_mark(&timer);
-  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
-
-#if 0
-  std::cout << "[          ] C time = " << ref_elapsed_time / 1000
-            << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
-#endif
-
-  EXPECT_GT(ref_elapsed_time, elapsed_time)
-      << "Error: CDEFClpfSpeedTest, SIMD slower than C." << std::endl
-      << "C time: " << ref_elapsed_time << " us" << std::endl
-      << "SIMD time: " << elapsed_time << " us" << std::endl;
-}
-
-TEST_P(CDEFClpfBlockTest, TestSIMDNoMismatch) {
-  test_clpf(sizex, sizey, 8, 1, clpf, ref_clpf);
-}
-
-TEST_P(CDEFClpfSpeedTest, DISABLED_TestSpeed) {
-  test_clpf_speed(sizex, sizey, 8, 16, clpf, ref_clpf);
-}
-
-#if CONFIG_HIGHBITDEPTH
-TEST_P(CDEFClpfBlockHbdTest, TestSIMDNoMismatch) {
-  test_clpf(sizex, sizey, 12, 1, clpf, ref_clpf);
-}
-
-TEST_P(ClpfHbdSpeedTest, DISABLED_TestSpeed) {
-  test_clpf_speed(sizex, sizey, 12, 4, clpf, ref_clpf);
-}
-#endif
-
-using std::tr1::make_tuple;
-
-// VS compiling for 32 bit targets does not support vector types in
-// structs as arguments, which makes the v256 type of the intrinsics
-// hard to support, so optimizations for this target are disabled.
-#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
-// Test all supported architectures and block sizes
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, CDEFClpfBlockTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 8),
-        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 4),
-        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 4, 8),
-        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 4, 4)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, CDEFClpfBlockTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 8, 8),
-        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 8, 4),
-        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 4, 8),
-        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 4, 4)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
-    SSE4_1, CDEFClpfBlockTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 8, 8),
-        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 8, 4),
-        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 4, 8),
-        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 4, 4)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, CDEFClpfBlockTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 8),
-        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 4),
-        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 4, 8),
-        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 4, 4)));
-#endif
-
-#if CONFIG_HIGHBITDEPTH
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, CDEFClpfBlockHbdTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 4, 4)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, CDEFClpfBlockHbdTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 4, 4)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
-    SSE4_1, CDEFClpfBlockHbdTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 4, 4)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, CDEFClpfBlockHbdTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 4, 4)));
-#endif
-#endif  // CONFIG_HIGHBITDEPTH
-
-// Test speed for all supported architectures
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, CDEFClpfSpeedTest,
-    ::testing::Values(make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 8),
-                      make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 8,
-                                 8)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFClpfSpeedTest,
-                        ::testing::Values(make_tuple(&aom_clpf_block_ssse3,
-                                                     &aom_clpf_block_c, 8, 8),
-                                          make_tuple(&aom_clpf_hblock_ssse3,
-                                                     &aom_clpf_hblock_c, 8,
-                                                     8)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFClpfSpeedTest,
-                        ::testing::Values(make_tuple(&aom_clpf_block_sse4_1,
-                                                     &aom_clpf_block_c, 8, 8),
-                                          make_tuple(&aom_clpf_hblock_sse4_1,
-                                                     &aom_clpf_hblock_c, 8,
-                                                     8)));
-
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, CDEFClpfSpeedTest,
-    ::testing::Values(make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 8),
-                      make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 8,
-                                 8)));
-#endif
-
-#if CONFIG_HIGHBITDEPTH
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, ClpfHbdSpeedTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 8, 8)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, ClpfHbdSpeedTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 8, 8)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
-    SSE4_1, ClpfHbdSpeedTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 8, 8)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, ClpfHbdSpeedTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 8, 8)));
-#endif
-#endif  // CONFIG_HIGHBITDEPTH
-#endif  // defined(_WIN64) || !defined(_MSC_VER)
-
-}  // namespace
diff --git a/third_party/aom/test/codec_factory.h b/third_party/aom/test/codec_factory.h
index d2f20b832d..65b76094c4 100644
--- a/third_party/aom/test/codec_factory.h
+++ b/third_party/aom/test/codec_factory.h
@@ -11,7 +11,8 @@
 #ifndef TEST_CODEC_FACTORY_H_
 #define TEST_CODEC_FACTORY_H_
 
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
 #include "aom/aom_decoder.h"
 #include "aom/aom_encoder.h"
 #if CONFIG_AV1_ENCODER
@@ -39,7 +40,6 @@ class CodecFactory {
                                  const aom_codec_flags_t flags) const = 0;
 
   virtual Encoder *CreateEncoder(aom_codec_enc_cfg_t cfg,
-                                 unsigned long deadline,
                                  const unsigned long init_flags,
                                  TwopassStatsStore *stats) const = 0;
 
@@ -54,22 +54,26 @@ class CodecFactory {
 template <class T1>
 class CodecTestWithParam
     : public ::testing::TestWithParam<
-          std::tr1::tuple<const libaom_test::CodecFactory *, T1> > {};
+          ::testing::tuple<const libaom_test::CodecFactory *, T1> > {};
 
 template <class T1, class T2>
 class CodecTestWith2Params
     : public ::testing::TestWithParam<
-          std::tr1::tuple<const libaom_test::CodecFactory *, T1, T2> > {};
+          ::testing::tuple<const libaom_test::CodecFactory *, T1, T2> > {};
 
 template <class T1, class T2, class T3>
 class CodecTestWith3Params
     : public ::testing::TestWithParam<
-          std::tr1::tuple<const libaom_test::CodecFactory *, T1, T2, T3> > {};
+          ::testing::tuple<const libaom_test::CodecFactory *, T1, T2, T3> > {};
+
+template <class T1, class T2, class T3, class T4>
+class CodecTestWith4Params
+    : public ::testing::TestWithParam< ::testing::tuple<
+          const libaom_test::CodecFactory *, T1, T2, T3, T4> > {};
 
 /*
  * AV1 Codec Definitions
  */
-#if CONFIG_AV1
 class AV1Decoder : public Decoder {
  public:
   explicit AV1Decoder(aom_codec_dec_cfg_t cfg) : Decoder(cfg) {}
@@ -89,9 +93,9 @@ class AV1Decoder : public Decoder {
 
 class AV1Encoder : public Encoder {
  public:
-  AV1Encoder(aom_codec_enc_cfg_t cfg, unsigned long deadline,
-             const unsigned long init_flags, TwopassStatsStore *stats)
-      : Encoder(cfg, deadline, init_flags, stats) {}
+  AV1Encoder(aom_codec_enc_cfg_t cfg, const uint32_t init_flags,
+             TwopassStatsStore *stats)
+      : Encoder(cfg, init_flags, stats) {}
 
  protected:
   virtual aom_codec_iface_t *CodecInterface() const {
@@ -123,14 +127,12 @@ class AV1CodecFactory : public CodecFactory {
   }
 
   virtual Encoder *CreateEncoder(aom_codec_enc_cfg_t cfg,
-                                 unsigned long deadline,
                                  const unsigned long init_flags,
                                  TwopassStatsStore *stats) const {
 #if CONFIG_AV1_ENCODER
-    return new AV1Encoder(cfg, deadline, init_flags, stats);
+    return new AV1Encoder(cfg, init_flags, stats);
 #else
     (void)cfg;
-    (void)deadline;
     (void)init_flags;
     (void)stats;
     return NULL;
@@ -158,9 +160,6 @@ const libaom_test::AV1CodecFactory kAV1;
           ::testing::Values(static_cast<const libaom_test::CodecFactory *>( \
               &libaom_test::kAV1)),                                         \
           __VA_ARGS__))
-#else
-#define AV1_INSTANTIATE_TEST_CASE(test, ...)
-#endif  // CONFIG_AV1
 
 }  // namespace libaom_test
 #endif  // TEST_CODEC_FACTORY_H_
diff --git a/third_party/aom/test/coding_path_sync.cc b/third_party/aom/test/coding_path_sync.cc
index 5b6409d034..51a506004d 100644
--- a/third_party/aom/test/coding_path_sync.cc
+++ b/third_party/aom/test/coding_path_sync.cc
@@ -13,7 +13,7 @@
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "test/acm_random.h"
 
-#include "./aom_config.h"
+#include "config/aom_config.h"
 
 #include "aom_ports/mem.h"  // ROUND_POWER_OF_TWO
 #include "aom/aomcx.h"
@@ -32,11 +32,17 @@ class CompressedSource {
     aom_codec_enc_cfg_t cfg;
     aom_codec_enc_config_default(algo, &cfg, 0);
 
-    const int max_q = cfg.rc_max_quantizer;
+    // force the quantizer, to reduce the sensitivity on encoding choices.
+    // e.g, we don't want this test to break when the rate control is modified.
+    {
+      const int max_q = cfg.rc_max_quantizer;
+      const int min_q = cfg.rc_min_quantizer;
+      const int q = rnd_.PseudoUniform(max_q - min_q + 1) + min_q;
 
-    cfg.rc_end_usage = AOM_CQ;
-    cfg.rc_max_quantizer = max_q;
-    cfg.rc_min_quantizer = max_q;
+      cfg.rc_end_usage = AOM_Q;
+      cfg.rc_max_quantizer = q;
+      cfg.rc_min_quantizer = q;
+    }
 
     // choose the picture size
     {
@@ -44,9 +50,26 @@ class CompressedSource {
       height_ = rnd_.PseudoUniform(kHeight - 8) + 8;
     }
 
+    // choose the chroma subsampling
+    {
+      const aom_img_fmt_t fmts[] = {
+        AOM_IMG_FMT_I420,
+        AOM_IMG_FMT_I422,
+        AOM_IMG_FMT_I444,
+      };
+
+      format_ = fmts[rnd_.PseudoUniform(NELEMENTS(fmts))];
+    }
+
     cfg.g_w = width_;
     cfg.g_h = height_;
     cfg.g_lag_in_frames = 0;
+    if (format_ == AOM_IMG_FMT_I420)
+      cfg.g_profile = 0;
+    else if (format_ == AOM_IMG_FMT_I444)
+      cfg.g_profile = 1;
+    else if (format_ == AOM_IMG_FMT_I422)
+      cfg.g_profile = 2;
 
     aom_codec_enc_init(&enc_, algo, &cfg, 0);
   }
@@ -54,7 +77,7 @@ class CompressedSource {
   ~CompressedSource() { aom_codec_destroy(&enc_); }
 
   const aom_codec_cx_pkt_t *ReadFrame() {
-    uint8_t buf[kWidth * kHeight * 3 / 2] = { 0 };
+    uint8_t buf[kWidth * kHeight * 3] = { 0 };
 
     // render regular pattern
     const int period = rnd_.Rand8() % 32 + 1;
@@ -67,8 +90,8 @@ class CompressedSource {
       buf[i] = (i + phase) % period < period / 2 ? val_a : val_b;
 
     aom_image_t img;
-    aom_img_wrap(&img, AOM_IMG_FMT_I420, width_, height_, 0, buf);
-    aom_codec_encode(&enc_, &img, frame_count_++, 1, 0, 0);
+    aom_img_wrap(&img, format_, width_, height_, 0, buf);
+    aom_codec_encode(&enc_, &img, frame_count_++, 1, 0);
 
     aom_codec_iter_t iter = NULL;
 
@@ -86,6 +109,7 @@ class CompressedSource {
   static const int kHeight = 128;
 
   ACMRandom rnd_;
+  aom_img_fmt_t format_;
   aom_codec_ctx_t enc_;
   int frame_count_;
   int width_, height_;
@@ -128,7 +152,7 @@ class Decoder {
 
   std::vector<int16_t> decode(const aom_codec_cx_pkt_t *pkt) {
     aom_codec_decode(&dec_, static_cast<uint8_t *>(pkt->data.frame.buf),
-                     static_cast<unsigned int>(pkt->data.frame.sz), NULL, 0);
+                     pkt->data.frame.sz, NULL);
 
     aom_codec_iter_t iter = NULL;
     return Serialize(aom_codec_get_frame(&dec_, &iter));
@@ -140,18 +164,41 @@ class Decoder {
 
 // Try to reveal a mismatch between LBD and HBD coding paths.
 TEST(CodingPathSync, SearchForHbdLbdMismatch) {
-  const int count_tests = 100;
+  const int count_tests = 10;
   for (int i = 0; i < count_tests; ++i) {
     Decoder dec_hbd(0);
     Decoder dec_lbd(1);
 
     CompressedSource enc(i);
-    const aom_codec_cx_pkt_t *frame = enc.ReadFrame();
 
-    std::vector<int16_t> lbd_yuv = dec_lbd.decode(frame);
-    std::vector<int16_t> hbd_yuv = dec_hbd.decode(frame);
+    for (int k = 0; k < 3; ++k) {
+      const aom_codec_cx_pkt_t *frame = enc.ReadFrame();
+
+      std::vector<int16_t> lbd_yuv = dec_lbd.decode(frame);
+      std::vector<int16_t> hbd_yuv = dec_hbd.decode(frame);
 
-    ASSERT_EQ(lbd_yuv, hbd_yuv);
+      ASSERT_EQ(lbd_yuv, hbd_yuv);
+    }
+  }
+}
+
+TEST(CodingPathSyncLarge, SearchForHbdLbdMismatchLarge) {
+  const int count_tests = 100;
+  const int seed = 1234;
+  for (int i = 0; i < count_tests; ++i) {
+    Decoder dec_hbd(0);
+    Decoder dec_lbd(1);
+
+    CompressedSource enc(seed + i);
+
+    for (int k = 0; k < 5; ++k) {
+      const aom_codec_cx_pkt_t *frame = enc.ReadFrame();
+
+      std::vector<int16_t> lbd_yuv = dec_lbd.decode(frame);
+      std::vector<int16_t> hbd_yuv = dec_hbd.decode(frame);
+
+      ASSERT_EQ(lbd_yuv, hbd_yuv);
+    }
   }
 }
 
diff --git a/third_party/aom/test/comp_avg_pred_test.cc b/third_party/aom/test/comp_avg_pred_test.cc
new file mode 100644
index 0000000000..8bd826eb48
--- /dev/null
+++ b/third_party/aom/test/comp_avg_pred_test.cc
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "test/comp_avg_pred_test.h"
+
+using ::testing::make_tuple;
+using ::testing::tuple;
+using libaom_test::ACMRandom;
+using libaom_test::AV1JNTCOMPAVG::AV1HighBDJNTCOMPAVGTest;
+using libaom_test::AV1JNTCOMPAVG::AV1HighBDJNTCOMPAVGUPSAMPLEDTest;
+using libaom_test::AV1JNTCOMPAVG::AV1JNTCOMPAVGTest;
+using libaom_test::AV1JNTCOMPAVG::AV1JNTCOMPAVGUPSAMPLEDTest;
+
+namespace {
+
+TEST_P(AV1JNTCOMPAVGTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(0)); }
+
+TEST_P(AV1JNTCOMPAVGTest, CheckOutput) { RunCheckOutput(GET_PARAM(0)); }
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, AV1JNTCOMPAVGTest,
+    libaom_test::AV1JNTCOMPAVG::BuildParams(aom_jnt_comp_avg_pred_ssse3));
+#endif
+
+TEST_P(AV1JNTCOMPAVGUPSAMPLEDTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(0));
+}
+
+TEST_P(AV1JNTCOMPAVGUPSAMPLEDTest, CheckOutput) {
+  RunCheckOutput(GET_PARAM(0));
+}
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(SSSE3, AV1JNTCOMPAVGUPSAMPLEDTest,
+                        libaom_test::AV1JNTCOMPAVG::BuildParams(
+                            aom_jnt_comp_avg_upsampled_pred_ssse3));
+#endif
+
+TEST_P(AV1HighBDJNTCOMPAVGTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(1)); }
+
+TEST_P(AV1HighBDJNTCOMPAVGTest, CheckOutput) { RunCheckOutput(GET_PARAM(1)); }
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, AV1HighBDJNTCOMPAVGTest,
+    libaom_test::AV1JNTCOMPAVG::BuildParams(aom_highbd_jnt_comp_avg_pred_sse2));
+#endif
+
+TEST_P(AV1HighBDJNTCOMPAVGUPSAMPLEDTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(1));
+}
+
+TEST_P(AV1HighBDJNTCOMPAVGUPSAMPLEDTest, CheckOutput) {
+  RunCheckOutput(GET_PARAM(1));
+}
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, AV1HighBDJNTCOMPAVGUPSAMPLEDTest,
+                        libaom_test::AV1JNTCOMPAVG::BuildParams(
+                            aom_highbd_jnt_comp_avg_upsampled_pred_sse2));
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/comp_avg_pred_test.h b/third_party/aom/test/comp_avg_pred_test.h
new file mode 100644
index 0000000000..ab2004c05b
--- /dev/null
+++ b/third_party/aom/test/comp_avg_pred_test.h
@@ -0,0 +1,546 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef TEST_COMP_AVG_PRED_TEST_H_
+#define TEST_COMP_AVG_PRED_TEST_H_
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "av1/common/common_data.h"
+#include "aom_ports/aom_timer.h"
+
+namespace libaom_test {
+const int kMaxSize = 128 + 32;  // padding
+
+namespace AV1JNTCOMPAVG {
+
+typedef void (*jntcompavg_func)(uint8_t *comp_pred, const uint8_t *pred,
+                                int width, int height, const uint8_t *ref,
+                                int ref_stride,
+                                const JNT_COMP_PARAMS *jcp_param);
+
+typedef void (*jntcompavgupsampled_func)(
+    MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
+    const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
+    int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
+    int ref_stride, const JNT_COMP_PARAMS *jcp_param);
+
+typedef void (*highbdjntcompavg_func)(uint16_t *comp_pred, const uint8_t *pred8,
+                                      int width, int height,
+                                      const uint8_t *ref8, int ref_stride,
+                                      const JNT_COMP_PARAMS *jcp_param);
+
+typedef void (*highbdjntcompavgupsampled_func)(
+    MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
+    const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
+    int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
+    int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param);
+
+typedef ::testing::tuple<jntcompavg_func, BLOCK_SIZE> JNTCOMPAVGParam;
+
+typedef ::testing::tuple<jntcompavgupsampled_func, BLOCK_SIZE>
+    JNTCOMPAVGUPSAMPLEDParam;
+
+typedef ::testing::tuple<int, highbdjntcompavg_func, BLOCK_SIZE>
+    HighbdJNTCOMPAVGParam;
+
+typedef ::testing::tuple<int, highbdjntcompavgupsampled_func, BLOCK_SIZE>
+    HighbdJNTCOMPAVGUPSAMPLEDParam;
+
+::testing::internal::ParamGenerator<JNTCOMPAVGParam> BuildParams(
+    jntcompavg_func filter) {
+  return ::testing::Combine(::testing::Values(filter),
+                            ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
+}
+
+::testing::internal::ParamGenerator<JNTCOMPAVGUPSAMPLEDParam> BuildParams(
+    jntcompavgupsampled_func filter) {
+  return ::testing::Combine(::testing::Values(filter),
+                            ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
+}
+
+::testing::internal::ParamGenerator<HighbdJNTCOMPAVGParam> BuildParams(
+    highbdjntcompavg_func filter) {
+  return ::testing::Combine(::testing::Range(8, 13, 2),
+                            ::testing::Values(filter),
+                            ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
+}
+
+::testing::internal::ParamGenerator<HighbdJNTCOMPAVGUPSAMPLEDParam> BuildParams(
+    highbdjntcompavgupsampled_func filter) {
+  return ::testing::Combine(::testing::Range(8, 13, 2),
+                            ::testing::Values(filter),
+                            ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
+}
+
+class AV1JNTCOMPAVGTest : public ::testing::TestWithParam<JNTCOMPAVGParam> {
+ public:
+  ~AV1JNTCOMPAVGTest() {}
+  void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+  void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunCheckOutput(jntcompavg_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(1);
+
+    uint8_t pred8[kMaxSize * kMaxSize];
+    uint8_t ref8[kMaxSize * kMaxSize];
+    uint8_t output[kMaxSize * kMaxSize];
+    uint8_t output2[kMaxSize * kMaxSize];
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand8();
+        ref8[i * w + j] = rnd_.Rand8();
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    JNT_COMP_PARAMS jnt_comp_params;
+    jnt_comp_params.use_jnt_comp_avg = 1;
+
+    for (int ii = 0; ii < 2; ii++) {
+      for (int jj = 0; jj < 4; jj++) {
+        jnt_comp_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
+        jnt_comp_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
+
+        const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
+        const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
+        aom_jnt_comp_avg_pred_c(output, pred8 + offset_r * w + offset_c, in_w,
+                                in_h, ref8 + offset_r * w + offset_c, in_w,
+                                &jnt_comp_params);
+        test_impl(output2, pred8 + offset_r * w + offset_c, in_w, in_h,
+                  ref8 + offset_r * w + offset_c, in_w, &jnt_comp_params);
+
+        for (int i = 0; i < in_h; ++i) {
+          for (int j = 0; j < in_w; ++j) {
+            int idx = i * in_w + j;
+            ASSERT_EQ(output[idx], output2[idx])
+                << "Mismatch at unit tests for AV1JNTCOMPAVGTest\n"
+                << in_w << "x" << in_h << " Pixel mismatch at index " << idx
+                << " = (" << i << ", " << j << ")";
+          }
+        }
+      }
+    }
+  }
+  void RunSpeedTest(jntcompavg_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(1);
+
+    uint8_t pred8[kMaxSize * kMaxSize];
+    uint8_t ref8[kMaxSize * kMaxSize];
+    uint8_t output[kMaxSize * kMaxSize];
+    uint8_t output2[kMaxSize * kMaxSize];
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand8();
+        ref8[i * w + j] = rnd_.Rand8();
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    JNT_COMP_PARAMS jnt_comp_params;
+    jnt_comp_params.use_jnt_comp_avg = 1;
+
+    jnt_comp_params.fwd_offset = quant_dist_lookup_table[0][0][0];
+    jnt_comp_params.bck_offset = quant_dist_lookup_table[0][0][1];
+
+    const int num_loops = 1000000000 / (in_w + in_h);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+
+    for (int i = 0; i < num_loops; ++i)
+      aom_jnt_comp_avg_pred_c(output, pred8, in_w, in_h, ref8, in_w,
+                              &jnt_comp_params);
+
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    printf("jntcompavg c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
+           1000.0 * elapsed_time / num_loops);
+
+    aom_usec_timer timer1;
+    aom_usec_timer_start(&timer1);
+
+    for (int i = 0; i < num_loops; ++i)
+      test_impl(output2, pred8, in_w, in_h, ref8, in_w, &jnt_comp_params);
+
+    aom_usec_timer_mark(&timer1);
+    const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
+    printf("jntcompavg test_code %3dx%-3d: %7.2f us\n", in_w, in_h,
+           1000.0 * elapsed_time1 / num_loops);
+  }
+
+  libaom_test::ACMRandom rnd_;
+};  // class AV1JNTCOMPAVGTest
+
+class AV1JNTCOMPAVGUPSAMPLEDTest
+    : public ::testing::TestWithParam<JNTCOMPAVGUPSAMPLEDParam> {
+ public:
+  ~AV1JNTCOMPAVGUPSAMPLEDTest() {}
+  void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+  void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunCheckOutput(jntcompavgupsampled_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(1);
+
+    uint8_t pred8[kMaxSize * kMaxSize];
+    uint8_t ref8[kMaxSize * kMaxSize];
+    DECLARE_ALIGNED(16, uint8_t, output[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(16, uint8_t, output2[MAX_SB_SQUARE]);
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand8();
+        ref8[i * w + j] = rnd_.Rand8();
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    JNT_COMP_PARAMS jnt_comp_params;
+    jnt_comp_params.use_jnt_comp_avg = 1;
+    int sub_x_q3, sub_y_q3;
+    for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) {
+      for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) {
+        for (int ii = 0; ii < 2; ii++) {
+          for (int jj = 0; jj < 4; jj++) {
+            jnt_comp_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
+            jnt_comp_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
+
+            const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
+            const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
+
+            aom_jnt_comp_avg_upsampled_pred_c(
+                NULL, NULL, 0, 0, NULL, output, pred8 + offset_r * w + offset_c,
+                in_w, in_h, sub_x_q3, sub_y_q3, ref8 + offset_r * w + offset_c,
+                in_w, &jnt_comp_params);
+            test_impl(NULL, NULL, 0, 0, NULL, output2,
+                      pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
+                      sub_y_q3, ref8 + offset_r * w + offset_c, in_w,
+                      &jnt_comp_params);
+
+            for (int i = 0; i < in_h; ++i) {
+              for (int j = 0; j < in_w; ++j) {
+                int idx = i * in_w + j;
+                ASSERT_EQ(output[idx], output2[idx])
+                    << "Mismatch at unit tests for AV1JNTCOMPAVGUPSAMPLEDTest\n"
+                    << in_w << "x" << in_h << " Pixel mismatch at index " << idx
+                    << " = (" << i << ", " << j << "), sub pixel offset = ("
+                    << sub_y_q3 << ", " << sub_x_q3 << ")";
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  void RunSpeedTest(jntcompavgupsampled_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(1);
+
+    uint8_t pred8[kMaxSize * kMaxSize];
+    uint8_t ref8[kMaxSize * kMaxSize];
+    DECLARE_ALIGNED(16, uint8_t, output[MAX_SB_SQUARE]);
+    DECLARE_ALIGNED(16, uint8_t, output2[MAX_SB_SQUARE]);
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand8();
+        ref8[i * w + j] = rnd_.Rand8();
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    JNT_COMP_PARAMS jnt_comp_params;
+    jnt_comp_params.use_jnt_comp_avg = 1;
+
+    jnt_comp_params.fwd_offset = quant_dist_lookup_table[0][0][0];
+    jnt_comp_params.bck_offset = quant_dist_lookup_table[0][0][1];
+
+    int sub_x_q3 = 0;
+    int sub_y_q3 = 0;
+
+    const int num_loops = 1000000000 / (in_w + in_h);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+
+    for (int i = 0; i < num_loops; ++i)
+      aom_jnt_comp_avg_upsampled_pred_c(NULL, NULL, 0, 0, NULL, output, pred8,
+                                        in_w, in_h, sub_x_q3, sub_y_q3, ref8,
+                                        in_w, &jnt_comp_params);
+
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    printf("jntcompavgupsampled c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
+           1000.0 * elapsed_time / num_loops);
+
+    aom_usec_timer timer1;
+    aom_usec_timer_start(&timer1);
+
+    for (int i = 0; i < num_loops; ++i)
+      test_impl(NULL, NULL, 0, 0, NULL, output2, pred8, in_w, in_h, sub_x_q3,
+                sub_y_q3, ref8, in_w, &jnt_comp_params);
+
+    aom_usec_timer_mark(&timer1);
+    const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
+    printf("jntcompavgupsampled test_code %3dx%-3d: %7.2f us\n", in_w, in_h,
+           1000.0 * elapsed_time1 / num_loops);
+  }
+
+  libaom_test::ACMRandom rnd_;
+};  // class AV1JNTCOMPAVGUPSAMPLEDTest
+
+class AV1HighBDJNTCOMPAVGTest
+    : public ::testing::TestWithParam<HighbdJNTCOMPAVGParam> {
+ public:
+  ~AV1HighBDJNTCOMPAVGTest() {}
+  void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+
+  void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunCheckOutput(highbdjntcompavg_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(2);
+    const int bd = GET_PARAM(0);
+    uint16_t pred8[kMaxSize * kMaxSize];
+    uint16_t ref8[kMaxSize * kMaxSize];
+    uint16_t output[kMaxSize * kMaxSize];
+    uint16_t output2[kMaxSize * kMaxSize];
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+        ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    JNT_COMP_PARAMS jnt_comp_params;
+    jnt_comp_params.use_jnt_comp_avg = 1;
+
+    for (int ii = 0; ii < 2; ii++) {
+      for (int jj = 0; jj < 4; jj++) {
+        jnt_comp_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
+        jnt_comp_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
+
+        const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
+        const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
+        aom_highbd_jnt_comp_avg_pred_c(
+            output, CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w,
+            in_h, CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w,
+            &jnt_comp_params);
+        test_impl(output2, CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c,
+                  in_w, in_h,
+                  CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w,
+                  &jnt_comp_params);
+
+        for (int i = 0; i < in_h; ++i) {
+          for (int j = 0; j < in_w; ++j) {
+            int idx = i * in_w + j;
+            ASSERT_EQ(output[idx], output2[idx])
+                << "Mismatch at unit tests for AV1HighBDJNTCOMPAVGTest\n"
+                << in_w << "x" << in_h << " Pixel mismatch at index " << idx
+                << " = (" << i << ", " << j << ")";
+          }
+        }
+      }
+    }
+  }
+  void RunSpeedTest(highbdjntcompavg_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(2);
+    const int bd = GET_PARAM(0);
+    uint16_t pred8[kMaxSize * kMaxSize];
+    uint16_t ref8[kMaxSize * kMaxSize];
+    uint16_t output[kMaxSize * kMaxSize];
+    uint16_t output2[kMaxSize * kMaxSize];
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+        ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    JNT_COMP_PARAMS jnt_comp_params;
+    jnt_comp_params.use_jnt_comp_avg = 1;
+
+    jnt_comp_params.fwd_offset = quant_dist_lookup_table[0][0][0];
+    jnt_comp_params.bck_offset = quant_dist_lookup_table[0][0][1];
+
+    const int num_loops = 1000000000 / (in_w + in_h);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+
+    for (int i = 0; i < num_loops; ++i)
+      aom_highbd_jnt_comp_avg_pred_c(output, CONVERT_TO_BYTEPTR(pred8), in_w,
+                                     in_h, CONVERT_TO_BYTEPTR(ref8), in_w,
+                                     &jnt_comp_params);
+
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    printf("highbdjntcompavg c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
+           1000.0 * elapsed_time / num_loops);
+
+    aom_usec_timer timer1;
+    aom_usec_timer_start(&timer1);
+
+    for (int i = 0; i < num_loops; ++i)
+      test_impl(output2, CONVERT_TO_BYTEPTR(pred8), in_w, in_h,
+                CONVERT_TO_BYTEPTR(ref8), in_w, &jnt_comp_params);
+
+    aom_usec_timer_mark(&timer1);
+    const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
+    printf("highbdjntcompavg test_code %3dx%-3d: %7.2f us\n", in_w, in_h,
+           1000.0 * elapsed_time1 / num_loops);
+  }
+
+  libaom_test::ACMRandom rnd_;
+};  // class AV1HighBDJNTCOMPAVGTest
+
+class AV1HighBDJNTCOMPAVGUPSAMPLEDTest
+    : public ::testing::TestWithParam<HighbdJNTCOMPAVGUPSAMPLEDParam> {
+ public:
+  ~AV1HighBDJNTCOMPAVGUPSAMPLEDTest() {}
+  void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+  void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunCheckOutput(highbdjntcompavgupsampled_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(2);
+    const int bd = GET_PARAM(0);
+    uint16_t pred8[kMaxSize * kMaxSize];
+    uint16_t ref8[kMaxSize * kMaxSize];
+    uint16_t output[kMaxSize * kMaxSize];
+    uint16_t output2[kMaxSize * kMaxSize];
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+        ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    JNT_COMP_PARAMS jnt_comp_params;
+    jnt_comp_params.use_jnt_comp_avg = 1;
+    int sub_x_q3, sub_y_q3;
+
+    for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) {
+      for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) {
+        for (int ii = 0; ii < 2; ii++) {
+          for (int jj = 0; jj < 4; jj++) {
+            jnt_comp_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
+            jnt_comp_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
+
+            const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
+            const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
+
+            aom_highbd_jnt_comp_avg_upsampled_pred_c(
+                NULL, NULL, 0, 0, NULL, output,
+                CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w, in_h,
+                sub_x_q3, sub_y_q3,
+                CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w, bd,
+                &jnt_comp_params);
+            test_impl(NULL, NULL, 0, 0, NULL, output2,
+                      CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w,
+                      in_h, sub_x_q3, sub_y_q3,
+                      CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w,
+                      bd, &jnt_comp_params);
+
+            for (int i = 0; i < in_h; ++i) {
+              for (int j = 0; j < in_w; ++j) {
+                int idx = i * in_w + j;
+                ASSERT_EQ(output[idx], output2[idx])
+                    << "Mismatch at unit tests for "
+                       "AV1HighBDJNTCOMPAVGUPSAMPLEDTest\n"
+                    << in_w << "x" << in_h << " Pixel mismatch at index " << idx
+                    << " = (" << i << ", " << j << "), sub pixel offset = ("
+                    << sub_y_q3 << ", " << sub_x_q3 << ")";
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  void RunSpeedTest(highbdjntcompavgupsampled_func test_impl) {
+    const int w = kMaxSize, h = kMaxSize;
+    const int block_idx = GET_PARAM(2);
+    const int bd = GET_PARAM(0);
+    uint16_t pred8[kMaxSize * kMaxSize];
+    uint16_t ref8[kMaxSize * kMaxSize];
+    uint16_t output[kMaxSize * kMaxSize];
+    uint16_t output2[kMaxSize * kMaxSize];
+
+    for (int i = 0; i < h; ++i)
+      for (int j = 0; j < w; ++j) {
+        pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+        ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+      }
+    const int in_w = block_size_wide[block_idx];
+    const int in_h = block_size_high[block_idx];
+
+    JNT_COMP_PARAMS jnt_comp_params;
+    jnt_comp_params.use_jnt_comp_avg = 1;
+
+    jnt_comp_params.fwd_offset = quant_dist_lookup_table[0][0][0];
+    jnt_comp_params.bck_offset = quant_dist_lookup_table[0][0][1];
+    int sub_x_q3 = 0;
+    int sub_y_q3 = 0;
+    const int num_loops = 1000000000 / (in_w + in_h);
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+
+    for (int i = 0; i < num_loops; ++i)
+      aom_highbd_jnt_comp_avg_upsampled_pred_c(
+          NULL, NULL, 0, 0, NULL, output, CONVERT_TO_BYTEPTR(pred8), in_w, in_h,
+          sub_x_q3, sub_y_q3, CONVERT_TO_BYTEPTR(ref8), in_w, bd,
+          &jnt_comp_params);
+
+    aom_usec_timer_mark(&timer);
+    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+    printf("highbdjntcompavgupsampled c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
+           1000.0 * elapsed_time / num_loops);
+
+    aom_usec_timer timer1;
+    aom_usec_timer_start(&timer1);
+
+    for (int i = 0; i < num_loops; ++i)
+      test_impl(NULL, NULL, 0, 0, NULL, output2, CONVERT_TO_BYTEPTR(pred8),
+                in_w, in_h, sub_x_q3, sub_y_q3, CONVERT_TO_BYTEPTR(ref8), in_w,
+                bd, &jnt_comp_params);
+
+    aom_usec_timer_mark(&timer1);
+    const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
+    printf("highbdjntcompavgupsampled test_code %3dx%-3d: %7.2f us\n", in_w,
+           in_h, 1000.0 * elapsed_time1 / num_loops);
+  }
+
+  libaom_test::ACMRandom rnd_;
+};  // class AV1HighBDJNTCOMPAVGUPSAMPLEDTest
+
+}  // namespace AV1JNTCOMPAVG
+}  // namespace libaom_test
+
+#endif  // TEST_COMP_AVG_PRED_TEST_H_
diff --git a/third_party/aom/test/comp_mask_variance_test.cc b/third_party/aom/test/comp_mask_variance_test.cc
new file mode 100644
index 0000000000..a5e3f34110
--- /dev/null
+++ b/third_party/aom/test/comp_mask_variance_test.cc
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdlib>
+#include <new>
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/variance.h"
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/aom_timer.h"
+#include "aom_ports/mem.h"
+#include "av1/common/reconinter.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace AV1CompMaskVariance {
+typedef void (*comp_mask_pred_func)(uint8_t *comp_pred, const uint8_t *pred,
+                                    int width, int height, const uint8_t *ref,
+                                    int ref_stride, const uint8_t *mask,
+                                    int mask_stride, int invert_mask);
+#if HAVE_SSSE3 || HAVE_AV2
+const BLOCK_SIZE kValidBlockSize[] = {
+  BLOCK_8X8,   BLOCK_8X16, BLOCK_8X32,  BLOCK_16X8,  BLOCK_16X16,
+  BLOCK_16X32, BLOCK_32X8, BLOCK_32X16, BLOCK_32X32,
+};
+#endif
+typedef ::testing::tuple<comp_mask_pred_func, BLOCK_SIZE> CompMaskPredParam;
+
+class AV1CompMaskVarianceTest
+    : public ::testing::TestWithParam<CompMaskPredParam> {
+ public:
+  ~AV1CompMaskVarianceTest();
+  void SetUp();
+
+  void TearDown();
+
+ protected:
+  void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
+  void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
+  bool CheckResult(int width, int height) {
+    for (int y = 0; y < height; ++y) {
+      for (int x = 0; x < width; ++x) {
+        const int idx = y * width + x;
+        if (comp_pred1_[idx] != comp_pred2_[idx]) {
+          printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
+          printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  libaom_test::ACMRandom rnd_;
+  uint8_t *comp_pred1_;
+  uint8_t *comp_pred2_;
+  uint8_t *pred_;
+  uint8_t *ref_buffer_;
+  uint8_t *ref_;
+};
+
+AV1CompMaskVarianceTest::~AV1CompMaskVarianceTest() { ; }
+
+void AV1CompMaskVarianceTest::SetUp() {
+  rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
+  av1_init_wedge_masks();
+  comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
+  comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
+  pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
+  ref_buffer_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE + (8 * MAX_SB_SIZE));
+  ref_ = ref_buffer_ + (8 * MAX_SB_SIZE);
+  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
+    pred_[i] = rnd_.Rand8();
+  }
+  for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
+    ref_buffer_[i] = rnd_.Rand8();
+  }
+}
+
+void AV1CompMaskVarianceTest::TearDown() {
+  aom_free(comp_pred1_);
+  aom_free(comp_pred2_);
+  aom_free(pred_);
+  aom_free(ref_buffer_);
+  libaom_test::ClearSystemState();
+}
+
+void AV1CompMaskVarianceTest::RunCheckOutput(comp_mask_pred_func test_impl,
+                                             BLOCK_SIZE bsize, int inv) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+
+  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
+  for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
+    const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
+
+    aom_comp_mask_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w,
+                         inv);
+    test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, inv);
+
+    ASSERT_EQ(CheckResult(w, h), true)
+        << " wedge " << wedge_index << " inv " << inv;
+  }
+}
+
+void AV1CompMaskVarianceTest::RunSpeedTest(comp_mask_pred_func test_impl,
+                                           BLOCK_SIZE bsize) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+
+  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
+  int wedge_index = wedge_types / 2;
+  const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
+  const int num_loops = 1000000000 / (w + h);
+
+  comp_mask_pred_func funcs[2] = { aom_comp_mask_pred_c, test_impl };
+  double elapsed_time[2] = { 0 };
+  for (int i = 0; i < 2; ++i) {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    comp_mask_pred_func func = funcs[i];
+    for (int j = 0; j < num_loops; ++j) {
+      func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, 0);
+    }
+    aom_usec_timer_mark(&timer);
+    double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    elapsed_time[i] = 1000.0 * time / num_loops;
+  }
+  printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
+         elapsed_time[1]);
+  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
+}
+
+TEST_P(AV1CompMaskVarianceTest, CheckOutput) {
+  // inv = 0, 1
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
+}
+
+TEST_P(AV1CompMaskVarianceTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
+}
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, AV1CompMaskVarianceTest,
+    ::testing::Combine(::testing::Values(&aom_comp_mask_pred_ssse3),
+                       ::testing::ValuesIn(kValidBlockSize)));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(
+    AVX2, AV1CompMaskVarianceTest,
+    ::testing::Combine(::testing::Values(&aom_comp_mask_pred_avx2),
+                       ::testing::ValuesIn(kValidBlockSize)));
+#endif
+
+#ifndef aom_comp_mask_pred
+// can't run this test if aom_comp_mask_pred is defined to aom_comp_mask_pred_c
+class AV1CompMaskUpVarianceTest : public AV1CompMaskVarianceTest {
+ public:
+  ~AV1CompMaskUpVarianceTest();
+
+ protected:
+  void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
+  void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
+                    int havSub);
+};
+
+AV1CompMaskUpVarianceTest::~AV1CompMaskUpVarianceTest() { ; }
+
+void AV1CompMaskUpVarianceTest::RunCheckOutput(comp_mask_pred_func test_impl,
+                                               BLOCK_SIZE bsize, int inv) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
+
+  // loop through subx and suby
+  for (int sub = 0; sub < 8 * 8; ++sub) {
+    int subx = sub & 0x7;
+    int suby = (sub >> 3);
+    for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
+      const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
+
+      aom_comp_mask_pred = aom_comp_mask_pred_c;  // ref
+      aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred1_, pred_,
+                                   w, h, subx, suby, ref_, MAX_SB_SIZE, mask, w,
+                                   inv);
+
+      aom_comp_mask_pred = test_impl;  // test
+      aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred2_, pred_,
+                                   w, h, subx, suby, ref_, MAX_SB_SIZE, mask, w,
+                                   inv);
+      ASSERT_EQ(CheckResult(w, h), true)
+          << " wedge " << wedge_index << " inv " << inv << "sub (" << subx
+          << "," << suby << ")";
+    }
+  }
+}
+
+void AV1CompMaskUpVarianceTest::RunSpeedTest(comp_mask_pred_func test_impl,
+                                             BLOCK_SIZE bsize, int havSub) {
+  const int w = block_size_wide[bsize];
+  const int h = block_size_high[bsize];
+  const int subx = havSub ? 3 : 0;
+  const int suby = havSub ? 4 : 0;
+
+  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
+  int wedge_index = wedge_types / 2;
+  const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
+
+  const int num_loops = 1000000000 / (w + h);
+  comp_mask_pred_func funcs[2] = { &aom_comp_mask_pred_c, test_impl };
+  double elapsed_time[2] = { 0 };
+  for (int i = 0; i < 2; ++i) {
+    aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+    aom_comp_mask_pred = funcs[i];
+    for (int j = 0; j < num_loops; ++j) {
+      aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred1_, pred_,
+                                   w, h, subx, suby, ref_, MAX_SB_SIZE, mask, w,
+                                   0);
+    }
+    aom_usec_timer_mark(&timer);
+    double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
+    elapsed_time[i] = 1000.0 * time / num_loops;
+  }
+  printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
+         elapsed_time[1]);
+  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
+}
+
+TEST_P(AV1CompMaskUpVarianceTest, CheckOutput) {
+  // inv mask = 0, 1
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
+  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
+}
+
+TEST_P(AV1CompMaskUpVarianceTest, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1);
+}
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, AV1CompMaskUpVarianceTest,
+    ::testing::Combine(::testing::Values(&aom_comp_mask_pred_ssse3),
+                       ::testing::ValuesIn(kValidBlockSize)));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(
+    AVX2, AV1CompMaskUpVarianceTest,
+    ::testing::Combine(::testing::Values(&aom_comp_mask_pred_avx2),
+                       ::testing::ValuesIn(kValidBlockSize)));
+#endif
+
+#endif  // ifndef aom_comp_mask_pred
+}  // namespace AV1CompMaskVariance
diff --git a/third_party/aom/test/convolve_round_test.cc b/third_party/aom/test/convolve_round_test.cc
index 4976b03c8a..2f801e7d46 100644
--- a/third_party/aom/test/convolve_round_test.cc
+++ b/third_party/aom/test/convolve_round_test.cc
@@ -11,7 +11,8 @@
 
 #include <assert.h>
 
-#include "./av1_rtcd.h"
+#include "config/av1_rtcd.h"
+
 #include "aom/aom_integer.h"
 #include "aom_ports/aom_timer.h"
 #include "test/acm_random.h"
@@ -51,7 +52,7 @@ void highbd_convolve_rounding_12(CONVOLVE_ROUNDING_PARAM) {
 
 typedef enum { LOWBITDEPTH_TEST, HIGHBITDEPTH_TEST } DataPathType;
 
-using std::tr1::tuple;
+using ::testing::tuple;
 
 typedef tuple<ConvolveRoundFunc, ConvolveRoundFunc, DataPathType>
     ConvolveRoundParam;
@@ -92,11 +93,9 @@ class ConvolveRoundTest : public ::testing::TestWithParam<ConvolveRoundParam> {
     if (data_path_ == LOWBITDEPTH_TEST) {
       dst = reinterpret_cast<uint8_t *>(dst_);
       dst_ref = reinterpret_cast<uint8_t *>(dst_ref_);
-#if CONFIG_HIGHBITDEPTH
     } else if (data_path_ == HIGHBITDEPTH_TEST) {
       dst = CONVERT_TO_BYTEPTR(dst_);
       dst_ref = CONVERT_TO_BYTEPTR(dst_ref_);
-#endif
     } else {
       assert(0);
     }
@@ -163,10 +162,8 @@ class ConvolveRoundTest : public ::testing::TestWithParam<ConvolveRoundParam> {
 
 TEST_P(ConvolveRoundTest, BitExactCheck) { ConvolveRoundingRun(); }
 
-using std::tr1::make_tuple;
-
+using ::testing::make_tuple;
 #if HAVE_AVX2
-#if CONFIG_HIGHBITDEPTH
 const ConvolveRoundParam kConvRndParamArray[] = {
   make_tuple(&av1_convolve_rounding_c, &av1_convolve_rounding_avx2,
              LOWBITDEPTH_TEST),
@@ -180,11 +177,6 @@ const ConvolveRoundParam kConvRndParamArray[] = {
              &highbd_convolve_rounding_12<av1_highbd_convolve_rounding_avx2>,
              HIGHBITDEPTH_TEST)
 };
-#else
-const ConvolveRoundParam kConvRndParamArray[] = { make_tuple(
-    &av1_convolve_rounding_c, &av1_convolve_rounding_avx2, LOWBITDEPTH_TEST) };
-#endif
-
 INSTANTIATE_TEST_CASE_P(AVX2, ConvolveRoundTest,
                         ::testing::ValuesIn(kConvRndParamArray));
 #endif  // HAVE_AVX2
diff --git a/third_party/aom/test/convolve_test.cc b/third_party/aom/test/convolve_test.cc
index ffe0b87d25..7098e8af65 100644
--- a/third_party/aom/test/convolve_test.cc
+++ b/third_party/aom/test/convolve_test.cc
@@ -7,24 +7,25 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <string.h>
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
 #include "aom_dsp/aom_dsp_common.h"
 #include "aom_dsp/aom_filter.h"
 #include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
 #include "aom_ports/aom_timer.h"
+#include "aom_ports/mem.h"
 #include "av1/common/filter.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
 
 namespace {
 
@@ -37,35 +38,16 @@ typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                              int w, int h);
 
 struct ConvolveFunctions {
-  ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, ConvolveFunc h8,
-                    ConvolveFunc h8_avg, ConvolveFunc v8, ConvolveFunc v8_avg,
-                    ConvolveFunc hv8, ConvolveFunc hv8_avg, ConvolveFunc sh8,
-                    ConvolveFunc sh8_avg, ConvolveFunc sv8,
-                    ConvolveFunc sv8_avg, ConvolveFunc shv8,
-                    ConvolveFunc shv8_avg, int bd)
-      : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg),
-        v8_avg_(v8_avg), hv8_avg_(hv8_avg), sh8_(sh8), sv8_(sv8), shv8_(shv8),
-        sh8_avg_(sh8_avg), sv8_avg_(sv8_avg), shv8_avg_(shv8_avg),
-        use_highbd_(bd) {}
+  ConvolveFunctions(ConvolveFunc copy, ConvolveFunc h8, ConvolveFunc v8, int bd)
+      : copy_(copy), h8_(h8), v8_(v8), use_highbd_(bd) {}
 
   ConvolveFunc copy_;
-  ConvolveFunc avg_;
   ConvolveFunc h8_;
   ConvolveFunc v8_;
-  ConvolveFunc hv8_;
-  ConvolveFunc h8_avg_;
-  ConvolveFunc v8_avg_;
-  ConvolveFunc hv8_avg_;
-  ConvolveFunc sh8_;       // scaled horiz
-  ConvolveFunc sv8_;       // scaled vert
-  ConvolveFunc shv8_;      // scaled horiz/vert
-  ConvolveFunc sh8_avg_;   // scaled avg horiz
-  ConvolveFunc sv8_avg_;   // scaled avg vert
-  ConvolveFunc shv8_avg_;  // scaled avg horiz/vert
   int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
 };
 
-typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
+typedef ::testing::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
 
 #define ALL_SIZES_64(convolve_fn)                                         \
   make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn),         \
@@ -76,13 +58,9 @@ typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
       make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
       make_tuple(64, 64, &convolve_fn)
 
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
 #define ALL_SIZES(convolve_fn)                                          \
   make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn), \
       make_tuple(128, 128, &convolve_fn), ALL_SIZES_64(convolve_fn)
-#else
-#define ALL_SIZES ALL_SIZES_64
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
 
 // Reference 8-tap subpixel filter, slightly modified to fit into this test.
 #define AV1_FILTER_WEIGHT 128
@@ -186,7 +164,6 @@ void filter_average_block2d_8_c(const uint8_t *src_ptr,
                     output_height);
 }
 
-#if CONFIG_HIGHBITDEPTH
 void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
                                const unsigned int src_stride,
                                const int16_t *HFilter, const int16_t *VFilter,
@@ -288,7 +265,6 @@ void highbd_filter_average_block2d_8_c(
   highbd_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride,
                            output_width, output_height);
 }
-#endif  // CONFIG_HIGHBITDEPTH
 
 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
  public:
@@ -301,7 +277,6 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
         aom_memalign(kDataAlignment, kOutputBufferSize));
     output_ref_ = reinterpret_cast<uint8_t *>(
         aom_memalign(kDataAlignment, kOutputBufferSize));
-#if CONFIG_HIGHBITDEPTH
     input16_ = reinterpret_cast<uint16_t *>(aom_memalign(
                    kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
                1;
@@ -309,7 +284,6 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
         aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
     output16_ref_ = reinterpret_cast<uint16_t *>(
         aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
-#endif
   }
 
   virtual void TearDown() { libaom_test::ClearSystemState(); }
@@ -321,14 +295,12 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
     output_ = NULL;
     aom_free(output_ref_);
     output_ref_ = NULL;
-#if CONFIG_HIGHBITDEPTH
     aom_free(input16_ - 1);
     input16_ = NULL;
     aom_free(output16_);
     output16_ = NULL;
     aom_free(output16_ref_);
     output16_ref_ = NULL;
-#endif
   }
 
  protected:
@@ -356,24 +328,18 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 
   virtual void SetUp() {
     UUT_ = GET_PARAM(2);
-#if CONFIG_HIGHBITDEPTH
     if (UUT_->use_highbd_ != 0)
       mask_ = (1 << UUT_->use_highbd_) - 1;
     else
       mask_ = 255;
-#endif
     /* Set up guard blocks for an inner block centered in the outer block */
     for (int i = 0; i < kOutputBufferSize; ++i) {
       if (IsIndexInBorder(i)) {
         output_[i] = 255;
-#if CONFIG_HIGHBITDEPTH
         output16_[i] = mask_;
-#endif
       } else {
         output_[i] = 0;
-#if CONFIG_HIGHBITDEPTH
         output16_[i] = 0;
-#endif
       }
     }
 
@@ -381,31 +347,23 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
     for (int i = 0; i < kInputBufferSize; ++i) {
       if (i & 1) {
         input_[i] = 255;
-#if CONFIG_HIGHBITDEPTH
         input16_[i] = mask_;
-#endif
       } else {
         input_[i] = prng.Rand8Extremes();
-#if CONFIG_HIGHBITDEPTH
         input16_[i] = prng.Rand16() & mask_;
-#endif
       }
     }
   }
 
   void SetConstantInput(int value) {
     memset(input_, value, kInputBufferSize);
-#if CONFIG_HIGHBITDEPTH
     aom_memset16(input16_, value, kInputBufferSize);
-#endif
   }
 
   void CopyOutputToRef() {
     memcpy(output_ref_, output_, kOutputBufferSize);
-#if CONFIG_HIGHBITDEPTH
     // Copy 16-bit pixels values. The effective number of bytes is double.
     memcpy(output16_ref_, output16_, sizeof(output16_[0]) * kOutputBufferSize);
-#endif
   }
 
   void CheckGuardBlocks() {
@@ -418,72 +376,51 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 
   uint8_t *input() const {
     const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
-#if CONFIG_HIGHBITDEPTH
     if (UUT_->use_highbd_ == 0) {
       return input_ + offset;
     } else {
       return CONVERT_TO_BYTEPTR(input16_) + offset;
     }
-#else
-    return input_ + offset;
-#endif
   }
 
   uint8_t *output() const {
     const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
-#if CONFIG_HIGHBITDEPTH
     if (UUT_->use_highbd_ == 0) {
       return output_ + offset;
     } else {
       return CONVERT_TO_BYTEPTR(output16_) + offset;
     }
-#else
-    return output_ + offset;
-#endif
   }
 
   uint8_t *output_ref() const {
     const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
-#if CONFIG_HIGHBITDEPTH
     if (UUT_->use_highbd_ == 0) {
       return output_ref_ + offset;
     } else {
       return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
     }
-#else
-    return output_ref_ + offset;
-#endif
   }
 
   uint16_t lookup(uint8_t *list, int index) const {
-#if CONFIG_HIGHBITDEPTH
     if (UUT_->use_highbd_ == 0) {
       return list[index];
     } else {
       return CONVERT_TO_SHORTPTR(list)[index];
     }
-#else
-    return list[index];
-#endif
   }
 
   void assign_val(uint8_t *list, int index, uint16_t val) const {
-#if CONFIG_HIGHBITDEPTH
     if (UUT_->use_highbd_ == 0) {
       list[index] = (uint8_t)val;
     } else {
       CONVERT_TO_SHORTPTR(list)[index] = val;
     }
-#else
-    list[index] = (uint8_t)val;
-#endif
   }
 
   void wrapper_filter_average_block2d_8_c(
       const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
       const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
       unsigned int output_width, unsigned int output_height) {
-#if CONFIG_HIGHBITDEPTH
     if (UUT_->use_highbd_ == 0) {
       filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
                                  dst_stride, output_width, output_height);
@@ -493,17 +430,12 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
           CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height,
           UUT_->use_highbd_);
     }
-#else
-    filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
-                               dst_stride, output_width, output_height);
-#endif
   }
 
   void wrapper_filter_block2d_8_c(
       const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
       const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
       unsigned int output_width, unsigned int output_height) {
-#if CONFIG_HIGHBITDEPTH
     if (UUT_->use_highbd_ == 0) {
       filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
                          dst_stride, output_width, output_height);
@@ -513,32 +445,24 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
                                 dst_stride, output_width, output_height,
                                 UUT_->use_highbd_);
     }
-#else
-    filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
-                       dst_stride, output_width, output_height);
-#endif
   }
 
   const ConvolveFunctions *UUT_;
   static uint8_t *input_;
   static uint8_t *output_;
   static uint8_t *output_ref_;
-#if CONFIG_HIGHBITDEPTH
   static uint16_t *input16_;
   static uint16_t *output16_;
   static uint16_t *output16_ref_;
   int mask_;
-#endif
 };
 
 uint8_t *ConvolveTest::input_ = NULL;
 uint8_t *ConvolveTest::output_ = NULL;
 uint8_t *ConvolveTest::output_ref_ = NULL;
-#if CONFIG_HIGHBITDEPTH
 uint16_t *ConvolveTest::input16_ = NULL;
 uint16_t *ConvolveTest::output16_ = NULL;
 uint16_t *ConvolveTest::output16_ref_ = NULL;
-#endif
 
 TEST_P(ConvolveTest, GuardBlocks) { CheckGuardBlocks(); }
 
@@ -558,83 +482,6 @@ TEST_P(ConvolveTest, Copy) {
           << "(" << x << "," << y << ")";
 }
 
-TEST_P(ConvolveTest, Avg) {
-  uint8_t *const in = input();
-  uint8_t *const out = output();
-  uint8_t *const out_ref = output_ref();
-  CopyOutputToRef();
-
-  ASM_REGISTER_STATE_CHECK(UUT_->avg_(in, kInputStride, out, kOutputStride,
-                                      NULL, 0, NULL, 0, Width(), Height()));
-
-  CheckGuardBlocks();
-
-  for (int y = 0; y < Height(); ++y)
-    for (int x = 0; x < Width(); ++x)
-      ASSERT_EQ(lookup(out, y * kOutputStride + x),
-                ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) +
-                                       lookup(out_ref, y * kOutputStride + x),
-                                   1))
-          << "(" << x << "," << y << ")";
-}
-
-TEST_P(ConvolveTest, CopyHoriz) {
-  uint8_t *const in = input();
-  uint8_t *const out = output();
-  DECLARE_ALIGNED(256, const int16_t,
-                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
-
-  ASM_REGISTER_STATE_CHECK(UUT_->sh8_(in, kInputStride, out, kOutputStride,
-                                      filter8, 16, filter8, 16, Width(),
-                                      Height()));
-
-  CheckGuardBlocks();
-
-  for (int y = 0; y < Height(); ++y)
-    for (int x = 0; x < Width(); ++x)
-      ASSERT_EQ(lookup(out, y * kOutputStride + x),
-                lookup(in, y * kInputStride + x))
-          << "(" << x << "," << y << ")";
-}
-
-TEST_P(ConvolveTest, CopyVert) {
-  uint8_t *const in = input();
-  uint8_t *const out = output();
-  DECLARE_ALIGNED(256, const int16_t,
-                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
-
-  ASM_REGISTER_STATE_CHECK(UUT_->sv8_(in, kInputStride, out, kOutputStride,
-                                      filter8, 16, filter8, 16, Width(),
-                                      Height()));
-
-  CheckGuardBlocks();
-
-  for (int y = 0; y < Height(); ++y)
-    for (int x = 0; x < Width(); ++x)
-      ASSERT_EQ(lookup(out, y * kOutputStride + x),
-                lookup(in, y * kInputStride + x))
-          << "(" << x << "," << y << ")";
-}
-
-TEST_P(ConvolveTest, Copy2D) {
-  uint8_t *const in = input();
-  uint8_t *const out = output();
-  DECLARE_ALIGNED(256, const int16_t,
-                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };
-
-  ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
-                                       filter8, 16, filter8, 16, Width(),
-                                       Height()));
-
-  CheckGuardBlocks();
-
-  for (int y = 0; y < Height(); ++y)
-    for (int x = 0; x < Width(); ++x)
-      ASSERT_EQ(lookup(out, y * kOutputStride + x),
-                lookup(in, y * kInputStride + x))
-          << "(" << x << "," << y << ")";
-}
-
 const int kNumFilterBanks = SWITCHABLE_FILTERS;
 const int kNumFilters = 16;
 
@@ -643,11 +490,9 @@ TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
     const InterpFilter filter = (InterpFilter)filter_bank;
     const InterpKernel *filters =
         (const InterpKernel *)av1_get_interp_filter_kernel(filter);
-#if CONFIG_DUAL_FILTER
     const InterpFilterParams filter_params =
-        av1_get_interp_filter_params(filter);
+        av1_get_interp_filter_params_with_block_size(filter, 8);
     if (filter_params.taps != SUBPEL_TAPS) continue;
-#endif
     for (int i = 0; i < kNumFilters; i++) {
       const int p0 = filters[i][0] + filters[i][1];
       const int p1 = filters[i][2] + filters[i][3];
@@ -670,7 +515,6 @@ const int16_t kInvalidFilter[8] = { 0 };
 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
   uint8_t *const in = input();
   uint8_t *const out = output();
-#if CONFIG_HIGHBITDEPTH
   uint8_t ref8[kOutputStride * kMaxDimension];
   uint16_t ref16[kOutputStride * kMaxDimension];
   uint8_t *ref;
@@ -679,19 +523,14 @@ TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
   } else {
     ref = CONVERT_TO_BYTEPTR(ref16);
   }
-#else
-  uint8_t ref[kOutputStride * kMaxDimension];
-#endif
 
   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
     const InterpFilter filter = (InterpFilter)filter_bank;
     const InterpKernel *filters =
         (const InterpKernel *)av1_get_interp_filter_kernel(filter);
-#if CONFIG_DUAL_FILTER
     const InterpFilterParams filter_params =
-        av1_get_interp_filter_params(filter);
+        av1_get_interp_filter_params_with_block_size(filter, 8);
     if (filter_params.taps != SUBPEL_TAPS) continue;
-#endif
 
     for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
       for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
@@ -700,9 +539,7 @@ TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
                                    Width(), Height());
 
         if (filter_x && filter_y)
-          ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
-              in, kInputStride, out, kOutputStride, filters[filter_x], 16,
-              filters[filter_y], 16, Width(), Height()));
+          continue;
         else if (filter_y)
           ASM_REGISTER_STATE_CHECK(
               UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
@@ -730,93 +567,9 @@ TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
   }
 }
 
-TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
-  uint8_t *const in = input();
-  uint8_t *const out = output();
-#if CONFIG_HIGHBITDEPTH
-  uint8_t ref8[kOutputStride * kMaxDimension];
-  uint16_t ref16[kOutputStride * kMaxDimension];
-  uint8_t *ref;
-  if (UUT_->use_highbd_ == 0) {
-    ref = ref8;
-  } else {
-    ref = CONVERT_TO_BYTEPTR(ref16);
-  }
-#else
-  uint8_t ref[kOutputStride * kMaxDimension];
-#endif
-
-  // Populate ref and out with some random data
-  ::libaom_test::ACMRandom prng;
-  for (int y = 0; y < Height(); ++y) {
-    for (int x = 0; x < Width(); ++x) {
-      uint16_t r;
-#if CONFIG_HIGHBITDEPTH
-      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
-        r = prng.Rand8Extremes();
-      } else {
-        r = prng.Rand16() & mask_;
-      }
-#else
-      r = prng.Rand8Extremes();
-#endif
-
-      assign_val(out, y * kOutputStride + x, r);
-      assign_val(ref, y * kOutputStride + x, r);
-    }
-  }
-
-  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
-    const InterpFilter filter = (InterpFilter)filter_bank;
-    const InterpKernel *filters =
-        (const InterpKernel *)av1_get_interp_filter_kernel(filter);
-#if CONFIG_DUAL_FILTER
-    const InterpFilterParams filter_params =
-        av1_get_interp_filter_params(filter);
-    if (filter_params.taps != SUBPEL_TAPS) continue;
-#endif
-
-    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
-      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
-        wrapper_filter_average_block2d_8_c(in, kInputStride, filters[filter_x],
-                                           filters[filter_y], ref,
-                                           kOutputStride, Width(), Height());
-
-        if (filter_x && filter_y)
-          ASM_REGISTER_STATE_CHECK(UUT_->hv8_avg_(
-              in, kInputStride, out, kOutputStride, filters[filter_x], 16,
-              filters[filter_y], 16, Width(), Height()));
-        else if (filter_y)
-          ASM_REGISTER_STATE_CHECK(UUT_->v8_avg_(
-              in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
-              filters[filter_y], 16, Width(), Height()));
-        else if (filter_x)
-          ASM_REGISTER_STATE_CHECK(UUT_->h8_avg_(
-              in, kInputStride, out, kOutputStride, filters[filter_x], 16,
-              kInvalidFilter, 16, Width(), Height()));
-        else
-          ASM_REGISTER_STATE_CHECK(
-              UUT_->avg_(in, kInputStride, out, kOutputStride, kInvalidFilter,
-                         0, kInvalidFilter, 0, Width(), Height()));
-
-        CheckGuardBlocks();
-
-        for (int y = 0; y < Height(); ++y)
-          for (int x = 0; x < Width(); ++x)
-            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
-                      lookup(out, y * kOutputStride + x))
-                << "mismatch at (" << x << "," << y << "), "
-                << "filters (" << filter_bank << "," << filter_x << ","
-                << filter_y << ")";
-      }
-    }
-  }
-}
-
 TEST_P(ConvolveTest, FilterExtremes) {
   uint8_t *const in = input();
   uint8_t *const out = output();
-#if CONFIG_HIGHBITDEPTH
   uint8_t ref8[kOutputStride * kMaxDimension];
   uint16_t ref16[kOutputStride * kMaxDimension];
   uint8_t *ref;
@@ -825,24 +578,17 @@ TEST_P(ConvolveTest, FilterExtremes) {
   } else {
     ref = CONVERT_TO_BYTEPTR(ref16);
   }
-#else
-  uint8_t ref[kOutputStride * kMaxDimension];
-#endif
 
   // Populate ref and out with some random data
   ::libaom_test::ACMRandom prng;
   for (int y = 0; y < Height(); ++y) {
     for (int x = 0; x < Width(); ++x) {
       uint16_t r;
-#if CONFIG_HIGHBITDEPTH
       if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
         r = prng.Rand8Extremes();
       } else {
         r = prng.Rand16() & mask_;
       }
-#else
-      r = prng.Rand8Extremes();
-#endif
       assign_val(out, y * kOutputStride + x, r);
       assign_val(ref, y * kOutputStride + x, r);
     }
@@ -853,13 +599,8 @@ TEST_P(ConvolveTest, FilterExtremes) {
     while (seed_val < 256) {
       for (int y = 0; y < 8; ++y) {
         for (int x = 0; x < 8; ++x) {
-#if CONFIG_HIGHBITDEPTH
           assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
                      ((seed_val >> (axis ? y : x)) & 1) * mask_);
-#else
-          assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
-                     ((seed_val >> (axis ? y : x)) & 1) * 255);
-#endif
           if (axis) seed_val++;
         }
         if (axis)
@@ -873,20 +614,16 @@ TEST_P(ConvolveTest, FilterExtremes) {
         const InterpFilter filter = (InterpFilter)filter_bank;
         const InterpKernel *filters =
             (const InterpKernel *)av1_get_interp_filter_kernel(filter);
-#if CONFIG_DUAL_FILTER
         const InterpFilterParams filter_params =
-            av1_get_interp_filter_params(filter);
+            av1_get_interp_filter_params_with_block_size(filter, 8);
         if (filter_params.taps != SUBPEL_TAPS) continue;
-#endif
         for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
           for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
             wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
                                        filters[filter_y], ref, kOutputStride,
                                        Width(), Height());
             if (filter_x && filter_y)
-              ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
-                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
-                  filters[filter_y], 16, Width(), Height()));
+              continue;
             else if (filter_y)
               ASM_REGISTER_STATE_CHECK(UUT_->v8_(
                   in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
@@ -914,37 +651,6 @@ TEST_P(ConvolveTest, FilterExtremes) {
   }
 }
 
-/* This test exercises that enough rows and columns are filtered with every
-   possible initial fractional positions and scaling steps. */
-TEST_P(ConvolveTest, CheckScalingFiltering) {
-  uint8_t *const in = input();
-  uint8_t *const out = output();
-  const InterpKernel *const eighttap =
-      (const InterpKernel *)av1_get_interp_filter_kernel(EIGHTTAP_REGULAR);
-
-  SetConstantInput(127);
-
-  for (int frac = 0; frac < 16; ++frac) {
-    for (int step = 1; step <= 32; ++step) {
-      /* Test the horizontal and vertical filters in combination. */
-      ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
-                                           eighttap[frac], step, eighttap[frac],
-                                           step, Width(), Height()));
-
-      CheckGuardBlocks();
-
-      for (int y = 0; y < Height(); ++y) {
-        for (int x = 0; x < Width(); ++x) {
-          ASSERT_EQ(lookup(in, y * kInputStride + x),
-                    lookup(out, y * kOutputStride + x))
-              << "x == " << x << ", y == " << y << ", frac == " << frac
-              << ", step == " << step;
-        }
-      }
-    }
-  }
-}
-
 TEST_P(ConvolveTest, DISABLED_Copy_Speed) {
   const uint8_t *const in = input();
   uint8_t *const out = output();
@@ -965,30 +671,9 @@ TEST_P(ConvolveTest, DISABLED_Copy_Speed) {
          UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
 }
 
-TEST_P(ConvolveTest, DISABLED_Avg_Speed) {
-  const uint8_t *const in = input();
-  uint8_t *const out = output();
-  const int kNumTests = 5000000;
-  const int width = Width();
-  const int height = Height();
-  aom_usec_timer timer;
-
-  aom_usec_timer_start(&timer);
-  for (int n = 0; n < kNumTests; ++n) {
-    UUT_->avg_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0, width,
-               height);
-  }
-  aom_usec_timer_mark(&timer);
-
-  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
-  printf("convolve_avg_%dx%d_%d: %d us\n", width, height,
-         UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
-}
-
 TEST_P(ConvolveTest, DISABLED_Speed) {
   uint8_t *const in = input();
   uint8_t *const out = output();
-#if CONFIG_HIGHBITDEPTH
   uint8_t ref8[kOutputStride * kMaxDimension];
   uint16_t ref16[kOutputStride * kMaxDimension];
   uint8_t *ref;
@@ -997,25 +682,17 @@ TEST_P(ConvolveTest, DISABLED_Speed) {
   } else {
     ref = CONVERT_TO_BYTEPTR(ref16);
   }
-#else
-  uint8_t ref[kOutputStride * kMaxDimension];
-#endif
 
   // Populate ref and out with some random data
   ::libaom_test::ACMRandom prng;
   for (int y = 0; y < Height(); ++y) {
     for (int x = 0; x < Width(); ++x) {
       uint16_t r;
-#if CONFIG_HIGHBITDEPTH
       if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
         r = prng.Rand8Extremes();
       } else {
         r = prng.Rand16() & mask_;
       }
-#else
-      r = prng.Rand8Extremes();
-#endif
-
       assign_val(out, y * kOutputStride + x, r);
       assign_val(ref, y * kOutputStride + x, r);
     }
@@ -1036,18 +713,13 @@ TEST_P(ConvolveTest, DISABLED_Speed) {
       const InterpFilter filter = (InterpFilter)filter_bank;
       const InterpKernel *filters =
           (const InterpKernel *)av1_get_interp_filter_kernel(filter);
-#if CONFIG_DUAL_FILTER
       const InterpFilterParams filter_params =
-          av1_get_interp_filter_params(filter);
+          av1_get_interp_filter_params_with_block_size(filter, 8);
       if (filter_params.taps != SUBPEL_TAPS) continue;
-#endif
 
       for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
         for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
-          if (filter_x && filter_y)
-            ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
-                in, kInputStride, out, kOutputStride, filters[filter_x], 16,
-                filters[filter_y], 16, Width(), Height()));
+          if (filter_x && filter_y) continue;
           if (filter_y)
             ASM_REGISTER_STATE_CHECK(
                 UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
@@ -1069,11 +741,10 @@ TEST_P(ConvolveTest, DISABLED_Speed) {
          UUT_->use_highbd_, elapsed_time);
 }
 
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
 
-#if CONFIG_HIGHBITDEPTH
 #define WRAP(func, bd)                                                       \
-  void wrap_##func##_##bd(                                                   \
+  static void wrap_##func##_##bd(                                            \
       const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
       ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride,    \
       const int16_t *filter_y, int filter_y_stride, int w, int h) {          \
@@ -1082,173 +753,78 @@ using std::tr1::make_tuple;
   }
 #if HAVE_SSE2 && ARCH_X86_64
 WRAP(convolve_copy_sse2, 8)
-WRAP(convolve_avg_sse2, 8)
 WRAP(convolve_copy_sse2, 10)
-WRAP(convolve_avg_sse2, 10)
 WRAP(convolve_copy_sse2, 12)
-WRAP(convolve_avg_sse2, 12)
 WRAP(convolve8_horiz_sse2, 8)
-WRAP(convolve8_avg_horiz_sse2, 8)
 WRAP(convolve8_vert_sse2, 8)
-WRAP(convolve8_avg_vert_sse2, 8)
-WRAP(convolve8_sse2, 8)
-WRAP(convolve8_avg_sse2, 8)
 WRAP(convolve8_horiz_sse2, 10)
-WRAP(convolve8_avg_horiz_sse2, 10)
 WRAP(convolve8_vert_sse2, 10)
-WRAP(convolve8_avg_vert_sse2, 10)
-WRAP(convolve8_sse2, 10)
-WRAP(convolve8_avg_sse2, 10)
 WRAP(convolve8_horiz_sse2, 12)
-WRAP(convolve8_avg_horiz_sse2, 12)
 WRAP(convolve8_vert_sse2, 12)
-WRAP(convolve8_avg_vert_sse2, 12)
-WRAP(convolve8_sse2, 12)
-WRAP(convolve8_avg_sse2, 12)
 #endif  // HAVE_SSE2 && ARCH_X86_64
 
 WRAP(convolve_copy_c, 8)
-WRAP(convolve_avg_c, 8)
 WRAP(convolve8_horiz_c, 8)
-WRAP(convolve8_avg_horiz_c, 8)
 WRAP(convolve8_vert_c, 8)
-WRAP(convolve8_avg_vert_c, 8)
-WRAP(convolve8_c, 8)
-WRAP(convolve8_avg_c, 8)
 WRAP(convolve_copy_c, 10)
-WRAP(convolve_avg_c, 10)
 WRAP(convolve8_horiz_c, 10)
-WRAP(convolve8_avg_horiz_c, 10)
 WRAP(convolve8_vert_c, 10)
-WRAP(convolve8_avg_vert_c, 10)
-WRAP(convolve8_c, 10)
-WRAP(convolve8_avg_c, 10)
 WRAP(convolve_copy_c, 12)
-WRAP(convolve_avg_c, 12)
 WRAP(convolve8_horiz_c, 12)
-WRAP(convolve8_avg_horiz_c, 12)
 WRAP(convolve8_vert_c, 12)
-WRAP(convolve8_avg_vert_c, 12)
-WRAP(convolve8_c, 12)
-WRAP(convolve8_avg_c, 12)
 
 #if HAVE_AVX2
 WRAP(convolve_copy_avx2, 8)
-WRAP(convolve_avg_avx2, 8)
 WRAP(convolve8_horiz_avx2, 8)
-WRAP(convolve8_avg_horiz_avx2, 8)
 WRAP(convolve8_vert_avx2, 8)
-WRAP(convolve8_avg_vert_avx2, 8)
-WRAP(convolve8_avx2, 8)
-WRAP(convolve8_avg_avx2, 8)
 
 WRAP(convolve_copy_avx2, 10)
-WRAP(convolve_avg_avx2, 10)
-WRAP(convolve8_avx2, 10)
 WRAP(convolve8_horiz_avx2, 10)
 WRAP(convolve8_vert_avx2, 10)
-WRAP(convolve8_avg_avx2, 10)
-WRAP(convolve8_avg_horiz_avx2, 10)
-WRAP(convolve8_avg_vert_avx2, 10)
 
 WRAP(convolve_copy_avx2, 12)
-WRAP(convolve_avg_avx2, 12)
-WRAP(convolve8_avx2, 12)
 WRAP(convolve8_horiz_avx2, 12)
 WRAP(convolve8_vert_avx2, 12)
-WRAP(convolve8_avg_avx2, 12)
-WRAP(convolve8_avg_horiz_avx2, 12)
-WRAP(convolve8_avg_vert_avx2, 12)
 #endif  // HAVE_AVX2
 
 #undef WRAP
 
-const ConvolveFunctions convolve8_c(
-    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, wrap_convolve8_horiz_c_8,
-    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
-    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
-    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
-    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8,
-    wrap_convolve8_avg_c_8, 8);
-const ConvolveFunctions convolve10_c(
-    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, wrap_convolve8_horiz_c_10,
-    wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10,
-    wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
-    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
-    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
-    wrap_convolve8_avg_c_10, 10);
-const ConvolveFunctions convolve12_c(
-    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, wrap_convolve8_horiz_c_12,
-    wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12,
-    wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
-    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
-    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
-    wrap_convolve8_avg_c_12, 12);
+const ConvolveFunctions convolve8_c(wrap_convolve_copy_c_8,
+                                    wrap_convolve8_horiz_c_8,
+                                    wrap_convolve8_vert_c_8, 8);
+const ConvolveFunctions convolve10_c(wrap_convolve_copy_c_10,
+                                     wrap_convolve8_horiz_c_10,
+                                     wrap_convolve8_vert_c_10, 10);
+const ConvolveFunctions convolve12_c(wrap_convolve_copy_c_12,
+                                     wrap_convolve8_horiz_c_12,
+                                     wrap_convolve8_vert_c_12, 12);
 const ConvolveParam kArrayConvolve_c[] = {
   ALL_SIZES(convolve8_c), ALL_SIZES(convolve10_c), ALL_SIZES(convolve12_c)
 };
 
-#else
-const ConvolveFunctions convolve8_c(
-    aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_c,
-    aom_convolve8_avg_horiz_c, aom_convolve8_vert_c, aom_convolve8_avg_vert_c,
-    aom_convolve8_c, aom_convolve8_avg_c, aom_scaled_horiz_c,
-    aom_scaled_avg_horiz_c, aom_scaled_vert_c, aom_scaled_avg_vert_c,
-    aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
-const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
-#endif
 INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c));
 
 #if HAVE_SSE2 && ARCH_X86_64
-#if CONFIG_HIGHBITDEPTH
-const ConvolveFunctions convolve8_sse2(
-    wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
-    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
-    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
-    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
-    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
-    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
-    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
-const ConvolveFunctions convolve10_sse2(
-    wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10,
-    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
-    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
-    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
-    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
-    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
-    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
-const ConvolveFunctions convolve12_sse2(
-    wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12,
-    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
-    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
-    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
-    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
-    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
-    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
+const ConvolveFunctions convolve8_sse2(wrap_convolve_copy_sse2_8,
+                                       wrap_convolve8_horiz_sse2_8,
+                                       wrap_convolve8_vert_sse2_8, 8);
+const ConvolveFunctions convolve10_sse2(wrap_convolve_copy_sse2_10,
+                                        wrap_convolve8_horiz_sse2_10,
+                                        wrap_convolve8_vert_sse2_10, 10);
+const ConvolveFunctions convolve12_sse2(wrap_convolve_copy_sse2_12,
+                                        wrap_convolve8_horiz_sse2_12,
+                                        wrap_convolve8_vert_sse2_12, 12);
 const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2),
                                               ALL_SIZES(convolve10_sse2),
                                               ALL_SIZES(convolve12_sse2) };
-#else
-const ConvolveFunctions convolve8_sse2(
-    aom_convolve_copy_sse2, aom_convolve_avg_sse2, aom_convolve8_horiz_sse2,
-    aom_convolve8_avg_horiz_sse2, aom_convolve8_vert_sse2,
-    aom_convolve8_avg_vert_sse2, aom_convolve8_sse2, aom_convolve8_avg_sse2,
-    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
-    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
-
-const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) };
-#endif  // CONFIG_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest,
                         ::testing::ValuesIn(kArrayConvolve_sse2));
 #endif
 
 #if HAVE_SSSE3
-const ConvolveFunctions convolve8_ssse3(
-    aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_ssse3,
-    aom_convolve8_avg_horiz_ssse3, aom_convolve8_vert_ssse3,
-    aom_convolve8_avg_vert_ssse3, aom_convolve8_ssse3, aom_convolve8_avg_ssse3,
-    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
-    aom_scaled_avg_vert_c, aom_scaled_2d_ssse3, aom_scaled_avg_2d_c, 0);
+const ConvolveFunctions convolve8_ssse3(aom_convolve_copy_c,
+                                        aom_convolve8_horiz_ssse3,
+                                        aom_convolve8_vert_ssse3, 0);
 
 const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };
 INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
@@ -1256,95 +832,20 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
 #endif
 
 #if HAVE_AVX2
-#if CONFIG_HIGHBITDEPTH
-const ConvolveFunctions convolve8_avx2(
-    wrap_convolve_copy_avx2_8, wrap_convolve_avg_avx2_8,
-    wrap_convolve8_horiz_avx2_8, wrap_convolve8_avg_horiz_avx2_8,
-    wrap_convolve8_vert_avx2_8, wrap_convolve8_avg_vert_avx2_8,
-    wrap_convolve8_avx2_8, wrap_convolve8_avg_avx2_8, wrap_convolve8_horiz_c_8,
-    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
-    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
-const ConvolveFunctions convolve10_avx2(
-    wrap_convolve_copy_avx2_10, wrap_convolve_avg_avx2_10,
-    wrap_convolve8_horiz_avx2_10, wrap_convolve8_avg_horiz_avx2_10,
-    wrap_convolve8_vert_avx2_10, wrap_convolve8_avg_vert_avx2_10,
-    wrap_convolve8_avx2_10, wrap_convolve8_avg_avx2_10,
-    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
-    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
-    wrap_convolve8_avg_c_10, 10);
-const ConvolveFunctions convolve12_avx2(
-    wrap_convolve_copy_avx2_12, wrap_convolve_avg_avx2_12,
-    wrap_convolve8_horiz_avx2_12, wrap_convolve8_avg_horiz_avx2_12,
-    wrap_convolve8_vert_avx2_12, wrap_convolve8_avg_vert_avx2_12,
-    wrap_convolve8_avx2_12, wrap_convolve8_avg_avx2_12,
-    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
-    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
-    wrap_convolve8_avg_c_12, 12);
+const ConvolveFunctions convolve8_avx2(wrap_convolve_copy_avx2_8,
+                                       wrap_convolve8_horiz_avx2_8,
+                                       wrap_convolve8_vert_avx2_8, 8);
+const ConvolveFunctions convolve10_avx2(wrap_convolve_copy_avx2_10,
+                                        wrap_convolve8_horiz_avx2_10,
+                                        wrap_convolve8_vert_avx2_10, 10);
+const ConvolveFunctions convolve12_avx2(wrap_convolve_copy_avx2_12,
+                                        wrap_convolve8_horiz_avx2_12,
+                                        wrap_convolve8_vert_avx2_12, 12);
 const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES_64(convolve8_avx2),
                                                ALL_SIZES_64(convolve10_avx2),
                                                ALL_SIZES_64(convolve12_avx2) };
-#else
-const ConvolveFunctions convolve8_avx2(
-    aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_avx2,
-    aom_convolve8_avg_horiz_ssse3, aom_convolve8_vert_avx2,
-    aom_convolve8_avg_vert_ssse3, aom_convolve8_avx2, aom_convolve8_avg_ssse3,
-    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
-    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
-
-const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES_64(convolve8_avx2) };
-#endif  // CONFIG_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
                         ::testing::ValuesIn(kArrayConvolve8_avx2));
 #endif  // HAVE_AVX2
 
-// TODO(any): Make NEON versions support 128x128 128x64 64x128 block sizes
-#if HAVE_NEON && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
-#if HAVE_NEON_ASM
-const ConvolveFunctions convolve8_neon(
-    aom_convolve_copy_neon, aom_convolve_avg_neon, aom_convolve8_horiz_neon,
-    aom_convolve8_avg_horiz_neon, aom_convolve8_vert_neon,
-    aom_convolve8_avg_vert_neon, aom_convolve8_neon, aom_convolve8_avg_neon,
-    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
-    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
-#else   // HAVE_NEON
-const ConvolveFunctions convolve8_neon(
-    aom_convolve_copy_neon, aom_convolve_avg_neon, aom_convolve8_horiz_neon,
-    aom_convolve8_avg_horiz_neon, aom_convolve8_vert_neon,
-    aom_convolve8_avg_vert_neon, aom_convolve8_neon, aom_convolve8_avg_neon,
-    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
-    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
-#endif  // HAVE_NEON_ASM
-
-const ConvolveParam kArrayConvolve8_neon[] = { ALL_SIZES_64(convolve8_neon) };
-INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest,
-                        ::testing::ValuesIn(kArrayConvolve8_neon));
-#endif  // HAVE_NEON && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
-
-// TODO(any): Make DSPR2 versions support 128x128 128x64 64x128 block sizes
-#if HAVE_DSPR2 && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
-const ConvolveFunctions convolve8_dspr2(
-    aom_convolve_copy_dspr2, aom_convolve_avg_dspr2, aom_convolve8_horiz_dspr2,
-    aom_convolve8_avg_horiz_dspr2, aom_convolve8_vert_dspr2,
-    aom_convolve8_avg_vert_dspr2, aom_convolve8_dspr2, aom_convolve8_avg_dspr2,
-    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
-    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
-
-const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES_64(convolve8_dspr2) };
-INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest,
-                        ::testing::ValuesIn(kArrayConvolve8_dspr2));
-#endif  // HAVE_DSPR2 && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
-
-// TODO(any): Make MSA versions support 128x128 128x64 64x128 block sizes
-#if HAVE_MSA && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
-const ConvolveFunctions convolve8_msa(
-    aom_convolve_copy_msa, aom_convolve_avg_msa, aom_convolve8_horiz_msa,
-    aom_convolve8_avg_horiz_msa, aom_convolve8_vert_msa,
-    aom_convolve8_avg_vert_msa, aom_convolve8_msa, aom_convolve8_avg_msa,
-    aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
-    aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
-
-const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES_64(convolve8_msa) };
-INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest,
-                        ::testing::ValuesIn(kArrayConvolve8_msa));
-#endif  // HAVE_MSA && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
 }  // namespace
diff --git a/third_party/aom/test/corner_match_test.cc b/third_party/aom/test/corner_match_test.cc
index 2197fffee4..58e3139c5f 100644
--- a/third_party/aom/test/corner_match_test.cc
+++ b/third_party/aom/test/corner_match_test.cc
@@ -8,11 +8,11 @@
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
+#include "config/av1_rtcd.h"
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "test/acm_random.h"
 #include "test/util.h"
-#include "./av1_rtcd.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 
@@ -24,8 +24,8 @@ namespace AV1CornerMatch {
 
 using libaom_test::ACMRandom;
 
-using std::tr1::tuple;
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
+using ::testing::tuple;
 typedef tuple<int> CornerMatchParam;
 
 class AV1CornerMatchTest : public ::testing::TestWithParam<CornerMatchParam> {
diff --git a/third_party/aom/test/cpu_speed_test.cc b/third_party/aom/test/cpu_speed_test.cc
index bde00472de..8ea3e69650 100644
--- a/third_party/aom/test/cpu_speed_test.cc
+++ b/third_party/aom/test/cpu_speed_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "test/codec_factory.h"
diff --git a/third_party/aom/test/datarate_test.cc b/third_party/aom/test/datarate_test.cc
index d577be35a9..1588d3cc19 100644
--- a/third_party/aom/test/datarate_test.cc
+++ b/third_party/aom/test/datarate_test.cc
@@ -7,9 +7,10 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
+
+#include "config/aom_config.h"
 
-#include "./aom_config.h"
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "test/codec_factory.h"
 #include "test/encode_test_driver.h"
@@ -215,6 +216,7 @@ TEST_P(DatarateTestLarge, ChangingDropFrameThresh) {
   cfg_.rc_end_usage = AOM_CBR;
   cfg_.rc_target_bitrate = 200;
   cfg_.g_lag_in_frames = 0;
+  cfg_.g_error_resilient = 1;
   // TODO(marpan): Investigate datarate target failures with a smaller keyframe
   // interval (128).
   cfg_.kf_max_dist = 9999;
diff --git a/third_party/aom/test/dct16x16_test.cc b/third_party/aom/test/dct16x16_test.cc
deleted file mode 100644
index 3cc0ed8c0e..0000000000
--- a/third_party/aom/test/dct16x16_test.cc
+++ /dev/null
@@ -1,888 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "av1/common/entropy.h"
-#include "av1/common/scan.h"
-#include "aom/aom_codec.h"
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/msvc.h"  // for round()
-
-using libaom_test::ACMRandom;
-
-namespace {
-
-const int kNumCoeffs = 256;
-const double C1 = 0.995184726672197;
-const double C2 = 0.98078528040323;
-const double C3 = 0.956940335732209;
-const double C4 = 0.923879532511287;
-const double C5 = 0.881921264348355;
-const double C6 = 0.831469612302545;
-const double C7 = 0.773010453362737;
-const double C8 = 0.707106781186548;
-const double C9 = 0.634393284163646;
-const double C10 = 0.555570233019602;
-const double C11 = 0.471396736825998;
-const double C12 = 0.38268343236509;
-const double C13 = 0.290284677254462;
-const double C14 = 0.195090322016128;
-const double C15 = 0.098017140329561;
-
-void butterfly_16x16_dct_1d(double input[16], double output[16]) {
-  double step[16];
-  double intermediate[16];
-  double temp1, temp2;
-
-  // step 1
-  step[0] = input[0] + input[15];
-  step[1] = input[1] + input[14];
-  step[2] = input[2] + input[13];
-  step[3] = input[3] + input[12];
-  step[4] = input[4] + input[11];
-  step[5] = input[5] + input[10];
-  step[6] = input[6] + input[9];
-  step[7] = input[7] + input[8];
-  step[8] = input[7] - input[8];
-  step[9] = input[6] - input[9];
-  step[10] = input[5] - input[10];
-  step[11] = input[4] - input[11];
-  step[12] = input[3] - input[12];
-  step[13] = input[2] - input[13];
-  step[14] = input[1] - input[14];
-  step[15] = input[0] - input[15];
-
-  // step 2
-  output[0] = step[0] + step[7];
-  output[1] = step[1] + step[6];
-  output[2] = step[2] + step[5];
-  output[3] = step[3] + step[4];
-  output[4] = step[3] - step[4];
-  output[5] = step[2] - step[5];
-  output[6] = step[1] - step[6];
-  output[7] = step[0] - step[7];
-
-  temp1 = step[8] * C7;
-  temp2 = step[15] * C9;
-  output[8] = temp1 + temp2;
-
-  temp1 = step[9] * C11;
-  temp2 = step[14] * C5;
-  output[9] = temp1 - temp2;
-
-  temp1 = step[10] * C3;
-  temp2 = step[13] * C13;
-  output[10] = temp1 + temp2;
-
-  temp1 = step[11] * C15;
-  temp2 = step[12] * C1;
-  output[11] = temp1 - temp2;
-
-  temp1 = step[11] * C1;
-  temp2 = step[12] * C15;
-  output[12] = temp2 + temp1;
-
-  temp1 = step[10] * C13;
-  temp2 = step[13] * C3;
-  output[13] = temp2 - temp1;
-
-  temp1 = step[9] * C5;
-  temp2 = step[14] * C11;
-  output[14] = temp2 + temp1;
-
-  temp1 = step[8] * C9;
-  temp2 = step[15] * C7;
-  output[15] = temp2 - temp1;
-
-  // step 3
-  step[0] = output[0] + output[3];
-  step[1] = output[1] + output[2];
-  step[2] = output[1] - output[2];
-  step[3] = output[0] - output[3];
-
-  temp1 = output[4] * C14;
-  temp2 = output[7] * C2;
-  step[4] = temp1 + temp2;
-
-  temp1 = output[5] * C10;
-  temp2 = output[6] * C6;
-  step[5] = temp1 + temp2;
-
-  temp1 = output[5] * C6;
-  temp2 = output[6] * C10;
-  step[6] = temp2 - temp1;
-
-  temp1 = output[4] * C2;
-  temp2 = output[7] * C14;
-  step[7] = temp2 - temp1;
-
-  step[8] = output[8] + output[11];
-  step[9] = output[9] + output[10];
-  step[10] = output[9] - output[10];
-  step[11] = output[8] - output[11];
-
-  step[12] = output[12] + output[15];
-  step[13] = output[13] + output[14];
-  step[14] = output[13] - output[14];
-  step[15] = output[12] - output[15];
-
-  // step 4
-  output[0] = (step[0] + step[1]);
-  output[8] = (step[0] - step[1]);
-
-  temp1 = step[2] * C12;
-  temp2 = step[3] * C4;
-  temp1 = temp1 + temp2;
-  output[4] = 2 * (temp1 * C8);
-
-  temp1 = step[2] * C4;
-  temp2 = step[3] * C12;
-  temp1 = temp2 - temp1;
-  output[12] = 2 * (temp1 * C8);
-
-  output[2] = 2 * ((step[4] + step[5]) * C8);
-  output[14] = 2 * ((step[7] - step[6]) * C8);
-
-  temp1 = step[4] - step[5];
-  temp2 = step[6] + step[7];
-  output[6] = (temp1 + temp2);
-  output[10] = (temp1 - temp2);
-
-  intermediate[8] = step[8] + step[14];
-  intermediate[9] = step[9] + step[15];
-
-  temp1 = intermediate[8] * C12;
-  temp2 = intermediate[9] * C4;
-  temp1 = temp1 - temp2;
-  output[3] = 2 * (temp1 * C8);
-
-  temp1 = intermediate[8] * C4;
-  temp2 = intermediate[9] * C12;
-  temp1 = temp2 + temp1;
-  output[13] = 2 * (temp1 * C8);
-
-  output[9] = 2 * ((step[10] + step[11]) * C8);
-
-  intermediate[11] = step[10] - step[11];
-  intermediate[12] = step[12] + step[13];
-  intermediate[13] = step[12] - step[13];
-  intermediate[14] = step[8] - step[14];
-  intermediate[15] = step[9] - step[15];
-
-  output[15] = (intermediate[11] + intermediate[12]);
-  output[1] = -(intermediate[11] - intermediate[12]);
-
-  output[7] = 2 * (intermediate[13] * C8);
-
-  temp1 = intermediate[14] * C12;
-  temp2 = intermediate[15] * C4;
-  temp1 = temp1 - temp2;
-  output[11] = -2 * (temp1 * C8);
-
-  temp1 = intermediate[14] * C4;
-  temp2 = intermediate[15] * C12;
-  temp1 = temp2 + temp1;
-  output[5] = 2 * (temp1 * C8);
-}
-
-void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
-  // First transform columns
-  for (int i = 0; i < 16; ++i) {
-    double temp_in[16], temp_out[16];
-    for (int j = 0; j < 16; ++j) temp_in[j] = input[j * 16 + i];
-    butterfly_16x16_dct_1d(temp_in, temp_out);
-    for (int j = 0; j < 16; ++j) output[j * 16 + i] = temp_out[j];
-  }
-  // Then transform rows
-  for (int i = 0; i < 16; ++i) {
-    double temp_in[16], temp_out[16];
-    for (int j = 0; j < 16; ++j) temp_in[j] = output[j + i * 16];
-    butterfly_16x16_dct_1d(temp_in, temp_out);
-    // Scale by some magic number
-    for (int j = 0; j < 16; ++j) output[j + i * 16] = temp_out[j] / 2;
-  }
-}
-
-typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
-typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
-typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
-                        TxfmParam *txfm_param);
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-
-typedef std::tr1::tuple<FdctFunc, IdctFunc, TX_TYPE, aom_bit_depth_t>
-    Dct16x16Param;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t>
-    Ht16x16Param;
-typedef std::tr1::tuple<IdctFunc, IdctFunc, TX_TYPE, aom_bit_depth_t>
-    Idct16x16Param;
-
-void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
-                   TxfmParam * /*txfm_param*/) {
-  aom_fdct16x16_c(in, out, stride);
-}
-
-void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
-                   const TxfmParam * /*txfm_param*/) {
-  aom_idct16x16_256_add_c(in, dest, stride);
-}
-
-void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,
-                  TxfmParam *txfm_param) {
-  av1_fht16x16_c(in, out, stride, txfm_param);
-}
-
-void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
-                  const TxfmParam *txfm_param) {
-  av1_iht16x16_256_add_c(in, dest, stride, txfm_param);
-}
-
-#if CONFIG_HIGHBITDEPTH
-void fht16x16_10(const int16_t *in, tran_low_t *out, int stride,
-                 TxfmParam *txfm_param) {
-  av1_fwd_txfm2d_16x16_c(in, out, stride, txfm_param->tx_type, 10);
-}
-
-void fht16x16_12(const int16_t *in, tran_low_t *out, int stride,
-                 TxfmParam *txfm_param) {
-  av1_fwd_txfm2d_16x16_c(in, out, stride, txfm_param->tx_type, 12);
-}
-
-void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride,
-                 const TxfmParam *txfm_param) {
-  av1_inv_txfm2d_add_16x16_c(in, CONVERT_TO_SHORTPTR(out), stride,
-                             txfm_param->tx_type, 10);
-}
-
-void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride,
-                 const TxfmParam *txfm_param) {
-  av1_inv_txfm2d_add_16x16_c(in, CONVERT_TO_SHORTPTR(out), stride,
-                             txfm_param->tx_type, 12);
-}
-#endif  // CONFIG_HIGHBITDEPTH
-
-class Trans16x16TestBase {
- public:
-  virtual ~Trans16x16TestBase() {}
-
- protected:
-  virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
-
-  virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
-
-  void RunAccuracyCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    uint32_t max_error = 0;
-    int64_t total_error = 0;
-    const int count_test_block = 10000;
-    for (int i = 0; i < count_test_block; ++i) {
-      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
-      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
-      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
-#if CONFIG_HIGHBITDEPTH
-      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
-#endif
-
-      // Initialize a test block with input range [-mask_, mask_].
-      for (int j = 0; j < kNumCoeffs; ++j) {
-        if (bit_depth_ == AOM_BITS_8) {
-          src[j] = rnd.Rand8();
-          dst[j] = rnd.Rand8();
-          test_input_block[j] = src[j] - dst[j];
-#if CONFIG_HIGHBITDEPTH
-        } else {
-          src16[j] = rnd.Rand16() & mask_;
-          dst16[j] = rnd.Rand16() & mask_;
-          test_input_block[j] = src16[j] - dst16[j];
-#endif
-        }
-      }
-
-      ASM_REGISTER_STATE_CHECK(
-          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
-      if (bit_depth_ == AOM_BITS_8) {
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
-#if CONFIG_HIGHBITDEPTH
-      } else {
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
-#endif
-      }
-
-      for (int j = 0; j < kNumCoeffs; ++j) {
-#if CONFIG_HIGHBITDEPTH
-        const int32_t diff =
-            bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
-#else
-        const int32_t diff = dst[j] - src[j];
-#endif
-        const uint32_t error = diff * diff;
-        if (max_error < error) max_error = error;
-        total_error += error;
-      }
-    }
-
-    EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
-        << "Error: 16x16 FHT/IHT has an individual round trip error > 1";
-
-    EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
-        << "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
-  }
-
-  void RunCoeffCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 1000;
-    DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
-
-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-mask_, mask_].
-      for (int j = 0; j < kNumCoeffs; ++j)
-        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
-
-      fwd_txfm_ref(input_block, output_ref_block, pitch_, &txfm_param_);
-      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
-
-      // The minimum quant value is 4.
-      for (int j = 0; j < kNumCoeffs; ++j)
-        EXPECT_EQ(output_block[j], output_ref_block[j]);
-    }
-  }
-
-  void RunMemCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 1000;
-    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
-
-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-mask_, mask_].
-      for (int j = 0; j < kNumCoeffs; ++j) {
-        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
-      }
-      if (i == 0) {
-        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
-      } else if (i == 1) {
-        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
-      }
-
-      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, &txfm_param_);
-      ASM_REGISTER_STATE_CHECK(
-          RunFwdTxfm(input_extreme_block, output_block, pitch_));
-
-      // The minimum quant value is 4.
-      for (int j = 0; j < kNumCoeffs; ++j) {
-        EXPECT_EQ(output_block[j], output_ref_block[j]);
-        EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
-            << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
-      }
-    }
-  }
-
-  void RunQuantCheck(int dc_thred, int ac_thred) {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 100000;
-    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
-
-    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
-#if CONFIG_HIGHBITDEPTH
-    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
-#endif
-
-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-mask_, mask_].
-      for (int j = 0; j < kNumCoeffs; ++j) {
-        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
-      }
-      if (i == 0)
-        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
-      if (i == 1)
-        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
-
-      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, &txfm_param_);
-
-      // clear reconstructed pixel buffers
-      memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
-      memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
-#if CONFIG_HIGHBITDEPTH
-      memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
-      memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
-#endif
-
-      // quantization with maximum allowed step sizes
-      output_ref_block[0] = (output_ref_block[0] / dc_thred) * dc_thred;
-      for (int j = 1; j < kNumCoeffs; ++j)
-        output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred;
-      if (bit_depth_ == AOM_BITS_8) {
-        inv_txfm_ref(output_ref_block, ref, pitch_, &txfm_param_);
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
-#if CONFIG_HIGHBITDEPTH
-      } else {
-        inv_txfm_ref(output_ref_block, CONVERT_TO_BYTEPTR(ref16), pitch_,
-                     &txfm_param_);
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(output_ref_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
-#endif
-      }
-      if (bit_depth_ == AOM_BITS_8) {
-        for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref[j], dst[j]);
-#if CONFIG_HIGHBITDEPTH
-      } else {
-        for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref16[j], dst16[j]);
-#endif
-      }
-    }
-  }
-
-  void RunInvAccuracyCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 1000;
-    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
-#if CONFIG_HIGHBITDEPTH
-    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
-#endif  // CONFIG_HIGHBITDEPTH
-
-    for (int i = 0; i < count_test_block; ++i) {
-      double out_r[kNumCoeffs];
-
-      // Initialize a test block with input range [-255, 255].
-      for (int j = 0; j < kNumCoeffs; ++j) {
-        if (bit_depth_ == AOM_BITS_8) {
-          src[j] = rnd.Rand8();
-          dst[j] = rnd.Rand8();
-          in[j] = src[j] - dst[j];
-#if CONFIG_HIGHBITDEPTH
-        } else {
-          src16[j] = rnd.Rand16() & mask_;
-          dst16[j] = rnd.Rand16() & mask_;
-          in[j] = src16[j] - dst16[j];
-#endif  // CONFIG_HIGHBITDEPTH
-        }
-      }
-
-      reference_16x16_dct_2d(in, out_r);
-      for (int j = 0; j < kNumCoeffs; ++j)
-        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
-
-      if (bit_depth_ == AOM_BITS_8) {
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
-#if CONFIG_HIGHBITDEPTH
-      } else {
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), 16));
-#endif  // CONFIG_HIGHBITDEPTH
-      }
-
-      for (int j = 0; j < kNumCoeffs; ++j) {
-#if CONFIG_HIGHBITDEPTH
-        const int diff =
-            bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
-#else
-        const int diff = dst[j] - src[j];
-#endif  // CONFIG_HIGHBITDEPTH
-        const uint32_t error = diff * diff;
-        EXPECT_GE(1u, error)
-            << "Error: 16x16 IDCT has error " << error << " at index " << j;
-      }
-    }
-  }
-
-  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 10000;
-    const int eob = 10;
-    const int16_t *scan = av1_default_scan_orders[TX_16X16].scan;
-    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
-#if CONFIG_HIGHBITDEPTH
-    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
-#endif  // CONFIG_HIGHBITDEPTH
-
-    for (int i = 0; i < count_test_block; ++i) {
-      for (int j = 0; j < kNumCoeffs; ++j) {
-        if (j < eob) {
-          // Random values less than the threshold, either positive or negative
-          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
-        } else {
-          coeff[scan[j]] = 0;
-        }
-        if (bit_depth_ == AOM_BITS_8) {
-          dst[j] = 0;
-          ref[j] = 0;
-#if CONFIG_HIGHBITDEPTH
-        } else {
-          dst16[j] = 0;
-          ref16[j] = 0;
-#endif  // CONFIG_HIGHBITDEPTH
-        }
-      }
-      if (bit_depth_ == AOM_BITS_8) {
-        ref_txfm(coeff, ref, pitch_);
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
-      } else {
-#if CONFIG_HIGHBITDEPTH
-        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
-#endif  // CONFIG_HIGHBITDEPTH
-      }
-
-      for (int j = 0; j < kNumCoeffs; ++j) {
-#if CONFIG_HIGHBITDEPTH
-        const int diff =
-            bit_depth_ == AOM_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
-#else
-        const int diff = dst[j] - ref[j];
-#endif  // CONFIG_HIGHBITDEPTH
-        const uint32_t error = diff * diff;
-        EXPECT_EQ(0u, error) << "Error: 16x16 IDCT Comparison has error "
-                             << error << " at index " << j;
-      }
-    }
-  }
-
-  int pitch_;
-  aom_bit_depth_t bit_depth_;
-  int mask_;
-  FhtFunc fwd_txfm_ref;
-  IhtFunc inv_txfm_ref;
-  TxfmParam txfm_param_;
-};
-
-class Trans16x16DCT : public Trans16x16TestBase,
-                      public ::testing::TestWithParam<Dct16x16Param> {
- public:
-  virtual ~Trans16x16DCT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    bit_depth_ = GET_PARAM(3);
-    pitch_ = 16;
-    fwd_txfm_ref = fdct16x16_ref;
-    inv_txfm_ref = idct16x16_ref;
-    mask_ = (1 << bit_depth_) - 1;
-    inv_txfm_ref = idct16x16_ref;
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride);
-  }
-  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride);
-  }
-
-  FdctFunc fwd_txfm_;
-  IdctFunc inv_txfm_;
-};
-
-TEST_P(Trans16x16DCT, AccuracyCheck) { RunAccuracyCheck(); }
-
-TEST_P(Trans16x16DCT, CoeffCheck) { RunCoeffCheck(); }
-
-TEST_P(Trans16x16DCT, MemCheck) { RunMemCheck(); }
-
-TEST_P(Trans16x16DCT, QuantCheck) {
-  // Use maximally allowed quantization step sizes for DC and AC
-  // coefficients respectively.
-  RunQuantCheck(1336, 1828);
-}
-
-TEST_P(Trans16x16DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }
-
-class Trans16x16HT : public Trans16x16TestBase,
-                     public ::testing::TestWithParam<Ht16x16Param> {
- public:
-  virtual ~Trans16x16HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    bit_depth_ = GET_PARAM(3);
-    pitch_ = 16;
-    mask_ = (1 << bit_depth_) - 1;
-    txfm_param_.tx_type = GET_PARAM(2);
-#if CONFIG_HIGHBITDEPTH
-    switch (bit_depth_) {
-      case AOM_BITS_10:
-        fwd_txfm_ref = fht16x16_10;
-        inv_txfm_ref = iht16x16_10;
-        break;
-      case AOM_BITS_12:
-        fwd_txfm_ref = fht16x16_12;
-        inv_txfm_ref = iht16x16_12;
-        break;
-      default:
-        fwd_txfm_ref = fht16x16_ref;
-        inv_txfm_ref = iht16x16_ref;
-        break;
-    }
-#else
-    fwd_txfm_ref = fht16x16_ref;
-    inv_txfm_ref = iht16x16_ref;
-#endif
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(Trans16x16HT, AccuracyCheck) { RunAccuracyCheck(); }
-
-TEST_P(Trans16x16HT, CoeffCheck) { RunCoeffCheck(); }
-
-TEST_P(Trans16x16HT, MemCheck) { RunMemCheck(); }
-
-TEST_P(Trans16x16HT, QuantCheck) {
-  // The encoder skips any non-DC intra prediction modes,
-  // when the quantization step size goes beyond 988.
-  RunQuantCheck(429, 729);
-}
-
-class InvTrans16x16DCT : public Trans16x16TestBase,
-                         public ::testing::TestWithParam<Idct16x16Param> {
- public:
-  virtual ~InvTrans16x16DCT() {}
-
-  virtual void SetUp() {
-    ref_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    thresh_ = GET_PARAM(2);
-    bit_depth_ = GET_PARAM(3);
-    pitch_ = 16;
-    mask_ = (1 << bit_depth_) - 1;
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {}
-  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride);
-  }
-
-  IdctFunc ref_txfm_;
-  IdctFunc inv_txfm_;
-  int thresh_;
-};
-
-TEST_P(InvTrans16x16DCT, CompareReference) {
-  CompareInvReference(ref_txfm_, thresh_);
-}
-
-class PartialTrans16x16Test : public ::testing::TestWithParam<
-                                  std::tr1::tuple<FdctFunc, aom_bit_depth_t> > {
- public:
-  virtual ~PartialTrans16x16Test() {}
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    bit_depth_ = GET_PARAM(1);
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  aom_bit_depth_t bit_depth_;
-  FdctFunc fwd_txfm_;
-};
-
-TEST_P(PartialTrans16x16Test, Extremes) {
-#if CONFIG_HIGHBITDEPTH
-  const int16_t maxval =
-      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
-#else
-  const int16_t maxval = 255;
-#endif
-  const int minval = -maxval;
-  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
-
-  for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
-  output[0] = 0;
-  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
-  EXPECT_EQ((maxval * kNumCoeffs) >> 1, output[0]);
-
-  for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
-  output[0] = 0;
-  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
-  EXPECT_EQ((minval * kNumCoeffs) >> 1, output[0]);
-}
-
-TEST_P(PartialTrans16x16Test, Random) {
-#if CONFIG_HIGHBITDEPTH
-  const int16_t maxval =
-      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
-#else
-  const int16_t maxval = 255;
-#endif
-  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-
-  int sum = 0;
-  for (int i = 0; i < kNumCoeffs; ++i) {
-    const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
-    input[i] = val;
-    sum += val;
-  }
-  output[0] = 0;
-  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
-  EXPECT_EQ(sum >> 1, output[0]);
-}
-
-using std::tr1::make_tuple;
-
-#if CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(C, Trans16x16DCT,
-                        ::testing::Values(make_tuple(&aom_fdct16x16_c,
-                                                     &aom_idct16x16_256_add_c,
-                                                     DCT_DCT, AOM_BITS_8)));
-#else
-INSTANTIATE_TEST_CASE_P(C, Trans16x16DCT,
-                        ::testing::Values(make_tuple(&aom_fdct16x16_c,
-                                                     &aom_idct16x16_256_add_c,
-                                                     DCT_DCT, AOM_BITS_8)));
-#endif  // CONFIG_HIGHBITDEPTH
-
-#if CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    C, Trans16x16HT,
-    ::testing::Values(
-        make_tuple(&fht16x16_10, &iht16x16_10, DCT_DCT, AOM_BITS_10),
-        make_tuple(&fht16x16_10, &iht16x16_10, ADST_DCT, AOM_BITS_10),
-        make_tuple(&fht16x16_10, &iht16x16_10, DCT_ADST, AOM_BITS_10),
-        make_tuple(&fht16x16_10, &iht16x16_10, ADST_ADST, AOM_BITS_10),
-        make_tuple(&fht16x16_12, &iht16x16_12, DCT_DCT, AOM_BITS_12),
-        make_tuple(&fht16x16_12, &iht16x16_12, ADST_DCT, AOM_BITS_12),
-        make_tuple(&fht16x16_12, &iht16x16_12, DCT_ADST, AOM_BITS_12),
-        make_tuple(&fht16x16_12, &iht16x16_12, ADST_ADST, AOM_BITS_12),
-        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, DCT_DCT,
-                   AOM_BITS_8),
-        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, ADST_DCT,
-                   AOM_BITS_8),
-        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, DCT_ADST,
-                   AOM_BITS_8),
-        make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, ADST_ADST,
-                   AOM_BITS_8)));
-#else
-INSTANTIATE_TEST_CASE_P(
-    C, Trans16x16HT,
-    ::testing::Values(make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c,
-                                 DCT_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c,
-                                 ADST_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c,
-                                 DCT_ADST, AOM_BITS_8),
-                      make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c,
-                                 ADST_ADST, AOM_BITS_8)));
-#endif  // CONFIG_HIGHBITDEPTH
-
-#if HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    NEON, Trans16x16DCT,
-    ::testing::Values(make_tuple(&aom_fdct16x16_c, &aom_idct16x16_256_add_neon,
-                                 DCT_DCT, AOM_BITS_8)));
-#endif
-
-#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(SSE2, Trans16x16DCT,
-                        ::testing::Values(make_tuple(
-                            &aom_fdct16x16_sse2, &aom_idct16x16_256_add_sse2,
-                            DCT_DCT, AOM_BITS_8)));
-#if !CONFIG_DAALA_DCT16
-INSTANTIATE_TEST_CASE_P(
-    SSE2, Trans16x16HT,
-    ::testing::Values(make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2,
-                                 DCT_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2,
-                                 ADST_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2,
-                                 DCT_ADST, AOM_BITS_8),
-                      make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2,
-                                 ADST_ADST, AOM_BITS_8)));
-#endif  // CONFIG_DAALA_DCT16
-#endif  // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
-
-#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(SSE2, Trans16x16DCT,
-                        ::testing::Values(make_tuple(&aom_fdct16x16_sse2,
-                                                     &aom_idct16x16_256_add_c,
-                                                     DCT_DCT, AOM_BITS_8)));
-#if !CONFIG_DAALA_DCT16
-INSTANTIATE_TEST_CASE_P(
-    SSE2, Trans16x16HT,
-    ::testing::Values(make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c,
-                                 DCT_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c,
-                                 ADST_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c,
-                                 DCT_ADST, AOM_BITS_8),
-                      make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c,
-                                 ADST_ADST, AOM_BITS_8)));
-#endif
-#endif  // HAVE_SSE2 && CONFIG_HIGHBITDEPTH
-
-#if HAVE_MSA && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(MSA, Trans16x16DCT,
-                        ::testing::Values(make_tuple(&aom_fdct16x16_msa,
-                                                     &aom_idct16x16_256_add_msa,
-                                                     DCT_DCT, AOM_BITS_8)));
-#if !CONFIG_EXT_TX && !CONFIG_DAALA_DCT16
-// TODO(yaowu): re-enable this after msa versions are updated to match C.
-INSTANTIATE_TEST_CASE_P(
-    DISABLED_MSA, Trans16x16HT,
-    ::testing::Values(make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa,
-                                 DCT_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa,
-                                 ADST_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa,
-                                 DCT_ADST, AOM_BITS_8),
-                      make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa,
-                                 ADST_ADST, AOM_BITS_8)));
-#endif  // !CONFIG_EXT_TX && !CONFIG_DAALA_DCT16
-#endif  // HAVE_MSA && !CONFIG_HIGHBITDEPTH
-}  // namespace
diff --git a/third_party/aom/test/dct32x32_test.cc b/third_party/aom/test/dct32x32_test.cc
deleted file mode 100644
index 02a723a9ce..0000000000
--- a/third_party/aom/test/dct32x32_test.cc
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "av1/common/entropy.h"
-#include "aom/aom_codec.h"
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/msvc.h"  // for round()
-
-using libaom_test::ACMRandom;
-
-namespace {
-
-const int kNumCoeffs = 1024;
-const double kPi = 3.141592653589793238462643383279502884;
-void reference_32x32_dct_1d(const double in[32], double out[32]) {
-  const double kInvSqrt2 = 0.707106781186547524400844362104;
-  for (int k = 0; k < 32; k++) {
-    out[k] = 0.0;
-    for (int n = 0; n < 32; n++)
-      out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0);
-    if (k == 0) out[k] = out[k] * kInvSqrt2;
-  }
-}
-
-void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
-                            double output[kNumCoeffs]) {
-  // First transform columns
-  for (int i = 0; i < 32; ++i) {
-    double temp_in[32], temp_out[32];
-    for (int j = 0; j < 32; ++j) temp_in[j] = input[j * 32 + i];
-    reference_32x32_dct_1d(temp_in, temp_out);
-    for (int j = 0; j < 32; ++j) output[j * 32 + i] = temp_out[j];
-  }
-  // Then transform rows
-  for (int i = 0; i < 32; ++i) {
-    double temp_in[32], temp_out[32];
-    for (int j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32];
-    reference_32x32_dct_1d(temp_in, temp_out);
-    // Scale by some magic number
-    for (int j = 0; j < 32; ++j) output[j + i * 32] = temp_out[j] / 4;
-  }
-}
-
-typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
-typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
-
-typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, aom_bit_depth_t>
-    Trans32x32Param;
-
-class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
- public:
-  virtual ~Trans32x32Test() {}
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    version_ = GET_PARAM(2);  // 0: high precision forward transform
-                              // 1: low precision version for rd loop
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  int version_;
-  aom_bit_depth_t bit_depth_;
-  int mask_;
-  FwdTxfmFunc fwd_txfm_;
-  InvTxfmFunc inv_txfm_;
-};
-
-TEST_P(Trans32x32Test, AccuracyCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  uint32_t max_error = 0;
-  int64_t total_error = 0;
-  const int count_test_block = 10000;
-  DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
-#if CONFIG_HIGHBITDEPTH
-  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
-#endif
-
-  for (int i = 0; i < count_test_block; ++i) {
-    // Initialize a test block with input range [-mask_, mask_].
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      if (bit_depth_ == AOM_BITS_8) {
-        src[j] = rnd.Rand8();
-        dst[j] = rnd.Rand8();
-        test_input_block[j] = src[j] - dst[j];
-#if CONFIG_HIGHBITDEPTH
-      } else {
-        src16[j] = rnd.Rand16() & mask_;
-        dst16[j] = rnd.Rand16() & mask_;
-        test_input_block[j] = src16[j] - dst16[j];
-#endif
-      }
-    }
-
-    ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
-    if (bit_depth_ == AOM_BITS_8) {
-      ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
-#if CONFIG_HIGHBITDEPTH
-    } else {
-      ASM_REGISTER_STATE_CHECK(
-          inv_txfm_(test_temp_block, CONVERT_TO_BYTEPTR(dst16), 32));
-#endif
-    }
-
-    for (int j = 0; j < kNumCoeffs; ++j) {
-#if CONFIG_HIGHBITDEPTH
-      const int32_t diff =
-          bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
-#else
-      const int32_t diff = dst[j] - src[j];
-#endif
-      const uint32_t error = diff * diff;
-      if (max_error < error) max_error = error;
-      total_error += error;
-    }
-  }
-
-  if (version_ == 1) {
-    max_error /= 2;
-    total_error /= 45;
-  }
-
-  EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
-      << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
-
-  EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
-      << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
-}
-
-TEST_P(Trans32x32Test, CoeffCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = 1000;
-
-  DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
-
-  for (int i = 0; i < count_test_block; ++i) {
-    for (int j = 0; j < kNumCoeffs; ++j)
-      input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
-
-    const int stride = 32;
-    aom_fdct32x32_c(input_block, output_ref_block, stride);
-    ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
-
-    if (version_ == 0) {
-      for (int j = 0; j < kNumCoeffs; ++j)
-        EXPECT_EQ(output_block[j], output_ref_block[j])
-            << "Error: 32x32 FDCT versions have mismatched coefficients";
-    } else {
-      for (int j = 0; j < kNumCoeffs; ++j)
-        EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
-            << "Error: 32x32 FDCT rd has mismatched coefficients";
-    }
-  }
-}
-
-TEST_P(Trans32x32Test, MemCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = 2000;
-
-  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
-
-  for (int i = 0; i < count_test_block; ++i) {
-    // Initialize a test block with input range [-mask_, mask_].
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
-    }
-    if (i == 0) {
-      for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
-    } else if (i == 1) {
-      for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
-    }
-
-    const int stride = 32;
-    aom_fdct32x32_c(input_extreme_block, output_ref_block, stride);
-    ASM_REGISTER_STATE_CHECK(
-        fwd_txfm_(input_extreme_block, output_block, stride));
-
-    // The minimum quant value is 4.
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      if (version_ == 0) {
-        EXPECT_EQ(output_block[j], output_ref_block[j])
-            << "Error: 32x32 FDCT versions have mismatched coefficients";
-      } else {
-        EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
-            << "Error: 32x32 FDCT rd has mismatched coefficients";
-      }
-      EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_ref_block[j]))
-          << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
-      EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
-          << "Error: 32x32 FDCT has coefficient larger than "
-          << "4*DCT_MAX_VALUE";
-    }
-  }
-}
-
-TEST_P(Trans32x32Test, InverseAccuracy) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = 1000;
-  DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
-#if CONFIG_HIGHBITDEPTH
-  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
-#endif
-
-  for (int i = 0; i < count_test_block; ++i) {
-    double out_r[kNumCoeffs];
-
-    // Initialize a test block with input range [-255, 255]
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      if (bit_depth_ == AOM_BITS_8) {
-        src[j] = rnd.Rand8();
-        dst[j] = rnd.Rand8();
-        in[j] = src[j] - dst[j];
-#if CONFIG_HIGHBITDEPTH
-      } else {
-        src16[j] = rnd.Rand16() & mask_;
-        dst16[j] = rnd.Rand16() & mask_;
-        in[j] = src16[j] - dst16[j];
-#endif
-      }
-    }
-
-    reference_32x32_dct_2d(in, out_r);
-    for (int j = 0; j < kNumCoeffs; ++j)
-      coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
-    if (bit_depth_ == AOM_BITS_8) {
-      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
-#if CONFIG_HIGHBITDEPTH
-    } else {
-      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32));
-#endif
-    }
-    for (int j = 0; j < kNumCoeffs; ++j) {
-#if CONFIG_HIGHBITDEPTH
-      const int diff =
-          bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
-#else
-      const int diff = dst[j] - src[j];
-#endif
-      const int error = diff * diff;
-      EXPECT_GE(1, error) << "Error: 32x32 IDCT has error " << error
-                          << " at index " << j;
-    }
-  }
-}
-
-class PartialTrans32x32Test
-    : public ::testing::TestWithParam<
-          std::tr1::tuple<FwdTxfmFunc, aom_bit_depth_t> > {
- public:
-  virtual ~PartialTrans32x32Test() {}
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    bit_depth_ = GET_PARAM(1);
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  aom_bit_depth_t bit_depth_;
-  FwdTxfmFunc fwd_txfm_;
-};
-
-TEST_P(PartialTrans32x32Test, Extremes) {
-#if CONFIG_HIGHBITDEPTH
-  const int16_t maxval =
-      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
-#else
-  const int16_t maxval = 255;
-#endif
-  const int minval = -maxval;
-  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
-
-  for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
-  output[0] = 0;
-  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
-  EXPECT_EQ((maxval * kNumCoeffs) >> 3, output[0]);
-
-  for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
-  output[0] = 0;
-  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
-  EXPECT_EQ((minval * kNumCoeffs) >> 3, output[0]);
-}
-
-TEST_P(PartialTrans32x32Test, Random) {
-#if CONFIG_HIGHBITDEPTH
-  const int16_t maxval =
-      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
-#else
-  const int16_t maxval = 255;
-#endif
-  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-
-  int sum = 0;
-  for (int i = 0; i < kNumCoeffs; ++i) {
-    const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
-    input[i] = val;
-    sum += val;
-  }
-  output[0] = 0;
-  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
-  EXPECT_EQ(sum >> 3, output[0]);
-}
-
-using std::tr1::make_tuple;
-
-#if CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    C, Trans32x32Test,
-    ::testing::Values(make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_c, 0,
-                                 AOM_BITS_8),
-                      make_tuple(&aom_fdct32x32_rd_c, &aom_idct32x32_1024_add_c,
-                                 1, AOM_BITS_8)));
-#else
-INSTANTIATE_TEST_CASE_P(
-    C, Trans32x32Test,
-    ::testing::Values(make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_c, 0,
-                                 AOM_BITS_8),
-                      make_tuple(&aom_fdct32x32_rd_c, &aom_idct32x32_1024_add_c,
-                                 1, AOM_BITS_8)));
-#endif  // CONFIG_HIGHBITDEPTH
-
-#if HAVE_NEON && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    NEON, Trans32x32Test,
-    ::testing::Values(make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_neon,
-                                 DCT_DCT, AOM_BITS_8),
-                      make_tuple(&aom_fdct32x32_rd_c,
-                                 &aom_idct32x32_1024_add_neon, ADST_DCT,
-                                 AOM_BITS_8)));
-#endif  // HAVE_NEON && !CONFIG_HIGHBITDEPTH
-
-#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    SSE2, Trans32x32Test,
-    ::testing::Values(make_tuple(&aom_fdct32x32_sse2,
-                                 &aom_idct32x32_1024_add_sse2, DCT_DCT,
-                                 AOM_BITS_8),
-                      make_tuple(&aom_fdct32x32_rd_sse2,
-                                 &aom_idct32x32_1024_add_sse2, ADST_DCT,
-                                 AOM_BITS_8)));
-#endif  // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
-
-#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(SSE2, Trans32x32Test,
-                        ::testing::Values(make_tuple(&aom_fdct32x32_sse2,
-                                                     &aom_idct32x32_1024_add_c,
-                                                     DCT_DCT, AOM_BITS_8),
-                                          make_tuple(&aom_fdct32x32_rd_sse2,
-                                                     &aom_idct32x32_1024_add_c,
-                                                     ADST_DCT, AOM_BITS_8)));
-#endif  // HAVE_SSE2 && CONFIG_HIGHBITDEPTH
-
-#if HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    AVX2, Trans32x32Test,
-    ::testing::Values(make_tuple(&aom_fdct32x32_avx2,
-                                 &aom_idct32x32_1024_add_sse2, DCT_DCT,
-                                 AOM_BITS_8),
-                      make_tuple(&aom_fdct32x32_rd_avx2,
-                                 &aom_idct32x32_1024_add_sse2, ADST_DCT,
-                                 AOM_BITS_8)));
-#endif  // HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
-
-#if HAVE_AVX2 && CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    AVX2, Trans32x32Test,
-    ::testing::Values(make_tuple(&aom_fdct32x32_avx2,
-                                 &aom_idct32x32_1024_add_sse2, DCT_DCT,
-                                 AOM_BITS_8),
-                      make_tuple(&aom_fdct32x32_rd_avx2,
-                                 &aom_idct32x32_1024_add_sse2, ADST_DCT,
-                                 AOM_BITS_8)));
-#endif  // HAVE_AVX2 && CONFIG_HIGHBITDEPTH
-
-#if HAVE_MSA && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    MSA, Trans32x32Test,
-    ::testing::Values(make_tuple(&aom_fdct32x32_msa,
-                                 &aom_idct32x32_1024_add_msa, DCT_DCT,
-                                 AOM_BITS_8),
-                      make_tuple(&aom_fdct32x32_rd_msa,
-                                 &aom_idct32x32_1024_add_msa, ADST_DCT,
-                                 AOM_BITS_8)));
-#endif  // HAVE_MSA && !CONFIG_HIGHBITDEPTH
-}  // namespace
diff --git a/third_party/aom/test/decode_api_test.cc b/third_party/aom/test/decode_api_test.cc
index 187c8e06a1..97cbd0655b 100644
--- a/third_party/aom/test/decode_api_test.cc
+++ b/third_party/aom/test/decode_api_test.cc
@@ -7,12 +7,12 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
-#include "test/ivf_video_source.h"
+#include "config/aom_config.h"
+
 #include "test/util.h"
 #include "aom/aomdx.h"
 #include "aom/aom_decoder.h"
@@ -30,12 +30,12 @@ TEST(DecodeAPI, InvalidParams) {
 
   EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_dec_init(NULL, NULL, NULL, 0));
   EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_dec_init(&dec, NULL, NULL, 0));
-  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(NULL, NULL, 0, NULL, 0));
-  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(NULL, buf, 0, NULL, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(NULL, NULL, 0, NULL));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(NULL, buf, 0, NULL));
   EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
-            aom_codec_decode(NULL, buf, NELEMENTS(buf), NULL, 0));
+            aom_codec_decode(NULL, buf, NELEMENTS(buf), NULL));
   EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
-            aom_codec_decode(NULL, NULL, NELEMENTS(buf), NULL, 0));
+            aom_codec_decode(NULL, NULL, NELEMENTS(buf), NULL));
   EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_destroy(NULL));
   EXPECT_TRUE(aom_codec_error(NULL) != NULL);
 
@@ -44,14 +44,9 @@ TEST(DecodeAPI, InvalidParams) {
               aom_codec_dec_init(NULL, kCodecs[i], NULL, 0));
 
     EXPECT_EQ(AOM_CODEC_OK, aom_codec_dec_init(&dec, kCodecs[i], NULL, 0));
-#if !CONFIG_OBU
-    // Needs to be fixed
-    EXPECT_EQ(AOM_CODEC_UNSUP_BITSTREAM,
-              aom_codec_decode(&dec, buf, NELEMENTS(buf), NULL, 0));
-#endif
     EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
-              aom_codec_decode(&dec, NULL, NELEMENTS(buf), NULL, 0));
-    EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(&dec, buf, 0, NULL, 0));
+              aom_codec_decode(&dec, NULL, NELEMENTS(buf), NULL));
+    EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(&dec, buf, 0, NULL));
 
     EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&dec));
   }
diff --git a/third_party/aom/test/decode_multithreaded_test.cc b/third_party/aom/test/decode_multithreaded_test.cc
new file mode 100644
index 0000000000..ed9a9ceefc
--- /dev/null
+++ b/third_party/aom/test/decode_multithreaded_test.cc
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+#include "aom_mem/aom_mem.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+static const int kNumMultiThreadDecoders = 3;
+
+class AV1DecodeMultiThreadedTest
+    : public ::libaom_test::CodecTestWith4Params<int, int, int, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  AV1DecodeMultiThreadedTest()
+      : EncoderTest(GET_PARAM(0)), md5_single_thread_(), md5_multi_thread_(),
+        n_tile_cols_(GET_PARAM(1)), n_tile_rows_(GET_PARAM(2)),
+        n_tile_groups_(GET_PARAM(3)), set_cpu_used_(GET_PARAM(4)) {
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+    cfg.w = 704;
+    cfg.h = 576;
+    cfg.threads = 1;
+    cfg.allow_lowbitdepth = 1;
+    single_thread_dec_ = codec_->CreateDecoder(cfg, 0);
+
+    // Test cfg.threads == powers of 2.
+    for (int i = 0; i < kNumMultiThreadDecoders; ++i) {
+      cfg.threads <<= 1;
+      multi_thread_dec_[i] = codec_->CreateDecoder(cfg, 0);
+    }
+
+    if (single_thread_dec_->IsAV1()) {
+      single_thread_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1);
+      single_thread_dec_->Control(AV1_SET_DECODE_TILE_COL, -1);
+    }
+    for (int i = 0; i < kNumMultiThreadDecoders; ++i) {
+      if (multi_thread_dec_[i]->IsAV1()) {
+        multi_thread_dec_[i]->Control(AV1_SET_DECODE_TILE_ROW, -1);
+        multi_thread_dec_[i]->Control(AV1_SET_DECODE_TILE_COL, -1);
+      }
+    }
+  }
+
+  virtual ~AV1DecodeMultiThreadedTest() {
+    delete single_thread_dec_;
+    for (int i = 0; i < kNumMultiThreadDecoders; ++i)
+      delete multi_thread_dec_[i];
+  }
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(libaom_test::kTwoPassGood);
+  }
+
+  virtual void PreEncodeFrameHook(libaom_test::VideoSource *video,
+                                  libaom_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_);
+      encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_);
+      encoder->Control(AV1E_SET_NUM_TG, n_tile_groups_);
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+    }
+  }
+
+  void UpdateMD5(::libaom_test::Decoder *dec, const aom_codec_cx_pkt_t *pkt,
+                 ::libaom_test::MD5 *md5) {
+    const aom_codec_err_t res = dec->DecodeFrame(
+        reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz);
+    if (res != AOM_CODEC_OK) {
+      abort_ = true;
+      ASSERT_EQ(AOM_CODEC_OK, res);
+    }
+    const aom_image_t *img = dec->GetDxData().Next();
+    md5->Add(img);
+  }
+
+  virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+    UpdateMD5(single_thread_dec_, pkt, &md5_single_thread_);
+
+    for (int i = 0; i < kNumMultiThreadDecoders; ++i)
+      UpdateMD5(multi_thread_dec_[i], pkt, &md5_multi_thread_[i]);
+  }
+
+  void DoTest() {
+    const aom_rational timebase = { 33333333, 1000000000 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_target_bitrate = 500;
+    cfg_.g_lag_in_frames = 12;
+    cfg_.rc_end_usage = AOM_VBR;
+
+    libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 704, 576,
+                                       timebase.den, timebase.num, 0, 5);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+    const char *md5_single_thread_str = md5_single_thread_.Get();
+
+    for (int i = 0; i < kNumMultiThreadDecoders; ++i) {
+      const char *md5_multi_thread_str = md5_multi_thread_[i].Get();
+      ASSERT_STREQ(md5_single_thread_str, md5_multi_thread_str);
+    }
+  }
+
+  ::libaom_test::MD5 md5_single_thread_;
+  ::libaom_test::MD5 md5_multi_thread_[kNumMultiThreadDecoders];
+  ::libaom_test::Decoder *single_thread_dec_;
+  ::libaom_test::Decoder *multi_thread_dec_[kNumMultiThreadDecoders];
+
+ private:
+  int n_tile_cols_;
+  int n_tile_rows_;
+  int n_tile_groups_;
+  int set_cpu_used_;
+};
+
+// run an encode and do the decode both in single thread
+// and multi thread. Ensure that the MD5 of the output in both cases
+// is identical. If so, the test passes.
+TEST_P(AV1DecodeMultiThreadedTest, MD5Match) {
+  cfg_.large_scale_tile = 0;
+  single_thread_dec_->Control(AV1_SET_TILE_MODE, 0);
+  for (int i = 0; i < kNumMultiThreadDecoders; ++i)
+    multi_thread_dec_[i]->Control(AV1_SET_TILE_MODE, 0);
+  DoTest();
+}
+
+class AV1DecodeMultiThreadedTestLarge : public AV1DecodeMultiThreadedTest {};
+
+TEST_P(AV1DecodeMultiThreadedTestLarge, MD5Match) {
+  cfg_.large_scale_tile = 0;
+  single_thread_dec_->Control(AV1_SET_TILE_MODE, 0);
+  for (int i = 0; i < kNumMultiThreadDecoders; ++i)
+    multi_thread_dec_[i]->Control(AV1_SET_TILE_MODE, 0);
+  DoTest();
+}
+
+// TODO(ranjit): More tests have to be added using pre-generated MD5.
+AV1_INSTANTIATE_TEST_CASE(AV1DecodeMultiThreadedTest, ::testing::Values(1, 2),
+                          ::testing::Values(1, 2), ::testing::Values(1),
+                          ::testing::Values(3));
+AV1_INSTANTIATE_TEST_CASE(AV1DecodeMultiThreadedTestLarge,
+                          ::testing::Values(0, 1, 2, 6),
+                          ::testing::Values(0, 1, 2, 6),
+                          ::testing::Values(1, 4), ::testing::Values(0));
+
+class AV1DecodeMultiThreadedLSTestLarge
+    : public AV1DecodeMultiThreadedTestLarge {};
+
+TEST_P(AV1DecodeMultiThreadedLSTestLarge, DISABLED_MD5Match) {
+  cfg_.large_scale_tile = 1;
+  single_thread_dec_->Control(AV1_SET_TILE_MODE, 1);
+  for (int i = 0; i < kNumMultiThreadDecoders; ++i)
+    multi_thread_dec_[i]->Control(AV1_SET_TILE_MODE, 1);
+  DoTest();
+}
+
+AV1_INSTANTIATE_TEST_CASE(AV1DecodeMultiThreadedLSTestLarge,
+                          ::testing::Values(1, 2, 32),
+                          ::testing::Values(1, 2, 32), ::testing::Values(1),
+                          ::testing::Values(0, 3));
+
+}  // namespace
diff --git a/third_party/aom/test/decode_perf_test.cc b/third_party/aom/test/decode_perf_test.cc
index a24d02a6c3..bb7b00032e 100644
--- a/third_party/aom/test/decode_perf_test.cc
+++ b/third_party/aom/test/decode_perf_test.cc
@@ -7,9 +7,14 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <string>
+
+#include "config/aom_version.h"
+
+#include "aom_ports/aom_timer.h"
+#include "common/ivfenc.h"
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
 #include "test/encode_test_driver.h"
@@ -18,25 +23,21 @@
 #include "test/md5_helper.h"
 #include "test/util.h"
 #include "test/webm_video_source.h"
-#include "aom_ports/aom_timer.h"
-#include "./ivfenc.h"
-#include "./aom_version.h"
 
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
 
 namespace {
 
 #define VIDEO_NAME 0
 #define THREADS 1
 
-const int kMaxPsnr = 100;
 const double kUsecsInSec = 1000000.0;
 const char kNewEncodeOutputFile[] = "new_encode.ivf";
 
 /*
  DecodePerfTest takes a tuple of filename + number of threads to decode with
  */
-typedef std::tr1::tuple<const char *, unsigned> DecodePerfParam;
+typedef ::testing::tuple<const char *, unsigned> DecodePerfParam;
 
 // TODO(jimbankoski): Add actual test vectors here when available.
 // const DecodePerfParam kAV1DecodePerfVectors[] = {};
@@ -129,7 +130,8 @@ class AV1NewEncodeDecodePerfTest
   }
 
   virtual void BeginPassHook(unsigned int /*pass*/) {
-    const std::string data_path = getenv("LIBAOM_TEST_DATA_PATH");
+    const char *const env = getenv("LIBAOM_TEST_DATA_PATH");
+    const std::string data_path(env ? env : ".");
     const std::string path_to_source = data_path + "/" + kNewEncodeOutputFile;
     outfile_ = fopen(path_to_source.c_str(), "wb");
     ASSERT_TRUE(outfile_ != NULL);
@@ -157,7 +159,7 @@ class AV1NewEncodeDecodePerfTest
               pkt->data.frame.sz);
   }
 
-  virtual bool DoDecode() { return false; }
+  virtual bool DoDecode() const { return false; }
 
   void set_speed(unsigned int speed) { speed_ = speed; }
 
diff --git a/third_party/aom/test/decode_test_driver.cc b/third_party/aom/test/decode_test_driver.cc
index 9a465327ef..ed261b5270 100644
--- a/third_party/aom/test/decode_test_driver.cc
+++ b/third_party/aom/test/decode_test_driver.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
@@ -18,13 +18,12 @@
 
 namespace libaom_test {
 
-const char kVP8Name[] = "WebM Project VP8";
 const char kAV1Name[] = "AOMedia Project AV1 Decoder";
 
 aom_codec_err_t Decoder::PeekStream(const uint8_t *cxdata, size_t size,
                                     aom_codec_stream_info_t *stream_info) {
-  return aom_codec_peek_stream_info(
-      CodecInterface(), cxdata, static_cast<unsigned int>(size), stream_info);
+  return aom_codec_peek_stream_info(CodecInterface(), cxdata, size,
+                                    stream_info);
 }
 
 aom_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size) {
@@ -36,39 +35,22 @@ aom_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size,
   aom_codec_err_t res_dec;
   InitOnce();
   API_REGISTER_STATE_CHECK(
-      res_dec = aom_codec_decode(
-          &decoder_, cxdata, static_cast<unsigned int>(size), user_priv, 0));
+      res_dec = aom_codec_decode(&decoder_, cxdata, size, user_priv));
   return res_dec;
 }
 
-bool Decoder::IsVP8() const {
-  const char *codec_name = GetDecoderName();
-  return strncmp(kVP8Name, codec_name, sizeof(kVP8Name) - 1) == 0;
-}
-
 bool Decoder::IsAV1() const {
   const char *codec_name = GetDecoderName();
   return strncmp(kAV1Name, codec_name, sizeof(kAV1Name) - 1) == 0;
 }
 
-void DecoderTest::HandlePeekResult(Decoder *const decoder,
-                                   CompressedVideoSource *video,
+void DecoderTest::HandlePeekResult(Decoder *const /*decoder*/,
+                                   CompressedVideoSource * /*video*/,
                                    const aom_codec_err_t res_peek) {
-  const bool is_vp8 = decoder->IsVP8();
-  if (is_vp8) {
-    /* Vp8's implementation of PeekStream returns an error if the frame you
-     * pass it is not a keyframe, so we only expect AOM_CODEC_OK on the first
-     * frame, which must be a keyframe. */
-    if (video->frame_number() == 0) {
-      ASSERT_EQ(AOM_CODEC_OK, res_peek)
-          << "Peek return failed: " << aom_codec_err_to_string(res_peek);
-    }
-  } else {
-    /* The Av1 implementation of PeekStream returns an error only if the
-     * data passed to it isn't a valid Av1 chunk. */
-    ASSERT_EQ(AOM_CODEC_OK, res_peek)
-        << "Peek return failed: " << aom_codec_err_to_string(res_peek);
-  }
+  /* The Av1 implementation of PeekStream returns an error only if the
+   * data passed to it isn't a valid Av1 chunk. */
+  ASSERT_EQ(AOM_CODEC_OK, res_peek)
+      << "Peek return failed: " << aom_codec_err_to_string(res_peek);
 }
 
 void DecoderTest::RunLoop(CompressedVideoSource *video,
@@ -76,6 +58,7 @@ void DecoderTest::RunLoop(CompressedVideoSource *video,
   Decoder *const decoder = codec_->CreateDecoder(dec_cfg, flags_);
   ASSERT_TRUE(decoder != NULL);
   bool end_of_file = false;
+  bool peeked_stream = false;
 
   // Decode frames.
   for (video->Begin(); !::testing::Test::HasFailure() && !end_of_file;
@@ -83,15 +66,23 @@ void DecoderTest::RunLoop(CompressedVideoSource *video,
     PreDecodeFrameHook(*video, decoder);
 
     aom_codec_stream_info_t stream_info;
+    stream_info.is_annexb = 0;
+
     if (video->cxdata() != NULL) {
-      const aom_codec_err_t res_peek = decoder->PeekStream(
-          video->cxdata(), video->frame_size(), &stream_info);
-      HandlePeekResult(decoder, video, res_peek);
-      ASSERT_FALSE(::testing::Test::HasFailure());
+      if (!peeked_stream) {
+        // TODO(yaowu): PeekStream returns error for non-sequence_header_obu,
+        // therefore should only be tried once per sequence, this shall be fixed
+        // once PeekStream is updated to properly operate on other obus.
+        const aom_codec_err_t res_peek = decoder->PeekStream(
+            video->cxdata(), video->frame_size(), &stream_info);
+        HandlePeekResult(decoder, video, res_peek);
+        ASSERT_FALSE(::testing::Test::HasFailure());
+        peeked_stream = true;
+      }
 
       aom_codec_err_t res_dec =
           decoder->DecodeFrame(video->cxdata(), video->frame_size());
-      if (!HandleDecodeResult(res_dec, decoder)) break;
+      if (!HandleDecodeResult(res_dec, *video, decoder)) break;
     } else {
       // Signal end of the file to the decoder.
       const aom_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0);
diff --git a/third_party/aom/test/decode_test_driver.h b/third_party/aom/test/decode_test_driver.h
index e7deb389c9..916efdad0e 100644
--- a/third_party/aom/test/decode_test_driver.h
+++ b/third_party/aom/test/decode_test_driver.h
@@ -13,7 +13,9 @@
 #define TEST_DECODE_TEST_DRIVER_H_
 #include <cstring>
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "./aom_config.h"
+
+#include "config/aom_config.h"
+
 #include "aom/aom_decoder.h"
 
 namespace libaom_test {
@@ -93,8 +95,6 @@ class Decoder {
     return aom_codec_iface_name(CodecInterface());
   }
 
-  bool IsVP8() const;
-
   bool IsAV1() const;
 
   aom_codec_ctx_t *GetDecoder() { return &decoder_; }
@@ -134,6 +134,7 @@ class DecoderTest {
 
   // Hook to be called to handle decode result. Return true to continue.
   virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
+                                  const CompressedVideoSource & /*video*/,
                                   Decoder *decoder) {
     EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
     return AOM_CODEC_OK == res_dec;
diff --git a/third_party/aom/test/decode_to_md5.sh b/third_party/aom/test/decode_to_md5.sh
index 44c9f5f052..2edd1cb52b 100755
--- a/third_party/aom/test/decode_to_md5.sh
+++ b/third_party/aom/test/decode_to_md5.sh
@@ -16,7 +16,7 @@
 . $(dirname $0)/tools_common.sh
 
 # Environment check: Make sure input is available:
-#   $AOM_IVF_FILE and $AV1_IVF_FILE are required.
+#   $AV1_IVF_FILE is required.
 decode_to_md5_verify_environment() {
   if [ "$(av1_encode_available)" != "yes" ] && [ ! -e "${AV1_IVF_FILE}" ]; then
     return 1
@@ -27,7 +27,7 @@ decode_to_md5_verify_environment() {
 # interpreted as codec name and used solely to name the output file. $3 is the
 # expected md5 sum: It must match that of the final frame.
 decode_to_md5() {
-  local decoder="${LIBAOM_BIN_PATH}/decode_to_md5${AOM_TEST_EXE_SUFFIX}"
+  local decoder="$(aom_tool_path decode_to_md5)"
   local input_file="$1"
   local codec="$2"
   local expected_md5="$3"
@@ -45,14 +45,23 @@ decode_to_md5() {
 
   local md5_last_frame="$(tail -n1 "${output_file}" | awk '{print $1}')"
   local actual_md5="$(echo "${md5_last_frame}" | awk '{print $1}')"
-  [ "${actual_md5}" = "${expected_md5}" ] || return 1
+  if [ "${actual_md5}" = "${expected_md5}" ]; then
+    return 0
+  else
+    elog "MD5 mismatch:"
+    elog "Expected: ${expected_md5}"
+    elog "Actual: ${actual_md5}"
+    return 1
+  fi
 }
 
-decode_to_md5_av1() {
+DISABLED_decode_to_md5_av1() {
   # expected MD5 sum for the last frame.
-  local expected_md5="26d3ef1d60754a1f6acb603c3763efbe"
+  local expected_md5="567dd6d4b7a7170edddbf58bbcc3aff1"
   local file="${AV1_IVF_FILE}"
 
+  # TODO(urvang): Check in the encoded file (like libvpx does) to avoid
+  # encoding every time.
   if [ "$(av1_decode_available)" = "yes" ]; then
     if [ ! -e "${AV1_IVF_FILE}" ]; then
       file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf"
@@ -62,6 +71,7 @@ decode_to_md5_av1() {
   fi
 }
 
-decode_to_md5_tests="decode_to_md5_av1"
+# TODO(tomfinegan): Enable when the bitstream stabilizes.
+decode_to_md5_tests="DISABLED_decode_to_md5_av1"
 
 run_tests decode_to_md5_verify_environment "${decode_to_md5_tests}"
diff --git a/third_party/aom/test/decode_with_drops.sh b/third_party/aom/test/decode_with_drops.sh
index 5978312f20..155ee92077 100755
--- a/third_party/aom/test/decode_with_drops.sh
+++ b/third_party/aom/test/decode_with_drops.sh
@@ -16,7 +16,7 @@
 . $(dirname $0)/tools_common.sh
 
 # Environment check: Make sure input is available:
-#   $AOM_IVF_FILE and $AV1_IVF_FILE are required.
+#   $AV1_IVF_FILE is required.
 decode_with_drops_verify_environment() {
   if [ "$(av1_encode_available)" != "yes" ] && [ ! -e "${AV1_IVF_FILE}" ]; then
     return 1
@@ -27,7 +27,7 @@ decode_with_drops_verify_environment() {
 # to name the output file. $3 is the drop mode, and is passed directly to
 # decode_with_drops.
 decode_with_drops() {
-  local decoder="${LIBAOM_BIN_PATH}/decode_with_drops${AOM_TEST_EXE_SUFFIX}"
+  local decoder="$(aom_tool_path decode_with_drops)"
   local input_file="$1"
   local codec="$2"
   local output_file="${AOM_TEST_OUTPUT_DIR}/decode_with_drops_${codec}"
@@ -47,21 +47,22 @@ decode_with_drops() {
 
 # Decodes $AV1_IVF_FILE while dropping frames, twice: once in sequence mode,
 # and once in pattern mode.
-decode_with_drops_av1() {
+DISABLED_decode_with_drops_av1() {
   if [ "$(av1_decode_available)" = "yes" ]; then
     local file="${AV1_IVF_FILE}"
     if [ ! -e "${AV1_IVF_FILE}" ]; then
       file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf"
       encode_yuv_raw_input_av1 "${file}" --ivf
     fi
-    # Drop frames 2 and 3.
-    decode_with_drops "${file}" "av1" "2-3"
+    # Drop frames 3 and 4.
+    decode_with_drops "${file}" "av1" "3-4"
 
     # Test pattern mode: Drop 3 of every 4 frames.
     decode_with_drops "${file}" "av1" "3/4"
   fi
 }
 
-decode_with_drops_tests="decode_with_drops_av1"
+# TODO(yaowu): Disable this test as trailing_bit check is expected to fail
+decode_with_drops_tests="DISABLED_decode_with_drops_av1"
 
 run_tests decode_with_drops_verify_environment "${decode_with_drops_tests}"
diff --git a/third_party/aom/test/dering_test.cc b/third_party/aom/test/dering_test.cc
deleted file mode 100644
index 6b76561c80..0000000000
--- a/third_party/aom/test/dering_test.cc
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <cstdlib>
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
-#include "aom_ports/aom_timer.h"
-#include "av1/common/cdef_block.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-
-typedef std::tr1::tuple<cdef_direction_func, cdef_direction_func, int>
-    dering_dir_param_t;
-
-class CDEFDeringDirTest : public ::testing::TestWithParam<dering_dir_param_t> {
- public:
-  virtual ~CDEFDeringDirTest() {}
-  virtual void SetUp() {
-    dering = GET_PARAM(0);
-    ref_dering = GET_PARAM(1);
-    bsize = GET_PARAM(2);
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  int bsize;
-  cdef_direction_func dering;
-  cdef_direction_func ref_dering;
-};
-
-typedef CDEFDeringDirTest CDEFDeringSpeedTest;
-
-void test_dering(int bsize, int iterations, cdef_direction_func dering,
-                 cdef_direction_func ref_dering) {
-  const int size = 8;
-  const int ysize = size + 2 * CDEF_VBORDER;
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, uint16_t, s[ysize * CDEF_BSTRIDE]);
-  DECLARE_ALIGNED(16, static uint16_t, d[size * size]);
-  DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]);
-  memset(ref_d, 0, sizeof(ref_d));
-  memset(d, 0, sizeof(d));
-
-  int error = 0, threshold = 0, dir;
-  int boundary, damping, depth, bits, level, count,
-      errdepth = 0, errthreshold = 0, errboundary = 0, errdamping = 0;
-  unsigned int pos = 0;
-
-  for (boundary = 0; boundary < 16; boundary++) {
-    for (depth = 8; depth <= 12; depth += 2) {
-      for (damping = 5 + depth - 8; damping < 7 + depth - 8; damping++) {
-        for (count = 0; count < iterations; count++) {
-          for (level = 0; level < (1 << depth) && !error;
-               level += (1 + 4 * !!boundary) << (depth - 8)) {
-            for (bits = 1; bits <= depth && !error; bits++) {
-              for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
-                s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
-                             (1 << depth) - 1);
-              if (boundary) {
-                if (boundary & 1) {  // Left
-                  for (int i = 0; i < ysize; i++)
-                    for (int j = 0; j < CDEF_HBORDER; j++)
-                      s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
-                }
-                if (boundary & 2) {  // Right
-                  for (int i = 0; i < ysize; i++)
-                    for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++)
-                      s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
-                }
-                if (boundary & 4) {  // Above
-                  for (int i = 0; i < CDEF_VBORDER; i++)
-                    for (int j = 0; j < CDEF_BSTRIDE; j++)
-                      s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
-                }
-                if (boundary & 8) {  // Below
-                  for (int i = CDEF_VBORDER + size; i < ysize; i++)
-                    for (int j = 0; j < CDEF_BSTRIDE; j++)
-                      s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
-                }
-              }
-              for (dir = 0; dir < 8; dir++) {
-                for (threshold = 0; threshold < 64 << (depth - 8) && !error;
-                     threshold += (1 + 4 * !!boundary) << (depth - 8)) {
-                  ref_dering(ref_d, size,
-                             s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
-                             threshold, dir, damping);
-                  // If dering and ref_dering are the same, we're just testing
-                  // speed
-                  if (dering != ref_dering)
-                    ASM_REGISTER_STATE_CHECK(dering(
-                        d, size, s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
-                        threshold, dir, damping));
-                  if (ref_dering != dering) {
-                    for (pos = 0; pos < sizeof(d) / sizeof(*d) && !error;
-                         pos++) {
-                      error = ref_d[pos] != d[pos];
-                      errdepth = depth;
-                      errthreshold = threshold;
-                      errboundary = boundary;
-                      errdamping = damping;
-                    }
-                  }
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-
-  pos--;
-  EXPECT_EQ(0, error) << "Error: CDEFDeringDirTest, SIMD and C mismatch."
-                      << std::endl
-                      << "First error at " << pos % size << "," << pos / size
-                      << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos]
-                      << ") " << std::endl
-                      << "threshold: " << errthreshold << std::endl
-                      << "damping: " << errdamping << std::endl
-                      << "depth: " << errdepth << std::endl
-                      << "size: " << bsize << std::endl
-                      << "boundary: " << errboundary << std::endl
-                      << std::endl;
-}
-
-void test_dering_speed(int bsize, int iterations, cdef_direction_func dering,
-                       cdef_direction_func ref_dering) {
-  aom_usec_timer ref_timer;
-  aom_usec_timer timer;
-
-  aom_usec_timer_start(&ref_timer);
-  test_dering(bsize, iterations, ref_dering, ref_dering);
-  aom_usec_timer_mark(&ref_timer);
-  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
-
-  aom_usec_timer_start(&timer);
-  test_dering(bsize, iterations, dering, dering);
-  aom_usec_timer_mark(&timer);
-  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
-
-#if 0
-  std::cout << "[          ] C time = " << ref_elapsed_time / 1000
-            << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
-#endif
-
-  EXPECT_GT(ref_elapsed_time, elapsed_time)
-      << "Error: CDEFDeringSpeedTest, SIMD slower than C." << std::endl
-      << "C time: " << ref_elapsed_time << " us" << std::endl
-      << "SIMD time: " << elapsed_time << " us" << std::endl;
-}
-
-typedef int (*find_dir_t)(const uint16_t *img, int stride, int32_t *var,
-                          int coeff_shift);
-
-typedef std::tr1::tuple<find_dir_t, find_dir_t> find_dir_param_t;
-
-class CDEFDeringFindDirTest
-    : public ::testing::TestWithParam<find_dir_param_t> {
- public:
-  virtual ~CDEFDeringFindDirTest() {}
-  virtual void SetUp() {
-    finddir = GET_PARAM(0);
-    ref_finddir = GET_PARAM(1);
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  find_dir_t finddir;
-  find_dir_t ref_finddir;
-};
-
-typedef CDEFDeringFindDirTest CDEFDeringFindDirSpeedTest;
-
-void test_finddir(int (*finddir)(const uint16_t *img, int stride, int32_t *var,
-                                 int coeff_shift),
-                  int (*ref_finddir)(const uint16_t *img, int stride,
-                                     int32_t *var, int coeff_shift)) {
-  const int size = 8;
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, uint16_t, s[size * size]);
-
-  int error = 0;
-  int depth, bits, level, count, errdepth = 0;
-  int ref_res = 0, res = 0;
-  int32_t ref_var = 0, var = 0;
-
-  for (depth = 8; depth <= 12 && !error; depth += 2) {
-    for (count = 0; count < 512 && !error; count++) {
-      for (level = 0; level < (1 << depth) && !error;
-           level += 1 << (depth - 8)) {
-        for (bits = 1; bits <= depth && !error; bits++) {
-          for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
-            s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
-                         (1 << depth) - 1);
-          for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++)
-            ref_res = ref_finddir(s, size, &ref_var, depth - 8);
-          if (finddir != ref_finddir)
-            ASM_REGISTER_STATE_CHECK(res = finddir(s, size, &var, depth - 8));
-          if (ref_finddir != finddir) {
-            if (res != ref_res || var != ref_var) error = 1;
-            errdepth = depth;
-          }
-        }
-      }
-    }
-  }
-
-  EXPECT_EQ(0, error) << "Error: CDEFDeringFindDirTest, SIMD and C mismatch."
-                      << std::endl
-                      << "return: " << res << " : " << ref_res << std::endl
-                      << "var: " << var << " : " << ref_var << std::endl
-                      << "depth: " << errdepth << std::endl
-                      << std::endl;
-}
-
-void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride,
-                                       int32_t *var, int coeff_shift),
-                        int (*ref_finddir)(const uint16_t *img, int stride,
-                                           int32_t *var, int coeff_shift)) {
-  aom_usec_timer ref_timer;
-  aom_usec_timer timer;
-
-  aom_usec_timer_start(&ref_timer);
-  test_finddir(ref_finddir, ref_finddir);
-  aom_usec_timer_mark(&ref_timer);
-  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
-
-  aom_usec_timer_start(&timer);
-  test_finddir(finddir, finddir);
-  aom_usec_timer_mark(&timer);
-  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
-
-#if 0
-  std::cout << "[          ] C time = " << ref_elapsed_time / 1000
-            << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
-#endif
-
-  EXPECT_GT(ref_elapsed_time, elapsed_time)
-      << "Error: CDEFDeringFindDirSpeedTest, SIMD slower than C." << std::endl
-      << "C time: " << ref_elapsed_time << " us" << std::endl
-      << "SIMD time: " << elapsed_time << " us" << std::endl;
-}
-
-TEST_P(CDEFDeringDirTest, TestSIMDNoMismatch) {
-  test_dering(bsize, 1, dering, ref_dering);
-}
-
-TEST_P(CDEFDeringSpeedTest, DISABLED_TestSpeed) {
-  test_dering_speed(bsize, 4, dering, ref_dering);
-}
-
-TEST_P(CDEFDeringFindDirTest, TestSIMDNoMismatch) {
-  test_finddir(finddir, ref_finddir);
-}
-
-TEST_P(CDEFDeringFindDirSpeedTest, DISABLED_TestSpeed) {
-  test_finddir_speed(finddir, ref_finddir);
-}
-
-using std::tr1::make_tuple;
-
-// VS compiling for 32 bit targets does not support vector types in
-// structs as arguments, which makes the v256 type of the intrinsics
-// hard to support, so optimizations for this target are disabled.
-#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringDirTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_sse2,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_sse2,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_sse2,
-                                                     &cdef_find_dir_c)));
-#endif
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringDirTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_ssse3,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_ssse3,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
-                                                     &cdef_find_dir_c)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringDirTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_sse4_1,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_sse4_1,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
-                                                     &cdef_find_dir_c)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringDirTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_neon,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_neon,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_neon,
-                                                     &cdef_find_dir_c)));
-#endif
-
-// Test speed for all supported architectures
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_sse2,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_sse2,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_sse2,
-                                                     &cdef_find_dir_c)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_ssse3,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_ssse3,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
-                                                     &cdef_find_dir_c)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_sse4_1,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_sse4_1,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
-                                                     &cdef_find_dir_c)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_neon,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_neon,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_neon,
-                                                     &cdef_find_dir_c)));
-#endif
-
-#endif  // defined(_WIN64) || !defined(_MSC_VER)
-}  // namespace
diff --git a/third_party/aom/test/dr_prediction_test.cc b/third_party/aom/test/dr_prediction_test.cc
new file mode 100644
index 0000000000..22b9832a11
--- /dev/null
+++ b/third_party/aom/test/dr_prediction_test.cc
@@ -0,0 +1,359 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/common/blockd.h"
+#include "av1/common/pred_common.h"
+#include "av1/common/reconintra.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+
+const int kZ1Start = 0;
+const int kZ2Start = 90;
+const int kZ3Start = 180;
+
+const TX_SIZE kTxSize[] = { TX_4X4,   TX_8X8,   TX_16X16, TX_32X32, TX_64X64,
+                            TX_4X8,   TX_8X4,   TX_8X16,  TX_16X8,  TX_16X32,
+                            TX_32X16, TX_32X64, TX_64X32, TX_4X16,  TX_16X4,
+                            TX_8X32,  TX_32X8,  TX_16X64, TX_64X16 };
+
+const char *const kTxSizeStrings[] = {
+  "TX_4X4",   "TX_8X8",   "TX_16X16", "TX_32X32", "TX_64X64",
+  "TX_4X8",   "TX_8X4",   "TX_8X16",  "TX_16X8",  "TX_16X32",
+  "TX_32X16", "TX_32X64", "TX_64X32", "TX_4X16",  "TX_16X4",
+  "TX_8X32",  "TX_32X8",  "TX_16X64", "TX_64X16"
+};
+
+using libaom_test::ACMRandom;
+
+typedef void (*DrPred_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
+                           const uint16_t *above, const uint16_t *left,
+                           int upsample_above, int upsample_left, int dx,
+                           int dy, int bd);
+
+typedef void (*DrPred)(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+                       const uint8_t *above, const uint8_t *left,
+                       int upsample_above, int upsample_left, int dx, int dy,
+                       int bd);
+
+typedef void (*Z1_Lbd)(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+                       const uint8_t *above, const uint8_t *left,
+                       int upsample_above, int dx, int dy);
+template <Z1_Lbd fn>
+void z1_wrapper(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+                const uint8_t *above, const uint8_t *left, int upsample_above,
+                int /*upsample_left*/, int dx, int dy, int /*bd*/) {
+  fn(dst, stride, bw, bh, above, left, upsample_above, dx, dy);
+}
+
+typedef void (*Z2_Lbd)(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+                       const uint8_t *above, const uint8_t *left,
+                       int upsample_above, int upsample_left, int dx, int dy);
+template <Z2_Lbd fn>
+void z2_wrapper(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+                const uint8_t *above, const uint8_t *left, int upsample_above,
+                int upsample_left, int dx, int dy, int /*bd*/) {
+  fn(dst, stride, bw, bh, above, left, upsample_above, upsample_left, dx, dy);
+}
+
+typedef void (*Z3_Lbd)(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+                       const uint8_t *above, const uint8_t *left,
+                       int upsample_left, int dx, int dy);
+template <Z3_Lbd fn>
+void z3_wrapper(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+                const uint8_t *above, const uint8_t *left,
+                int /*upsample_above*/, int upsample_left, int dx, int dy,
+                int /*bd*/) {
+  fn(dst, stride, bw, bh, above, left, upsample_left, dx, dy);
+}
+
+typedef void (*Z1_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
+                       const uint16_t *above, const uint16_t *left,
+                       int upsample_above, int dx, int dy, int bd);
+template <Z1_Hbd fn>
+void z1_wrapper_hbd(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
+                    const uint16_t *above, const uint16_t *left,
+                    int upsample_above, int /*upsample_left*/, int dx, int dy,
+                    int bd) {
+  fn(dst, stride, bw, bh, above, left, upsample_above, dx, dy, bd);
+}
+
+typedef void (*Z2_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
+                       const uint16_t *above, const uint16_t *left,
+                       int upsample_above, int upsample_left, int dx, int dy,
+                       int bd);
+template <Z2_Hbd fn>
+void z2_wrapper_hbd(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
+                    const uint16_t *above, const uint16_t *left,
+                    int upsample_above, int upsample_left, int dx, int dy,
+                    int bd) {
+  fn(dst, stride, bw, bh, above, left, upsample_above, upsample_left, dx, dy,
+     bd);
+}
+
+typedef void (*Z3_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
+                       const uint16_t *above, const uint16_t *left,
+                       int upsample_left, int dx, int dy, int bd);
+template <Z3_Hbd fn>
+void z3_wrapper_hbd(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
+                    const uint16_t *above, const uint16_t *left,
+                    int /*upsample_above*/, int upsample_left, int dx, int dy,
+                    int bd) {
+  fn(dst, stride, bw, bh, above, left, upsample_left, dx, dy, bd);
+}
+
+template <typename FuncType>
+struct DrPredFunc {
+  DrPredFunc(FuncType pred = NULL, FuncType tst = NULL, int bit_depth_value = 0,
+             int start_angle_value = 0)
+      : ref_fn(pred), tst_fn(tst), bit_depth(bit_depth_value),
+        start_angle(start_angle_value) {}
+
+  FuncType ref_fn;
+  FuncType tst_fn;
+  int bit_depth;
+  int start_angle;
+};
+
+template <typename Pixel, typename FuncType>
+class DrPredTest : public ::testing::TestWithParam<DrPredFunc<FuncType> > {
+ protected:
+  static const int kMaxNumTests = 100000;
+  static const int kIterations = 10;
+  static const int kDstStride = 64;
+  static const int kDstSize = kDstStride * kDstStride;
+  static const int kOffset = 16;
+  static const int kBufSize = ((2 * MAX_TX_SIZE) << 1) + 16;
+
+  DrPredTest()
+      : upsample_above_(0), upsample_left_(0), bw_(0), bh_(0), dx_(1), dy_(1),
+        bd_(8), txsize_(TX_4X4) {
+    params_ = this->GetParam();
+    start_angle_ = params_.start_angle;
+    stop_angle_ = start_angle_ + 90;
+
+    dst_ref_ = &dst_ref_data_[0];
+    dst_tst_ = &dst_tst_data_[0];
+    dst_stride_ = kDstStride;
+    above_ = &above_data_[kOffset];
+    left_ = &left_data_[kOffset];
+
+    for (int i = 0; i < kBufSize; ++i) {
+      above_data_[i] = rng_.Rand8();
+      left_data_[i] = rng_.Rand8();
+    }
+
+    for (int i = 0; i < kDstSize; ++i) {
+      dst_ref_[i] = 0;
+    }
+  }
+
+  virtual ~DrPredTest() {}
+
+  void Predict(bool speedtest, int tx) {
+    const int kNumTests = speedtest ? kMaxNumTests : 1;
+    aom_usec_timer timer;
+
+    aom_usec_timer_start(&timer);
+    for (int k = 0; k < kNumTests; ++k) {
+      params_.ref_fn(dst_ref_, dst_stride_, bw_, bh_, above_, left_,
+                     upsample_above_, upsample_left_, dx_, dy_, bd_);
+    }
+    aom_usec_timer_mark(&timer);
+    const int ref_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+
+    aom_usec_timer_start(&timer);
+    if (params_.tst_fn) {
+      for (int k = 0; k < kNumTests; ++k) {
+        ASM_REGISTER_STATE_CHECK(params_.tst_fn(dst_tst_, dst_stride_, bw_, bh_,
+                                                above_, left_, upsample_above_,
+                                                upsample_left_, dx_, dy_, bd_));
+      }
+    }
+    aom_usec_timer_mark(&timer);
+    const int tst_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+
+    OutputTimes(kNumTests, ref_time, tst_time, tx);
+  }
+
+  void RunTest(bool speedtest) {
+    for (int i = 0; i < kBufSize; ++i) {
+      above_data_[i] = left_data_[i] = (1 << bd_) - 1;
+    }
+
+    for (int tx = 0; tx < TX_SIZES_ALL; ++tx) {
+      if (params_.tst_fn == NULL) {
+        for (int i = 0; i < kDstSize; ++i) {
+          dst_tst_[i] = (1 << bd_) - 1;
+        }
+      } else {
+        for (int i = 0; i < kDstSize; ++i) {
+          dst_tst_[i] = 0;
+        }
+      }
+
+      bw_ = tx_size_wide[kTxSize[tx]];
+      bh_ = tx_size_high[kTxSize[tx]];
+
+      Predict(speedtest, tx);
+
+      for (int r = 0; r < bh_; ++r) {
+        for (int c = 0; c < bw_; ++c) {
+          ASSERT_EQ(dst_ref_[r * dst_stride_ + c],
+                    dst_tst_[r * dst_stride_ + c])
+              << bw_ << "x" << bh_ << " r: " << r << " c: " << c
+              << " dx: " << dx_ << " dy: " << dy_
+              << " upsample_above: " << upsample_above_
+              << " upsample_left: " << upsample_left_;
+        }
+      }
+    }
+  }
+
+  void OutputTimes(int num_tests, int ref_time, int tst_time, int tx) {
+    if (num_tests > 1) {
+      if (params_.tst_fn) {
+        const float x = static_cast<float>(ref_time) / tst_time;
+        printf("\t[%8s] :: ref time %6d, tst time %6d     %3.2f\n",
+               kTxSizeStrings[tx], ref_time, tst_time, x);
+      } else {
+        printf("\t[%8s] :: ref time %6d\n", kTxSizeStrings[tx], ref_time);
+      }
+    }
+  }
+
+  Pixel dst_ref_data_[kDstSize];
+  Pixel dst_tst_data_[kDstSize];
+
+  Pixel left_data_[kBufSize];
+  Pixel dummy_data_[kBufSize];
+  Pixel above_data_[kBufSize];
+
+  Pixel *dst_ref_;
+  Pixel *dst_tst_;
+  Pixel *above_;
+  Pixel *left_;
+  int dst_stride_;
+
+  int upsample_above_;
+  int upsample_left_;
+  int bw_;
+  int bh_;
+  int dx_;
+  int dy_;
+  int bd_;
+  TX_SIZE txsize_;
+
+  int start_angle_;
+  int stop_angle_;
+
+  ACMRandom rng_;
+
+  DrPredFunc<FuncType> params_;
+};
+
+class LowbdDrPredTest : public DrPredTest<uint8_t, DrPred> {};
+
+TEST_P(LowbdDrPredTest, SaturatedValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    upsample_above_ = iter & 1;
+    for (int angle = start_angle_; angle < stop_angle_; ++angle) {
+      dx_ = av1_get_dx(angle);
+      dy_ = av1_get_dy(angle);
+      if (dx_ && dy_) RunTest(false);
+    }
+  }
+}
+
+TEST_P(LowbdDrPredTest, DISABLED_Speed) {
+  const int angles[] = { 3, 45, 87 };
+  for (upsample_above_ = 0; upsample_above_ < 2; ++upsample_above_) {
+    upsample_left_ = upsample_above_;
+    for (int i = 0; i < 3; ++i) {
+      dx_ = av1_get_dx(angles[i] + start_angle_);
+      dy_ = av1_get_dy(angles[i] + start_angle_);
+      printf("upsample_above: %d upsample_left: %d angle: %d ~~~~~~~~~~~~~~~\n",
+             upsample_above_, upsample_left_, angles[i] + start_angle_);
+      if (dx_ && dy_) RunTest(true);
+    }
+  }
+}
+
+using ::testing::make_tuple;
+
+INSTANTIATE_TEST_CASE_P(
+    C, LowbdDrPredTest,
+    ::testing::Values(DrPredFunc<DrPred>(&z1_wrapper<av1_dr_prediction_z1_c>,
+                                         NULL, AOM_BITS_8, kZ1Start),
+                      DrPredFunc<DrPred>(&z2_wrapper<av1_dr_prediction_z2_c>,
+                                         NULL, AOM_BITS_8, kZ2Start),
+                      DrPredFunc<DrPred>(&z3_wrapper<av1_dr_prediction_z3_c>,
+                                         NULL, AOM_BITS_8, kZ3Start)));
+
+class HighbdDrPredTest : public DrPredTest<uint16_t, DrPred_Hbd> {};
+
+TEST_P(HighbdDrPredTest, SaturatedValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    upsample_above_ = iter & 1;
+    for (int angle = start_angle_; angle < stop_angle_; ++angle) {
+      dx_ = av1_get_dx(angle);
+      dy_ = av1_get_dy(angle);
+      if (dx_ && dy_) RunTest(false);
+    }
+  }
+}
+
+TEST_P(HighbdDrPredTest, DISABLED_Speed) {
+  const int angles[] = { 3, 45, 87 };
+  for (upsample_above_ = 0; upsample_above_ < 2; ++upsample_above_) {
+    upsample_left_ = upsample_above_;
+    for (int i = 0; i < 3; ++i) {
+      dx_ = av1_get_dx(angles[i] + start_angle_);
+      dy_ = av1_get_dy(angles[i] + start_angle_);
+      printf("upsample_above: %d upsample_left: %d angle: %d ~~~~~~~~~~~~~~~\n",
+             upsample_above_, upsample_left_, angles[i] + start_angle_);
+      if (dx_ && dy_) RunTest(true);
+    }
+  }
+}
+
+INSTANTIATE_TEST_CASE_P(
+    C, HighbdDrPredTest,
+    ::testing::Values(
+        DrPredFunc<DrPred_Hbd>(&z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
+                               NULL, AOM_BITS_8, kZ1Start),
+        DrPredFunc<DrPred_Hbd>(&z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
+                               NULL, AOM_BITS_10, kZ1Start),
+        DrPredFunc<DrPred_Hbd>(&z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
+                               NULL, AOM_BITS_12, kZ1Start),
+        DrPredFunc<DrPred_Hbd>(&z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>,
+                               NULL, AOM_BITS_8, kZ2Start),
+        DrPredFunc<DrPred_Hbd>(&z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>,
+                               NULL, AOM_BITS_10, kZ2Start),
+        DrPredFunc<DrPred_Hbd>(&z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>,
+                               NULL, AOM_BITS_12, kZ2Start),
+        DrPredFunc<DrPred_Hbd>(&z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
+                               NULL, AOM_BITS_8, kZ3Start),
+        DrPredFunc<DrPred_Hbd>(&z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
+                               NULL, AOM_BITS_10, kZ3Start),
+        DrPredFunc<DrPred_Hbd>(&z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
+                               NULL, AOM_BITS_12, kZ3Start)));
+
+}  // namespace
diff --git a/third_party/aom/test/dump_obu.sh b/third_party/aom/test/dump_obu.sh
new file mode 100755
index 0000000000..182e894f56
--- /dev/null
+++ b/third_party/aom/test/dump_obu.sh
@@ -0,0 +1,70 @@
+#!/bin/sh
+## Copyright (c) 2018, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+## This file tests the libaom dump_obu tool. To add new tests to this
+## file, do the following:
+##   1. Write a shell function (this is your test).
+##   2. Add the function to dump_obu_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+readonly dump_obu_test_file="${AOM_TEST_OUTPUT_DIR}/av1_obu_test.ivf"
+
+dump_obu_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    elog "The file ${YUV_RAW_INPUT##*/} must exist in LIBAOM_TEST_DATA_PATH."
+    return 1
+  fi
+  if [ "$(dump_obu_available)" = "yes" ]; then
+    if [ -z "$(aom_tool_path dump_obu)" ]; then
+      elog "dump_obu not found in LIBAOM_BIN_PATH, its parent, or child tools/."
+    fi
+  fi
+}
+
+dump_obu_available() {
+  if [ "$(av1_decode_available)" = "yes" ] && \
+     [ "$(av1_encode_available)" = "yes" ]; then
+    echo yes
+  fi
+}
+
+aomenc_available() {
+  if [ -x "$(aom_tool_path aomenc)" ]; then
+    echo yes
+  fi
+}
+
+encode_test_file() {
+  if [ "$(aomenc_available)" = "yes" ]; then
+    local readonly encoder="$(aom_tool_path aomenc)"
+
+    eval "${encoder}" \
+      $(aomenc_encode_test_fast_params) \
+      $(yuv_raw_input) \
+      --ivf \
+      --output=${dump_obu_test_file} \
+      ${devnull}
+
+    if [ ! -e "${dump_obu_test_file}" ]; then
+      elog "dump_obu test input encode failed."
+      return 1
+    fi
+  fi
+}
+
+dump_obu() {
+  encode_test_file
+  eval $(aom_tool_path dump_obu) "${dump_obu_test_file}" ${devnull}
+}
+
+dump_obu_tests="dump_obu"
+
+run_tests dump_obu_verify_environment "${dump_obu_tests}"
diff --git a/third_party/aom/test/ec_test.cc b/third_party/aom/test/ec_test.cc
new file mode 100644
index 0000000000..e6a5ea63bf
--- /dev/null
+++ b/third_party/aom/test/ec_test.cc
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include <cstdlib>
+
+#include "aom_dsp/entenc.h"
+#include "aom_dsp/entdec.h"
+
+TEST(EC_TEST, random_ec_test) {
+  od_ec_enc enc;
+  od_ec_dec dec;
+  int sz;
+  int i;
+  int ret;
+  unsigned int sym;
+  unsigned int seed;
+  unsigned char *ptr;
+  uint32_t ptr_sz;
+  char *seed_str;
+  ret = 0;
+  seed_str = getenv("EC_TEST_SEED");
+  if (seed_str) {
+    seed = atoi(seed_str);
+  } else {
+    seed = 0xdaa1a;
+  }
+  srand(seed);
+  od_ec_enc_init(&enc, 1);
+  /*Test compatibility between multiple different encode/decode routines.*/
+  for (i = 0; i < 409600; i++) {
+    unsigned *fz;
+    unsigned *fts;
+    unsigned *data;
+    unsigned *tell;
+    unsigned *enc_method;
+    int j;
+    sz = rand() / ((RAND_MAX >> (rand() % 9U)) + 1U);
+    fz = (unsigned *)malloc(sz * sizeof(*fz));
+    fts = (unsigned *)malloc(sz * sizeof(*fts));
+    data = (unsigned *)malloc(sz * sizeof(*data));
+    tell = (unsigned *)malloc((sz + 1) * sizeof(*tell));
+    enc_method = (unsigned *)malloc(sz * sizeof(*enc_method));
+    od_ec_enc_reset(&enc);
+    tell[0] = od_ec_enc_tell_frac(&enc);
+    for (j = 0; j < sz; j++) {
+      data[j] = rand() / ((RAND_MAX >> 1) + 1);
+
+      fts[j] = CDF_PROB_BITS;
+      fz[j] = (rand() % (CDF_PROB_TOP - 2)) >> (CDF_PROB_BITS - fts[j]);
+      fz[j] = OD_MAXI(fz[j], 1);
+      enc_method[j] = 3 + (rand() & 1);
+      switch (enc_method[j]) {
+        case 3: {
+          od_ec_encode_bool_q15(&enc, data[j],
+                                OD_ICDF(fz[j] << (CDF_PROB_BITS - fts[j])));
+          break;
+        }
+        case 4: {
+          uint16_t cdf[2];
+          cdf[0] = OD_ICDF(fz[j]);
+          cdf[1] = OD_ICDF(1U << fts[j]);
+          od_ec_encode_cdf_q15(&enc, data[j], cdf, 2);
+          break;
+        }
+      }
+
+      tell[j + 1] = od_ec_enc_tell_frac(&enc);
+    }
+    ptr = od_ec_enc_done(&enc, &ptr_sz);
+    EXPECT_GE(((od_ec_enc_tell(&enc) + 7U) >> 3), ptr_sz)
+        << "od_ec_enc_tell() lied: "
+           "there's "
+        << ptr_sz << " bytes instead of " << ((od_ec_enc_tell(&enc) + 7) >> 3)
+        << " (Random seed: " << seed << ")\n";
+    od_ec_dec_init(&dec, ptr, ptr_sz);
+    EXPECT_EQ(od_ec_dec_tell_frac(&dec), tell[0])
+        << "od_ec_dec_tell() mismatch between encoder and decoder "
+           "at symbol 0: "
+        << (unsigned)od_ec_dec_tell_frac(&dec) << " instead of " << tell[0]
+        << " (Random seed: " << seed << ").\n";
+    for (j = 0; j < sz; j++) {
+      int dec_method;
+      if (CDF_SHIFT == 0) {
+        dec_method = 3 + (rand() & 1);
+      } else {
+        dec_method = enc_method[j];
+      }
+      switch (dec_method) {
+        case 3: {
+          sym = od_ec_decode_bool_q15(
+              &dec, OD_ICDF(fz[j] << (CDF_PROB_BITS - fts[j])));
+          break;
+        }
+        case 4: {
+          uint16_t cdf[2];
+          cdf[0] = OD_ICDF(fz[j]);
+          cdf[1] = OD_ICDF(1U << fts[j]);
+          sym = od_ec_decode_cdf_q15(&dec, cdf, 2);
+          break;
+        }
+      }
+
+      EXPECT_EQ(sym, data[j])
+          << "Decoded " << sym << " instead of " << data[j]
+          << " with fz=" << fz[j] << " and ftb=" << fts[j] << "at position "
+          << j << " of " << sz << " (Random seed: " << seed << ").\n"
+          << "Encoding method: " << enc_method[j]
+          << " decoding method: " << dec_method << "\n";
+      EXPECT_EQ(od_ec_dec_tell_frac(&dec), tell[j + 1])
+          << "od_ec_dec_tell() mismatch between encoder and "
+             "decoder at symbol "
+          << j + 1 << ": " << (unsigned)od_ec_dec_tell_frac(&dec)
+          << " instead of " << tell[j + 1] << " (Random seed: " << seed
+          << ").\n";
+    }
+    free(enc_method);
+    free(tell);
+    free(data);
+    free(fts);
+    free(fz);
+  }
+  od_ec_enc_reset(&enc);
+  if (CDF_SHIFT == 0) {
+    od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
+    od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
+    od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
+    od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
+    od_ec_encode_bool_q15(&enc, 0, OD_ICDF(24576));
+    od_ec_enc_patch_initial_bits(&enc, 3, 2);
+    EXPECT_FALSE(enc.error) << "od_ec_enc_patch_initial_bits() failed.\n";
+    od_ec_enc_patch_initial_bits(&enc, 0, 5);
+    EXPECT_TRUE(enc.error)
+        << "od_ec_enc_patch_initial_bits() didn't fail when it should have.\n";
+    od_ec_enc_reset(&enc);
+    od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
+    od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
+    od_ec_encode_bool_q15(&enc, 1, OD_ICDF(32256));
+    od_ec_encode_bool_q15(&enc, 0, OD_ICDF(24576));
+    od_ec_enc_patch_initial_bits(&enc, 0, 2);
+    EXPECT_FALSE(enc.error) << "od_ec_enc_patch_initial_bits() failed.\n";
+    ptr = od_ec_enc_done(&enc, &ptr_sz);
+    EXPECT_EQ(ptr_sz, 2u);
+    EXPECT_EQ(ptr[0], 63)
+        << "Got " << ptr[0]
+        << " when expecting 63 for od_ec_enc_patch_initial_bits().\n";
+  }
+  od_ec_enc_clear(&enc);
+  EXPECT_EQ(ret, 0);
+}
diff --git a/third_party/aom/test/encode_api_test.cc b/third_party/aom/test/encode_api_test.cc
index 80c42fee48..c469d08719 100644
--- a/third_party/aom/test/encode_api_test.cc
+++ b/third_party/aom/test/encode_api_test.cc
@@ -7,11 +7,12 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
 #include "test/util.h"
 #include "aom/aomcx.h"
 #include "aom/aom_encoder.h"
@@ -33,8 +34,8 @@ TEST(EncodeAPI, InvalidParams) {
 
   EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(NULL, NULL, NULL, 0));
   EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, NULL, NULL, 0));
-  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_encode(NULL, NULL, 0, 0, 0, 0));
-  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_encode(NULL, &img, 0, 0, 0, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_encode(NULL, NULL, 0, 0, 0));
+  EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_encode(NULL, &img, 0, 0, 0));
   EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_destroy(NULL));
   EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
             aom_codec_enc_config_default(NULL, NULL, 0));
@@ -53,7 +54,7 @@ TEST(EncodeAPI, InvalidParams) {
 
     EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(kCodecs[i], &cfg, 0));
     EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, kCodecs[i], &cfg, 0));
-    EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, NULL, 0, 0, 0, 0));
+    EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, NULL, 0, 0, 0));
 
     EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
   }
diff --git a/third_party/aom/test/encode_perf_test.cc b/third_party/aom/test/encode_perf_test.cc
index 5a37b480b4..fe649b1539 100644
--- a/third_party/aom/test/encode_perf_test.cc
+++ b/third_party/aom/test/encode_perf_test.cc
@@ -7,12 +7,14 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <string>
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "./aom_config.h"
-#include "./aom_version.h"
+
+#include "config/aom_config.h"
+#include "config/aom_version.h"
+
 #include "test/codec_factory.h"
 #include "test/encode_test_driver.h"
 #include "test/i420_video_source.h"
diff --git a/third_party/aom/test/encode_test_driver.cc b/third_party/aom/test/encode_test_driver.cc
index 6941f0148b..b75d7be16e 100644
--- a/third_party/aom/test/encode_test_driver.cc
+++ b/third_party/aom/test/encode_test_driver.cc
@@ -7,13 +7,14 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <string>
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
 #include "aom_ports/mem.h"
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
@@ -34,21 +35,6 @@ void Encoder::InitEncoder(VideoSource *video) {
 
     res = aom_codec_enc_init(&encoder_, CodecInterface(), &cfg_, init_flags_);
     ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
-
-#if CONFIG_AV1_ENCODER
-    if (CodecInterface() == &aom_codec_av1_cx_algo) {
-// Default to 1 tile column for AV1. With CONFIG_EXT_TILE, the
-// default is already the largest possible tile size
-#if !CONFIG_EXT_TILE
-      const int log2_tile_columns = 0;
-      res = aom_codec_control_(&encoder_, AV1E_SET_TILE_COLUMNS,
-                               log2_tile_columns);
-      ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
-#endif  // !CONFIG_EXT_TILE
-    } else
-#endif
-    {
-    }
   }
 }
 
@@ -82,15 +68,14 @@ void Encoder::EncodeFrameInternal(const VideoSource &video,
   }
 
   // Encode the frame
-  API_REGISTER_STATE_CHECK(res = aom_codec_encode(&encoder_, img, video.pts(),
-                                                  video.duration(), frame_flags,
-                                                  deadline_));
+  API_REGISTER_STATE_CHECK(res =
+                               aom_codec_encode(&encoder_, img, video.pts(),
+                                                video.duration(), frame_flags));
   ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
 }
 
 void Encoder::Flush() {
-  const aom_codec_err_t res =
-      aom_codec_encode(&encoder_, NULL, 0, 0, 0, deadline_);
+  const aom_codec_err_t res = aom_codec_encode(&encoder_, NULL, 0, 0, 0);
   if (!encoder_.priv)
     ASSERT_EQ(AOM_CODEC_ERROR, res) << EncoderError();
   else
@@ -105,11 +90,8 @@ void EncoderTest::InitializeConfig() {
 void EncoderTest::SetMode(TestMode mode) {
   switch (mode) {
     case kOnePassGood:
-    case kTwoPassGood: deadline_ = AOM_DL_GOOD_QUALITY; break;
-    case kRealTime:
-      deadline_ = AOM_DL_GOOD_QUALITY;
-      cfg_.g_lag_in_frames = 0;
-      break;
+    case kTwoPassGood: break;
+    case kRealTime: cfg_.g_lag_in_frames = 0; break;
     default: ASSERT_TRUE(false) << "Unexpected mode " << mode;
   }
   mode_ = mode;
@@ -149,14 +131,16 @@ static bool compare_img(const aom_image_t *img1, const aom_image_t *img2,
                         int *const mismatch_row, int *const mismatch_col,
                         int *const mismatch_plane, int *const mismatch_pix1,
                         int *const mismatch_pix2) {
-  if (img1->fmt != img2->fmt || img1->cs != img2->cs ||
-      img1->d_w != img2->d_w || img1->d_h != img2->d_h) {
+  if (img1->fmt != img2->fmt || img1->cp != img2->cp || img1->tc != img2->tc ||
+      img1->mc != img2->mc || img1->d_w != img2->d_w ||
+      img1->d_h != img2->d_h || img1->monochrome != img2->monochrome) {
     if (mismatch_row != NULL) *mismatch_row = -1;
     if (mismatch_col != NULL) *mismatch_col = -1;
     return false;
   }
 
-  for (int plane = 0; plane < 3; plane++) {
+  const int num_planes = img1->monochrome ? 1 : 3;
+  for (int plane = 0; plane < num_planes; plane++) {
     if (!compare_plane(img1->planes[plane], img1->stride[plane],
                        img2->planes[plane], img2->stride[plane],
                        aom_img_plane_width(img1, plane),
@@ -209,7 +193,7 @@ void EncoderTest::RunLoop(VideoSource *video) {
 
     BeginPassHook(pass);
     testing::internal::scoped_ptr<Encoder> encoder(
-        codec_->CreateEncoder(cfg_, deadline_, init_flags_, &stats_));
+        codec_->CreateEncoder(cfg_, init_flags_, &stats_));
     ASSERT_TRUE(encoder.get() != NULL);
 
     ASSERT_NO_FATAL_FAILURE(video->Begin());
@@ -228,10 +212,11 @@ void EncoderTest::RunLoop(VideoSource *video) {
       dec_init_flags |= AOM_CODEC_USE_INPUT_FRAGMENTS;
     testing::internal::scoped_ptr<Decoder> decoder(
         codec_->CreateDecoder(dec_cfg, dec_init_flags));
-#if CONFIG_AV1 && CONFIG_EXT_TILE
+#if CONFIG_AV1_DECODER
     if (decoder->IsAV1()) {
       // Set dec_cfg.tile_row = -1 and dec_cfg.tile_col = -1 so that the whole
       // frame is decoded.
+      decoder->Control(AV1_SET_TILE_MODE, cfg_.large_scale_tile);
       decoder->Control(AV1_SET_DECODE_TILE_ROW, -1);
       decoder->Control(AV1_SET_DECODE_TILE_COL, -1);
     }
@@ -256,8 +241,16 @@ void EncoderTest::RunLoop(VideoSource *video) {
           case AOM_CODEC_CX_FRAME_PKT:
             has_cxdata = true;
             if (decoder.get() != NULL && DoDecode()) {
-              aom_codec_err_t res_dec = decoder->DecodeFrame(
-                  (const uint8_t *)pkt->data.frame.buf, pkt->data.frame.sz);
+              aom_codec_err_t res_dec;
+              if (DoDecodeInvisible()) {
+                res_dec = decoder->DecodeFrame(
+                    (const uint8_t *)pkt->data.frame.buf, pkt->data.frame.sz);
+              } else {
+                res_dec = decoder->DecodeFrame(
+                    (const uint8_t *)pkt->data.frame.buf +
+                        (pkt->data.frame.sz - pkt->data.frame.vis_frame_size),
+                    pkt->data.frame.vis_frame_size);
+              }
 
               if (!HandleDecodeResult(res_dec, decoder.get())) break;
 
diff --git a/third_party/aom/test/encode_test_driver.h b/third_party/aom/test/encode_test_driver.h
index 97c1bf860d..138cd6a675 100644
--- a/third_party/aom/test/encode_test_driver.h
+++ b/third_party/aom/test/encode_test_driver.h
@@ -16,7 +16,8 @@
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
 #if CONFIG_AV1_ENCODER
 #include "aom/aomcx.h"
 #endif
@@ -37,6 +38,9 @@ enum TestMode { kRealTime, kOnePassGood, kTwoPassGood };
 
 #define TWO_PASS_TEST_MODES ::testing::Values(::libaom_test::kTwoPassGood)
 
+#define NONREALTIME_TEST_MODES \
+  ::testing::Values(::libaom_test::kOnePassGood, ::libaom_test::kTwoPassGood)
+
 // Provides an object to handle the libaom get_cx_data() iteration pattern
 class CxDataIterator {
  public:
@@ -78,9 +82,9 @@ class TwopassStatsStore {
 // level of abstraction will be fleshed out as more tests are written.
 class Encoder {
  public:
-  Encoder(aom_codec_enc_cfg_t cfg, unsigned long deadline,
-          const unsigned long init_flags, TwopassStatsStore *stats)
-      : cfg_(cfg), deadline_(deadline), init_flags_(init_flags), stats_(stats) {
+  Encoder(aom_codec_enc_cfg_t cfg, const uint32_t init_flags,
+          TwopassStatsStore *stats)
+      : cfg_(cfg), init_flags_(init_flags), stats_(stats) {
     memset(&encoder_, 0, sizeof(encoder_));
   }
 
@@ -128,8 +132,6 @@ class Encoder {
     cfg_ = *cfg;
   }
 
-  void set_deadline(unsigned long deadline) { deadline_ = deadline; }
-
  protected:
   virtual aom_codec_iface_t *CodecInterface() const = 0;
 
@@ -147,7 +149,6 @@ class Encoder {
 
   aom_codec_ctx_t encoder_;
   aom_codec_enc_cfg_t cfg_;
-  unsigned long deadline_;
   unsigned long init_flags_;
   TwopassStatsStore *stats_;
 };
@@ -173,7 +174,7 @@ class EncoderTest {
   // Initialize the cfg_ member with the default configuration.
   void InitializeConfig();
 
-  // Map the TestMode enum to the deadline_ and passes_ variables.
+  // Map the TestMode enum to the passes_ variables.
   void SetMode(TestMode mode);
 
   // Set encoder flag.
@@ -206,9 +207,11 @@ class EncoderTest {
     return !(::testing::Test::HasFatalFailure() || abort_);
   }
 
-  const CodecFactory *codec_;
   // Hook to determine whether to decode frame after encoding
-  virtual bool DoDecode() const { return 1; }
+  virtual bool DoDecode() const { return true; }
+
+  // Hook to determine whether to decode invisible frames after encoding
+  virtual bool DoDecodeInvisible() const { return true; }
 
   // Hook to handle encode/decode mismatch
   virtual void MismatchHook(const aom_image_t *img1, const aom_image_t *img2);
@@ -230,10 +233,10 @@ class EncoderTest {
     return pkt;
   }
 
+  const CodecFactory *codec_;
   bool abort_;
   aom_codec_enc_cfg_t cfg_;
   unsigned int passes_;
-  unsigned long deadline_;
   TwopassStatsStore stats_;
   unsigned long init_flags_;
   unsigned long frame_flags_;
diff --git a/third_party/aom/test/encoder_parms_get_to_decoder.cc b/third_party/aom/test/encoder_parms_get_to_decoder.cc
deleted file mode 100644
index 227ee8246a..0000000000
--- a/third_party/aom/test/encoder_parms_get_to_decoder.cc
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/util.h"
-#include "test/y4m_video_source.h"
-#include "av1/av1_dx_iface.c"
-
-namespace {
-
-const int kCpuUsed = 2;
-
-struct EncodePerfTestVideo {
-  const char *name;
-  uint32_t width;
-  uint32_t height;
-  uint32_t bitrate;
-  int frames;
-};
-
-const EncodePerfTestVideo kAV1EncodePerfTestVectors[] = {
-  { "niklas_1280_720_30.y4m", 1280, 720, 600, 10 },
-};
-
-struct EncodeParameters {
-  int32_t tile_rows;
-  int32_t tile_cols;
-  int32_t lossless;
-  int32_t error_resilient;
-  int32_t frame_parallel;
-  aom_color_range_t color_range;
-  aom_color_space_t cs;
-#if CONFIG_COLORSPACE_HEADERS
-  aom_transfer_function_t tf;
-  aom_chroma_sample_position_t csp;
-#endif
-  int render_size[2];
-  // TODO(JBB): quantizers / bitrate
-};
-
-const EncodeParameters kAV1EncodeParameterSet[] = {
-  { 0, 0, 0, 1, 0, AOM_CR_STUDIO_RANGE, AOM_CS_BT_601, { 0, 0 } },
-  { 0, 0, 0, 0, 0, AOM_CR_FULL_RANGE, AOM_CS_BT_709, { 0, 0 } },
-#if CONFIG_COLORSPACE_HEADERS
-  { 0, 0, 1, 0, 0, AOM_CR_FULL_RANGE, AOM_CS_BT_2020_NCL, { 0, 0 } },
-#else
-  { 0, 0, 1, 0, 0, AOM_CR_FULL_RANGE, AOM_CS_BT_2020, { 0, 0 } },
-#endif
-  { 0, 2, 0, 0, 1, AOM_CR_STUDIO_RANGE, AOM_CS_UNKNOWN, { 640, 480 } },
-  // TODO(JBB): Test profiles (requires more work).
-};
-
-class AvxEncoderParmsGetToDecoder
-    : public ::libaom_test::CodecTestWith2Params<EncodeParameters,
-                                                 EncodePerfTestVideo>,
-      public ::libaom_test::EncoderTest,
-{
- protected:
-  AvxEncoderParmsGetToDecoder()
-      : EncoderTest(GET_PARAM(0)), encode_parms(GET_PARAM(1)) {}
-
-  virtual ~AvxEncoderParmsGetToDecoder() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(::libaom_test::kTwoPassGood);
-    cfg_.g_lag_in_frames = 25;
-    cfg_.g_error_resilient = encode_parms.error_resilient;
-    dec_cfg_.threads = 4;
-    test_video_ = GET_PARAM(2);
-    cfg_.rc_target_bitrate = test_video_.bitrate;
-  }
-
-  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
-                                  ::libaom_test::Encoder *encoder) {
-    if (video->frame() == 1) {
-      encoder->Control(AV1E_SET_COLOR_SPACE, encode_parms.cs);
-#if CONFIG_COLORSPACE_HEADERS
-      encoder->Control(AV1E_SET_TRANSFER_FUNCTION, encode_parms.tf);
-      encoder->Control(AV1E_SET_CHROMA_SAMPLE_POSITION, encode_parms.csp);
-#endif
-      encoder->Control(AV1E_SET_COLOR_RANGE, encode_parms.color_range);
-      encoder->Control(AV1E_SET_LOSSLESS, encode_parms.lossless);
-      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING,
-                       encode_parms.frame_parallel);
-      encoder->Control(AV1E_SET_TILE_ROWS, encode_parms.tile_rows);
-      encoder->Control(AV1E_SET_TILE_COLUMNS, encode_parms.tile_cols);
-      encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
-      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
-      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
-      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
-      if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0)
-        encoder->Control(AV1E_SET_RENDER_SIZE, encode_parms.render_size);
-    }
-  }
-
-  virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
-                                  libaom_test::Decoder *decoder) {
-    aom_codec_ctx_t *const av1_decoder = decoder->GetDecoder();
-    aom_codec_alg_priv_t *const priv =
-        reinterpret_cast<aom_codec_alg_priv_t *>(av1_decoder->priv);
-    FrameWorkerData *const worker_data =
-        reinterpret_cast<FrameWorkerData *>(priv->frame_workers[0].data1);
-    AV1_COMMON *const common = &worker_data->pbi->common;
-
-    if (encode_parms.lossless) {
-      EXPECT_EQ(0, common->base_qindex);
-      EXPECT_EQ(0, common->y_dc_delta_q);
-      EXPECT_EQ(0, common->uv_dc_delta_q);
-      EXPECT_EQ(0, common->uv_ac_delta_q);
-      EXPECT_EQ(ONLY_4X4, common->tx_mode);
-    }
-    EXPECT_EQ(encode_parms.error_resilient, common->error_resilient_mode);
-    if (encode_parms.error_resilient) {
-      EXPECT_EQ(0, common->use_prev_frame_mvs);
-    }
-    EXPECT_EQ(encode_parms.color_range, common->color_range);
-    EXPECT_EQ(encode_parms.cs, common->color_space);
-#if CONFIG_COLORSPACE_HEADERS
-    EXPECT_EQ(encode_parms.tf, common->transfer_function);
-    EXPECT_EQ(encode_parms.csp, common->chroma_sample_position);
-#endif
-    if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) {
-      EXPECT_EQ(encode_parms.render_size[0], common->render_width);
-      EXPECT_EQ(encode_parms.render_size[1], common->render_height);
-    }
-    EXPECT_EQ(encode_parms.tile_cols, common->log2_tile_cols);
-    EXPECT_EQ(encode_parms.tile_rows, common->log2_tile_rows);
-
-    EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
-    return AOM_CODEC_OK == res_dec;
-  }
-
-  EncodePerfTestVideo test_video_;
-
- private:
-  EncodeParameters encode_parms;
-};
-
-TEST_P(AvxEncoderParmsGetToDecoder, BitstreamParms) {
-  init_flags_ = AOM_CODEC_USE_PSNR;
-
-  testing::internal::scoped_ptr<libaom_test::VideoSource> video(
-      new libaom_test::Y4mVideoSource(test_video_.name, 0, test_video_.frames));
-  ASSERT_TRUE(video.get() != NULL);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
-}
-
-AV1_INSTANTIATE_TEST_CASE(AvxEncoderParmsGetToDecoder,
-                          ::testing::ValuesIn(kAV1EncodeParameterSet),
-                          ::testing::ValuesIn(kAV1EncodePerfTestVectors));
-}  // namespace
diff --git a/third_party/aom/test/encodetxb_test.cc b/third_party/aom/test/encodetxb_test.cc
new file mode 100644
index 0000000000..ab6ec72c6d
--- /dev/null
+++ b/third_party/aom/test/encodetxb_test.cc
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/aom_timer.h"
+#include "aom_ports/mem.h"
+#include "av1/common/idct.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/scan.h"
+#include "av1/common/txb_common.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+using libaom_test::ACMRandom;
+
+typedef void (*GetNzMapContextsFunc)(const uint8_t *const levels,
+                                     const int16_t *const scan,
+                                     const uint16_t eob, const TX_SIZE tx_size,
+                                     const TX_CLASS tx_class,
+                                     int8_t *const coeff_contexts);
+
+class EncodeTxbTest : public ::testing::TestWithParam<GetNzMapContextsFunc> {
+ public:
+  EncodeTxbTest() : get_nz_map_contexts_func_(GetParam()) {}
+
+  virtual ~EncodeTxbTest() {}
+
+  virtual void SetUp() {
+    coeff_contexts_ref_ = reinterpret_cast<int8_t *>(
+        aom_memalign(16, sizeof(*coeff_contexts_ref_) * MAX_TX_SQUARE));
+    ASSERT_TRUE(coeff_contexts_ref_ != NULL);
+    coeff_contexts_ = reinterpret_cast<int8_t *>(
+        aom_memalign(16, sizeof(*coeff_contexts_) * MAX_TX_SQUARE));
+    ASSERT_TRUE(coeff_contexts_ != NULL);
+  }
+
+  virtual void TearDown() {
+    aom_free(coeff_contexts_ref_);
+    aom_free(coeff_contexts_);
+    libaom_test::ClearSystemState();
+  }
+
+  void GetNzMapContextsRun() {
+    const int kNumTests = 10;
+    int result = 0;
+
+    for (int is_inter = 0; is_inter < 2; ++is_inter) {
+      for (int tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+        const TX_CLASS tx_class = tx_type_to_class[tx_type];
+        for (int tx_size = TX_4X4; tx_size < TX_SIZES_ALL; ++tx_size) {
+          const int bwl = get_txb_bwl((TX_SIZE)tx_size);
+          const int width = get_txb_wide((TX_SIZE)tx_size);
+          const int height = get_txb_high((TX_SIZE)tx_size);
+          const int real_width = tx_size_wide[tx_size];
+          const int real_height = tx_size_high[tx_size];
+          const int16_t *const scan = av1_scan_orders[tx_size][tx_type].scan;
+
+          levels_ = set_levels(levels_buf_, width);
+          for (int i = 0; i < kNumTests && !result; ++i) {
+            for (int eob = 1; eob <= width * height && !result; ++eob) {
+              InitDataWithEob(scan, bwl, eob);
+
+              av1_get_nz_map_contexts_c(levels_, scan, eob, (TX_SIZE)tx_size,
+                                        tx_class, coeff_contexts_ref_);
+              get_nz_map_contexts_func_(levels_, scan, eob, (TX_SIZE)tx_size,
+                                        tx_class, coeff_contexts_);
+
+              result = Compare(scan, eob);
+
+              EXPECT_EQ(result, 0)
+                  << " tx_class " << tx_class << " width " << real_width
+                  << " height " << real_height << " eob " << eob;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  void SpeedTestGetNzMapContextsRun() {
+    const int kNumTests = 2000000000;
+    aom_usec_timer timer;
+
+    printf("Note: Only test the largest possible eob case!\n");
+    for (int tx_size = TX_4X4; tx_size < TX_SIZES_ALL; ++tx_size) {
+      const int bwl = get_txb_bwl((TX_SIZE)tx_size);
+      const int width = get_txb_wide((TX_SIZE)tx_size);
+      const int height = get_txb_high((TX_SIZE)tx_size);
+      const int real_width = tx_size_wide[tx_size];
+      const int real_height = tx_size_high[tx_size];
+      const TX_TYPE tx_type = DCT_DCT;
+      const TX_CLASS tx_class = tx_type_to_class[tx_type];
+      const int16_t *const scan = av1_scan_orders[tx_size][tx_type].scan;
+      const int eob = width * height;
+      const int numTests = kNumTests / (width * height);
+
+      levels_ = set_levels(levels_buf_, width);
+      InitDataWithEob(scan, bwl, eob);
+
+      aom_usec_timer_start(&timer);
+      for (int i = 0; i < numTests; ++i) {
+        get_nz_map_contexts_func_(levels_, scan, eob, (TX_SIZE)tx_size,
+                                  tx_class, coeff_contexts_);
+      }
+      aom_usec_timer_mark(&timer);
+
+      const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+      printf("get_nz_map_contexts_%2dx%2d: %7.1f ms\n", real_width, real_height,
+             elapsed_time / 1000.0);
+    }
+  }
+
+ private:
+  void InitDataWithEob(const int16_t *const scan, const int bwl,
+                       const int eob) {
+    memset(levels_buf_, 0, sizeof(levels_buf_));
+    memset(coeff_contexts_, 0, sizeof(*coeff_contexts_) * MAX_TX_SQUARE);
+
+    for (int c = 0; c < eob; ++c) {
+      levels_[get_padded_idx(scan[c], bwl)] =
+          static_cast<uint8_t>(clamp(rnd_.Rand8(), 0, INT8_MAX));
+      coeff_contexts_[scan[c]] = rnd_.Rand16() >> 1;
+    }
+
+    memcpy(coeff_contexts_ref_, coeff_contexts_,
+           sizeof(*coeff_contexts_) * MAX_TX_SQUARE);
+  }
+
+  bool Compare(const int16_t *const scan, const int eob) const {
+    bool result = false;
+    if (memcmp(coeff_contexts_, coeff_contexts_ref_,
+               sizeof(*coeff_contexts_ref_) * MAX_TX_SQUARE)) {
+      for (int i = 0; i < eob; i++) {
+        const int pos = scan[i];
+        if (coeff_contexts_ref_[pos] != coeff_contexts_[pos]) {
+          printf("coeff_contexts_[%d] diff:%6d (ref),%6d (opt)\n", pos,
+                 coeff_contexts_ref_[pos], coeff_contexts_[pos]);
+          result = true;
+          break;
+        }
+      }
+    }
+    return result;
+  }
+
+  GetNzMapContextsFunc get_nz_map_contexts_func_;
+  ACMRandom rnd_;
+  uint8_t levels_buf_[TX_PAD_2D];
+  uint8_t *levels_;
+  int8_t *coeff_contexts_ref_;
+  int8_t *coeff_contexts_;
+};
+
+TEST_P(EncodeTxbTest, GetNzMapContexts) { GetNzMapContextsRun(); }
+
+TEST_P(EncodeTxbTest, DISABLED_SpeedTestGetNzMapContexts) {
+  SpeedTestGetNzMapContextsRun();
+}
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, EncodeTxbTest,
+                        ::testing::Values(av1_get_nz_map_contexts_sse2));
+#endif
+
+#if HAVE_SSE4_1
+class EncodeTxbInitLevelTest : public ::testing::TestWithParam<int> {
+ public:
+  virtual ~EncodeTxbInitLevelTest() {}
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+  void RunTest(int tx_size, int is_speed);
+};
+
+void EncodeTxbInitLevelTest::RunTest(int tx_size, int is_speed) {
+  const int width = get_txb_wide((TX_SIZE)tx_size);
+  const int height = get_txb_high((TX_SIZE)tx_size);
+  tran_low_t coeff[MAX_TX_SQUARE];
+
+  uint8_t levels_buf[2][TX_PAD_2D];
+  uint8_t *const levels0 = set_levels(levels_buf[0], width);
+  uint8_t *const levels1 = set_levels(levels_buf[1], width);
+
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  for (int i = 0; i < width * height; i++) {
+    coeff[i] = rnd.Rand15Signed() + rnd.Rand15Signed();
+  }
+  for (int i = 0; i < TX_PAD_2D; i++) {
+    levels_buf[0][i] = rnd.Rand8();
+    levels_buf[1][i] = rnd.Rand8();
+  }
+  const int run_times = is_speed ? (width * height) * 10000 : 1;
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < run_times; ++i) {
+    av1_txb_init_levels_c(coeff, width, height, levels0);
+  }
+  const double t1 = get_time_mark(&timer);
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < run_times; ++i) {
+    av1_txb_init_levels_sse4_1(coeff, width, height, levels1);
+  }
+  const double t2 = get_time_mark(&timer);
+  if (is_speed) {
+    printf("init %3dx%-3d:%7.2f/%7.2fns", width, height, t1, t2);
+    printf("(%3.2f)\n", t1 / t2);
+  }
+  const int stride = width + TX_PAD_HOR;
+  for (int r = 0; r < height + TX_PAD_VER; ++r) {
+    for (int c = 0; c < stride; ++c) {
+      ASSERT_EQ(levels_buf[0][c + r * stride], levels_buf[1][c + r * stride])
+          << "[" << r << "," << c << "] " << run_times << width << "x"
+          << height;
+    }
+  }
+}
+
+TEST_P(EncodeTxbInitLevelTest, match) { RunTest(GetParam(), 0); }
+TEST_P(EncodeTxbInitLevelTest, DISABLED_Speed) { RunTest(GetParam(), 1); }
+
+INSTANTIATE_TEST_CASE_P(SSE4_1, EncodeTxbInitLevelTest,
+                        ::testing::Range(0, static_cast<int>(TX_SIZES_ALL), 1));
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/end_to_end_test.cc b/third_party/aom/test/end_to_end_test.cc
index e1a833ec42..1ac0ae9312 100644
--- a/third_party/aom/test/end_to_end_test.cc
+++ b/third_party/aom/test/end_to_end_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
@@ -30,7 +30,7 @@ const double kPsnrThreshold[][5] = {
 // AV1 HBD average PSNR is slightly lower than AV1.
 // We make two cases here to enable the testing and
 // guard picture quality.
-#if CONFIG_AV1_ENCODER && CONFIG_HIGHBITDEPTH
+#if CONFIG_AV1_ENCODER
   { 36.0, 37.0, 37.0, 37.0, 37.0 }, { 31.0, 36.0, 36.0, 36.0, 36.0 },
   { 31.0, 35.0, 35.0, 35.0, 35.0 }, { 31.0, 34.0, 34.0, 34.0, 34.0 },
   { 31.0, 33.0, 33.0, 33.0, 33.0 }, { 31.0, 32.0, 32.0, 32.0, 32.0 },
@@ -40,7 +40,7 @@ const double kPsnrThreshold[][5] = {
   { 34.0, 35.0, 35.0, 35.0, 35.0 }, { 33.0, 34.0, 34.0, 34.0, 34.0 },
   { 32.0, 33.0, 33.0, 33.0, 33.0 }, { 31.0, 32.0, 32.0, 32.0, 32.0 },
   { 30.0, 31.0, 31.0, 31.0, 31.0 }, { 29.0, 30.0, 30.0, 30.0, 30.0 },
-#endif  // CONFIG_HIGHBITDEPTH && CONFIG_AV1_ENCODER
+#endif  // CONFIG_AV1_ENCODER
 };
 
 typedef struct {
@@ -53,24 +53,20 @@ typedef struct {
 
 const TestVideoParam kTestVectors[] = {
   { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
-  { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422, AOM_BITS_8, 1 },
+  { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422, AOM_BITS_8, 2 },
   { "park_joy_90p_8_444.y4m", 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 },
-  { "park_joy_90p_8_440.yuv", 8, AOM_IMG_FMT_I440, AOM_BITS_8, 1 },
-#if CONFIG_HIGHBITDEPTH
-  { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016, AOM_BITS_10, 2 },
-  { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216, AOM_BITS_10, 3 },
-  { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416, AOM_BITS_10, 3 },
-  { "park_joy_90p_10_440.yuv", 10, AOM_IMG_FMT_I44016, AOM_BITS_10, 3 },
+  { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016, AOM_BITS_10, 0 },
+  { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216, AOM_BITS_10, 2 },
+  { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416, AOM_BITS_10, 1 },
   { "park_joy_90p_12_420.y4m", 12, AOM_IMG_FMT_I42016, AOM_BITS_12, 2 },
-  { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216, AOM_BITS_12, 3 },
-  { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416, AOM_BITS_12, 3 },
-  { "park_joy_90p_12_440.yuv", 12, AOM_IMG_FMT_I44016, AOM_BITS_12, 3 },
-#endif  // CONFIG_HIGHBITDEPTH
+  { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216, AOM_BITS_12, 2 },
+  { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416, AOM_BITS_12, 2 },
 };
 
 // Encoding modes tested
 const libaom_test::TestMode kEncodingModeVectors[] = {
-  ::libaom_test::kTwoPassGood, ::libaom_test::kOnePassGood,
+  ::libaom_test::kTwoPassGood,
+  ::libaom_test::kOnePassGood,
   ::libaom_test::kRealTime,
 };
 
@@ -150,6 +146,32 @@ class EndToEndTest
     return kPsnrThreshold[cpu_used_][encoding_mode_];
   }
 
+  void DoTest() {
+    cfg_.rc_target_bitrate = kBitrate;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    testing::internal::scoped_ptr<libaom_test::VideoSource> video;
+    if (is_extension_y4m(test_video_param_.filename)) {
+      video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                                  kFrames));
+    } else {
+      video.reset(new libaom_test::YUVVideoSource(
+          test_video_param_.filename, test_video_param_.fmt, kWidth, kHeight,
+          kFramerate, 1, 0, kFrames));
+    }
+    ASSERT_TRUE(video.get() != NULL);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double psnr = GetAveragePsnr();
+    EXPECT_GT(psnr, GetPsnrThreshold())
+        << "cpu used = " << cpu_used_ << ", encoding mode = " << encoding_mode_;
+  }
+
   TestVideoParam test_video_param_;
   int cpu_used_;
 
@@ -161,55 +183,9 @@ class EndToEndTest
 
 class EndToEndTestLarge : public EndToEndTest {};
 
-TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) {
-  cfg_.rc_target_bitrate = kBitrate;
-  cfg_.g_error_resilient = 0;
-  cfg_.g_profile = test_video_param_.profile;
-  cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
-  cfg_.g_bit_depth = test_video_param_.bit_depth;
-  init_flags_ = AOM_CODEC_USE_PSNR;
-  if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
-
-  testing::internal::scoped_ptr<libaom_test::VideoSource> video;
-  if (is_extension_y4m(test_video_param_.filename)) {
-    video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
-                                                kFrames));
-  } else {
-    video.reset(new libaom_test::YUVVideoSource(
-        test_video_param_.filename, test_video_param_.fmt, kWidth, kHeight,
-        kFramerate, 1, 0, kFrames));
-  }
-  ASSERT_TRUE(video.get() != NULL);
+TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) { DoTest(); }
 
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
-  const double psnr = GetAveragePsnr();
-  EXPECT_GT(psnr, GetPsnrThreshold());
-}
-
-TEST_P(EndToEndTest, EndtoEndPSNRTest) {
-  cfg_.rc_target_bitrate = kBitrate;
-  cfg_.g_error_resilient = 0;
-  cfg_.g_profile = test_video_param_.profile;
-  cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
-  cfg_.g_bit_depth = test_video_param_.bit_depth;
-  init_flags_ = AOM_CODEC_USE_PSNR;
-  if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
-
-  testing::internal::scoped_ptr<libaom_test::VideoSource> video;
-  if (is_extension_y4m(test_video_param_.filename)) {
-    video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
-                                                kFrames));
-  } else {
-    video.reset(new libaom_test::YUVVideoSource(
-        test_video_param_.filename, test_video_param_.fmt, kWidth, kHeight,
-        kFramerate, 1, 0, kFrames));
-  }
-  ASSERT_TRUE(video.get() != NULL);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
-  const double psnr = GetAveragePsnr();
-  EXPECT_GT(psnr, GetPsnrThreshold());
-}
+TEST_P(EndToEndTest, EndtoEndPSNRTest) { DoTest(); }
 
 AV1_INSTANTIATE_TEST_CASE(EndToEndTestLarge,
                           ::testing::ValuesIn(kEncodingModeVectors),
diff --git a/third_party/aom/test/error_block_test.cc b/third_party/aom/test/error_block_test.cc
index 227065fa90..353947c3d7 100644
--- a/third_party/aom/test/error_block_test.cc
+++ b/third_party/aom/test/error_block_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <cmath>
 #include <cstdlib>
@@ -15,8 +15,9 @@
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
@@ -28,14 +29,13 @@
 using libaom_test::ACMRandom;
 
 namespace {
-#if CONFIG_HIGHBITDEPTH
 const int kNumIterations = 1000;
 
 typedef int64_t (*ErrorBlockFunc)(const tran_low_t *coeff,
                                   const tran_low_t *dqcoeff,
                                   intptr_t block_size, int64_t *ssz, int bps);
 
-typedef std::tr1::tuple<ErrorBlockFunc, ErrorBlockFunc, aom_bit_depth_t>
+typedef ::testing::tuple<ErrorBlockFunc, ErrorBlockFunc, aom_bit_depth_t>
     ErrorBlockParam;
 
 class ErrorBlockTest : public ::testing::TestWithParam<ErrorBlockParam> {
@@ -156,8 +156,8 @@ TEST_P(ErrorBlockTest, ExtremeValues) {
       << "First failed at test case " << first_failure;
 }
 
-#if HAVE_SSE2 || HAVE_AVX
-using std::tr1::make_tuple;
+#if (HAVE_SSE2 || HAVE_AVX)
+using ::testing::make_tuple;
 
 INSTANTIATE_TEST_CASE_P(
     SSE2, ErrorBlockTest,
@@ -168,6 +168,4 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(&av1_highbd_block_error_sse2,
                                  &av1_highbd_block_error_c, AOM_BITS_8)));
 #endif  // HAVE_SSE2
-
-#endif  // CONFIG_HIGHBITDEPTH
 }  // namespace
diff --git a/third_party/aom/test/error_resilience_test.cc b/third_party/aom/test/error_resilience_test.cc
index e9abdde6da..13ac0bf93d 100644
--- a/third_party/aom/test/error_resilience_test.cc
+++ b/third_party/aom/test/error_resilience_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "test/codec_factory.h"
@@ -18,7 +18,13 @@
 namespace {
 
 const int kMaxErrorFrames = 12;
+const int kMaxInvisibleErrorFrames = 12;
 const int kMaxDroppableFrames = 12;
+const int kMaxErrorResilientFrames = 12;
+const int kMaxNoMFMVFrames = 12;
+const int kMaxPrimRefNoneFrames = 12;
+const int kMaxSFrames = 12;
+const int kCpuUsed = 1;
 
 class ErrorResilienceTestLarge
     : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
@@ -26,7 +32,7 @@ class ErrorResilienceTestLarge
  protected:
   ErrorResilienceTestLarge()
       : EncoderTest(GET_PARAM(0)), psnr_(0.0), nframes_(0), mismatch_psnr_(0.0),
-        mismatch_nframes_(0), encoding_mode_(GET_PARAM(1)) {
+        mismatch_nframes_(0), encoding_mode_(GET_PARAM(1)), allow_mismatch_(0) {
     Reset();
   }
 
@@ -34,8 +40,21 @@ class ErrorResilienceTestLarge
 
   void Reset() {
     error_nframes_ = 0;
+    invisible_error_nframes_ = 0;
     droppable_nframes_ = 0;
-    pattern_switch_ = 0;
+    error_resilient_nframes_ = 0;
+    nomfmv_nframes_ = 0;
+    prim_ref_none_nframes_ = 0;
+    s_nframes_ = 0;
+  }
+
+  void SetupEncoder(int bitrate, int lag) {
+    const aom_rational timebase = { 33333333, 1000000000 };
+    cfg_.g_timebase = timebase;
+    cfg_.rc_target_bitrate = bitrate;
+    cfg_.kf_mode = AOM_KF_DISABLED;
+    cfg_.g_lag_in_frames = lag;
+    init_flags_ = AOM_CODEC_USE_PSNR;
   }
 
   virtual void SetUp() {
@@ -46,6 +65,7 @@ class ErrorResilienceTestLarge
   virtual void BeginPassHook(unsigned int /*pass*/) {
     psnr_ = 0.0;
     nframes_ = 0;
+    decoded_nframes_ = 0;
     mismatch_psnr_ = 0.0;
     mismatch_nframes_ = 0;
   }
@@ -55,18 +75,71 @@ class ErrorResilienceTestLarge
     nframes_++;
   }
 
-  virtual void PreEncodeFrameHook(libaom_test::VideoSource *video) {
+  virtual void PreEncodeFrameHook(libaom_test::VideoSource *video,
+                                  libaom_test::Encoder *encoder) {
+    if (video->frame() == 0) encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
     frame_flags_ &=
-        ~(AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF);
+        ~(AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF |
+          AOM_EFLAG_NO_REF_FRAME_MVS | AOM_EFLAG_ERROR_RESILIENT |
+          AOM_EFLAG_SET_S_FRAME | AOM_EFLAG_SET_PRIMARY_REF_NONE);
     if (droppable_nframes_ > 0 &&
         (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
       for (unsigned int i = 0; i < droppable_nframes_; ++i) {
         if (droppable_frames_[i] == video->frame()) {
-          std::cout << "Encoding droppable frame: " << droppable_frames_[i]
-                    << "\n";
+          std::cout << "             Encoding droppable frame: "
+                    << droppable_frames_[i] << "\n";
           frame_flags_ |= (AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF |
                            AOM_EFLAG_NO_UPD_ARF);
-          return;
+          break;
+        }
+      }
+    }
+
+    if (error_resilient_nframes_ > 0 &&
+        (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
+      for (unsigned int i = 0; i < error_resilient_nframes_; ++i) {
+        if (error_resilient_frames_[i] == video->frame()) {
+          std::cout << "             Encoding error_resilient frame: "
+                    << error_resilient_frames_[i] << "\n";
+          frame_flags_ |= AOM_EFLAG_ERROR_RESILIENT;
+          break;
+        }
+      }
+    }
+
+    if (nomfmv_nframes_ > 0 &&
+        (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
+      for (unsigned int i = 0; i < nomfmv_nframes_; ++i) {
+        if (nomfmv_frames_[i] == video->frame()) {
+          std::cout << "             Encoding no mfmv frame: "
+                    << nomfmv_frames_[i] << "\n";
+          frame_flags_ |= AOM_EFLAG_NO_REF_FRAME_MVS;
+          break;
+        }
+      }
+    }
+
+    if (prim_ref_none_nframes_ > 0 &&
+        (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
+      for (unsigned int i = 0; i < prim_ref_none_nframes_; ++i) {
+        if (prim_ref_none_frames_[i] == video->frame()) {
+          std::cout << "             Encoding no PRIMARY_REF_NONE frame: "
+                    << prim_ref_none_frames_[i] << "\n";
+          frame_flags_ |= AOM_EFLAG_SET_PRIMARY_REF_NONE;
+          break;
+        }
+      }
+    }
+
+    encoder->Control(AV1E_SET_S_FRAME_MODE, 0);
+    if (s_nframes_ > 0 &&
+        (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
+      for (unsigned int i = 0; i < s_nframes_; ++i) {
+        if (s_frames_[i] == video->frame()) {
+          std::cout << "             Encoding S frame: " << s_frames_[i]
+                    << "\n";
+          frame_flags_ |= AOM_EFLAG_SET_S_FRAME;
+          break;
         }
       }
     }
@@ -96,12 +169,37 @@ class ErrorResilienceTestLarge
     return 1;
   }
 
+  virtual bool DoDecodeInvisible() const {
+    if (invisible_error_nframes_ > 0 &&
+        (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
+      for (unsigned int i = 0; i < invisible_error_nframes_; ++i) {
+        if (invisible_error_frames_[i] == nframes_ - 1) {
+          std::cout << "             Skipping decoding all invisible frames in "
+                       "frame pkt: "
+                    << invisible_error_frames_[i] << "\n";
+          return 0;
+        }
+      }
+    }
+    return 1;
+  }
+
   virtual void MismatchHook(const aom_image_t *img1, const aom_image_t *img2) {
-    double mismatch_psnr = compute_psnr(img1, img2);
-    mismatch_psnr_ += mismatch_psnr;
-    ++mismatch_nframes_;
-    // std::cout << "Mismatch frame psnr: " << mismatch_psnr << "\n";
-    ::libaom_test::EncoderTest::MismatchHook(img1, img2);
+    if (allow_mismatch_) {
+      double mismatch_psnr = compute_psnr(img1, img2);
+      mismatch_psnr_ += mismatch_psnr;
+      ++mismatch_nframes_;
+      // std::cout << "Mismatch frame psnr: " << mismatch_psnr << "\n";
+    } else {
+      ::libaom_test::EncoderTest::MismatchHook(img1, img2);
+    }
+  }
+
+  virtual void DecompressedFrameHook(const aom_image_t &img,
+                                     aom_codec_pts_t pts) {
+    (void)img;
+    (void)pts;
+    ++decoded_nframes_;
   }
 
   void SetErrorFrames(int num, unsigned int *list) {
@@ -114,6 +212,16 @@ class ErrorResilienceTestLarge
       error_frames_[i] = list[i];
   }
 
+  void SetInvisibleErrorFrames(int num, unsigned int *list) {
+    if (num > kMaxInvisibleErrorFrames)
+      num = kMaxInvisibleErrorFrames;
+    else if (num < 0)
+      num = 0;
+    invisible_error_nframes_ = num;
+    for (unsigned int i = 0; i < invisible_error_nframes_; ++i)
+      invisible_error_frames_[i] = list[i];
+  }
+
   void SetDroppableFrames(int num, unsigned int *list) {
     if (num > kMaxDroppableFrames)
       num = kMaxDroppableFrames;
@@ -124,42 +232,93 @@ class ErrorResilienceTestLarge
       droppable_frames_[i] = list[i];
   }
 
+  void SetErrorResilientFrames(int num, unsigned int *list) {
+    if (num > kMaxErrorResilientFrames)
+      num = kMaxErrorResilientFrames;
+    else if (num < 0)
+      num = 0;
+    error_resilient_nframes_ = num;
+    for (unsigned int i = 0; i < error_resilient_nframes_; ++i)
+      error_resilient_frames_[i] = list[i];
+  }
+
+  void SetNoMFMVFrames(int num, unsigned int *list) {
+    if (num > kMaxNoMFMVFrames)
+      num = kMaxNoMFMVFrames;
+    else if (num < 0)
+      num = 0;
+    nomfmv_nframes_ = num;
+    for (unsigned int i = 0; i < nomfmv_nframes_; ++i)
+      nomfmv_frames_[i] = list[i];
+  }
+
+  void SetPrimaryRefNoneFrames(int num, unsigned int *list) {
+    if (num > kMaxPrimRefNoneFrames)
+      num = kMaxPrimRefNoneFrames;
+    else if (num < 0)
+      num = 0;
+    prim_ref_none_nframes_ = num;
+    for (unsigned int i = 0; i < prim_ref_none_nframes_; ++i)
+      prim_ref_none_frames_[i] = list[i];
+  }
+
+  void SetSFrames(int num, unsigned int *list) {
+    if (num > kMaxSFrames)
+      num = kMaxSFrames;
+    else if (num < 0)
+      num = 0;
+    s_nframes_ = num;
+    for (unsigned int i = 0; i < s_nframes_; ++i) s_frames_[i] = list[i];
+  }
+
   unsigned int GetMismatchFrames() { return mismatch_nframes_; }
+  unsigned int GetEncodedFrames() { return nframes_; }
+  unsigned int GetDecodedFrames() { return decoded_nframes_; }
 
-  void SetPatternSwitch(int frame_switch) { pattern_switch_ = frame_switch; }
+  void SetAllowMismatch(int allow) { allow_mismatch_ = allow; }
 
  private:
   double psnr_;
   unsigned int nframes_;
+  unsigned int decoded_nframes_;
   unsigned int error_nframes_;
+  unsigned int invisible_error_nframes_;
   unsigned int droppable_nframes_;
-  unsigned int pattern_switch_;
+  unsigned int error_resilient_nframes_;
+  unsigned int nomfmv_nframes_;
+  unsigned int prim_ref_none_nframes_;
+  unsigned int s_nframes_;
   double mismatch_psnr_;
   unsigned int mismatch_nframes_;
   unsigned int error_frames_[kMaxErrorFrames];
+  unsigned int invisible_error_frames_[kMaxInvisibleErrorFrames];
   unsigned int droppable_frames_[kMaxDroppableFrames];
+  unsigned int error_resilient_frames_[kMaxErrorResilientFrames];
+  unsigned int nomfmv_frames_[kMaxNoMFMVFrames];
+  unsigned int prim_ref_none_frames_[kMaxPrimRefNoneFrames];
+  unsigned int s_frames_[kMaxSFrames];
   libaom_test::TestMode encoding_mode_;
+  int allow_mismatch_;
 };
 
 TEST_P(ErrorResilienceTestLarge, OnVersusOff) {
-  const aom_rational timebase = { 33333333, 1000000000 };
-  cfg_.g_timebase = timebase;
-  cfg_.rc_target_bitrate = 2000;
-  cfg_.g_lag_in_frames = 10;
-
-  init_flags_ = AOM_CODEC_USE_PSNR;
-
+  SetupEncoder(2000, 10);
   libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                     timebase.den, timebase.num, 0, 12);
+                                     cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                     0, 12);
 
-  // Error resilient mode OFF.
+  // Global error resilient mode OFF.
   cfg_.g_error_resilient = 0;
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
   const double psnr_resilience_off = GetAveragePsnr();
   EXPECT_GT(psnr_resilience_off, 25.0);
 
-  // Error resilient mode ON.
-  cfg_.g_error_resilient = 1;
+  Reset();
+  // Error resilient mode ON for certain frames
+  unsigned int num_error_resilient_frames = 5;
+  unsigned int error_resilient_frame_list[] = { 3, 5, 6, 9, 11 };
+  SetErrorResilientFrames(num_error_resilient_frames,
+                          error_resilient_frame_list);
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
   const double psnr_resilience_on = GetAveragePsnr();
   EXPECT_GT(psnr_resilience_on, 25.0);
@@ -175,60 +334,105 @@ TEST_P(ErrorResilienceTestLarge, OnVersusOff) {
 // Check for successful decoding and no encoder/decoder mismatch
 // if we lose (i.e., drop before decoding) a set of droppable
 // frames (i.e., frames that don't update any reference buffers).
-// Check both isolated and consecutive loss.
 TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) {
-  const aom_rational timebase = { 33333333, 1000000000 };
-  cfg_.g_timebase = timebase;
-  cfg_.rc_target_bitrate = 500;
-  // FIXME(debargha): Fix this to work for any lag.
-  // Currently this test only works for lag = 0
-  cfg_.g_lag_in_frames = 0;
-
-  init_flags_ = AOM_CODEC_USE_PSNR;
-
+  SetupEncoder(500, 10);
   libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                     timebase.den, timebase.num, 0, 20);
-
-  // Error resilient mode ON.
-  cfg_.g_error_resilient = 1;
-  cfg_.kf_mode = AOM_KF_DISABLED;
+                                     cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                     0, 20);
 
   // Set an arbitrary set of error frames same as droppable frames.
-  // In addition to isolated loss/drop, add a long consecutive series
-  // (of size 9) of dropped frames.
-  unsigned int num_droppable_frames = 5;
-  unsigned int droppable_frame_list[] = { 5, 10, 13, 16, 19 };
+  unsigned int num_droppable_frames = 3;
+  unsigned int droppable_frame_list[] = { 5, 10, 13 };
   SetDroppableFrames(num_droppable_frames, droppable_frame_list);
   SetErrorFrames(num_droppable_frames, droppable_frame_list);
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
   // Test that no mismatches have been found
+  std::cout << "             Encoded frames: " << GetEncodedFrames() << "\n";
+  std::cout << "             Decoded frames: " << GetDecodedFrames() << "\n";
   std::cout << "             Mismatch frames: " << GetMismatchFrames() << "\n";
-  EXPECT_EQ(GetMismatchFrames(), (unsigned int)0);
-
-  // Reset previously set of error/droppable frames.
-  Reset();
+  EXPECT_EQ(GetEncodedFrames() - GetDecodedFrames(), num_droppable_frames);
+}
 
-#if 0
-  // TODO(jkoleszar): This test is disabled for the time being as too
-  // sensitive. It's not clear how to set a reasonable threshold for
-  // this behavior.
+// Check for ParseAbility property of an error-resilient frame.
+// Encode a frame in error-resilient mode (E-frame), and disallow all
+// subsequent frames from using MFMV. If frames are dropped before the
+// E frame, all frames starting from the E frame should be parse-able.
+TEST_P(ErrorResilienceTestLarge, ParseAbilityTest) {
+  SetupEncoder(500, 10);
 
-  // Now set an arbitrary set of error frames that are non-droppable
-  unsigned int num_error_frames = 3;
-  unsigned int error_frame_list[] = {3, 10, 20};
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                     0, 15);
+
+  SetAllowMismatch(1);
+
+  // Note that an E-frame cannot be forced on a frame that is a
+  // show_existing_frame, or a frame that comes directly after an invisible
+  // frame. Currently, this will cause an assertion failure.
+  // Set an arbitrary error resilient (E) frame
+  unsigned int num_error_resilient_frames = 1;
+  unsigned int error_resilient_frame_list[] = { 8 };
+  SetErrorResilientFrames(num_error_resilient_frames,
+                          error_resilient_frame_list);
+  // Ensure that any invisible frames before the E frame are dropped
+  SetInvisibleErrorFrames(num_error_resilient_frames,
+                          error_resilient_frame_list);
+  // Set all frames after the error resilient frame to not allow MFMV
+  unsigned int num_post_error_resilient_frames = 6;
+  unsigned int post_error_resilient_frame_list[] = { 9, 10, 11, 12, 13, 14 };
+  SetNoMFMVFrames(num_post_error_resilient_frames,
+                  post_error_resilient_frame_list);
+
+  // Set a few frames before the E frame that are lost (not decoded)
+  unsigned int num_error_frames = 5;
+  unsigned int error_frame_list[] = { 3, 4, 5, 6, 7 };
   SetErrorFrames(num_error_frames, error_frame_list);
+
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  std::cout << "             Encoded frames: " << GetEncodedFrames() << "\n";
+  std::cout << "             Decoded frames: " << GetDecodedFrames() << "\n";
+  std::cout << "             Mismatch frames: " << GetMismatchFrames() << "\n";
+  EXPECT_EQ(GetEncodedFrames() - GetDecodedFrames(), num_error_frames);
+  // All frames following the E-frame and the E-frame are expected to have
+  // mismatches, but still be parse-able.
+  EXPECT_LE(GetMismatchFrames(), num_post_error_resilient_frames + 1);
+}
 
-  // Test that dropping an arbitrary set of inter frames does not hurt too much
-  // Note the Average Mismatch PSNR is the average of the PSNR between
-  // decoded frame and encoder's version of the same frame for all frames
-  // with mismatch.
-  const double psnr_resilience_mismatch = GetAverageMismatchPsnr();
-  std::cout << "             Mismatch PSNR: "
-            << psnr_resilience_mismatch << "\n";
-  EXPECT_GT(psnr_resilience_mismatch, 20.0);
-#endif
+// Check for ParseAbility property of an S frame.
+// Encode an S-frame. If frames are dropped before the S-frame, all frames
+// starting from the S frame should be parse-able.
+TEST_P(ErrorResilienceTestLarge, SFrameTest) {
+  SetupEncoder(500, 10);
+
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                     0, 15);
+
+  SetAllowMismatch(1);
+
+  // Note that an S-frame cannot be forced on a frame that is a
+  // show_existing_frame. This issue still needs to be addressed.
+  // Set an arbitrary S-frame
+  unsigned int num_s_frames = 1;
+  unsigned int s_frame_list[] = { 6 };
+  SetSFrames(num_s_frames, s_frame_list);
+  // Ensure that any invisible frames before the S frame are dropped
+  SetInvisibleErrorFrames(num_s_frames, s_frame_list);
+
+  // Set a few frames before the S frame that are lost (not decoded)
+  unsigned int num_error_frames = 4;
+  unsigned int error_frame_list[] = { 2, 3, 4, 5 };
+  SetErrorFrames(num_error_frames, error_frame_list);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  std::cout << "             Encoded frames: " << GetEncodedFrames() << "\n";
+  std::cout << "             Decoded frames: " << GetDecodedFrames() << "\n";
+  std::cout << "             Mismatch frames: " << GetMismatchFrames() << "\n";
+  EXPECT_EQ(GetEncodedFrames() - GetDecodedFrames(), num_error_frames);
+  // All frames following the S-frame and the S-frame are expected to have
+  // mismatches, but still be parse-able.
+  EXPECT_LE(GetMismatchFrames(), GetEncodedFrames() - s_frame_list[0]);
 }
 
-AV1_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
+AV1_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, NONREALTIME_TEST_MODES);
 }  // namespace
diff --git a/third_party/aom/test/ethread_test.cc b/third_party/aom/test/ethread_test.cc
index 86eb3228ee..3dcc2a707e 100644
--- a/third_party/aom/test/ethread_test.cc
+++ b/third_party/aom/test/ethread_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <string>
 #include <vector>
@@ -16,7 +16,7 @@
 #include "test/encode_test_driver.h"
 #include "test/md5_helper.h"
 #include "test/util.h"
-#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
 
 namespace {
 class AVxEncoderThreadTest
@@ -32,12 +32,10 @@ class AVxEncoderThreadTest
     cfg.h = 720;
     cfg.allow_lowbitdepth = 1;
     decoder_ = codec_->CreateDecoder(cfg, 0);
-#if CONFIG_AV1
     if (decoder_->IsAV1()) {
       decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1);
       decoder_->Control(AV1_SET_DECODE_TILE_COL, -1);
     }
-#endif
 
     size_enc_.clear();
     md5_dec_.clear();
@@ -71,9 +69,6 @@ class AVxEncoderThreadTest
                                   ::libaom_test::Encoder *encoder) {
     if (!encoder_initialized_) {
       SetTileSize(encoder);
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
-      encoder->Control(AV1E_SET_TILE_LOOPFILTER, 0);
-#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
       encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
       if (encoding_mode_ != ::libaom_test::kRealTime) {
         encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
@@ -118,7 +113,8 @@ class AVxEncoderThreadTest
   }
 
   void DoTest() {
-    ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 18);
+    ::libaom_test::YUVVideoSource video(
+        "niklas_640_480_30.yuv", AOM_IMG_FMT_I420, 640, 480, 30, 1, 15, 18);
     cfg_.rc_target_bitrate = 1000;
 
     // Encode using single thread.
@@ -164,18 +160,16 @@ class AVxEncoderThreadTest
 };
 
 TEST_P(AVxEncoderThreadTest, EncoderResultTest) {
-#if CONFIG_AV1 && CONFIG_EXT_TILE
   cfg_.large_scale_tile = 0;
-#endif  // CONFIG_AV1 && CONFIG_EXT_TILE
+  decoder_->Control(AV1_SET_TILE_MODE, 0);
   DoTest();
 }
 
 class AVxEncoderThreadTestLarge : public AVxEncoderThreadTest {};
 
 TEST_P(AVxEncoderThreadTestLarge, EncoderResultTest) {
-#if CONFIG_AV1 && CONFIG_EXT_TILE
   cfg_.large_scale_tile = 0;
-#endif  // CONFIG_AV1 && CONFIG_EXT_TILE
+  decoder_->Control(AV1_SET_TILE_MODE, 0);
   DoTest();
 }
 
@@ -190,7 +184,6 @@ AV1_INSTANTIATE_TEST_CASE(AVxEncoderThreadTestLarge,
                                             ::libaom_test::kOnePassGood),
                           ::testing::Range(0, 2));
 
-#if CONFIG_AV1 && CONFIG_EXT_TILE
 class AVxEncoderThreadLSTest : public AVxEncoderThreadTest {
   virtual void SetTileSize(libaom_test::Encoder *encoder) {
     encoder->Control(AV1E_SET_TILE_COLUMNS, 1);
@@ -200,15 +193,17 @@ class AVxEncoderThreadLSTest : public AVxEncoderThreadTest {
   }
 };
 
-TEST_P(AVxEncoderThreadLSTest, EncoderResultTest) {
+TEST_P(AVxEncoderThreadLSTest, DISABLED_EncoderResultTest) {
   cfg_.large_scale_tile = 1;
+  decoder_->Control(AV1_SET_TILE_MODE, 1);
   DoTest();
 }
 
 class AVxEncoderThreadLSTestLarge : public AVxEncoderThreadLSTest {};
 
-TEST_P(AVxEncoderThreadLSTestLarge, EncoderResultTest) {
+TEST_P(AVxEncoderThreadLSTestLarge, DISABLED_EncoderResultTest) {
   cfg_.large_scale_tile = 1;
+  decoder_->Control(AV1_SET_TILE_MODE, 1);
   DoTest();
 }
 
@@ -220,5 +215,4 @@ AV1_INSTANTIATE_TEST_CASE(AVxEncoderThreadLSTestLarge,
                           ::testing::Values(::libaom_test::kTwoPassGood,
                                             ::libaom_test::kOnePassGood),
                           ::testing::Range(0, 2));
-#endif  // CONFIG_AV1 && CONFIG_EXT_TILE
 }  // namespace
diff --git a/third_party/aom/test/examples.sh b/third_party/aom/test/examples.sh
index d3152be7db..2cdb89dd08 100755
--- a/third_party/aom/test/examples.sh
+++ b/third_party/aom/test/examples.sh
@@ -12,10 +12,10 @@
 ##
 . $(dirname $0)/tools_common.sh
 
-example_tests=$(ls $(dirname $0)/*.sh)
+example_tests=$(ls -r $(dirname $0)/*.sh)
 
 # List of script names to exclude.
-exclude_list="examples tools_common decode_to_md5"
+exclude_list="best_encode examples run_encodes tools_common"
 
 # Filter out the scripts in $exclude_list.
 for word in ${exclude_list}; do
diff --git a/third_party/aom/test/fdct4x4_test.cc b/third_party/aom/test/fdct4x4_test.cc
deleted file mode 100644
index 5fad1667b4..0000000000
--- a/third_party/aom/test/fdct4x4_test.cc
+++ /dev/null
@@ -1,350 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-#include "av1/common/entropy.h"
-#include "aom/aom_codec.h"
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
-typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-using libaom_test::FhtFunc;
-
-typedef std::tr1::tuple<FdctFunc, IdctFunc, TX_TYPE, aom_bit_depth_t, int>
-    Dct4x4Param;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int>
-    Ht4x4Param;
-
-void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
-                 TxfmParam * /*txfm_param*/) {
-  aom_fdct4x4_c(in, out, stride);
-}
-
-void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
-                TxfmParam *txfm_param) {
-  av1_fht4x4_c(in, out, stride, txfm_param);
-}
-
-void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
-                 TxfmParam * /*txfm_param*/) {
-  av1_fwht4x4_c(in, out, stride);
-}
-
-#if CONFIG_HIGHBITDEPTH
-void fht4x4_10(const int16_t *in, tran_low_t *out, int stride,
-               TxfmParam *txfm_param) {
-  av1_fwd_txfm2d_4x4_c(in, out, stride, txfm_param->tx_type, 10);
-}
-
-void fht4x4_12(const int16_t *in, tran_low_t *out, int stride,
-               TxfmParam *txfm_param) {
-  av1_fwd_txfm2d_4x4_c(in, out, stride, txfm_param->tx_type, 12);
-}
-
-void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride,
-               const TxfmParam *txfm_param) {
-  av1_inv_txfm2d_add_4x4_c(in, CONVERT_TO_SHORTPTR(out), stride,
-                           txfm_param->tx_type, 10);
-}
-
-void iht4x4_12(const tran_low_t *in, uint8_t *out, int stride,
-               const TxfmParam *txfm_param) {
-  av1_inv_txfm2d_add_4x4_c(in, CONVERT_TO_SHORTPTR(out), stride,
-                           txfm_param->tx_type, 12);
-}
-
-void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
-  aom_highbd_iwht4x4_16_add_c(in, out, stride, 10);
-}
-
-void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
-  aom_highbd_iwht4x4_16_add_c(in, out, stride, 12);
-}
-#endif  // CONFIG_HIGHBITDEPTH
-
-class Trans4x4DCT : public libaom_test::TransformTestBase,
-                    public ::testing::TestWithParam<Dct4x4Param> {
- public:
-  virtual ~Trans4x4DCT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 4;
-    height_ = 4;
-    fwd_txfm_ref = fdct4x4_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride);
-  }
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride);
-  }
-
-  FdctFunc fwd_txfm_;
-  IdctFunc inv_txfm_;
-};
-
-TEST_P(Trans4x4DCT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); }
-
-TEST_P(Trans4x4DCT, CoeffCheck) { RunCoeffCheck(); }
-
-TEST_P(Trans4x4DCT, MemCheck) { RunMemCheck(); }
-
-TEST_P(Trans4x4DCT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
-
-class Trans4x4HT : public libaom_test::TransformTestBase,
-                   public ::testing::TestWithParam<Ht4x4Param> {
- public:
-  virtual ~Trans4x4HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 4;
-    height_ = 4;
-    fwd_txfm_ref = fht4x4_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-    txfm_param_.tx_type = GET_PARAM(2);
-#if CONFIG_HIGHBITDEPTH
-    switch (bit_depth_) {
-      case AOM_BITS_10: fwd_txfm_ref = fht4x4_10; break;
-      case AOM_BITS_12: fwd_txfm_ref = fht4x4_12; break;
-      default: fwd_txfm_ref = fht4x4_ref; break;
-    }
-#endif
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(Trans4x4HT, AccuracyCheck) { RunAccuracyCheck(1, 0.005); }
-
-TEST_P(Trans4x4HT, CoeffCheck) { RunCoeffCheck(); }
-
-TEST_P(Trans4x4HT, MemCheck) { RunMemCheck(); }
-
-TEST_P(Trans4x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
-
-class Trans4x4WHT : public libaom_test::TransformTestBase,
-                    public ::testing::TestWithParam<Dct4x4Param> {
- public:
-  virtual ~Trans4x4WHT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 4;
-    height_ = 4;
-    fwd_txfm_ref = fwht4x4_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride);
-  }
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride);
-  }
-
-  FdctFunc fwd_txfm_;
-  IdctFunc inv_txfm_;
-};
-
-TEST_P(Trans4x4WHT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); }
-
-TEST_P(Trans4x4WHT, CoeffCheck) { RunCoeffCheck(); }
-
-TEST_P(Trans4x4WHT, MemCheck) { RunMemCheck(); }
-
-TEST_P(Trans4x4WHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
-using std::tr1::make_tuple;
-
-INSTANTIATE_TEST_CASE_P(C, Trans4x4DCT,
-                        ::testing::Values(make_tuple(&aom_fdct4x4_c,
-                                                     &aom_idct4x4_16_add_c,
-                                                     DCT_DCT, AOM_BITS_8, 16)));
-
-#if CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    DISABLED_C, Trans4x4HT,
-    ::testing::Values(
-        make_tuple(&fht4x4_12, &iht4x4_12, DCT_DCT, AOM_BITS_12, 16),
-        make_tuple(&fht4x4_12, &iht4x4_12, ADST_DCT, AOM_BITS_12, 16),
-        make_tuple(&fht4x4_12, &iht4x4_12, DCT_ADST, AOM_BITS_12, 16),
-        make_tuple(&fht4x4_12, &iht4x4_12, ADST_ADST, AOM_BITS_12, 16)));
-
-INSTANTIATE_TEST_CASE_P(
-    C, Trans4x4HT,
-    ::testing::Values(
-        make_tuple(&fht4x4_10, &iht4x4_10, DCT_DCT, AOM_BITS_10, 16),
-        make_tuple(&fht4x4_10, &iht4x4_10, ADST_DCT, AOM_BITS_10, 16),
-        make_tuple(&fht4x4_10, &iht4x4_10, DCT_ADST, AOM_BITS_10, 16),
-        make_tuple(&fht4x4_10, &iht4x4_10, ADST_ADST, AOM_BITS_10, 16),
-        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, DCT_DCT, AOM_BITS_8,
-                   16),
-        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, ADST_DCT, AOM_BITS_8,
-                   16),
-        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, DCT_ADST, AOM_BITS_8,
-                   16),
-        make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, ADST_ADST, AOM_BITS_8,
-                   16)));
-#else
-INSTANTIATE_TEST_CASE_P(
-    C, Trans4x4HT,
-    ::testing::Values(make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, DCT_DCT,
-                                 AOM_BITS_8, 16),
-                      make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, ADST_DCT,
-                                 AOM_BITS_8, 16),
-                      make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, DCT_ADST,
-                                 AOM_BITS_8, 16),
-                      make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, ADST_ADST,
-                                 AOM_BITS_8, 16)));
-#endif  // CONFIG_HIGHBITDEPTH
-
-#if CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    C, Trans4x4WHT,
-    ::testing::Values(make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_10, DCT_DCT,
-                                 AOM_BITS_10, 16),
-                      make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_12, DCT_DCT,
-                                 AOM_BITS_12, 16),
-                      make_tuple(&av1_fwht4x4_c, &aom_iwht4x4_16_add_c, DCT_DCT,
-                                 AOM_BITS_8, 16)));
-#else
-INSTANTIATE_TEST_CASE_P(C, Trans4x4WHT,
-                        ::testing::Values(make_tuple(&av1_fwht4x4_c,
-                                                     &aom_iwht4x4_16_add_c,
-                                                     DCT_DCT, AOM_BITS_8, 16)));
-#endif  // CONFIG_HIGHBITDEPTH
-
-#if HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(NEON, Trans4x4DCT,
-                        ::testing::Values(make_tuple(&aom_fdct4x4_c,
-                                                     &aom_idct4x4_16_add_neon,
-                                                     DCT_DCT, AOM_BITS_8, 16)));
-#endif  // HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH
-
-#if HAVE_NEON && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    NEON, Trans4x4HT,
-    ::testing::Values(make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon,
-                                 DCT_DCT, AOM_BITS_8, 16),
-                      make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon,
-                                 ADST_DCT, AOM_BITS_8, 16),
-                      make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon,
-                                 DCT_ADST, AOM_BITS_8, 16),
-                      make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon,
-                                 ADST_ADST, AOM_BITS_8, 16)));
-#endif  // HAVE_NEON && !CONFIG_HIGHBITDEPTH
-
-#if HAVE_SSE2 && !CONFIG_DAALA_DCT4
-INSTANTIATE_TEST_CASE_P(
-    SSE2, Trans4x4WHT,
-    ::testing::Values(make_tuple(&av1_fwht4x4_c, &aom_iwht4x4_16_add_c, DCT_DCT,
-                                 AOM_BITS_8, 16),
-                      make_tuple(&av1_fwht4x4_c, &aom_iwht4x4_16_add_sse2,
-                                 DCT_DCT, AOM_BITS_8, 16)));
-#endif
-
-#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(SSE2, Trans4x4DCT,
-                        ::testing::Values(make_tuple(&aom_fdct4x4_sse2,
-                                                     &aom_idct4x4_16_add_sse2,
-                                                     DCT_DCT, AOM_BITS_8, 16)));
-#if !CONFIG_DAALA_DCT4
-INSTANTIATE_TEST_CASE_P(
-    SSE2, Trans4x4HT,
-    ::testing::Values(make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2,
-                                 DCT_DCT, AOM_BITS_8, 16),
-                      make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2,
-                                 ADST_DCT, AOM_BITS_8, 16),
-                      make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2,
-                                 DCT_ADST, AOM_BITS_8, 16),
-                      make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2,
-                                 ADST_ADST, AOM_BITS_8, 16)));
-#endif  // !CONFIG_DAALA_DCT4
-#endif  // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
-
-#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT4
-INSTANTIATE_TEST_CASE_P(
-    SSE2, Trans4x4HT,
-    ::testing::Values(make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c,
-                                 DCT_DCT, AOM_BITS_8, 16),
-                      make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c,
-                                 ADST_DCT, AOM_BITS_8, 16),
-                      make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c,
-                                 DCT_ADST, AOM_BITS_8, 16),
-                      make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c,
-                                 ADST_ADST, AOM_BITS_8, 16)));
-#endif  // HAVE_SSE2 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT4
-
-#if HAVE_MSA && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(MSA, Trans4x4DCT,
-                        ::testing::Values(make_tuple(&aom_fdct4x4_msa,
-                                                     &aom_idct4x4_16_add_msa,
-                                                     DCT_DCT, AOM_BITS_8, 16)));
-#if !CONFIG_EXT_TX && !CONFIG_DAALA_DCT4
-INSTANTIATE_TEST_CASE_P(
-    MSA, Trans4x4HT,
-    ::testing::Values(make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa,
-                                 DCT_DCT, AOM_BITS_8, 16),
-                      make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa,
-                                 ADST_DCT, AOM_BITS_8, 16),
-                      make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa,
-                                 DCT_ADST, AOM_BITS_8, 16),
-                      make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa,
-                                 ADST_ADST, AOM_BITS_8, 16)));
-#endif  // !CONFIG_EXT_TX && && !CONFIG_DAALA_DCT4
-#endif  // HAVE_MSA && !CONFIG_HIGHBITDEPTH
-}  // namespace
diff --git a/third_party/aom/test/fdct8x8_test.cc b/third_party/aom/test/fdct8x8_test.cc
deleted file mode 100644
index 99ae8d6779..0000000000
--- a/third_party/aom/test/fdct8x8_test.cc
+++ /dev/null
@@ -1,738 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "av1/common/entropy.h"
-#include "av1/common/scan.h"
-#include "aom/aom_codec.h"
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-
-const int kNumCoeffs = 64;
-const double kPi = 3.141592653589793238462643383279502884;
-
-const int kSignBiasMaxDiff255 = 1500;
-const int kSignBiasMaxDiff15 = 10000;
-
-typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
-typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
-typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
-                        TxfmParam *txfm_param);
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-
-typedef std::tr1::tuple<FdctFunc, IdctFunc, TX_TYPE, aom_bit_depth_t>
-    Dct8x8Param;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t> Ht8x8Param;
-typedef std::tr1::tuple<IdctFunc, IdctFunc, int, aom_bit_depth_t> Idct8x8Param;
-
-void reference_8x8_dct_1d(const double in[8], double out[8]) {
-  const double kInvSqrt2 = 0.707106781186547524400844362104;
-  for (int k = 0; k < 8; k++) {
-    out[k] = 0.0;
-    for (int n = 0; n < 8; n++)
-      out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 16.0);
-    if (k == 0) out[k] = out[k] * kInvSqrt2;
-  }
-}
-
-void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
-                          double output[kNumCoeffs]) {
-  // First transform columns
-  for (int i = 0; i < 8; ++i) {
-    double temp_in[8], temp_out[8];
-    for (int j = 0; j < 8; ++j) temp_in[j] = input[j * 8 + i];
-    reference_8x8_dct_1d(temp_in, temp_out);
-    for (int j = 0; j < 8; ++j) output[j * 8 + i] = temp_out[j];
-  }
-  // Then transform rows
-  for (int i = 0; i < 8; ++i) {
-    double temp_in[8], temp_out[8];
-    for (int j = 0; j < 8; ++j) temp_in[j] = output[j + i * 8];
-    reference_8x8_dct_1d(temp_in, temp_out);
-    // Scale by some magic number
-    for (int j = 0; j < 8; ++j) output[j + i * 8] = temp_out[j] * 2;
-  }
-}
-
-void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride,
-                 TxfmParam * /*txfm_param*/) {
-  aom_fdct8x8_c(in, out, stride);
-}
-
-void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride,
-                TxfmParam *txfm_param) {
-  av1_fht8x8_c(in, out, stride, txfm_param);
-}
-
-#if CONFIG_HIGHBITDEPTH
-void fht8x8_10(const int16_t *in, tran_low_t *out, int stride,
-               TxfmParam *txfm_param) {
-  av1_fwd_txfm2d_8x8_c(in, out, stride, txfm_param->tx_type, 10);
-}
-
-void fht8x8_12(const int16_t *in, tran_low_t *out, int stride,
-               TxfmParam *txfm_param) {
-  av1_fwd_txfm2d_8x8_c(in, out, stride, txfm_param->tx_type, 12);
-}
-
-void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride,
-               const TxfmParam *txfm_param) {
-  av1_inv_txfm2d_add_8x8_c(in, CONVERT_TO_SHORTPTR(out), stride,
-                           txfm_param->tx_type, 10);
-}
-
-void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride,
-               const TxfmParam *txfm_param) {
-  av1_inv_txfm2d_add_8x8_c(in, CONVERT_TO_SHORTPTR(out), stride,
-                           txfm_param->tx_type, 12);
-}
-
-#endif  // CONFIG_HIGHBITDEPTH
-
-class FwdTrans8x8TestBase {
- public:
-  virtual ~FwdTrans8x8TestBase() {}
-
- protected:
-  virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
-  virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
-
-  void RunSignBiasCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
-    DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
-    int count_sign_block[64][2];
-    const int count_test_block = 100000;
-
-    memset(count_sign_block, 0, sizeof(count_sign_block));
-
-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-255, 255].
-      for (int j = 0; j < 64; ++j)
-        test_input_block[j] = ((rnd.Rand16() >> (16 - bit_depth_)) & mask_) -
-                              ((rnd.Rand16() >> (16 - bit_depth_)) & mask_);
-      ASM_REGISTER_STATE_CHECK(
-          RunFwdTxfm(test_input_block, test_output_block, pitch_));
-
-      for (int j = 0; j < 64; ++j) {
-        if (test_output_block[j] < 0)
-          ++count_sign_block[j][0];
-        else if (test_output_block[j] > 0)
-          ++count_sign_block[j][1];
-      }
-    }
-
-    for (int j = 0; j < 64; ++j) {
-      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
-      const int max_diff = kSignBiasMaxDiff255;
-      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
-          << "Error: 8x8 FDCT/FHT has a sign bias > "
-          << 1. * max_diff / count_test_block * 100 << "%"
-          << " for input range [-255, 255] at index " << j
-          << " count0: " << count_sign_block[j][0]
-          << " count1: " << count_sign_block[j][1] << " diff: " << diff;
-    }
-
-    memset(count_sign_block, 0, sizeof(count_sign_block));
-
-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
-      for (int j = 0; j < 64; ++j)
-        test_input_block[j] =
-            ((rnd.Rand16() & mask_) >> 4) - ((rnd.Rand16() & mask_) >> 4);
-      ASM_REGISTER_STATE_CHECK(
-          RunFwdTxfm(test_input_block, test_output_block, pitch_));
-
-      for (int j = 0; j < 64; ++j) {
-        if (test_output_block[j] < 0)
-          ++count_sign_block[j][0];
-        else if (test_output_block[j] > 0)
-          ++count_sign_block[j][1];
-      }
-    }
-
-    for (int j = 0; j < 64; ++j) {
-      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
-      const int max_diff = kSignBiasMaxDiff15;
-      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
-          << "Error: 8x8 FDCT/FHT has a sign bias > "
-          << 1. * max_diff / count_test_block * 100 << "%"
-          << " for input range [-15, 15] at index " << j
-          << " count0: " << count_sign_block[j][0]
-          << " count1: " << count_sign_block[j][1] << " diff: " << diff;
-    }
-  }
-
-  void RunRoundTripErrorCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    int max_error = 0;
-    int total_error = 0;
-    const int count_test_block = 100000;
-    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
-    DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
-    DECLARE_ALIGNED(16, uint8_t, dst[64]);
-    DECLARE_ALIGNED(16, uint8_t, src[64]);
-#if CONFIG_HIGHBITDEPTH
-    DECLARE_ALIGNED(16, uint16_t, dst16[64]);
-    DECLARE_ALIGNED(16, uint16_t, src16[64]);
-#endif
-
-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-mask_, mask_].
-      for (int j = 0; j < 64; ++j) {
-        if (bit_depth_ == AOM_BITS_8) {
-          src[j] = rnd.Rand8();
-          dst[j] = rnd.Rand8();
-          test_input_block[j] = src[j] - dst[j];
-#if CONFIG_HIGHBITDEPTH
-        } else {
-          src16[j] = rnd.Rand16() & mask_;
-          dst16[j] = rnd.Rand16() & mask_;
-          test_input_block[j] = src16[j] - dst16[j];
-#endif
-        }
-      }
-
-      ASM_REGISTER_STATE_CHECK(
-          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
-      for (int j = 0; j < 64; ++j) {
-        if (test_temp_block[j] > 0) {
-          test_temp_block[j] += 2;
-          test_temp_block[j] /= 4;
-          test_temp_block[j] *= 4;
-        } else {
-          test_temp_block[j] -= 2;
-          test_temp_block[j] /= 4;
-          test_temp_block[j] *= 4;
-        }
-      }
-      if (bit_depth_ == AOM_BITS_8) {
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
-#if CONFIG_HIGHBITDEPTH
-      } else {
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
-#endif
-      }
-
-      for (int j = 0; j < 64; ++j) {
-#if CONFIG_HIGHBITDEPTH
-        const int diff =
-            bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
-#else
-        const int diff = dst[j] - src[j];
-#endif
-        const int error = diff * diff;
-        if (max_error < error) max_error = error;
-        total_error += error;
-      }
-    }
-
-    EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
-        << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
-        << " roundtrip error > 1";
-
-    EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
-        << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
-        << "error > 1/5 per block";
-  }
-
-  void RunExtremalCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    int max_error = 0;
-    int total_error = 0;
-    int total_coeff_error = 0;
-    const int count_test_block = 100000;
-    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
-    DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
-    DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
-    DECLARE_ALIGNED(16, uint8_t, dst[64]);
-    DECLARE_ALIGNED(16, uint8_t, src[64]);
-#if CONFIG_HIGHBITDEPTH
-    DECLARE_ALIGNED(16, uint16_t, dst16[64]);
-    DECLARE_ALIGNED(16, uint16_t, src16[64]);
-#endif
-
-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-mask_, mask_].
-      for (int j = 0; j < 64; ++j) {
-        if (bit_depth_ == AOM_BITS_8) {
-          if (i == 0) {
-            src[j] = 255;
-            dst[j] = 0;
-          } else if (i == 1) {
-            src[j] = 0;
-            dst[j] = 255;
-          } else {
-            src[j] = rnd.Rand8() % 2 ? 255 : 0;
-            dst[j] = rnd.Rand8() % 2 ? 255 : 0;
-          }
-          test_input_block[j] = src[j] - dst[j];
-#if CONFIG_HIGHBITDEPTH
-        } else {
-          if (i == 0) {
-            src16[j] = mask_;
-            dst16[j] = 0;
-          } else if (i == 1) {
-            src16[j] = 0;
-            dst16[j] = mask_;
-          } else {
-            src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
-            dst16[j] = rnd.Rand8() % 2 ? mask_ : 0;
-          }
-          test_input_block[j] = src16[j] - dst16[j];
-#endif
-        }
-      }
-
-      ASM_REGISTER_STATE_CHECK(
-          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
-      ASM_REGISTER_STATE_CHECK(
-          fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, &txfm_param_));
-      if (bit_depth_ == AOM_BITS_8) {
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
-#if CONFIG_HIGHBITDEPTH
-      } else {
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
-#endif
-      }
-
-      for (int j = 0; j < 64; ++j) {
-#if CONFIG_HIGHBITDEPTH
-        const int diff =
-            bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
-#else
-        const int diff = dst[j] - src[j];
-#endif
-        const int error = diff * diff;
-        if (max_error < error) max_error = error;
-        total_error += error;
-
-        const int coeff_diff = test_temp_block[j] - ref_temp_block[j];
-        total_coeff_error += abs(coeff_diff);
-      }
-
-      EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
-          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
-          << "an individual roundtrip error > 1";
-
-      EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
-          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
-          << " roundtrip error > 1/5 per block";
-
-      EXPECT_EQ(0, total_coeff_error)
-          << "Error: Extremal 8x8 FDCT/FHT has"
-          << "overflow issues in the intermediate steps > 1";
-    }
-  }
-
-  void RunInvAccuracyCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 1000;
-    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
-#if CONFIG_HIGHBITDEPTH
-    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-#endif
-
-    for (int i = 0; i < count_test_block; ++i) {
-      double out_r[kNumCoeffs];
-
-      // Initialize a test block with input range [-255, 255].
-      for (int j = 0; j < kNumCoeffs; ++j) {
-        if (bit_depth_ == AOM_BITS_8) {
-          src[j] = rnd.Rand8() % 2 ? 255 : 0;
-          dst[j] = src[j] > 0 ? 0 : 255;
-          in[j] = src[j] - dst[j];
-#if CONFIG_HIGHBITDEPTH
-        } else {
-          src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
-          dst16[j] = src16[j] > 0 ? 0 : mask_;
-          in[j] = src16[j] - dst16[j];
-#endif
-        }
-      }
-
-      reference_8x8_dct_2d(in, out_r);
-      for (int j = 0; j < kNumCoeffs; ++j)
-        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
-
-      if (bit_depth_ == AOM_BITS_8) {
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
-#if CONFIG_HIGHBITDEPTH
-      } else {
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
-#endif
-      }
-
-      for (int j = 0; j < kNumCoeffs; ++j) {
-#if CONFIG_HIGHBITDEPTH
-        const int diff =
-            bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
-#else
-        const int diff = dst[j] - src[j];
-#endif
-        const uint32_t error = diff * diff;
-        EXPECT_GE(1u << 2 * (bit_depth_ - 8), error)
-            << "Error: 8x8 IDCT has error " << error << " at index " << j;
-      }
-    }
-  }
-
-  void RunFwdAccuracyCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 1000;
-    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
-
-    for (int i = 0; i < count_test_block; ++i) {
-      double out_r[kNumCoeffs];
-
-      // Initialize a test block with input range [-mask_, mask_].
-      for (int j = 0; j < kNumCoeffs; ++j)
-        in[j] = rnd.Rand8() % 2 == 0 ? mask_ : -mask_;
-
-      RunFwdTxfm(in, coeff, pitch_);
-      reference_8x8_dct_2d(in, out_r);
-      for (int j = 0; j < kNumCoeffs; ++j)
-        coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));
-
-      for (int j = 0; j < kNumCoeffs; ++j) {
-        const int32_t diff = coeff[j] - coeff_r[j];
-        const uint32_t error = diff * diff;
-        EXPECT_GE(9u << 2 * (bit_depth_ - 8), error)
-            << "Error: 8x8 DCT has error " << error << " at index " << j;
-      }
-    }
-  }
-
-  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 10000;
-    const int eob = 12;
-    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
-#if CONFIG_HIGHBITDEPTH
-    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
-#endif
-    const int16_t *scan = av1_default_scan_orders[TX_8X8].scan;
-
-    for (int i = 0; i < count_test_block; ++i) {
-      for (int j = 0; j < kNumCoeffs; ++j) {
-        if (j < eob) {
-          // Random values less than the threshold, either positive or negative
-          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
-        } else {
-          coeff[scan[j]] = 0;
-        }
-        if (bit_depth_ == AOM_BITS_8) {
-          dst[j] = 0;
-          ref[j] = 0;
-#if CONFIG_HIGHBITDEPTH
-        } else {
-          dst16[j] = 0;
-          ref16[j] = 0;
-#endif
-        }
-      }
-      if (bit_depth_ == AOM_BITS_8) {
-        ref_txfm(coeff, ref, pitch_);
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
-#if CONFIG_HIGHBITDEPTH
-      } else {
-        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
-#endif
-      }
-
-      for (int j = 0; j < kNumCoeffs; ++j) {
-#if CONFIG_HIGHBITDEPTH
-        const int diff =
-            bit_depth_ == AOM_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
-#else
-        const int diff = dst[j] - ref[j];
-#endif
-        const uint32_t error = diff * diff;
-        EXPECT_EQ(0u, error)
-            << "Error: 8x8 IDCT has error " << error << " at index " << j;
-      }
-    }
-  }
-  int pitch_;
-  FhtFunc fwd_txfm_ref;
-  aom_bit_depth_t bit_depth_;
-  int mask_;
-  TxfmParam txfm_param_;
-};
-
-class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
-                       public ::testing::TestWithParam<Dct8x8Param> {
- public:
-  virtual ~FwdTrans8x8DCT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 8;
-    fwd_txfm_ref = fdct8x8_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride);
-  }
-  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride);
-  }
-
-  FdctFunc fwd_txfm_;
-  IdctFunc inv_txfm_;
-};
-
-TEST_P(FwdTrans8x8DCT, SignBiasCheck) { RunSignBiasCheck(); }
-
-TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }
-
-TEST_P(FwdTrans8x8DCT, ExtremalCheck) { RunExtremalCheck(); }
-
-TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) { RunFwdAccuracyCheck(); }
-
-TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }
-
-class FwdTrans8x8HT : public FwdTrans8x8TestBase,
-                      public ::testing::TestWithParam<Ht8x8Param> {
- public:
-  virtual ~FwdTrans8x8HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 8;
-    fwd_txfm_ref = fht8x8_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    txfm_param_.tx_type = GET_PARAM(2);
-#if CONFIG_HIGHBITDEPTH
-    switch (bit_depth_) {
-      case AOM_BITS_10: fwd_txfm_ref = fht8x8_10; break;
-      case AOM_BITS_12: fwd_txfm_ref = fht8x8_12; break;
-      default: fwd_txfm_ref = fht8x8_ref; break;
-    }
-#endif
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(FwdTrans8x8HT, SignBiasCheck) { RunSignBiasCheck(); }
-
-TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }
-
-TEST_P(FwdTrans8x8HT, ExtremalCheck) { RunExtremalCheck(); }
-
-class InvTrans8x8DCT : public FwdTrans8x8TestBase,
-                       public ::testing::TestWithParam<Idct8x8Param> {
- public:
-  virtual ~InvTrans8x8DCT() {}
-
-  virtual void SetUp() {
-    ref_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    thresh_ = GET_PARAM(2);
-    pitch_ = 8;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride);
-  }
-  void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/, int /*stride*/) {}
-
-  IdctFunc ref_txfm_;
-  IdctFunc inv_txfm_;
-  int thresh_;
-};
-
-TEST_P(InvTrans8x8DCT, CompareReference) {
-  CompareInvReference(ref_txfm_, thresh_);
-}
-
-using std::tr1::make_tuple;
-
-#if CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(C, FwdTrans8x8DCT,
-                        ::testing::Values(make_tuple(&aom_fdct8x8_c,
-                                                     &aom_idct8x8_64_add_c,
-                                                     DCT_DCT, AOM_BITS_8)));
-#else
-INSTANTIATE_TEST_CASE_P(C, FwdTrans8x8DCT,
-                        ::testing::Values(make_tuple(&aom_fdct8x8_c,
-                                                     &aom_idct8x8_64_add_c,
-                                                     DCT_DCT, AOM_BITS_8)));
-#endif  // CONFIG_HIGHBITDEPTH
-
-#if CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    C, FwdTrans8x8HT,
-    ::testing::Values(
-        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, DCT_DCT, AOM_BITS_8),
-        make_tuple(&fht8x8_10, &iht8x8_10, DCT_DCT, AOM_BITS_10),
-        make_tuple(&fht8x8_10, &iht8x8_10, ADST_DCT, AOM_BITS_10),
-        make_tuple(&fht8x8_10, &iht8x8_10, DCT_ADST, AOM_BITS_10),
-        make_tuple(&fht8x8_10, &iht8x8_10, ADST_ADST, AOM_BITS_10),
-        make_tuple(&fht8x8_12, &iht8x8_12, DCT_DCT, AOM_BITS_12),
-        make_tuple(&fht8x8_12, &iht8x8_12, ADST_DCT, AOM_BITS_12),
-        make_tuple(&fht8x8_12, &iht8x8_12, DCT_ADST, AOM_BITS_12),
-        make_tuple(&fht8x8_12, &iht8x8_12, ADST_ADST, AOM_BITS_12),
-        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, ADST_DCT, AOM_BITS_8),
-        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, DCT_ADST, AOM_BITS_8),
-        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, ADST_ADST,
-                   AOM_BITS_8)));
-#else
-INSTANTIATE_TEST_CASE_P(
-    C, FwdTrans8x8HT,
-    ::testing::Values(
-        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, DCT_DCT, AOM_BITS_8),
-        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, ADST_DCT, AOM_BITS_8),
-        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, DCT_ADST, AOM_BITS_8),
-        make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, ADST_ADST,
-                   AOM_BITS_8)));
-#endif  // CONFIG_HIGHBITDEPTH
-
-#if HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
-                        ::testing::Values(make_tuple(&aom_fdct8x8_neon,
-                                                     &aom_idct8x8_64_add_neon,
-                                                     DCT_DCT, AOM_BITS_8)));
-#endif  // HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH
-
-#if HAVE_NEON && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    NEON, FwdTrans8x8HT,
-    ::testing::Values(make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon,
-                                 DCT_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon,
-                                 ADST_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon,
-                                 DCT_ADST, AOM_BITS_8),
-                      make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon,
-                                 ADST_ADST, AOM_BITS_8)));
-#endif  // HAVE_NEON && !CONFIG_HIGHBITDEPTH
-
-#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT,
-                        ::testing::Values(make_tuple(&aom_fdct8x8_sse2,
-                                                     &aom_idct8x8_64_add_sse2,
-                                                     DCT_DCT, AOM_BITS_8)));
-#if !CONFIG_DAALA_DCT8
-INSTANTIATE_TEST_CASE_P(
-    SSE2, FwdTrans8x8HT,
-    ::testing::Values(make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2,
-                                 DCT_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2,
-                                 ADST_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2,
-                                 DCT_ADST, AOM_BITS_8),
-                      make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2,
-                                 ADST_ADST, AOM_BITS_8)));
-#endif  // !CONFIG_DAALA_DCT8
-#endif  // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
-
-#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT,
-                        ::testing::Values(make_tuple(&aom_fdct8x8_sse2,
-                                                     &aom_idct8x8_64_add_c,
-                                                     DCT_DCT, AOM_BITS_8)));
-#if !CONFIG_DAALA_DCT8
-INSTANTIATE_TEST_CASE_P(
-    SSE2, FwdTrans8x8HT,
-    ::testing::Values(make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c,
-                                 DCT_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c,
-                                 ADST_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c,
-                                 DCT_ADST, AOM_BITS_8),
-                      make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c,
-                                 ADST_ADST, AOM_BITS_8)));
-#endif  // !CONFIG_DAALA_DCT8
-#endif  // HAVE_SSE2 && CONFIG_HIGHBITDEPTH
-
-#if HAVE_SSSE3 && ARCH_X86_64
-INSTANTIATE_TEST_CASE_P(SSSE3, FwdTrans8x8DCT,
-                        ::testing::Values(make_tuple(&aom_fdct8x8_ssse3,
-                                                     &aom_idct8x8_64_add_ssse3,
-                                                     DCT_DCT, AOM_BITS_8)));
-#endif
-
-#if HAVE_MSA && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(MSA, FwdTrans8x8DCT,
-                        ::testing::Values(make_tuple(&aom_fdct8x8_msa,
-                                                     &aom_idct8x8_64_add_msa,
-                                                     DCT_DCT, AOM_BITS_8)));
-#if !CONFIG_EXT_TX && !CONFIG_DAALA_DCT8
-INSTANTIATE_TEST_CASE_P(
-    MSA, FwdTrans8x8HT,
-    ::testing::Values(make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa,
-                                 DCT_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa,
-                                 ADST_DCT, AOM_BITS_8),
-                      make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa,
-                                 DCT_ADST, AOM_BITS_8),
-                      make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa,
-                                 ADST_ADST, AOM_BITS_8)));
-#endif  // !CONFIG_EXT_TX && !CONFIG_DAALA_DCT8
-#endif  // HAVE_MSA && !CONFIG_HIGHBITDEPTH
-}  // namespace
diff --git a/third_party/aom/test/fft_test.cc b/third_party/aom/test/fft_test.cc
new file mode 100644
index 0000000000..56187cdbbb
--- /dev/null
+++ b/third_party/aom/test/fft_test.cc
@@ -0,0 +1,263 @@
+#include <math.h>
+
+#include <algorithm>
+#include <complex>
+#include <vector>
+
+#include "aom_dsp/fft_common.h"
+#include "aom_mem/aom_mem.h"
+#if ARCH_X86 || ARCH_X86_64
+#include "aom_ports/x86.h"
+#endif
+#include "av1/common/common.h"
+#include "config/aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+typedef void (*tform_fun_t)(const float *input, float *temp, float *output);
+
+// Simple 1D FFT implementation
+template <typename InputType>
+void fft(const InputType *data, std::complex<float> *result, int n) {
+  if (n == 1) {
+    result[0] = data[0];
+    return;
+  }
+  std::vector<InputType> temp(n);
+  for (int k = 0; k < n / 2; ++k) {
+    temp[k] = data[2 * k];
+    temp[n / 2 + k] = data[2 * k + 1];
+  }
+  fft(&temp[0], result, n / 2);
+  fft(&temp[n / 2], result + n / 2, n / 2);
+  for (int k = 0; k < n / 2; ++k) {
+    std::complex<float> w = std::complex<float>((float)cos(2. * PI * k / n),
+                                                (float)-sin(2. * PI * k / n));
+    std::complex<float> a = result[k];
+    std::complex<float> b = result[n / 2 + k];
+    result[k] = a + w * b;
+    result[n / 2 + k] = a - w * b;
+  }
+}
+
+void transpose(std::vector<std::complex<float> > *data, int n) {
+  for (int y = 0; y < n; ++y) {
+    for (int x = y + 1; x < n; ++x) {
+      std::swap((*data)[y * n + x], (*data)[x * n + y]);
+    }
+  }
+}
+
+// Simple 2D FFT implementation
+template <class InputType>
+std::vector<std::complex<float> > fft2d(const InputType *input, int n) {
+  std::vector<std::complex<float> > rowfft(n * n);
+  std::vector<std::complex<float> > result(n * n);
+  for (int y = 0; y < n; ++y) {
+    fft(input + y * n, &rowfft[y * n], n);
+  }
+  transpose(&rowfft, n);
+  for (int y = 0; y < n; ++y) {
+    fft(&rowfft[y * n], &result[y * n], n);
+  }
+  transpose(&result, n);
+  return result;
+}
+
+struct FFTTestArg {
+  int n;
+  void (*fft)(const float *input, float *temp, float *output);
+  int flag;
+  FFTTestArg(int n_in, tform_fun_t fft_in, int flag_in)
+      : n(n_in), fft(fft_in), flag(flag_in) {}
+};
+
+std::ostream &operator<<(std::ostream &os, const FFTTestArg &test_arg) {
+  return os << "fft_arg { n:" << test_arg.n << " fft:" << test_arg.fft
+            << " flag:" << test_arg.flag << "}";
+}
+
+class FFT2DTest : public ::testing::TestWithParam<FFTTestArg> {
+ protected:
+  void SetUp() {
+    int n = GetParam().n;
+    input_ = (float *)aom_memalign(32, sizeof(*input_) * n * n);
+    temp_ = (float *)aom_memalign(32, sizeof(*temp_) * n * n);
+    output_ = (float *)aom_memalign(32, sizeof(*output_) * n * n * 2);
+    memset(input_, 0, sizeof(*input_) * n * n);
+    memset(temp_, 0, sizeof(*temp_) * n * n);
+    memset(output_, 0, sizeof(*output_) * n * n * 2);
+#if ARCH_X86 || ARCH_X86_64
+    disabled_ = GetParam().flag != 0 && !(x86_simd_caps() & GetParam().flag);
+#else
+    disabled_ = GetParam().flag != 0;
+#endif
+  }
+  void TearDown() {
+    aom_free(input_);
+    aom_free(temp_);
+    aom_free(output_);
+  }
+  int disabled_;
+  float *input_;
+  float *temp_;
+  float *output_;
+};
+
+TEST_P(FFT2DTest, Correct) {
+  if (disabled_) return;
+
+  int n = GetParam().n;
+  for (int i = 0; i < n * n; ++i) {
+    input_[i] = 1;
+    std::vector<std::complex<float> > expected = fft2d<float>(&input_[0], n);
+    GetParam().fft(&input_[0], &temp_[0], &output_[0]);
+    for (int y = 0; y < n; ++y) {
+      for (int x = 0; x < (n / 2) + 1; ++x) {
+        EXPECT_NEAR(expected[y * n + x].real(), output_[2 * (y * n + x)], 1e-5);
+        EXPECT_NEAR(expected[y * n + x].imag(), output_[2 * (y * n + x) + 1],
+                    1e-5);
+      }
+    }
+    input_[i] = 0;
+  }
+}
+
+TEST_P(FFT2DTest, Benchmark) {
+  if (disabled_) return;
+
+  int n = GetParam().n;
+  float sum = 0;
+  for (int i = 0; i < 1000 * (64 - n); ++i) {
+    input_[i % (n * n)] = 1;
+    GetParam().fft(&input_[0], &temp_[0], &output_[0]);
+    sum += output_[0];
+    input_[i % (n * n)] = 0;
+  }
+}
+
+INSTANTIATE_TEST_CASE_P(
+    FFT2DTestC, FFT2DTest,
+    ::testing::Values(FFTTestArg(2, aom_fft2x2_float_c, 0),
+                      FFTTestArg(4, aom_fft4x4_float_c, 0),
+                      FFTTestArg(8, aom_fft8x8_float_c, 0),
+                      FFTTestArg(16, aom_fft16x16_float_c, 0),
+                      FFTTestArg(32, aom_fft32x32_float_c, 0)));
+#if ARCH_X86 || ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(
+    FFT2DTestSSE2, FFT2DTest,
+    ::testing::Values(FFTTestArg(4, aom_fft4x4_float_sse2, HAS_SSE2),
+                      FFTTestArg(8, aom_fft8x8_float_sse2, HAS_SSE2),
+                      FFTTestArg(16, aom_fft16x16_float_sse2, HAS_SSE2),
+                      FFTTestArg(32, aom_fft32x32_float_sse2, HAS_SSE2)));
+
+INSTANTIATE_TEST_CASE_P(
+    FFT2DTestAVX2, FFT2DTest,
+    ::testing::Values(FFTTestArg(8, aom_fft8x8_float_avx2, HAS_AVX2),
+                      FFTTestArg(16, aom_fft16x16_float_avx2, HAS_AVX2),
+                      FFTTestArg(32, aom_fft32x32_float_avx2, HAS_AVX2)));
+#endif
+
+struct IFFTTestArg {
+  int n;
+  tform_fun_t ifft;
+  int flag;
+  IFFTTestArg(int n_in, tform_fun_t ifft_in, int flag_in)
+      : n(n_in), ifft(ifft_in), flag(flag_in) {}
+};
+
+std::ostream &operator<<(std::ostream &os, const IFFTTestArg &test_arg) {
+  return os << "ifft_arg { n:" << test_arg.n << " fft:" << test_arg.ifft
+            << " flag:" << test_arg.flag << "}";
+}
+
+class IFFT2DTest : public ::testing::TestWithParam<IFFTTestArg> {
+ protected:
+  void SetUp() {
+    int n = GetParam().n;
+    input_ = (float *)aom_memalign(32, sizeof(*input_) * n * n * 2);
+    temp_ = (float *)aom_memalign(32, sizeof(*temp_) * n * n * 2);
+    output_ = (float *)aom_memalign(32, sizeof(*output_) * n * n);
+    memset(input_, 0, sizeof(*input_) * n * n * 2);
+    memset(temp_, 0, sizeof(*temp_) * n * n * 2);
+    memset(output_, 0, sizeof(*output_) * n * n);
+#if ARCH_X86 || ARCH_X86_64
+    disabled_ = GetParam().flag != 0 && !(x86_simd_caps() & GetParam().flag);
+#else
+    disabled_ = GetParam().flag != 0;
+#endif
+  }
+  void TearDown() {
+    aom_free(input_);
+    aom_free(temp_);
+    aom_free(output_);
+  }
+  int disabled_;
+  float *input_;
+  float *temp_;
+  float *output_;
+};
+
+TEST_P(IFFT2DTest, Correctness) {
+  if (disabled_) return;
+  int n = GetParam().n;
+  ASSERT_GE(n, 2);
+  std::vector<float> expected(n * n);
+  std::vector<float> actual(n * n);
+  // Do forward transform then invert to make sure we get back expected
+  for (int y = 0; y < n; ++y) {
+    for (int x = 0; x < n; ++x) {
+      expected[y * n + x] = 1;
+      std::vector<std::complex<float> > input_c = fft2d(&expected[0], n);
+      for (int i = 0; i < n * n; ++i) {
+        input_[2 * i + 0] = input_c[i].real();
+        input_[2 * i + 1] = input_c[i].imag();
+      }
+      GetParam().ifft(&input_[0], &temp_[0], &output_[0]);
+
+      for (int yy = 0; yy < n; ++yy) {
+        for (int xx = 0; xx < n; ++xx) {
+          EXPECT_NEAR(expected[yy * n + xx], output_[yy * n + xx] / (n * n),
+                      1e-5);
+        }
+      }
+      expected[y * n + x] = 0;
+    }
+  }
+};
+
+TEST_P(IFFT2DTest, Benchmark) {
+  if (disabled_) return;
+  int n = GetParam().n;
+  float sum = 0;
+  for (int i = 0; i < 1000 * (64 - n); ++i) {
+    input_[i % (n * n)] = 1;
+    GetParam().ifft(&input_[0], &temp_[0], &output_[0]);
+    sum += output_[0];
+    input_[i % (n * n)] = 0;
+  }
+}
+INSTANTIATE_TEST_CASE_P(
+    IFFT2DTestC, IFFT2DTest,
+    ::testing::Values(IFFTTestArg(2, aom_ifft2x2_float_c, 0),
+                      IFFTTestArg(4, aom_ifft4x4_float_c, 0),
+                      IFFTTestArg(8, aom_ifft8x8_float_c, 0),
+                      IFFTTestArg(16, aom_ifft16x16_float_c, 0),
+                      IFFTTestArg(32, aom_ifft32x32_float_c, 0)));
+#if ARCH_X86 || ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(
+    IFFT2DTestSSE2, IFFT2DTest,
+    ::testing::Values(IFFTTestArg(4, aom_ifft4x4_float_sse2, HAS_SSE2),
+                      IFFTTestArg(8, aom_ifft8x8_float_sse2, HAS_SSE2),
+                      IFFTTestArg(16, aom_ifft16x16_float_sse2, HAS_SSE2),
+                      IFFTTestArg(32, aom_ifft32x32_float_sse2, HAS_SSE2)));
+
+INSTANTIATE_TEST_CASE_P(
+    IFFT2DTestAVX2, IFFT2DTest,
+    ::testing::Values(IFFTTestArg(8, aom_ifft8x8_float_avx2, HAS_AVX2),
+                      IFFTTestArg(16, aom_ifft16x16_float_avx2, HAS_AVX2),
+                      IFFTTestArg(32, aom_ifft32x32_float_avx2, HAS_AVX2)));
+#endif
+}  // namespace
diff --git a/third_party/aom/test/film_grain_table_test.cc b/third_party/aom/test/film_grain_table_test.cc
new file mode 100644
index 0000000000..0688146356
--- /dev/null
+++ b/third_party/aom/test/film_grain_table_test.cc
@@ -0,0 +1,239 @@
+#include <string>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "aom_dsp/grain_table.h"
+#include "aom/internal/aom_codec_internal.h"
+#include "av1/encoder/grain_test_vectors.h"
+#include "test/video_source.h"
+
+void grain_equal(const aom_film_grain_t *expected,
+                 const aom_film_grain_t *actual) {
+  EXPECT_EQ(expected->apply_grain, actual->apply_grain);
+  EXPECT_EQ(expected->update_parameters, actual->update_parameters);
+  if (!expected->update_parameters) return;
+  EXPECT_EQ(expected->num_y_points, actual->num_y_points);
+  EXPECT_EQ(expected->num_cb_points, actual->num_cb_points);
+  EXPECT_EQ(expected->num_cr_points, actual->num_cr_points);
+  EXPECT_EQ(0, memcmp(expected->scaling_points_y, actual->scaling_points_y,
+                      expected->num_y_points *
+                          sizeof(expected->scaling_points_y[0])));
+  EXPECT_EQ(0, memcmp(expected->scaling_points_cb, actual->scaling_points_cb,
+                      expected->num_cb_points *
+                          sizeof(expected->scaling_points_cb[0])));
+  EXPECT_EQ(0, memcmp(expected->scaling_points_cr, actual->scaling_points_cr,
+                      expected->num_cr_points *
+                          sizeof(expected->scaling_points_cr[0])));
+  EXPECT_EQ(expected->scaling_shift, actual->scaling_shift);
+  EXPECT_EQ(expected->ar_coeff_lag, actual->ar_coeff_lag);
+  EXPECT_EQ(expected->ar_coeff_shift, actual->ar_coeff_shift);
+
+  const int num_pos_luma =
+      2 * expected->ar_coeff_lag * (expected->ar_coeff_lag + 1);
+  const int num_pos_chroma = num_pos_luma;
+  EXPECT_EQ(0, memcmp(expected->ar_coeffs_y, actual->ar_coeffs_y,
+                      sizeof(expected->ar_coeffs_y[0]) * num_pos_luma));
+  if (actual->num_cb_points || actual->chroma_scaling_from_luma) {
+    EXPECT_EQ(0, memcmp(expected->ar_coeffs_cb, actual->ar_coeffs_cb,
+                        sizeof(expected->ar_coeffs_cb[0]) * num_pos_chroma));
+  }
+  if (actual->num_cr_points || actual->chroma_scaling_from_luma) {
+    EXPECT_EQ(0, memcmp(expected->ar_coeffs_cr, actual->ar_coeffs_cr,
+                        sizeof(expected->ar_coeffs_cr[0]) * num_pos_chroma));
+  }
+  EXPECT_EQ(expected->overlap_flag, actual->overlap_flag);
+  EXPECT_EQ(expected->chroma_scaling_from_luma,
+            actual->chroma_scaling_from_luma);
+  EXPECT_EQ(expected->grain_scale_shift, actual->grain_scale_shift);
+  // EXPECT_EQ(expected->random_seed, actual->random_seed);
+
+  // clip_to_restricted and bit_depth aren't written
+  if (expected->num_cb_points) {
+    EXPECT_EQ(expected->cb_mult, actual->cb_mult);
+    EXPECT_EQ(expected->cb_luma_mult, actual->cb_luma_mult);
+    EXPECT_EQ(expected->cb_offset, actual->cb_offset);
+  }
+  if (expected->num_cr_points) {
+    EXPECT_EQ(expected->cr_mult, actual->cr_mult);
+    EXPECT_EQ(expected->cr_luma_mult, actual->cr_luma_mult);
+    EXPECT_EQ(expected->cr_offset, actual->cr_offset);
+  }
+}
+
+TEST(FilmGrainTableTest, AddAndLookupSingleSegment) {
+  aom_film_grain_table_t table;
+  memset(&table, 0, sizeof(table));
+
+  aom_film_grain_t grain;
+  EXPECT_FALSE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain));
+
+  aom_film_grain_table_append(&table, 1000, 2000, film_grain_test_vectors + 0);
+  EXPECT_FALSE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain));
+  EXPECT_FALSE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain));
+
+  EXPECT_TRUE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain));
+
+  grain.bit_depth = film_grain_test_vectors[0].bit_depth;
+  EXPECT_EQ(0, memcmp(&grain, film_grain_test_vectors + 0, sizeof(table)));
+
+  // Extend the existing segment
+  aom_film_grain_table_append(&table, 2000, 3000, film_grain_test_vectors + 0);
+  EXPECT_EQ(0, table.head->next);
+
+  // Lookup and remove and check that the entry is no longer there
+  EXPECT_TRUE(aom_film_grain_table_lookup(&table, 1000, 2000, true, &grain));
+  EXPECT_FALSE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain));
+
+  EXPECT_TRUE(aom_film_grain_table_lookup(&table, 2000, 3000, true, &grain));
+  EXPECT_FALSE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain));
+
+  EXPECT_EQ(0, table.head);
+  EXPECT_EQ(0, table.tail);
+  aom_film_grain_table_free(&table);
+}
+
+TEST(FilmGrainTableTest, SplitSingleSegment) {
+  aom_film_grain_table_t table;
+  aom_film_grain_t grain;
+  memset(&table, 0, sizeof(table));
+
+  aom_film_grain_table_append(&table, 0, 1000, film_grain_test_vectors + 0);
+
+  // Test lookup and remove that adjusts start time
+  EXPECT_TRUE(aom_film_grain_table_lookup(&table, 0, 100, true, &grain));
+  EXPECT_EQ(NULL, table.head->next);
+  EXPECT_EQ(100, table.head->start_time);
+
+  // Test lookup and remove that adjusts end time
+  EXPECT_TRUE(aom_film_grain_table_lookup(&table, 900, 1000, true, &grain));
+  EXPECT_EQ(NULL, table.head->next);
+  EXPECT_EQ(100, table.head->start_time);
+  EXPECT_EQ(900, table.head->end_time);
+
+  // Test lookup and remove that splits the first entry
+  EXPECT_TRUE(aom_film_grain_table_lookup(&table, 400, 600, true, &grain));
+  EXPECT_EQ(100, table.head->start_time);
+  EXPECT_EQ(400, table.head->end_time);
+
+  ASSERT_NE((void *)NULL, table.head->next);
+  EXPECT_EQ(table.tail, table.head->next);
+  EXPECT_EQ(600, table.head->next->start_time);
+  EXPECT_EQ(900, table.head->next->end_time);
+
+  aom_film_grain_table_free(&table);
+}
+
+TEST(FilmGrainTableTest, AddAndLookupMultipleSegments) {
+  aom_film_grain_table_t table;
+  memset(&table, 0, sizeof(table));
+
+  aom_film_grain_t grain;
+  const int kNumTestVectors =
+      sizeof(film_grain_test_vectors) / sizeof(film_grain_test_vectors[0]);
+  for (int i = 0; i < kNumTestVectors; ++i) {
+    aom_film_grain_table_append(&table, i * 1000, (i + 1) * 1000,
+                                film_grain_test_vectors + i);
+  }
+
+  for (int i = kNumTestVectors - 1; i >= 0; --i) {
+    EXPECT_TRUE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000,
+                                            true, &grain));
+    grain_equal(film_grain_test_vectors + i, &grain);
+    EXPECT_FALSE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000,
+                                             true, &grain));
+  }
+
+  // Verify that all the data has been removed
+  for (int i = 0; i < kNumTestVectors; ++i) {
+    EXPECT_FALSE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000,
+                                             true, &grain));
+  }
+  aom_film_grain_table_free(&table);
+}
+
+class FilmGrainTableIOTest : public ::testing::Test {
+ protected:
+  void SetUp() { memset(&error_, 0, sizeof(error_)); }
+  struct aom_internal_error_info error_;
+};
+
+TEST_F(FilmGrainTableIOTest, ReadMissingFile) {
+  aom_film_grain_table_t table;
+  memset(&table, 0, sizeof(table));
+  ASSERT_EQ(AOM_CODEC_ERROR, aom_film_grain_table_read(
+                                 &table, "/path/to/missing/file", &error_));
+}
+
+TEST_F(FilmGrainTableIOTest, ReadTruncatedFile) {
+  aom_film_grain_table_t table;
+  memset(&table, 0, sizeof(table));
+
+  std::string grain_file;
+  FILE *file = libaom_test::GetTempOutFile(&grain_file);
+  fwrite("deadbeef", 8, 1, file);
+  fclose(file);
+  ASSERT_EQ(AOM_CODEC_ERROR,
+            aom_film_grain_table_read(&table, grain_file.c_str(), &error_));
+  EXPECT_EQ(0, remove(grain_file.c_str()));
+}
+
+TEST_F(FilmGrainTableIOTest, RoundTripReadWrite) {
+  aom_film_grain_table_t table;
+  memset(&table, 0, sizeof(table));
+
+  aom_film_grain_t expected_grain[16];
+  const int kNumTestVectors =
+      sizeof(film_grain_test_vectors) / sizeof(film_grain_test_vectors[0]);
+  for (int i = 0; i < kNumTestVectors; ++i) {
+    expected_grain[i] = film_grain_test_vectors[i];
+    expected_grain[i].random_seed = i;
+    expected_grain[i].update_parameters = i % 2;
+    expected_grain[i].apply_grain = (i + 1) % 2;
+    expected_grain[i].bit_depth = 0;
+    aom_film_grain_table_append(&table, i * 1000, (i + 1) * 1000,
+                                expected_grain + i);
+  }
+  std::string grain_file;
+  fclose(libaom_test::GetTempOutFile(&grain_file));
+  ASSERT_EQ(AOM_CODEC_OK,
+            aom_film_grain_table_write(&table, grain_file.c_str(), &error_));
+  aom_film_grain_table_free(&table);
+
+  memset(&table, 0, sizeof(table));
+  ASSERT_EQ(AOM_CODEC_OK,
+            aom_film_grain_table_read(&table, grain_file.c_str(), &error_));
+  for (int i = 0; i < kNumTestVectors; ++i) {
+    aom_film_grain_t grain;
+    EXPECT_TRUE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000,
+                                            true, &grain));
+    grain_equal(expected_grain + i, &grain);
+  }
+  aom_film_grain_table_free(&table);
+  EXPECT_EQ(0, remove(grain_file.c_str()));
+}
+
+TEST_F(FilmGrainTableIOTest, RoundTripSplit) {
+  std::string grain_file;
+  fclose(libaom_test::GetTempOutFile(&grain_file));
+
+  aom_film_grain_table_t table;
+  memset(&table, 0, sizeof(table));
+
+  aom_film_grain_t grain = film_grain_test_vectors[0];
+  aom_film_grain_table_append(&table, 0, 3000, &grain);
+  ASSERT_TRUE(aom_film_grain_table_lookup(&table, 1000, 2000, true, &grain));
+  ASSERT_TRUE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain));
+  EXPECT_FALSE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain));
+  ASSERT_TRUE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain));
+  ASSERT_EQ(AOM_CODEC_OK,
+            aom_film_grain_table_write(&table, grain_file.c_str(), &error_));
+  aom_film_grain_table_free(&table);
+
+  memset(&table, 0, sizeof(table));
+  ASSERT_EQ(AOM_CODEC_OK,
+            aom_film_grain_table_read(&table, grain_file.c_str(), &error_));
+  ASSERT_TRUE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain));
+  ASSERT_FALSE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain));
+  ASSERT_TRUE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain));
+  aom_film_grain_table_free(&table);
+
+  EXPECT_EQ(0, remove(grain_file.c_str()));
+}
diff --git a/third_party/aom/test/filterintra_predictors_test.cc b/third_party/aom/test/filterintra_predictors_test.cc
deleted file mode 100644
index 5c6b56d144..0000000000
--- a/third_party/aom/test/filterintra_predictors_test.cc
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "av1/common/enums.h"
-
-namespace {
-
-using std::tr1::tuple;
-using libaom_test::ACMRandom;
-
-typedef void (*Predictor)(uint8_t *dst, ptrdiff_t stride, int bs,
-                          const uint8_t *above, const uint8_t *left);
-
-// Note:
-//  Test parameter list:
-//  Reference predictor, optimized predictor, prediction mode, block size
-//
-typedef tuple<Predictor, Predictor, int> PredFuncMode;
-typedef tuple<PredFuncMode, int> PredParams;
-
-#if CONFIG_HIGHBITDEPTH
-typedef void (*HbdPredictor)(uint16_t *dst, ptrdiff_t stride, int bs,
-                             const uint16_t *above, const uint16_t *left,
-                             int bd);
-
-// Note:
-//  Test parameter list:
-//  Reference predictor, optimized predictor, prediction mode, block size,
-//  bit depth
-//
-typedef tuple<HbdPredictor, HbdPredictor, int> HbdPredFuncMode;
-typedef tuple<HbdPredFuncMode, int, int> HbdPredParams;
-#endif
-
-const int MaxBlkSize = 32;
-
-// By default, disable speed test
-#define PREDICTORS_SPEED_TEST (0)
-
-#if PREDICTORS_SPEED_TEST
-const int MaxTestNum = 100000;
-#else
-const int MaxTestNum = 100;
-#endif
-
-class AV1FilterIntraPredOptimzTest
-    : public ::testing::TestWithParam<PredParams> {
- public:
-  virtual ~AV1FilterIntraPredOptimzTest() {}
-  virtual void SetUp() {
-    PredFuncMode funcMode = GET_PARAM(0);
-    predFuncRef_ = std::tr1::get<0>(funcMode);
-    predFunc_ = std::tr1::get<1>(funcMode);
-    mode_ = std::tr1::get<2>(funcMode);
-    blockSize_ = GET_PARAM(1);
-
-    alloc_ = new uint8_t[3 * MaxBlkSize + 2];
-    predRef_ = new uint8_t[MaxBlkSize * MaxBlkSize];
-    pred_ = new uint8_t[MaxBlkSize * MaxBlkSize];
-  }
-
-  virtual void TearDown() {
-    delete[] alloc_;
-    delete[] predRef_;
-    delete[] pred_;
-    libaom_test::ClearSystemState();
-  }
-
- protected:
-  void RunTest() const {
-    int tstIndex = 0;
-    int stride = blockSize_;
-    uint8_t *left = alloc_;
-    uint8_t *above = alloc_ + MaxBlkSize + 1;
-    while (tstIndex < MaxTestNum) {
-      PrepareBuffer();
-      predFuncRef_(predRef_, stride, blockSize_, &above[1], left);
-      ASM_REGISTER_STATE_CHECK(
-          predFunc_(pred_, stride, blockSize_, &above[1], left));
-      DiffPred(tstIndex);
-      tstIndex += 1;
-    }
-  }
-
-  void RunSpeedTestC() const {
-    int tstIndex = 0;
-    int stride = blockSize_;
-    uint8_t *left = alloc_;
-    uint8_t *above = alloc_ + MaxBlkSize + 1;
-    PrepareBuffer();
-    while (tstIndex < MaxTestNum) {
-      predFuncRef_(predRef_, stride, blockSize_, &above[1], left);
-      tstIndex += 1;
-    }
-  }
-
-  void RunSpeedTestSSE() const {
-    int tstIndex = 0;
-    int stride = blockSize_;
-    uint8_t *left = alloc_;
-    uint8_t *above = alloc_ + MaxBlkSize + 1;
-    PrepareBuffer();
-    while (tstIndex < MaxTestNum) {
-      predFunc_(predRef_, stride, blockSize_, &above[1], left);
-      tstIndex += 1;
-    }
-  }
-
- private:
-  void PrepareBuffer() const {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    int i = 0;
-    while (i < (3 * MaxBlkSize + 2)) {
-      alloc_[i] = rnd.Rand8();
-      i += 1;
-    }
-  }
-
-  void DiffPred(int testNum) const {
-    int i = 0;
-    while (i < blockSize_ * blockSize_) {
-      EXPECT_EQ(predRef_[i], pred_[i]) << "Error at position: " << i << " "
-                                       << "Block size: " << blockSize_ << " "
-                                       << "Test number: " << testNum;
-      i += 1;
-    }
-  }
-
-  Predictor predFunc_;
-  Predictor predFuncRef_;
-  int mode_;
-  int blockSize_;
-  uint8_t *alloc_;
-  uint8_t *pred_;
-  uint8_t *predRef_;
-};
-
-#if CONFIG_HIGHBITDEPTH
-class AV1HbdFilterIntraPredOptimzTest
-    : public ::testing::TestWithParam<HbdPredParams> {
- public:
-  virtual ~AV1HbdFilterIntraPredOptimzTest() {}
-  virtual void SetUp() {
-    HbdPredFuncMode funcMode = GET_PARAM(0);
-    predFuncRef_ = std::tr1::get<0>(funcMode);
-    predFunc_ = std::tr1::get<1>(funcMode);
-    mode_ = std::tr1::get<2>(funcMode);
-    blockSize_ = GET_PARAM(1);
-    bd_ = GET_PARAM(2);
-
-    alloc_ = new uint16_t[3 * MaxBlkSize + 2];
-    predRef_ = new uint16_t[MaxBlkSize * MaxBlkSize];
-    pred_ = new uint16_t[MaxBlkSize * MaxBlkSize];
-  }
-
-  virtual void TearDown() {
-    delete[] alloc_;
-    delete[] predRef_;
-    delete[] pred_;
-    libaom_test::ClearSystemState();
-  }
-
- protected:
-  void RunTest() const {
-    int tstIndex = 0;
-    int stride = blockSize_;
-    uint16_t *left = alloc_;
-    uint16_t *above = alloc_ + MaxBlkSize + 1;
-    while (tstIndex < MaxTestNum) {
-      PrepareBuffer();
-      predFuncRef_(predRef_, stride, blockSize_, &above[1], left, bd_);
-      ASM_REGISTER_STATE_CHECK(
-          predFunc_(pred_, stride, blockSize_, &above[1], left, bd_));
-      DiffPred(tstIndex);
-      tstIndex += 1;
-    }
-  }
-
-  void RunSpeedTestC() const {
-    int tstIndex = 0;
-    int stride = blockSize_;
-    uint16_t *left = alloc_;
-    uint16_t *above = alloc_ + MaxBlkSize + 1;
-    PrepareBuffer();
-    while (tstIndex < MaxTestNum) {
-      predFuncRef_(predRef_, stride, blockSize_, &above[1], left, bd_);
-      tstIndex += 1;
-    }
-  }
-
-  void RunSpeedTestSSE() const {
-    int tstIndex = 0;
-    int stride = blockSize_;
-    uint16_t *left = alloc_;
-    uint16_t *above = alloc_ + MaxBlkSize + 1;
-    PrepareBuffer();
-    while (tstIndex < MaxTestNum) {
-      predFunc_(predRef_, stride, blockSize_, &above[1], left, bd_);
-      tstIndex += 1;
-    }
-  }
-
- private:
-  void PrepareBuffer() const {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    int i = 0;
-    while (i < (3 * MaxBlkSize + 2)) {
-      alloc_[i] = rnd.Rand16() & ((1 << bd_) - 1);
-      i += 1;
-    }
-  }
-
-  void DiffPred(int testNum) const {
-    int i = 0;
-    while (i < blockSize_ * blockSize_) {
-      EXPECT_EQ(predRef_[i], pred_[i]) << "Error at position: " << i << " "
-                                       << "Block size: " << blockSize_ << " "
-                                       << "Bit depth: " << bd_ << " "
-                                       << "Test number: " << testNum;
-      i += 1;
-    }
-  }
-
-  HbdPredictor predFunc_;
-  HbdPredictor predFuncRef_;
-  int mode_;
-  int blockSize_;
-  int bd_;
-  uint16_t *alloc_;
-  uint16_t *pred_;
-  uint16_t *predRef_;
-};
-#endif  // CONFIG_HIGHBITDEPTH
-
-TEST_P(AV1FilterIntraPredOptimzTest, BitExactCheck) { RunTest(); }
-
-#if PREDICTORS_SPEED_TEST
-TEST_P(AV1FilterIntraPredOptimzTest, SpeedCheckC) { RunSpeedTestC(); }
-
-TEST_P(AV1FilterIntraPredOptimzTest, SpeedCheckSSE) { RunSpeedTestSSE(); }
-#endif
-
-#if CONFIG_HIGHBITDEPTH
-TEST_P(AV1HbdFilterIntraPredOptimzTest, BitExactCheck) { RunTest(); }
-
-#if PREDICTORS_SPEED_TEST
-TEST_P(AV1HbdFilterIntraPredOptimzTest, SpeedCheckC) { RunSpeedTestC(); }
-
-TEST_P(AV1HbdFilterIntraPredOptimzTest, SpeedCheckSSE) { RunSpeedTestSSE(); }
-#endif  // PREDICTORS_SPEED_TEST
-#endif  // CONFIG_HIGHBITDEPTH
-
-using std::tr1::make_tuple;
-
-const PredFuncMode kPredFuncMdArray[] = {
-  make_tuple(av1_dc_filter_predictor_c, av1_dc_filter_predictor_sse4_1,
-             DC_PRED),
-  make_tuple(av1_v_filter_predictor_c, av1_v_filter_predictor_sse4_1, V_PRED),
-  make_tuple(av1_h_filter_predictor_c, av1_h_filter_predictor_sse4_1, H_PRED),
-  make_tuple(av1_d45_filter_predictor_c, av1_d45_filter_predictor_sse4_1,
-             D45_PRED),
-  make_tuple(av1_d135_filter_predictor_c, av1_d135_filter_predictor_sse4_1,
-             D135_PRED),
-  make_tuple(av1_d117_filter_predictor_c, av1_d117_filter_predictor_sse4_1,
-             D117_PRED),
-  make_tuple(av1_d153_filter_predictor_c, av1_d153_filter_predictor_sse4_1,
-             D153_PRED),
-  make_tuple(av1_d207_filter_predictor_c, av1_d207_filter_predictor_sse4_1,
-             D207_PRED),
-  make_tuple(av1_d63_filter_predictor_c, av1_d63_filter_predictor_sse4_1,
-             D63_PRED),
-  make_tuple(av1_tm_filter_predictor_c, av1_tm_filter_predictor_sse4_1,
-             TM_PRED),
-};
-
-const int kBlkSize[] = { 4, 8, 16, 32 };
-
-INSTANTIATE_TEST_CASE_P(
-    SSE4_1, AV1FilterIntraPredOptimzTest,
-    ::testing::Combine(::testing::ValuesIn(kPredFuncMdArray),
-                       ::testing::ValuesIn(kBlkSize)));
-
-#if CONFIG_HIGHBITDEPTH
-const HbdPredFuncMode kHbdPredFuncMdArray[] = {
-  make_tuple(av1_highbd_dc_filter_predictor_c,
-             av1_highbd_dc_filter_predictor_sse4_1, DC_PRED),
-  make_tuple(av1_highbd_v_filter_predictor_c,
-             av1_highbd_v_filter_predictor_sse4_1, V_PRED),
-  make_tuple(av1_highbd_h_filter_predictor_c,
-             av1_highbd_h_filter_predictor_sse4_1, H_PRED),
-  make_tuple(av1_highbd_d45_filter_predictor_c,
-             av1_highbd_d45_filter_predictor_sse4_1, D45_PRED),
-  make_tuple(av1_highbd_d135_filter_predictor_c,
-             av1_highbd_d135_filter_predictor_sse4_1, D135_PRED),
-  make_tuple(av1_highbd_d117_filter_predictor_c,
-             av1_highbd_d117_filter_predictor_sse4_1, D117_PRED),
-  make_tuple(av1_highbd_d153_filter_predictor_c,
-             av1_highbd_d153_filter_predictor_sse4_1, D153_PRED),
-  make_tuple(av1_highbd_d207_filter_predictor_c,
-             av1_highbd_d207_filter_predictor_sse4_1, D207_PRED),
-  make_tuple(av1_highbd_d63_filter_predictor_c,
-             av1_highbd_d63_filter_predictor_sse4_1, D63_PRED),
-  make_tuple(av1_highbd_tm_filter_predictor_c,
-             av1_highbd_tm_filter_predictor_sse4_1, TM_PRED),
-};
-
-const int kBd[] = { 10, 12 };
-
-INSTANTIATE_TEST_CASE_P(
-    SSE4_1, AV1HbdFilterIntraPredOptimzTest,
-    ::testing::Combine(::testing::ValuesIn(kHbdPredFuncMdArray),
-                       ::testing::ValuesIn(kBlkSize),
-                       ::testing::ValuesIn(kBd)));
-#endif  // CONFIG_HIGHBITDEPTH
-
-}  // namespace
diff --git a/third_party/aom/test/filterintra_test.cc b/third_party/aom/test/filterintra_test.cc
new file mode 100644
index 0000000000..5971349406
--- /dev/null
+++ b/third_party/aom/test/filterintra_test.cc
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/av1_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/enums.h"
+
+namespace {
+
+using ::testing::tuple;
+using libaom_test::ACMRandom;
+
+typedef void (*Predictor)(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
+                          const uint8_t *above, const uint8_t *left, int mode);
+
+// Note:
+//  Test parameter list:
+//  Reference predictor, optimized predictor, prediction mode, tx size
+//
+typedef tuple<Predictor, Predictor, int> PredFuncMode;
+typedef tuple<PredFuncMode, TX_SIZE> PredParams;
+
+const int MaxTxSize = 32;
+
+const int MaxTestNum = 100;
+
+class AV1FilterIntraPredTest : public ::testing::TestWithParam<PredParams> {
+ public:
+  virtual ~AV1FilterIntraPredTest() {}
+  virtual void SetUp() {
+    PredFuncMode funcMode = GET_PARAM(0);
+    predFuncRef_ = ::testing::get<0>(funcMode);
+    predFunc_ = ::testing::get<1>(funcMode);
+    mode_ = ::testing::get<2>(funcMode);
+    txSize_ = GET_PARAM(1);
+
+    alloc_ = new uint8_t[2 * MaxTxSize + 1];
+    predRef_ = new uint8_t[MaxTxSize * MaxTxSize];
+    pred_ = new uint8_t[MaxTxSize * MaxTxSize];
+  }
+
+  virtual void TearDown() {
+    delete[] alloc_;
+    delete[] predRef_;
+    delete[] pred_;
+    libaom_test::ClearSystemState();
+  }
+
+ protected:
+  void RunTest() const {
+    int tstIndex = 0;
+    int stride = tx_size_wide[txSize_];
+    uint8_t *left = alloc_;
+    uint8_t *above = alloc_ + MaxTxSize;
+    while (tstIndex < MaxTestNum) {
+      PrepareBuffer();
+      predFuncRef_(predRef_, stride, txSize_, &above[1], left, mode_);
+      ASM_REGISTER_STATE_CHECK(
+          predFunc_(pred_, stride, txSize_, &above[1], left, mode_));
+      DiffPred(tstIndex);
+      tstIndex += 1;
+    }
+  }
+
+ private:
+  void PrepareBuffer() const {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    int i = 0;
+    while (i < (2 * MaxTxSize + 1)) {
+      alloc_[i] = rnd.Rand8();
+      i++;
+    }
+  }
+
+  void DiffPred(int testNum) const {
+    int i = 0;
+    while (i < tx_size_wide[txSize_] * tx_size_high[txSize_]) {
+      EXPECT_EQ(predRef_[i], pred_[i]) << "Error at position: " << i << " "
+                                       << "Tx size: " << tx_size_wide[txSize_]
+                                       << "x" << tx_size_high[txSize_] << " "
+                                       << "Test number: " << testNum;
+      i++;
+    }
+  }
+
+  Predictor predFunc_;
+  Predictor predFuncRef_;
+  int mode_;
+  TX_SIZE txSize_;
+  uint8_t *alloc_;
+  uint8_t *pred_;
+  uint8_t *predRef_;
+};
+
+TEST_P(AV1FilterIntraPredTest, BitExactCheck) { RunTest(); }
+
+using ::testing::make_tuple;
+
+const PredFuncMode kPredFuncMdArray[] = {
+  make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1,
+             FILTER_DC_PRED),
+  make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1,
+             FILTER_V_PRED),
+  make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1,
+             FILTER_H_PRED),
+  make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1,
+             FILTER_D157_PRED),
+  make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1,
+             FILTER_PAETH_PRED),
+};
+
+const TX_SIZE kTxSize[] = { TX_4X4,  TX_8X8,  TX_16X16, TX_32X32, TX_4X8,
+                            TX_8X4,  TX_8X16, TX_16X8,  TX_16X32, TX_32X16,
+                            TX_4X16, TX_16X4, TX_8X32,  TX_32X8 };
+
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, AV1FilterIntraPredTest,
+    ::testing::Combine(::testing::ValuesIn(kPredFuncMdArray),
+                       ::testing::ValuesIn(kTxSize)));
+}  // namespace
diff --git a/third_party/aom/test/frame_size_tests.cc b/third_party/aom/test/frame_size_tests.cc
index 442f2523da..eaf0b8370a 100644
--- a/third_party/aom/test/frame_size_tests.cc
+++ b/third_party/aom/test/frame_size_tests.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "test/codec_factory.h"
diff --git a/third_party/aom/test/fwht4x4_test.cc b/third_party/aom/test/fwht4x4_test.cc
new file mode 100644
index 0000000000..c8d98c5198
--- /dev/null
+++ b/third_party/aom/test/fwht4x4_test.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/av1_rtcd.h"
+#include "config/aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/transform_test_base.h"
+#include "test/util.h"
+#include "av1/common/entropy.h"
+#include "aom/aom_codec.h"
+#include "aom/aom_integer.h"
+#include "aom_ports/mem.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
+
+using libaom_test::FhtFunc;
+
+typedef ::testing::tuple<FdctFunc, IdctFunc, TX_TYPE, aom_bit_depth_t, int>
+    Dct4x4Param;
+
+void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
+                 TxfmParam * /*txfm_param*/) {
+  av1_fwht4x4_c(in, out, stride);
+}
+
+void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
+  av1_highbd_iwht4x4_16_add_c(in, out, stride, 10);
+}
+
+void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
+  av1_highbd_iwht4x4_16_add_c(in, out, stride, 12);
+}
+
+class Trans4x4WHT : public libaom_test::TransformTestBase,
+                    public ::testing::TestWithParam<Dct4x4Param> {
+ public:
+  virtual ~Trans4x4WHT() {}
+
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    pitch_ = 4;
+    height_ = 4;
+    fwd_txfm_ref = fwht4x4_ref;
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;
+    num_coeffs_ = GET_PARAM(4);
+  }
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride);
+  }
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride);
+  }
+
+  FdctFunc fwd_txfm_;
+  IdctFunc inv_txfm_;
+};
+
+TEST_P(Trans4x4WHT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); }
+
+TEST_P(Trans4x4WHT, CoeffCheck) { RunCoeffCheck(); }
+
+TEST_P(Trans4x4WHT, MemCheck) { RunMemCheck(); }
+
+TEST_P(Trans4x4WHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
+using ::testing::make_tuple;
+
+INSTANTIATE_TEST_CASE_P(
+    C, Trans4x4WHT,
+    ::testing::Values(make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_10, DCT_DCT,
+                                 AOM_BITS_10, 16),
+                      make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_12, DCT_DCT,
+                                 AOM_BITS_12, 16)));
+}  // namespace
diff --git a/third_party/aom/test/gviz_api.py b/third_party/aom/test/gviz_api.py
new file mode 100755
index 0000000000..d3a443dabf
--- /dev/null
+++ b/third_party/aom/test/gviz_api.py
@@ -0,0 +1,1087 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2016, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and
+# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+# was not distributed with this source code in the LICENSE file, you can
+# obtain it at www.aomedia.org/license/software. If the Alliance for Open
+# Media Patent License 1.0 was not distributed with this source code in the
+# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+#
+
+"""Converts Python data into data for Google Visualization API clients.
+
+This library can be used to create a google.visualization.DataTable usable by
+visualizations built on the Google Visualization API. Output formats are raw
+JSON, JSON response, JavaScript, CSV, and HTML table.
+
+See http://code.google.com/apis/visualization/ for documentation on the
+Google Visualization API.
+"""
+
+__author__ = "Amit Weinstein, Misha Seltzer, Jacob Baskin"
+
+import cgi
+import cStringIO
+import csv
+import datetime
+try:
+  import json
+except ImportError:
+  import simplejson as json
+import types
+
+
+class DataTableException(Exception):
+  """The general exception object thrown by DataTable."""
+  pass
+
+
+class DataTableJSONEncoder(json.JSONEncoder):
+  """JSON encoder that handles date/time/datetime objects correctly."""
+
+  def __init__(self):
+    json.JSONEncoder.__init__(self,
+                              separators=(",", ":"),
+                              ensure_ascii=False)
+
+  def default(self, o):
+    if isinstance(o, datetime.datetime):
+      if o.microsecond == 0:
+        # If the time doesn't have ms-resolution, leave it out to keep
+        # things smaller.
+        return "Date(%d,%d,%d,%d,%d,%d)" % (
+            o.year, o.month - 1, o.day, o.hour, o.minute, o.second)
+      else:
+        return "Date(%d,%d,%d,%d,%d,%d,%d)" % (
+            o.year, o.month - 1, o.day, o.hour, o.minute, o.second,
+            o.microsecond / 1000)
+    elif isinstance(o, datetime.date):
+      return "Date(%d,%d,%d)" % (o.year, o.month - 1, o.day)
+    elif isinstance(o, datetime.time):
+      return [o.hour, o.minute, o.second]
+    else:
+      return super(DataTableJSONEncoder, self).default(o)
+
+
+class DataTable(object):
+  """Wraps the data to convert to a Google Visualization API DataTable.
+
+  Create this object, populate it with data, then call one of the ToJS...
+  methods to return a string representation of the data in the format described.
+
+  You can clear all data from the object to reuse it, but you cannot clear
+  individual cells, rows, or columns. You also cannot modify the table schema
+  specified in the class constructor.
+
+  You can add new data one or more rows at a time. All data added to an
+  instantiated DataTable must conform to the schema passed in to __init__().
+
+  You can reorder the columns in the output table, and also specify row sorting
+  order by column. The default column order is according to the original
+  table_description parameter. Default row sort order is ascending, by column
+  1 values. For a dictionary, we sort the keys for order.
+
+  The data and the table_description are closely tied, as described here:
+
+  The table schema is defined in the class constructor's table_description
+  parameter. The user defines each column using a tuple of
+  (id[, type[, label[, custom_properties]]]). The default value for type is
+  string, label is the same as ID if not specified, and custom properties is
+  an empty dictionary if not specified.
+
+  table_description is a dictionary or list, containing one or more column
+  descriptor tuples, nested dictionaries, and lists. Each dictionary key, list
+  element, or dictionary element must eventually be defined as
+  a column description tuple. Here's an example of a dictionary where the key
+  is a tuple, and the value is a list of two tuples:
+    {('a', 'number'): [('b', 'number'), ('c', 'string')]}
+
+  This flexibility in data entry enables you to build and manipulate your data
+  in a Python structure that makes sense for your program.
+
+  Add data to the table using the same nested design as the table's
+  table_description, replacing column descriptor tuples with cell data, and
+  each row is an element in the top level collection. This will be a bit
+  clearer after you look at the following examples showing the
+  table_description, matching data, and the resulting table:
+
+  Columns as list of tuples [col1, col2, col3]
+    table_description: [('a', 'number'), ('b', 'string')]
+    AppendData( [[1, 'z'], [2, 'w'], [4, 'o'], [5, 'k']] )
+    Table:
+    a  b   <--- these are column ids/labels
+    1  z
+    2  w
+    4  o
+    5  k
+
+  Dictionary of columns, where key is a column, and value is a list of
+  columns  {col1: [col2, col3]}
+    table_description: {('a', 'number'): [('b', 'number'), ('c', 'string')]}
+    AppendData( data: {1: [2, 'z'], 3: [4, 'w']}
+    Table:
+    a  b  c
+    1  2  z
+    3  4  w
+
+  Dictionary where key is a column, and the value is itself a dictionary of
+  columns {col1: {col2, col3}}
+    table_description: {('a', 'number'): {'b': 'number', 'c': 'string'}}
+    AppendData( data: {1: {'b': 2, 'c': 'z'}, 3: {'b': 4, 'c': 'w'}}
+    Table:
+    a  b  c
+    1  2  z
+    3  4  w
+  """
+
+  def __init__(self, table_description, data=None, custom_properties=None):
+    """Initialize the data table from a table schema and (optionally) data.
+
+    See the class documentation for more information on table schema and data
+    values.
+
+    Args:
+      table_description: A table schema, following one of the formats described
+                         in TableDescriptionParser(). Schemas describe the
+                         column names, data types, and labels. See
+                         TableDescriptionParser() for acceptable formats.
+      data: Optional. If given, fills the table with the given data. The data
+            structure must be consistent with schema in table_description. See
+            the class documentation for more information on acceptable data. You
+            can add data later by calling AppendData().
+      custom_properties: Optional. A dictionary from string to string that
+                         goes into the table's custom properties. This can be
+                         later changed by changing self.custom_properties.
+
+    Raises:
+      DataTableException: Raised if the data and the description did not match,
+                          or did not use the supported formats.
+    """
+    self.__columns = self.TableDescriptionParser(table_description)
+    self.__data = []
+    self.custom_properties = {}
+    if custom_properties is not None:
+      self.custom_properties = custom_properties
+    if data:
+      self.LoadData(data)
+
+  @staticmethod
+  def CoerceValue(value, value_type):
+    """Coerces a single value into the type expected for its column.
+
+    Internal helper method.
+
+    Args:
+      value: The value which should be converted
+      value_type: One of "string", "number", "boolean", "date", "datetime" or
+                  "timeofday".
+
+    Returns:
+      An item of the Python type appropriate to the given value_type. Strings
+      are also converted to Unicode using UTF-8 encoding if necessary.
+      If a tuple is given, it should be in one of the following forms:
+        - (value, formatted value)
+        - (value, formatted value, custom properties)
+      where the formatted value is a string, and custom properties is a
+      dictionary of the custom properties for this cell.
+      To specify custom properties without specifying formatted value, one can
+      pass None as the formatted value.
+      One can also have a null-valued cell with formatted value and/or custom
+      properties by specifying None for the value.
+      This method ignores the custom properties except for checking that it is a
+      dictionary. The custom properties are handled in the ToJSon and ToJSCode
+      methods.
+      The real type of the given value is not strictly checked. For example,
+      any type can be used for string - as we simply take its str( ) and for
+      boolean value we just check "if value".
+      Examples:
+        CoerceValue(None, "string") returns None
+        CoerceValue((5, "5$"), "number") returns (5, "5$")
+        CoerceValue(100, "string") returns "100"
+        CoerceValue(0, "boolean") returns False
+
+    Raises:
+      DataTableException: The value and type did not match in a not-recoverable
+                          way, for example given value 'abc' for type 'number'.
+    """
+    if isinstance(value, tuple):
+      # In case of a tuple, we run the same function on the value itself and
+      # add the formatted value.
+      if (len(value) not in [2, 3] or
+          (len(value) == 3 and not isinstance(value[2], dict))):
+        raise DataTableException("Wrong format for value and formatting - %s." %
+                                 str(value))
+      if not isinstance(value[1], types.StringTypes + (types.NoneType,)):
+        raise DataTableException("Formatted value is not string, given %s." %
+                                 type(value[1]))
+      js_value = DataTable.CoerceValue(value[0], value_type)
+      return (js_value,) + value[1:]
+
+    t_value = type(value)
+    if value is None:
+      return value
+    if value_type == "boolean":
+      return bool(value)
+
+    elif value_type == "number":
+      if isinstance(value, (int, long, float)):
+        return value
+      raise DataTableException("Wrong type %s when expected number" % t_value)
+
+    elif value_type == "string":
+      if isinstance(value, unicode):
+        return value
+      else:
+        return str(value).decode("utf-8")
+
+    elif value_type == "date":
+      if isinstance(value, datetime.datetime):
+        return datetime.date(value.year, value.month, value.day)
+      elif isinstance(value, datetime.date):
+        return value
+      else:
+        raise DataTableException("Wrong type %s when expected date" % t_value)
+
+    elif value_type == "timeofday":
+      if isinstance(value, datetime.datetime):
+        return datetime.time(value.hour, value.minute, value.second)
+      elif isinstance(value, datetime.time):
+        return value
+      else:
+        raise DataTableException("Wrong type %s when expected time" % t_value)
+
+    elif value_type == "datetime":
+      if isinstance(value, datetime.datetime):
+        return value
+      else:
+        raise DataTableException("Wrong type %s when expected datetime" %
+                                 t_value)
+    # If we got here, it means the given value_type was not one of the
+    # supported types.
+    raise DataTableException("Unsupported type %s" % value_type)
+
+  @staticmethod
+  def EscapeForJSCode(encoder, value):
+    if value is None:
+      return "null"
+    elif isinstance(value, datetime.datetime):
+      if value.microsecond == 0:
+        # If it's not ms-resolution, leave that out to save space.
+        return "new Date(%d,%d,%d,%d,%d,%d)" % (value.year,
+                                                value.month - 1,  # To match JS
+                                                value.day,
+                                                value.hour,
+                                                value.minute,
+                                                value.second)
+      else:
+        return "new Date(%d,%d,%d,%d,%d,%d,%d)" % (value.year,
+                                                   value.month - 1,  # match JS
+                                                   value.day,
+                                                   value.hour,
+                                                   value.minute,
+                                                   value.second,
+                                                   value.microsecond / 1000)
+    elif isinstance(value, datetime.date):
+      return "new Date(%d,%d,%d)" % (value.year, value.month - 1, value.day)
+    else:
+      return encoder.encode(value)
+
+  @staticmethod
+  def ToString(value):
+    if value is None:
+      return "(empty)"
+    elif isinstance(value, (datetime.datetime,
+                            datetime.date,
+                            datetime.time)):
+      return str(value)
+    elif isinstance(value, unicode):
+      return value
+    elif isinstance(value, bool):
+      return str(value).lower()
+    else:
+      return str(value).decode("utf-8")
+
+  @staticmethod
+  def ColumnTypeParser(description):
+    """Parses a single column description. Internal helper method.
+
+    Args:
+      description: a column description in the possible formats:
+       'id'
+       ('id',)
+       ('id', 'type')
+       ('id', 'type', 'label')
+       ('id', 'type', 'label', {'custom_prop1': 'custom_val1'})
+    Returns:
+      Dictionary with the following keys: id, label, type, and
+      custom_properties where:
+        - If label not given, it equals the id.
+        - If type not given, string is used by default.
+        - If custom properties are not given, an empty dictionary is used by
+          default.
+
+    Raises:
+      DataTableException: The column description did not match the RE, or
+          unsupported type was passed.
+    """
+    if not description:
+      raise DataTableException("Description error: empty description given")
+
+    if not isinstance(description, (types.StringTypes, tuple)):
+      raise DataTableException("Description error: expected either string or "
+                               "tuple, got %s." % type(description))
+
+    if isinstance(description, types.StringTypes):
+      description = (description,)
+
+    # According to the tuple's length, we fill the keys
+    # We verify everything is of type string
+    for elem in description[:3]:
+      if not isinstance(elem, types.StringTypes):
+        raise DataTableException("Description error: expected tuple of "
+                                 "strings, current element of type %s." %
+                                 type(elem))
+    desc_dict = {"id": description[0],
+                 "label": description[0],
+                 "type": "string",
+                 "custom_properties": {}}
+    if len(description) > 1:
+      desc_dict["type"] = description[1].lower()
+      if len(description) > 2:
+        desc_dict["label"] = description[2]
+        if len(description) > 3:
+          if not isinstance(description[3], dict):
+            raise DataTableException("Description error: expected custom "
+                                     "properties of type dict, current element "
+                                     "of type %s." % type(description[3]))
+          desc_dict["custom_properties"] = description[3]
+          if len(description) > 4:
+            raise DataTableException("Description error: tuple of length > 4")
+    if desc_dict["type"] not in ["string", "number", "boolean",
+                                 "date", "datetime", "timeofday"]:
+      raise DataTableException(
+          "Description error: unsupported type '%s'" % desc_dict["type"])
+    return desc_dict
+
+  @staticmethod
+  def TableDescriptionParser(table_description, depth=0):
+    """Parses the table_description object for internal use.
+
+    Parses the user-submitted table description into an internal format used
+    by the Python DataTable class. Returns the flat list of parsed columns.
+
+    Args:
+      table_description: A description of the table which should comply
+                         with one of the formats described below.
+      depth: Optional. The depth of the first level in the current description.
+             Used by recursive calls to this function.
+
+    Returns:
+      List of columns, where each column represented by a dictionary with the
+      keys: id, label, type, depth, container which means the following:
+      - id: the id of the column
+      - name: The name of the column
+      - type: The datatype of the elements in this column. Allowed types are
+              described in ColumnTypeParser().
+      - depth: The depth of this column in the table description
+      - container: 'dict', 'iter' or 'scalar' for parsing the format easily.
+      - custom_properties: The custom properties for this column.
+      The returned description is flattened regardless of how it was given.
+
+    Raises:
+      DataTableException: Error in a column description or in the description
+                          structure.
+
+    Examples:
+      A column description can be of the following forms:
+       'id'
+       ('id',)
+       ('id', 'type')
+       ('id', 'type', 'label')
+       ('id', 'type', 'label', {'custom_prop1': 'custom_val1'})
+       or as a dictionary:
+       'id': 'type'
+       'id': ('type',)
+       'id': ('type', 'label')
+       'id': ('type', 'label', {'custom_prop1': 'custom_val1'})
+      If the type is not specified, we treat it as string.
+      If no specific label is given, the label is simply the id.
+      If no custom properties are given, we use an empty dictionary.
+
+      input: [('a', 'date'), ('b', 'timeofday', 'b', {'foo': 'bar'})]
+      output: [{'id': 'a', 'label': 'a', 'type': 'date',
+                'depth': 0, 'container': 'iter', 'custom_properties': {}},
+               {'id': 'b', 'label': 'b', 'type': 'timeofday',
+                'depth': 0, 'container': 'iter',
+                'custom_properties': {'foo': 'bar'}}]
+
+      input: {'a': [('b', 'number'), ('c', 'string', 'column c')]}
+      output: [{'id': 'a', 'label': 'a', 'type': 'string',
+                'depth': 0, 'container': 'dict', 'custom_properties': {}},
+               {'id': 'b', 'label': 'b', 'type': 'number',
+                'depth': 1, 'container': 'iter', 'custom_properties': {}},
+               {'id': 'c', 'label': 'column c', 'type': 'string',
+                'depth': 1, 'container': 'iter', 'custom_properties': {}}]
+
+      input:  {('a', 'number', 'column a'): { 'b': 'number', 'c': 'string'}}
+      output: [{'id': 'a', 'label': 'column a', 'type': 'number',
+                'depth': 0, 'container': 'dict', 'custom_properties': {}},
+               {'id': 'b', 'label': 'b', 'type': 'number',
+                'depth': 1, 'container': 'dict', 'custom_properties': {}},
+               {'id': 'c', 'label': 'c', 'type': 'string',
+                'depth': 1, 'container': 'dict', 'custom_properties': {}}]
+
+      input: { ('w', 'string', 'word'): ('c', 'number', 'count') }
+      output: [{'id': 'w', 'label': 'word', 'type': 'string',
+                'depth': 0, 'container': 'dict', 'custom_properties': {}},
+               {'id': 'c', 'label': 'count', 'type': 'number',
+                'depth': 1, 'container': 'scalar', 'custom_properties': {}}]
+
+      input: {'a': ('number', 'column a'), 'b': ('string', 'column b')}
+      output: [{'id': 'a', 'label': 'column a', 'type': 'number', 'depth': 0,
+               'container': 'dict', 'custom_properties': {}},
+               {'id': 'b', 'label': 'column b', 'type': 'string', 'depth': 0,
+               'container': 'dict', 'custom_properties': {}}
+
+      NOTE: there might be ambiguity in the case of a dictionary representation
+      of a single column. For example, the following description can be parsed
+      in 2 different ways: {'a': ('b', 'c')} can be thought of a single column
+      with the id 'a', of type 'b' and the label 'c', or as 2 columns: one named
+      'a', and the other named 'b' of type 'c'. We choose the first option by
+      default, and in case the second option is the right one, it is possible to
+      make the key into a tuple (i.e. {('a',): ('b', 'c')}) or add more info
+      into the tuple, thus making it look like this: {'a': ('b', 'c', 'b', {})}
+      -- second 'b' is the label, and {} is the custom properties field.
+    """
+    # For the recursion step, we check for a scalar object (string or tuple)
+    if isinstance(table_description, (types.StringTypes, tuple)):
+      parsed_col = DataTable.ColumnTypeParser(table_description)
+      parsed_col["depth"] = depth
+      parsed_col["container"] = "scalar"
+      return [parsed_col]
+
+    # Since it is not scalar, table_description must be iterable.
+    if not hasattr(table_description, "__iter__"):
+      raise DataTableException("Expected an iterable object, got %s" %
+                               type(table_description))
+    if not isinstance(table_description, dict):
+      # We expects a non-dictionary iterable item.
+      columns = []
+      for desc in table_description:
+        parsed_col = DataTable.ColumnTypeParser(desc)
+        parsed_col["depth"] = depth
+        parsed_col["container"] = "iter"
+        columns.append(parsed_col)
+      if not columns:
+        raise DataTableException("Description iterable objects should not"
+                                 " be empty.")
+      return columns
+    # The other case is a dictionary
+    if not table_description:
+      raise DataTableException("Empty dictionaries are not allowed inside"
+                               " description")
+
+    # To differentiate between the two cases of more levels below or this is
+    # the most inner dictionary, we consider the number of keys (more then one
+    # key is indication for most inner dictionary) and the type of the key and
+    # value in case of only 1 key (if the type of key is string and the type of
+    # the value is a tuple of 0-3 items, we assume this is the most inner
+    # dictionary).
+    # NOTE: this way of differentiating might create ambiguity. See docs.
+    if (len(table_description) != 1 or
+        (isinstance(table_description.keys()[0], types.StringTypes) and
+         isinstance(table_description.values()[0], tuple) and
+         len(table_description.values()[0]) < 4)):
+      # This is the most inner dictionary. Parsing types.
+      columns = []
+      # We sort the items, equivalent to sort the keys since they are unique
+      for key, value in sorted(table_description.items()):
+        # We parse the column type as (key, type) or (key, type, label) using
+        # ColumnTypeParser.
+        if isinstance(value, tuple):
+          parsed_col = DataTable.ColumnTypeParser((key,) + value)
+        else:
+          parsed_col = DataTable.ColumnTypeParser((key, value))
+        parsed_col["depth"] = depth
+        parsed_col["container"] = "dict"
+        columns.append(parsed_col)
+      return columns
+    # This is an outer dictionary, must have at most one key.
+    parsed_col = DataTable.ColumnTypeParser(table_description.keys()[0])
+    parsed_col["depth"] = depth
+    parsed_col["container"] = "dict"
+    return ([parsed_col] +
+            DataTable.TableDescriptionParser(table_description.values()[0],
+                                             depth=depth + 1))
+
+  @property
+  def columns(self):
+    """Returns the parsed table description."""
+    return self.__columns
+
+  def NumberOfRows(self):
+    """Returns the number of rows in the current data stored in the table."""
+    return len(self.__data)
+
+  def SetRowsCustomProperties(self, rows, custom_properties):
+    """Sets the custom properties for given row(s).
+
+    Can accept a single row or an iterable of rows.
+    Sets the given custom properties for all specified rows.
+
+    Args:
+      rows: The row, or rows, to set the custom properties for.
+      custom_properties: A string to string dictionary of custom properties to
+      set for all rows.
+    """
+    if not hasattr(rows, "__iter__"):
+      rows = [rows]
+    for row in rows:
+      self.__data[row] = (self.__data[row][0], custom_properties)
+
+  def LoadData(self, data, custom_properties=None):
+    """Loads new rows to the data table, clearing existing rows.
+
+    May also set the custom_properties for the added rows. The given custom
+    properties dictionary specifies the dictionary that will be used for *all*
+    given rows.
+
+    Args:
+      data: The rows that the table will contain.
+      custom_properties: A dictionary of string to string to set as the custom
+                         properties for all rows.
+    """
+    self.__data = []
+    self.AppendData(data, custom_properties)
+
+  def AppendData(self, data, custom_properties=None):
+    """Appends new data to the table.
+
+    Data is appended in rows. Data must comply with
+    the table schema passed in to __init__(). See CoerceValue() for a list
+    of acceptable data types. See the class documentation for more information
+    and examples of schema and data values.
+
+    Args:
+      data: The row to add to the table. The data must conform to the table
+            description format.
+      custom_properties: A dictionary of string to string, representing the
+                         custom properties to add to all the rows.
+
+    Raises:
+      DataTableException: The data structure does not match the description.
+    """
+    # If the maximal depth is 0, we simply iterate over the data table
+    # lines and insert them using _InnerAppendData. Otherwise, we simply
+    # let the _InnerAppendData handle all the levels.
+    if not self.__columns[-1]["depth"]:
+      for row in data:
+        self._InnerAppendData(({}, custom_properties), row, 0)
+    else:
+      self._InnerAppendData(({}, custom_properties), data, 0)
+
+  def _InnerAppendData(self, prev_col_values, data, col_index):
+    """Inner function to assist LoadData."""
+    # We first check that col_index has not exceeded the columns size
+    if col_index >= len(self.__columns):
+      raise DataTableException("The data does not match description, too deep")
+
+    # Dealing with the scalar case, the data is the last value.
+    if self.__columns[col_index]["container"] == "scalar":
+      prev_col_values[0][self.__columns[col_index]["id"]] = data
+      self.__data.append(prev_col_values)
+      return
+
+    if self.__columns[col_index]["container"] == "iter":
+      if not hasattr(data, "__iter__") or isinstance(data, dict):
+        raise DataTableException("Expected iterable object, got %s" %
+                                 type(data))
+      # We only need to insert the rest of the columns
+      # If there are less items than expected, we only add what there is.
+      for value in data:
+        if col_index >= len(self.__columns):
+          raise DataTableException("Too many elements given in data")
+        prev_col_values[0][self.__columns[col_index]["id"]] = value
+        col_index += 1
+      self.__data.append(prev_col_values)
+      return
+
+    # We know the current level is a dictionary, we verify the type.
+    if not isinstance(data, dict):
+      raise DataTableException("Expected dictionary at current level, got %s" %
+                               type(data))
+    # We check if this is the last level
+    if self.__columns[col_index]["depth"] == self.__columns[-1]["depth"]:
+      # We need to add the keys in the dictionary as they are
+      for col in self.__columns[col_index:]:
+        if col["id"] in data:
+          prev_col_values[0][col["id"]] = data[col["id"]]
+      self.__data.append(prev_col_values)
+      return
+
+    # We have a dictionary in an inner depth level.
+    if not data.keys():
+      # In case this is an empty dictionary, we add a record with the columns
+      # filled only until this point.
+      self.__data.append(prev_col_values)
+    else:
+      for key in sorted(data):
+        col_values = dict(prev_col_values[0])
+        col_values[self.__columns[col_index]["id"]] = key
+        self._InnerAppendData((col_values, prev_col_values[1]),
+                              data[key], col_index + 1)
+
+  def _PreparedData(self, order_by=()):
+    """Prepares the data for enumeration - sorting it by order_by.
+
+    Args:
+      order_by: Optional. Specifies the name of the column(s) to sort by, and
+                (optionally) which direction to sort in. Default sort direction
+                is asc. Following formats are accepted:
+                "string_col_name"  -- For a single key in default (asc) order.
+                ("string_col_name", "asc|desc") -- For a single key.
+                [("col_1","asc|desc"), ("col_2","asc|desc")] -- For more than
+                    one column, an array of tuples of (col_name, "asc|desc").
+
+    Returns:
+      The data sorted by the keys given.
+
+    Raises:
+      DataTableException: Sort direction not in 'asc' or 'desc'
+    """
+    if not order_by:
+      return self.__data
+
+    proper_sort_keys = []
+    if isinstance(order_by, types.StringTypes) or (
+        isinstance(order_by, tuple) and len(order_by) == 2 and
+        order_by[1].lower() in ["asc", "desc"]):
+      order_by = (order_by,)
+    for key in order_by:
+      if isinstance(key, types.StringTypes):
+        proper_sort_keys.append((key, 1))
+      elif (isinstance(key, (list, tuple)) and len(key) == 2 and
+            key[1].lower() in ("asc", "desc")):
+        proper_sort_keys.append((key[0], key[1].lower() == "asc" and 1 or -1))
+      else:
+        raise DataTableException("Expected tuple with second value: "
+                                 "'asc' or 'desc'")
+
+    def SortCmpFunc(row1, row2):
+      """cmp function for sorted. Compares by keys and 'asc'/'desc' keywords."""
+      for key, asc_mult in proper_sort_keys:
+        cmp_result = asc_mult * cmp(row1[0].get(key), row2[0].get(key))
+        if cmp_result:
+          return cmp_result
+      return 0
+
+    return sorted(self.__data, cmp=SortCmpFunc)
+
+  def ToJSCode(self, name, columns_order=None, order_by=()):
+    """Writes the data table as a JS code string.
+
+    This method writes a string of JS code that can be run to
+    generate a DataTable with the specified data. Typically used for debugging
+    only.
+
+    Args:
+      name: The name of the table. The name would be used as the DataTable's
+            variable name in the created JS code.
+      columns_order: Optional. Specifies the order of columns in the
+                     output table. Specify a list of all column IDs in the order
+                     in which you want the table created.
+                     Note that you must list all column IDs in this parameter,
+                     if you use it.
+      order_by: Optional. Specifies the name of the column(s) to sort by.
+                Passed as is to _PreparedData.
+
+    Returns:
+      A string of JS code that, when run, generates a DataTable with the given
+      name and the data stored in the DataTable object.
+      Example result:
+        "var tab1 = new google.visualization.DataTable();
+         tab1.addColumn("string", "a", "a");
+         tab1.addColumn("number", "b", "b");
+         tab1.addColumn("boolean", "c", "c");
+         tab1.addRows(10);
+         tab1.setCell(0, 0, "a");
+         tab1.setCell(0, 1, 1, null, {"foo": "bar"});
+         tab1.setCell(0, 2, true);
+         ...
+         tab1.setCell(9, 0, "c");
+         tab1.setCell(9, 1, 3, "3$");
+         tab1.setCell(9, 2, false);"
+
+    Raises:
+      DataTableException: The data does not match the type.
+    """
+
+    encoder = DataTableJSONEncoder()
+
+    if columns_order is None:
+      columns_order = [col["id"] for col in self.__columns]
+    col_dict = dict([(col["id"], col) for col in self.__columns])
+
+    # We first create the table with the given name
+    jscode = "var %s = new google.visualization.DataTable();\n" % name
+    if self.custom_properties:
+      jscode += "%s.setTableProperties(%s);\n" % (
+          name, encoder.encode(self.custom_properties))
+
+    # We add the columns to the table
+    for i, col in enumerate(columns_order):
+      jscode += "%s.addColumn(%s, %s, %s);\n" % (
+          name,
+          encoder.encode(col_dict[col]["type"]),
+          encoder.encode(col_dict[col]["label"]),
+          encoder.encode(col_dict[col]["id"]))
+      if col_dict[col]["custom_properties"]:
+        jscode += "%s.setColumnProperties(%d, %s);\n" % (
+            name, i, encoder.encode(col_dict[col]["custom_properties"]))
+    jscode += "%s.addRows(%d);\n" % (name, len(self.__data))
+
+    # We now go over the data and add each row
+    for (i, (row, cp)) in enumerate(self._PreparedData(order_by)):
+      # We add all the elements of this row by their order
+      for (j, col) in enumerate(columns_order):
+        if col not in row or row[col] is None:
+          continue
+        value = self.CoerceValue(row[col], col_dict[col]["type"])
+        if isinstance(value, tuple):
+          cell_cp = ""
+          if len(value) == 3:
+            cell_cp = ", %s" % encoder.encode(row[col][2])
+          # We have a formatted value or custom property as well
+          jscode += ("%s.setCell(%d, %d, %s, %s%s);\n" %
+                     (name, i, j,
+                      self.EscapeForJSCode(encoder, value[0]),
+                      self.EscapeForJSCode(encoder, value[1]), cell_cp))
+        else:
+          jscode += "%s.setCell(%d, %d, %s);\n" % (
+              name, i, j, self.EscapeForJSCode(encoder, value))
+      if cp:
+        jscode += "%s.setRowProperties(%d, %s);\n" % (
+            name, i, encoder.encode(cp))
+    return jscode
+
+  def ToHtml(self, columns_order=None, order_by=()):
+    """Writes the data table as an HTML table code string.
+
+    Args:
+      columns_order: Optional. Specifies the order of columns in the
+                     output table. Specify a list of all column IDs in the order
+                     in which you want the table created.
+                     Note that you must list all column IDs in this parameter,
+                     if you use it.
+      order_by: Optional. Specifies the name of the column(s) to sort by.
+                Passed as is to _PreparedData.
+
+    Returns:
+      An HTML table code string.
+      Example result (the result is without the newlines):
+       <html><body><table border="1">
+        <thead><tr><th>a</th><th>b</th><th>c</th></tr></thead>
+        <tbody>
+         <tr><td>1</td><td>"z"</td><td>2</td></tr>
+         <tr><td>"3$"</td><td>"w"</td><td></td></tr>
+        </tbody>
+       </table></body></html>
+
+    Raises:
+      DataTableException: The data does not match the type.
+    """
+    table_template = "<html><body><table border=\"1\">%s</table></body></html>"
+    columns_template = "<thead><tr>%s</tr></thead>"
+    rows_template = "<tbody>%s</tbody>"
+    row_template = "<tr>%s</tr>"
+    header_cell_template = "<th>%s</th>"
+    cell_template = "<td>%s</td>"
+
+    if columns_order is None:
+      columns_order = [col["id"] for col in self.__columns]
+    col_dict = dict([(col["id"], col) for col in self.__columns])
+
+    columns_list = []
+    for col in columns_order:
+      columns_list.append(header_cell_template %
+                          cgi.escape(col_dict[col]["label"]))
+    columns_html = columns_template % "".join(columns_list)
+
+    rows_list = []
+    # We now go over the data and add each row
+    for row, unused_cp in self._PreparedData(order_by):
+      cells_list = []
+      # We add all the elements of this row by their order
+      for col in columns_order:
+        # For empty string we want empty quotes ("").
+        value = ""
+        if col in row and row[col] is not None:
+          value = self.CoerceValue(row[col], col_dict[col]["type"])
+        if isinstance(value, tuple):
+          # We have a formatted value and we're going to use it
+          cells_list.append(cell_template % cgi.escape(self.ToString(value[1])))
+        else:
+          cells_list.append(cell_template % cgi.escape(self.ToString(value)))
+      rows_list.append(row_template % "".join(cells_list))
+    rows_html = rows_template % "".join(rows_list)
+
+    return table_template % (columns_html + rows_html)
+
+  def ToCsv(self, columns_order=None, order_by=(), separator=","):
+    """Writes the data table as a CSV string.
+
+    Output is encoded in UTF-8 because the Python "csv" module can't handle
+    Unicode properly according to its documentation.
+
+    Args:
+      columns_order: Optional. Specifies the order of columns in the
+                     output table. Specify a list of all column IDs in the order
+                     in which you want the table created.
+                     Note that you must list all column IDs in this parameter,
+                     if you use it.
+      order_by: Optional. Specifies the name of the column(s) to sort by.
+                Passed as is to _PreparedData.
+      separator: Optional. The separator to use between the values.
+
+    Returns:
+      A CSV string representing the table.
+      Example result:
+       'a','b','c'
+       1,'z',2
+       3,'w',''
+
+    Raises:
+      DataTableException: The data does not match the type.
+    """
+
+    csv_buffer = cStringIO.StringIO()
+    writer = csv.writer(csv_buffer, delimiter=separator)
+
+    if columns_order is None:
+      columns_order = [col["id"] for col in self.__columns]
+    col_dict = dict([(col["id"], col) for col in self.__columns])
+
+    writer.writerow([col_dict[col]["label"].encode("utf-8")
+                     for col in columns_order])
+
+    # We now go over the data and add each row
+    for row, unused_cp in self._PreparedData(order_by):
+      cells_list = []
+      # We add all the elements of this row by their order
+      for col in columns_order:
+        value = ""
+        if col in row and row[col] is not None:
+          value = self.CoerceValue(row[col], col_dict[col]["type"])
+        if isinstance(value, tuple):
+          # We have a formatted value. Using it only for date/time types.
+          if col_dict[col]["type"] in ["date", "datetime", "timeofday"]:
+            cells_list.append(self.ToString(value[1]).encode("utf-8"))
+          else:
+            cells_list.append(self.ToString(value[0]).encode("utf-8"))
+        else:
+          cells_list.append(self.ToString(value).encode("utf-8"))
+      writer.writerow(cells_list)
+    return csv_buffer.getvalue()
+
+  def ToTsvExcel(self, columns_order=None, order_by=()):
+    """Returns a file in tab-separated-format readable by MS Excel.
+
+    Returns a file in UTF-16 little endian encoding, with tabs separating the
+    values.
+
+    Args:
+      columns_order: Delegated to ToCsv.
+      order_by: Delegated to ToCsv.
+
+    Returns:
+      A tab-separated little endian UTF16 file representing the table.
+    """
+    return (self.ToCsv(columns_order, order_by, separator="\t")
+            .decode("utf-8").encode("UTF-16LE"))
+
+  def _ToJSonObj(self, columns_order=None, order_by=()):
+    """Returns an object suitable to be converted to JSON.
+
+    Args:
+      columns_order: Optional. A list of all column IDs in the order in which
+                     you want them created in the output table. If specified,
+                     all column IDs must be present.
+      order_by: Optional. Specifies the name of the column(s) to sort by.
+                Passed as is to _PreparedData().
+
+    Returns:
+      A dictionary object for use by ToJSon or ToJSonResponse.
+    """
+    if columns_order is None:
+      columns_order = [col["id"] for col in self.__columns]
+    col_dict = dict([(col["id"], col) for col in self.__columns])
+
+    # Creating the column JSON objects
+    col_objs = []
+    for col_id in columns_order:
+      col_obj = {"id": col_dict[col_id]["id"],
+                 "label": col_dict[col_id]["label"],
+                 "type": col_dict[col_id]["type"]}
+      if col_dict[col_id]["custom_properties"]:
+        col_obj["p"] = col_dict[col_id]["custom_properties"]
+      col_objs.append(col_obj)
+
+    # Creating the rows jsons
+    row_objs = []
+    for row, cp in self._PreparedData(order_by):
+      cell_objs = []
+      for col in columns_order:
+        value = self.CoerceValue(row.get(col, None), col_dict[col]["type"])
+        if value is None:
+          cell_obj = None
+        elif isinstance(value, tuple):
+          cell_obj = {"v": value[0]}
+          if len(value) > 1 and value[1] is not None:
+            cell_obj["f"] = value[1]
+          if len(value) == 3:
+            cell_obj["p"] = value[2]
+        else:
+          cell_obj = {"v": value}
+        cell_objs.append(cell_obj)
+      row_obj = {"c": cell_objs}
+      if cp:
+        row_obj["p"] = cp
+      row_objs.append(row_obj)
+
+    json_obj = {"cols": col_objs, "rows": row_objs}
+    if self.custom_properties:
+      json_obj["p"] = self.custom_properties
+
+    return json_obj
+
+  def ToJSon(self, columns_order=None, order_by=()):
+    """Returns a string that can be used in a JS DataTable constructor.
+
+    This method writes a JSON string that can be passed directly into a Google
+    Visualization API DataTable constructor. Use this output if you are
+    hosting the visualization HTML on your site, and want to code the data
+    table in Python. Pass this string into the
+    google.visualization.DataTable constructor, e.g,:
+      ... on my page that hosts my visualization ...
+      google.setOnLoadCallback(drawTable);
+      function drawTable() {
+        var data = new google.visualization.DataTable(_my_JSon_string, 0.6);
+        myTable.draw(data);
+      }
+
+    Args:
+      columns_order: Optional. Specifies the order of columns in the
+                     output table. Specify a list of all column IDs in the order
+                     in which you want the table created.
+                     Note that you must list all column IDs in this parameter,
+                     if you use it.
+      order_by: Optional. Specifies the name of the column(s) to sort by.
+                Passed as is to _PreparedData().
+
+    Returns:
+      A JSon constructor string to generate a JS DataTable with the data
+      stored in the DataTable object.
+      Example result (the result is without the newlines):
+       {cols: [{id:"a",label:"a",type:"number"},
+               {id:"b",label:"b",type:"string"},
+              {id:"c",label:"c",type:"number"}],
+        rows: [{c:[{v:1},{v:"z"},{v:2}]}, c:{[{v:3,f:"3$"},{v:"w"},{v:null}]}],
+        p:    {'foo': 'bar'}}
+
+    Raises:
+      DataTableException: The data does not match the type.
+    """
+
+    encoder = DataTableJSONEncoder()
+    return encoder.encode(
+        self._ToJSonObj(columns_order, order_by)).encode("utf-8")
+
+  def ToJSonResponse(self, columns_order=None, order_by=(), req_id=0,
+                     response_handler="google.visualization.Query.setResponse"):
+    """Writes a table as a JSON response that can be returned as-is to a client.
+
+    This method writes a JSON response to return to a client in response to a
+    Google Visualization API query. This string can be processed by the calling
+    page, and is used to deliver a data table to a visualization hosted on
+    a different page.
+
+    Args:
+      columns_order: Optional. Passed straight to self.ToJSon().
+      order_by: Optional. Passed straight to self.ToJSon().
+      req_id: Optional. The response id, as retrieved by the request.
+      response_handler: Optional. The response handler, as retrieved by the
+          request.
+
+    Returns:
+      A JSON response string to be received by JS the visualization Query
+      object. This response would be translated into a DataTable on the
+      client side.
+      Example result (newlines added for readability):
+       google.visualization.Query.setResponse({
+          'version':'0.6', 'reqId':'0', 'status':'OK',
+          'table': {cols: [...], rows: [...]}});
+
+    Note: The URL returning this string can be used as a data source by Google
+          Visualization Gadgets or from JS code.
+    """
+
+    response_obj = {
+        "version": "0.6",
+        "reqId": str(req_id),
+        "table": self._ToJSonObj(columns_order, order_by),
+        "status": "ok"
+    }
+    encoder = DataTableJSONEncoder()
+    return "%s(%s);" % (response_handler,
+                        encoder.encode(response_obj).encode("utf-8"))
+
+  def ToResponse(self, columns_order=None, order_by=(), tqx=""):
+    """Writes the right response according to the request string passed in tqx.
+
+    This method parses the tqx request string (format of which is defined in
+    the documentation for implementing a data source of Google Visualization),
+    and returns the right response according to the request.
+    It parses out the "out" parameter of tqx, calls the relevant response
+    (ToJSonResponse() for "json", ToCsv() for "csv", ToHtml() for "html",
+    ToTsvExcel() for "tsv-excel") and passes the response function the rest of
+    the relevant request keys.
+
+    Args:
+      columns_order: Optional. Passed as is to the relevant response function.
+      order_by: Optional. Passed as is to the relevant response function.
+      tqx: Optional. The request string as received by HTTP GET. Should be in
+           the format "key1:value1;key2:value2...". All keys have a default
+           value, so an empty string will just do the default (which is calling
+           ToJSonResponse() with no extra parameters).
+
+    Returns:
+      A response string, as returned by the relevant response function.
+
+    Raises:
+      DataTableException: One of the parameters passed in tqx is not supported.
+    """
+    tqx_dict = {}
+    if tqx:
+      tqx_dict = dict(opt.split(":") for opt in tqx.split(";"))
+    if tqx_dict.get("version", "0.6") != "0.6":
+      raise DataTableException(
+          "Version (%s) passed by request is not supported."
+          % tqx_dict["version"])
+
+    if tqx_dict.get("out", "json") == "json":
+      response_handler = tqx_dict.get("responseHandler",
+                                      "google.visualization.Query.setResponse")
+      return self.ToJSonResponse(columns_order, order_by,
+                                 req_id=tqx_dict.get("reqId", 0),
+                                 response_handler=response_handler)
+    elif tqx_dict["out"] == "html":
+      return self.ToHtml(columns_order, order_by)
+    elif tqx_dict["out"] == "csv":
+      return self.ToCsv(columns_order, order_by)
+    elif tqx_dict["out"] == "tsv-excel":
+      return self.ToTsvExcel(columns_order, order_by)
+    else:
+      raise DataTableException(
+          "'out' parameter: '%s' is not supported" % tqx_dict["out"])
diff --git a/third_party/aom/test/hadamard_test.cc b/third_party/aom/test/hadamard_test.cc
deleted file mode 100644
index db5cb74747..0000000000
--- a/third_party/aom/test/hadamard_test.cc
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <algorithm>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/register_state_check.h"
-
-namespace {
-
-using ::libaom_test::ACMRandom;
-
-typedef void (*HadamardFunc)(const int16_t *a, int a_stride, int16_t *b);
-
-void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
-  int16_t b[8];
-  for (int i = 0; i < 8; i += 2) {
-    b[i + 0] = a[i * a_stride] + a[(i + 1) * a_stride];
-    b[i + 1] = a[i * a_stride] - a[(i + 1) * a_stride];
-  }
-  int16_t c[8];
-  for (int i = 0; i < 8; i += 4) {
-    c[i + 0] = b[i + 0] + b[i + 2];
-    c[i + 1] = b[i + 1] + b[i + 3];
-    c[i + 2] = b[i + 0] - b[i + 2];
-    c[i + 3] = b[i + 1] - b[i + 3];
-  }
-  out[0] = c[0] + c[4];
-  out[7] = c[1] + c[5];
-  out[3] = c[2] + c[6];
-  out[4] = c[3] + c[7];
-  out[2] = c[0] - c[4];
-  out[6] = c[1] - c[5];
-  out[1] = c[2] - c[6];
-  out[5] = c[3] - c[7];
-}
-
-void reference_hadamard8x8(const int16_t *a, int a_stride, int16_t *b) {
-  int16_t buf[64];
-  for (int i = 0; i < 8; ++i) {
-    hadamard_loop(a + i, a_stride, buf + i * 8);
-  }
-
-  for (int i = 0; i < 8; ++i) {
-    hadamard_loop(buf + i, 8, b + i * 8);
-  }
-}
-
-void reference_hadamard16x16(const int16_t *a, int a_stride, int16_t *b) {
-  /* The source is a 16x16 block. The destination is rearranged to 8x32.
-   * Input is 9 bit. */
-  reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
-  reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
-  reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
-  reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);
-
-  /* Overlay the 8x8 blocks and combine. */
-  for (int i = 0; i < 64; ++i) {
-    /* 8x8 steps the range up to 15 bits. */
-    const int16_t a0 = b[0];
-    const int16_t a1 = b[64];
-    const int16_t a2 = b[128];
-    const int16_t a3 = b[192];
-
-    /* Prevent the result from escaping int16_t. */
-    const int16_t b0 = (a0 + a1) >> 1;
-    const int16_t b1 = (a0 - a1) >> 1;
-    const int16_t b2 = (a2 + a3) >> 1;
-    const int16_t b3 = (a2 - a3) >> 1;
-
-    /* Store a 16 bit value. */
-    b[0] = b0 + b2;
-    b[64] = b1 + b3;
-    b[128] = b0 - b2;
-    b[192] = b1 - b3;
-
-    ++b;
-  }
-}
-
-class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> {
- public:
-  virtual void SetUp() {
-    h_func_ = GetParam();
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-  }
-
- protected:
-  HadamardFunc h_func_;
-  ACMRandom rnd_;
-};
-
-class Hadamard8x8Test : public HadamardTestBase {};
-
-TEST_P(Hadamard8x8Test, CompareReferenceRandom) {
-  DECLARE_ALIGNED(16, int16_t, a[64]);
-  DECLARE_ALIGNED(16, int16_t, b[64]);
-  int16_t b_ref[64];
-  for (int i = 0; i < 64; ++i) {
-    a[i] = rnd_.Rand9Signed();
-  }
-  memset(b, 0, sizeof(b));
-  memset(b_ref, 0, sizeof(b_ref));
-
-  reference_hadamard8x8(a, 8, b_ref);
-  ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b));
-
-  // The order of the output is not important. Sort before checking.
-  std::sort(b, b + 64);
-  std::sort(b_ref, b_ref + 64);
-  EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
-}
-
-TEST_P(Hadamard8x8Test, VaryStride) {
-  DECLARE_ALIGNED(16, int16_t, a[64 * 8]);
-  DECLARE_ALIGNED(16, int16_t, b[64]);
-  int16_t b_ref[64];
-  for (int i = 0; i < 64 * 8; ++i) {
-    a[i] = rnd_.Rand9Signed();
-  }
-
-  for (int i = 8; i < 64; i += 8) {
-    memset(b, 0, sizeof(b));
-    memset(b_ref, 0, sizeof(b_ref));
-
-    reference_hadamard8x8(a, i, b_ref);
-    ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
-
-    // The order of the output is not important. Sort before checking.
-    std::sort(b, b + 64);
-    std::sort(b_ref, b_ref + 64);
-    EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
-  }
-}
-
-INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test,
-                        ::testing::Values(&aom_hadamard_8x8_c));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test,
-                        ::testing::Values(&aom_hadamard_8x8_sse2));
-#endif  // HAVE_SSE2
-
-#if HAVE_SSSE3 && ARCH_X86_64
-INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test,
-                        ::testing::Values(&aom_hadamard_8x8_ssse3));
-#endif  // HAVE_SSSE3 && ARCH_X86_64
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test,
-                        ::testing::Values(&aom_hadamard_8x8_neon));
-#endif  // HAVE_NEON
-
-class Hadamard16x16Test : public HadamardTestBase {};
-
-TEST_P(Hadamard16x16Test, CompareReferenceRandom) {
-  DECLARE_ALIGNED(16, int16_t, a[16 * 16]);
-  DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
-  int16_t b_ref[16 * 16];
-  for (int i = 0; i < 16 * 16; ++i) {
-    a[i] = rnd_.Rand9Signed();
-  }
-  memset(b, 0, sizeof(b));
-  memset(b_ref, 0, sizeof(b_ref));
-
-  reference_hadamard16x16(a, 16, b_ref);
-  ASM_REGISTER_STATE_CHECK(h_func_(a, 16, b));
-
-  // The order of the output is not important. Sort before checking.
-  std::sort(b, b + 16 * 16);
-  std::sort(b_ref, b_ref + 16 * 16);
-  EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
-}
-
-TEST_P(Hadamard16x16Test, VaryStride) {
-  DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]);
-  DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
-  int16_t b_ref[16 * 16];
-  for (int i = 0; i < 16 * 16 * 8; ++i) {
-    a[i] = rnd_.Rand9Signed();
-  }
-
-  for (int i = 8; i < 64; i += 8) {
-    memset(b, 0, sizeof(b));
-    memset(b_ref, 0, sizeof(b_ref));
-
-    reference_hadamard16x16(a, i, b_ref);
-    ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
-
-    // The order of the output is not important. Sort before checking.
-    std::sort(b, b + 16 * 16);
-    std::sort(b_ref, b_ref + 16 * 16);
-    EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
-  }
-}
-
-INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test,
-                        ::testing::Values(&aom_hadamard_16x16_c));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test,
-                        ::testing::Values(&aom_hadamard_16x16_sse2));
-#endif  // HAVE_SSE2
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test,
-                        ::testing::Values(&aom_hadamard_16x16_neon));
-#endif  // HAVE_NEON
-}  // namespace
diff --git a/third_party/aom/test/hash_test.cc b/third_party/aom/test/hash_test.cc
new file mode 100644
index 0000000000..e9f7f63c9b
--- /dev/null
+++ b/third_party/aom/test/hash_test.cc
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdlib>
+#include <new>
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/aom_timer.h"
+#include "av1/encoder/hash.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+typedef uint32_t (*get_crc32c_value_func)(void *calculator, uint8_t *p,
+                                          int length);
+
+typedef ::testing::tuple<get_crc32c_value_func, int> HashParam;
+
+class AV1Crc32cHashTest : public ::testing::TestWithParam<HashParam> {
+ public:
+  ~AV1Crc32cHashTest();
+  void SetUp();
+
+  void TearDown();
+
+ protected:
+  void RunCheckOutput(get_crc32c_value_func test_impl);
+  void RunSpeedTest(get_crc32c_value_func test_impl);
+
+  void RunZeroTest(get_crc32c_value_func test_impl);
+
+  libaom_test::ACMRandom rnd_;
+  CRC32C calc_;
+  uint8_t *buffer_;
+  int bsize_;
+  int length_;
+};
+
+AV1Crc32cHashTest::~AV1Crc32cHashTest() { ; }
+
+void AV1Crc32cHashTest::SetUp() {
+  rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
+  av1_crc32c_calculator_init(&calc_);
+
+  bsize_ = GET_PARAM(1);
+  length_ = bsize_ * bsize_ * sizeof(uint16_t);
+  buffer_ = new uint8_t[length_];
+  ASSERT_TRUE(buffer_ != NULL);
+  for (int i = 0; i < length_; ++i) {
+    buffer_[i] = rnd_.Rand8();
+  }
+}
+
+void AV1Crc32cHashTest::TearDown() { delete[] buffer_; }
+
+void AV1Crc32cHashTest::RunCheckOutput(get_crc32c_value_func test_impl) {
+  get_crc32c_value_func ref_impl = av1_get_crc32c_value_c;
+  // for the same buffer crc should be the same
+  uint32_t crc0 = test_impl(&calc_, buffer_, length_);
+  uint32_t crc1 = test_impl(&calc_, buffer_, length_);
+  uint32_t crc2 = ref_impl(&calc_, buffer_, length_);
+  ASSERT_EQ(crc0, crc1);
+  ASSERT_EQ(crc0, crc2);  // should equal to software version
+  // modify buffer
+  buffer_[0] += 1;
+  uint32_t crc3 = test_impl(&calc_, buffer_, length_);
+  uint32_t crc4 = ref_impl(&calc_, buffer_, length_);
+  ASSERT_NE(crc0, crc3);  // crc shoud not equal to previous one
+  ASSERT_EQ(crc3, crc4);
+}
+
+void AV1Crc32cHashTest::RunSpeedTest(get_crc32c_value_func test_impl) {
+  get_crc32c_value_func impls[] = { av1_get_crc32c_value_c, test_impl };
+  const int repeat = 10000000 / (bsize_ + bsize_);
+
+  aom_usec_timer timer;
+  double time[2];
+  for (int i = 0; i < 2; ++i) {
+    aom_usec_timer_start(&timer);
+    for (int j = 0; j < repeat; ++j) {
+      impls[i](&calc_, buffer_, length_);
+    }
+    aom_usec_timer_mark(&timer);
+    time[i] = static_cast<double>(aom_usec_timer_elapsed(&timer));
+  }
+  printf("hash %3dx%-3d:%7.2f/%7.2fus", bsize_, bsize_, time[0], time[1]);
+  printf("(%3.2f)\n", time[0] / time[1]);
+}
+
+void AV1Crc32cHashTest::RunZeroTest(get_crc32c_value_func test_impl) {
+  uint8_t buffer0[1024] = { 0 };
+  // for buffer with different size the crc should not be the same
+  const uint32_t crc0 = test_impl(&calc_, buffer0, 32);
+  const uint32_t crc1 = test_impl(&calc_, buffer0, 128);
+  const uint32_t crc2 = test_impl(&calc_, buffer0, 1024);
+  ASSERT_NE(crc0, crc1);
+  ASSERT_NE(crc0, crc2);
+  ASSERT_NE(crc1, crc2);
+}
+
+TEST_P(AV1Crc32cHashTest, CheckOutput) { RunCheckOutput(GET_PARAM(0)); }
+
+TEST_P(AV1Crc32cHashTest, CheckZero) { RunZeroTest(GET_PARAM(0)); }
+
+TEST_P(AV1Crc32cHashTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(0)); }
+
+const int kValidBlockSize[] = { 64, 32, 8, 4 };
+
+INSTANTIATE_TEST_CASE_P(
+    C, AV1Crc32cHashTest,
+    ::testing::Combine(::testing::Values(&av1_get_crc32c_value_c),
+                       ::testing::ValuesIn(kValidBlockSize)));
+
+#if HAVE_SSE4_2
+INSTANTIATE_TEST_CASE_P(
+    SSE4_2, AV1Crc32cHashTest,
+    ::testing::Combine(::testing::Values(&av1_get_crc32c_value_sse4_2),
+                       ::testing::ValuesIn(kValidBlockSize)));
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/hbd_metrics_test.cc b/third_party/aom/test/hbd_metrics_test.cc
index 4def53b215..09df9bde47 100644
--- a/third_party/aom/test/hbd_metrics_test.cc
+++ b/third_party/aom/test/hbd_metrics_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <math.h>
 #include <stdlib.h>
@@ -16,7 +16,9 @@
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "test/acm_random.h"
 #include "test/util.h"
-#include "./aom_config.h"
+
+#include "config/aom_config.h"
+
 #include "aom_dsp/psnr.h"
 #include "aom_dsp/ssim.h"
 #include "aom_ports/mem.h"
@@ -171,7 +173,7 @@ class HBDMetricsTestBase {
   HBDMetricFunc hbd_metric_;
 };
 
-typedef std::tr1::tuple<LBDMetricFunc, HBDMetricFunc, int, int, double>
+typedef ::testing::tuple<LBDMetricFunc, HBDMetricFunc, int, int, double>
     MetricTestTParam;
 class HBDMetricsTest : public HBDMetricsTestBase,
                        public ::testing::TestWithParam<MetricTestTParam> {
diff --git a/third_party/aom/test/hiprec_convolve_test.cc b/third_party/aom/test/hiprec_convolve_test.cc
index 78e109c9d0..f94a0730c1 100644
--- a/third_party/aom/test/hiprec_convolve_test.cc
+++ b/third_party/aom/test/hiprec_convolve_test.cc
@@ -12,33 +12,51 @@
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "test/hiprec_convolve_test_util.h"
 
-using std::tr1::tuple;
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
+using ::testing::tuple;
 using libaom_test::ACMRandom;
-using libaom_test::AV1HiprecConvolve::AV1HiprecConvolveTest;
-#if CONFIG_HIGHBITDEPTH
 using libaom_test::AV1HighbdHiprecConvolve::AV1HighbdHiprecConvolveTest;
-#endif
+using libaom_test::AV1HiprecConvolve::AV1HiprecConvolveTest;
 
 namespace {
 
-#if HAVE_SSE2
 TEST_P(AV1HiprecConvolveTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
-
+TEST_P(AV1HiprecConvolveTest, DISABLED_SpeedTest) {
+  RunSpeedTest(GET_PARAM(3));
+}
+#if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(SSE2, AV1HiprecConvolveTest,
                         libaom_test::AV1HiprecConvolve::BuildParams(
-                            aom_convolve8_add_src_hip_sse2));
+                            av1_wiener_convolve_add_src_sse2));
+#endif
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(AVX2, AV1HiprecConvolveTest,
+                        libaom_test::AV1HiprecConvolve::BuildParams(
+                            av1_wiener_convolve_add_src_avx2));
+#endif
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, AV1HiprecConvolveTest,
+                        libaom_test::AV1HiprecConvolve::BuildParams(
+                            av1_wiener_convolve_add_src_neon));
 #endif
 
-#if CONFIG_HIGHBITDEPTH && HAVE_SSSE3
+#if HAVE_SSSE3 || HAVE_AVX2
 TEST_P(AV1HighbdHiprecConvolveTest, CheckOutput) {
   RunCheckOutput(GET_PARAM(4));
 }
-
+TEST_P(AV1HighbdHiprecConvolveTest, DISABLED_SpeedTest) {
+  RunSpeedTest(GET_PARAM(4));
+}
+#if HAVE_SSSE3
 INSTANTIATE_TEST_CASE_P(SSSE3, AV1HighbdHiprecConvolveTest,
                         libaom_test::AV1HighbdHiprecConvolve::BuildParams(
-                            aom_highbd_convolve8_add_src_hip_ssse3));
-
+                            av1_highbd_wiener_convolve_add_src_ssse3));
+#endif
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(AVX2, AV1HighbdHiprecConvolveTest,
+                        libaom_test::AV1HighbdHiprecConvolve::BuildParams(
+                            av1_highbd_wiener_convolve_add_src_avx2));
+#endif
 #endif
 
 }  // namespace
diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc
index 4dee6ab4d3..2672bcec3d 100644
--- a/third_party/aom/test/hiprec_convolve_test_util.cc
+++ b/third_party/aom/test/hiprec_convolve_test_util.cc
@@ -13,8 +13,8 @@
 
 #include "av1/common/restoration.h"
 
-using std::tr1::tuple;
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
+using ::testing::tuple;
 
 namespace libaom_test {
 
@@ -52,8 +52,13 @@ namespace AV1HiprecConvolve {
 ::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
     hiprec_convolve_func filter) {
   const HiprecConvolveParam params[] = {
-    make_tuple(8, 8, 50000, filter), make_tuple(64, 64, 1000, filter),
-    make_tuple(32, 8, 10000, filter),
+    make_tuple(8, 8, 50000, filter),   make_tuple(8, 4, 50000, filter),
+    make_tuple(64, 24, 1000, filter),  make_tuple(64, 64, 1000, filter),
+    make_tuple(64, 56, 1000, filter),  make_tuple(32, 8, 10000, filter),
+    make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter),
+    make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter),
+    make_tuple(64, 34, 1000, filter),  make_tuple(8, 17, 10000, filter),
+    make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter)
   };
   return ::testing::ValuesIn(params);
 }
@@ -70,14 +75,15 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
   const int num_iters = GET_PARAM(2);
   int i, j;
+  const ConvolveParams conv_params = get_conv_params_wiener(8);
 
   uint8_t *input_ = new uint8_t[h * w];
   uint8_t *input = input_;
 
-  // The convolve functions always write rows with widths that are multiples of
-  // 8.
-  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
-  int output_n = ((out_w + 7) & ~7) * out_h;
+  // The AVX2 convolve functions always write rows with widths that are
+  // multiples of 16. So to avoid a buffer overflow, we may need to pad
+  // rows to a multiple of 16.
+  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
   uint8_t *output = new uint8_t[output_n];
   uint8_t *output2 = new uint8_t[output_n];
 
@@ -94,10 +100,11 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
     // Choose random locations within the source block
     int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
     int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
-    aom_convolve8_add_src_hip_c(input + offset_r * w + offset_c, w, output,
-                                out_w, hkernel, 16, vkernel, 16, out_w, out_h);
+    av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w, output,
+                                  out_w, hkernel, 16, vkernel, 16, out_w, out_h,
+                                  &conv_params);
     test_impl(input + offset_r * w + offset_c, w, output2, out_w, hkernel, 16,
-              vkernel, 16, out_w, out_h);
+              vkernel, 16, out_w, out_h, &conv_params);
 
     for (j = 0; j < out_w * out_h; ++j)
       ASSERT_EQ(output[j], output2[j])
@@ -108,9 +115,74 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
   delete[] output;
   delete[] output2;
 }
+
+void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) {
+  const int w = 128, h = 128;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2) / 500;
+  int i, j, k;
+  const ConvolveParams conv_params = get_conv_params_wiener(8);
+
+  uint8_t *input_ = new uint8_t[h * w];
+  uint8_t *input = input_;
+
+  // The AVX2 convolve functions always write rows with widths that are
+  // multiples of 16. So to avoid a buffer overflow, we may need to pad
+  // rows to a multiple of 16.
+  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
+  uint8_t *output = new uint8_t[output_n];
+  uint8_t *output2 = new uint8_t[output_n];
+
+  // Generate random filter kernels
+  DECLARE_ALIGNED(16, InterpKernel, hkernel);
+  DECLARE_ALIGNED(16, InterpKernel, vkernel);
+
+  generate_kernels(&rnd_, hkernel, vkernel);
+
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
+
+  aom_usec_timer ref_timer;
+  aom_usec_timer_start(&ref_timer);
+  for (i = 0; i < num_iters; ++i) {
+    for (j = 3; j < h - out_h - 4; j++) {
+      for (k = 3; k < w - out_w - 4; k++) {
+        av1_wiener_convolve_add_src_c(input + j * w + k, w, output, out_w,
+                                      hkernel, 16, vkernel, 16, out_w, out_h,
+                                      &conv_params);
+      }
+    }
+  }
+  aom_usec_timer_mark(&ref_timer);
+  const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
+
+  aom_usec_timer tst_timer;
+  aom_usec_timer_start(&tst_timer);
+  for (i = 0; i < num_iters; ++i) {
+    for (j = 3; j < h - out_h - 4; j++) {
+      for (k = 3; k < w - out_w - 4; k++) {
+        test_impl(input + j * w + k, w, output2, out_w, hkernel, 16, vkernel,
+                  16, out_w, out_h, &conv_params);
+      }
+    }
+  }
+  aom_usec_timer_mark(&tst_timer);
+  const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+  std::cout << "[          ] C time = " << ref_time / 1000
+            << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+  EXPECT_GT(ref_time, tst_time)
+      << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
+      << "C time: " << ref_time << " us\n"
+      << "SIMD time: " << tst_time << " us\n";
+
+  delete[] input_;
+  delete[] output;
+  delete[] output2;
+}
 }  // namespace AV1HiprecConvolve
 
-#if CONFIG_HIGHBITDEPTH
 namespace AV1HighbdHiprecConvolve {
 
 ::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
@@ -141,13 +213,14 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput(
   const int num_iters = GET_PARAM(2);
   const int bd = GET_PARAM(3);
   int i, j;
+  const ConvolveParams conv_params = get_conv_params_wiener(bd);
 
   uint16_t *input = new uint16_t[h * w];
 
-  // The convolve functions always write rows with widths that are multiples of
-  // 8.
-  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
-  int output_n = ((out_w + 7) & ~7) * out_h;
+  // The AVX2 convolve functions always write rows with widths that are
+  // multiples of 16. So to avoid a buffer overflow, we may need to pad
+  // rows to a multiple of 16.
+  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
   uint16_t *output = new uint16_t[output_n];
   uint16_t *output2 = new uint16_t[output_n];
 
@@ -168,11 +241,11 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput(
     // Choose random locations within the source block
     int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
     int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
-    aom_highbd_convolve8_add_src_hip_c(input_ptr + offset_r * w + offset_c, w,
-                                       output_ptr, out_w, hkernel, 16, vkernel,
-                                       16, out_w, out_h, bd);
+    av1_highbd_wiener_convolve_add_src_c(
+        input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel, 16,
+        vkernel, 16, out_w, out_h, &conv_params, bd);
     test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w,
-              hkernel, 16, vkernel, 16, out_w, out_h, bd);
+              hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd);
 
     for (j = 0; j < out_w * out_h; ++j)
       ASSERT_EQ(output[j], output2[j])
@@ -183,6 +256,76 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput(
   delete[] output;
   delete[] output2;
 }
+
+void AV1HighbdHiprecConvolveTest::RunSpeedTest(
+    highbd_hiprec_convolve_func test_impl) {
+  const int w = 128, h = 128;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2) / 500;
+  const int bd = GET_PARAM(3);
+  int i, j, k;
+  const ConvolveParams conv_params = get_conv_params_wiener(bd);
+
+  uint16_t *input = new uint16_t[h * w];
+
+  // The AVX2 convolve functions always write rows with widths that are
+  // multiples of 16. So to avoid a buffer overflow, we may need to pad
+  // rows to a multiple of 16.
+  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
+  uint16_t *output = new uint16_t[output_n];
+  uint16_t *output2 = new uint16_t[output_n];
+
+  // Generate random filter kernels
+  DECLARE_ALIGNED(16, InterpKernel, hkernel);
+  DECLARE_ALIGNED(16, InterpKernel, vkernel);
+
+  generate_kernels(&rnd_, hkernel, vkernel);
+
+  for (i = 0; i < h; ++i)
+    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
+
+  uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input);
+  uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output);
+  uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2);
+
+  aom_usec_timer ref_timer;
+  aom_usec_timer_start(&ref_timer);
+  for (i = 0; i < num_iters; ++i) {
+    for (j = 3; j < h - out_h - 4; j++) {
+      for (k = 3; k < w - out_w - 4; k++) {
+        av1_highbd_wiener_convolve_add_src_c(
+            input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel,
+            16, out_w, out_h, &conv_params, bd);
+      }
+    }
+  }
+  aom_usec_timer_mark(&ref_timer);
+  const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
+
+  aom_usec_timer tst_timer;
+  aom_usec_timer_start(&tst_timer);
+  for (i = 0; i < num_iters; ++i) {
+    for (j = 3; j < h - out_h - 4; j++) {
+      for (k = 3; k < w - out_w - 4; k++) {
+        test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16,
+                  vkernel, 16, out_w, out_h, &conv_params, bd);
+      }
+    }
+  }
+  aom_usec_timer_mark(&tst_timer);
+  const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+  std::cout << "[          ] C time = " << ref_time / 1000
+            << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+  EXPECT_GT(ref_time, tst_time)
+      << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
+      << "C time: " << ref_time << " us\n"
+      << "SIMD time: " << tst_time << " us\n";
+
+  delete[] input;
+  delete[] output;
+  delete[] output2;
+}
 }  // namespace AV1HighbdHiprecConvolve
-#endif  // CONFIG_HIGHBITDEPTH
 }  // namespace libaom_test
diff --git a/third_party/aom/test/hiprec_convolve_test_util.h b/third_party/aom/test/hiprec_convolve_test_util.h
index fe31570f5d..81471c8b9d 100644
--- a/third_party/aom/test/hiprec_convolve_test_util.h
+++ b/third_party/aom/test/hiprec_convolve_test_util.h
@@ -12,14 +12,16 @@
 #ifndef TEST_HIPREC_CONVOLVE_TEST_UTIL_H_
 #define TEST_HIPREC_CONVOLVE_TEST_UTIL_H_
 
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "config/av1_rtcd.h"
+
 #include "test/acm_random.h"
 #include "test/util.h"
-#include "./av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
+#include "aom_ports/aom_timer.h"
+#include "av1/common/convolve.h"
 #include "av1/common/mv.h"
 
 namespace libaom_test {
@@ -30,9 +32,10 @@ typedef void (*hiprec_convolve_func)(const uint8_t *src, ptrdiff_t src_stride,
                                      uint8_t *dst, ptrdiff_t dst_stride,
                                      const int16_t *filter_x, int x_step_q4,
                                      const int16_t *filter_y, int y_step_q4,
-                                     int w, int h);
+                                     int w, int h,
+                                     const ConvolveParams *conv_params);
 
-typedef std::tr1::tuple<int, int, int, hiprec_convolve_func>
+typedef ::testing::tuple<int, int, int, hiprec_convolve_func>
     HiprecConvolveParam;
 
 ::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
@@ -48,20 +51,21 @@ class AV1HiprecConvolveTest
 
  protected:
   void RunCheckOutput(hiprec_convolve_func test_impl);
+  void RunSpeedTest(hiprec_convolve_func test_impl);
 
   libaom_test::ACMRandom rnd_;
 };
 
 }  // namespace AV1HiprecConvolve
 
-#if CONFIG_HIGHBITDEPTH
 namespace AV1HighbdHiprecConvolve {
 typedef void (*highbd_hiprec_convolve_func)(
     const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
     ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,
-    const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+    const int16_t *filter_y, int y_step_q4, int w, int h,
+    const ConvolveParams *conv_params, int bps);
 
-typedef std::tr1::tuple<int, int, int, int, highbd_hiprec_convolve_func>
+typedef ::testing::tuple<int, int, int, int, highbd_hiprec_convolve_func>
     HighbdHiprecConvolveParam;
 
 ::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
@@ -77,12 +81,12 @@ class AV1HighbdHiprecConvolveTest
 
  protected:
   void RunCheckOutput(highbd_hiprec_convolve_func test_impl);
+  void RunSpeedTest(highbd_hiprec_convolve_func test_impl);
 
   libaom_test::ACMRandom rnd_;
 };
 
 }  // namespace AV1HighbdHiprecConvolve
-#endif  // CONFIG_HIGHBITDEPTH
 
 }  // namespace libaom_test
 
diff --git a/third_party/aom/test/horz_superres_test.cc b/third_party/aom/test/horz_superres_test.cc
new file mode 100644
index 0000000000..973f55b668
--- /dev/null
+++ b/third_party/aom/test/horz_superres_test.cc
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "av1/encoder/encoder.h"
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+
+namespace {
+
+using ::testing::make_tuple;
+using ::testing::tuple;
+
+/* TESTING PARAMETERS */
+
+#define NUM_TEST_VIDEOS 3
+
+const int kBitrate = 40;
+
+// PSNR thresholds found by experiment
+const double kPSNRThresholds[] = { 26.0, 28.0, 20.0 };
+
+typedef struct {
+  const char *filename;
+  aom_img_fmt fmt;
+  aom_bit_depth_t bit_depth;
+  unsigned int profile;
+  unsigned int limit;
+  unsigned int screen_content;
+} TestVideoParam;
+
+const TestVideoParam kTestVideoVectors[] = {
+  { "park_joy_90p_8_420.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 5, 0 },
+  { "park_joy_90p_10_444.y4m", AOM_IMG_FMT_I44416, AOM_BITS_10, 1, 5, 0 },
+  { "screendata.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 4, 1 },
+};
+
+// Superres modes tested
+// SUPERRES_QTHRESH is not included, as it has its own test
+const SUPERRES_MODE kSuperresModesNotQThresh[] = { SUPERRES_FIXED,
+                                                   SUPERRES_RANDOM };
+
+// Superres denominators and superres kf denominators to be tested
+typedef tuple<int, int> SuperresDenominatorPair;
+const SuperresDenominatorPair kSuperresDenominators[] = {
+  make_tuple(16, 9),  make_tuple(13, 11), make_tuple(9, 9),
+  make_tuple(13, 13), make_tuple(11, 16), make_tuple(8, 16),
+  make_tuple(16, 8),  make_tuple(8, 8),   make_tuple(9, 14),
+};
+
+// Superres q thresholds and superres kf q thresholds to be tested
+typedef tuple<int, int> SuperresQThresholdPair;
+const SuperresQThresholdPair kSuperresQThresholds[] = {
+  make_tuple(63, 63), make_tuple(63, 41), make_tuple(17, 63),
+  make_tuple(41, 11), make_tuple(1, 37),  make_tuple(11, 11),
+  make_tuple(1, 1),   make_tuple(17, 29), make_tuple(29, 11),
+};
+
+/* END (TESTING PARAMETERS) */
+
+// Test parameter list:
+//  <[needed for EncoderTest], test_video_idx_, superres_mode_,
+//  tuple(superres_denom_, superres_kf_denom_)>
+typedef tuple<const libaom_test::CodecFactory *, int, SUPERRES_MODE,
+              SuperresDenominatorPair>
+    HorzSuperresTestParam;
+
+class HorzSuperresEndToEndTest
+    : public ::testing::TestWithParam<HorzSuperresTestParam>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  HorzSuperresEndToEndTest()
+      : EncoderTest(GET_PARAM(0)), test_video_idx_(GET_PARAM(1)),
+        superres_mode_(GET_PARAM(2)), psnr_(0.0), frame_count_(0) {
+    test_video_param_ = kTestVideoVectors[test_video_idx_];
+
+    SuperresDenominatorPair denoms = GET_PARAM(3);
+    superres_denom_ = ::testing::get<0>(denoms);
+    superres_kf_denom_ = ::testing::get<1>(denoms);
+  }
+
+  virtual ~HorzSuperresEndToEndTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libaom_test::kTwoPassGood);
+    cfg_.g_lag_in_frames = 5;
+    cfg_.rc_end_usage = AOM_VBR;
+    cfg_.rc_target_bitrate = kBitrate;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = (unsigned int)test_video_param_.bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    // Set superres parameters
+    cfg_.rc_superres_mode = superres_mode_;
+    cfg_.rc_superres_denominator = superres_denom_;
+    cfg_.rc_superres_kf_denominator = superres_kf_denom_;
+  }
+
+  virtual void BeginPassHook(unsigned int) {
+    psnr_ = 0.0;
+    frame_count_ = 0;
+  }
+
+  virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
+    psnr_ += pkt->data.psnr.psnr[0];
+    frame_count_++;
+  }
+
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
+
+      // Set cpu-used = 8 for speed
+      encoder->Control(AOME_SET_CPUUSED, 8);
+
+      // Test screen coding tools
+      if (test_video_param_.screen_content)
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
+      else
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
+
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (frame_count_) return psnr_ / frame_count_;
+    return 0.0;
+  }
+
+  double GetPsnrThreshold() { return kPSNRThresholds[test_video_idx_]; }
+
+  void DoTest() {
+    testing::internal::scoped_ptr<libaom_test::VideoSource> video;
+    video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                                test_video_param_.limit));
+    ASSERT_TRUE(video.get() != NULL);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double psnr = GetAveragePsnr();
+    EXPECT_GT(psnr, GetPsnrThreshold())
+        << "superres_mode_ = " << superres_mode_
+        << ", superres_denom_ = " << superres_denom_
+        << ", superres_kf_denom_ = " << superres_kf_denom_;
+
+    EXPECT_EQ(test_video_param_.limit, frame_count_)
+        << "superres_mode_ = " << superres_mode_
+        << ", superres_denom_ = " << superres_denom_
+        << ", superres_kf_denom_ = " << superres_kf_denom_;
+  }
+
+  int test_video_idx_;
+  TestVideoParam test_video_param_;
+  SUPERRES_MODE superres_mode_;
+  int superres_denom_;
+  int superres_kf_denom_;
+
+ private:
+  double psnr_;
+  unsigned int frame_count_;
+};
+
+TEST_P(HorzSuperresEndToEndTest, HorzSuperresEndToEndPSNRTest) { DoTest(); }
+
+AV1_INSTANTIATE_TEST_CASE(HorzSuperresEndToEndTest,
+                          ::testing::Range(0, NUM_TEST_VIDEOS),
+                          ::testing::ValuesIn(kSuperresModesNotQThresh),
+                          ::testing::ValuesIn(kSuperresDenominators));
+
+// Test parameter list:
+//  <[needed for EncoderTest], test_video_idx_, tuple(superres_denom_,
+//  superres_kf_denom_), tuple(superres_qthresh_,superres_kf_qthresh_)>
+typedef tuple<const libaom_test::CodecFactory *, int, SuperresDenominatorPair,
+              SuperresQThresholdPair>
+    HorzSuperresQThreshTestParam;
+
+class HorzSuperresQThreshEndToEndTest
+    : public ::testing::TestWithParam<HorzSuperresQThreshTestParam>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  HorzSuperresQThreshEndToEndTest()
+      : EncoderTest(GET_PARAM(0)), test_video_idx_(GET_PARAM(1)),
+        superres_mode_(SUPERRES_QTHRESH), psnr_(0.0), frame_count_(0) {
+    test_video_param_ = kTestVideoVectors[test_video_idx_];
+
+    SuperresDenominatorPair denoms = GET_PARAM(2);
+    superres_denom_ = ::testing::get<0>(denoms);
+    superres_kf_denom_ = ::testing::get<1>(denoms);
+
+    SuperresQThresholdPair qthresholds = GET_PARAM(3);
+    superres_qthresh_ = ::testing::get<0>(qthresholds);
+    superres_kf_qthresh_ = ::testing::get<1>(qthresholds);
+  }
+
+  virtual ~HorzSuperresQThreshEndToEndTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libaom_test::kTwoPassGood);
+    cfg_.g_lag_in_frames = 5;
+    cfg_.rc_end_usage = AOM_VBR;
+    cfg_.rc_target_bitrate = kBitrate;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_profile = test_video_param_.profile;
+    cfg_.g_input_bit_depth = (unsigned int)test_video_param_.bit_depth;
+    cfg_.g_bit_depth = test_video_param_.bit_depth;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+    if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
+
+    // Set superres parameters
+    cfg_.rc_superres_mode = superres_mode_;
+    cfg_.rc_superres_denominator = superres_denom_;
+    cfg_.rc_superres_kf_denominator = superres_kf_denom_;
+    cfg_.rc_superres_qthresh = superres_qthresh_;
+    cfg_.rc_superres_kf_qthresh = superres_kf_qthresh_;
+  }
+
+  virtual void BeginPassHook(unsigned int) {
+    psnr_ = 0.0;
+    frame_count_ = 0;
+  }
+
+  virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
+    psnr_ += pkt->data.psnr.psnr[0];
+    frame_count_++;
+  }
+
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(AV1E_SET_TILE_COLUMNS, 0);
+
+      // Set cpu-used = 8 for speed
+      encoder->Control(AOME_SET_CPUUSED, 8);
+
+      // Test screen coding tools
+      if (test_video_param_.screen_content)
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
+      else
+        encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
+
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (frame_count_) return psnr_ / frame_count_;
+    return 0.0;
+  }
+
+  double GetPsnrThreshold() { return kPSNRThresholds[test_video_idx_]; }
+
+  void DoTest() {
+    testing::internal::scoped_ptr<libaom_test::VideoSource> video;
+    video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                                test_video_param_.limit));
+    ASSERT_TRUE(video.get() != NULL);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+    const double psnr = GetAveragePsnr();
+    EXPECT_GT(psnr, GetPsnrThreshold())
+        << "superres_mode_ = " << superres_mode_
+        << ", superres_denom_ = " << superres_denom_
+        << ", superres_kf_denom_ = " << superres_kf_denom_
+        << ", superres_qthresh_ = " << superres_qthresh_
+        << ", superres_kf_qthresh_ = " << superres_kf_qthresh_;
+
+    EXPECT_EQ(test_video_param_.limit, frame_count_)
+        << "superres_mode_ = " << superres_mode_
+        << ", superres_denom_ = " << superres_denom_
+        << ", superres_kf_denom_ = " << superres_kf_denom_
+        << ", superres_qthresh_ = " << superres_qthresh_
+        << ", superres_kf_qthresh_ = " << superres_kf_qthresh_;
+  }
+
+  int test_video_idx_;
+  TestVideoParam test_video_param_;
+  SUPERRES_MODE superres_mode_;
+  int superres_denom_;
+  int superres_kf_denom_;
+  int superres_qthresh_;
+  int superres_kf_qthresh_;
+
+ private:
+  double psnr_;
+  unsigned int frame_count_;
+};
+
+TEST_P(HorzSuperresQThreshEndToEndTest, HorzSuperresQThreshEndToEndPSNRTest) {
+  DoTest();
+}
+
+AV1_INSTANTIATE_TEST_CASE(HorzSuperresQThreshEndToEndTest,
+                          ::testing::Range(0, NUM_TEST_VIDEOS),
+                          ::testing::ValuesIn(kSuperresDenominators),
+                          ::testing::ValuesIn(kSuperresQThresholds));
+
+}  // namespace
diff --git a/third_party/aom/test/idct8x8_test.cc b/third_party/aom/test/idct8x8_test.cc
deleted file mode 100644
index f99a4075f8..0000000000
--- a/third_party/aom/test/idct8x8_test.cc
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "aom/aom_integer.h"
-#include "aom_ports/msvc.h"  // for round()
-
-using libaom_test::ACMRandom;
-
-namespace {
-
-void reference_dct_1d(double input[8], double output[8]) {
-  const double kPi = 3.141592653589793238462643383279502884;
-  const double kInvSqrt2 = 0.707106781186547524400844362104;
-  for (int k = 0; k < 8; k++) {
-    output[k] = 0.0;
-    for (int n = 0; n < 8; n++)
-      output[k] += input[n] * cos(kPi * (2 * n + 1) * k / 16.0);
-    if (k == 0) output[k] = output[k] * kInvSqrt2;
-  }
-}
-
-void reference_dct_2d(int16_t input[64], double output[64]) {
-  // First transform columns
-  for (int i = 0; i < 8; ++i) {
-    double temp_in[8], temp_out[8];
-    for (int j = 0; j < 8; ++j) temp_in[j] = input[j * 8 + i];
-    reference_dct_1d(temp_in, temp_out);
-    for (int j = 0; j < 8; ++j) output[j * 8 + i] = temp_out[j];
-  }
-  // Then transform rows
-  for (int i = 0; i < 8; ++i) {
-    double temp_in[8], temp_out[8];
-    for (int j = 0; j < 8; ++j) temp_in[j] = output[j + i * 8];
-    reference_dct_1d(temp_in, temp_out);
-    for (int j = 0; j < 8; ++j) output[j + i * 8] = temp_out[j];
-  }
-  // Scale by some magic number
-  for (int i = 0; i < 64; ++i) output[i] *= 2;
-}
-
-TEST(AV1Idct8x8Test, AccuracyCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = 10000;
-  for (int i = 0; i < count_test_block; ++i) {
-    int16_t input[64];
-    tran_low_t coeff[64];
-    double output_r[64];
-    uint8_t dst[64], src[64];
-
-    for (int j = 0; j < 64; ++j) {
-      src[j] = rnd.Rand8();
-      dst[j] = rnd.Rand8();
-    }
-    // Initialize a test block with input range [-255, 255].
-    for (int j = 0; j < 64; ++j) input[j] = src[j] - dst[j];
-
-    reference_dct_2d(input, output_r);
-    for (int j = 0; j < 64; ++j)
-      coeff[j] = static_cast<tran_low_t>(round(output_r[j]));
-    aom_idct8x8_64_add_c(coeff, dst, 8);
-    for (int j = 0; j < 64; ++j) {
-      const int diff = dst[j] - src[j];
-      const int error = diff * diff;
-      EXPECT_GE(1, error) << "Error: 8x8 FDCT/IDCT has error " << error
-                          << " at index " << j;
-    }
-  }
-}
-
-}  // namespace
diff --git a/third_party/aom/test/intra_edge_test.cc b/third_party/aom/test/intra_edge_test.cc
new file mode 100644
index 0000000000..ce61402acb
--- /dev/null
+++ b/third_party/aom/test/intra_edge_test.cc
@@ -0,0 +1,337 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/register_state_check.h"
+#include "test/function_equivalence_test.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_integer.h"
+#include "av1/common/enums.h"
+
+using libaom_test::FunctionEquivalenceTest;
+
+namespace {
+
+template <typename F, typename T>
+class UpsampleTest : public FunctionEquivalenceTest<F> {
+ protected:
+  static const int kIterations = 1000000;
+  static const int kMinEdge = 4;
+  static const int kMaxEdge = 24;
+  static const int kBufSize = 2 * 64 + 32;
+  static const int kOffset = 16;
+
+  virtual ~UpsampleTest() {}
+
+  virtual void Execute(T *edge_tst) = 0;
+
+  void Common() {
+    edge_ref_ = &edge_ref_data_[kOffset];
+    edge_tst_ = &edge_tst_data_[kOffset];
+
+    Execute(edge_tst_);
+
+    const int max_idx = (size_ - 1) * 2;
+    for (int r = -2; r <= max_idx; ++r) {
+      ASSERT_EQ(edge_ref_[r], edge_tst_[r]);
+    }
+  }
+
+  T edge_ref_data_[kBufSize];
+  T edge_tst_data_[kBufSize];
+
+  T *edge_ref_;
+  T *edge_tst_;
+
+  int size_;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// 8 bit version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*UP8B)(uint8_t *p, int size);
+typedef libaom_test::FuncParam<UP8B> TestFuncs;
+
+class UpsampleTest8B : public UpsampleTest<UP8B, uint8_t> {
+ protected:
+  void Execute(uint8_t *edge_tst) {
+    params_.ref_func(edge_ref_, size_);
+    ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_));
+  }
+};
+
+TEST_P(UpsampleTest8B, RandomValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    size_ = 4 * (this->rng_(4) + 1);
+
+    int i, pix = 0;
+    for (i = 0; i < kOffset + size_; ++i) {
+      pix = rng_.Rand8();
+      edge_ref_data_[i] = pix;
+      edge_tst_data_[i] = edge_ref_data_[i];
+    }
+
+    // Extend final sample
+    while (i < kBufSize) {
+      edge_ref_data_[i] = pix;
+      edge_tst_data_[i] = pix;
+      i++;
+    }
+
+    Common();
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, UpsampleTest8B,
+    ::testing::Values(TestFuncs(av1_upsample_intra_edge_c,
+                                av1_upsample_intra_edge_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+//////////////////////////////////////////////////////////////////////////////
+// High bit-depth version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*UPHB)(uint16_t *p, int size, int bd);
+typedef libaom_test::FuncParam<UPHB> TestFuncsHBD;
+
+class UpsampleTestHB : public UpsampleTest<UPHB, uint16_t> {
+ protected:
+  void Execute(uint16_t *edge_tst) {
+    params_.ref_func(edge_ref_, size_, bit_depth_);
+    ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_, bit_depth_));
+  }
+  int bit_depth_;
+};
+
+TEST_P(UpsampleTestHB, RandomValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    switch (rng_(3)) {
+      case 0: bit_depth_ = 8; break;
+      case 1: bit_depth_ = 10; break;
+      default: bit_depth_ = 12; break;
+    }
+    const int hi = 1 << bit_depth_;
+
+    size_ = 4 * (this->rng_(4) + 1);
+
+    int i, pix = 0;
+    for (i = 0; i < kOffset + size_; ++i) {
+      pix = rng_(hi);
+      edge_ref_data_[i] = pix;
+      edge_tst_data_[i] = pix;
+    }
+
+    // Extend final sample
+    while (i < kBufSize) {
+      edge_ref_data_[i] = pix;
+      edge_tst_data_[i] = pix;
+      i++;
+    }
+
+    Common();
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, UpsampleTestHB,
+    ::testing::Values(TestFuncsHBD(av1_upsample_intra_edge_high_c,
+                                   av1_upsample_intra_edge_high_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+template <typename F, typename T>
+class FilterEdgeTest : public FunctionEquivalenceTest<F> {
+ protected:
+  static const int kIterations = 1000000;
+  static const int kMaxEdge = 2 * 64;
+  static const int kBufSize = kMaxEdge + 32;
+  static const int kOffset = 15;
+
+  virtual ~FilterEdgeTest() {}
+
+  virtual void Execute(T *edge_tst) = 0;
+
+  void Common() {
+    edge_ref_ = &edge_ref_data_[kOffset];
+    edge_tst_ = &edge_tst_data_[kOffset];
+
+    Execute(edge_tst_);
+
+    for (int r = 0; r < size_; ++r) {
+      ASSERT_EQ(edge_ref_[r], edge_tst_[r]);
+    }
+  }
+
+  T edge_ref_data_[kBufSize];
+  T edge_tst_data_[kBufSize];
+
+  T *edge_ref_;
+  T *edge_tst_;
+
+  int size_;
+  int strength_;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// 8 bit version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*FE8B)(uint8_t *p, int size, int strength);
+typedef libaom_test::FuncParam<FE8B> FilterEdgeTestFuncs;
+
+class FilterEdgeTest8B : public FilterEdgeTest<FE8B, uint8_t> {
+ protected:
+  void Execute(uint8_t *edge_tst) {
+    params_.ref_func(edge_ref_, size_, strength_);
+    ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_, strength_));
+  }
+};
+
+TEST_P(FilterEdgeTest8B, RandomValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    strength_ = this->rng_(4);
+    size_ = 4 * (this->rng_(128 / 4) + 1) + 1;
+
+    int i, pix = 0;
+    for (i = 0; i < kOffset + size_; ++i) {
+      pix = rng_.Rand8();
+      edge_ref_data_[i] = pix;
+      edge_tst_data_[i] = pix;
+    }
+
+    Common();
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, FilterEdgeTest8B,
+    ::testing::Values(FilterEdgeTestFuncs(av1_filter_intra_edge_c,
+                                          av1_filter_intra_edge_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+//////////////////////////////////////////////////////////////////////////////
+// High bit-depth version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*FEHB)(uint16_t *p, int size, int strength);
+typedef libaom_test::FuncParam<FEHB> FilterEdgeTestFuncsHBD;
+
+class FilterEdgeTestHB : public FilterEdgeTest<FEHB, uint16_t> {
+ protected:
+  void Execute(uint16_t *edge_tst) {
+    params_.ref_func(edge_ref_, size_, strength_);
+    ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_, strength_));
+  }
+  int bit_depth_;
+};
+
+TEST_P(FilterEdgeTestHB, RandomValues) {
+  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
+    switch (rng_(3)) {
+      case 0: bit_depth_ = 8; break;
+      case 1: bit_depth_ = 10; break;
+      default: bit_depth_ = 12; break;
+    }
+    const int hi = 1 << bit_depth_;
+    strength_ = this->rng_(4);
+    size_ = 4 * (this->rng_(128 / 4) + 1) + 1;
+
+    int i, pix = 0;
+    for (i = 0; i < kOffset + size_; ++i) {
+      pix = rng_(hi);
+      edge_ref_data_[i] = pix;
+      edge_tst_data_[i] = pix;
+    }
+
+    Common();
+  }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(SSE4_1, FilterEdgeTestHB,
+                        ::testing::Values(FilterEdgeTestFuncsHBD(
+                            av1_filter_intra_edge_high_c,
+                            av1_filter_intra_edge_high_sse4_1)));
+#endif  // HAVE_SSE4_1
+
+// Speed tests
+
+TEST_P(UpsampleTest8B, DISABLED_Speed) {
+  const int test_count = 10000000;
+  size_ = kMaxEdge;
+  for (int i = 0; i < kOffset + size_; ++i) {
+    edge_tst_data_[i] = rng_.Rand8();
+  }
+  edge_tst_ = &edge_tst_data_[kOffset];
+  for (int iter = 0; iter < test_count; ++iter) {
+    ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_));
+  }
+}
+
+TEST_P(UpsampleTestHB, DISABLED_Speed) {
+  const int test_count = 10000000;
+  size_ = kMaxEdge;
+  bit_depth_ = 12;
+  const int hi = 1 << bit_depth_;
+  for (int i = 0; i < kOffset + size_; ++i) {
+    edge_tst_data_[i] = rng_(hi);
+  }
+  edge_tst_ = &edge_tst_data_[kOffset];
+  for (int iter = 0; iter < test_count; ++iter) {
+    ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_, bit_depth_));
+  }
+}
+
+TEST_P(FilterEdgeTest8B, DISABLED_Speed) {
+  const int test_count = 10000000;
+  size_ = kMaxEdge;
+  strength_ = 1;
+  for (int i = 0; i < kOffset + size_; ++i) {
+    edge_tst_data_[i] = rng_.Rand8();
+  }
+  edge_tst_ = &edge_tst_data_[kOffset];
+  for (int iter = 0; iter < test_count; ++iter) {
+    ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_, strength_));
+    // iterate over filter strengths (1,2,3)
+    strength_ = (strength_ == 3) ? 1 : strength_ + 1;
+  }
+}
+
+TEST_P(FilterEdgeTestHB, DISABLED_Speed) {
+  const int test_count = 10000000;
+  size_ = kMaxEdge;
+  strength_ = 1;
+  bit_depth_ = 12;
+  const int hi = 1 << bit_depth_;
+  for (int i = 0; i < kOffset + size_; ++i) {
+    edge_tst_data_[i] = rng_(hi);
+  }
+  edge_tst_ = &edge_tst_data_[kOffset];
+  for (int iter = 0; iter < test_count; ++iter) {
+    ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_, strength_));
+    // iterate over filter strengths (1,2,3)
+    strength_ = (strength_ == 3) ? 1 : strength_ + 1;
+  }
+}
+
+}  // namespace
diff --git a/third_party/aom/test/intrabc_test.cc b/third_party/aom/test/intrabc_test.cc
index 84cfa5c485..3ea4217081 100644
--- a/third_party/aom/test/intrabc_test.cc
+++ b/third_party/aom/test/intrabc_test.cc
@@ -11,10 +11,13 @@
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
+#include "av1/common/blockd.h"
 #include "av1/common/enums.h"
 #include "av1/common/mv.h"
 #include "av1/common/mvref_common.h"
+#include "av1/common/onyxc_int.h"
 #include "av1/common/tile_common.h"
 
 namespace {
@@ -29,9 +32,7 @@ TEST(IntrabcTest, DvValidation) {
   const int kSubPelScale = 8;
   const int kTileMaxMibWidth = 8;
   const DvTestCase kDvCases[] = {
-#if CONFIG_EXT_PARTITION
     { { 0, 0 }, 0, 0, BLOCK_128X128, false },
-#endif
     { { 0, 0 }, 0, 0, BLOCK_64X64, false },
     { { 0, 0 }, 0, 0, BLOCK_32X32, false },
     { { 0, 0 }, 0, 0, BLOCK_16X16, false },
@@ -46,7 +47,7 @@ TEST(IntrabcTest, DvValidation) {
       MAX_SB_SIZE / MI_SIZE,
       MAX_SB_SIZE / MI_SIZE,
       BLOCK_16X16,
-      true },
+      false },
     { { -MAX_SB_SIZE * kSubPelScale, 0 },
       MAX_SB_SIZE / MI_SIZE,
       MAX_SB_SIZE / MI_SIZE,
@@ -111,7 +112,7 @@ TEST(IntrabcTest, DvValidation) {
       MAX_SB_SIZE / MI_SIZE,
       MAX_SB_SIZE / MI_SIZE,
       BLOCK_LARGEST,
-      true },
+      false },
     { { -MAX_SB_SIZE * kSubPelScale, -(MAX_SB_SIZE - 1) * kSubPelScale },
       MAX_SB_SIZE / MI_SIZE,
       MAX_SB_SIZE / MI_SIZE,
@@ -126,13 +127,13 @@ TEST(IntrabcTest, DvValidation) {
       MAX_SB_SIZE / MI_SIZE,
       MAX_SB_SIZE / MI_SIZE,
       BLOCK_LARGEST,
-      true },
+      false },
     { { -MAX_SB_SIZE * kSubPelScale,
         (kTileMaxMibWidth - 2) * MAX_SB_SIZE * kSubPelScale },
       MAX_SB_SIZE / MI_SIZE,
       MAX_SB_SIZE / MI_SIZE,
       BLOCK_LARGEST,
-      true },
+      false },
     { { -MAX_SB_SIZE * kSubPelScale,
         ((kTileMaxMibWidth - 2) * MAX_SB_SIZE + 1) * kSubPelScale },
       MAX_SB_SIZE / MI_SIZE,
@@ -140,17 +141,27 @@ TEST(IntrabcTest, DvValidation) {
       BLOCK_LARGEST,
       false },
   };
-  TileInfo tile;
-  tile.mi_row_start = 8 * MAX_MIB_SIZE;
-  tile.mi_row_end = 16 * MAX_MIB_SIZE;
-  tile.mi_col_start = 24 * MAX_MIB_SIZE;
-  tile.mi_col_end = tile.mi_col_start + kTileMaxMibWidth * MAX_MIB_SIZE;
+
+  MACROBLOCKD xd;
+  memset(&xd, 0, sizeof(xd));
+  xd.tile.mi_row_start = 8 * MAX_MIB_SIZE;
+  xd.tile.mi_row_end = 16 * MAX_MIB_SIZE;
+  xd.tile.mi_col_start = 24 * MAX_MIB_SIZE;
+  xd.tile.mi_col_end = xd.tile.mi_col_start + kTileMaxMibWidth * MAX_MIB_SIZE;
+  xd.plane[1].subsampling_x = 1;
+  xd.plane[1].subsampling_y = 1;
+  xd.plane[2].subsampling_x = 1;
+  xd.plane[2].subsampling_y = 1;
+
+  AV1_COMMON cm;
+  memset(&cm, 0, sizeof(cm));
+
   for (int i = 0; i < static_cast<int>(GTEST_ARRAY_SIZE_(kDvCases)); ++i) {
-    EXPECT_EQ(kDvCases[i].valid,
-              is_dv_valid(kDvCases[i].dv, &tile,
-                          tile.mi_row_start + kDvCases[i].mi_row_offset,
-                          tile.mi_col_start + kDvCases[i].mi_col_offset,
-                          kDvCases[i].bsize))
+    EXPECT_EQ(static_cast<int>(kDvCases[i].valid),
+              av1_is_dv_valid(kDvCases[i].dv, &cm, &xd,
+                              xd.tile.mi_row_start + kDvCases[i].mi_row_offset,
+                              xd.tile.mi_col_start + kDvCases[i].mi_col_offset,
+                              kDvCases[i].bsize, MAX_MIB_SIZE_LOG2))
         << "DvCases[" << i << "]";
   }
 }
diff --git a/third_party/aom/test/intrapred_test.cc b/third_party/aom/test/intrapred_test.cc
index 12da1601ce..82f1914493 100644
--- a/third_party/aom/test/intrapred_test.cc
+++ b/third_party/aom/test/intrapred_test.cc
@@ -7,19 +7,21 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <string>
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
 #include "av1/common/blockd.h"
+#include "av1/common/common.h"
 #include "av1/common/pred_common.h"
 #include "aom_mem/aom_mem.h"
 
@@ -139,33 +141,42 @@ class LowbdIntraPredTest : public AV1IntraPredTest<IntraPred, uint8_t> {
   }
 };
 
+// Suppress an unitialized warning. Once there are implementations to test then
+// this can be restored.
 TEST_P(HighbdIntraPredTest, Bitexact) {
-  // max block size is 32
-  DECLARE_ALIGNED(16, uint16_t, left_col[2 * 32]);
-  DECLARE_ALIGNED(16, uint16_t, above_data[2 * 32 + 32]);
-  DECLARE_ALIGNED(16, uint16_t, dst[3 * 32 * 32]);
-  DECLARE_ALIGNED(16, uint16_t, ref_dst[3 * 32 * 32]);
+  // max block size is 64
+  DECLARE_ALIGNED(16, uint16_t, left_col[2 * 64]);
+  DECLARE_ALIGNED(16, uint16_t, above_data[2 * 64 + 64]);
+  DECLARE_ALIGNED(16, uint16_t, dst[3 * 64 * 64]);
+  DECLARE_ALIGNED(16, uint16_t, ref_dst[3 * 64 * 64]);
+  av1_zero(left_col);
+  av1_zero(above_data);
   RunTest(left_col, above_data, dst, ref_dst);
 }
 
+// Same issue as above but for arm.
+#if !HAVE_NEON
 TEST_P(LowbdIntraPredTest, Bitexact) {
   // max block size is 32
   DECLARE_ALIGNED(16, uint8_t, left_col[2 * 32]);
   DECLARE_ALIGNED(16, uint8_t, above_data[2 * 32 + 32]);
   DECLARE_ALIGNED(16, uint8_t, dst[3 * 32 * 32]);
   DECLARE_ALIGNED(16, uint8_t, ref_dst[3 * 32 * 32]);
+  av1_zero(left_col);
+  av1_zero(above_data);
   RunTest(left_col, above_data, dst, ref_dst);
 }
+#endif  // !HAVE_NEON
 
 // -----------------------------------------------------------------------------
 // High Bit Depth Tests
-
 #define highbd_entry(type, width, height, opt, bd)                          \
   IntraPredFunc<HighbdIntraPred>(                                           \
       &aom_highbd_##type##_predictor_##width##x##height##_##opt,            \
       &aom_highbd_##type##_predictor_##width##x##height##_c, width, height, \
       bd)
 
+#if 0
 #define highbd_intrapred(type, opt, bd)                                       \
   highbd_entry(type, 4, 4, opt, bd), highbd_entry(type, 4, 8, opt, bd),       \
       highbd_entry(type, 8, 4, opt, bd), highbd_entry(type, 8, 8, opt, bd),   \
@@ -173,111 +184,10 @@ TEST_P(LowbdIntraPredTest, Bitexact) {
       highbd_entry(type, 16, 16, opt, bd),                                    \
       highbd_entry(type, 16, 32, opt, bd),                                    \
       highbd_entry(type, 32, 16, opt, bd), highbd_entry(type, 32, 32, opt, bd)
+#endif
 
-#if CONFIG_HIGHBITDEPTH
-#if HAVE_SSE2
-const IntraPredFunc<HighbdIntraPred> IntraPredTestVector8[] = {
-  highbd_intrapred(dc, sse2, 8),     highbd_intrapred(dc_left, sse2, 8),
-  highbd_intrapred(dc_top, sse2, 8), highbd_intrapred(dc_128, sse2, 8),
-  highbd_intrapred(h, sse2, 8),      highbd_intrapred(v, sse2, 8),
-  highbd_entry(d117, 4, 4, sse2, 8), highbd_entry(d135, 4, 4, sse2, 8),
-  highbd_entry(d153, 4, 4, sse2, 8), highbd_entry(d45e, 4, 4, sse2, 8),
-  highbd_entry(d45e, 4, 8, sse2, 8), highbd_entry(d45e, 8, 4, sse2, 8),
-  highbd_entry(d45e, 8, 8, sse2, 8), highbd_entry(d45e, 8, 16, sse2, 8),
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, HighbdIntraPredTest,
-                        ::testing::ValuesIn(IntraPredTestVector8));
-
-const IntraPredFunc<HighbdIntraPred> IntraPredTestVector10[] = {
-  highbd_intrapred(dc, sse2, 10),     highbd_intrapred(dc_left, sse2, 10),
-  highbd_intrapred(dc_top, sse2, 10), highbd_intrapred(dc_128, sse2, 10),
-  highbd_intrapred(h, sse2, 10),      highbd_intrapred(v, sse2, 10),
-  highbd_entry(d117, 4, 4, sse2, 10), highbd_entry(d135, 4, 4, sse2, 10),
-  highbd_entry(d153, 4, 4, sse2, 10), highbd_entry(d45e, 4, 4, sse2, 10),
-  highbd_entry(d45e, 4, 8, sse2, 10), highbd_entry(d45e, 8, 4, sse2, 10),
-  highbd_entry(d45e, 8, 8, sse2, 10), highbd_entry(d45e, 8, 16, sse2, 10),
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, HighbdIntraPredTest,
-                        ::testing::ValuesIn(IntraPredTestVector10));
-
-const IntraPredFunc<HighbdIntraPred> IntraPredTestVector12[] = {
-  highbd_intrapred(dc, sse2, 12),     highbd_intrapred(dc_left, sse2, 12),
-  highbd_intrapred(dc_top, sse2, 12), highbd_intrapred(dc_128, sse2, 12),
-  highbd_intrapred(h, sse2, 12),      highbd_intrapred(v, sse2, 12),
-  highbd_entry(d117, 4, 4, sse2, 12), highbd_entry(d135, 4, 4, sse2, 12),
-  highbd_entry(d153, 4, 4, sse2, 12), highbd_entry(d45e, 4, 4, sse2, 12),
-  highbd_entry(d45e, 4, 8, sse2, 12), highbd_entry(d45e, 8, 4, sse2, 12),
-  highbd_entry(d45e, 8, 8, sse2, 12), highbd_entry(d45e, 8, 16, sse2, 12),
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, HighbdIntraPredTest,
-                        ::testing::ValuesIn(IntraPredTestVector12));
-
-#endif  // HAVE_SSE2
-
-#if HAVE_SSSE3
-const IntraPredFunc<HighbdIntraPred> IntraPredTestVectorSsse3_8[] = {
-  highbd_entry(d117, 8, 8, ssse3, 8),   highbd_entry(d117, 16, 16, ssse3, 8),
-  highbd_entry(d117, 32, 32, ssse3, 8), highbd_entry(d135, 8, 8, ssse3, 8),
-  highbd_entry(d135, 16, 16, ssse3, 8), highbd_entry(d135, 32, 32, ssse3, 8),
-  highbd_entry(d153, 8, 8, ssse3, 8),   highbd_entry(d153, 16, 16, ssse3, 8),
-  highbd_entry(d153, 32, 32, ssse3, 8),
-};
-INSTANTIATE_TEST_CASE_P(SSSE3_TO_C_8, HighbdIntraPredTest,
-                        ::testing::ValuesIn(IntraPredTestVectorSsse3_8));
-
-const IntraPredFunc<HighbdIntraPred> IntraPredTestVectorSsse3_10[] = {
-  highbd_entry(d117, 8, 8, ssse3, 10),   highbd_entry(d117, 16, 16, ssse3, 10),
-  highbd_entry(d117, 32, 32, ssse3, 10), highbd_entry(d135, 8, 8, ssse3, 10),
-  highbd_entry(d135, 16, 16, ssse3, 10), highbd_entry(d135, 32, 32, ssse3, 10),
-  highbd_entry(d153, 8, 8, ssse3, 10),   highbd_entry(d153, 16, 16, ssse3, 10),
-  highbd_entry(d153, 32, 32, ssse3, 10),
-};
-INSTANTIATE_TEST_CASE_P(SSSE3_TO_C_10, HighbdIntraPredTest,
-                        ::testing::ValuesIn(IntraPredTestVectorSsse3_10));
-
-const IntraPredFunc<HighbdIntraPred> IntraPredTestVectorSsse3_12[] = {
-  highbd_entry(d117, 8, 8, ssse3, 12),   highbd_entry(d117, 16, 16, ssse3, 12),
-  highbd_entry(d117, 32, 32, ssse3, 12), highbd_entry(d135, 8, 8, ssse3, 12),
-  highbd_entry(d135, 16, 16, ssse3, 12), highbd_entry(d135, 32, 32, ssse3, 12),
-  highbd_entry(d153, 8, 8, ssse3, 12),   highbd_entry(d153, 16, 16, ssse3, 12),
-  highbd_entry(d153, 32, 32, ssse3, 12),
-};
-INSTANTIATE_TEST_CASE_P(SSSE3_TO_C_12, HighbdIntraPredTest,
-                        ::testing::ValuesIn(IntraPredTestVectorSsse3_12));
-#endif  // HAVE_SSSE3
-
-#if HAVE_AVX2
-const IntraPredFunc<HighbdIntraPred> IntraPredTestVectorAvx2_8[] = {
-  highbd_entry(d45e, 16, 8, avx2, 8),  highbd_entry(d45e, 16, 16, avx2, 8),
-  highbd_entry(d45e, 16, 32, avx2, 8), highbd_entry(d45e, 32, 16, avx2, 8),
-  highbd_entry(d45e, 32, 32, avx2, 8),
-};
-INSTANTIATE_TEST_CASE_P(AVX2_TO_C_8, HighbdIntraPredTest,
-                        ::testing::ValuesIn(IntraPredTestVectorAvx2_8));
-
-const IntraPredFunc<HighbdIntraPred> IntraPredTestVectorAvx2_10[] = {
-  highbd_entry(d45e, 16, 8, avx2, 10),  highbd_entry(d45e, 16, 16, avx2, 10),
-  highbd_entry(d45e, 16, 32, avx2, 10), highbd_entry(d45e, 32, 16, avx2, 10),
-  highbd_entry(d45e, 32, 32, avx2, 10),
-};
-INSTANTIATE_TEST_CASE_P(AVX2_TO_C_10, HighbdIntraPredTest,
-                        ::testing::ValuesIn(IntraPredTestVectorAvx2_10));
-
-const IntraPredFunc<HighbdIntraPred> IntraPredTestVectorAvx2_12[] = {
-  highbd_entry(d45e, 16, 8, avx2, 12),  highbd_entry(d45e, 16, 16, avx2, 12),
-  highbd_entry(d45e, 16, 32, avx2, 12), highbd_entry(d45e, 32, 16, avx2, 12),
-  highbd_entry(d45e, 32, 32, avx2, 12),
-};
-INSTANTIATE_TEST_CASE_P(AVX2_TO_C_12, HighbdIntraPredTest,
-                        ::testing::ValuesIn(IntraPredTestVectorAvx2_12));
-#endif  // HAVE_AVX2
-#endif  // CONFIG_HIGHBITDEPTH
-
-// -----------------------------------------------------------------------------
-// Low Bit Depth Tests
+  // ---------------------------------------------------------------------------
+  // Low Bit Depth Tests
 
 #define lowbd_entry(type, width, height, opt)                                  \
   IntraPredFunc<IntraPred>(&aom_##type##_predictor_##width##x##height##_##opt, \
@@ -303,6 +213,17 @@ INSTANTIATE_TEST_CASE_P(SSE2, LowbdIntraPredTest,
 
 #endif  // HAVE_SSE2
 
+#if HAVE_SSSE3
+const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorSsse3[] = {
+  lowbd_intrapred(paeth, ssse3),
+  lowbd_intrapred(smooth, ssse3),
+};
+
+INSTANTIATE_TEST_CASE_P(SSSE3, LowbdIntraPredTest,
+                        ::testing::ValuesIn(LowbdIntraPredTestVectorSsse3));
+
+#endif  // HAVE_SSSE3
+
 #if HAVE_AVX2
 const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorAvx2[] = {
   lowbd_entry(dc, 32, 32, avx2),      lowbd_entry(dc_top, 32, 32, avx2),
@@ -318,16 +239,17 @@ const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorAvx2[] = {
 INSTANTIATE_TEST_CASE_P(AVX2, LowbdIntraPredTest,
                         ::testing::ValuesIn(LowbdIntraPredTestVectorAvx2));
 
-#endif  // HAVE_SSE2
+#endif  // HAVE_AVX2
 
-#if HAVE_SSSE3
-const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorSsse3[] = {
-  lowbd_intrapred(paeth, ssse3), lowbd_intrapred(smooth, ssse3),
+#if HAVE_NEON
+const IntraPredFunc<HighbdIntraPred> HighbdIntraPredTestVectorNeon[] = {
+  highbd_entry(dc, 4, 4, neon, 8),   highbd_entry(dc, 8, 8, neon, 8),
+  highbd_entry(dc, 16, 16, neon, 8), highbd_entry(dc, 32, 32, neon, 8),
+  highbd_entry(dc, 64, 64, neon, 8),
 };
 
-INSTANTIATE_TEST_CASE_P(SSSE3, LowbdIntraPredTest,
-                        ::testing::ValuesIn(LowbdIntraPredTestVectorSsse3));
-
-#endif  // HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(NEON, HighbdIntraPredTest,
+                        ::testing::ValuesIn(HighbdIntraPredTestVectorNeon));
 
+#endif  // HAVE_NEON
 }  // namespace
diff --git a/third_party/aom/test/invalid_file_test.cc b/third_party/aom/test/invalid_file_test.cc
new file mode 100644
index 0000000000..869f3da667
--- /dev/null
+++ b/third_party/aom/test/invalid_file_test.cc
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdio>
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/ivf_video_source.h"
+#include "test/util.h"
+#include "test/video_source.h"
+
+namespace {
+
+struct DecodeParam {
+  int threads;
+  const char *filename;
+};
+
+std::ostream &operator<<(std::ostream &os, const DecodeParam &dp) {
+  return os << "threads: " << dp.threads << " file: " << dp.filename;
+}
+
+class InvalidFileTest : public ::libaom_test::DecoderTest,
+                        public ::libaom_test::CodecTestWithParam<DecodeParam> {
+ protected:
+  InvalidFileTest() : DecoderTest(GET_PARAM(0)), res_file_(NULL) {}
+
+  virtual ~InvalidFileTest() {
+    if (res_file_ != NULL) fclose(res_file_);
+  }
+
+  void OpenResFile(const std::string &res_file_name) {
+    res_file_ = libaom_test::OpenTestDataFile(res_file_name);
+    ASSERT_TRUE(res_file_ != NULL)
+        << "Result file open failed. Filename: " << res_file_name;
+  }
+
+  virtual bool HandleDecodeResult(
+      const aom_codec_err_t res_dec,
+      const libaom_test::CompressedVideoSource &video,
+      libaom_test::Decoder *decoder) {
+    EXPECT_TRUE(res_file_ != NULL);
+    int expected_res_dec;
+
+    // Read integer result.
+    const int res = fscanf(res_file_, "%d", &expected_res_dec);
+    EXPECT_NE(res, EOF) << "Read result data failed";
+
+    // Check results match.
+    const DecodeParam input = GET_PARAM(1);
+    if (input.threads > 1) {
+      // The serial decode check is too strict for tile-threaded decoding as
+      // there is no guarantee on the decode order nor which specific error
+      // will take precedence. Currently a tile-level error is not forwarded so
+      // the frame will simply be marked corrupt.
+      EXPECT_TRUE(res_dec == expected_res_dec ||
+                  res_dec == AOM_CODEC_CORRUPT_FRAME)
+          << "Results don't match: frame number = " << video.frame_number()
+          << ". (" << decoder->DecodeError()
+          << "). Expected: " << expected_res_dec << " or "
+          << AOM_CODEC_CORRUPT_FRAME;
+    } else {
+      EXPECT_EQ(expected_res_dec, res_dec)
+          << "Results don't match: frame number = " << video.frame_number()
+          << ". (" << decoder->DecodeError() << ")";
+    }
+
+    return !HasFailure();
+  }
+
+  virtual void HandlePeekResult(libaom_test::Decoder *const /*decoder*/,
+                                libaom_test::CompressedVideoSource * /*video*/,
+                                const aom_codec_err_t /*res_peek*/) {}
+
+  void RunTest() {
+    const DecodeParam input = GET_PARAM(1);
+    aom_codec_dec_cfg_t cfg = { 0, 0, 0, CONFIG_LOWBITDEPTH, { 1 } };
+    cfg.threads = input.threads;
+    const std::string filename = input.filename;
+    libaom_test::IVFVideoSource decode_video(filename);
+    decode_video.Init();
+
+    // Construct result file name. The file holds a list of expected integer
+    // results, one for each decoded frame.  Any result that doesn't match
+    // the files list will cause a test failure.
+    const std::string res_filename = filename + ".res";
+    OpenResFile(res_filename);
+
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&decode_video, cfg));
+  }
+
+ private:
+  FILE *res_file_;
+};
+
+TEST_P(InvalidFileTest, ReturnCode) { RunTest(); }
+
+const DecodeParam kAV1InvalidFileTests[] = {
+  { 1, "invalid-bug-1814.ivf" },
+};
+
+AV1_INSTANTIATE_TEST_CASE(InvalidFileTest,
+                          ::testing::ValuesIn(kAV1InvalidFileTests));
+
+}  // namespace
diff --git a/third_party/aom/test/ivf_video_source.h b/third_party/aom/test/ivf_video_source.h
index 956c145ac9..4b2713537e 100644
--- a/third_party/aom/test/ivf_video_source.h
+++ b/third_party/aom/test/ivf_video_source.h
@@ -10,14 +10,17 @@
  */
 #ifndef TEST_IVF_VIDEO_SOURCE_H_
 #define TEST_IVF_VIDEO_SOURCE_H_
+
 #include <cstdio>
 #include <cstdlib>
 #include <new>
 #include <string>
+
+#include "aom_ports/sanitizer.h"
 #include "test/video_source.h"
 
 namespace libaom_test {
-const unsigned int kCodeBufferSize = 256 * 1024;
+const unsigned int kCodeBufferSize = 256 * 1024 * 1024;
 const unsigned int kIvfFileHdrSize = 32;
 const unsigned int kIvfFrameHdrSize = 12;
 
@@ -41,9 +44,10 @@ class IVFVideoSource : public CompressedVideoSource {
 
   virtual void Init() {
     // Allocate a buffer for read in the compressed video frame.
-    compressed_frame_buf_ = new uint8_t[libaom_test::kCodeBufferSize];
+    compressed_frame_buf_ = new uint8_t[kCodeBufferSize];
     ASSERT_TRUE(compressed_frame_buf_ != NULL)
         << "Allocate frame buffer failed";
+    ASAN_POISON_MEMORY_REGION(compressed_frame_buf_, kCodeBufferSize);
   }
 
   virtual void Begin() {
@@ -81,9 +85,12 @@ class IVFVideoSource : public CompressedVideoSource {
       frame_sz_ = MemGetLe32(frame_hdr);
       ASSERT_LE(frame_sz_, kCodeBufferSize)
           << "Frame is too big for allocated code buffer";
+      ASAN_UNPOISON_MEMORY_REGION(compressed_frame_buf_, kCodeBufferSize);
       ASSERT_EQ(frame_sz_,
                 fread(compressed_frame_buf_, 1, frame_sz_, input_file_))
           << "Failed to read complete frame";
+      ASAN_POISON_MEMORY_REGION(compressed_frame_buf_ + frame_sz_,
+                                kCodeBufferSize - frame_sz_);
     }
   }
 
diff --git a/third_party/aom/test/level_test.cc b/third_party/aom/test/level_test.cc
deleted file mode 100644
index 12f3918179..0000000000
--- a/third_party/aom/test/level_test.cc
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-
-namespace {
-class LevelTest
-    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
-      public ::libaom_test::EncoderTest {
- protected:
-  LevelTest()
-      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
-        cpu_used_(GET_PARAM(2)), min_gf_internal_(24), target_level_(0),
-        level_(0) {}
-  virtual ~LevelTest() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(encoding_mode_);
-    if (encoding_mode_ != ::libaom_test::kRealTime) {
-      cfg_.g_lag_in_frames = 25;
-      cfg_.rc_end_usage = AOM_VBR;
-    } else {
-      cfg_.g_lag_in_frames = 0;
-      cfg_.rc_end_usage = AOM_CBR;
-    }
-    cfg_.rc_2pass_vbr_minsection_pct = 5;
-    cfg_.rc_2pass_vbr_maxsection_pct = 2000;
-    cfg_.rc_target_bitrate = 400;
-    cfg_.rc_max_quantizer = 63;
-    cfg_.rc_min_quantizer = 0;
-  }
-
-  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
-                                  ::libaom_test::Encoder *encoder) {
-    if (video->frame() == 0) {
-      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
-      encoder->Control(AV1E_SET_TARGET_LEVEL, target_level_);
-      encoder->Control(AV1E_SET_MIN_GF_INTERVAL, min_gf_internal_);
-      if (encoding_mode_ != ::libaom_test::kRealTime) {
-        encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
-        encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
-        encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
-      }
-    }
-    encoder->Control(AV1E_GET_LEVEL, &level_);
-    ASSERT_LE(level_, 51);
-    ASSERT_GE(level_, 0);
-  }
-
-  ::libaom_test::TestMode encoding_mode_;
-  int cpu_used_;
-  int min_gf_internal_;
-  int target_level_;
-  int level_;
-};
-
-// Test for keeping level stats only
-TEST_P(LevelTest, TestTargetLevel0) {
-  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
-                                       40);
-  target_level_ = 0;
-  min_gf_internal_ = 4;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_EQ(11, level_);
-
-  cfg_.rc_target_bitrate = 1600;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_EQ(20, level_);
-}
-
-// Test for level control being turned off
-TEST_P(LevelTest, TestTargetLevel255) {
-  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
-                                       30);
-  target_level_ = 255;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-TEST_P(LevelTest, TestTargetLevelApi) {
-  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, 1);
-  static const aom_codec_iface_t *codec = &aom_codec_av1_cx_algo;
-  aom_codec_ctx_t enc;
-  aom_codec_enc_cfg_t cfg;
-  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(codec, &cfg, 0));
-  EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, codec, &cfg, 0));
-  for (int level = 0; level <= 256; ++level) {
-    if (level == 10 || level == 11 || level == 20 || level == 21 ||
-        level == 30 || level == 31 || level == 40 || level == 41 ||
-        level == 50 || level == 51 || level == 52 || level == 60 ||
-        level == 61 || level == 62 || level == 0 || level == 255)
-      EXPECT_EQ(AOM_CODEC_OK,
-                aom_codec_control(&enc, AV1E_SET_TARGET_LEVEL, level));
-    else
-      EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
-                aom_codec_control(&enc, AV1E_SET_TARGET_LEVEL, level));
-  }
-  EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
-}
-
-AV1_INSTANTIATE_TEST_CASE(LevelTest,
-                          ::testing::Values(::libaom_test::kTwoPassGood,
-                                            ::libaom_test::kOnePassGood),
-                          ::testing::Range(0, 9));
-}  // namespace
diff --git a/third_party/aom/test/lossless_test.cc b/third_party/aom/test/lossless_test.cc
index f4978fe21f..3f8e89c815 100644
--- a/third_party/aom/test/lossless_test.cc
+++ b/third_party/aom/test/lossless_test.cc
@@ -7,11 +7,12 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
 #include "test/codec_factory.h"
 #include "test/encode_test_driver.h"
 #include "test/i420_video_source.h"
diff --git a/third_party/aom/test/lpf_8_test.cc b/third_party/aom/test/lpf_8_test.cc
deleted file mode 100644
index 4859a8ee70..0000000000
--- a/third_party/aom/test/lpf_8_test.cc
+++ /dev/null
@@ -1,775 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <cmath>
-#include <cstdlib>
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "av1/common/av1_loopfilter.h"
-#include "av1/common/entropy.h"
-#include "aom/aom_integer.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-// Horizontally and Vertically need 32x32: 8  Coeffs preceeding filtered section
-//                                         16 Coefs within filtered section
-//                                         8  Coeffs following filtered section
-const int kNumCoeffs = 1024;
-
-const int number_of_iterations = 10000;
-
-const int kSpeedTestNum = 500000;
-
-#if CONFIG_HIGHBITDEPTH
-typedef void (*loop_op_t)(uint16_t *s, int p, const uint8_t *blimit,
-                          const uint8_t *limit, const uint8_t *thresh, int bd);
-typedef void (*dual_loop_op_t)(uint16_t *s, int p, const uint8_t *blimit0,
-                               const uint8_t *limit0, const uint8_t *thresh0,
-                               const uint8_t *blimit1, const uint8_t *limit1,
-                               const uint8_t *thresh1, int bd);
-#else
-typedef void (*loop_op_t)(uint8_t *s, int p, const uint8_t *blimit,
-                          const uint8_t *limit, const uint8_t *thresh);
-typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0,
-                               const uint8_t *limit0, const uint8_t *thresh0,
-                               const uint8_t *blimit1, const uint8_t *limit1,
-                               const uint8_t *thresh1);
-#endif  // CONFIG_HIGHBITDEPTH
-
-typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t;
-typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
-
-class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
- public:
-  virtual ~Loop8Test6Param() {}
-  virtual void SetUp() {
-    loopfilter_op_ = GET_PARAM(0);
-    ref_loopfilter_op_ = GET_PARAM(1);
-    bit_depth_ = GET_PARAM(2);
-    mask_ = (1 << bit_depth_) - 1;
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  int bit_depth_;
-  int mask_;
-  loop_op_t loopfilter_op_;
-  loop_op_t ref_loopfilter_op_;
-};
-
-class Loop8Test9Param : public ::testing::TestWithParam<dualloop8_param_t> {
- public:
-  virtual ~Loop8Test9Param() {}
-  virtual void SetUp() {
-    loopfilter_op_ = GET_PARAM(0);
-    ref_loopfilter_op_ = GET_PARAM(1);
-    bit_depth_ = GET_PARAM(2);
-    mask_ = (1 << bit_depth_) - 1;
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  int bit_depth_;
-  int mask_;
-  dual_loop_op_t loopfilter_op_;
-  dual_loop_op_t ref_loopfilter_op_;
-};
-
-TEST_P(Loop8Test6Param, OperationCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = number_of_iterations;
-#if CONFIG_HIGHBITDEPTH
-  int32_t bd = bit_depth_;
-  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
-#else
-  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
-#endif  // CONFIG_HIGHBITDEPTH
-  int err_count_total = 0;
-  int first_failure = -1;
-  for (int i = 0; i < count_test_block; ++i) {
-    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
-    DECLARE_ALIGNED(16, const uint8_t,
-                    blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
-    DECLARE_ALIGNED(16, const uint8_t,
-                    limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = rnd.Rand8();
-    DECLARE_ALIGNED(16, const uint8_t,
-                    thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    int32_t p = kNumCoeffs / 32;
-
-    uint16_t tmp_s[kNumCoeffs];
-    int j = 0;
-    while (j < kNumCoeffs) {
-      uint8_t val = rnd.Rand8();
-      if (val & 0x80) {  // 50% chance to choose a new value.
-        tmp_s[j] = rnd.Rand16();
-        j++;
-      } else {  // 50% chance to repeat previous value in row X times
-        int k = 0;
-        while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
-          if (j < 1) {
-            tmp_s[j] = rnd.Rand16();
-          } else if (val & 0x20) {  // Increment by an value within the limit
-            tmp_s[j] = (tmp_s[j - 1] + (*limit - 1));
-          } else {  // Decrement by an value within the limit
-            tmp_s[j] = (tmp_s[j - 1] - (*limit - 1));
-          }
-          j++;
-        }
-      }
-    }
-    for (j = 0; j < kNumCoeffs; j++) {
-      if (i % 2) {
-        s[j] = tmp_s[j] & mask_;
-      } else {
-        s[j] = tmp_s[p * (j % p) + j / p] & mask_;
-      }
-      ref_s[j] = s[j];
-    }
-#if CONFIG_HIGHBITDEPTH
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
-    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
-#else
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh);
-    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
-#endif  // CONFIG_HIGHBITDEPTH
-
-    for (j = 0; j < kNumCoeffs; ++j) {
-      err_count += ref_s[j] != s[j];
-    }
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-      << "Error: Loop8Test6Param, C output doesn't match SSE2 "
-         "loopfilter output. "
-      << "First failed at test case " << first_failure;
-}
-
-TEST_P(Loop8Test6Param, ValueCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = number_of_iterations;
-#if CONFIG_HIGHBITDEPTH
-  const int32_t bd = bit_depth_;
-  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
-#else
-  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
-#endif  // CONFIG_HIGHBITDEPTH
-  int err_count_total = 0;
-  int first_failure = -1;
-
-  // NOTE: The code in av1_loopfilter.c:update_sharpness computes mblim as a
-  // function of sharpness_lvl and the loopfilter lvl as:
-  // block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
-  // ...
-  // memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
-  //        SIMD_WIDTH);
-  // This means that the largest value for mblim will occur when sharpness_lvl
-  // is equal to 0, and lvl is equal to its greatest value (MAX_LOOP_FILTER).
-  // In this case block_inside_limit will be equal to MAX_LOOP_FILTER and
-  // therefore mblim will be equal to (2 * (lvl + 2) + block_inside_limit) =
-  // 2 * (MAX_LOOP_FILTER + 2) + MAX_LOOP_FILTER = 3 * MAX_LOOP_FILTER + 4
-
-  for (int i = 0; i < count_test_block; ++i) {
-    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
-    DECLARE_ALIGNED(16, const uint8_t,
-                    blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
-    DECLARE_ALIGNED(16, const uint8_t,
-                    limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = rnd.Rand8();
-    DECLARE_ALIGNED(16, const uint8_t,
-                    thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    int32_t p = kNumCoeffs / 32;
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      s[j] = rnd.Rand16() & mask_;
-      ref_s[j] = s[j];
-    }
-#if CONFIG_HIGHBITDEPTH
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
-    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
-#else
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh);
-    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
-#endif  // CONFIG_HIGHBITDEPTH
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      err_count += ref_s[j] != s[j];
-    }
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-      << "Error: Loop8Test6Param, C output doesn't match SSE2 "
-         "loopfilter output. "
-      << "First failed at test case " << first_failure;
-}
-
-TEST_P(Loop8Test6Param, DISABLED_Speed) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = kSpeedTestNum;
-#if CONFIG_HIGHBITDEPTH
-  const int32_t bd = bit_depth_;
-  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
-#else
-  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
-#endif  // CONFIG_HIGHBITDEPTH
-
-  uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
-  DECLARE_ALIGNED(16, const uint8_t,
-                  blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                  tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-  tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
-  DECLARE_ALIGNED(16, const uint8_t,
-                  limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                 tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-  tmp = rnd.Rand8();
-  DECLARE_ALIGNED(16, const uint8_t,
-                  thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                  tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-
-  int32_t p = kNumCoeffs / 32;
-  for (int j = 0; j < kNumCoeffs; ++j) {
-    s[j] = rnd.Rand16() & mask_;
-  }
-
-  for (int i = 0; i < count_test_block; ++i) {
-#if CONFIG_HIGHBITDEPTH
-    loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd);
-#else
-    loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh);
-#endif  // CONFIG_HIGHBITDEPTH
-  }
-}
-
-TEST_P(Loop8Test9Param, OperationCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = number_of_iterations;
-#if CONFIG_HIGHBITDEPTH
-  const int32_t bd = bit_depth_;
-  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
-#else
-  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
-#endif  // CONFIG_HIGHBITDEPTH
-  int err_count_total = 0;
-  int first_failure = -1;
-  for (int i = 0; i < count_test_block; ++i) {
-    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
-    DECLARE_ALIGNED(16, const uint8_t,
-                    blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
-    DECLARE_ALIGNED(16, const uint8_t,
-                    limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = rnd.Rand8();
-    DECLARE_ALIGNED(16, const uint8_t,
-                    thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
-    DECLARE_ALIGNED(16, const uint8_t,
-                    blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
-    DECLARE_ALIGNED(16, const uint8_t,
-                    limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = rnd.Rand8();
-    DECLARE_ALIGNED(16, const uint8_t,
-                    thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    int32_t p = kNumCoeffs / 32;
-    uint16_t tmp_s[kNumCoeffs];
-    int j = 0;
-    const uint8_t limit = *limit0 < *limit1 ? *limit0 : *limit1;
-    while (j < kNumCoeffs) {
-      uint8_t val = rnd.Rand8();
-      if (val & 0x80) {  // 50% chance to choose a new value.
-        tmp_s[j] = rnd.Rand16();
-        j++;
-      } else {  // 50% chance to repeat previous value in row X times.
-        int k = 0;
-        while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
-          if (j < 1) {
-            tmp_s[j] = rnd.Rand16();
-          } else if (val & 0x20) {  // Increment by a value within the limit.
-            tmp_s[j] = (tmp_s[j - 1] + (limit - 1));
-          } else {  // Decrement by an value within the limit.
-            tmp_s[j] = (tmp_s[j - 1] - (limit - 1));
-          }
-          j++;
-        }
-      }
-    }
-    for (j = 0; j < kNumCoeffs; j++) {
-      if (i % 2) {
-        s[j] = tmp_s[j] & mask_;
-      } else {
-        s[j] = tmp_s[p * (j % p) + j / p] & mask_;
-      }
-      ref_s[j] = s[j];
-    }
-#if CONFIG_HIGHBITDEPTH
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
-                       limit1, thresh1, bd);
-    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
-                                            thresh0, blimit1, limit1, thresh1,
-                                            bd));
-#else
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
-                       limit1, thresh1);
-    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
-                                            thresh0, blimit1, limit1, thresh1));
-#endif  // CONFIG_HIGHBITDEPTH
-    for (j = 0; j < kNumCoeffs; ++j) {
-      err_count += ref_s[j] != s[j];
-    }
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-      << "Error: Loop8Test9Param, C output doesn't match SSE2 "
-         "loopfilter output. "
-      << "First failed at test case " << first_failure;
-}
-
-TEST_P(Loop8Test9Param, ValueCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = number_of_iterations;
-#if CONFIG_HIGHBITDEPTH
-  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
-#else
-  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
-#endif  // CONFIG_HIGHBITDEPTH
-  int err_count_total = 0;
-  int first_failure = -1;
-  for (int i = 0; i < count_test_block; ++i) {
-    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
-    DECLARE_ALIGNED(16, const uint8_t,
-                    blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
-    DECLARE_ALIGNED(16, const uint8_t,
-                    limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = rnd.Rand8();
-    DECLARE_ALIGNED(16, const uint8_t,
-                    thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
-    DECLARE_ALIGNED(16, const uint8_t,
-                    blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
-    DECLARE_ALIGNED(16, const uint8_t,
-                    limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = rnd.Rand8();
-    DECLARE_ALIGNED(16, const uint8_t,
-                    thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    int32_t p = kNumCoeffs / 32;  // TODO(pdlf) can we have non-square here?
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      s[j] = rnd.Rand16() & mask_;
-      ref_s[j] = s[j];
-    }
-#if CONFIG_HIGHBITDEPTH
-    const int32_t bd = bit_depth_;
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
-                       limit1, thresh1, bd);
-    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
-                                            thresh0, blimit1, limit1, thresh1,
-                                            bd));
-#else
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,
-                       limit1, thresh1);
-    ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
-                                            thresh0, blimit1, limit1, thresh1));
-#endif  // CONFIG_HIGHBITDEPTH
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      err_count += ref_s[j] != s[j];
-    }
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-      << "Error: Loop8Test9Param, C output doesn't match SSE2"
-         "loopfilter output. "
-      << "First failed at test case " << first_failure;
-}
-
-TEST_P(Loop8Test9Param, DISABLED_Speed) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = kSpeedTestNum;
-#if CONFIG_HIGHBITDEPTH
-  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
-#else
-  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
-#endif  // CONFIG_HIGHBITDEPTH
-
-  uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
-  DECLARE_ALIGNED(16, const uint8_t,
-                  blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-  tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
-  DECLARE_ALIGNED(16, const uint8_t,
-                  limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                  tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-  tmp = rnd.Rand8();
-  DECLARE_ALIGNED(16, const uint8_t,
-                  thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-  tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
-  DECLARE_ALIGNED(16, const uint8_t,
-                  blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-  tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
-  DECLARE_ALIGNED(16, const uint8_t,
-                  limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                  tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-  tmp = rnd.Rand8();
-  DECLARE_ALIGNED(16, const uint8_t,
-                  thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
-                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-  int32_t p = kNumCoeffs / 32;  // TODO(pdlf) can we have non-square here?
-  for (int j = 0; j < kNumCoeffs; ++j) {
-    s[j] = rnd.Rand16() & mask_;
-  }
-
-  for (int i = 0; i < count_test_block; ++i) {
-#if CONFIG_HIGHBITDEPTH
-    const int32_t bd = bit_depth_;
-    loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, limit1,
-                   thresh1, bd);
-#else
-    loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, limit1,
-                   thresh1);
-#endif  // CONFIG_HIGHBITDEPTH
-  }
-}
-
-using std::tr1::make_tuple;
-
-#if HAVE_SSE2
-#if CONFIG_HIGHBITDEPTH
-
-const loop8_param_t kHbdLoop8Test6[] = {
-  make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
-             8),
-  make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 8),
-  make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
-             8),
-  make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2,
-             &aom_highbd_lpf_horizontal_edge_8_c, 8),
-  make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2,
-             &aom_highbd_lpf_horizontal_edge_16_c, 8),
-  make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 8),
-  make_tuple(&aom_highbd_lpf_vertical_16_sse2, &aom_highbd_lpf_vertical_16_c,
-             8),
-  make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
-             10),
-  make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 10),
-  make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
-             10),
-  make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2,
-             &aom_highbd_lpf_horizontal_edge_8_c, 10),
-  make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2,
-             &aom_highbd_lpf_horizontal_edge_16_c, 10),
-  make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 10),
-  make_tuple(&aom_highbd_lpf_vertical_16_sse2, &aom_highbd_lpf_vertical_16_c,
-             10),
-  make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
-             12),
-  make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 12),
-  make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
-             12),
-  make_tuple(&aom_highbd_lpf_horizontal_edge_8_sse2,
-             &aom_highbd_lpf_horizontal_edge_8_c, 12),
-  make_tuple(&aom_highbd_lpf_horizontal_edge_16_sse2,
-             &aom_highbd_lpf_horizontal_edge_16_c, 12),
-  make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 12),
-  make_tuple(&aom_highbd_lpf_vertical_16_sse2, &aom_highbd_lpf_vertical_16_c,
-             12),
-  make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
-             &aom_highbd_lpf_vertical_16_dual_c, 8),
-  make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
-             &aom_highbd_lpf_vertical_16_dual_c, 10),
-  make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
-             &aom_highbd_lpf_vertical_16_dual_c, 12)
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param,
-                        ::testing::ValuesIn(kHbdLoop8Test6));
-#else
-const loop8_param_t kLoop8Test6[] = {
-  make_tuple(&aom_lpf_horizontal_4_sse2, &aom_lpf_horizontal_4_c, 8),
-  make_tuple(&aom_lpf_horizontal_8_sse2, &aom_lpf_horizontal_8_c, 8),
-  make_tuple(&aom_lpf_horizontal_edge_8_sse2, &aom_lpf_horizontal_edge_8_c, 8),
-  make_tuple(&aom_lpf_horizontal_edge_16_sse2, &aom_lpf_horizontal_edge_16_c,
-             8),
-  make_tuple(&aom_lpf_vertical_4_sse2, &aom_lpf_vertical_4_c, 8),
-  make_tuple(&aom_lpf_vertical_8_sse2, &aom_lpf_vertical_8_c, 8),
-  make_tuple(&aom_lpf_vertical_16_sse2, &aom_lpf_vertical_16_c, 8),
-#if !CONFIG_PARALLEL_DEBLOCKING
-  make_tuple(&aom_lpf_vertical_16_dual_sse2, &aom_lpf_vertical_16_dual_c, 8)
-#endif
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param,
-                        ::testing::ValuesIn(kLoop8Test6));
-#endif  // CONFIG_HIGHBITDEPTH
-#endif  // HAVE_SSE2
-
-#if HAVE_AVX2
-#if CONFIG_HIGHBITDEPTH
-
-const loop8_param_t kHbdLoop8Test6Avx2[] = {
-  make_tuple(&aom_highbd_lpf_horizontal_edge_16_avx2,
-             &aom_highbd_lpf_horizontal_edge_16_c, 8),
-  make_tuple(&aom_highbd_lpf_horizontal_edge_16_avx2,
-             &aom_highbd_lpf_horizontal_edge_16_c, 10),
-  make_tuple(&aom_highbd_lpf_horizontal_edge_16_avx2,
-             &aom_highbd_lpf_horizontal_edge_16_c, 12),
-  make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,
-             &aom_highbd_lpf_vertical_16_dual_c, 8),
-  make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,
-             &aom_highbd_lpf_vertical_16_dual_c, 10),
-  make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,
-             &aom_highbd_lpf_vertical_16_dual_c, 12)
-};
-
-INSTANTIATE_TEST_CASE_P(AVX2, Loop8Test6Param,
-                        ::testing::ValuesIn(kHbdLoop8Test6Avx2));
-
-#endif
-#endif
-
-#if HAVE_AVX2 && (!CONFIG_HIGHBITDEPTH) && (!CONFIG_PARALLEL_DEBLOCKING)
-INSTANTIATE_TEST_CASE_P(
-    AVX2, Loop8Test6Param,
-    ::testing::Values(make_tuple(&aom_lpf_horizontal_edge_8_avx2,
-                                 &aom_lpf_horizontal_edge_8_c, 8),
-                      make_tuple(&aom_lpf_horizontal_edge_16_avx2,
-                                 &aom_lpf_horizontal_edge_16_c, 8)));
-#endif
-
-#if HAVE_SSE2
-#if CONFIG_HIGHBITDEPTH
-const dualloop8_param_t kHbdLoop8Test9[] = {
-  make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
-             &aom_highbd_lpf_horizontal_4_dual_c, 8),
-  make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
-             &aom_highbd_lpf_horizontal_8_dual_c, 8),
-  make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
-             &aom_highbd_lpf_vertical_4_dual_c, 8),
-  make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
-             &aom_highbd_lpf_vertical_8_dual_c, 8),
-  make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
-             &aom_highbd_lpf_horizontal_4_dual_c, 10),
-  make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
-             &aom_highbd_lpf_horizontal_8_dual_c, 10),
-  make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
-             &aom_highbd_lpf_vertical_4_dual_c, 10),
-  make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
-             &aom_highbd_lpf_vertical_8_dual_c, 10),
-  make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
-             &aom_highbd_lpf_horizontal_4_dual_c, 12),
-  make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
-             &aom_highbd_lpf_horizontal_8_dual_c, 12),
-  make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
-             &aom_highbd_lpf_vertical_4_dual_c, 12),
-  make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
-             &aom_highbd_lpf_vertical_8_dual_c, 12)
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param,
-                        ::testing::ValuesIn(kHbdLoop8Test9));
-#else
-#if !CONFIG_PARALLEL_DEBLOCKING
-const dualloop8_param_t kLoop8Test9[] = {
-  make_tuple(&aom_lpf_horizontal_4_dual_sse2, &aom_lpf_horizontal_4_dual_c, 8),
-  make_tuple(&aom_lpf_horizontal_8_dual_sse2, &aom_lpf_horizontal_8_dual_c, 8),
-  make_tuple(&aom_lpf_vertical_4_dual_sse2, &aom_lpf_vertical_4_dual_c, 8),
-  make_tuple(&aom_lpf_vertical_8_dual_sse2, &aom_lpf_vertical_8_dual_c, 8)
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param,
-                        ::testing::ValuesIn(kLoop8Test9));
-#endif
-#endif  // CONFIG_HIGHBITDEPTH
-#endif  // HAVE_SSE2
-
-#if HAVE_AVX2
-#if CONFIG_HIGHBITDEPTH
-const dualloop8_param_t kHbdLoop8Test9Avx2[] = {
-  make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
-             &aom_highbd_lpf_horizontal_4_dual_c, 8),
-  make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
-             &aom_highbd_lpf_horizontal_4_dual_c, 10),
-  make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
-             &aom_highbd_lpf_horizontal_4_dual_c, 12),
-  make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
-             &aom_highbd_lpf_horizontal_8_dual_c, 8),
-  make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
-             &aom_highbd_lpf_horizontal_8_dual_c, 10),
-  make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
-             &aom_highbd_lpf_horizontal_8_dual_c, 12),
-  make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
-             &aom_highbd_lpf_vertical_4_dual_c, 8),
-  make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
-             &aom_highbd_lpf_vertical_4_dual_c, 10),
-  make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
-             &aom_highbd_lpf_vertical_4_dual_c, 12),
-  make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
-             &aom_highbd_lpf_vertical_8_dual_c, 8),
-  make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
-             &aom_highbd_lpf_vertical_8_dual_c, 10),
-  make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
-             &aom_highbd_lpf_vertical_8_dual_c, 12),
-};
-
-INSTANTIATE_TEST_CASE_P(AVX2, Loop8Test9Param,
-                        ::testing::ValuesIn(kHbdLoop8Test9Avx2));
-#endif
-#endif
-
-#if HAVE_NEON && (!CONFIG_PARALLEL_DEBLOCKING)
-#if CONFIG_HIGHBITDEPTH
-// No neon high bitdepth functions.
-#else
-INSTANTIATE_TEST_CASE_P(
-    NEON, Loop8Test6Param,
-    ::testing::Values(
-#if HAVE_NEON_ASM
-        // Using #if inside the macro is unsupported on MSVS but the tests are
-        // not
-        // currently built for MSVS with ARM and NEON.
-        make_tuple(&aom_lpf_horizontal_edge_8_neon,
-                   &aom_lpf_horizontal_edge_8_c, 8),
-        make_tuple(&aom_lpf_horizontal_edge_16_neon,
-                   &aom_lpf_horizontal_edge_16_c, 8),
-        make_tuple(&aom_lpf_vertical_16_neon, &aom_lpf_vertical_16_c, 8),
-        make_tuple(&aom_lpf_vertical_16_dual_neon, &aom_lpf_vertical_16_dual_c,
-                   8),
-#endif  // HAVE_NEON_ASM
-        make_tuple(&aom_lpf_horizontal_8_neon, &aom_lpf_horizontal_8_c, 8),
-        make_tuple(&aom_lpf_vertical_8_neon, &aom_lpf_vertical_8_c, 8),
-        make_tuple(&aom_lpf_horizontal_4_neon, &aom_lpf_horizontal_4_c, 8),
-        make_tuple(&aom_lpf_vertical_4_neon, &aom_lpf_vertical_4_c, 8)));
-INSTANTIATE_TEST_CASE_P(NEON, Loop8Test9Param,
-                        ::testing::Values(
-#if HAVE_NEON_ASM
-                            make_tuple(&aom_lpf_horizontal_8_dual_neon,
-                                       &aom_lpf_horizontal_8_dual_c, 8),
-                            make_tuple(&aom_lpf_vertical_8_dual_neon,
-                                       &aom_lpf_vertical_8_dual_c, 8),
-#endif  // HAVE_NEON_ASM
-                            make_tuple(&aom_lpf_horizontal_4_dual_neon,
-                                       &aom_lpf_horizontal_4_dual_c, 8),
-                            make_tuple(&aom_lpf_vertical_4_dual_neon,
-                                       &aom_lpf_vertical_4_dual_c, 8)));
-#endif  // CONFIG_HIGHBITDEPTH
-#endif  // HAVE_NEON && (!CONFIG_PARALLEL_DEBLOCKING)
-
-#if HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH && (!CONFIG_PARALLEL_DEBLOCKING)
-INSTANTIATE_TEST_CASE_P(
-    DSPR2, Loop8Test6Param,
-    ::testing::Values(
-        make_tuple(&aom_lpf_horizontal_4_dspr2, &aom_lpf_horizontal_4_c, 8),
-        make_tuple(&aom_lpf_horizontal_8_dspr2, &aom_lpf_horizontal_8_c, 8),
-        make_tuple(&aom_lpf_horizontal_edge_8, &aom_lpf_horizontal_edge_8, 8),
-        make_tuple(&aom_lpf_horizontal_edge_16, &aom_lpf_horizontal_edge_16, 8),
-        make_tuple(&aom_lpf_vertical_4_dspr2, &aom_lpf_vertical_4_c, 8),
-        make_tuple(&aom_lpf_vertical_8_dspr2, &aom_lpf_vertical_8_c, 8),
-        make_tuple(&aom_lpf_vertical_16_dspr2, &aom_lpf_vertical_16_c, 8),
-        make_tuple(&aom_lpf_vertical_16_dual_dspr2, &aom_lpf_vertical_16_dual_c,
-                   8)));
-
-INSTANTIATE_TEST_CASE_P(
-    DSPR2, Loop8Test9Param,
-    ::testing::Values(make_tuple(&aom_lpf_horizontal_4_dual_dspr2,
-                                 &aom_lpf_horizontal_4_dual_c, 8),
-                      make_tuple(&aom_lpf_horizontal_8_dual_dspr2,
-                                 &aom_lpf_horizontal_8_dual_c, 8),
-                      make_tuple(&aom_lpf_vertical_4_dual_dspr2,
-                                 &aom_lpf_vertical_4_dual_c, 8),
-                      make_tuple(&aom_lpf_vertical_8_dual_dspr2,
-                                 &aom_lpf_vertical_8_dual_c, 8)));
-#endif  // HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH && (!CONFIG_PARALLEL_DEBLOCKING)
-
-#if HAVE_MSA && (!CONFIG_HIGHBITDEPTH) && (!CONFIG_PARALLEL_DEBLOCKING)
-INSTANTIATE_TEST_CASE_P(
-    MSA, Loop8Test6Param,
-    ::testing::Values(
-        make_tuple(&aom_lpf_horizontal_4_msa, &aom_lpf_horizontal_4_c, 8),
-        make_tuple(&aom_lpf_horizontal_8_msa, &aom_lpf_horizontal_8_c, 8),
-        make_tuple(&aom_lpf_horizontal_edge_8_msa, &aom_lpf_horizontal_edge_8_c,
-                   8),
-        make_tuple(&aom_lpf_horizontal_edge_16_msa,
-                   &aom_lpf_horizontal_edge_16_c, 8),
-        make_tuple(&aom_lpf_vertical_4_msa, &aom_lpf_vertical_4_c, 8),
-        make_tuple(&aom_lpf_vertical_8_msa, &aom_lpf_vertical_8_c, 8),
-        make_tuple(&aom_lpf_vertical_16_msa, &aom_lpf_vertical_16_c, 8)));
-
-INSTANTIATE_TEST_CASE_P(
-    MSA, Loop8Test9Param,
-    ::testing::Values(make_tuple(&aom_lpf_horizontal_4_dual_msa,
-                                 &aom_lpf_horizontal_4_dual_c, 8),
-                      make_tuple(&aom_lpf_horizontal_8_dual_msa,
-                                 &aom_lpf_horizontal_8_dual_c, 8),
-                      make_tuple(&aom_lpf_vertical_4_dual_msa,
-                                 &aom_lpf_vertical_4_dual_c, 8),
-                      make_tuple(&aom_lpf_vertical_8_dual_msa,
-                                 &aom_lpf_vertical_8_dual_c, 8)));
-#endif  // HAVE_MSA && (!CONFIG_HIGHBITDEPTH) && (!CONFIG_PARALLEL_DEBLOCKING)
-
-}  // namespace
diff --git a/third_party/aom/test/lpf_test.cc b/third_party/aom/test/lpf_test.cc
new file mode 100644
index 0000000000..1e2862ac8c
--- /dev/null
+++ b/third_party/aom/test/lpf_test.cc
@@ -0,0 +1,623 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cmath>
+#include <cstdlib>
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "av1/common/av1_loopfilter.h"
+#include "av1/common/entropy.h"
+#include "aom/aom_integer.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+// Horizontally and Vertically need 32x32: 8  Coeffs preceeding filtered section
+//                                         16 Coefs within filtered section
+//                                         8  Coeffs following filtered section
+const int kNumCoeffs = 1024;
+
+const int number_of_iterations = 10000;
+
+const int kSpeedTestNum = 500000;
+
+#define LOOP_PARAM \
+  int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh
+#define DUAL_LOOP_PARAM                                                      \
+  int p, const uint8_t *blimit0, const uint8_t *limit0,                      \
+      const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, \
+      const uint8_t *thresh1
+
+typedef void (*loop_op_t)(uint8_t *s, LOOP_PARAM);
+typedef void (*dual_loop_op_t)(uint8_t *s, DUAL_LOOP_PARAM);
+typedef void (*hbdloop_op_t)(uint16_t *s, LOOP_PARAM, int bd);
+typedef void (*hbddual_loop_op_t)(uint16_t *s, DUAL_LOOP_PARAM, int bd);
+
+typedef ::testing::tuple<hbdloop_op_t, hbdloop_op_t, int> hbdloop_param_t;
+typedef ::testing::tuple<hbddual_loop_op_t, hbddual_loop_op_t, int>
+    hbddual_loop_param_t;
+typedef ::testing::tuple<loop_op_t, loop_op_t, int> loop_param_t;
+typedef ::testing::tuple<dual_loop_op_t, dual_loop_op_t, int> dual_loop_param_t;
+
+template <typename Pixel_t, int PIXEL_WIDTH_t>
+void InitInput(Pixel_t *s, Pixel_t *ref_s, ACMRandom *rnd, const uint8_t limit,
+               const int mask, const int32_t p, const int i) {
+  uint16_t tmp_s[kNumCoeffs];
+
+  for (int j = 0; j < kNumCoeffs;) {
+    const uint8_t val = rnd->Rand8();
+    if (val & 0x80) {  // 50% chance to choose a new value.
+      tmp_s[j] = rnd->Rand16();
+      j++;
+    } else {  // 50% chance to repeat previous value in row X times.
+      int k = 0;
+      while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
+        if (j < 1) {
+          tmp_s[j] = rnd->Rand16();
+        } else if (val & 0x20) {  // Increment by a value within the limit.
+          tmp_s[j] = tmp_s[j - 1] + (limit - 1);
+        } else {  // Decrement by a value within the limit.
+          tmp_s[j] = tmp_s[j - 1] - (limit - 1);
+        }
+        j++;
+      }
+    }
+  }
+
+  for (int j = 0; j < kNumCoeffs;) {
+    const uint8_t val = rnd->Rand8();
+    if (val & 0x80) {
+      j++;
+    } else {  // 50% chance to repeat previous value in column X times.
+      int k = 0;
+      while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
+        if (j < 1) {
+          tmp_s[j] = rnd->Rand16();
+        } else if (val & 0x20) {  // Increment by a value within the limit.
+          tmp_s[(j % 32) * 32 + j / 32] =
+              tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] + (limit - 1);
+        } else {  // Decrement by a value within the limit.
+          tmp_s[(j % 32) * 32 + j / 32] =
+              tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] - (limit - 1);
+        }
+        j++;
+      }
+    }
+  }
+
+  for (int j = 0; j < kNumCoeffs; j++) {
+    if (i % 2) {
+      s[j] = tmp_s[j] & mask;
+    } else {
+      s[j] = tmp_s[p * (j % p) + j / p] & mask;
+    }
+    ref_s[j] = s[j];
+  }
+}
+
+uint8_t GetOuterThresh(ACMRandom *rnd) {
+  return static_cast<uint8_t>(rnd->PseudoUniform(3 * MAX_LOOP_FILTER + 5));
+}
+
+uint8_t GetInnerThresh(ACMRandom *rnd) {
+  return static_cast<uint8_t>(rnd->PseudoUniform(MAX_LOOP_FILTER + 1));
+}
+
+uint8_t GetHevThresh(ACMRandom *rnd) {
+  return static_cast<uint8_t>(rnd->PseudoUniform(MAX_LOOP_FILTER + 1) >> 4);
+}
+
+template <typename func_type_t, typename params_t>
+class LoopTestParam : public ::testing::TestWithParam<params_t> {
+ public:
+  virtual ~LoopTestParam() {}
+  virtual void SetUp() {
+    loopfilter_op_ = ::testing::get<0>(this->GetParam());
+    ref_loopfilter_op_ = ::testing::get<1>(this->GetParam());
+    bit_depth_ = ::testing::get<2>(this->GetParam());
+    mask_ = (1 << bit_depth_) - 1;
+  }
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+  int bit_depth_;
+  int mask_;
+  func_type_t loopfilter_op_;
+  func_type_t ref_loopfilter_op_;
+};
+
+void call_filter(uint16_t *s, LOOP_PARAM, int bd, hbdloop_op_t op) {
+  op(s, p, blimit, limit, thresh, bd);
+}
+void call_filter(uint8_t *s, LOOP_PARAM, int bd, loop_op_t op) {
+  (void)bd;
+  op(s, p, blimit, limit, thresh);
+}
+void call_dualfilter(uint16_t *s, DUAL_LOOP_PARAM, int bd,
+                     hbddual_loop_op_t op) {
+  op(s, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1, bd);
+}
+void call_dualfilter(uint8_t *s, DUAL_LOOP_PARAM, int bd, dual_loop_op_t op) {
+  (void)bd;
+  op(s, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
+};
+
+typedef LoopTestParam<hbdloop_op_t, hbdloop_param_t> Loop8Test6Param_hbd;
+typedef LoopTestParam<loop_op_t, loop_param_t> Loop8Test6Param_lbd;
+typedef LoopTestParam<hbddual_loop_op_t, hbddual_loop_param_t>
+    Loop8Test9Param_hbd;
+typedef LoopTestParam<dual_loop_op_t, dual_loop_param_t> Loop8Test9Param_lbd;
+
+#define OPCHECK(a, b)                                                          \
+  ACMRandom rnd(ACMRandom::DeterministicSeed());                               \
+  const int count_test_block = number_of_iterations;                           \
+  const int32_t p = kNumCoeffs / 32;                                           \
+  DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                        \
+  DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]);                                    \
+  int err_count_total = 0;                                                     \
+  int first_failure = -1;                                                      \
+  for (int i = 0; i < count_test_block; ++i) {                                 \
+    int err_count = 0;                                                         \
+    uint8_t tmp = GetOuterThresh(&rnd);                                        \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
+    tmp = GetInnerThresh(&rnd);                                                \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,     \
+                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };   \
+    tmp = GetHevThresh(&rnd);                                                  \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
+    InitInput<a, b>(s, ref_s, &rnd, *limit, mask_, p, i);                      \
+    call_filter(ref_s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_,       \
+                ref_loopfilter_op_);                                           \
+    ASM_REGISTER_STATE_CHECK(call_filter(s + 8 + p * 8, p, blimit, limit,      \
+                                         thresh, bit_depth_, loopfilter_op_)); \
+    for (int j = 0; j < kNumCoeffs; ++j) {                                     \
+      err_count += ref_s[j] != s[j];                                           \
+    }                                                                          \
+    if (err_count && !err_count_total) {                                       \
+      first_failure = i;                                                       \
+    }                                                                          \
+    err_count_total += err_count;                                              \
+  }                                                                            \
+  EXPECT_EQ(0, err_count_total)                                                \
+      << "Error: Loop8Test6Param, C output doesn't match SIMD "                \
+         "loopfilter output. "                                                 \
+      << "First failed at test case " << first_failure;
+
+TEST_P(Loop8Test6Param_hbd, OperationCheck) { OPCHECK(uint16_t, 16); }
+TEST_P(Loop8Test6Param_lbd, OperationCheck) { OPCHECK(uint8_t, 8); }
+
+#define VALCHECK(a, b)                                                         \
+  ACMRandom rnd(ACMRandom::DeterministicSeed());                               \
+  const int count_test_block = number_of_iterations;                           \
+  DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                        \
+  DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]);                                    \
+  int err_count_total = 0;                                                     \
+  int first_failure = -1;                                                      \
+  for (int i = 0; i < count_test_block; ++i) {                                 \
+    int err_count = 0;                                                         \
+    uint8_t tmp = GetOuterThresh(&rnd);                                        \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
+    tmp = GetInnerThresh(&rnd);                                                \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,     \
+                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };   \
+    tmp = GetHevThresh(&rnd);                                                  \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
+    int32_t p = kNumCoeffs / 32;                                               \
+    for (int j = 0; j < kNumCoeffs; ++j) {                                     \
+      s[j] = rnd.Rand16() & mask_;                                             \
+      ref_s[j] = s[j];                                                         \
+    }                                                                          \
+    call_filter(ref_s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_,       \
+                ref_loopfilter_op_);                                           \
+    ASM_REGISTER_STATE_CHECK(call_filter(s + 8 + p * 8, p, blimit, limit,      \
+                                         thresh, bit_depth_, loopfilter_op_)); \
+    for (int j = 0; j < kNumCoeffs; ++j) {                                     \
+      err_count += ref_s[j] != s[j];                                           \
+    }                                                                          \
+    if (err_count && !err_count_total) {                                       \
+      first_failure = i;                                                       \
+    }                                                                          \
+    err_count_total += err_count;                                              \
+  }                                                                            \
+  EXPECT_EQ(0, err_count_total)                                                \
+      << "Error: Loop8Test6Param, C output doesn't match SIMD "                \
+         "loopfilter output. "                                                 \
+      << "First failed at test case " << first_failure;
+
+TEST_P(Loop8Test6Param_hbd, ValueCheck) { VALCHECK(uint16_t, 16); }
+TEST_P(Loop8Test6Param_lbd, ValueCheck) { VALCHECK(uint8_t, 8); }
+
+#define SPEEDCHECK(a, b)                                                      \
+  ACMRandom rnd(ACMRandom::DeterministicSeed());                              \
+  const int count_test_block = kSpeedTestNum;                                 \
+  const int32_t bd = bit_depth_;                                              \
+  DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                       \
+  uint8_t tmp = GetOuterThresh(&rnd);                                         \
+  DECLARE_ALIGNED(16, const uint8_t,                                          \
+                  blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,     \
+                                  tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };   \
+  tmp = GetInnerThresh(&rnd);                                                 \
+  DECLARE_ALIGNED(16, const uint8_t,                                          \
+                  limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,      \
+                                 tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };    \
+  tmp = GetHevThresh(&rnd);                                                   \
+  DECLARE_ALIGNED(16, const uint8_t,                                          \
+                  thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,     \
+                                  tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };   \
+  int32_t p = kNumCoeffs / 32;                                                \
+  for (int j = 0; j < kNumCoeffs; ++j) {                                      \
+    s[j] = rnd.Rand16() & mask_;                                              \
+  }                                                                           \
+  for (int i = 0; i < count_test_block; ++i) {                                \
+    call_filter(s + 8 + p * 8, p, blimit, limit, thresh, bd, loopfilter_op_); \
+  }
+
+TEST_P(Loop8Test6Param_hbd, DISABLED_Speed) { SPEEDCHECK(uint16_t, 16); }
+TEST_P(Loop8Test6Param_lbd, DISABLED_Speed) { SPEEDCHECK(uint8_t, 8); }
+
+#define OPCHECKd(a, b)                                                         \
+  ACMRandom rnd(ACMRandom::DeterministicSeed());                               \
+  const int count_test_block = number_of_iterations;                           \
+  DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                        \
+  DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]);                                    \
+  int err_count_total = 0;                                                     \
+  int first_failure = -1;                                                      \
+  for (int i = 0; i < count_test_block; ++i) {                                 \
+    int err_count = 0;                                                         \
+    uint8_t tmp = GetOuterThresh(&rnd);                                        \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+    tmp = GetInnerThresh(&rnd);                                                \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
+    tmp = GetHevThresh(&rnd);                                                  \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+    tmp = GetOuterThresh(&rnd);                                                \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+    tmp = GetInnerThresh(&rnd);                                                \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
+    tmp = GetHevThresh(&rnd);                                                  \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+    int32_t p = kNumCoeffs / 32;                                               \
+    const uint8_t limit = *limit0 < *limit1 ? *limit0 : *limit1;               \
+    InitInput<a, b>(s, ref_s, &rnd, limit, mask_, p, i);                       \
+    call_dualfilter(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,   \
+                    limit1, thresh1, bit_depth_, ref_loopfilter_op_);          \
+    ASM_REGISTER_STATE_CHECK(                                                  \
+        call_dualfilter(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,   \
+                        limit1, thresh1, bit_depth_, loopfilter_op_));         \
+    for (int j = 0; j < kNumCoeffs; ++j) {                                     \
+      err_count += ref_s[j] != s[j];                                           \
+    }                                                                          \
+    if (err_count && !err_count_total) {                                       \
+      first_failure = i;                                                       \
+    }                                                                          \
+    err_count_total += err_count;                                              \
+  }                                                                            \
+  EXPECT_EQ(0, err_count_total)                                                \
+      << "Error: Loop8Test9Param, C output doesn't match SIMD "                \
+         "loopfilter output. "                                                 \
+      << "First failed at test case " << first_failure;
+
+TEST_P(Loop8Test9Param_hbd, OperationCheck) { OPCHECKd(uint16_t, 16); }
+TEST_P(Loop8Test9Param_lbd, OperationCheck) { OPCHECKd(uint8_t, 8); }
+
+#define VALCHECKd(a, b)                                                        \
+  ACMRandom rnd(ACMRandom::DeterministicSeed());                               \
+  const int count_test_block = number_of_iterations;                           \
+  DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                        \
+  DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]);                                    \
+  int err_count_total = 0;                                                     \
+  int first_failure = -1;                                                      \
+  for (int i = 0; i < count_test_block; ++i) {                                 \
+    int err_count = 0;                                                         \
+    uint8_t tmp = GetOuterThresh(&rnd);                                        \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+    tmp = GetInnerThresh(&rnd);                                                \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
+    tmp = GetHevThresh(&rnd);                                                  \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+    tmp = GetOuterThresh(&rnd);                                                \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+    tmp = GetInnerThresh(&rnd);                                                \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
+    tmp = GetHevThresh(&rnd);                                                  \
+    DECLARE_ALIGNED(16, const uint8_t,                                         \
+                    thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+    int32_t p = kNumCoeffs / 32;                                               \
+    for (int j = 0; j < kNumCoeffs; ++j) {                                     \
+      s[j] = rnd.Rand16() & mask_;                                             \
+      ref_s[j] = s[j];                                                         \
+    }                                                                          \
+    call_dualfilter(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,   \
+                    limit1, thresh1, bit_depth_, ref_loopfilter_op_);          \
+    ASM_REGISTER_STATE_CHECK(                                                  \
+        call_dualfilter(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,   \
+                        limit1, thresh1, bit_depth_, loopfilter_op_));         \
+    for (int j = 0; j < kNumCoeffs; ++j) {                                     \
+      err_count += ref_s[j] != s[j];                                           \
+    }                                                                          \
+    if (err_count && !err_count_total) {                                       \
+      first_failure = i;                                                       \
+    }                                                                          \
+    err_count_total += err_count;                                              \
+  }                                                                            \
+  EXPECT_EQ(0, err_count_total)                                                \
+      << "Error: Loop8Test9Param, C output doesn't match SIMD "                \
+         "loopfilter output. "                                                 \
+      << "First failed at test case " << first_failure;
+
+TEST_P(Loop8Test9Param_hbd, ValueCheck) { VALCHECKd(uint16_t, 16); }
+TEST_P(Loop8Test9Param_lbd, ValueCheck) { VALCHECKd(uint8_t, 8); }
+
+#define SPEEDCHECKd(a, b)                                                    \
+  ACMRandom rnd(ACMRandom::DeterministicSeed());                             \
+  const int count_test_block = kSpeedTestNum;                                \
+  DECLARE_ALIGNED(b, a, s[kNumCoeffs]);                                      \
+  uint8_t tmp = GetOuterThresh(&rnd);                                        \
+  DECLARE_ALIGNED(16, const uint8_t,                                         \
+                  blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+  tmp = GetInnerThresh(&rnd);                                                \
+  DECLARE_ALIGNED(16, const uint8_t,                                         \
+                  limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                  tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
+  tmp = GetHevThresh(&rnd);                                                  \
+  DECLARE_ALIGNED(16, const uint8_t,                                         \
+                  thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+  tmp = GetOuterThresh(&rnd);                                                \
+  DECLARE_ALIGNED(16, const uint8_t,                                         \
+                  blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+  tmp = GetInnerThresh(&rnd);                                                \
+  DECLARE_ALIGNED(16, const uint8_t,                                         \
+                  limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,    \
+                                  tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };  \
+  tmp = GetHevThresh(&rnd);                                                  \
+  DECLARE_ALIGNED(16, const uint8_t,                                         \
+                  thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,   \
+                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
+  int32_t p = kNumCoeffs / 32;                                               \
+  for (int j = 0; j < kNumCoeffs; ++j) {                                     \
+    s[j] = rnd.Rand16() & mask_;                                             \
+  }                                                                          \
+  for (int i = 0; i < count_test_block; ++i) {                               \
+    call_dualfilter(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1,     \
+                    limit1, thresh1, bit_depth_, loopfilter_op_);            \
+  }
+
+TEST_P(Loop8Test9Param_hbd, DISABLED_Speed) { SPEEDCHECKd(uint16_t, 16); }
+TEST_P(Loop8Test9Param_lbd, DISABLED_Speed) { SPEEDCHECKd(uint8_t, 8); }
+
+using ::testing::make_tuple;
+
+#if HAVE_SSE2
+
+const hbdloop_param_t kHbdLoop8Test6[] = {
+  make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
+             8),
+  make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c,
+             8),
+  make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
+             8),
+  make_tuple(&aom_highbd_lpf_horizontal_14_sse2,
+             &aom_highbd_lpf_horizontal_14_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 8),
+
+  make_tuple(&aom_highbd_lpf_vertical_14_sse2, &aom_highbd_lpf_vertical_14_c,
+             8),
+  make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
+             10),
+  make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c,
+             10),
+  make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
+             10),
+  make_tuple(&aom_highbd_lpf_horizontal_14_sse2,
+             &aom_highbd_lpf_horizontal_14_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_14_sse2, &aom_highbd_lpf_vertical_14_c,
+             10),
+  make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
+             12),
+  make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 12),
+  make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c,
+             12),
+  make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
+             12),
+  make_tuple(&aom_highbd_lpf_horizontal_14_sse2,
+             &aom_highbd_lpf_horizontal_14_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_14_sse2, &aom_highbd_lpf_vertical_14_c,
+             12),
+  make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 12)
+};
+
+INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param_hbd,
+                        ::testing::ValuesIn(kHbdLoop8Test6));
+
+const loop_param_t kLoop8Test6[] = {
+  make_tuple(&aom_lpf_horizontal_4_sse2, &aom_lpf_horizontal_4_c, 8),
+  make_tuple(&aom_lpf_horizontal_8_sse2, &aom_lpf_horizontal_8_c, 8),
+  make_tuple(&aom_lpf_horizontal_6_sse2, &aom_lpf_horizontal_6_c, 8),
+  make_tuple(&aom_lpf_vertical_6_sse2, &aom_lpf_vertical_6_c, 8),
+  make_tuple(&aom_lpf_horizontal_14_sse2, &aom_lpf_horizontal_14_c, 8),
+  make_tuple(&aom_lpf_vertical_4_sse2, &aom_lpf_vertical_4_c, 8),
+  make_tuple(&aom_lpf_vertical_8_sse2, &aom_lpf_vertical_8_c, 8),
+  make_tuple(&aom_lpf_vertical_14_sse2, &aom_lpf_vertical_14_c, 8),
+};
+
+INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param_lbd,
+                        ::testing::ValuesIn(kLoop8Test6));
+
+const dual_loop_param_t kLoop8Test9[] = {
+  make_tuple(&aom_lpf_horizontal_4_dual_sse2, &aom_lpf_horizontal_4_dual_c, 8),
+  make_tuple(&aom_lpf_vertical_4_dual_sse2, &aom_lpf_vertical_4_dual_c, 8),
+  make_tuple(&aom_lpf_horizontal_6_dual_sse2, &aom_lpf_horizontal_6_dual_c, 8),
+  make_tuple(&aom_lpf_vertical_6_dual_sse2, &aom_lpf_vertical_6_dual_c, 8),
+  make_tuple(&aom_lpf_horizontal_8_dual_sse2, &aom_lpf_horizontal_8_dual_c, 8),
+  make_tuple(&aom_lpf_vertical_8_dual_sse2, &aom_lpf_vertical_8_dual_c, 8),
+  make_tuple(&aom_lpf_horizontal_14_dual_sse2, &aom_lpf_horizontal_14_dual_c,
+             8),
+  make_tuple(&aom_lpf_vertical_14_dual_sse2, &aom_lpf_vertical_14_dual_c, 8)
+};
+
+INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param_lbd,
+                        ::testing::ValuesIn(kLoop8Test9));
+
+#endif  // HAVE_SSE2
+
+#if HAVE_SSE2
+const hbddual_loop_param_t kHbdLoop8Test9[] = {
+  make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
+             &aom_highbd_lpf_horizontal_4_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_6_dual_sse2,
+             &aom_highbd_lpf_horizontal_6_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
+             &aom_highbd_lpf_horizontal_8_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_14_dual_sse2,
+             &aom_highbd_lpf_horizontal_14_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
+             &aom_highbd_lpf_vertical_4_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_6_dual_sse2,
+             &aom_highbd_lpf_vertical_6_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
+             &aom_highbd_lpf_vertical_8_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_14_dual_sse2,
+             &aom_highbd_lpf_vertical_14_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
+             &aom_highbd_lpf_horizontal_4_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_6_dual_sse2,
+             &aom_highbd_lpf_horizontal_6_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
+             &aom_highbd_lpf_horizontal_8_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_14_dual_sse2,
+             &aom_highbd_lpf_horizontal_14_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
+             &aom_highbd_lpf_vertical_4_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_6_dual_sse2,
+             &aom_highbd_lpf_vertical_6_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
+             &aom_highbd_lpf_vertical_8_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_14_dual_sse2,
+             &aom_highbd_lpf_vertical_14_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
+             &aom_highbd_lpf_horizontal_4_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_horizontal_6_dual_sse2,
+             &aom_highbd_lpf_horizontal_6_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
+             &aom_highbd_lpf_horizontal_8_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_horizontal_14_dual_sse2,
+             &aom_highbd_lpf_horizontal_14_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
+             &aom_highbd_lpf_vertical_4_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_6_dual_sse2,
+             &aom_highbd_lpf_vertical_6_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
+             &aom_highbd_lpf_vertical_8_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_14_dual_sse2,
+             &aom_highbd_lpf_vertical_14_dual_c, 12),
+};
+
+INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param_hbd,
+                        ::testing::ValuesIn(kHbdLoop8Test9));
+
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+const loop_param_t kLoop8Test6[] = {
+  make_tuple(&aom_lpf_vertical_14_neon, &aom_lpf_vertical_14_c, 8),
+  make_tuple(&aom_lpf_vertical_8_neon, &aom_lpf_vertical_8_c, 8),
+  make_tuple(&aom_lpf_horizontal_8_neon, &aom_lpf_horizontal_8_c, 8),
+  make_tuple(&aom_lpf_horizontal_6_neon, &aom_lpf_horizontal_6_c, 8)
+};
+
+INSTANTIATE_TEST_CASE_P(NEON, Loop8Test6Param_lbd,
+                        ::testing::ValuesIn(kLoop8Test6));
+#endif  // HAVE_NEON
+
+#if HAVE_AVX2
+const hbddual_loop_param_t kHbdLoop8Test9Avx2[] = {
+  make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
+             &aom_highbd_lpf_horizontal_4_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
+             &aom_highbd_lpf_horizontal_4_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
+             &aom_highbd_lpf_horizontal_4_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
+             &aom_highbd_lpf_horizontal_8_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
+             &aom_highbd_lpf_horizontal_8_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
+             &aom_highbd_lpf_horizontal_8_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
+             &aom_highbd_lpf_vertical_4_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
+             &aom_highbd_lpf_vertical_4_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
+             &aom_highbd_lpf_vertical_4_dual_c, 12),
+  make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
+             &aom_highbd_lpf_vertical_8_dual_c, 8),
+  make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
+             &aom_highbd_lpf_vertical_8_dual_c, 10),
+  make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
+             &aom_highbd_lpf_vertical_8_dual_c, 12),
+};
+
+INSTANTIATE_TEST_CASE_P(AVX2, Loop8Test9Param_hbd,
+                        ::testing::ValuesIn(kHbdLoop8Test9Avx2));
+#endif
+}  // namespace
diff --git a/third_party/aom/test/masked_sad_test.cc b/third_party/aom/test/masked_sad_test.cc
index 19f97718de..1a393a0015 100644
--- a/third_party/aom/test/masked_sad_test.cc
+++ b/third_party/aom/test/masked_sad_test.cc
@@ -18,8 +18,9 @@
 #include "test/register_state_check.h"
 #include "test/util.h"
 
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
 #include "aom/aom_integer.h"
 
 using libaom_test::ACMRandom;
@@ -32,7 +33,7 @@ typedef unsigned int (*MaskedSADFunc)(const uint8_t *src, int src_stride,
                                       const uint8_t *second_pred,
                                       const uint8_t *msk, int msk_stride,
                                       int invert_mask);
-typedef std::tr1::tuple<MaskedSADFunc, MaskedSADFunc> MaskedSADParam;
+typedef ::testing::tuple<MaskedSADFunc, MaskedSADFunc> MaskedSADParam;
 
 class MaskedSADTest : public ::testing::TestWithParam<MaskedSADParam> {
  public:
@@ -89,13 +90,12 @@ TEST_P(MaskedSADTest, OperationCheck) {
       << "First failed at test case " << first_failure;
 }
 
-#if CONFIG_HIGHBITDEPTH
 typedef unsigned int (*HighbdMaskedSADFunc)(const uint8_t *src, int src_stride,
                                             const uint8_t *ref, int ref_stride,
                                             const uint8_t *second_pred,
                                             const uint8_t *msk, int msk_stride,
                                             int invert_mask);
-typedef std::tr1::tuple<HighbdMaskedSADFunc, HighbdMaskedSADFunc>
+typedef ::testing::tuple<HighbdMaskedSADFunc, HighbdMaskedSADFunc>
     HighbdMaskedSADParam;
 
 class HighbdMaskedSADTest
@@ -155,17 +155,14 @@ TEST_P(HighbdMaskedSADTest, OperationCheck) {
       << "Error: High BD Masked SAD Test, C output doesn't match SSSE3 output. "
       << "First failed at test case " << first_failure;
 }
-#endif  // CONFIG_HIGHBITDEPTH
 
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
 
 #if HAVE_SSSE3
 const MaskedSADParam msad_test[] = {
-#if CONFIG_EXT_PARTITION
   make_tuple(&aom_masked_sad128x128_ssse3, &aom_masked_sad128x128_c),
   make_tuple(&aom_masked_sad128x64_ssse3, &aom_masked_sad128x64_c),
   make_tuple(&aom_masked_sad64x128_ssse3, &aom_masked_sad64x128_c),
-#endif  // CONFIG_EXT_PARTITION
   make_tuple(&aom_masked_sad64x64_ssse3, &aom_masked_sad64x64_c),
   make_tuple(&aom_masked_sad64x32_ssse3, &aom_masked_sad64x32_c),
   make_tuple(&aom_masked_sad32x64_ssse3, &aom_masked_sad32x64_c),
@@ -183,16 +180,13 @@ const MaskedSADParam msad_test[] = {
 
 INSTANTIATE_TEST_CASE_P(SSSE3_C_COMPARE, MaskedSADTest,
                         ::testing::ValuesIn(msad_test));
-#if CONFIG_HIGHBITDEPTH
 const HighbdMaskedSADParam hbd_msad_test[] = {
-#if CONFIG_EXT_PARTITION
   make_tuple(&aom_highbd_masked_sad128x128_ssse3,
              &aom_highbd_masked_sad128x128_c),
   make_tuple(&aom_highbd_masked_sad128x64_ssse3,
              &aom_highbd_masked_sad128x64_c),
   make_tuple(&aom_highbd_masked_sad64x128_ssse3,
              &aom_highbd_masked_sad64x128_c),
-#endif  // CONFIG_EXT_PARTITION
   make_tuple(&aom_highbd_masked_sad64x64_ssse3, &aom_highbd_masked_sad64x64_c),
   make_tuple(&aom_highbd_masked_sad64x32_ssse3, &aom_highbd_masked_sad64x32_c),
   make_tuple(&aom_highbd_masked_sad32x64_ssse3, &aom_highbd_masked_sad32x64_c),
@@ -210,6 +204,5 @@ const HighbdMaskedSADParam hbd_msad_test[] = {
 
 INSTANTIATE_TEST_CASE_P(SSSE3_C_COMPARE, HighbdMaskedSADTest,
                         ::testing::ValuesIn(hbd_msad_test));
-#endif  // CONFIG_HIGHBITDEPTH
 #endif  // HAVE_SSSE3
 }  // namespace
diff --git a/third_party/aom/test/masked_variance_test.cc b/third_party/aom/test/masked_variance_test.cc
index a9cbdc80d3..275b9feb6a 100644
--- a/third_party/aom/test/masked_variance_test.cc
+++ b/third_party/aom/test/masked_variance_test.cc
@@ -19,8 +19,9 @@
 #include "test/register_state_check.h"
 #include "test/util.h"
 
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
 #include "aom/aom_codec.h"
 #include "aom/aom_integer.h"
 #include "aom_dsp/aom_filter.h"
@@ -36,7 +37,7 @@ typedef unsigned int (*MaskedSubPixelVarianceFunc)(
     const uint8_t *ref, int ref_stride, const uint8_t *second_pred,
     const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse);
 
-typedef std::tr1::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc>
+typedef ::testing::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc>
     MaskedSubPixelVarianceParam;
 
 class MaskedSubPixelVarianceTest
@@ -169,9 +170,8 @@ TEST_P(MaskedSubPixelVarianceTest, ExtremeValues) {
                           << " y_offset = " << first_failure_y;
 }
 
-#if CONFIG_HIGHBITDEPTH
-typedef std::tr1::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc,
-                        aom_bit_depth_t>
+typedef ::testing::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc,
+                         aom_bit_depth_t>
     HighbdMaskedSubPixelVarianceParam;
 
 class HighbdMaskedSubPixelVarianceTest
@@ -311,21 +311,18 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, ExtremeValues) {
                           << " x_offset = " << first_failure_x
                           << " y_offset = " << first_failure_y;
 }
-#endif  // CONFIG_HIGHBITDEPTH
 
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
 
 #if HAVE_SSSE3
 
 const MaskedSubPixelVarianceParam sub_pel_var_test[] = {
-#if CONFIG_EXT_PARTITION
   make_tuple(&aom_masked_sub_pixel_variance128x128_ssse3,
              &aom_masked_sub_pixel_variance128x128_c),
   make_tuple(&aom_masked_sub_pixel_variance128x64_ssse3,
              &aom_masked_sub_pixel_variance128x64_c),
   make_tuple(&aom_masked_sub_pixel_variance64x128_ssse3,
              &aom_masked_sub_pixel_variance64x128_c),
-#endif  // CONFIG_EXT_PARTITION
   make_tuple(&aom_masked_sub_pixel_variance64x64_ssse3,
              &aom_masked_sub_pixel_variance64x64_c),
   make_tuple(&aom_masked_sub_pixel_variance64x32_ssse3,
@@ -357,16 +354,13 @@ const MaskedSubPixelVarianceParam sub_pel_var_test[] = {
 INSTANTIATE_TEST_CASE_P(SSSE3_C_COMPARE, MaskedSubPixelVarianceTest,
                         ::testing::ValuesIn(sub_pel_var_test));
 
-#if CONFIG_HIGHBITDEPTH
 const HighbdMaskedSubPixelVarianceParam hbd_sub_pel_var_test[] = {
-#if CONFIG_EXT_PARTITION
   make_tuple(&aom_highbd_8_masked_sub_pixel_variance128x128_ssse3,
              &aom_highbd_8_masked_sub_pixel_variance128x128_c, AOM_BITS_8),
   make_tuple(&aom_highbd_8_masked_sub_pixel_variance128x64_ssse3,
              &aom_highbd_8_masked_sub_pixel_variance128x64_c, AOM_BITS_8),
   make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x128_ssse3,
              &aom_highbd_8_masked_sub_pixel_variance64x128_c, AOM_BITS_8),
-#endif  // CONFIG_EXT_PARTITION
   make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x64_ssse3,
              &aom_highbd_8_masked_sub_pixel_variance64x64_c, AOM_BITS_8),
   make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x32_ssse3,
@@ -393,14 +387,12 @@ const HighbdMaskedSubPixelVarianceParam hbd_sub_pel_var_test[] = {
              &aom_highbd_8_masked_sub_pixel_variance4x8_c, AOM_BITS_8),
   make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x4_ssse3,
              &aom_highbd_8_masked_sub_pixel_variance4x4_c, AOM_BITS_8),
-#if CONFIG_EXT_PARTITION
   make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x128_ssse3,
              &aom_highbd_10_masked_sub_pixel_variance128x128_c, AOM_BITS_10),
   make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x64_ssse3,
              &aom_highbd_10_masked_sub_pixel_variance128x64_c, AOM_BITS_10),
   make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x128_ssse3,
              &aom_highbd_10_masked_sub_pixel_variance64x128_c, AOM_BITS_10),
-#endif  // CONFIG_EXT_PARTITION
   make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x64_ssse3,
              &aom_highbd_10_masked_sub_pixel_variance64x64_c, AOM_BITS_10),
   make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x32_ssse3,
@@ -427,14 +419,12 @@ const HighbdMaskedSubPixelVarianceParam hbd_sub_pel_var_test[] = {
              &aom_highbd_10_masked_sub_pixel_variance4x8_c, AOM_BITS_10),
   make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x4_ssse3,
              &aom_highbd_10_masked_sub_pixel_variance4x4_c, AOM_BITS_10),
-#if CONFIG_EXT_PARTITION
   make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x128_ssse3,
              &aom_highbd_12_masked_sub_pixel_variance128x128_c, AOM_BITS_12),
   make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x64_ssse3,
              &aom_highbd_12_masked_sub_pixel_variance128x64_c, AOM_BITS_12),
   make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x128_ssse3,
              &aom_highbd_12_masked_sub_pixel_variance64x128_c, AOM_BITS_12),
-#endif  // CONFIG_EXT_PARTITION
   make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x64_ssse3,
              &aom_highbd_12_masked_sub_pixel_variance64x64_c, AOM_BITS_12),
   make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x32_ssse3,
@@ -465,7 +455,5 @@ const HighbdMaskedSubPixelVarianceParam hbd_sub_pel_var_test[] = {
 
 INSTANTIATE_TEST_CASE_P(SSSE3_C_COMPARE, HighbdMaskedSubPixelVarianceTest,
                         ::testing::ValuesIn(hbd_sub_pel_var_test));
-#endif  // CONFIG_HIGHBITDEPTH
-
 #endif  // HAVE_SSSE3
 }  // namespace
diff --git a/third_party/aom/test/md5_helper.h b/third_party/aom/test/md5_helper.h
index 8c9d4f706f..b2b14cf21f 100644
--- a/third_party/aom/test/md5_helper.h
+++ b/third_party/aom/test/md5_helper.h
@@ -12,8 +12,8 @@
 #ifndef TEST_MD5_HELPER_H_
 #define TEST_MD5_HELPER_H_
 
-#include "./md5_utils.h"
 #include "aom/aom_decoder.h"
+#include "common/md5_utils.h"
 
 namespace libaom_test {
 class MD5 {
diff --git a/third_party/aom/test/metrics_template.html b/third_party/aom/test/metrics_template.html
new file mode 100644
index 0000000000..b57c62314a
--- /dev/null
+++ b/third_party/aom/test/metrics_template.html
@@ -0,0 +1,422 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<title>Video Codec Test Results</title>
+<style type="text/css">
+<!-- Begin 960 reset -->
+a,abbr,acronym,address,applet,article,aside,audio,b,big,blockquote,body,canvas,caption,center,cite,c
+ode,dd,del,details,dfn,dialog,div,dl,dt,em,embed,fieldset,figcaption,figure,font,footer,form,h1,h2,h
+3,h4,h5,h6,header,hgroup,hr,html,i,iframe,img,ins,kbd,label,legend,li,mark,menu,meter,nav,object,ol,
+output,p,pre,progress,q,rp,rt,ruby,s,samp,section,small,span,strike,strong,sub,summary,sup,table,tbo
+dy,td,tfoot,th,thead,time,tr,tt,u,ul,var,video,xmp{border:0;margin:0;padding:0;font-size:100%}html,b
+ody{height:100%}article,aside,details,figcaption,figure,footer,header,hgroup,menu,nav,section{displa
+y:block}b,strong{font-weight:bold}img{color:transparent;font-size:0;vertical-align:middle;-ms-interp
+olation-mode:bicubic}ol,ul{list-style:none}li{display:list-item}table{border-collapse:collapse;borde
+r-spacing:0}th,td,caption{font-weight:normal;vertical-align:top;text-align:left}q{quotes:none}q:befo
+re,q:after{content:'';content:none}sub,sup,small{font-size:75%}sub,sup{line-height:0;position:relati
+ve;vertical-align:baseline}sub{bottom:-0.25em}sup{top:-0.5em}svg{overflow:hidden}
+<!-- End 960 reset -->
+<!-- Begin 960 text -->
+body{font:13px/1.5 'Helvetica Neue',Arial,'Liberation Sans',FreeSans,sans-serif}pre,code{font-family
+:'DejaVu Sans Mono',Menlo,Consolas,monospace}hr{border:0 #ccc solid;border-top-width:1px;clear:both;
+height:0}h1{font-size:25px}h2{font-size:23px}h3{font-size:21px}h4{font-size:19px}h5{font-size:17px}h
+6{font-size:15px}ol{list-style:decimal}ul{list-style:disc}li{margin-left:30px}p,dl,hr,h1,h2,h3,h4,h5
+,h6,ol,ul,pre,table,address,fieldset,figure{margin-bottom:20px}
+<!-- End 960 text -->
+<!-- Begin 960 grid (fluid variant)
+     12 columns, 1152px total width
+     http://960.gs/ | http://grids.heroku.com/ -->
+.container_12{width:92%;margin-left:4%;margin-right:4%}.grid_1,.grid_2,.grid_3,.grid_4,.grid_5,.grid
+_6,.grid_7,.grid_8,.grid_9,.grid_10,.grid_11,.grid_12{display:inline;float:left;position:relative;ma
+rgin-left:1%;margin-right:1%}.alpha{margin-left:0}.omega{margin-right:0}.container_12 .grid_1{width:
+6.333%}.container_12 .grid_2{width:14.667%}.container_12 .grid_3{width:23.0%}.container_12 .grid_4{w
+idth:31.333%}.container_12 .grid_5{width:39.667%}.container_12 .grid_6{width:48.0%}.container_12 .gr
+id_7{width:56.333%}.container_12 .grid_8{width:64.667%}.container_12 .grid_9{width:73.0%}.container_
+12 .grid_10{width:81.333%}.container_12 .grid_11{width:89.667%}.container_12 .grid_12{width:98.0%}.c
+ontainer_12 .prefix_1{padding-left:8.333%}.container_12 .prefix_2{padding-left:16.667%}.container_12
+ .prefix_3{padding-left:25.0%}.container_12 .prefix_4{padding-left:33.333%}.container_12 .prefix_5{p
+adding-left:41.667%}.container_12 .prefix_6{padding-left:50.0%}.container_12 .prefix_7{padding-left:
+58.333%}.container_12 .prefix_8{padding-left:66.667%}.container_12 .prefix_9{padding-left:75.0%}.con
+tainer_12 .prefix_10{padding-left:83.333%}.container_12 .prefix_11{padding-left:91.667%}.container_1
+2 .suffix_1{padding-right:8.333%}.container_12 .suffix_2{padding-right:16.667%}.container_12 .suffix
+_3{padding-right:25.0%}.container_12 .suffix_4{padding-right:33.333%}.container_12 .suffix_5{padding
+-right:41.667%}.container_12 .suffix_6{padding-right:50.0%}.container_12 .suffix_7{padding-right:58.
+333%}.container_12 .suffix_8{padding-right:66.667%}.container_12 .suffix_9{padding-right:75.0%}.cont
+ainer_12 .suffix_10{padding-right:83.333%}.container_12 .suffix_11{padding-right:91.667%}.container_
+12 .push_1{left:8.333%}.container_12 .push_2{left:16.667%}.container_12 .push_3{left:25.0%}.containe
+r_12 .push_4{left:33.333%}.container_12 .push_5{left:41.667%}.container_12 .push_6{left:50.0%}.conta
+iner_12 .push_7{left:58.333%}.container_12 .push_8{left:66.667%}.container_12 .push_9{left:75.0%}.co
+ntainer_12 .push_10{left:83.333%}.container_12 .push_11{left:91.667%}.container_12 .pull_1{left:-8.3
+33%}.container_12 .pull_2{left:-16.667%}.container_12 .pull_3{left:-25.0%}.container_12 .pull_4{left
+:-33.333%}.container_12 .pull_5{left:-41.667%}.container_12 .pull_6{left:-50.0%}.container_12 .pull_
+7{left:-58.333%}.container_12 .pull_8{left:-66.667%}.container_12 .pull_9{left:-75.0%}.container_12
+.pull_10{left:-83.333%}.container_12 .pull_11{left:-91.667%}.clear{clear:both;display:block;overflow
+:hidden;visibility:hidden;width:0;height:0}.clearfix:after{clear:both;content:' ';display:block;font
+-size:0;line-height:0;visibility:hidden;width:0;height:0}.clearfix{display:inline-block}* html .clea
+rfix{height:1%}.clearfix{display:block}
+<!-- End 960 grid -->
+
+div.metricgraph {
+
+}
+
+body {
+
+}
+
+div.header {
+  font-family: Arial, sans-serif;
+}
+
+div.header h2 {
+  margin: .5em auto;
+}
+
+div.radio {
+  font-family: Arial, sans-serif;
+  margin-bottom: 1em;
+}
+
+div.main {
+
+}
+
+div.cliplist {
+  font-family: Arial, sans-serif;
+  margin-top: 6px;
+}
+
+div.chartarea {
+  font-family: Arial, sans-serif;
+}
+
+div.indicators {
+  font-family: Arial, sans-serif;
+  font-size: 13px;
+  margin-top: 6px;
+  min-height: 600px;
+  background-color: #f7f7f7;
+}
+
+div.indicators div.content {
+  margin: 1em;
+}
+
+div.indicators div.content h5 {
+  font-size: 13px;
+  text-align: center;
+  margin: 0;
+}
+
+div.indicators div.content ul {
+  margin-left: 0;
+  padding-left: 0;
+  margin-top: 0;
+}
+
+div.indicators div.content ul li {
+  margin-left: 1.5em;
+}
+
+div.indicators div.content p:first-child {
+  margin-bottom: .5em;
+}
+
+span.google-visualization-table-sortind {
+  color: #000;
+}
+.header-style {
+  font-weight: bold;
+  border: 1px solid #fff;
+  background-color: #ccc;
+}
+
+td.header-style+td {
+
+}
+
+.orange-background {
+  background-color: orange;
+}
+
+.light-gray-background {
+  background-color: #f0f0f0;
+}
+</style>
+<script type="text/javascript" src="https://www.google.com/jsapi"></script>
+<script type="text/javascript">
+var chart_left   = 40;
+var chart_top    = 6;
+var chart_height = document.documentElement.clientHeight-100;
+var chart_width  = "100%";
+ftable='filestable_avg'
+var snrs = [];
+var filestable_dsnr = [];
+var filestable_drate = [];
+var filestable_avg = [];
+
+// Python template code replaces the following 2 lines.
+//%%metrics_js%%//
+//%%filestable_dpsnr%%//
+//%%filestable_avg%%//
+//%%filestable_drate%%//
+//%%snrs%%//
+
+var selected = 0
+var imagestr = '';
+var bettertable=0;
+var chart=0;
+var better=0;
+var metricdata=0;
+var metricView=0;
+var column=1;
+var formatter=0;
+
+function changeColumn(col) {
+  column = col;
+  console.log(col)
+  draw_files();
+}
+
+function changeMetric(m) {
+  ftable=m
+  draw_files()
+}
+
+function setup_vis() {
+  chart = new google.visualization.ScatterChart(
+      document.getElementById("metricgraph"));
+
+  bettertable = new google.visualization.Table(
+      document.getElementById("bettertable"));
+
+  draw_files();
+  build_metrics_radio();
+}
+
+function build_metrics_radio() {
+  for (metric=1; metric < metrics.length; metric++) {
+    var rb = document.createElement('input');
+    var l = document.createElement('label');
+    rb.setAttribute('type','radio');
+    rb.setAttribute('name','metric');
+    rb.setAttribute('onClick', "changeColumn('"+metric.toString()+"')");
+    l.innerHTML = metrics[metric];
+    document.getElementById('metrics').appendChild(rb);
+    document.getElementById('metrics').appendChild(l);
+  }
+}
+
+function draw_files() {
+  var options = {'allowHtml': true, 'width': "100%", 'height': "50%"};
+  if (better != 0) delete better;
+
+  col=eval(ftable+'[column]')
+  better = new google.visualization.DataTable(col)
+
+  // Python Template code replaces the following line with a list of
+  // formatters.
+  if (ftable == 'filestable_dsnr')
+    formatter = new google.visualization.NumberFormat(
+      {fractionDigits: 4, suffix:" db"});
+  else
+    formatter = new google.visualization.NumberFormat(
+       {fractionDigits: 4, suffix:"%"});
+
+  //%%formatters%%//
+
+  bettertable.draw(better,options);
+  google.visualization.events.addListener(bettertable, 'select',
+                                          selectBetterHandler);
+  query_file()
+}
+
+function query_file() {
+  imagestr = better.getFormattedValue(selected, 0)
+  var metricjson = eval('(' + snrs[column][selected] + ')');
+  metricdata = new google.visualization.DataTable(metricjson, 0.6);
+  if( metricView != 0 ) delete metricView;
+  metricView = new google.visualization.DataView(metricdata);
+
+  chart.draw(metricView, {curveType:'function',
+      explorer: {},
+      chartArea:{left:chart_left, top:chart_top, width:chart_width,
+      height:chart_height-90},
+      hAxis:{title:"Datarate in kbps"},
+      vAxis:{title:"Quality in decibels", format: '##.0', textPosition: 'in'},
+      legend:{position:"in"}, title:imagestr, pointSize:2, lineWidth:1,
+      width:chart_width, height:chart_height-50 });
+
+  google.visualization.events.addListener(chart, 'select', chartSelect);
+  google.visualization.events.addListener(chart, 'onmouseover', chartMouseOver);
+  google.visualization.events.addListener(chart, 'onmouseout', chartMouseOut);
+}
+
+function chartMouseOut(e) {
+  statusbar = document.getElementById('status');
+  statusbar.style.display = 'none';
+}
+
+function chartMouseOver(e) {
+  pointDifference(e.row, e.column)
+}
+
+function pointDifference(row, col) {
+  if(!row || !col)
+    return;
+
+  var cols = metricdata.getNumberOfColumns();
+  var rows = metricdata.getNumberOfRows();
+
+  var sel_bitrate = metricView.getValue(row, 0 );
+  var sel_metric = metricView.getValue(row, col);
+
+  var message = '<ul>' + metricView.getColumnLabel(col) +
+     ' (' + sel_bitrate.toFixed(0) + ' kbps, ' + sel_metric.toFixed(2) + ')' + ' is ';
+
+
+  // col 0 is datarate
+  for( var i=1;i<cols;++i) {
+
+    var metric_greatest_thats_less = 0;
+    var rate_greatest_thats_less = 0;
+    var metric_smallest_thats_greater = 999;
+    var rate_smallest_thats_greater = 0;
+
+    if(i==col)
+      continue;
+
+    // Find the lowest metric for the column that's greater than sel_metric and
+    // the highest metric for this column that's less than the metric.
+    for(var line_count = 0; line_count < rows; ++line_count) {
+      this_metric = metricdata.getValue(line_count, i)
+      this_rate = metricdata.getValue(line_count, 0)
+      if(!this_metric)
+        continue;
+
+      if(this_metric > metric_greatest_thats_less &&
+         this_metric <= sel_metric) {
+        metric_greatest_thats_less = this_metric;
+        rate_greatest_thats_less = this_rate;
+      }
+      if(this_metric < metric_smallest_thats_greater &&
+        this_metric > sel_metric) {
+        metric_smallest_thats_greater = this_metric;
+        rate_smallest_thats_greater = this_rate;
+      }
+    }
+
+    if(rate_smallest_thats_greater == 0 || rate_greatest_thats_less == 0) {
+      message = message + " <li> Couldn't find a point on both sides.</li>"
+    } else {
+      metric_slope = ( rate_smallest_thats_greater - rate_greatest_thats_less) /
+          ( metric_smallest_thats_greater - metric_greatest_thats_less);
+
+      projected_rate = ( sel_metric - metric_greatest_thats_less) *
+          metric_slope + rate_greatest_thats_less;
+
+      difference = 100 * (projected_rate / sel_bitrate - 1);
+
+
+      if (difference > 0)
+        message = message + "<li>  " + difference.toFixed(2) +
+                  "% smaller than <em>" +
+                  metricdata.getColumnLabel(i) + "</em></li> "
+      else
+        message = message + "<li>  " + -difference.toFixed(2) +
+                  "% bigger than <em>" +
+                  metricdata.getColumnLabel(i) + "</em></li> "
+    }
+
+  }
+  message = message + "</ul>"
+  statusbar = document.getElementById('status');
+  statusbar.innerHTML = "<p>" + message + "</p>";
+  statusbar.style.display = 'block';
+}
+
+function chartSelect() {
+  var selection = chart.getSelection();
+  var message = '';
+  var min = metricView.getFormattedValue(selection[0].row, 0);
+  var max = metricView.getFormattedValue(selection[selection.length-1].row, 0);
+  var val = metricView.getFormattedValue(selection[0].row,selection[0].column);
+
+  pointDifference(selection[0].row, selection[0].column)
+  min = min / 3
+  max = max * 3
+  metricView.setRows(metricdata.getFilteredRows(
+      [{column: 0,minValue: min, maxValue:max}]));
+
+  chart.draw(metricView, {curveType:'function',
+      chartArea:{left:40, top:10, width:chart_width, height:chart_height - 110},
+      hAxis:{title:"datarate in kbps"}, vAxis:{title:"quality in decibels"},
+      legend:{position:"in"}, title:imagestr, pointSize:2, lineWidth:1,
+      width:chart_width, height:chart_height - 50});
+}
+
+function selectBetterHandler() {
+  var selection = bettertable.getSelection();
+  for (var i = 0; i < selection.length; i++) {
+    item = selection[i];
+  }
+  selected = item.row
+  query_file()
+}
+
+
+google.load('visualization', '1', {'packages' : ['corechart','table']});
+google.setOnLoadCallback(setup_vis);
+</script>
+</head>
+
+<body>
+
+  <div class="container_12">
+
+    <div class="grid_12 header">
+      <h2>Codec Comparison Results</h2>
+    </div>
+
+    <div class="grid_12 radio">
+
+      <form name="myform">
+        Method For Combining Points
+        <input type="radio" checked name="column" value="1"
+          onClick="changeMetric('filestable_avg')" />Average of bitrates difference
+        <input type="radio" name="column" value="2"
+          onClick="changeMetric('filestable_dsnr')" />BDSNR
+        <input type="radio" name="column" value="3"
+          onClick="changeMetric('filestable_drate')" />BDRATE
+      </form>
+
+      <form id="metrics" name="myform">
+      </form>
+
+    </div>
+
+    <div class="grid_12 main">
+
+      <div class="grid_5 alpha cliplist">
+        <div id="bettertable"></div>
+      </div>
+
+      <div class="grid_5 chartarea">
+        <div id="metricgraph"></div>
+      </div>
+
+      <div class="grid_2 omega indicators">
+        <div class="content">
+          <h5>Indicators</h5>
+          <hr>
+          <div id="status"></div>
+        </div>
+      </div>
+
+    </div>
+
+  </div>
+
+</body>
+</html>
diff --git a/third_party/aom/test/minmax_test.cc b/third_party/aom/test/minmax_test.cc
deleted file mode 100644
index aaac72c651..0000000000
--- a/third_party/aom/test/minmax_test.cc
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_dsp_rtcd.h"
-#include "aom/aom_integer.h"
-
-#include "test/acm_random.h"
-#include "test/register_state_check.h"
-
-namespace {
-
-using ::libaom_test::ACMRandom;
-
-typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride, const uint8_t *b,
-                           int b_stride, int *min, int *max);
-
-class MinMaxTest : public ::testing::TestWithParam<MinMaxFunc> {
- public:
-  virtual void SetUp() {
-    mm_func_ = GetParam();
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-  }
-
- protected:
-  MinMaxFunc mm_func_;
-  ACMRandom rnd_;
-};
-
-void reference_minmax(const uint8_t *a, int a_stride, const uint8_t *b,
-                      int b_stride, int *min_ret, int *max_ret) {
-  int min = 255;
-  int max = 0;
-  for (int i = 0; i < 8; i++) {
-    for (int j = 0; j < 8; j++) {
-      const int diff = abs(a[i * a_stride + j] - b[i * b_stride + j]);
-      if (min > diff) min = diff;
-      if (max < diff) max = diff;
-    }
-  }
-
-  *min_ret = min;
-  *max_ret = max;
-}
-
-TEST_P(MinMaxTest, MinValue) {
-  for (int i = 0; i < 64; i++) {
-    uint8_t a[64], b[64];
-    memset(a, 0, sizeof(a));
-    memset(b, 255, sizeof(b));
-    b[i] = i;  // Set a minimum difference of i.
-
-    int min, max;
-    ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
-    EXPECT_EQ(255, max);
-    EXPECT_EQ(i, min);
-  }
-}
-
-TEST_P(MinMaxTest, MaxValue) {
-  for (int i = 0; i < 64; i++) {
-    uint8_t a[64], b[64];
-    memset(a, 0, sizeof(a));
-    memset(b, 0, sizeof(b));
-    b[i] = i;  // Set a maximum difference of i.
-
-    int min, max;
-    ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
-    EXPECT_EQ(i, max);
-    EXPECT_EQ(0, min);
-  }
-}
-
-TEST_P(MinMaxTest, CompareReference) {
-  uint8_t a[64], b[64];
-  for (int j = 0; j < 64; j++) {
-    a[j] = rnd_.Rand8();
-    b[j] = rnd_.Rand8();
-  }
-
-  int min_ref, max_ref, min, max;
-  reference_minmax(a, 8, b, 8, &min_ref, &max_ref);
-  ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
-  EXPECT_EQ(max_ref, max);
-  EXPECT_EQ(min_ref, min);
-}
-
-TEST_P(MinMaxTest, CompareReferenceAndVaryStride) {
-  uint8_t a[8 * 64], b[8 * 64];
-  for (int i = 0; i < 8 * 64; i++) {
-    a[i] = rnd_.Rand8();
-    b[i] = rnd_.Rand8();
-  }
-  for (int a_stride = 8; a_stride <= 64; a_stride += 8) {
-    for (int b_stride = 8; b_stride <= 64; b_stride += 8) {
-      int min_ref, max_ref, min, max;
-      reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);
-      ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));
-      EXPECT_EQ(max_ref, max)
-          << "when a_stride = " << a_stride << " and b_stride = " << b_stride;
-      EXPECT_EQ(min_ref, min)
-          << "when a_stride = " << a_stride << " and b_stride = " << b_stride;
-    }
-  }
-}
-
-INSTANTIATE_TEST_CASE_P(C, MinMaxTest, ::testing::Values(&aom_minmax_8x8_c));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, MinMaxTest,
-                        ::testing::Values(&aom_minmax_8x8_sse2));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest,
-                        ::testing::Values(&aom_minmax_8x8_neon));
-#endif
-
-}  // namespace
diff --git a/third_party/aom/test/monochrome_test.cc b/third_party/aom/test/monochrome_test.cc
new file mode 100644
index 0000000000..ebccba5842
--- /dev/null
+++ b/third_party/aom/test/monochrome_test.cc
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <climits>
+#include <vector>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/video_source.h"
+#include "test/util.h"
+
+namespace {
+
+class MonochromeTest
+    : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  MonochromeTest() : EncoderTest(GET_PARAM(0)), frame0_psnr_y_(0.) {}
+
+  virtual ~MonochromeTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+  }
+
+  virtual void DecompressedFrameHook(const aom_image_t &img,
+                                     aom_codec_pts_t pts) {
+    (void)pts;
+
+    // Get value of top-left corner pixel of U plane
+    int chroma_value = img.planes[AOM_PLANE_U][0];
+
+    bool is_chroma_constant =
+        ComparePlaneToValue(img, AOM_PLANE_U, chroma_value) &&
+        ComparePlaneToValue(img, AOM_PLANE_V, chroma_value);
+
+    // Chroma planes should be constant
+    EXPECT_TRUE(is_chroma_constant);
+
+    // Monochrome flag on image should be set
+    EXPECT_EQ(img.monochrome, 1);
+
+    chroma_value_list_.push_back(chroma_value);
+  }
+
+  // Returns true if all pixels on the plane are equal to value, and returns
+  // false otherwise.
+  bool ComparePlaneToValue(const aom_image_t &img, const int plane,
+                           const int value) {
+    const int w = aom_img_plane_width(&img, plane);
+    const int h = aom_img_plane_height(&img, plane);
+    const uint8_t *const buf = img.planes[plane];
+    const int stride = img.stride[plane];
+
+    for (int r = 0; r < h; ++r) {
+      for (int c = 0; c < w; ++c) {
+        if (buf[r * stride + c] != value) return false;
+      }
+    }
+    return true;
+  }
+
+  virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
+    // Check that the initial Y PSNR value is 'high enough', and check that
+    // subsequent Y PSNR values are 'close' to this initial value.
+    if (frame0_psnr_y_ == 0.) {
+      frame0_psnr_y_ = pkt->data.psnr.psnr[1];
+      EXPECT_GT(frame0_psnr_y_, 29.);
+    }
+    EXPECT_NEAR(pkt->data.psnr.psnr[1], frame0_psnr_y_, 2.5);
+  }
+
+  std::vector<int> chroma_value_list_;
+  double frame0_psnr_y_;
+};
+
+TEST_P(MonochromeTest, TestMonochromeEncoding) {
+  ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 5);
+
+  init_flags_ = AOM_CODEC_USE_PSNR;
+
+  cfg_.g_w = 352;
+  cfg_.g_h = 288;
+
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 600;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 2;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_undershoot_pct = 50;
+  cfg_.rc_overshoot_pct = 50;
+  cfg_.rc_end_usage = AOM_CBR;
+  cfg_.kf_mode = AOM_KF_AUTO;
+  cfg_.g_lag_in_frames = 1;
+  cfg_.kf_min_dist = cfg_.kf_max_dist = 3000;
+  // Enable dropped frames.
+  cfg_.rc_dropframe_thresh = 1;
+  // Disable error_resilience mode.
+  cfg_.g_error_resilient = 0;
+  // Run at low bitrate.
+  cfg_.rc_target_bitrate = 40;
+  // Set monochrome encoding flag
+  cfg_.monochrome = 1;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  // Check that the chroma planes are equal across all frames
+  std::vector<int>::const_iterator iter = chroma_value_list_.begin();
+  int initial_chroma_value = *iter;
+  for (; iter != chroma_value_list_.end(); ++iter) {
+    // Check that all decoded frames have the same constant chroma planes.
+    EXPECT_EQ(*iter, initial_chroma_value);
+  }
+}
+
+AV1_INSTANTIATE_TEST_CASE(MonochromeTest,
+                          ::testing::Values(::libaom_test::kTwoPassGood));
+
+}  // namespace
diff --git a/third_party/aom/test/motion_vector_test.cc b/third_party/aom/test/motion_vector_test.cc
index fe20fd10ae..27eb938930 100644
--- a/third_party/aom/test/motion_vector_test.cc
+++ b/third_party/aom/test/motion_vector_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
@@ -22,7 +22,8 @@ namespace {
 
 // Encoding modes
 const libaom_test::TestMode kEncodingModeVectors[] = {
-  ::libaom_test::kTwoPassGood, ::libaom_test::kOnePassGood,
+  ::libaom_test::kTwoPassGood,
+  ::libaom_test::kOnePassGood,
 };
 
 // Encoding speeds
@@ -82,7 +83,7 @@ TEST_P(MotionVectorTestLarge, OverallTest) {
   // Reduce the test clip's resolution while testing on 32-bit system.
   if (sizeof(void *) == 4) {
     width = 2048;
-    height = 1080;
+    height = 360;
   }
 
   cfg_.rc_target_bitrate = 24000;
diff --git a/third_party/aom/test/noise_model_test.cc b/third_party/aom/test/noise_model_test.cc
new file mode 100644
index 0000000000..9b7fff8a2c
--- /dev/null
+++ b/third_party/aom/test/noise_model_test.cc
@@ -0,0 +1,1332 @@
+#include <math.h>
+#include <algorithm>
+#include <vector>
+
+#include "aom_dsp/noise_model.h"
+#include "aom_dsp/noise_util.h"
+#include "config/aom_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+// Return normally distrbuted values with standard deviation of sigma.
+double randn(libaom_test::ACMRandom *random, double sigma) {
+  while (1) {
+    const double u = 2.0 * ((double)random->Rand31() /
+                            testing::internal::Random::kMaxRange) -
+                     1.0;
+    const double v = 2.0 * ((double)random->Rand31() /
+                            testing::internal::Random::kMaxRange) -
+                     1.0;
+    const double s = u * u + v * v;
+    if (s > 0 && s < 1) {
+      return sigma * (u * sqrt(-2.0 * log(s) / s));
+    }
+  }
+  return 0;
+}
+
+// Synthesizes noise using the auto-regressive filter of the given lag,
+// with the provided n coefficients sampled at the given coords.
+void noise_synth(libaom_test::ACMRandom *random, int lag, int n,
+                 const int (*coords)[2], const double *coeffs, double *data,
+                 int w, int h) {
+  const int pad_size = 3 * lag;
+  const int padded_w = w + pad_size;
+  const int padded_h = h + pad_size;
+  int x = 0, y = 0;
+  std::vector<double> padded(padded_w * padded_h);
+
+  for (y = 0; y < padded_h; ++y) {
+    for (x = 0; x < padded_w; ++x) {
+      padded[y * padded_w + x] = randn(random, 1.0);
+    }
+  }
+  for (y = lag; y < padded_h; ++y) {
+    for (x = lag; x < padded_w; ++x) {
+      double sum = 0;
+      int i = 0;
+      for (i = 0; i < n; ++i) {
+        const int dx = coords[i][0];
+        const int dy = coords[i][1];
+        sum += padded[(y + dy) * padded_w + (x + dx)] * coeffs[i];
+      }
+      padded[y * padded_w + x] += sum;
+    }
+  }
+  // Copy over the padded rows to the output
+  for (y = 0; y < h; ++y) {
+    memcpy(data + y * w, &padded[0] + y * padded_w, sizeof(*data) * w);
+  }
+}
+
+std::vector<float> get_noise_psd(double *noise, int width, int height,
+                                 int block_size) {
+  float *block =
+      (float *)aom_memalign(32, block_size * block_size * sizeof(block));
+  std::vector<float> psd(block_size * block_size);
+  int num_blocks = 0;
+  struct aom_noise_tx_t *tx = aom_noise_tx_malloc(block_size);
+  for (int y = 0; y <= height - block_size; y += block_size / 2) {
+    for (int x = 0; x <= width - block_size; x += block_size / 2) {
+      for (int yy = 0; yy < block_size; ++yy) {
+        for (int xx = 0; xx < block_size; ++xx) {
+          block[yy * block_size + xx] = (float)noise[(y + yy) * width + x + xx];
+        }
+      }
+      aom_noise_tx_forward(tx, &block[0]);
+      aom_noise_tx_add_energy(tx, &psd[0]);
+      num_blocks++;
+    }
+  }
+  for (int yy = 0; yy < block_size; ++yy) {
+    for (int xx = 0; xx <= block_size / 2; ++xx) {
+      psd[yy * block_size + xx] /= num_blocks;
+    }
+  }
+  // Fill in the data that is missing due to symmetries
+  for (int xx = 1; xx < block_size / 2; ++xx) {
+    psd[(block_size - xx)] = psd[xx];
+  }
+  for (int yy = 1; yy < block_size; ++yy) {
+    for (int xx = 1; xx < block_size / 2; ++xx) {
+      psd[(block_size - yy) * block_size + (block_size - xx)] =
+          psd[yy * block_size + xx];
+    }
+  }
+  aom_noise_tx_free(tx);
+  aom_free(block);
+  return psd;
+}
+
+}  // namespace
+
+TEST(NoiseStrengthSolver, GetCentersTwoBins) {
+  aom_noise_strength_solver_t solver;
+  aom_noise_strength_solver_init(&solver, 2, 8);
+  EXPECT_NEAR(0, aom_noise_strength_solver_get_center(&solver, 0), 1e-5);
+  EXPECT_NEAR(255, aom_noise_strength_solver_get_center(&solver, 1), 1e-5);
+  aom_noise_strength_solver_free(&solver);
+}
+
+TEST(NoiseStrengthSolver, GetCentersTwoBins10bit) {
+  aom_noise_strength_solver_t solver;
+  aom_noise_strength_solver_init(&solver, 2, 10);
+  EXPECT_NEAR(0, aom_noise_strength_solver_get_center(&solver, 0), 1e-5);
+  EXPECT_NEAR(1023, aom_noise_strength_solver_get_center(&solver, 1), 1e-5);
+  aom_noise_strength_solver_free(&solver);
+}
+
+TEST(NoiseStrengthSolver, GetCenters256Bins) {
+  const int num_bins = 256;
+  aom_noise_strength_solver_t solver;
+  aom_noise_strength_solver_init(&solver, num_bins, 8);
+
+  for (int i = 0; i < 256; ++i) {
+    EXPECT_NEAR(i, aom_noise_strength_solver_get_center(&solver, i), 1e-5);
+  }
+  aom_noise_strength_solver_free(&solver);
+}
+
+// Tests that the noise strength solver returns the identity transform when
+// given identity-like constraints.
+TEST(NoiseStrengthSolver, ObserveIdentity) {
+  const int num_bins = 256;
+  aom_noise_strength_solver_t solver;
+  EXPECT_EQ(1, aom_noise_strength_solver_init(&solver, num_bins, 8));
+
+  // We have to add a big more strength to constraints at the boundary to
+  // overcome any regularization.
+  for (int j = 0; j < 5; ++j) {
+    aom_noise_strength_solver_add_measurement(&solver, 0, 0);
+    aom_noise_strength_solver_add_measurement(&solver, 255, 255);
+  }
+  for (int i = 0; i < 256; ++i) {
+    aom_noise_strength_solver_add_measurement(&solver, i, i);
+  }
+  EXPECT_EQ(1, aom_noise_strength_solver_solve(&solver));
+  for (int i = 2; i < num_bins - 2; ++i) {
+    EXPECT_NEAR(i, solver.eqns.x[i], 0.1);
+  }
+
+  aom_noise_strength_lut_t lut;
+  EXPECT_EQ(1, aom_noise_strength_solver_fit_piecewise(&solver, 2, &lut));
+
+  ASSERT_EQ(2, lut.num_points);
+  EXPECT_NEAR(0.0, lut.points[0][0], 1e-5);
+  EXPECT_NEAR(0.0, lut.points[0][1], 0.5);
+  EXPECT_NEAR(255.0, lut.points[1][0], 1e-5);
+  EXPECT_NEAR(255.0, lut.points[1][1], 0.5);
+
+  aom_noise_strength_lut_free(&lut);
+  aom_noise_strength_solver_free(&solver);
+}
+
+TEST(NoiseStrengthSolver, SimplifiesCurve) {
+  const int num_bins = 256;
+  aom_noise_strength_solver_t solver;
+  EXPECT_EQ(1, aom_noise_strength_solver_init(&solver, num_bins, 8));
+
+  // Create a parabolic input
+  for (int i = 0; i < 256; ++i) {
+    const double x = (i - 127.5) / 63.5;
+    aom_noise_strength_solver_add_measurement(&solver, i, x * x);
+  }
+  EXPECT_EQ(1, aom_noise_strength_solver_solve(&solver));
+
+  // First try to fit an unconstrained lut
+  aom_noise_strength_lut_t lut;
+  EXPECT_EQ(1, aom_noise_strength_solver_fit_piecewise(&solver, -1, &lut));
+  ASSERT_LE(20, lut.num_points);
+  aom_noise_strength_lut_free(&lut);
+
+  // Now constrain the maximum number of points
+  const int kMaxPoints = 9;
+  EXPECT_EQ(1,
+            aom_noise_strength_solver_fit_piecewise(&solver, kMaxPoints, &lut));
+  ASSERT_EQ(kMaxPoints, lut.num_points);
+
+  // Check that the input parabola is still well represented
+  EXPECT_NEAR(0.0, lut.points[0][0], 1e-5);
+  EXPECT_NEAR(4.0, lut.points[0][1], 0.1);
+  for (int i = 1; i < lut.num_points - 1; ++i) {
+    const double x = (lut.points[i][0] - 128.) / 64.;
+    EXPECT_NEAR(x * x, lut.points[i][1], 0.1);
+  }
+  EXPECT_NEAR(255.0, lut.points[kMaxPoints - 1][0], 1e-5);
+
+  EXPECT_NEAR(4.0, lut.points[kMaxPoints - 1][1], 0.1);
+  aom_noise_strength_lut_free(&lut);
+  aom_noise_strength_solver_free(&solver);
+}
+
+TEST(NoiseStrengthLut, LutEvalSinglePoint) {
+  aom_noise_strength_lut_t lut;
+  ASSERT_TRUE(aom_noise_strength_lut_init(&lut, 1));
+  ASSERT_EQ(1, lut.num_points);
+  lut.points[0][0] = 0;
+  lut.points[0][1] = 1;
+  EXPECT_EQ(1, aom_noise_strength_lut_eval(&lut, -1));
+  EXPECT_EQ(1, aom_noise_strength_lut_eval(&lut, 0));
+  EXPECT_EQ(1, aom_noise_strength_lut_eval(&lut, 1));
+  aom_noise_strength_lut_free(&lut);
+}
+
+TEST(NoiseStrengthLut, LutEvalMultiPointInterp) {
+  const double kEps = 1e-5;
+  aom_noise_strength_lut_t lut;
+  ASSERT_TRUE(aom_noise_strength_lut_init(&lut, 4));
+  ASSERT_EQ(4, lut.num_points);
+
+  lut.points[0][0] = 0;
+  lut.points[0][1] = 0;
+
+  lut.points[1][0] = 1;
+  lut.points[1][1] = 1;
+
+  lut.points[2][0] = 2;
+  lut.points[2][1] = 1;
+
+  lut.points[3][0] = 100;
+  lut.points[3][1] = 1001;
+
+  // Test lower boundary
+  EXPECT_EQ(0, aom_noise_strength_lut_eval(&lut, -1));
+  EXPECT_EQ(0, aom_noise_strength_lut_eval(&lut, 0));
+
+  // Test first part that should be identity
+  EXPECT_NEAR(0.25, aom_noise_strength_lut_eval(&lut, 0.25), kEps);
+  EXPECT_NEAR(0.75, aom_noise_strength_lut_eval(&lut, 0.75), kEps);
+
+  // This is a constant section (should evaluate to 1)
+  EXPECT_NEAR(1.0, aom_noise_strength_lut_eval(&lut, 1.25), kEps);
+  EXPECT_NEAR(1.0, aom_noise_strength_lut_eval(&lut, 1.75), kEps);
+
+  // Test interpolation between to non-zero y coords.
+  EXPECT_NEAR(1, aom_noise_strength_lut_eval(&lut, 2), kEps);
+  EXPECT_NEAR(251, aom_noise_strength_lut_eval(&lut, 26.5), kEps);
+  EXPECT_NEAR(751, aom_noise_strength_lut_eval(&lut, 75.5), kEps);
+
+  // Test upper boundary
+  EXPECT_EQ(1001, aom_noise_strength_lut_eval(&lut, 100));
+  EXPECT_EQ(1001, aom_noise_strength_lut_eval(&lut, 101));
+
+  aom_noise_strength_lut_free(&lut);
+}
+
+TEST(NoiseModel, InitSuccessWithValidSquareShape) {
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 2, 8, 0 };
+  aom_noise_model_t model;
+
+  EXPECT_TRUE(aom_noise_model_init(&model, params));
+
+  const int kNumCoords = 12;
+  const int kCoords[][2] = { { -2, -2 }, { -1, -2 }, { 0, -2 },  { 1, -2 },
+                             { 2, -2 },  { -2, -1 }, { -1, -1 }, { 0, -1 },
+                             { 1, -1 },  { 2, -1 },  { -2, 0 },  { -1, 0 } };
+  EXPECT_EQ(kNumCoords, model.n);
+  for (int i = 0; i < kNumCoords; ++i) {
+    const int *coord = kCoords[i];
+    EXPECT_EQ(coord[0], model.coords[i][0]);
+    EXPECT_EQ(coord[1], model.coords[i][1]);
+  }
+  aom_noise_model_free(&model);
+}
+
+TEST(NoiseModel, InitSuccessWithValidDiamondShape) {
+  aom_noise_model_t model;
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_DIAMOND, 2, 8, 0 };
+  EXPECT_TRUE(aom_noise_model_init(&model, params));
+  EXPECT_EQ(6, model.n);
+  const int kNumCoords = 6;
+  const int kCoords[][2] = { { 0, -2 }, { -1, -1 }, { 0, -1 },
+                             { 1, -1 }, { -2, 0 },  { -1, 0 } };
+  EXPECT_EQ(kNumCoords, model.n);
+  for (int i = 0; i < kNumCoords; ++i) {
+    const int *coord = kCoords[i];
+    EXPECT_EQ(coord[0], model.coords[i][0]);
+    EXPECT_EQ(coord[1], model.coords[i][1]);
+  }
+  aom_noise_model_free(&model);
+}
+
+TEST(NoiseModel, InitFailsWithTooLargeLag) {
+  aom_noise_model_t model;
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 10, 8, 0 };
+  EXPECT_FALSE(aom_noise_model_init(&model, params));
+  aom_noise_model_free(&model);
+}
+
+TEST(NoiseModel, InitFailsWithTooSmallLag) {
+  aom_noise_model_t model;
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 0, 8, 0 };
+  EXPECT_FALSE(aom_noise_model_init(&model, params));
+  aom_noise_model_free(&model);
+}
+
+TEST(NoiseModel, InitFailsWithInvalidShape) {
+  aom_noise_model_t model;
+  aom_noise_model_params_t params = { aom_noise_shape(100), 3, 8, 0 };
+  EXPECT_FALSE(aom_noise_model_init(&model, params));
+  aom_noise_model_free(&model);
+}
+
+// A container template class to hold a data type and extra arguments.
+// All of these args are bundled into one struct so that we can use
+// parameterized tests on combinations of supported data types
+// (uint8_t and uint16_t) and bit depths (8, 10, 12).
+template <typename T, int bit_depth, bool use_highbd>
+struct BitDepthParams {
+  typedef T data_type_t;
+  static const int kBitDepth = bit_depth;
+  static const bool kUseHighBD = use_highbd;
+};
+
+template <typename T>
+class FlatBlockEstimatorTest : public ::testing::Test, public T {
+ public:
+  virtual void SetUp() { random_.Reset(171); }
+  typedef std::vector<typename T::data_type_t> VecType;
+  VecType data_;
+  libaom_test::ACMRandom random_;
+};
+
+TYPED_TEST_CASE_P(FlatBlockEstimatorTest);
+
+TYPED_TEST_P(FlatBlockEstimatorTest, ExtractBlock) {
+  const int kBlockSize = 16;
+  aom_flat_block_finder_t flat_block_finder;
+  ASSERT_EQ(1, aom_flat_block_finder_init(&flat_block_finder, kBlockSize,
+                                          this->kBitDepth, this->kUseHighBD));
+  const double normalization = flat_block_finder.normalization;
+
+  // Test with an image of more than one block.
+  const int h = 2 * kBlockSize;
+  const int w = 2 * kBlockSize;
+  const int stride = 2 * kBlockSize;
+  this->data_.resize(h * stride, 128);
+
+  // Set up the (0,0) block to be a plane and the (0,1) block to be a
+  // checkerboard
+  const int shift = this->kBitDepth - 8;
+  for (int y = 0; y < kBlockSize; ++y) {
+    for (int x = 0; x < kBlockSize; ++x) {
+      this->data_[y * stride + x] = (-y + x + 128) << shift;
+      this->data_[y * stride + x + kBlockSize] =
+          ((x % 2 + y % 2) % 2 ? 128 - 20 : 128 + 20) << shift;
+    }
+  }
+  std::vector<double> block(kBlockSize * kBlockSize, 1);
+  std::vector<double> plane(kBlockSize * kBlockSize, 1);
+
+  // The block data should be a constant (zero) and the rest of the plane
+  // trend is covered in the plane data.
+  aom_flat_block_finder_extract_block(&flat_block_finder,
+                                      (uint8_t *)&this->data_[0], w, h, stride,
+                                      0, 0, &plane[0], &block[0]);
+  for (int y = 0; y < kBlockSize; ++y) {
+    for (int x = 0; x < kBlockSize; ++x) {
+      EXPECT_NEAR(0, block[y * kBlockSize + x], 1e-5);
+      EXPECT_NEAR((double)(this->data_[y * stride + x]) / normalization,
+                  plane[y * kBlockSize + x], 1e-5);
+    }
+  }
+
+  // The plane trend is a constant, and the block is a zero mean checkerboard.
+  aom_flat_block_finder_extract_block(&flat_block_finder,
+                                      (uint8_t *)&this->data_[0], w, h, stride,
+                                      kBlockSize, 0, &plane[0], &block[0]);
+  const int mid = 128 << shift;
+  for (int y = 0; y < kBlockSize; ++y) {
+    for (int x = 0; x < kBlockSize; ++x) {
+      EXPECT_NEAR(((double)this->data_[y * stride + x + kBlockSize] - mid) /
+                      normalization,
+                  block[y * kBlockSize + x], 1e-5);
+      EXPECT_NEAR(mid / normalization, plane[y * kBlockSize + x], 1e-5);
+    }
+  }
+  aom_flat_block_finder_free(&flat_block_finder);
+}
+
+TYPED_TEST_P(FlatBlockEstimatorTest, FindFlatBlocks) {
+  const int kBlockSize = 32;
+  aom_flat_block_finder_t flat_block_finder;
+  ASSERT_EQ(1, aom_flat_block_finder_init(&flat_block_finder, kBlockSize,
+                                          this->kBitDepth, this->kUseHighBD));
+
+  const int num_blocks_w = 8;
+  const int h = kBlockSize;
+  const int w = kBlockSize * num_blocks_w;
+  const int stride = w;
+  this->data_.resize(h * stride, 128);
+  std::vector<uint8_t> flat_blocks(num_blocks_w, 0);
+
+  const int shift = this->kBitDepth - 8;
+  for (int y = 0; y < kBlockSize; ++y) {
+    for (int x = 0; x < kBlockSize; ++x) {
+      // Block 0 (not flat): constant doesn't have enough variance to qualify
+      this->data_[y * stride + x + 0 * kBlockSize] = 128 << shift;
+
+      // Block 1 (not flat): too high of variance is hard to validate as flat
+      this->data_[y * stride + x + 1 * kBlockSize] =
+          ((uint8_t)(128 + randn(&this->random_, 5))) << shift;
+
+      // Block 2 (flat): slight checkerboard added to constant
+      const int check = (x % 2 + y % 2) % 2 ? -2 : 2;
+      this->data_[y * stride + x + 2 * kBlockSize] = (128 + check) << shift;
+
+      // Block 3 (flat): planar block with checkerboard pattern is also flat
+      this->data_[y * stride + x + 3 * kBlockSize] =
+          (y * 2 - x / 2 + 128 + check) << shift;
+
+      // Block 4 (flat): gaussian random with standard deviation 1.
+      this->data_[y * stride + x + 4 * kBlockSize] =
+          ((uint8_t)(randn(&this->random_, 1) + x + 128.0)) << shift;
+
+      // Block 5 (flat): gaussian random with standard deviation 2.
+      this->data_[y * stride + x + 5 * kBlockSize] =
+          ((uint8_t)(randn(&this->random_, 2) + y + 128.0)) << shift;
+
+      // Block 6 (not flat): too high of directional gradient.
+      const int strong_edge = x > kBlockSize / 2 ? 64 : 0;
+      this->data_[y * stride + x + 6 * kBlockSize] =
+          ((uint8_t)(randn(&this->random_, 1) + strong_edge + 128.0)) << shift;
+
+      // Block 7 (not flat): too high gradient.
+      const int big_check = ((x >> 2) % 2 + (y >> 2) % 2) % 2 ? -16 : 16;
+      this->data_[y * stride + x + 7 * kBlockSize] =
+          ((uint8_t)(randn(&this->random_, 1) + big_check + 128.0)) << shift;
+    }
+  }
+
+  EXPECT_EQ(4, aom_flat_block_finder_run(&flat_block_finder,
+                                         (uint8_t *)&this->data_[0], w, h,
+                                         stride, &flat_blocks[0]));
+
+  // First two blocks are not flat
+  EXPECT_EQ(0, flat_blocks[0]);
+  EXPECT_EQ(0, flat_blocks[1]);
+
+  // Next 4 blocks are flat.
+  EXPECT_EQ(255, flat_blocks[2]);
+  EXPECT_EQ(255, flat_blocks[3]);
+  EXPECT_EQ(255, flat_blocks[4]);
+  EXPECT_EQ(255, flat_blocks[5]);
+
+  // Last 2 are not flat by threshold
+  EXPECT_EQ(0, flat_blocks[6]);
+  EXPECT_EQ(0, flat_blocks[7]);
+
+  // Add the noise from non-flat block 1 to every block.
+  for (int y = 0; y < kBlockSize; ++y) {
+    for (int x = 0; x < kBlockSize * num_blocks_w; ++x) {
+      this->data_[y * stride + x] +=
+          (this->data_[y * stride + x % kBlockSize + kBlockSize] -
+           (128 << shift));
+    }
+  }
+  // Now the scored selection will pick the one that is most likely flat (block
+  // 0)
+  EXPECT_EQ(1, aom_flat_block_finder_run(&flat_block_finder,
+                                         (uint8_t *)&this->data_[0], w, h,
+                                         stride, &flat_blocks[0]));
+  EXPECT_EQ(1, flat_blocks[0]);
+  EXPECT_EQ(0, flat_blocks[1]);
+  EXPECT_EQ(0, flat_blocks[2]);
+  EXPECT_EQ(0, flat_blocks[3]);
+  EXPECT_EQ(0, flat_blocks[4]);
+  EXPECT_EQ(0, flat_blocks[5]);
+  EXPECT_EQ(0, flat_blocks[6]);
+  EXPECT_EQ(0, flat_blocks[7]);
+
+  aom_flat_block_finder_free(&flat_block_finder);
+}
+
+REGISTER_TYPED_TEST_CASE_P(FlatBlockEstimatorTest, ExtractBlock,
+                           FindFlatBlocks);
+
+typedef ::testing::Types<BitDepthParams<uint8_t, 8, false>,   // lowbd
+                         BitDepthParams<uint16_t, 8, true>,   // lowbd in 16-bit
+                         BitDepthParams<uint16_t, 10, true>,  // highbd data
+                         BitDepthParams<uint16_t, 12, true> >
+    AllBitDepthParams;
+INSTANTIATE_TYPED_TEST_CASE_P(FlatBlockInstatiation, FlatBlockEstimatorTest,
+                              AllBitDepthParams);
+
+template <typename T>
+class NoiseModelUpdateTest : public ::testing::Test, public T {
+ public:
+  static const int kWidth = 128;
+  static const int kHeight = 128;
+  static const int kBlockSize = 16;
+  static const int kNumBlocksX = kWidth / kBlockSize;
+  static const int kNumBlocksY = kHeight / kBlockSize;
+
+  virtual void SetUp() {
+    const aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3,
+                                              T::kBitDepth, T::kUseHighBD };
+    ASSERT_TRUE(aom_noise_model_init(&model_, params));
+
+    random_.Reset(100171);
+
+    data_.resize(kWidth * kHeight * 3);
+    denoised_.resize(kWidth * kHeight * 3);
+    noise_.resize(kWidth * kHeight * 3);
+    renoise_.resize(kWidth * kHeight);
+    flat_blocks_.resize(kNumBlocksX * kNumBlocksY);
+
+    for (int c = 0, offset = 0; c < 3; ++c, offset += kWidth * kHeight) {
+      data_ptr_[c] = &data_[offset];
+      noise_ptr_[c] = &noise_[offset];
+      denoised_ptr_[c] = &denoised_[offset];
+      strides_[c] = kWidth;
+
+      data_ptr_raw_[c] = (uint8_t *)&data_[offset];
+      denoised_ptr_raw_[c] = (uint8_t *)&denoised_[offset];
+    }
+    chroma_sub_[0] = 0;
+    chroma_sub_[1] = 0;
+  }
+
+  int NoiseModelUpdate(int block_size = kBlockSize) {
+    return aom_noise_model_update(&model_, data_ptr_raw_, denoised_ptr_raw_,
+                                  kWidth, kHeight, strides_, chroma_sub_,
+                                  &flat_blocks_[0], block_size);
+  }
+
+  void TearDown() { aom_noise_model_free(&model_); }
+
+ protected:
+  aom_noise_model_t model_;
+  std::vector<typename T::data_type_t> data_;
+  std::vector<typename T::data_type_t> denoised_;
+
+  std::vector<double> noise_;
+  std::vector<double> renoise_;
+  std::vector<uint8_t> flat_blocks_;
+
+  typename T::data_type_t *data_ptr_[3];
+  typename T::data_type_t *denoised_ptr_[3];
+
+  double *noise_ptr_[3];
+  int strides_[3];
+  int chroma_sub_[2];
+  libaom_test::ACMRandom random_;
+
+ private:
+  uint8_t *data_ptr_raw_[3];
+  uint8_t *denoised_ptr_raw_[3];
+};
+
+TYPED_TEST_CASE_P(NoiseModelUpdateTest);
+
+TYPED_TEST_P(NoiseModelUpdateTest, UpdateFailsNoFlatBlocks) {
+  EXPECT_EQ(AOM_NOISE_STATUS_INSUFFICIENT_FLAT_BLOCKS,
+            this->NoiseModelUpdate());
+}
+
+TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForZeroNoiseAllFlat) {
+  this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
+  this->denoised_.assign(this->denoised_.size(), 128);
+  this->data_.assign(this->denoised_.size(), 128);
+  EXPECT_EQ(AOM_NOISE_STATUS_INTERNAL_ERROR, this->NoiseModelUpdate());
+}
+
+TYPED_TEST_P(NoiseModelUpdateTest, UpdateFailsBlockSizeTooSmall) {
+  this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
+  this->denoised_.assign(this->denoised_.size(), 128);
+  this->data_.assign(this->denoised_.size(), 128);
+  EXPECT_EQ(AOM_NOISE_STATUS_INVALID_ARGUMENT,
+            this->NoiseModelUpdate(6 /* block_size=6 is too small*/));
+}
+
+TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForWhiteRandomNoise) {
+  aom_noise_model_t &model = this->model_;
+  const int kWidth = this->kWidth;
+  const int kHeight = this->kHeight;
+
+  const int shift = this->kBitDepth - 8;
+  for (int y = 0; y < kHeight; ++y) {
+    for (int x = 0; x < kWidth; ++x) {
+      this->data_ptr_[0][y * kWidth + x] =
+          int(64 + y + randn(&this->random_, 1)) << shift;
+      this->denoised_ptr_[0][y * kWidth + x] = (64 + y) << shift;
+      // Make the chroma planes completely correlated with the Y plane
+      for (int c = 1; c < 3; ++c) {
+        this->data_ptr_[c][y * kWidth + x] = this->data_ptr_[0][y * kWidth + x];
+        this->denoised_ptr_[c][y * kWidth + x] =
+            this->denoised_ptr_[0][y * kWidth + x];
+      }
+    }
+  }
+  this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
+  EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
+
+  const double kCoeffEps = 0.075;
+  const int n = model.n;
+  for (int c = 0; c < 3; ++c) {
+    for (int i = 0; i < n; ++i) {
+      EXPECT_NEAR(0, model.latest_state[c].eqns.x[i], kCoeffEps);
+      EXPECT_NEAR(0, model.combined_state[c].eqns.x[i], kCoeffEps);
+    }
+    // The second and third channels are highly correlated with the first.
+    if (c > 0) {
+      ASSERT_EQ(n + 1, model.latest_state[c].eqns.n);
+      ASSERT_EQ(n + 1, model.combined_state[c].eqns.n);
+
+      EXPECT_NEAR(1, model.latest_state[c].eqns.x[n], kCoeffEps);
+      EXPECT_NEAR(1, model.combined_state[c].eqns.x[n], kCoeffEps);
+    }
+  }
+
+  // The fitted noise strength should be close to the standard deviation
+  // for all intensity bins.
+  const double kStdEps = 0.1;
+  const double normalize = 1 << shift;
+
+  for (int i = 0; i < model.latest_state[0].strength_solver.eqns.n; ++i) {
+    EXPECT_NEAR(1.0,
+                model.latest_state[0].strength_solver.eqns.x[i] / normalize,
+                kStdEps);
+    EXPECT_NEAR(1.0,
+                model.combined_state[0].strength_solver.eqns.x[i] / normalize,
+                kStdEps);
+  }
+
+  aom_noise_strength_lut_t lut;
+  aom_noise_strength_solver_fit_piecewise(
+      &model.latest_state[0].strength_solver, -1, &lut);
+  ASSERT_EQ(2, lut.num_points);
+  EXPECT_NEAR(0.0, lut.points[0][0], 1e-5);
+  EXPECT_NEAR(1.0, lut.points[0][1] / normalize, kStdEps);
+  EXPECT_NEAR((1 << this->kBitDepth) - 1, lut.points[1][0], 1e-5);
+  EXPECT_NEAR(1.0, lut.points[1][1] / normalize, kStdEps);
+  aom_noise_strength_lut_free(&lut);
+}
+
+TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForScaledWhiteNoise) {
+  aom_noise_model_t &model = this->model_;
+  const int kWidth = this->kWidth;
+  const int kHeight = this->kHeight;
+
+  const double kCoeffEps = 0.055;
+  const double kLowStd = 1;
+  const double kHighStd = 4;
+  const int shift = this->kBitDepth - 8;
+  for (int y = 0; y < kHeight; ++y) {
+    for (int x = 0; x < kWidth; ++x) {
+      for (int c = 0; c < 3; ++c) {
+        // The image data is bimodal:
+        // Bottom half has low intensity and low noise strength
+        // Top half has high intensity and high noise strength
+        const int avg = (y < kHeight / 2) ? 4 : 245;
+        const double std = (y < kHeight / 2) ? kLowStd : kHighStd;
+        this->data_ptr_[c][y * kWidth + x] =
+            ((uint8_t)std::min((int)255,
+                               (int)(2 + avg + randn(&this->random_, std))))
+            << shift;
+        this->denoised_ptr_[c][y * kWidth + x] = (2 + avg) << shift;
+      }
+    }
+  }
+  // Label all blocks as flat for the update
+  this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
+  EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
+
+  const int n = model.n;
+  // The noise is uncorrelated spatially and with the y channel.
+  // All coefficients should be reasonably close to zero.
+  for (int c = 0; c < 3; ++c) {
+    for (int i = 0; i < n; ++i) {
+      EXPECT_NEAR(0, model.latest_state[c].eqns.x[i], kCoeffEps);
+      EXPECT_NEAR(0, model.combined_state[c].eqns.x[i], kCoeffEps);
+    }
+    if (c > 0) {
+      ASSERT_EQ(n + 1, model.latest_state[c].eqns.n);
+      ASSERT_EQ(n + 1, model.combined_state[c].eqns.n);
+
+      // The correlation to the y channel should be low (near zero)
+      EXPECT_NEAR(0, model.latest_state[c].eqns.x[n], kCoeffEps);
+      EXPECT_NEAR(0, model.combined_state[c].eqns.x[n], kCoeffEps);
+    }
+  }
+
+  // Noise strength should vary between kLowStd and kHighStd.
+  const double kStdEps = 0.15;
+  // We have to normalize fitted standard deviation based on bit depth.
+  const double normalize = (1 << shift);
+
+  ASSERT_EQ(20, model.latest_state[0].strength_solver.eqns.n);
+  for (int i = 0; i < model.latest_state[0].strength_solver.eqns.n; ++i) {
+    const double a = i / 19.0;
+    const double expected = (kLowStd * (1.0 - a) + kHighStd * a);
+    EXPECT_NEAR(expected,
+                model.latest_state[0].strength_solver.eqns.x[i] / normalize,
+                kStdEps);
+    EXPECT_NEAR(expected,
+                model.combined_state[0].strength_solver.eqns.x[i] / normalize,
+                kStdEps);
+  }
+
+  // If we fit a piecewise linear model, there should be two points:
+  // one near kLowStd at 0, and the other near kHighStd and 255.
+  aom_noise_strength_lut_t lut;
+  aom_noise_strength_solver_fit_piecewise(
+      &model.latest_state[0].strength_solver, 2, &lut);
+  ASSERT_EQ(2, lut.num_points);
+  EXPECT_NEAR(0, lut.points[0][0], 1e-4);
+  EXPECT_NEAR(kLowStd, lut.points[0][1] / normalize, kStdEps);
+  EXPECT_NEAR((1 << this->kBitDepth) - 1, lut.points[1][0], 1e-5);
+  EXPECT_NEAR(kHighStd, lut.points[1][1] / normalize, kStdEps);
+  aom_noise_strength_lut_free(&lut);
+}
+
+TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForCorrelatedNoise) {
+  aom_noise_model_t &model = this->model_;
+  const int kWidth = this->kWidth;
+  const int kHeight = this->kHeight;
+  const int kNumCoeffs = 24;
+  const double kStd = 4;
+  const double kStdEps = 0.3;
+  const double kCoeffEps = 0.065;
+  // Use different coefficients for each channel
+  const double kCoeffs[3][24] = {
+    { 0.02884, -0.03356, 0.00633,  0.01757,  0.02849,  -0.04620,
+      0.02833, -0.07178, 0.07076,  -0.11603, -0.10413, -0.16571,
+      0.05158, -0.07969, 0.02640,  -0.07191, 0.02530,  0.41968,
+      0.21450, -0.00702, -0.01401, -0.03676, -0.08713, 0.44196 },
+    { 0.00269, -0.01291, -0.01513, 0.07234,  0.03208,   0.00477,
+      0.00226, -0.00254, 0.03533,  0.12841,  -0.25970,  -0.06336,
+      0.05238, -0.00845, -0.03118, 0.09043,  -0.36558,  0.48903,
+      0.00595, -0.11938, 0.02106,  0.095956, -0.350139, 0.59305 },
+    { -0.00643, -0.01080, -0.01466, 0.06951, 0.03707,  -0.00482,
+      0.00817,  -0.00909, 0.02949,  0.12181, -0.25210, -0.07886,
+      0.06083,  -0.01210, -0.03108, 0.08944, -0.35875, 0.49150,
+      0.00415,  -0.12905, 0.02870,  0.09740, -0.34610, 0.58824 },
+  };
+
+  ASSERT_EQ(model.n, kNumCoeffs);
+  this->chroma_sub_[0] = this->chroma_sub_[1] = 1;
+
+  this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
+
+  // Add different noise onto each plane
+  const int shift = this->kBitDepth - 8;
+  for (int c = 0; c < 3; ++c) {
+    noise_synth(&this->random_, model.params.lag, model.n, model.coords,
+                kCoeffs[c], this->noise_ptr_[c], kWidth, kHeight);
+    const int x_shift = c > 0 ? this->chroma_sub_[0] : 0;
+    const int y_shift = c > 0 ? this->chroma_sub_[1] : 0;
+    for (int y = 0; y < (kHeight >> y_shift); ++y) {
+      for (int x = 0; x < (kWidth >> x_shift); ++x) {
+        const uint8_t value = 64 + x / 2 + y / 4;
+        this->data_ptr_[c][y * kWidth + x] =
+            (uint8_t(value + this->noise_ptr_[c][y * kWidth + x] * kStd))
+            << shift;
+        this->denoised_ptr_[c][y * kWidth + x] = value << shift;
+      }
+    }
+  }
+  EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
+
+  // For the Y plane, the solved coefficients should be close to the original
+  const int n = model.n;
+  for (int c = 0; c < 3; ++c) {
+    for (int i = 0; i < n; ++i) {
+      EXPECT_NEAR(kCoeffs[c][i], model.latest_state[c].eqns.x[i], kCoeffEps);
+      EXPECT_NEAR(kCoeffs[c][i], model.combined_state[c].eqns.x[i], kCoeffEps);
+    }
+    // The chroma planes should be uncorrelated with the luma plane
+    if (c > 0) {
+      EXPECT_NEAR(0, model.latest_state[c].eqns.x[n], kCoeffEps);
+      EXPECT_NEAR(0, model.combined_state[c].eqns.x[n], kCoeffEps);
+    }
+    // Correlation between the coefficient vector and the fitted coefficients
+    // should be close to 1.
+    EXPECT_LT(0.98, aom_normalized_cross_correlation(
+                        model.latest_state[c].eqns.x, kCoeffs[c], kNumCoeffs));
+
+    noise_synth(&this->random_, model.params.lag, model.n, model.coords,
+                model.latest_state[c].eqns.x, &this->renoise_[0], kWidth,
+                kHeight);
+
+    EXPECT_TRUE(aom_noise_data_validate(&this->renoise_[0], kWidth, kHeight));
+  }
+
+  // Check fitted noise strength
+  const double normalize = 1 << shift;
+  for (int c = 0; c < 3; ++c) {
+    for (int i = 0; i < model.latest_state[c].strength_solver.eqns.n; ++i) {
+      EXPECT_NEAR(kStd,
+                  model.latest_state[c].strength_solver.eqns.x[i] / normalize,
+                  kStdEps);
+    }
+  }
+}
+
+TYPED_TEST_P(NoiseModelUpdateTest,
+             NoiseStrengthChangeSignalsDifferentNoiseType) {
+  aom_noise_model_t &model = this->model_;
+  const int kWidth = this->kWidth;
+  const int kHeight = this->kHeight;
+  const int kBlockSize = this->kBlockSize;
+  // Create a gradient image with std = 2 uncorrelated noise
+  const double kStd = 2;
+  const int shift = this->kBitDepth - 8;
+
+  for (int i = 0; i < kWidth * kHeight; ++i) {
+    const uint8_t val = (i % kWidth) < kWidth / 2 ? 64 : 192;
+    for (int c = 0; c < 3; ++c) {
+      this->noise_ptr_[c][i] = randn(&this->random_, 1);
+      this->data_ptr_[c][i] = ((uint8_t)(this->noise_ptr_[c][i] * kStd + val))
+                              << shift;
+      this->denoised_ptr_[c][i] = val << shift;
+    }
+  }
+  this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
+  EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
+
+  const int kNumBlocks = kWidth * kHeight / kBlockSize / kBlockSize;
+  EXPECT_EQ(kNumBlocks, model.latest_state[0].strength_solver.num_equations);
+  EXPECT_EQ(kNumBlocks, model.latest_state[1].strength_solver.num_equations);
+  EXPECT_EQ(kNumBlocks, model.latest_state[2].strength_solver.num_equations);
+  EXPECT_EQ(kNumBlocks, model.combined_state[0].strength_solver.num_equations);
+  EXPECT_EQ(kNumBlocks, model.combined_state[1].strength_solver.num_equations);
+  EXPECT_EQ(kNumBlocks, model.combined_state[2].strength_solver.num_equations);
+
+  // Bump up noise by an insignificant amount
+  for (int i = 0; i < kWidth * kHeight; ++i) {
+    const uint8_t val = (i % kWidth) < kWidth / 2 ? 64 : 192;
+    this->data_ptr_[0][i] =
+        ((uint8_t)(this->noise_ptr_[0][i] * (kStd + 0.085) + val)) << shift;
+  }
+  EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
+
+  const double kARGainTolerance = 0.02;
+  for (int c = 0; c < 3; ++c) {
+    EXPECT_EQ(kNumBlocks, model.latest_state[c].strength_solver.num_equations);
+    EXPECT_EQ(15250, model.latest_state[c].num_observations);
+    EXPECT_NEAR(1, model.latest_state[c].ar_gain, kARGainTolerance);
+
+    EXPECT_EQ(2 * kNumBlocks,
+              model.combined_state[c].strength_solver.num_equations);
+    EXPECT_EQ(2 * 15250, model.combined_state[c].num_observations);
+    EXPECT_NEAR(1, model.combined_state[c].ar_gain, kARGainTolerance);
+  }
+
+  // Bump up the noise strength on half the image for one channel by a
+  // significant amount.
+  for (int i = 0; i < kWidth * kHeight; ++i) {
+    const uint8_t val = (i % kWidth) < kWidth / 2 ? 64 : 128;
+    if (i % kWidth < kWidth / 2) {
+      this->data_ptr_[0][i] =
+          ((uint8_t)(randn(&this->random_, kStd + 0.5) + val)) << shift;
+    }
+  }
+  EXPECT_EQ(AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE, this->NoiseModelUpdate());
+
+  // Since we didn't update the combined state, it should still be at 2 *
+  // num_blocks
+  EXPECT_EQ(kNumBlocks, model.latest_state[0].strength_solver.num_equations);
+  EXPECT_EQ(2 * kNumBlocks,
+            model.combined_state[0].strength_solver.num_equations);
+
+  // In normal operation, the "latest" estimate can be saved to the "combined"
+  // state for continued updates.
+  aom_noise_model_save_latest(&model);
+  for (int c = 0; c < 3; ++c) {
+    EXPECT_EQ(kNumBlocks, model.latest_state[c].strength_solver.num_equations);
+    EXPECT_EQ(15250, model.latest_state[c].num_observations);
+    EXPECT_NEAR(1, model.latest_state[c].ar_gain, kARGainTolerance);
+
+    EXPECT_EQ(kNumBlocks,
+              model.combined_state[c].strength_solver.num_equations);
+    EXPECT_EQ(15250, model.combined_state[c].num_observations);
+    EXPECT_NEAR(1, model.combined_state[c].ar_gain, kARGainTolerance);
+  }
+}
+
+TYPED_TEST_P(NoiseModelUpdateTest, NoiseCoeffsSignalsDifferentNoiseType) {
+  aom_noise_model_t &model = this->model_;
+  const int kWidth = this->kWidth;
+  const int kHeight = this->kHeight;
+  const double kCoeffs[2][24] = {
+    { 0.02884, -0.03356, 0.00633,  0.01757,  0.02849,  -0.04620,
+      0.02833, -0.07178, 0.07076,  -0.11603, -0.10413, -0.16571,
+      0.05158, -0.07969, 0.02640,  -0.07191, 0.02530,  0.41968,
+      0.21450, -0.00702, -0.01401, -0.03676, -0.08713, 0.44196 },
+    { 0.00269, -0.01291, -0.01513, 0.07234,  0.03208,   0.00477,
+      0.00226, -0.00254, 0.03533,  0.12841,  -0.25970,  -0.06336,
+      0.05238, -0.00845, -0.03118, 0.09043,  -0.36558,  0.48903,
+      0.00595, -0.11938, 0.02106,  0.095956, -0.350139, 0.59305 }
+  };
+
+  noise_synth(&this->random_, model.params.lag, model.n, model.coords,
+              kCoeffs[0], this->noise_ptr_[0], kWidth, kHeight);
+  for (int i = 0; i < kWidth * kHeight; ++i) {
+    this->data_ptr_[0][i] = (uint8_t)(128 + this->noise_ptr_[0][i]);
+  }
+  this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
+  EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
+
+  // Now try with the second set of AR coefficients
+  noise_synth(&this->random_, model.params.lag, model.n, model.coords,
+              kCoeffs[1], this->noise_ptr_[0], kWidth, kHeight);
+  for (int i = 0; i < kWidth * kHeight; ++i) {
+    this->data_ptr_[0][i] = (uint8_t)(128 + this->noise_ptr_[0][i]);
+  }
+  EXPECT_EQ(AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE, this->NoiseModelUpdate());
+}
+REGISTER_TYPED_TEST_CASE_P(NoiseModelUpdateTest, UpdateFailsNoFlatBlocks,
+                           UpdateSuccessForZeroNoiseAllFlat,
+                           UpdateFailsBlockSizeTooSmall,
+                           UpdateSuccessForWhiteRandomNoise,
+                           UpdateSuccessForScaledWhiteNoise,
+                           UpdateSuccessForCorrelatedNoise,
+                           NoiseStrengthChangeSignalsDifferentNoiseType,
+                           NoiseCoeffsSignalsDifferentNoiseType);
+
+INSTANTIATE_TYPED_TEST_CASE_P(NoiseModelUpdateTestInstatiation,
+                              NoiseModelUpdateTest, AllBitDepthParams);
+
+TEST(NoiseModelGetGrainParameters, TestLagSize) {
+  aom_film_grain_t film_grain;
+  for (int lag = 1; lag <= 3; ++lag) {
+    aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 };
+    aom_noise_model_t model;
+    EXPECT_TRUE(aom_noise_model_init(&model, params));
+    EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain));
+    EXPECT_EQ(lag, film_grain.ar_coeff_lag);
+    aom_noise_model_free(&model);
+  }
+
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 4, 8, 0 };
+  aom_noise_model_t model;
+  EXPECT_TRUE(aom_noise_model_init(&model, params));
+  EXPECT_FALSE(aom_noise_model_get_grain_parameters(&model, &film_grain));
+  aom_noise_model_free(&model);
+}
+
+TEST(NoiseModelGetGrainParameters, TestARCoeffShiftBounds) {
+  struct TestCase {
+    double max_input_value;
+    int expected_ar_coeff_shift;
+    int expected_value;
+  };
+  const int lag = 1;
+  const int kNumTestCases = 19;
+  const TestCase test_cases[] = {
+    // Test cases for ar_coeff_shift = 9
+    { 0, 9, 0 },
+    { 0.125, 9, 64 },
+    { -0.125, 9, -64 },
+    { 0.2499, 9, 127 },
+    { -0.25, 9, -128 },
+    // Test cases for ar_coeff_shift = 8
+    { 0.25, 8, 64 },
+    { -0.2501, 8, -64 },
+    { 0.499, 8, 127 },
+    { -0.5, 8, -128 },
+    // Test cases for ar_coeff_shift = 7
+    { 0.5, 7, 64 },
+    { -0.5001, 7, -64 },
+    { 0.999, 7, 127 },
+    { -1, 7, -128 },
+    // Test cases for ar_coeff_shift = 6
+    { 1.0, 6, 64 },
+    { -1.0001, 6, -64 },
+    { 2.0, 6, 127 },
+    { -2.0, 6, -128 },
+    { 4, 6, 127 },
+    { -4, 6, -128 },
+  };
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 };
+  aom_noise_model_t model;
+  EXPECT_TRUE(aom_noise_model_init(&model, params));
+
+  for (int i = 0; i < kNumTestCases; ++i) {
+    const TestCase &test_case = test_cases[i];
+    model.combined_state[0].eqns.x[0] = test_case.max_input_value;
+
+    aom_film_grain_t film_grain;
+    EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain));
+    EXPECT_EQ(1, film_grain.ar_coeff_lag);
+    EXPECT_EQ(test_case.expected_ar_coeff_shift, film_grain.ar_coeff_shift);
+    EXPECT_EQ(test_case.expected_value, film_grain.ar_coeffs_y[0]);
+  }
+  aom_noise_model_free(&model);
+}
+
+TEST(NoiseModelGetGrainParameters, TestNoiseStrengthShiftBounds) {
+  struct TestCase {
+    double max_input_value;
+    int expected_scaling_shift;
+    int expected_value;
+  };
+  const int kNumTestCases = 10;
+  const TestCase test_cases[] = {
+    { 0, 11, 0 },      { 1, 11, 64 },     { 2, 11, 128 }, { 3.99, 11, 255 },
+    { 4, 10, 128 },    { 7.99, 10, 255 }, { 8, 9, 128 },  { 16, 8, 128 },
+    { 31.99, 8, 255 }, { 64, 8, 255 },  // clipped
+  };
+  const int lag = 1;
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 };
+  aom_noise_model_t model;
+  EXPECT_TRUE(aom_noise_model_init(&model, params));
+
+  for (int i = 0; i < kNumTestCases; ++i) {
+    const TestCase &test_case = test_cases[i];
+    aom_equation_system_t &eqns = model.combined_state[0].strength_solver.eqns;
+    // Set the fitted scale parameters to be a constant value.
+    for (int j = 0; j < eqns.n; ++j) {
+      eqns.x[j] = test_case.max_input_value;
+    }
+    aom_film_grain_t film_grain;
+    EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain));
+    // We expect a single constant segemnt
+    EXPECT_EQ(test_case.expected_scaling_shift, film_grain.scaling_shift);
+    EXPECT_EQ(test_case.expected_value, film_grain.scaling_points_y[0][1]);
+    EXPECT_EQ(test_case.expected_value, film_grain.scaling_points_y[1][1]);
+  }
+  aom_noise_model_free(&model);
+}
+
+// The AR coefficients are the same inputs used to generate "Test 2" in the test
+// vectors
+TEST(NoiseModelGetGrainParameters, GetGrainParametersReal) {
+  const double kInputCoeffsY[] = { 0.0315,  0.0073,  0.0218,  0.00235, 0.00511,
+                                   -0.0222, 0.0627,  -0.022,  0.05575, -0.1816,
+                                   0.0107,  -0.1966, 0.00065, -0.0809, 0.04934,
+                                   -0.1349, -0.0352, 0.41772, 0.27973, 0.04207,
+                                   -0.0429, -0.1372, 0.06193, 0.52032 };
+  const double kInputCoeffsCB[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,
+                                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5 };
+  const double kInputCoeffsCR[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0,
+                                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.5 };
+  const int kExpectedARCoeffsY[] = { 4,  1,   3,  0,   1,  -3,  8, -3,
+                                     7,  -23, 1,  -25, 0,  -10, 6, -17,
+                                     -5, 53,  36, 5,   -5, -18, 8, 67 };
+  const int kExpectedARCoeffsCB[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84 };
+  const int kExpectedARCoeffsCR[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0,
+                                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -126 };
+  // Scaling function is initialized analytically with a sqrt function.
+  const int kNumScalingPointsY = 12;
+  const int kExpectedScalingPointsY[][2] = {
+    { 0, 0 },     { 13, 44 },   { 27, 62 },   { 40, 76 },
+    { 54, 88 },   { 67, 98 },   { 94, 117 },  { 121, 132 },
+    { 148, 146 }, { 174, 159 }, { 201, 171 }, { 255, 192 },
+  };
+
+  const int lag = 3;
+  aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 };
+  aom_noise_model_t model;
+  EXPECT_TRUE(aom_noise_model_init(&model, params));
+
+  // Setup the AR coeffs
+  memcpy(model.combined_state[0].eqns.x, kInputCoeffsY, sizeof(kInputCoeffsY));
+  memcpy(model.combined_state[1].eqns.x, kInputCoeffsCB,
+         sizeof(kInputCoeffsCB));
+  memcpy(model.combined_state[2].eqns.x, kInputCoeffsCR,
+         sizeof(kInputCoeffsCR));
+  for (int i = 0; i < model.combined_state[0].strength_solver.num_bins; ++i) {
+    const double x =
+        ((double)i) / (model.combined_state[0].strength_solver.num_bins - 1.0);
+    model.combined_state[0].strength_solver.eqns.x[i] = 6 * sqrt(x);
+    model.combined_state[1].strength_solver.eqns.x[i] = 3;
+    model.combined_state[2].strength_solver.eqns.x[i] = 2;
+
+    // Inject some observations into the strength solver, as during film grain
+    // parameter extraction an estimate of the average strength will be used to
+    // adjust correlation.
+    const int n = model.combined_state[0].strength_solver.num_bins;
+    for (int j = 0; j < model.combined_state[0].strength_solver.num_bins; ++j) {
+      model.combined_state[0].strength_solver.eqns.A[i * n + j] = 1;
+      model.combined_state[1].strength_solver.eqns.A[i * n + j] = 1;
+      model.combined_state[2].strength_solver.eqns.A[i * n + j] = 1;
+    }
+  }
+
+  aom_film_grain_t film_grain;
+  EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain));
+  EXPECT_EQ(lag, film_grain.ar_coeff_lag);
+  EXPECT_EQ(3, film_grain.ar_coeff_lag);
+  EXPECT_EQ(7, film_grain.ar_coeff_shift);
+  EXPECT_EQ(10, film_grain.scaling_shift);
+  EXPECT_EQ(kNumScalingPointsY, film_grain.num_y_points);
+  EXPECT_EQ(1, film_grain.update_parameters);
+  EXPECT_EQ(1, film_grain.apply_grain);
+
+  const int kNumARCoeffs = 24;
+  for (int i = 0; i < kNumARCoeffs; ++i) {
+    EXPECT_EQ(kExpectedARCoeffsY[i], film_grain.ar_coeffs_y[i]);
+  }
+  for (int i = 0; i < kNumARCoeffs + 1; ++i) {
+    EXPECT_EQ(kExpectedARCoeffsCB[i], film_grain.ar_coeffs_cb[i]);
+  }
+  for (int i = 0; i < kNumARCoeffs + 1; ++i) {
+    EXPECT_EQ(kExpectedARCoeffsCR[i], film_grain.ar_coeffs_cr[i]);
+  }
+  for (int i = 0; i < kNumScalingPointsY; ++i) {
+    EXPECT_EQ(kExpectedScalingPointsY[i][0], film_grain.scaling_points_y[i][0]);
+    EXPECT_EQ(kExpectedScalingPointsY[i][1], film_grain.scaling_points_y[i][1]);
+  }
+
+  // CB strength should just be a piecewise segment
+  EXPECT_EQ(2, film_grain.num_cb_points);
+  EXPECT_EQ(0, film_grain.scaling_points_cb[0][0]);
+  EXPECT_EQ(255, film_grain.scaling_points_cb[1][0]);
+  EXPECT_EQ(96, film_grain.scaling_points_cb[0][1]);
+  EXPECT_EQ(96, film_grain.scaling_points_cb[1][1]);
+
+  // CR strength should just be a piecewise segment
+  EXPECT_EQ(2, film_grain.num_cr_points);
+  EXPECT_EQ(0, film_grain.scaling_points_cr[0][0]);
+  EXPECT_EQ(255, film_grain.scaling_points_cr[1][0]);
+  EXPECT_EQ(64, film_grain.scaling_points_cr[0][1]);
+  EXPECT_EQ(64, film_grain.scaling_points_cr[1][1]);
+
+  EXPECT_EQ(128, film_grain.cb_mult);
+  EXPECT_EQ(192, film_grain.cb_luma_mult);
+  EXPECT_EQ(256, film_grain.cb_offset);
+  EXPECT_EQ(128, film_grain.cr_mult);
+  EXPECT_EQ(192, film_grain.cr_luma_mult);
+  EXPECT_EQ(256, film_grain.cr_offset);
+  EXPECT_EQ(0, film_grain.chroma_scaling_from_luma);
+  EXPECT_EQ(0, film_grain.grain_scale_shift);
+
+  aom_noise_model_free(&model);
+}
+
+template <typename T>
+class WienerDenoiseTest : public ::testing::Test, public T {
+ public:
+  static void SetUpTestCase() { aom_dsp_rtcd(); }
+
+ protected:
+  void SetUp() {
+    static const float kNoiseLevel = 5.f;
+    static const float kStd = 4.0;
+    static const double kMaxValue = (1 << T::kBitDepth) - 1;
+
+    chroma_sub_[0] = 1;
+    chroma_sub_[1] = 1;
+    stride_[0] = kWidth;
+    stride_[1] = kWidth / 2;
+    stride_[2] = kWidth / 2;
+    for (int k = 0; k < 3; ++k) {
+      data_[k].resize(kWidth * kHeight);
+      denoised_[k].resize(kWidth * kHeight);
+      noise_psd_[k].resize(kBlockSize * kBlockSize);
+    }
+
+    const double kCoeffsY[] = { 0.0406, -0.116, -0.078, -0.152, 0.0033, -0.093,
+                                0.048,  0.404,  0.2353, -0.035, -0.093, 0.441 };
+    const int kCoords[12][2] = {
+      { -2, -2 }, { -1, -2 }, { 0, -2 }, { 1, -2 }, { 2, -2 }, { -2, -1 },
+      { -1, -1 }, { 0, -1 },  { 1, -1 }, { 2, -1 }, { -2, 0 }, { -1, 0 }
+    };
+    const int kLag = 2;
+    const int kLength = 12;
+    libaom_test::ACMRandom random;
+    std::vector<double> noise(kWidth * kHeight);
+    noise_synth(&random, kLag, kLength, kCoords, kCoeffsY, &noise[0], kWidth,
+                kHeight);
+    noise_psd_[0] = get_noise_psd(&noise[0], kWidth, kHeight, kBlockSize);
+    for (int i = 0; i < kBlockSize * kBlockSize; ++i) {
+      noise_psd_[0][i] = (float)(noise_psd_[0][i] * kStd * kStd * kScaleNoise *
+                                 kScaleNoise / (kMaxValue * kMaxValue));
+    }
+
+    float psd_value =
+        aom_noise_psd_get_default_value(kBlockSizeChroma, kNoiseLevel);
+    for (int i = 0; i < kBlockSizeChroma * kBlockSizeChroma; ++i) {
+      noise_psd_[1][i] = psd_value;
+      noise_psd_[2][i] = psd_value;
+    }
+    for (int y = 0; y < kHeight; ++y) {
+      for (int x = 0; x < kWidth; ++x) {
+        data_[0][y * stride_[0] + x] = (typename T::data_type_t)fclamp(
+            (x + noise[y * stride_[0] + x] * kStd) * kScaleNoise, 0, kMaxValue);
+      }
+    }
+
+    for (int c = 1; c < 3; ++c) {
+      for (int y = 0; y < (kHeight >> 1); ++y) {
+        for (int x = 0; x < (kWidth >> 1); ++x) {
+          data_[c][y * stride_[c] + x] = (typename T::data_type_t)fclamp(
+              (x + randn(&random, kStd)) * kScaleNoise, 0, kMaxValue);
+        }
+      }
+    }
+    for (int k = 0; k < 3; ++k) {
+      noise_psd_ptrs_[k] = &noise_psd_[k][0];
+    }
+  }
+  static const int kBlockSize = 32;
+  static const int kBlockSizeChroma = 16;
+  static const int kWidth = 256;
+  static const int kHeight = 256;
+  static const int kScaleNoise = 1 << (T::kBitDepth - 8);
+
+  std::vector<typename T::data_type_t> data_[3];
+  std::vector<typename T::data_type_t> denoised_[3];
+  std::vector<float> noise_psd_[3];
+  int chroma_sub_[2];
+  float *noise_psd_ptrs_[3];
+  int stride_[3];
+};
+
+TYPED_TEST_CASE_P(WienerDenoiseTest);
+
+TYPED_TEST_P(WienerDenoiseTest, InvalidBlockSize) {
+  const uint8_t *const data_ptrs[3] = {
+    reinterpret_cast<uint8_t *>(&this->data_[0][0]),
+    reinterpret_cast<uint8_t *>(&this->data_[1][0]),
+    reinterpret_cast<uint8_t *>(&this->data_[2][0]),
+  };
+  uint8_t *denoised_ptrs[3] = {
+    reinterpret_cast<uint8_t *>(&this->denoised_[0][0]),
+    reinterpret_cast<uint8_t *>(&this->denoised_[1][0]),
+    reinterpret_cast<uint8_t *>(&this->denoised_[2][0]),
+  };
+  EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth,
+                                     this->kHeight, this->stride_,
+                                     this->chroma_sub_, this->noise_psd_ptrs_,
+                                     18, this->kBitDepth, this->kUseHighBD));
+  EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth,
+                                     this->kHeight, this->stride_,
+                                     this->chroma_sub_, this->noise_psd_ptrs_,
+                                     48, this->kBitDepth, this->kUseHighBD));
+  EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth,
+                                     this->kHeight, this->stride_,
+                                     this->chroma_sub_, this->noise_psd_ptrs_,
+                                     64, this->kBitDepth, this->kUseHighBD));
+}
+
+TYPED_TEST_P(WienerDenoiseTest, InvalidChromaSubsampling) {
+  const uint8_t *const data_ptrs[3] = {
+    reinterpret_cast<uint8_t *>(&this->data_[0][0]),
+    reinterpret_cast<uint8_t *>(&this->data_[1][0]),
+    reinterpret_cast<uint8_t *>(&this->data_[2][0]),
+  };
+  uint8_t *denoised_ptrs[3] = {
+    reinterpret_cast<uint8_t *>(&this->denoised_[0][0]),
+    reinterpret_cast<uint8_t *>(&this->denoised_[1][0]),
+    reinterpret_cast<uint8_t *>(&this->denoised_[2][0]),
+  };
+  int chroma_sub[2] = { 1, 0 };
+  EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth,
+                                     this->kHeight, this->stride_, chroma_sub,
+                                     this->noise_psd_ptrs_, 32, this->kBitDepth,
+                                     this->kUseHighBD));
+
+  chroma_sub[0] = 0;
+  chroma_sub[1] = 1;
+  EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth,
+                                     this->kHeight, this->stride_, chroma_sub,
+                                     this->noise_psd_ptrs_, 32, this->kBitDepth,
+                                     this->kUseHighBD));
+}
+
+TYPED_TEST_P(WienerDenoiseTest, GradientTest) {
+  const int kWidth = this->kWidth;
+  const int kHeight = this->kHeight;
+  const int kBlockSize = this->kBlockSize;
+  const uint8_t *const data_ptrs[3] = {
+    reinterpret_cast<uint8_t *>(&this->data_[0][0]),
+    reinterpret_cast<uint8_t *>(&this->data_[1][0]),
+    reinterpret_cast<uint8_t *>(&this->data_[2][0]),
+  };
+  uint8_t *denoised_ptrs[3] = {
+    reinterpret_cast<uint8_t *>(&this->denoised_[0][0]),
+    reinterpret_cast<uint8_t *>(&this->denoised_[1][0]),
+    reinterpret_cast<uint8_t *>(&this->denoised_[2][0]),
+  };
+  const int ret = aom_wiener_denoise_2d(
+      data_ptrs, denoised_ptrs, kWidth, kHeight, this->stride_,
+      this->chroma_sub_, this->noise_psd_ptrs_, this->kBlockSize,
+      this->kBitDepth, this->kUseHighBD);
+  EXPECT_EQ(1, ret);
+
+  // Check the noise on the denoised image (from the analytical gradient)
+  // and make sure that it is less than what we added.
+  for (int c = 0; c < 3; ++c) {
+    std::vector<double> measured_noise(kWidth * kHeight);
+
+    double var = 0;
+    const int shift = (c > 0);
+    for (int x = 0; x < (kWidth >> shift); ++x) {
+      for (int y = 0; y < (kHeight >> shift); ++y) {
+        const double diff = this->denoised_[c][y * this->stride_[c] + x] -
+                            x * this->kScaleNoise;
+        var += diff * diff;
+        measured_noise[y * kWidth + x] = diff;
+      }
+    }
+    var /= (kWidth * kHeight);
+    const double std = sqrt(std::max(0.0, var));
+    EXPECT_LE(std, 1.25f * this->kScaleNoise);
+    if (c == 0) {
+      std::vector<float> measured_psd =
+          get_noise_psd(&measured_noise[0], kWidth, kHeight, kBlockSize);
+      std::vector<double> measured_psd_d(kBlockSize * kBlockSize);
+      std::vector<double> noise_psd_d(kBlockSize * kBlockSize);
+      std::copy(measured_psd.begin(), measured_psd.end(),
+                measured_psd_d.begin());
+      std::copy(this->noise_psd_[0].begin(), this->noise_psd_[0].end(),
+                noise_psd_d.begin());
+      EXPECT_LT(
+          aom_normalized_cross_correlation(&measured_psd_d[0], &noise_psd_d[0],
+                                           (int)(noise_psd_d.size())),
+          0.35);
+    }
+  }
+}
+
+REGISTER_TYPED_TEST_CASE_P(WienerDenoiseTest, InvalidBlockSize,
+                           InvalidChromaSubsampling, GradientTest);
+
+INSTANTIATE_TYPED_TEST_CASE_P(WienerDenoiseTestInstatiation, WienerDenoiseTest,
+                              AllBitDepthParams);
diff --git a/third_party/aom/test/obmc_sad_test.cc b/third_party/aom/test/obmc_sad_test.cc
index 219c5d8109..1820da2663 100644
--- a/third_party/aom/test/obmc_sad_test.cc
+++ b/third_party/aom/test/obmc_sad_test.cc
@@ -14,8 +14,9 @@
 #include "test/function_equivalence_test.h"
 #include "test/register_state_check.h"
 
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
 #include "aom/aom_integer.h"
 
 #define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
@@ -84,13 +85,10 @@ TEST_P(ObmcSadTest, ExtremeValues) {
 }
 
 #if HAVE_SSE4_1
-#if CONFIG_MOTION_VAR
 const ObmcSadTest::ParamType sse4_functions[] = {
-#if CONFIG_EXT_PARTITION
   TestFuncs(aom_obmc_sad128x128_c, aom_obmc_sad128x128_sse4_1),
   TestFuncs(aom_obmc_sad128x64_c, aom_obmc_sad128x64_sse4_1),
   TestFuncs(aom_obmc_sad64x128_c, aom_obmc_sad64x128_sse4_1),
-#endif  // CONFIG_EXT_PARTITION
   TestFuncs(aom_obmc_sad64x64_c, aom_obmc_sad64x64_sse4_1),
   TestFuncs(aom_obmc_sad64x32_c, aom_obmc_sad64x32_sse4_1),
   TestFuncs(aom_obmc_sad32x64_c, aom_obmc_sad32x64_sse4_1),
@@ -108,14 +106,12 @@ const ObmcSadTest::ParamType sse4_functions[] = {
 
 INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcSadTest,
                         ::testing::ValuesIn(sse4_functions));
-#endif  // CONFIG_MOTION_VAR
 #endif  // HAVE_SSE4_1
 
 ////////////////////////////////////////////////////////////////////////////////
 // High bit-depth
 ////////////////////////////////////////////////////////////////////////////////
 
-#if CONFIG_HIGHBITDEPTH
 class ObmcSadHBDTest : public FunctionEquivalenceTest<ObmcSadF> {};
 
 TEST_P(ObmcSadHBDTest, RandomValues) {
@@ -169,13 +165,10 @@ TEST_P(ObmcSadHBDTest, ExtremeValues) {
 }
 
 #if HAVE_SSE4_1
-#if CONFIG_MOTION_VAR
 ObmcSadHBDTest::ParamType sse4_functions_hbd[] = {
-#if CONFIG_EXT_PARTITION
   TestFuncs(aom_highbd_obmc_sad128x128_c, aom_highbd_obmc_sad128x128_sse4_1),
   TestFuncs(aom_highbd_obmc_sad128x64_c, aom_highbd_obmc_sad128x64_sse4_1),
   TestFuncs(aom_highbd_obmc_sad64x128_c, aom_highbd_obmc_sad64x128_sse4_1),
-#endif  // CONFIG_EXT_PARTITION
   TestFuncs(aom_highbd_obmc_sad64x64_c, aom_highbd_obmc_sad64x64_sse4_1),
   TestFuncs(aom_highbd_obmc_sad64x32_c, aom_highbd_obmc_sad64x32_sse4_1),
   TestFuncs(aom_highbd_obmc_sad32x64_c, aom_highbd_obmc_sad32x64_sse4_1),
@@ -193,7 +186,5 @@ ObmcSadHBDTest::ParamType sse4_functions_hbd[] = {
 
 INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcSadHBDTest,
                         ::testing::ValuesIn(sse4_functions_hbd));
-#endif  // CONFIG_MOTION_VAR
 #endif  // HAVE_SSE4_1
-#endif  // CONFIG_HIGHBITDEPTH
 }  // namespace
diff --git a/third_party/aom/test/obmc_variance_test.cc b/third_party/aom/test/obmc_variance_test.cc
index 1b30645a52..04fee82856 100644
--- a/third_party/aom/test/obmc_variance_test.cc
+++ b/third_party/aom/test/obmc_variance_test.cc
@@ -15,8 +15,9 @@
 #include "test/function_equivalence_test.h"
 #include "test/register_state_check.h"
 
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
 #include "aom/aom_integer.h"
 
 #define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
@@ -93,13 +94,10 @@ TEST_P(ObmcVarianceTest, ExtremeValues) {
 }
 
 #if HAVE_SSE4_1
-#if CONFIG_MOTION_VAR
 const ObmcVarianceTest::ParamType sse4_functions[] = {
-#if CONFIG_EXT_PARTITION
   TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_sse4_1),
   TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_sse4_1),
   TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_sse4_1),
-#endif  // CONFIG_EXT_PARTITION
   TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_sse4_1),
   TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_sse4_1),
   TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_sse4_1),
@@ -117,14 +115,12 @@ const ObmcVarianceTest::ParamType sse4_functions[] = {
 
 INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcVarianceTest,
                         ::testing::ValuesIn(sse4_functions));
-#endif  // CONFIG_MOTION_VAR
 #endif  // HAVE_SSE4_1
 
 ////////////////////////////////////////////////////////////////////////////////
 // High bit-depth
 ////////////////////////////////////////////////////////////////////////////////
 
-#if CONFIG_HIGHBITDEPTH
 class ObmcVarianceHBDTest : public FunctionEquivalenceTest<ObmcVarF> {};
 
 TEST_P(ObmcVarianceHBDTest, RandomValues) {
@@ -183,16 +179,13 @@ TEST_P(ObmcVarianceHBDTest, ExtremeValues) {
 }
 
 #if HAVE_SSE4_1
-#if CONFIG_MOTION_VAR
 ObmcVarianceHBDTest::ParamType sse4_functions_hbd[] = {
-#if CONFIG_EXT_PARTITION
   TestFuncs(aom_highbd_obmc_variance128x128_c,
             aom_highbd_obmc_variance128x128_sse4_1, 8),
   TestFuncs(aom_highbd_obmc_variance128x64_c,
             aom_highbd_obmc_variance128x64_sse4_1, 8),
   TestFuncs(aom_highbd_obmc_variance64x128_c,
             aom_highbd_obmc_variance64x128_sse4_1, 8),
-#endif  // CONFIG_EXT_PARTITION
   TestFuncs(aom_highbd_obmc_variance64x64_c,
             aom_highbd_obmc_variance64x64_sse4_1, 8),
   TestFuncs(aom_highbd_obmc_variance64x32_c,
@@ -219,14 +212,12 @@ ObmcVarianceHBDTest::ParamType sse4_functions_hbd[] = {
             8),
   TestFuncs(aom_highbd_obmc_variance4x4_c, aom_highbd_obmc_variance4x4_sse4_1,
             8),
-#if CONFIG_EXT_PARTITION
   TestFuncs(aom_highbd_10_obmc_variance128x128_c,
             aom_highbd_10_obmc_variance128x128_sse4_1, 10),
   TestFuncs(aom_highbd_10_obmc_variance128x64_c,
             aom_highbd_10_obmc_variance128x64_sse4_1, 10),
   TestFuncs(aom_highbd_10_obmc_variance64x128_c,
             aom_highbd_10_obmc_variance64x128_sse4_1, 10),
-#endif  // CONFIG_EXT_PARTITION
   TestFuncs(aom_highbd_10_obmc_variance64x64_c,
             aom_highbd_10_obmc_variance64x64_sse4_1, 10),
   TestFuncs(aom_highbd_10_obmc_variance64x32_c,
@@ -253,14 +244,12 @@ ObmcVarianceHBDTest::ParamType sse4_functions_hbd[] = {
             aom_highbd_10_obmc_variance4x8_sse4_1, 10),
   TestFuncs(aom_highbd_10_obmc_variance4x4_c,
             aom_highbd_10_obmc_variance4x4_sse4_1, 10),
-#if CONFIG_EXT_PARTITION
   TestFuncs(aom_highbd_12_obmc_variance128x128_c,
             aom_highbd_12_obmc_variance128x128_sse4_1, 12),
   TestFuncs(aom_highbd_12_obmc_variance128x64_c,
             aom_highbd_12_obmc_variance128x64_sse4_1, 12),
   TestFuncs(aom_highbd_12_obmc_variance64x128_c,
             aom_highbd_12_obmc_variance64x128_sse4_1, 12),
-#endif  // CONFIG_EXT_PARTITION
   TestFuncs(aom_highbd_12_obmc_variance64x64_c,
             aom_highbd_12_obmc_variance64x64_sse4_1, 12),
   TestFuncs(aom_highbd_12_obmc_variance64x32_c,
@@ -291,7 +280,5 @@ ObmcVarianceHBDTest::ParamType sse4_functions_hbd[] = {
 
 INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcVarianceHBDTest,
                         ::testing::ValuesIn(sse4_functions_hbd));
-#endif  // CONFIG_MOTION_VAR
 #endif  // HAVE_SSE4_1
-#endif  // CONFIG_HIGHBITDEPTH
 }  // namespace
diff --git a/third_party/aom/test/onyxc_int_test.cc b/third_party/aom/test/onyxc_int_test.cc
new file mode 100644
index 0000000000..3889595187
--- /dev/null
+++ b/third_party/aom/test/onyxc_int_test.cc
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "av1/common/onyxc_int.h"
+
+TEST(OnyxcInt, TestGetTxSize) {
+  for (int t = TX_4X4; t < TX_SIZES_ALL; t++) {
+    TX_SIZE t2 = get_tx_size(tx_size_wide[t], tx_size_high[t]);
+    GTEST_ASSERT_EQ(tx_size_wide[t], tx_size_wide[t2]);
+    GTEST_ASSERT_EQ(tx_size_high[t], tx_size_high[t2]);
+  }
+}
diff --git a/third_party/aom/test/partial_idct_test.cc b/third_party/aom/test/partial_idct_test.cc
deleted file mode 100644
index b2ea176e86..0000000000
--- a/third_party/aom/test/partial_idct_test.cc
+++ /dev/null
@@ -1,471 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "av1/common/blockd.h"
-#include "av1/common/scan.h"
-#include "aom/aom_integer.h"
-#include "aom_ports/aom_timer.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
-typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
-typedef void (*InvTxfmWithBdFunc)(const tran_low_t *in, uint8_t *out,
-                                  int stride, int bd);
-
-template <InvTxfmFunc fn>
-void wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {
-  (void)bd;
-  fn(in, out, stride);
-}
-
-typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmWithBdFunc, InvTxfmWithBdFunc,
-                        TX_SIZE, int, int, int>
-    PartialInvTxfmParam;
-const int kMaxNumCoeffs = 1024;
-const int kCountTestBlock = 10000;
-
-class PartialIDctTest : public ::testing::TestWithParam<PartialInvTxfmParam> {
- public:
-  virtual ~PartialIDctTest() {}
-  virtual void SetUp() {
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-    ftxfm_ = GET_PARAM(0);
-    full_itxfm_ = GET_PARAM(1);
-    partial_itxfm_ = GET_PARAM(2);
-    tx_size_ = GET_PARAM(3);
-    last_nonzero_ = GET_PARAM(4);
-    bit_depth_ = GET_PARAM(5);
-    pixel_size_ = GET_PARAM(6);
-    mask_ = (1 << bit_depth_) - 1;
-
-    switch (tx_size_) {
-      case TX_4X4: size_ = 4; break;
-      case TX_8X8: size_ = 8; break;
-      case TX_16X16: size_ = 16; break;
-      case TX_32X32: size_ = 32; break;
-      default: FAIL() << "Wrong Size!"; break;
-    }
-
-    // Randomize stride_ to a value less than or equal to 1024
-    stride_ = rnd_(1024) + 1;
-    if (stride_ < size_) {
-      stride_ = size_;
-    }
-    // Align stride_ to 16 if it's bigger than 16.
-    if (stride_ > 16) {
-      stride_ &= ~15;
-    }
-
-    input_block_size_ = size_ * size_;
-    output_block_size_ = size_ * stride_;
-
-    input_block_ = reinterpret_cast<tran_low_t *>(
-        aom_memalign(16, sizeof(*input_block_) * input_block_size_));
-    output_block_ = reinterpret_cast<uint8_t *>(
-        aom_memalign(16, pixel_size_ * output_block_size_));
-    output_block_ref_ = reinterpret_cast<uint8_t *>(
-        aom_memalign(16, pixel_size_ * output_block_size_));
-  }
-
-  virtual void TearDown() {
-    aom_free(input_block_);
-    input_block_ = NULL;
-    aom_free(output_block_);
-    output_block_ = NULL;
-    aom_free(output_block_ref_);
-    output_block_ref_ = NULL;
-    libaom_test::ClearSystemState();
-  }
-
-  void InitMem() {
-    memset(input_block_, 0, sizeof(*input_block_) * input_block_size_);
-    if (pixel_size_ == 1) {
-      for (int j = 0; j < output_block_size_; ++j) {
-        output_block_[j] = output_block_ref_[j] = rnd_.Rand16() & mask_;
-      }
-    } else {
-      ASSERT_EQ(2, pixel_size_);
-      uint16_t *const output = reinterpret_cast<uint16_t *>(output_block_);
-      uint16_t *const output_ref =
-          reinterpret_cast<uint16_t *>(output_block_ref_);
-      for (int j = 0; j < output_block_size_; ++j) {
-        output[j] = output_ref[j] = rnd_.Rand16() & mask_;
-      }
-    }
-  }
-
-  void InitInput() {
-    const int max_coeff = 32766 / 4;
-    int max_energy_leftover = max_coeff * max_coeff;
-    for (int j = 0; j < last_nonzero_; ++j) {
-      int16_t coeff = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
-                                           (rnd_.Rand16() - 32768) / 65536);
-      max_energy_leftover -= coeff * coeff;
-      if (max_energy_leftover < 0) {
-        max_energy_leftover = 0;
-        coeff = 0;
-      }
-      input_block_[av1_default_scan_orders[tx_size_].scan[j]] = coeff;
-    }
-  }
-
- protected:
-  int last_nonzero_;
-  TX_SIZE tx_size_;
-  tran_low_t *input_block_;
-  uint8_t *output_block_;
-  uint8_t *output_block_ref_;
-  int size_;
-  int stride_;
-  int pixel_size_;
-  int input_block_size_;
-  int output_block_size_;
-  int bit_depth_;
-  int mask_;
-  FwdTxfmFunc ftxfm_;
-  InvTxfmWithBdFunc full_itxfm_;
-  InvTxfmWithBdFunc partial_itxfm_;
-  ACMRandom rnd_;
-};
-
-TEST_P(PartialIDctTest, RunQuantCheck) {
-  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
-
-  InitMem();
-  for (int i = 0; i < kCountTestBlock; ++i) {
-    // Initialize a test block with input range [-mask_, mask_].
-    if (i == 0) {
-      for (int k = 0; k < input_block_size_; ++k) {
-        input_extreme_block[k] = mask_;
-      }
-    } else if (i == 1) {
-      for (int k = 0; k < input_block_size_; ++k) {
-        input_extreme_block[k] = -mask_;
-      }
-    } else {
-      for (int k = 0; k < input_block_size_; ++k) {
-        input_extreme_block[k] = rnd_.Rand8() % 2 ? mask_ : -mask_;
-      }
-    }
-
-    ftxfm_(input_extreme_block, output_ref_block, size_);
-
-    // quantization with minimum allowed step sizes
-    input_block_[0] = (output_ref_block[0] / 4) * 4;
-    for (int k = 1; k < last_nonzero_; ++k) {
-      const int pos = av1_default_scan_orders[tx_size_].scan[k];
-      input_block_[pos] = (output_ref_block[pos] / 4) * 4;
-    }
-
-    ASM_REGISTER_STATE_CHECK(
-        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
-    ASM_REGISTER_STATE_CHECK(
-        partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
-    ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
-                        pixel_size_ * output_block_size_))
-        << "Error: partial inverse transform produces different results";
-  }
-}
-
-TEST_P(PartialIDctTest, ResultsMatch) {
-  for (int i = 0; i < kCountTestBlock; ++i) {
-    InitMem();
-    InitInput();
-
-    ASM_REGISTER_STATE_CHECK(
-        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
-    ASM_REGISTER_STATE_CHECK(
-        partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
-    ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
-                        pixel_size_ * output_block_size_))
-        << "Error: partial inverse transform produces different results";
-  }
-}
-
-TEST_P(PartialIDctTest, AddOutputBlock) {
-  for (int i = 0; i < kCountTestBlock; ++i) {
-    InitMem();
-    for (int j = 0; j < last_nonzero_; ++j) {
-      input_block_[av1_default_scan_orders[tx_size_].scan[j]] = 10;
-    }
-
-    ASM_REGISTER_STATE_CHECK(
-        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
-    ASM_REGISTER_STATE_CHECK(
-        partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
-    ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
-                        pixel_size_ * output_block_size_))
-        << "Error: Transform results are not correctly added to output.";
-  }
-}
-
-TEST_P(PartialIDctTest, SingleExtremeCoeff) {
-  const int16_t max_coeff = INT16_MAX;
-  const int16_t min_coeff = INT16_MIN;
-  for (int i = 0; i < last_nonzero_; ++i) {
-    memset(input_block_, 0, sizeof(*input_block_) * input_block_size_);
-    // Run once for min and once for max.
-    for (int j = 0; j < 2; ++j) {
-      const int coeff = j ? min_coeff : max_coeff;
-
-      memset(output_block_, 0, pixel_size_ * output_block_size_);
-      memset(output_block_ref_, 0, pixel_size_ * output_block_size_);
-      input_block_[av1_default_scan_orders[tx_size_].scan[i]] = coeff;
-
-      ASM_REGISTER_STATE_CHECK(
-          full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
-      ASM_REGISTER_STATE_CHECK(
-          partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
-      ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
-                          pixel_size_ * output_block_size_))
-          << "Error: Fails with single coeff of " << coeff << " at " << i
-          << ".";
-    }
-  }
-}
-
-TEST_P(PartialIDctTest, DISABLED_Speed) {
-  // Keep runtime stable with transform size.
-  const int kCountSpeedTestBlock = 500000000 / input_block_size_;
-  InitMem();
-  InitInput();
-
-  for (int i = 0; i < kCountSpeedTestBlock; ++i) {
-    ASM_REGISTER_STATE_CHECK(
-        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
-  }
-  aom_usec_timer timer;
-  aom_usec_timer_start(&timer);
-  for (int i = 0; i < kCountSpeedTestBlock; ++i) {
-    partial_itxfm_(input_block_, output_block_, stride_, bit_depth_);
-  }
-  libaom_test::ClearSystemState();
-  aom_usec_timer_mark(&timer);
-  const int elapsed_time =
-      static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
-  printf("idct%dx%d_%d (bitdepth %d) time: %5d ms\n", size_, size_,
-         last_nonzero_, bit_depth_, elapsed_time);
-
-  ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
-                      pixel_size_ * output_block_size_))
-      << "Error: partial inverse transform produces different results";
-}
-
-using std::tr1::make_tuple;
-
-const PartialInvTxfmParam c_partial_idct_tests[] = {
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_1024_add_c>, TX_32X32, 1024, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_135_add_c>, TX_32X32, 135, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_34_add_c>, TX_32X32, 34, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_1_add_c>, TX_32X32, 1, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_256_add_c>, TX_16X16, 256, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_38_add_c>, TX_16X16, 38, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_10_add_c>, TX_16X16, 10, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_1_add_c>, TX_16X16, 1, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_64_add_c>, TX_8X8, 64, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_12_add_c>, TX_8X8, 12, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_1_add_c>, TX_8X8, 1, 8, 1),
-  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
-             &wrapper<aom_idct4x4_16_add_c>, TX_4X4, 16, 8, 1),
-  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
-             &wrapper<aom_idct4x4_1_add_c>, TX_4X4, 1, 8, 1)
-};
-
-INSTANTIATE_TEST_CASE_P(C, PartialIDctTest,
-                        ::testing::ValuesIn(c_partial_idct_tests));
-
-#if HAVE_NEON && !CONFIG_HIGHBITDEPTH
-const PartialInvTxfmParam neon_partial_idct_tests[] = {
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_1_add_neon>, TX_32X32, 1, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_10_add_neon>, TX_16X16, 10, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_1_add_neon>, TX_16X16, 1, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_12_add_neon>, TX_8X8, 12, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_1_add_neon>, TX_8X8, 1, 8, 1),
-  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
-             &wrapper<aom_idct4x4_1_add_neon>, TX_4X4, 1, 8, 1)
-};
-
-INSTANTIATE_TEST_CASE_P(NEON, PartialIDctTest,
-                        ::testing::ValuesIn(neon_partial_idct_tests));
-#endif  // HAVE_NEON && !CONFIG_HIGHBITDEPTH
-
-#if HAVE_SSE2
-const PartialInvTxfmParam sse2_partial_idct_tests[] = {
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_1024_add_sse2>, TX_32X32, 1024, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_1024_add_sse2>, TX_32X32, 135, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_34_add_sse2>, TX_32X32, 34, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_1_add_sse2>, TX_32X32, 1, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_256_add_sse2>, TX_16X16, 256, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_10_add_sse2>, TX_16X16, 10, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_1_add_sse2>, TX_16X16, 1, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_64_add_sse2>, TX_8X8, 64, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_12_add_sse2>, TX_8X8, 12, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_1_add_sse2>, TX_8X8, 1, 8, 1),
-  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
-             &wrapper<aom_idct4x4_16_add_sse2>, TX_4X4, 16, 8, 1),
-  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
-             &wrapper<aom_idct4x4_1_add_sse2>, TX_4X4, 1, 8, 1)
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, PartialIDctTest,
-                        ::testing::ValuesIn(sse2_partial_idct_tests));
-
-#endif  // HAVE_SSE2
-
-#if HAVE_SSSE3
-const PartialInvTxfmParam ssse3_partial_idct_tests[] = {
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_1024_add_ssse3>, TX_32X32, 1024, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_135_add_ssse3>, TX_32X32, 135, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_34_add_ssse3>, TX_32X32, 34, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_64_add_ssse3>, TX_8X8, 64, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_12_add_ssse3>, TX_8X8, 12, 8, 1)
-};
-
-INSTANTIATE_TEST_CASE_P(SSSE3, PartialIDctTest,
-                        ::testing::ValuesIn(ssse3_partial_idct_tests));
-#endif  // HAVE_SSSE3
-
-#if HAVE_AVX2
-const PartialInvTxfmParam avx2_partial_idct_tests[] = {
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_256_add_avx2>, TX_16X16, 256, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_38_add_avx2>, TX_16X16, 38, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_10_add_avx2>, TX_16X16, 10, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_1_add_avx2>, TX_16X16, 1, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_1024_add_avx2>, TX_32X32, 1024, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_135_add_avx2>, TX_32X32, 135, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_34_add_avx2>, TX_32X32, 34, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_1_add_avx2>, TX_32X32, 1, 8, 1),
-};
-
-INSTANTIATE_TEST_CASE_P(AVX2, PartialIDctTest,
-                        ::testing::ValuesIn(avx2_partial_idct_tests));
-#endif  // HAVE_AVX2
-
-#if HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH
-const PartialInvTxfmParam dspr2_partial_idct_tests[] = {
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_1024_add_dspr2>, TX_32X32, 1024, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_1024_add_dspr2>, TX_32X32, 135, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_34_add_dspr2>, TX_32X32, 34, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_1_add_dspr2>, TX_32X32, 1, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_256_add_dspr2>, TX_16X16, 256, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_10_add_dspr2>, TX_16X16, 10, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_1_add_dspr2>, TX_16X16, 1, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_64_add_dspr2>, TX_8X8, 64, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_12_add_dspr2>, TX_8X8, 12, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_1_add_dspr2>, TX_8X8, 1, 8, 1),
-  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
-             &wrapper<aom_idct4x4_16_add_dspr2>, TX_4X4, 16, 8, 1),
-  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
-             &wrapper<aom_idct4x4_1_add_dspr2>, TX_4X4, 1, 8, 1)
-};
-
-INSTANTIATE_TEST_CASE_P(DSPR2, PartialIDctTest,
-                        ::testing::ValuesIn(dspr2_partial_idct_tests));
-#endif  // HAVE_DSPR2 && !CONFIG_HIGHBITDEPTH
-
-#if HAVE_MSA && !CONFIG_HIGHBITDEPTH
-const PartialInvTxfmParam msa_partial_idct_tests[] = {
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_1024_add_msa>, TX_32X32, 1024, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_1024_add_msa>, TX_32X32, 135, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_34_add_msa>, TX_32X32, 34, 8, 1),
-  make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
-             &wrapper<aom_idct32x32_1_add_msa>, TX_32X32, 1, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_256_add_msa>, TX_16X16, 256, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_10_add_msa>, TX_16X16, 10, 8, 1),
-  make_tuple(&aom_fdct16x16_c, &wrapper<aom_idct16x16_256_add_c>,
-             &wrapper<aom_idct16x16_1_add_msa>, TX_16X16, 1, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_64_add_msa>, TX_8X8, 64, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_12_add_msa>, TX_8X8, 12, 8, 1),
-  make_tuple(&aom_fdct8x8_c, &wrapper<aom_idct8x8_64_add_c>,
-             &wrapper<aom_idct8x8_1_add_msa>, TX_8X8, 1, 8, 1),
-  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
-             &wrapper<aom_idct4x4_16_add_msa>, TX_4X4, 16, 8, 1),
-  make_tuple(&aom_fdct4x4_c, &wrapper<aom_idct4x4_16_add_c>,
-             &wrapper<aom_idct4x4_1_add_msa>, TX_4X4, 1, 8, 1)
-};
-
-INSTANTIATE_TEST_CASE_P(MSA, PartialIDctTest,
-                        ::testing::ValuesIn(msa_partial_idct_tests));
-#endif  // HAVE_MSA && !CONFIG_HIGHBITDEPTH
-
-}  // namespace
diff --git a/third_party/aom/test/qm_test.cc b/third_party/aom/test/qm_test.cc
new file mode 100644
index 0000000000..c87506b41a
--- /dev/null
+++ b/third_party/aom/test/qm_test.cc
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include "config/aom_config.h"
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+class QMTest
+    : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  QMTest() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~QMTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    set_cpu_used_ = GET_PARAM(2);
+  }
+
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(AV1E_SET_ENABLE_QM, 1);
+      encoder->Control(AV1E_SET_QM_MIN, qm_min_);
+      encoder->Control(AV1E_SET_QM_MAX, qm_max_);
+
+      encoder->Control(AOME_SET_MAX_INTRA_BITRATE_PCT, 100);
+    }
+  }
+
+  void DoTest(int qm_min, int qm_max) {
+    qm_min_ = qm_min;
+    qm_max_ = qm_max;
+    cfg_.kf_max_dist = 12;
+    cfg_.rc_min_quantizer = 8;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 6;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_target_bitrate = 300;
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 15);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+  int set_cpu_used_;
+  int qm_min_;
+  int qm_max_;
+};
+
+// encodes and decodes without a mismatch.
+TEST_P(QMTest, TestNoMisMatchQM1) { DoTest(5, 9); }
+
+// encodes and decodes without a mismatch.
+TEST_P(QMTest, TestNoMisMatchQM2) { DoTest(0, 8); }
+
+// encodes and decodes without a mismatch.
+TEST_P(QMTest, TestNoMisMatchQM3) { DoTest(9, 15); }
+
+AV1_INSTANTIATE_TEST_CASE(QMTest,
+                          ::testing::Values(::libaom_test::kRealTime,
+                                            ::libaom_test::kOnePassGood),
+                          ::testing::Range(5, 9));
+}  // namespace
diff --git a/third_party/aom/test/quantize_func_test.cc b/third_party/aom/test/quantize_func_test.cc
index 2e48290219..97e73bff0d 100644
--- a/third_party/aom/test/quantize_func_test.cc
+++ b/third_party/aom/test/quantize_func_test.cc
@@ -11,13 +11,14 @@
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
-#include "./av1_rtcd.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
 #include "aom/aom_codec.h"
 #include "aom_ports/aom_timer.h"
 #include "av1/encoder/encoder.h"
-#include "av1/encoder/av1_quantize.h"
+#include "av1/common/scan.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
@@ -34,36 +35,56 @@ using libaom_test::ACMRandom;
       const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, \
       const int16_t *iscan
 
+#define QUAN_PARAM_LIST_NO_SKIP                                               \
+  const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,    \
+      const int16_t *round_ptr, const int16_t *quant_ptr,                     \
+      const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,                 \
+      tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, \
+      const int16_t *scan, const int16_t *iscan
+
 typedef void (*QuantizeFunc)(QUAN_PARAM_LIST);
-typedef void (*QuantizeFuncHbd)(QUAN_PARAM_LIST, int log_scale);
+typedef void (*QuantizeFuncHbd)(QUAN_PARAM_LIST_NO_SKIP, int log_scale);
+typedef void (*QuantizeFuncNoSkip)(QUAN_PARAM_LIST_NO_SKIP);
 
 #define HBD_QUAN_FUNC                                                      \
-  fn(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr, quant_ptr,      \
-     quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, \
-     iscan, log_scale)
+  fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \
+     qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, log_scale)
+
+#define LBD_QUAN_FUNC_NO_SKIP                                              \
+  fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \
+     qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan)
 
 template <QuantizeFuncHbd fn>
 void highbd_quan16x16_wrapper(QUAN_PARAM_LIST) {
+  (void)skip_block;
   const int log_scale = 0;
   HBD_QUAN_FUNC;
 }
 
 template <QuantizeFuncHbd fn>
 void highbd_quan32x32_wrapper(QUAN_PARAM_LIST) {
+  (void)skip_block;
   const int log_scale = 1;
   HBD_QUAN_FUNC;
 }
 
 template <QuantizeFuncHbd fn>
 void highbd_quan64x64_wrapper(QUAN_PARAM_LIST) {
+  (void)skip_block;
   const int log_scale = 2;
   HBD_QUAN_FUNC;
 }
 
+template <QuantizeFuncNoSkip fn>
+void lowbd_quan_wrapper(QUAN_PARAM_LIST) {
+  (void)skip_block;
+  LBD_QUAN_FUNC_NO_SKIP;
+}
+
 typedef enum { TYPE_B, TYPE_DC, TYPE_FP } QuantType;
 
-typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, TX_SIZE, QuantType,
-                        aom_bit_depth_t>
+using ::testing::tuple;
+typedef tuple<QuantizeFunc, QuantizeFunc, TX_SIZE, QuantType, aom_bit_depth_t>
     QuantizeParam;
 
 typedef struct {
@@ -98,7 +119,7 @@ class QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
   }
 
   void InitQuantizer() {
-    av1_build_quantizer(bd_, 0, 0, 0, &qtab_->quant, &qtab_->dequant);
+    av1_build_quantizer(bd_, 0, 0, 0, 0, 0, &qtab_->quant, &qtab_->dequant);
   }
 
   void QuantizeRun(bool is_loop, int q = 0, int test_num = 1) {
@@ -114,7 +135,7 @@ class QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
     uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs);
 
     // Testing uses 2-D DCT scan order table
-    const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT, 0);
+    const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT);
 
     // Testing uses luminance quantization table
     const int16_t *zbin = qtab_->quant.y_zbin[q];
@@ -130,7 +151,7 @@ class QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
     }
 
     const int16_t *quant_shift = qtab_->quant.y_quant_shift[q];
-    const int16_t *dequant = qtab_->dequant.y_dequant[q];
+    const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q];
 
     for (int i = 0; i < test_num; ++i) {
       if (is_loop) FillCoeffRandom();
@@ -171,7 +192,7 @@ class QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
     }
   }
 
-  int coeff_num() const { return tx_size_2d[tx_size_]; }
+  int coeff_num() const { return av1_get_max_eob(tx_size_); }
 
   void FillCoeff(tran_low_t c) {
     const int n_coeffs = coeff_num();
@@ -255,6 +276,13 @@ TEST_P(QuantizeTest, MultipleQ) {
   }
 }
 
+// Force the coeff to be half the value of the dequant.  This exposes a
+// mismatch found in av1_quantize_fp_sse2().
+TEST_P(QuantizeTest, CoeffHalfDequant) {
+  FillCoeff(16);
+  QuantizeRun(false, 25, 1);
+}
+
 TEST_P(QuantizeTest, DISABLED_Speed) {
   tran_low_t *coeff_ptr = coeff_;
   const intptr_t n_coeffs = coeff_num();
@@ -268,7 +296,7 @@ TEST_P(QuantizeTest, DISABLED_Speed) {
   uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs);
 
   // Testing uses 2-D DCT scan order table
-  const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT, 0);
+  const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT);
 
   // Testing uses luminance quantization table
   const int q = 22;
@@ -276,7 +304,7 @@ TEST_P(QuantizeTest, DISABLED_Speed) {
   const int16_t *round_fp = qtab_->quant.y_round_fp[q];
   const int16_t *quant_fp = qtab_->quant.y_quant_fp[q];
   const int16_t *quant_shift = qtab_->quant.y_quant_shift[q];
-  const int16_t *dequant = qtab_->dequant.y_dequant[q];
+  const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q];
   const int kNumTests = 5000000;
   aom_usec_timer timer;
 
@@ -293,15 +321,37 @@ TEST_P(QuantizeTest, DISABLED_Speed) {
   printf("Elapsed time: %d us\n", elapsed_time);
 }
 
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
 
 #if HAVE_AVX2
 const QuantizeParam kQParamArrayAvx2[] = {
-  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_16X16, TYPE_FP,
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_16X16, TYPE_FP,
+             AOM_BITS_8),
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_4X16, TYPE_FP,
+             AOM_BITS_8),
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_16X4, TYPE_FP,
+             AOM_BITS_8),
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_32X8, TYPE_FP,
+             AOM_BITS_8),
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_avx2>, TX_8X32, TYPE_FP,
+             AOM_BITS_8),
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_32x32_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_32x32_avx2>, TX_32X32, TYPE_FP,
+             AOM_BITS_8),
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_32x32_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_32x32_avx2>, TX_16X64, TYPE_FP,
+             AOM_BITS_8),
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_32x32_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_32x32_avx2>, TX_64X16, TYPE_FP,
+             AOM_BITS_8),
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_64x64_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_64x64_avx2>, TX_64X64, TYPE_FP,
              AOM_BITS_8),
-  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, TX_32X32,
-             TYPE_FP, AOM_BITS_8),
-#if CONFIG_HIGHBITDEPTH
   make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
              &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>, TX_16X16,
              TYPE_FP, AOM_BITS_8),
@@ -320,7 +370,6 @@ const QuantizeParam kQParamArrayAvx2[] = {
   make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,
              &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>, TX_32X32,
              TYPE_FP, AOM_BITS_12),
-#if CONFIG_TX64X64
   make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,
              &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>, TX_64X64,
              TYPE_FP, AOM_BITS_8),
@@ -330,14 +379,12 @@ const QuantizeParam kQParamArrayAvx2[] = {
   make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,
              &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>, TX_64X64,
              TYPE_FP, AOM_BITS_12),
-#endif  // CONFIG_TX64X64
   make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2, TX_16X16,
              TYPE_B, AOM_BITS_8),
   make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2, TX_16X16,
              TYPE_B, AOM_BITS_10),
   make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2, TX_16X16,
              TYPE_B, AOM_BITS_12),
-#endif  // CONFIG_HIGHBITDEPTH
 };
 
 INSTANTIATE_TEST_CASE_P(AVX2, QuantizeTest,
@@ -346,9 +393,21 @@ INSTANTIATE_TEST_CASE_P(AVX2, QuantizeTest,
 
 #if HAVE_SSE2
 const QuantizeParam kQParamArraySSE2[] = {
-  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_16X16, TYPE_FP,
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_16X16, TYPE_FP,
+             AOM_BITS_8),
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_4X16, TYPE_FP,
+             AOM_BITS_8),
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_16X4, TYPE_FP,
+             AOM_BITS_8),
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_8X32, TYPE_FP,
+             AOM_BITS_8),
+  make_tuple(&lowbd_quan_wrapper<av1_quantize_fp_c>,
+             &lowbd_quan_wrapper<av1_quantize_fp_sse2>, TX_32X8, TYPE_FP,
              AOM_BITS_8),
-#if CONFIG_HIGHBITDEPTH
   make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2, TX_16X16,
              TYPE_B, AOM_BITS_8),
   make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2, TX_16X16,
@@ -361,28 +420,10 @@ const QuantizeParam kQParamArraySSE2[] = {
              TX_32X32, TYPE_B, AOM_BITS_10),
   make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,
              TX_32X32, TYPE_B, AOM_BITS_12),
-#endif
 };
 
 INSTANTIATE_TEST_CASE_P(SSE2, QuantizeTest,
                         ::testing::ValuesIn(kQParamArraySSE2));
 #endif
 
-#if !CONFIG_HIGHBITDEPTH && HAVE_SSSE3 && ARCH_X86_64
-const QuantizeParam kQ16x16ParamArraySSSE3[] = {
-  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_ssse3, TX_16X16, TYPE_FP,
-             AOM_BITS_8),
-};
-INSTANTIATE_TEST_CASE_P(SSSE3, QuantizeTest,
-                        ::testing::ValuesIn(kQ16x16ParamArraySSSE3));
-
-// TODO(any):
-//  The following test does not pass yet
-const QuantizeParam kQ32x32ParamArraySSSE3[] = { make_tuple(
-    &av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_ssse3, TX_32X32, TYPE_FP,
-    AOM_BITS_8) };
-INSTANTIATE_TEST_CASE_P(DISABLED_SSSE3, QuantizeTest,
-                        ::testing::ValuesIn(kQ32x32ParamArraySSSE3));
-#endif
-
 }  // namespace
diff --git a/third_party/aom/test/realtime_test.cc b/third_party/aom/test/realtime_test.cc
deleted file mode 100644
index 11d2a32419..0000000000
--- a/third_party/aom/test/realtime_test.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/util.h"
-#include "test/video_source.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-namespace {
-
-const int kVideoSourceWidth = 320;
-const int kVideoSourceHeight = 240;
-const int kFramesToEncode = 2;
-
-class RealtimeTest
-    : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
-      public ::libaom_test::EncoderTest {
- protected:
-  RealtimeTest() : EncoderTest(GET_PARAM(0)), frame_packets_(0) {}
-  virtual ~RealtimeTest() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    cfg_.g_lag_in_frames = 0;
-    SetMode(::libaom_test::kRealTime);
-  }
-
-  virtual void BeginPassHook(unsigned int /*pass*/) {
-    // TODO(tomfinegan): We're changing the pass value here to make sure
-    // we get frames when real time mode is combined with |g_pass| set to
-    // AOM_RC_FIRST_PASS. This is necessary because EncoderTest::RunLoop() sets
-    // the pass value based on the mode passed into EncoderTest::SetMode(),
-    // which overrides the one specified in SetUp() above.
-    cfg_.g_pass = AOM_RC_FIRST_PASS;
-  }
-  virtual void FramePktHook(const aom_codec_cx_pkt_t * /*pkt*/) {
-    frame_packets_++;
-  }
-
-  int frame_packets_;
-};
-
-TEST_P(RealtimeTest, RealtimeFirstPassProducesFrames) {
-  ::libaom_test::RandomVideoSource video;
-  video.SetSize(kVideoSourceWidth, kVideoSourceHeight);
-  video.set_limit(kFramesToEncode);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  EXPECT_EQ(kFramesToEncode, frame_packets_);
-}
-
-AV1_INSTANTIATE_TEST_CASE(RealtimeTest,
-                          ::testing::Values(::libaom_test::kRealTime));
-
-}  // namespace
diff --git a/third_party/aom/test/reconinter_test.cc b/third_party/aom/test/reconinter_test.cc
new file mode 100644
index 0000000000..4f74c817e0
--- /dev/null
+++ b/third_party/aom/test/reconinter_test.cc
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_ports/mem.h"
+#include "av1/common/scan.h"
+#include "av1/common/txb_common.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+using libaom_test::ACMRandom;
+
+class BuildCompDiffwtdMaskTest : public ::testing::TestWithParam<int> {
+ public:
+  virtual ~BuildCompDiffwtdMaskTest() {}
+
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+  void RunTest(const int sb_type, const int is_speed,
+               const DIFFWTD_MASK_TYPE type);
+
+ private:
+  ACMRandom rnd_;
+};
+
+typedef void (*buildcompdiffwtdmaskd16_func)(
+    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
+    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
+    ConvolveParams *conv_params, int bd);
+
+typedef ::testing::tuple<int, buildcompdiffwtdmaskd16_func, BLOCK_SIZE>
+    BuildCompDiffwtdMaskD16Param;
+
+#if HAVE_SSE4_1 || HAVE_NEON
+::testing::internal::ParamGenerator<BuildCompDiffwtdMaskD16Param> BuildParams(
+    buildcompdiffwtdmaskd16_func filter) {
+  return ::testing::Combine(::testing::Range(8, 13, 2),
+                            ::testing::Values(filter),
+                            ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
+}
+#endif
+class BuildCompDiffwtdMaskD16Test
+    : public ::testing::TestWithParam<BuildCompDiffwtdMaskD16Param> {
+ public:
+  ~BuildCompDiffwtdMaskD16Test() {}
+  virtual void TearDown() { libaom_test::ClearSystemState(); }
+  void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+
+ protected:
+  void RunCheckOutput(buildcompdiffwtdmaskd16_func test_impl);
+  void RunSpeedTest(buildcompdiffwtdmaskd16_func test_impl);
+  libaom_test::ACMRandom rnd_;
+};  // class BuildCompDiffwtdMaskD16Test
+
+void BuildCompDiffwtdMaskD16Test::RunCheckOutput(
+    buildcompdiffwtdmaskd16_func test_impl) {
+  const int block_idx = GET_PARAM(2);
+  const int bd = GET_PARAM(0);
+  const int width = block_size_wide[block_idx];
+  const int height = block_size_high[block_idx];
+  DECLARE_ALIGNED(16, uint8_t, mask_ref[2 * MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(16, uint8_t, mask_test[2 * MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint16_t, src0[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint16_t, src1[MAX_SB_SQUARE]);
+
+  ConvolveParams conv_params =
+      get_conv_params_no_round(0, 0, 0, NULL, 0, 1, bd);
+
+  int in_precision =
+      bd + 2 * FILTER_BITS - conv_params.round_0 - conv_params.round_1 + 2;
+
+  for (int i = 0; i < MAX_SB_SQUARE; i++) {
+    src0[i] = rnd_.Rand16() & ((1 << in_precision) - 1);
+    src1[i] = rnd_.Rand16() & ((1 << in_precision) - 1);
+  }
+
+  for (int mask_type = 0; mask_type < DIFFWTD_MASK_TYPES; mask_type++) {
+    av1_build_compound_diffwtd_mask_d16_c(
+        mask_ref, (DIFFWTD_MASK_TYPE)mask_type, src0, width, src1, width,
+        height, width, &conv_params, bd);
+
+    test_impl(mask_test, (DIFFWTD_MASK_TYPE)mask_type, src0, width, src1, width,
+              height, width, &conv_params, bd);
+
+    for (int r = 0; r < height; ++r) {
+      for (int c = 0; c < width; ++c) {
+        ASSERT_EQ(mask_ref[c + r * width], mask_test[c + r * width])
+            << "Mismatch at unit tests for BuildCompDiffwtdMaskD16Test\n"
+            << " Pixel mismatch at index "
+            << "[" << r << "," << c << "] "
+            << " @ " << width << "x" << height << " inv " << mask_type;
+      }
+    }
+  }
+}
+
+void BuildCompDiffwtdMaskD16Test::RunSpeedTest(
+    buildcompdiffwtdmaskd16_func test_impl) {
+  const int block_idx = GET_PARAM(2);
+  const int bd = GET_PARAM(0);
+  const int width = block_size_wide[block_idx];
+  const int height = block_size_high[block_idx];
+  DECLARE_ALIGNED(16, uint8_t, mask[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint16_t, src0[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint16_t, src1[MAX_SB_SQUARE]);
+
+  ConvolveParams conv_params =
+      get_conv_params_no_round(0, 0, 0, NULL, 0, 1, bd);
+
+  int in_precision =
+      bd + 2 * FILTER_BITS - conv_params.round_0 - conv_params.round_1 + 2;
+
+  for (int i = 0; i < MAX_SB_SQUARE; i++) {
+    src0[i] = rnd_.Rand16() & ((1 << in_precision) - 1);
+    src1[i] = rnd_.Rand16() & ((1 << in_precision) - 1);
+  }
+
+  const int num_loops = 1000000000 / (width + height);
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+
+  for (int i = 0; i < num_loops; ++i)
+    av1_build_compound_diffwtd_mask_d16_c(mask, DIFFWTD_38, src0, width, src1,
+                                          width, height, width, &conv_params,
+                                          bd);
+
+  aom_usec_timer_mark(&timer);
+  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+  printf("av1_build_compound_diffwtd_mask_d16 c_code %3dx%-3d: %7.2f us\n",
+         width, height, 1000.0 * elapsed_time / num_loops);
+
+  aom_usec_timer timer1;
+  aom_usec_timer_start(&timer1);
+
+  for (int i = 0; i < num_loops; ++i)
+    test_impl(mask, DIFFWTD_38, src0, width, src1, width, height, width,
+              &conv_params, bd);
+
+  aom_usec_timer_mark(&timer1);
+  const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
+  printf("av1_build_compound_diffwtd_mask_d16 test_code %3dx%-3d: %7.2f us\n",
+         width, height, 1000.0 * elapsed_time1 / num_loops);
+}
+#if HAVE_SSE4_1
+void BuildCompDiffwtdMaskTest::RunTest(const int sb_type, const int is_speed,
+                                       const DIFFWTD_MASK_TYPE type) {
+  const int width = block_size_wide[sb_type];
+  const int height = block_size_high[sb_type];
+  DECLARE_ALIGNED(16, uint8_t, mask_ref[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(16, uint8_t, mask_test[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(16, uint8_t, src0[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(16, uint8_t, src1[MAX_SB_SQUARE]);
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  for (int i = 0; i < width * height; i++) {
+    src0[i] = rnd.Rand8();
+    src1[i] = rnd.Rand8();
+  }
+  const int run_times = is_speed ? (10000000 / (width + height)) : 1;
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < run_times; ++i) {
+    av1_build_compound_diffwtd_mask_c(mask_ref, type, src0, width, src1, width,
+                                      height, width);
+  }
+  const double t1 = get_time_mark(&timer);
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < run_times; ++i) {
+    av1_build_compound_diffwtd_mask_sse4_1(mask_test, type, src0, width, src1,
+                                           width, height, width);
+  }
+  const double t2 = get_time_mark(&timer);
+  if (is_speed) {
+    printf("mask %d %3dx%-3d:%7.2f/%7.2fns", type, width, height, t1, t2);
+    printf("(%3.2f)\n", t1 / t2);
+  }
+  for (int r = 0; r < height; ++r) {
+    for (int c = 0; c < width; ++c) {
+      ASSERT_EQ(mask_ref[c + r * width], mask_test[c + r * width])
+          << "[" << r << "," << c << "] " << run_times << " @ " << width << "x"
+          << height << " inv " << type;
+    }
+  }
+}
+
+TEST_P(BuildCompDiffwtdMaskTest, match) {
+  RunTest(GetParam(), 0, DIFFWTD_38);
+  RunTest(GetParam(), 0, DIFFWTD_38_INV);
+}
+TEST_P(BuildCompDiffwtdMaskTest, DISABLED_Speed) {
+  RunTest(GetParam(), 1, DIFFWTD_38);
+  RunTest(GetParam(), 1, DIFFWTD_38_INV);
+}
+#endif
+TEST_P(BuildCompDiffwtdMaskD16Test, CheckOutput) {
+  RunCheckOutput(GET_PARAM(1));
+}
+
+TEST_P(BuildCompDiffwtdMaskD16Test, DISABLED_Speed) {
+  RunSpeedTest(GET_PARAM(1));
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(SSE4_1, BuildCompDiffwtdMaskTest,
+                        ::testing::Range(0, static_cast<int>(BLOCK_SIZES_ALL),
+                                         1));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, BuildCompDiffwtdMaskD16Test,
+    BuildParams(av1_build_compound_diffwtd_mask_d16_sse4_1));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, BuildCompDiffwtdMaskD16Test,
+                        BuildParams(av1_build_compound_diffwtd_mask_d16_neon));
+#endif
+
+}  // namespace
diff --git a/third_party/aom/test/register_state_check.h b/third_party/aom/test/register_state_check.h
index cce662a6d7..ef1f775e52 100644
--- a/third_party/aom/test/register_state_check.h
+++ b/third_party/aom/test/register_state_check.h
@@ -13,7 +13,9 @@
 #define TEST_REGISTER_STATE_CHECK_H_
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "./aom_config.h"
+
+#include "config/aom_config.h"
+
 #include "aom/aom_integer.h"
 
 // ASM_REGISTER_STATE_CHECK(asm_function)
@@ -29,7 +31,7 @@
 //   See platform implementations of RegisterStateCheckXXX for details.
 //
 
-#if defined(_WIN64)
+#if defined(_WIN64) && ARCH_X86_64
 
 #undef NOMINMAX
 #define NOMINMAX
@@ -88,53 +90,6 @@ class RegisterStateCheck {
 
 }  // namespace libaom_test
 
-#elif defined(CONFIG_SHARED) && defined(HAVE_NEON_ASM) && !CONFIG_SHARED && \
-    HAVE_NEON_ASM && CONFIG_AV1
-
-extern "C" {
-// Save the d8-d15 registers into store.
-void aom_push_neon(int64_t *store);
-}
-
-namespace libaom_test {
-
-// Compares the state of d8-d15 at construction with their state at
-// destruction. These registers should be preserved by the callee on
-// arm platform.
-class RegisterStateCheck {
- public:
-  RegisterStateCheck() { initialized_ = StoreRegisters(pre_store_); }
-  ~RegisterStateCheck() { Check(); }
-
- private:
-  static bool StoreRegisters(int64_t store[8]) {
-    aom_push_neon(store);
-    return true;
-  }
-
-  // Compares the register state. Returns true if the states match.
-  void Check() const {
-    ASSERT_TRUE(initialized_);
-    int64_t post_store[8];
-    aom_push_neon(post_store);
-    for (int i = 0; i < 8; ++i) {
-      EXPECT_EQ(pre_store_[i], post_store[i])
-          << "d" << i + 8 << " has been modified";
-    }
-  }
-
-  bool initialized_;
-  int64_t pre_store_[8];
-};
-
-#define ASM_REGISTER_STATE_CHECK(statement)    \
-  do {                                         \
-    libaom_test::RegisterStateCheck reg_check; \
-    statement;                                 \
-  } while (false)
-
-}  // namespace libaom_test
-
 #else
 
 namespace libaom_test {
@@ -144,7 +99,7 @@ class RegisterStateCheck {};
 
 }  // namespace libaom_test
 
-#endif  // _WIN64
+#endif  // _WIN64 && ARCH_X86_64
 
 #if ARCH_X86 || ARCH_X86_64
 #if defined(__GNUC__)
diff --git a/third_party/aom/test/resize_test.cc b/third_party/aom/test/resize_test.cc
index c4e924de05..e1c4e9fa51 100644
--- a/third_party/aom/test/resize_test.cc
+++ b/third_party/aom/test/resize_test.cc
@@ -7,10 +7,11 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <climits>
 #include <vector>
+#include "aom_dsp/aom_dsp_common.h"
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "test/codec_factory.h"
 #include "test/encode_test_driver.h"
@@ -149,83 +150,8 @@ void ScaleForFrameNumber(unsigned int frame, unsigned int initial_w,
     *h = initial_h;
     return;
   }
+  // Go down very low
   if (frame < 120) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 130) {
-    *w = initial_w / 2;
-    *h = initial_h / 2;
-    return;
-  }
-  if (frame < 140) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 150) {
-    *w = initial_w;
-    *h = initial_h;
-    return;
-  }
-  if (frame < 160) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 170) {
-    *w = initial_w / 2;
-    *h = initial_h / 2;
-    return;
-  }
-  if (frame < 180) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 190) {
-    *w = initial_w;
-    *h = initial_h;
-    return;
-  }
-  if (frame < 200) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 210) {
-    *w = initial_w / 2;
-    *h = initial_h / 2;
-    return;
-  }
-  if (frame < 220) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 230) {
-    *w = initial_w;
-    *h = initial_h;
-    return;
-  }
-  if (frame < 240) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 250) {
-    *w = initial_w / 2;
-    *h = initial_h / 2;
-    return;
-  }
-  if (frame < 260) {
-    *w = initial_w;
-    *h = initial_h;
-    return;
-  }
-  // Go down very low.
-  if (frame < 270) {
     *w = initial_w / 4;
     *h = initial_h / 4;
     return;
@@ -233,7 +159,7 @@ void ScaleForFrameNumber(unsigned int frame, unsigned int initial_w,
   if (flag_codec == 1) {
     // Cases that only works for AV1.
     // For AV1: Swap width and height of original.
-    if (frame < 320) {
+    if (frame < 140) {
       *w = initial_h;
       *h = initial_w;
       return;
@@ -247,7 +173,7 @@ class ResizingVideoSource : public ::libaom_test::DummyVideoSource {
  public:
   ResizingVideoSource() {
     SetSize(kInitialWidth, kInitialHeight);
-    limit_ = 350;
+    limit_ = 150;
   }
   int flag_codec_;
   virtual ~ResizingVideoSource() {}
@@ -289,8 +215,15 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
   ResizingVideoSource video;
   video.flag_codec_ = 0;
   cfg_.g_lag_in_frames = 0;
+  // We use max(kInitialWidth, kInitialHeight) because during the test
+  // the width and height of the frame are swapped
+  cfg_.g_forced_max_frame_width = cfg_.g_forced_max_frame_height =
+      AOMMAX(kInitialWidth, kInitialHeight);
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 
+  // Check we decoded the same number of frames as we attempted to encode
+  ASSERT_EQ(frame_info_list_.size(), video.limit());
+
   for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
        info != frame_info_list_.end(); ++info) {
     const unsigned int frame = static_cast<unsigned>(info->pts);
@@ -308,16 +241,16 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
 const unsigned int kStepDownFrame = 3;
 const unsigned int kStepUpFrame = 6;
 
-class ResizeInternalTest : public ResizeTest {
+class ResizeInternalTestLarge : public ResizeTest {
  protected:
 #if WRITE_COMPRESSED_STREAM
-  ResizeInternalTest()
+  ResizeInternalTestLarge()
       : ResizeTest(), frame0_psnr_(0.0), outfile_(NULL), out_frames_(0) {}
 #else
-  ResizeInternalTest() : ResizeTest(), frame0_psnr_(0.0) {}
+  ResizeInternalTestLarge() : ResizeTest(), frame0_psnr_(0.0) {}
 #endif
 
-  virtual ~ResizeInternalTest() {}
+  virtual ~ResizeInternalTestLarge() {}
 
   virtual void BeginPassHook(unsigned int /*pass*/) {
 #if WRITE_COMPRESSED_STREAM
@@ -388,7 +321,7 @@ class ResizeInternalTest : public ResizeTest {
 #endif
 };
 
-TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
+TEST_P(ResizeInternalTestLarge, TestInternalResizeWorks) {
   ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                        30, 1, 0, 10);
   init_flags_ = AOM_CODEC_USE_PSNR;
@@ -420,7 +353,7 @@ TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
   }
 }
 
-TEST_P(ResizeInternalTest, TestInternalResizeChangeConfig) {
+TEST_P(ResizeInternalTestLarge, TestInternalResizeChangeConfig) {
   ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                        30, 1, 0, 10);
   cfg_.g_w = 352;
@@ -483,10 +416,14 @@ class ResizeRealtimeTest
     cfg_.kf_min_dist = cfg_.kf_max_dist = 3000;
     // Enable dropped frames.
     cfg_.rc_dropframe_thresh = 1;
-    // Enable error_resilience mode.
-    cfg_.g_error_resilient = 1;
+    // Disable error_resilience mode.
+    cfg_.g_error_resilient = 0;
     // Run at low bitrate.
     cfg_.rc_target_bitrate = 200;
+    // We use max(kInitialWidth, kInitialHeight) because during the test
+    // the width and height of the frame are swapped
+    cfg_.g_forced_max_frame_width = cfg_.g_forced_max_frame_height =
+        AOMMAX(kInitialWidth, kInitialHeight);
   }
 
   std::vector<FrameInfo> frame_info_list_;
@@ -505,6 +442,9 @@ TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
   mismatch_nframes_ = 0;
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
 
+  // Check we decoded the same number of frames as we attempted to encode
+  ASSERT_EQ(frame_info_list_.size(), video.limit());
+
   for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
        info != frame_info_list_.end(); ++info) {
     const unsigned int frame = static_cast<unsigned>(info->pts);
@@ -706,11 +646,14 @@ TEST_P(ResizeCspTest, TestResizeCspWorks) {
   cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
   cfg_.g_lag_in_frames = 0;
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  // Check we decoded the same number of frames as we attempted to encode
+  ASSERT_EQ(frame_info_list_.size(), video.limit());
 }
 
 AV1_INSTANTIATE_TEST_CASE(ResizeTest,
                           ::testing::Values(::libaom_test::kRealTime));
-AV1_INSTANTIATE_TEST_CASE(ResizeInternalTest,
+AV1_INSTANTIATE_TEST_CASE(ResizeInternalTestLarge,
                           ::testing::Values(::libaom_test::kOnePassGood));
 AV1_INSTANTIATE_TEST_CASE(ResizeRealtimeTest,
                           ::testing::Values(::libaom_test::kRealTime),
diff --git a/third_party/aom/test/run_encodes.sh b/third_party/aom/test/run_encodes.sh
new file mode 100755
index 0000000000..2096d8b158
--- /dev/null
+++ b/third_party/aom/test/run_encodes.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+#
+# Copyright (c) 2016, Alliance for Open Media. All rights reserved.
+#
+# This source code is subject to the terms of the BSD 2 Clause License and
+# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+# was not distributed with this source code in the LICENSE file, you can
+# obtain it at www.aomedia.org/license/software. If the Alliance for Open
+# Media Patent License 1.0 was not distributed with this source code in the
+# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+#
+# Author: jimbankoski@google.com (Jim Bankoski)
+
+if [[ $# -ne 4 ]]; then
+  echo Encodes all the y4m files in the directory at the bitrates specified by
+  echo the first 3 parameters and stores the results in a subdirectory named by
+  echo the 4th parameter:
+  echo
+  echo Usage:    run_encodes.sh start-kbps end-kbps step-kbps output-directory
+  echo Example:  run_encodes.sh 200 500 50 baseline
+  exit
+fi
+
+s=$1
+e=$2
+step=$3
+newdir=$4
+
+for i in ./*y4m; do
+  for (( b=$s; b<= $e; b+= $step ))
+  do
+    best_encode.sh $i $b
+  done
+  mv opsnr.stt $i.stt
+done
+
+mkdir $newdir
+mv *.stt $newdir
+mv *.webm $newdir
diff --git a/third_party/aom/test/sad_test.cc b/third_party/aom/test/sad_test.cc
index 9ac58e6534..845fe79da1 100644
--- a/third_party/aom/test/sad_test.cc
+++ b/third_party/aom/test/sad_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <string.h>
 #include <limits.h>
@@ -15,8 +15,9 @@
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
@@ -27,17 +28,34 @@
 
 typedef unsigned int (*SadMxNFunc)(const uint8_t *src_ptr, int src_stride,
                                    const uint8_t *ref_ptr, int ref_stride);
-typedef std::tr1::tuple<int, int, SadMxNFunc, int> SadMxNParam;
+typedef ::testing::tuple<int, int, SadMxNFunc, int> SadMxNParam;
 
 typedef uint32_t (*SadMxNAvgFunc)(const uint8_t *src_ptr, int src_stride,
                                   const uint8_t *ref_ptr, int ref_stride,
                                   const uint8_t *second_pred);
-typedef std::tr1::tuple<int, int, SadMxNAvgFunc, int> SadMxNAvgParam;
+typedef ::testing::tuple<int, int, SadMxNAvgFunc, int> SadMxNAvgParam;
+
+typedef void (*JntCompAvgFunc)(uint8_t *comp_pred, const uint8_t *pred,
+                               int width, int height, const uint8_t *ref,
+                               int ref_stride,
+                               const JNT_COMP_PARAMS *jcp_param);
+typedef ::testing::tuple<int, int, JntCompAvgFunc, int> JntCompAvgParam;
+
+typedef unsigned int (*JntSadMxhFunc)(const uint8_t *src_ptr, int src_stride,
+                                      const uint8_t *ref_ptr, int ref_stride,
+                                      int width, int height);
+typedef ::testing::tuple<int, int, JntSadMxhFunc, int> JntSadMxhParam;
+
+typedef uint32_t (*JntSadMxNAvgFunc)(const uint8_t *src_ptr, int src_stride,
+                                     const uint8_t *ref_ptr, int ref_stride,
+                                     const uint8_t *second_pred,
+                                     const JNT_COMP_PARAMS *jcp_param);
+typedef ::testing::tuple<int, int, JntSadMxNAvgFunc, int> JntSadMxNAvgParam;
 
 typedef void (*SadMxNx4Func)(const uint8_t *src_ptr, int src_stride,
                              const uint8_t *const ref_ptr[], int ref_stride,
                              uint32_t *sad_array);
-typedef std::tr1::tuple<int, int, SadMxNx4Func, int> SadMxNx4Param;
+typedef ::testing::tuple<int, int, SadMxNx4Func, int> SadMxNx4Param;
 
 using libaom_test::ACMRandom;
 
@@ -54,12 +72,20 @@ class SADTestBase : public ::testing::Test {
         aom_memalign(kDataAlignment, kDataBufferSize));
     second_pred8_ =
         reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
+    comp_pred8_ =
+        reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
+    comp_pred8_test_ =
+        reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
     source_data16_ = reinterpret_cast<uint16_t *>(
         aom_memalign(kDataAlignment, kDataBlockSize * sizeof(uint16_t)));
     reference_data16_ = reinterpret_cast<uint16_t *>(
         aom_memalign(kDataAlignment, kDataBufferSize * sizeof(uint16_t)));
     second_pred16_ = reinterpret_cast<uint16_t *>(
         aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
+    comp_pred16_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
+    comp_pred16_test_ = reinterpret_cast<uint16_t *>(
+        aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
   }
 
   static void TearDownTestCase() {
@@ -69,12 +95,20 @@ class SADTestBase : public ::testing::Test {
     reference_data8_ = NULL;
     aom_free(second_pred8_);
     second_pred8_ = NULL;
+    aom_free(comp_pred8_);
+    comp_pred8_ = NULL;
+    aom_free(comp_pred8_test_);
+    comp_pred8_test_ = NULL;
     aom_free(source_data16_);
     source_data16_ = NULL;
     aom_free(reference_data16_);
     reference_data16_ = NULL;
     aom_free(second_pred16_);
     second_pred16_ = NULL;
+    aom_free(comp_pred16_);
+    comp_pred16_ = NULL;
+    aom_free(comp_pred16_test_);
+    comp_pred16_test_ = NULL;
   }
 
   virtual void TearDown() { libaom_test::ClearSystemState(); }
@@ -92,14 +126,16 @@ class SADTestBase : public ::testing::Test {
       source_data_ = source_data8_;
       reference_data_ = reference_data8_;
       second_pred_ = second_pred8_;
-#if CONFIG_HIGHBITDEPTH
+      comp_pred_ = comp_pred8_;
+      comp_pred_test_ = comp_pred8_test_;
     } else {
       use_high_bit_depth_ = true;
       bit_depth_ = static_cast<aom_bit_depth_t>(bd_);
       source_data_ = CONVERT_TO_BYTEPTR(source_data16_);
       reference_data_ = CONVERT_TO_BYTEPTR(reference_data16_);
       second_pred_ = CONVERT_TO_BYTEPTR(second_pred16_);
-#endif  // CONFIG_HIGHBITDEPTH
+      comp_pred_ = CONVERT_TO_BYTEPTR(comp_pred16_);
+      comp_pred_test_ = CONVERT_TO_BYTEPTR(comp_pred16_test_);
     }
     mask_ = (1 << bit_depth_) - 1;
     source_stride_ = (width_ + 31) & ~31;
@@ -108,11 +144,9 @@ class SADTestBase : public ::testing::Test {
   }
 
   virtual uint8_t *GetReference(int block_idx) {
-#if CONFIG_HIGHBITDEPTH
     if (use_high_bit_depth_)
       return CONVERT_TO_BYTEPTR(CONVERT_TO_SHORTPTR(reference_data_) +
                                 block_idx * kDataBlockSize);
-#endif  // CONFIG_HIGHBITDEPTH
     return reference_data_ + block_idx * kDataBlockSize;
   }
 
@@ -122,21 +156,17 @@ class SADTestBase : public ::testing::Test {
     unsigned int sad = 0;
     const uint8_t *const reference8 = GetReference(block_idx);
     const uint8_t *const source8 = source_data_;
-#if CONFIG_HIGHBITDEPTH
     const uint16_t *const reference16 =
         CONVERT_TO_SHORTPTR(GetReference(block_idx));
     const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
-#endif  // CONFIG_HIGHBITDEPTH
     for (int h = 0; h < height_; ++h) {
       for (int w = 0; w < width_; ++w) {
         if (!use_high_bit_depth_) {
           sad += abs(source8[h * source_stride_ + w] -
                      reference8[h * reference_stride_ + w]);
-#if CONFIG_HIGHBITDEPTH
         } else {
           sad += abs(source16[h * source_stride_ + w] -
                      reference16[h * reference_stride_ + w]);
-#endif  // CONFIG_HIGHBITDEPTH
         }
       }
     }
@@ -151,12 +181,10 @@ class SADTestBase : public ::testing::Test {
     const uint8_t *const reference8 = GetReference(block_idx);
     const uint8_t *const source8 = source_data_;
     const uint8_t *const second_pred8 = second_pred_;
-#if CONFIG_HIGHBITDEPTH
     const uint16_t *const reference16 =
         CONVERT_TO_SHORTPTR(GetReference(block_idx));
     const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
     const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
-#endif  // CONFIG_HIGHBITDEPTH
     for (int h = 0; h < height_; ++h) {
       for (int w = 0; w < width_; ++w) {
         if (!use_high_bit_depth_) {
@@ -164,13 +192,65 @@ class SADTestBase : public ::testing::Test {
                           reference8[h * reference_stride_ + w];
           const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
           sad += abs(source8[h * source_stride_ + w] - comp_pred);
-#if CONFIG_HIGHBITDEPTH
         } else {
           const int tmp = second_pred16[h * width_ + w] +
                           reference16[h * reference_stride_ + w];
           const uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
           sad += abs(source16[h * source_stride_ + w] - comp_pred);
-#endif  // CONFIG_HIGHBITDEPTH
+        }
+      }
+    }
+    return sad;
+  }
+
+  void ReferenceJntCompAvg(int block_idx) {
+    const uint8_t *const reference8 = GetReference(block_idx);
+    const uint8_t *const second_pred8 = second_pred_;
+    uint8_t *const comp_pred8 = comp_pred_;
+    const uint16_t *const reference16 =
+        CONVERT_TO_SHORTPTR(GetReference(block_idx));
+    const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
+    uint16_t *const comp_pred16 = CONVERT_TO_SHORTPTR(comp_pred_);
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < width_; ++w) {
+        if (!use_high_bit_depth_) {
+          const int tmp =
+              second_pred8[h * width_ + w] * jcp_param_.bck_offset +
+              reference8[h * reference_stride_ + w] * jcp_param_.fwd_offset;
+          comp_pred8[h * width_ + w] = ROUND_POWER_OF_TWO(tmp, 4);
+        } else {
+          const int tmp =
+              second_pred16[h * width_ + w] * jcp_param_.bck_offset +
+              reference16[h * reference_stride_ + w] * jcp_param_.fwd_offset;
+          comp_pred16[h * width_ + w] = ROUND_POWER_OF_TWO(tmp, 4);
+        }
+      }
+    }
+  }
+
+  unsigned int ReferenceJntSADavg(int block_idx) {
+    unsigned int sad = 0;
+    const uint8_t *const reference8 = GetReference(block_idx);
+    const uint8_t *const source8 = source_data_;
+    const uint8_t *const second_pred8 = second_pred_;
+    const uint16_t *const reference16 =
+        CONVERT_TO_SHORTPTR(GetReference(block_idx));
+    const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
+    const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < width_; ++w) {
+        if (!use_high_bit_depth_) {
+          const int tmp =
+              second_pred8[h * width_ + w] * jcp_param_.bck_offset +
+              reference8[h * reference_stride_ + w] * jcp_param_.fwd_offset;
+          const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 4);
+          sad += abs(source8[h * source_stride_ + w] - comp_pred);
+        } else {
+          const int tmp =
+              second_pred16[h * width_ + w] * jcp_param_.bck_offset +
+              reference16[h * reference_stride_ + w] * jcp_param_.fwd_offset;
+          const uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 4);
+          sad += abs(source16[h * source_stride_ + w] - comp_pred);
         }
       }
     }
@@ -179,17 +259,13 @@ class SADTestBase : public ::testing::Test {
 
   void FillConstant(uint8_t *data, int stride, uint16_t fill_constant) {
     uint8_t *data8 = data;
-#if CONFIG_HIGHBITDEPTH
     uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
-#endif  // CONFIG_HIGHBITDEPTH
     for (int h = 0; h < height_; ++h) {
       for (int w = 0; w < width_; ++w) {
         if (!use_high_bit_depth_) {
           data8[h * stride + w] = static_cast<uint8_t>(fill_constant);
-#if CONFIG_HIGHBITDEPTH
         } else {
           data16[h * stride + w] = fill_constant;
-#endif  // CONFIG_HIGHBITDEPTH
         }
       }
     }
@@ -197,17 +273,13 @@ class SADTestBase : public ::testing::Test {
 
   void FillRandom(uint8_t *data, int stride) {
     uint8_t *data8 = data;
-#if CONFIG_HIGHBITDEPTH
     uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
-#endif  // CONFIG_HIGHBITDEPTH
     for (int h = 0; h < height_; ++h) {
       for (int w = 0; w < width_; ++w) {
         if (!use_high_bit_depth_) {
           data8[h * stride + w] = rnd_.Rand8();
-#if CONFIG_HIGHBITDEPTH
         } else {
           data16[h * stride + w] = rnd_.Rand16() & mask_;
-#endif  // CONFIG_HIGHBITDEPTH
         }
       }
     }
@@ -227,6 +299,13 @@ class SADTestBase : public ::testing::Test {
   static uint16_t *reference_data16_;
   static uint16_t *second_pred16_;
   int reference_stride_;
+  static uint8_t *comp_pred_;
+  static uint8_t *comp_pred8_;
+  static uint16_t *comp_pred16_;
+  static uint8_t *comp_pred_test_;
+  static uint8_t *comp_pred8_test_;
+  static uint16_t *comp_pred16_test_;
+  JNT_COMP_PARAMS jcp_param_;
 
   ACMRandom rnd_;
 };
@@ -312,15 +391,116 @@ class SADavgTest : public ::testing::WithParamInterface<SadMxNAvgParam>,
   }
 };
 
+class JntCompAvgTest : public ::testing::WithParamInterface<JntCompAvgParam>,
+                       public SADTestBase {
+ public:
+  JntCompAvgTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+  void jnt_comp_avg(int block_idx) {
+    const uint8_t *const reference = GetReference(block_idx);
+
+    ASM_REGISTER_STATE_CHECK(GET_PARAM(2)(comp_pred_test_, second_pred_, width_,
+                                          height_, reference, reference_stride_,
+                                          &jcp_param_));
+  }
+
+  void CheckCompAvg() {
+    for (int j = 0; j < 2; ++j) {
+      for (int i = 0; i < 4; ++i) {
+        jcp_param_.fwd_offset = quant_dist_lookup_table[j][i][0];
+        jcp_param_.bck_offset = quant_dist_lookup_table[j][i][1];
+
+        ReferenceJntCompAvg(0);
+        jnt_comp_avg(0);
+
+        for (int y = 0; y < height_; ++y)
+          for (int x = 0; x < width_; ++x)
+            ASSERT_EQ(comp_pred_[y * width_ + x],
+                      comp_pred_test_[y * width_ + x]);
+      }
+    }
+  }
+};
+
+class JntSADTest : public ::testing::WithParamInterface<JntSadMxhParam>,
+                   public SADTestBase {
+ public:
+  JntSADTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+  unsigned int SAD(int block_idx) {
+    unsigned int ret;
+    const uint8_t *const reference = GetReference(block_idx);
+
+    ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
+                                                reference, reference_stride_,
+                                                GET_PARAM(0), GET_PARAM(1)));
+    return ret;
+  }
+
+  void CheckSAD() {
+    const unsigned int reference_sad = ReferenceSAD(0);
+    const unsigned int exp_sad = SAD(0);
+
+    ASSERT_EQ(reference_sad, exp_sad);
+  }
+
+  void SpeedSAD() {
+    int test_count = 20000000;
+    while (test_count > 0) {
+      SAD(0);
+      test_count -= 1;
+    }
+  }
+};
+
+class JntSADavgTest : public ::testing::WithParamInterface<JntSadMxNAvgParam>,
+                      public SADTestBase {
+ public:
+  JntSADavgTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
+
+ protected:
+  unsigned int jnt_SAD_avg(int block_idx) {
+    unsigned int ret;
+    const uint8_t *const reference = GetReference(block_idx);
+
+    ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
+                                                reference, reference_stride_,
+                                                second_pred_, &jcp_param_));
+    return ret;
+  }
+
+  void CheckSAD() {
+    for (int j = 0; j < 2; ++j) {
+      for (int i = 0; i < 4; ++i) {
+        jcp_param_.fwd_offset = quant_dist_lookup_table[j][i][0];
+        jcp_param_.bck_offset = quant_dist_lookup_table[j][i][1];
+
+        const unsigned int reference_sad = ReferenceJntSADavg(0);
+        const unsigned int exp_sad = jnt_SAD_avg(0);
+
+        ASSERT_EQ(reference_sad, exp_sad);
+      }
+    }
+  }
+};
+
 uint8_t *SADTestBase::source_data_ = NULL;
 uint8_t *SADTestBase::reference_data_ = NULL;
 uint8_t *SADTestBase::second_pred_ = NULL;
+uint8_t *SADTestBase::comp_pred_ = NULL;
+uint8_t *SADTestBase::comp_pred_test_ = NULL;
 uint8_t *SADTestBase::source_data8_ = NULL;
 uint8_t *SADTestBase::reference_data8_ = NULL;
 uint8_t *SADTestBase::second_pred8_ = NULL;
+uint8_t *SADTestBase::comp_pred8_ = NULL;
+uint8_t *SADTestBase::comp_pred8_test_ = NULL;
 uint16_t *SADTestBase::source_data16_ = NULL;
 uint16_t *SADTestBase::reference_data16_ = NULL;
 uint16_t *SADTestBase::second_pred16_ = NULL;
+uint16_t *SADTestBase::comp_pred16_ = NULL;
+uint16_t *SADTestBase::comp_pred16_test_ = NULL;
 
 TEST_P(SADTest, MaxRef) {
   FillConstant(source_data_, source_stride_, 0);
@@ -428,6 +608,132 @@ TEST_P(SADavgTest, ShortSrc) {
   source_stride_ = tmp_stride;
 }
 
+TEST_P(JntCompAvgTest, MaxRef) {
+  FillConstant(reference_data_, reference_stride_, mask_);
+  FillConstant(second_pred_, width_, 0);
+  CheckCompAvg();
+}
+
+TEST_P(JntCompAvgTest, MaxSecondPred) {
+  FillConstant(reference_data_, reference_stride_, 0);
+  FillConstant(second_pred_, width_, mask_);
+  CheckCompAvg();
+}
+
+TEST_P(JntCompAvgTest, ShortRef) {
+  const int tmp_stride = reference_stride_;
+  reference_stride_ >>= 1;
+  FillRandom(reference_data_, reference_stride_);
+  FillRandom(second_pred_, width_);
+  CheckCompAvg();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(JntCompAvgTest, UnalignedRef) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  const int tmp_stride = reference_stride_;
+  reference_stride_ -= 1;
+  FillRandom(reference_data_, reference_stride_);
+  FillRandom(second_pred_, width_);
+  CheckCompAvg();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(JntSADTest, MaxRef) {
+  FillConstant(source_data_, source_stride_, 0);
+  FillConstant(reference_data_, reference_stride_, mask_);
+  CheckSAD();
+}
+
+TEST_P(JntSADTest, MaxSrc) {
+  FillConstant(source_data_, source_stride_, mask_);
+  FillConstant(reference_data_, reference_stride_, 0);
+  CheckSAD();
+}
+
+TEST_P(JntSADTest, ShortRef) {
+  const int tmp_stride = reference_stride_;
+  reference_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  CheckSAD();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(JntSADTest, UnalignedRef) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  const int tmp_stride = reference_stride_;
+  reference_stride_ -= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  CheckSAD();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(JntSADTest, ShortSrc) {
+  const int tmp_stride = source_stride_;
+  source_stride_ >>= 1;
+  int test_count = 2000;
+  while (test_count > 0) {
+    FillRandom(source_data_, source_stride_);
+    FillRandom(reference_data_, reference_stride_);
+    CheckSAD();
+    test_count -= 1;
+  }
+  source_stride_ = tmp_stride;
+}
+
+TEST_P(JntSADavgTest, MaxRef) {
+  FillConstant(source_data_, source_stride_, 0);
+  FillConstant(reference_data_, reference_stride_, mask_);
+  FillConstant(second_pred_, width_, 0);
+  CheckSAD();
+}
+TEST_P(JntSADavgTest, MaxSrc) {
+  FillConstant(source_data_, source_stride_, mask_);
+  FillConstant(reference_data_, reference_stride_, 0);
+  FillConstant(second_pred_, width_, 0);
+  CheckSAD();
+}
+
+TEST_P(JntSADavgTest, ShortRef) {
+  const int tmp_stride = reference_stride_;
+  reference_stride_ >>= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  FillRandom(second_pred_, width_);
+  CheckSAD();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(JntSADavgTest, UnalignedRef) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  const int tmp_stride = reference_stride_;
+  reference_stride_ -= 1;
+  FillRandom(source_data_, source_stride_);
+  FillRandom(reference_data_, reference_stride_);
+  FillRandom(second_pred_, width_);
+  CheckSAD();
+  reference_stride_ = tmp_stride;
+}
+
+TEST_P(JntSADavgTest, ShortSrc) {
+  const int tmp_stride = source_stride_;
+  source_stride_ >>= 1;
+  int test_count = 2000;
+  while (test_count > 0) {
+    FillRandom(source_data_, source_stride_);
+    FillRandom(reference_data_, reference_stride_);
+    FillRandom(second_pred_, width_);
+    CheckSAD();
+    test_count -= 1;
+  }
+  source_stride_ = tmp_stride;
+}
+
 TEST_P(SADx4Test, MaxRef) {
   FillConstant(source_data_, source_stride_, 0);
   FillConstant(GetReference(0), reference_stride_, mask_);
@@ -500,16 +806,14 @@ TEST_P(SADx4Test, SrcAlignedByWidth) {
   source_data_ = tmp_source_data;
 }
 
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
 
 //------------------------------------------------------------------------------
 // C functions
 const SadMxNParam c_tests[] = {
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_sad128x128_c, -1),
   make_tuple(128, 64, &aom_sad128x64_c, -1),
   make_tuple(64, 128, &aom_sad64x128_c, -1),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(64, 64, &aom_sad64x64_c, -1),
   make_tuple(64, 32, &aom_sad64x32_c, -1),
   make_tuple(32, 64, &aom_sad32x64_c, -1),
@@ -523,12 +827,9 @@ const SadMxNParam c_tests[] = {
   make_tuple(8, 4, &aom_sad8x4_c, -1),
   make_tuple(4, 8, &aom_sad4x8_c, -1),
   make_tuple(4, 4, &aom_sad4x4_c, -1),
-#if CONFIG_HIGHBITDEPTH
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_highbd_sad128x128_c, 8),
   make_tuple(128, 64, &aom_highbd_sad128x64_c, 8),
   make_tuple(64, 128, &aom_highbd_sad64x128_c, 8),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(64, 64, &aom_highbd_sad64x64_c, 8),
   make_tuple(64, 32, &aom_highbd_sad64x32_c, 8),
   make_tuple(32, 64, &aom_highbd_sad32x64_c, 8),
@@ -542,11 +843,9 @@ const SadMxNParam c_tests[] = {
   make_tuple(8, 4, &aom_highbd_sad8x4_c, 8),
   make_tuple(4, 8, &aom_highbd_sad4x8_c, 8),
   make_tuple(4, 4, &aom_highbd_sad4x4_c, 8),
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_highbd_sad128x128_c, 10),
   make_tuple(128, 64, &aom_highbd_sad128x64_c, 10),
   make_tuple(64, 128, &aom_highbd_sad64x128_c, 10),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(64, 64, &aom_highbd_sad64x64_c, 10),
   make_tuple(64, 32, &aom_highbd_sad64x32_c, 10),
   make_tuple(32, 64, &aom_highbd_sad32x64_c, 10),
@@ -560,11 +859,9 @@ const SadMxNParam c_tests[] = {
   make_tuple(8, 4, &aom_highbd_sad8x4_c, 10),
   make_tuple(4, 8, &aom_highbd_sad4x8_c, 10),
   make_tuple(4, 4, &aom_highbd_sad4x4_c, 10),
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_highbd_sad128x128_c, 12),
   make_tuple(128, 64, &aom_highbd_sad128x64_c, 12),
   make_tuple(64, 128, &aom_highbd_sad64x128_c, 12),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(64, 64, &aom_highbd_sad64x64_c, 12),
   make_tuple(64, 32, &aom_highbd_sad64x32_c, 12),
   make_tuple(32, 64, &aom_highbd_sad32x64_c, 12),
@@ -578,16 +875,13 @@ const SadMxNParam c_tests[] = {
   make_tuple(8, 4, &aom_highbd_sad8x4_c, 12),
   make_tuple(4, 8, &aom_highbd_sad4x8_c, 12),
   make_tuple(4, 4, &aom_highbd_sad4x4_c, 12),
-#endif  // CONFIG_HIGHBITDEPTH
 };
 INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests));
 
 const SadMxNAvgParam avg_c_tests[] = {
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_sad128x128_avg_c, -1),
   make_tuple(128, 64, &aom_sad128x64_avg_c, -1),
   make_tuple(64, 128, &aom_sad64x128_avg_c, -1),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(64, 64, &aom_sad64x64_avg_c, -1),
   make_tuple(64, 32, &aom_sad64x32_avg_c, -1),
   make_tuple(32, 64, &aom_sad32x64_avg_c, -1),
@@ -601,12 +895,9 @@ const SadMxNAvgParam avg_c_tests[] = {
   make_tuple(8, 4, &aom_sad8x4_avg_c, -1),
   make_tuple(4, 8, &aom_sad4x8_avg_c, -1),
   make_tuple(4, 4, &aom_sad4x4_avg_c, -1),
-#if CONFIG_HIGHBITDEPTH
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 8),
   make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 8),
   make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 8),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 8),
   make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 8),
   make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 8),
@@ -620,11 +911,9 @@ const SadMxNAvgParam avg_c_tests[] = {
   make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 8),
   make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 8),
   make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 8),
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 10),
   make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 10),
   make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 10),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 10),
   make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 10),
   make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 10),
@@ -638,11 +927,9 @@ const SadMxNAvgParam avg_c_tests[] = {
   make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 10),
   make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 10),
   make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 10),
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 12),
   make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 12),
   make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 12),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 12),
   make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 12),
   make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 12),
@@ -656,16 +943,56 @@ const SadMxNAvgParam avg_c_tests[] = {
   make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 12),
   make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 12),
   make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 12),
-#endif  // CONFIG_HIGHBITDEPTH
 };
 INSTANTIATE_TEST_CASE_P(C, SADavgTest, ::testing::ValuesIn(avg_c_tests));
 
+// TODO(chengchen): add highbd tests
+const JntCompAvgParam jnt_comp_avg_c_tests[] = {
+  make_tuple(128, 128, &aom_jnt_comp_avg_pred_c, -1),
+  make_tuple(128, 64, &aom_jnt_comp_avg_pred_c, -1),
+  make_tuple(64, 128, &aom_jnt_comp_avg_pred_c, -1),
+  make_tuple(64, 64, &aom_jnt_comp_avg_pred_c, -1),
+  make_tuple(64, 32, &aom_jnt_comp_avg_pred_c, -1),
+  make_tuple(32, 64, &aom_jnt_comp_avg_pred_c, -1),
+  make_tuple(32, 32, &aom_jnt_comp_avg_pred_c, -1),
+  make_tuple(32, 16, &aom_jnt_comp_avg_pred_c, -1),
+  make_tuple(16, 32, &aom_jnt_comp_avg_pred_c, -1),
+  make_tuple(16, 16, &aom_jnt_comp_avg_pred_c, -1),
+  make_tuple(16, 8, &aom_jnt_comp_avg_pred_c, -1),
+  make_tuple(8, 16, &aom_jnt_comp_avg_pred_c, -1),
+  make_tuple(8, 8, &aom_jnt_comp_avg_pred_c, -1),
+  make_tuple(8, 4, &aom_jnt_comp_avg_pred_c, -1),
+  make_tuple(4, 8, &aom_jnt_comp_avg_pred_c, -1),
+  make_tuple(4, 4, &aom_jnt_comp_avg_pred_c, -1),
+};
+
+INSTANTIATE_TEST_CASE_P(C, JntCompAvgTest,
+                        ::testing::ValuesIn(jnt_comp_avg_c_tests));
+
+const JntSadMxNAvgParam jnt_avg_c_tests[] = {
+  make_tuple(128, 128, &aom_jnt_sad128x128_avg_c, -1),
+  make_tuple(128, 64, &aom_jnt_sad128x64_avg_c, -1),
+  make_tuple(64, 128, &aom_jnt_sad64x128_avg_c, -1),
+  make_tuple(64, 64, &aom_jnt_sad64x64_avg_c, -1),
+  make_tuple(64, 32, &aom_jnt_sad64x32_avg_c, -1),
+  make_tuple(32, 64, &aom_jnt_sad32x64_avg_c, -1),
+  make_tuple(32, 32, &aom_jnt_sad32x32_avg_c, -1),
+  make_tuple(32, 16, &aom_jnt_sad32x16_avg_c, -1),
+  make_tuple(16, 32, &aom_jnt_sad16x32_avg_c, -1),
+  make_tuple(16, 16, &aom_jnt_sad16x16_avg_c, -1),
+  make_tuple(16, 8, &aom_jnt_sad16x8_avg_c, -1),
+  make_tuple(8, 16, &aom_jnt_sad8x16_avg_c, -1),
+  make_tuple(8, 8, &aom_jnt_sad8x8_avg_c, -1),
+  make_tuple(8, 4, &aom_jnt_sad8x4_avg_c, -1),
+  make_tuple(4, 8, &aom_jnt_sad4x8_avg_c, -1),
+  make_tuple(4, 4, &aom_jnt_sad4x4_avg_c, -1),
+};
+INSTANTIATE_TEST_CASE_P(C, JntSADavgTest, ::testing::ValuesIn(jnt_avg_c_tests));
+
 const SadMxNx4Param x4d_c_tests[] = {
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_sad128x128x4d_c, -1),
   make_tuple(128, 64, &aom_sad128x64x4d_c, -1),
   make_tuple(64, 128, &aom_sad64x128x4d_c, -1),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(64, 64, &aom_sad64x64x4d_c, -1),
   make_tuple(64, 32, &aom_sad64x32x4d_c, -1),
   make_tuple(32, 64, &aom_sad32x64x4d_c, -1),
@@ -679,12 +1006,9 @@ const SadMxNx4Param x4d_c_tests[] = {
   make_tuple(8, 4, &aom_sad8x4x4d_c, -1),
   make_tuple(4, 8, &aom_sad4x8x4d_c, -1),
   make_tuple(4, 4, &aom_sad4x4x4d_c, -1),
-#if CONFIG_HIGHBITDEPTH
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 8),
   make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 8),
   make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 8),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 8),
   make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 8),
   make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 8),
@@ -698,11 +1022,9 @@ const SadMxNx4Param x4d_c_tests[] = {
   make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 8),
   make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 8),
   make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 8),
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 10),
   make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 10),
   make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 10),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 10),
   make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 10),
   make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 10),
@@ -716,11 +1038,9 @@ const SadMxNx4Param x4d_c_tests[] = {
   make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 10),
   make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 10),
   make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 10),
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 12),
   make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 12),
   make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 12),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 12),
   make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 12),
   make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 12),
@@ -734,7 +1054,6 @@ const SadMxNx4Param x4d_c_tests[] = {
   make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 12),
   make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 12),
   make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 12),
-#endif  // CONFIG_HIGHBITDEPTH
 };
 INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests));
 
@@ -764,11 +1083,9 @@ INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests));
 // x86 functions
 #if HAVE_SSE2
 const SadMxNParam sse2_tests[] = {
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_sad128x128_sse2, -1),
   make_tuple(128, 64, &aom_sad128x64_sse2, -1),
   make_tuple(64, 128, &aom_sad64x128_sse2, -1),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(64, 64, &aom_sad64x64_sse2, -1),
   make_tuple(64, 32, &aom_sad64x32_sse2, -1),
   make_tuple(32, 64, &aom_sad32x64_sse2, -1),
@@ -782,7 +1099,6 @@ const SadMxNParam sse2_tests[] = {
   make_tuple(8, 4, &aom_sad8x4_sse2, -1),
   make_tuple(4, 8, &aom_sad4x8_sse2, -1),
   make_tuple(4, 4, &aom_sad4x4_sse2, -1),
-#if CONFIG_HIGHBITDEPTH
   make_tuple(64, 64, &aom_highbd_sad64x64_sse2, 8),
   make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 8),
   make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 8),
@@ -816,16 +1132,13 @@ const SadMxNParam sse2_tests[] = {
   make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 12),
   make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 12),
   make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 12),
-#endif  // CONFIG_HIGHBITDEPTH
 };
 INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
 
 const SadMxNAvgParam avg_sse2_tests[] = {
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_sad128x128_avg_sse2, -1),
   make_tuple(128, 64, &aom_sad128x64_avg_sse2, -1),
   make_tuple(64, 128, &aom_sad64x128_avg_sse2, -1),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(64, 64, &aom_sad64x64_avg_sse2, -1),
   make_tuple(64, 32, &aom_sad64x32_avg_sse2, -1),
   make_tuple(32, 64, &aom_sad32x64_avg_sse2, -1),
@@ -839,7 +1152,6 @@ const SadMxNAvgParam avg_sse2_tests[] = {
   make_tuple(8, 4, &aom_sad8x4_avg_sse2, -1),
   make_tuple(4, 8, &aom_sad4x8_avg_sse2, -1),
   make_tuple(4, 4, &aom_sad4x4_avg_sse2, -1),
-#if CONFIG_HIGHBITDEPTH
   make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 8),
   make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 8),
   make_tuple(32, 64, &aom_highbd_sad32x64_avg_sse2, 8),
@@ -873,16 +1185,13 @@ const SadMxNAvgParam avg_sse2_tests[] = {
   make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 12),
   make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 12),
   make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 12),
-#endif  // CONFIG_HIGHBITDEPTH
 };
 INSTANTIATE_TEST_CASE_P(SSE2, SADavgTest, ::testing::ValuesIn(avg_sse2_tests));
 
 const SadMxNx4Param x4d_sse2_tests[] = {
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_sad128x128x4d_sse2, -1),
   make_tuple(128, 64, &aom_sad128x64x4d_sse2, -1),
   make_tuple(64, 128, &aom_sad64x128x4d_sse2, -1),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   make_tuple(64, 64, &aom_sad64x64x4d_sse2, -1),
   make_tuple(64, 32, &aom_sad64x32x4d_sse2, -1),
   make_tuple(32, 64, &aom_sad32x64x4d_sse2, -1),
@@ -896,7 +1205,6 @@ const SadMxNx4Param x4d_sse2_tests[] = {
   make_tuple(8, 4, &aom_sad8x4x4d_sse2, -1),
   make_tuple(4, 8, &aom_sad4x8x4d_sse2, -1),
   make_tuple(4, 4, &aom_sad4x4x4d_sse2, -1),
-#if CONFIG_HIGHBITDEPTH
   make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 8),
   make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 8),
   make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 8),
@@ -936,17 +1244,90 @@ const SadMxNx4Param x4d_sse2_tests[] = {
   make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 12),
   make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 12),
   make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 12),
-#endif  // CONFIG_HIGHBITDEPTH
 };
 INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests));
 #endif  // HAVE_SSE2
 
+#if HAVE_SSSE3
+// Note: These are named sse2, but part of ssse3 file and only built and linked
+// when ssse3 is enabled.
+const JntSadMxhParam jnt_sad_sse2_tests[] = {
+  make_tuple(4, 4, &aom_sad4xh_sse2, -1),
+  make_tuple(4, 8, &aom_sad4xh_sse2, -1),
+  make_tuple(8, 4, &aom_sad8xh_sse2, -1),
+  make_tuple(8, 8, &aom_sad8xh_sse2, -1),
+  make_tuple(8, 16, &aom_sad8xh_sse2, -1),
+  make_tuple(16, 8, &aom_sad16xh_sse2, -1),
+  make_tuple(16, 16, &aom_sad16xh_sse2, -1),
+  make_tuple(16, 32, &aom_sad16xh_sse2, -1),
+  make_tuple(32, 16, &aom_sad32xh_sse2, -1),
+  make_tuple(32, 32, &aom_sad32xh_sse2, -1),
+  make_tuple(32, 64, &aom_sad32xh_sse2, -1),
+  make_tuple(64, 32, &aom_sad64xh_sse2, -1),
+  make_tuple(64, 64, &aom_sad64xh_sse2, -1),
+  make_tuple(128, 128, &aom_sad128xh_sse2, -1),
+  make_tuple(128, 64, &aom_sad128xh_sse2, -1),
+  make_tuple(64, 128, &aom_sad64xh_sse2, -1),
+  make_tuple(4, 16, &aom_sad4xh_sse2, -1),
+  make_tuple(16, 4, &aom_sad16xh_sse2, -1),
+  make_tuple(8, 32, &aom_sad8xh_sse2, -1),
+  make_tuple(32, 8, &aom_sad32xh_sse2, -1),
+  make_tuple(16, 64, &aom_sad16xh_sse2, -1),
+  make_tuple(64, 16, &aom_sad64xh_sse2, -1),
+};
+INSTANTIATE_TEST_CASE_P(SSE2, JntSADTest,
+                        ::testing::ValuesIn(jnt_sad_sse2_tests));
+
+#endif  // HAVE_SSSE3
+
 #if HAVE_SSE3
 // Only functions are x3, which do not have tests.
 #endif  // HAVE_SSE3
 
 #if HAVE_SSSE3
-// Only functions are x3, which do not have tests.
+const JntCompAvgParam jnt_comp_avg_ssse3_tests[] = {
+  make_tuple(128, 128, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(128, 64, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(64, 128, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(64, 64, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(64, 32, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(32, 64, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(32, 32, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(32, 16, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(16, 32, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(16, 16, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(16, 8, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(8, 16, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(8, 8, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(8, 4, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(4, 8, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(4, 4, &aom_jnt_comp_avg_pred_ssse3, -1),
+  make_tuple(16, 16, &aom_jnt_comp_avg_pred_ssse3, -1),
+};
+
+INSTANTIATE_TEST_CASE_P(SSSE3, JntCompAvgTest,
+                        ::testing::ValuesIn(jnt_comp_avg_ssse3_tests));
+
+const JntSadMxNAvgParam jnt_avg_ssse3_tests[] = {
+  make_tuple(128, 128, &aom_jnt_sad128x128_avg_ssse3, -1),
+  make_tuple(128, 64, &aom_jnt_sad128x64_avg_ssse3, -1),
+  make_tuple(64, 128, &aom_jnt_sad64x128_avg_ssse3, -1),
+  make_tuple(64, 64, &aom_jnt_sad64x64_avg_ssse3, -1),
+  make_tuple(64, 32, &aom_jnt_sad64x32_avg_ssse3, -1),
+  make_tuple(32, 64, &aom_jnt_sad32x64_avg_ssse3, -1),
+  make_tuple(32, 32, &aom_jnt_sad32x32_avg_ssse3, -1),
+  make_tuple(32, 16, &aom_jnt_sad32x16_avg_ssse3, -1),
+  make_tuple(16, 32, &aom_jnt_sad16x32_avg_ssse3, -1),
+  make_tuple(16, 16, &aom_jnt_sad16x16_avg_ssse3, -1),
+  make_tuple(16, 8, &aom_jnt_sad16x8_avg_ssse3, -1),
+  make_tuple(8, 16, &aom_jnt_sad8x16_avg_ssse3, -1),
+  make_tuple(8, 8, &aom_jnt_sad8x8_avg_ssse3, -1),
+  make_tuple(8, 4, &aom_jnt_sad8x4_avg_ssse3, -1),
+  make_tuple(4, 8, &aom_jnt_sad4x8_avg_ssse3, -1),
+  make_tuple(4, 4, &aom_jnt_sad4x4_avg_ssse3, -1),
+};
+INSTANTIATE_TEST_CASE_P(SSSE3, JntSADavgTest,
+                        ::testing::ValuesIn(jnt_avg_ssse3_tests));
 #endif  // HAVE_SSSE3
 
 #if HAVE_SSE4_1
@@ -955,18 +1336,14 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests));
 
 #if HAVE_AVX2
 const SadMxNParam avx2_tests[] = {
-#if CONFIG_EXT_PARTITION
   make_tuple(64, 128, &aom_sad64x128_avx2, -1),
   make_tuple(128, 64, &aom_sad128x64_avx2, -1),
   make_tuple(128, 128, &aom_sad128x128_avx2, -1),
-#endif
   make_tuple(64, 64, &aom_sad64x64_avx2, -1),
   make_tuple(64, 32, &aom_sad64x32_avx2, -1),
   make_tuple(32, 64, &aom_sad32x64_avx2, -1),
   make_tuple(32, 32, &aom_sad32x32_avx2, -1),
   make_tuple(32, 16, &aom_sad32x16_avx2, -1),
-#if CONFIG_HIGHBITDEPTH
-#if CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 8),
   make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 10),
   make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 12),
@@ -976,7 +1353,6 @@ const SadMxNParam avx2_tests[] = {
   make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 8),
   make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 10),
   make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 12),
-#endif
   make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 8),
   make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 10),
   make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 12),
@@ -1001,23 +1377,18 @@ const SadMxNParam avx2_tests[] = {
   make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 8),
   make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 10),
   make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 12),
-#endif
 };
 INSTANTIATE_TEST_CASE_P(AVX2, SADTest, ::testing::ValuesIn(avx2_tests));
 
 const SadMxNAvgParam avg_avx2_tests[] = {
-#if CONFIG_EXT_PARTITION
   make_tuple(64, 128, &aom_sad64x128_avg_avx2, -1),
   make_tuple(128, 64, &aom_sad128x64_avg_avx2, -1),
   make_tuple(128, 128, &aom_sad128x128_avg_avx2, -1),
-#endif
   make_tuple(64, 64, &aom_sad64x64_avg_avx2, -1),
   make_tuple(64, 32, &aom_sad64x32_avg_avx2, -1),
   make_tuple(32, 64, &aom_sad32x64_avg_avx2, -1),
   make_tuple(32, 32, &aom_sad32x32_avg_avx2, -1),
   make_tuple(32, 16, &aom_sad32x16_avg_avx2, -1),
-#if CONFIG_HIGHBITDEPTH
-#if CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 8),
   make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 10),
   make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 12),
@@ -1027,7 +1398,6 @@ const SadMxNAvgParam avg_avx2_tests[] = {
   make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 8),
   make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 10),
   make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 12),
-#endif
   make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 8),
   make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 10),
   make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 12),
@@ -1052,22 +1422,17 @@ const SadMxNAvgParam avg_avx2_tests[] = {
   make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 8),
   make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 10),
   make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 12),
-#endif
 };
 INSTANTIATE_TEST_CASE_P(AVX2, SADavgTest, ::testing::ValuesIn(avg_avx2_tests));
 
 const SadMxNx4Param x4d_avx2_tests[] = {
-#if CONFIG_EXT_PARTITION
   make_tuple(64, 128, &aom_sad64x128x4d_avx2, -1),
   make_tuple(128, 64, &aom_sad128x64x4d_avx2, -1),
   make_tuple(128, 128, &aom_sad128x128x4d_avx2, -1),
-#endif
   make_tuple(64, 64, &aom_sad64x64x4d_avx2, -1),
   make_tuple(32, 64, &aom_sad32x64x4d_avx2, -1),
   make_tuple(64, 32, &aom_sad64x32x4d_avx2, -1),
   make_tuple(32, 32, &aom_sad32x32x4d_avx2, -1),
-#if CONFIG_HIGHBITDEPTH
-#if CONFIG_EXT_PARTITION
   make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 8),
   make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 10),
   make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 12),
@@ -1077,7 +1442,6 @@ const SadMxNx4Param x4d_avx2_tests[] = {
   make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 8),
   make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 10),
   make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 12),
-#endif
   make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 8),
   make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 10),
   make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 12),
@@ -1102,7 +1466,6 @@ const SadMxNx4Param x4d_avx2_tests[] = {
   make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 8),
   make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 10),
   make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 12),
-#endif
 };
 INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests));
 #endif  // HAVE_AVX2
diff --git a/third_party/aom/test/scalability_test.cc b/third_party/aom/test/scalability_test.cc
new file mode 100644
index 0000000000..b399188617
--- /dev/null
+++ b/third_party/aom/test/scalability_test.cc
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+const int kCpuUsed = 8;
+const int kBaseLayerQp = 55;
+const int kEnhancementLayerQp = 20;
+
+class ScalabilityTest
+    : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  ScalabilityTest() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~ScalabilityTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    num_spatial_layers_ = 2;
+  }
+
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
+      encoder->Control(AOME_SET_NUMBER_SPATIAL_LAYERS, num_spatial_layers_);
+    } else if (video->frame() % num_spatial_layers_) {
+      frame_flags_ = AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 |
+                     AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF |
+                     AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 |
+                     AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF |
+                     AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_UPD_ENTROPY;
+      encoder->Control(AOME_SET_SPATIAL_LAYER_ID, 1);
+      encoder->Control(AOME_SET_CQ_LEVEL, kEnhancementLayerQp);
+    } else {
+      frame_flags_ = AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 |
+                     AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF |
+                     AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 |
+                     AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF |
+                     AOM_EFLAG_NO_UPD_ENTROPY;
+      encoder->Control(AOME_SET_SPATIAL_LAYER_ID, 0);
+      encoder->Control(AOME_SET_CQ_LEVEL, kBaseLayerQp);
+    }
+  }
+
+  void DoTest(int num_spatial_layers) {
+    num_spatial_layers_ = num_spatial_layers;
+    cfg_.rc_end_usage = AOM_Q;
+    cfg_.g_lag_in_frames = 0;
+
+    ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
+                                         288, 30, 1, 0, 18);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  }
+
+  int num_spatial_layers_;
+};
+
+TEST_P(ScalabilityTest, TestNoMismatch2SpatialLayers) { DoTest(2); }
+
+TEST_P(ScalabilityTest, TestNoMismatch3SpatialLayers) { DoTest(3); }
+
+AV1_INSTANTIATE_TEST_CASE(ScalabilityTest,
+                          ::testing::Values(::libaom_test::kRealTime));
+
+}  // namespace
diff --git a/third_party/aom/test/scan_test.cc b/third_party/aom/test/scan_test.cc
index 2b11bd1fbc..dee2ab5a69 100644
--- a/third_party/aom/test/scan_test.cc
+++ b/third_party/aom/test/scan_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
  *
  * This source code is subject to the terms of the BSD 2 Clause License and
  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
@@ -9,122 +9,125 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
-#include "av1/common/common_data.h"
-#include "av1/common/scan.h"
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "av1/common/scan.h"
+#include "av1/common/txb_common.h"
+#include "test/av1_txfm_test.h"
 
-namespace {
-
-TEST(ScanTest, av1_augment_prob) {
-  const TX_SIZE tx_size = TX_4X4;
-  const TX_TYPE tx_type = DCT_DCT;
-  const int tx1d_size = tx_size_wide[tx_size];
-  uint32_t prob[16] = { 8, 8, 7, 7, 8, 8, 4, 2, 3, 3, 2, 2, 2, 2, 2, 2 };
-  const uint32_t ref_prob[16] = {
-    8, 8, 7, 7, 8, 8, 4, 2, 3, 3, 2, 2, 2, 2, 2, 2
-  };
-  av1_augment_prob(tx_size, tx_type, prob);
-  for (int r = 0; r < tx1d_size; ++r) {
-    for (int c = 0; c < tx1d_size; ++c) {
-      const uint32_t idx = r * tx1d_size + c;
-      EXPECT_EQ(ref_prob[idx], prob[idx] >> 16);
-    }
-  }
-
-  const SCAN_ORDER *sc = get_default_scan(tx_size, tx_type, 0);
-  const uint32_t mask = (1 << 16) - 1;
-  for (int r = 0; r < tx1d_size; ++r) {
-    for (int c = 0; c < tx1d_size; ++c) {
-      const uint32_t ref_idx = r * tx1d_size + c;
-      const uint32_t scan_idx = mask ^ (prob[r * tx1d_size + c] & mask);
-      const uint32_t idx = sc->scan[scan_idx];
-      EXPECT_EQ(ref_idx, idx);
-    }
-  }
-}
-
-#if USE_TOPOLOGICAL_SORT
-TEST(ScanTest, av1_update_sort_order) {
-  const TX_SIZE tx_size = TX_4X4;
-  const TX_TYPE tx_type = DCT_DCT;
-  const uint32_t prob[16] = { 15, 14, 11, 10, 13, 12, 9, 5,
-                              8,  7,  4,  2,  6,  3,  1, 0 };
-  const int16_t ref_sort_order[16] = { 0, 1,  4, 5,  2,  3,  6,  8,
-                                       9, 12, 7, 10, 13, 11, 14, 15 };
-  int16_t sort_order[16];
-  av1_update_sort_order(tx_size, tx_type, prob, sort_order);
-  for (int i = 0; i < 16; ++i) EXPECT_EQ(ref_sort_order[i], sort_order[i]);
-}
-#endif
-
-#if USE_TOPOLOGICAL_SORT
-TEST(ScanTest, av1_update_scan_order) {
-  TX_SIZE tx_size = TX_4X4;
-  const TX_TYPE tx_type = DCT_DCT;
-  const uint32_t prob[16] = { 10, 12, 14, 9, 11, 13, 15, 5,
-                              8,  7,  4,  2, 6,  3,  1,  0 };
-  int16_t sort_order[16];
-  int16_t scan[16];
-  int16_t iscan[16];
-  const int16_t ref_iscan[16] = { 0, 1, 2,  6,  3, 4,  5,  10,
-                                  7, 8, 11, 13, 9, 12, 14, 15 };
-
-  av1_update_sort_order(tx_size, tx_type, prob, sort_order);
-  av1_update_scan_order(tx_size, sort_order, scan, iscan);
-
-  for (int i = 0; i < 16; ++i) {
-    EXPECT_EQ(ref_iscan[i], iscan[i]);
-    EXPECT_EQ(i, scan[ref_iscan[i]]);
+static int scan_test(const int16_t *scan, const int16_t *iscan, int si, int r,
+                     int c, int w) {
+  if (iscan[r * w + c] != si || scan[si] != r * w + c) {
+    printf("r %d c %d ref_iscan %d iscan %d ref_scan %d scan %d\n", r, c, si,
+           iscan[r * w + c], r * w + c, scan[si]);
+    return 1;
+  } else {
+    return 0;
   }
 }
-#endif
-
-TEST(ScanTest, av1_update_neighbors) {
-  TX_SIZE tx_size = TX_4X4;
-  // raster order
-  const int16_t scan[16] = { 0, 1, 2,  3,  4,  5,  6,  7,
-                             8, 9, 10, 11, 12, 13, 14, 15 };
-  int16_t nb[(16 + 1) * 2];
-  const int16_t ref_nb[(16 + 1) * 2] = { 0,  0,  0,  0,  1,  1,  2, 2, 0,
-                                         1,  1,  4,  2,  5,  3,  6, 4, 5,
-                                         5,  8,  6,  9,  7,  10, 8, 9, 9,
-                                         12, 10, 13, 11, 14, 0,  0 };
 
-  // raster order's scan and iscan are the same
-  av1_update_neighbors(tx_size, scan, scan, nb);
-
-  for (int i = 0; i < (16 + 1) * 2; ++i) {
-    EXPECT_EQ(ref_nb[i], nb[i]);
+int scan_order_test(const SCAN_ORDER *scan_order, int w, int h,
+                    SCAN_MODE mode) {
+  const int16_t *scan = scan_order->scan;
+  const int16_t *iscan = scan_order->iscan;
+  int dim = w + h - 1;
+  if (mode == SCAN_MODE_ZIG_ZAG) {
+    int si = 0;
+    for (int i = 0; i < dim; ++i) {
+      if (i % 2 == 0) {
+        for (int c = 0; c < w; ++c) {
+          int r = i - c;
+          if (r >= 0 && r < h) {
+            if (scan_test(scan, iscan, si, r, c, w)) return 1;
+            ++si;
+          }
+        }
+      } else {
+        for (int r = 0; r < h; ++r) {
+          int c = i - r;
+          if (c >= 0 && c < w) {
+            if (scan_test(scan, iscan, si, r, c, w)) return 1;
+            ++si;
+          }
+        }
+      }
+    }
+  } else if (mode == SCAN_MODE_COL_DIAG) {
+    int si = 0;
+    for (int i = 0; i < dim; ++i) {
+      for (int c = 0; c < w; ++c) {
+        int r = i - c;
+        if (r >= 0 && r < h) {
+          if (scan_test(scan, iscan, si, r, c, w)) return 1;
+          ++si;
+        }
+      }
+    }
+  } else if (mode == SCAN_MODE_ROW_DIAG) {
+    int si = 0;
+    for (int i = 0; i < dim; ++i) {
+      for (int r = 0; r < h; ++r) {
+        int c = i - r;
+        if (c >= 0 && c < w) {
+          if (scan_test(scan, iscan, si, r, c, w)) return 1;
+          ++si;
+        }
+      }
+    }
+  } else if (mode == SCAN_MODE_ROW_1D) {
+    int si = 0;
+    for (int r = 0; r < h; ++r) {
+      for (int c = 0; c < w; ++c) {
+        if (scan_test(scan, iscan, si, r, c, w)) return 1;
+        ++si;
+      }
+    }
+  } else {
+    assert(mode == SCAN_MODE_COL_1D);
+    int si = 0;
+    for (int c = 0; c < w; ++c) {
+      for (int r = 0; r < h; ++r) {
+        if (scan_test(scan, iscan, si, r, c, w)) return 1;
+        ++si;
+      }
+    }
   }
+  return 0;
 }
 
-#if USE_2X2_PROB
-TEST(ScanTest, av1_down_sample_scan_count) {
-  const uint32_t non_zero_count[256] = {
-    13, 12, 11, 10, 0,  0, 0, 0,  0, 0, 0,  0,  0, 0, 0, 0, 13, 9, 10, 8, 0, 0,
-    0,  0,  0,  0,  0,  0, 0, 0,  0, 0, 11, 12, 9, 8, 0, 0, 0,  0, 0,  0, 0, 0,
-    0,  0,  0,  0,  13, 9, 9, 10, 0, 0, 0,  0,  0, 0, 0, 0, 0,  0, 0,  0, 0, 0,
-    0,  0,  0,  0,  0,  0, 0, 0,  0, 0, 0,  0,  0, 0, 0, 0, 0,  0, 0,  0, 0, 0,
-    0,  0,  0,  0,  0,  0, 0, 0,  0, 0, 0,  0,  0, 0, 0, 0, 0,  0, 0,  0, 0, 0,
-    0,  0,  0,  0,  0,  0, 0, 0,  0, 0, 0,  0,  0, 0, 0, 0, 0,  0, 0,  0, 0, 0,
-    0,  0,  0,  0,  0,  0, 0, 0,  0, 0, 0,  0,  0, 0, 0, 0, 0,  0, 0,  0, 0, 0,
-    0,  0,  0,  0,  0,  0, 0, 0,  0, 0, 0,  0,  0, 0, 0, 0, 0,  0, 0,  0, 0, 0,
-    0,  0,  0,  0,  0,  0, 0, 0,  0, 0, 0,  0,  0, 0, 0, 0, 0,  0, 0,  0, 0, 0,
-    0,  0,  0,  0,  0,  0, 0, 0,  0, 0, 0,  0,  0, 0, 0, 0, 0,  0, 0,  0, 0, 0,
-    0,  0,  0,  0,  0,  0, 0, 0,  0, 0, 0,  0,  0, 0, 0, 0, 0,  0, 0,  0, 0, 0,
-    0,  0,  0,  0,  0,  0, 0, 0,  0, 0, 0,  0,  0, 0,
-  };
-  const uint32_t ref_non_zero_count_ds[64] = {
-    13, 11, 0, 0, 0, 0, 0, 0, 11, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0,  0,  0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0,  0,  0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-  };
-  uint32_t non_zero_count_ds[64];
-  av1_down_sample_scan_count(non_zero_count_ds, non_zero_count, TX_16X16);
-  for (int i = 0; i < 64; ++i) {
-    EXPECT_EQ(ref_non_zero_count_ds[i], non_zero_count_ds[i]);
+TEST(Av1ScanTest, Dependency) {
+  for (int tx_size = TX_4X4; tx_size < TX_SIZES_ALL; ++tx_size) {
+    const int org_rows = tx_size_high[(TX_SIZE)tx_size];
+    const int org_cols = tx_size_wide[(TX_SIZE)tx_size];
+    const int rows = get_txb_high((TX_SIZE)tx_size);
+    const int cols = get_txb_wide((TX_SIZE)tx_size);
+    for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+      if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(tx_size),
+                                         static_cast<TX_TYPE>(tx_type)) ==
+          false) {
+        continue;
+      }
+      SCAN_MODE scan_mode;
+      TX_CLASS tx_class = tx_type_to_class[(TX_TYPE)tx_type];
+      if (tx_class == TX_CLASS_2D) {
+        if (rows == cols) {
+          scan_mode = SCAN_MODE_ZIG_ZAG;
+        } else if (rows > cols) {
+          scan_mode = SCAN_MODE_ROW_DIAG;
+        } else {
+          scan_mode = SCAN_MODE_COL_DIAG;
+        }
+      } else if (tx_class == TX_CLASS_VERT) {
+        scan_mode = SCAN_MODE_ROW_1D;
+      } else {
+        assert(tx_class == TX_CLASS_HORIZ);
+        scan_mode = SCAN_MODE_COL_1D;
+      }
+      const SCAN_ORDER *scan_order =
+          get_default_scan((TX_SIZE)tx_size, (TX_TYPE)tx_type);
+      ASSERT_EQ(scan_order_test(scan_order, cols, rows, scan_mode), 0)
+          << "scan mismatch tx_class " << tx_class << " tx_type " << tx_type
+          << " tx_w " << org_cols << " tx_h " << org_rows << " scan_mode "
+          << scan_mode << "\n";
+    }
   }
 }
-#endif
-
-}  // namespace
diff --git a/third_party/aom/test/segment_binarization_sync.cc b/third_party/aom/test/segment_binarization_sync.cc
new file mode 100644
index 0000000000..bd8cf11410
--- /dev/null
+++ b/third_party/aom/test/segment_binarization_sync.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/acm_random.h"
+
+using libaom_test::ACMRandom;
+
+extern "C" {
+int av1_neg_interleave(int x, int ref, int max);
+int av1_neg_deinterleave(int diff, int ref, int max);
+}
+
+namespace {
+
+struct Segment {
+  int id;
+  int pred;
+  int last_id;
+};
+
+Segment GenerateSegment(int seed) {
+  static const int MAX_SEGMENTS = 8;
+
+  ACMRandom rnd_(seed);
+
+  Segment segment;
+  const int last_segid = rnd_.PseudoUniform(MAX_SEGMENTS);
+  segment.last_id = last_segid;
+  segment.pred = rnd_.PseudoUniform(MAX_SEGMENTS);
+  segment.id = rnd_.PseudoUniform(last_segid + 1);
+
+  return segment;
+}
+
+// Try to reveal a mismatch between segment binarization and debinarization
+TEST(SegmentBinarizationSync, SearchForBinarizationMismatch) {
+  const int count_tests = 1000;
+  const int seed_init = 4321;
+
+  for (int i = 0; i < count_tests; ++i) {
+    const Segment seg = GenerateSegment(seed_init + i);
+
+    const int max_segid = seg.last_id + 1;
+    const int seg_diff = av1_neg_interleave(seg.id, seg.pred, max_segid);
+    const int decoded_segid =
+        av1_neg_deinterleave(seg_diff, seg.pred, max_segid);
+
+    ASSERT_EQ(decoded_segid, seg.id);
+  }
+}
+
+}  // namespace
diff --git a/third_party/aom/test/selfguided_filter_test.cc b/third_party/aom/test/selfguided_filter_test.cc
index 55ce1d5de1..4506a90dbd 100644
--- a/third_party/aom/test/selfguided_filter_test.cc
+++ b/third_party/aom/test/selfguided_filter_test.cc
@@ -13,22 +13,30 @@
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./av1_rtcd.h"
+#include "config/av1_rtcd.h"
+
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
 
+#include "aom_ports/aom_timer.h"
 #include "av1/common/mv.h"
 #include "av1/common/restoration.h"
 
 namespace {
 
-using std::tr1::tuple;
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
+using ::testing::tuple;
 using libaom_test::ACMRandom;
 
-typedef tuple<> FilterTestParam;
+typedef void (*SgrFunc)(const uint8_t *dat8, int width, int height, int stride,
+                        int eps, const int *xqd, uint8_t *dst8, int dst_stride,
+                        int32_t *tmpbuf, int bit_depth, int highbd);
+
+// Test parameter list:
+//  <tst_fun_>
+typedef tuple<SgrFunc> FilterTestParam;
 
 class AV1SelfguidedFilterTest
     : public ::testing::TestWithParam<FilterTestParam> {
@@ -40,6 +48,7 @@ class AV1SelfguidedFilterTest
 
  protected:
   void RunSpeedTest() {
+    tst_fun_ = GET_PARAM(0);
     const int pu_width = RESTORATION_PROC_UNIT_SIZE;
     const int pu_height = RESTORATION_PROC_UNIT_SIZE;
     const int width = 256, height = 256, stride = 288, out_stride = 288;
@@ -47,10 +56,10 @@ class AV1SelfguidedFilterTest
     int i, j, k;
 
     uint8_t *input_ =
-        (uint8_t *)aom_memalign(16, stride * (height + 32) * sizeof(uint8_t));
+        (uint8_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint8_t));
     uint8_t *output_ = (uint8_t *)aom_memalign(
-        16, out_stride * (height + 32) * sizeof(uint8_t));
-    int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+        32, out_stride * (height + 32) * sizeof(uint8_t));
+    int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
     uint8_t *input = input_ + stride * 16 + 16;
     uint8_t *output = output_ + out_stride * 16 + 16;
 
@@ -60,19 +69,18 @@ class AV1SelfguidedFilterTest
       for (j = -16; j < width + 16; ++j)
         input[i * stride + j] = rnd.Rand16() & 0xFF;
 
-    int xqd[2] = {
-      SGRPROJ_PRJ_MIN0 +
-          rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
-      SGRPROJ_PRJ_MIN1 +
-          rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
-    };
+    int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
+                                                        SGRPROJ_PRJ_MIN0),
+                   SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
+                                                        SGRPROJ_PRJ_MIN1) };
     // Fix a parameter set, since the speed depends slightly on r.
     // Change this to test different combinations of values of r.
     int eps = 15;
 
     av1_loop_restoration_precal();
 
-    std::clock_t start = std::clock();
+    aom_usec_timer ref_timer;
+    aom_usec_timer_start(&ref_timer);
     for (i = 0; i < NUM_ITERS; ++i) {
       for (k = 0; k < height; k += pu_height)
         for (j = 0; j < width; j += pu_width) {
@@ -80,15 +88,36 @@ class AV1SelfguidedFilterTest
           int h = AOMMIN(pu_height, height - k);
           uint8_t *input_p = input + k * stride + j;
           uint8_t *output_p = output + k * out_stride + j;
-          apply_selfguided_restoration(input_p, w, h, stride, eps, xqd,
-                                       output_p, out_stride, tmpbuf);
+          apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd,
+                                         output_p, out_stride, tmpbuf, 8, 0);
         }
     }
-    std::clock_t end = std::clock();
-    double elapsed = ((end - start) / (double)CLOCKS_PER_SEC);
+    aom_usec_timer_mark(&ref_timer);
+    const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
 
-    printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, width,
-           height, elapsed, elapsed * 1000000. / NUM_ITERS);
+    aom_usec_timer tst_timer;
+    aom_usec_timer_start(&tst_timer);
+    for (i = 0; i < NUM_ITERS; ++i) {
+      for (k = 0; k < height; k += pu_height)
+        for (j = 0; j < width; j += pu_width) {
+          int w = AOMMIN(pu_width, width - j);
+          int h = AOMMIN(pu_height, height - k);
+          uint8_t *input_p = input + k * stride + j;
+          uint8_t *output_p = output + k * out_stride + j;
+          tst_fun_(input_p, w, h, stride, eps, xqd, output_p, out_stride,
+                   tmpbuf, 8, 0);
+        }
+    }
+    aom_usec_timer_mark(&tst_timer);
+    const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+    std::cout << "[          ] C time = " << ref_time / 1000
+              << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+    EXPECT_GT(ref_time, tst_time)
+        << "Error: AV1SelfguidedFilterTest.SpeedTest, SIMD slower than C.\n"
+        << "C time: " << ref_time << " us\n"
+        << "SIMD time: " << tst_time << " us\n";
 
     aom_free(input_);
     aom_free(output_);
@@ -96,6 +125,7 @@ class AV1SelfguidedFilterTest
   }
 
   void RunCorrectnessTest() {
+    tst_fun_ = GET_PARAM(0);
     const int pu_width = RESTORATION_PROC_UNIT_SIZE;
     const int pu_height = RESTORATION_PROC_UNIT_SIZE;
     // Set the maximum width/height to test here. We actually test a small
@@ -106,12 +136,12 @@ class AV1SelfguidedFilterTest
     int i, j, k;
 
     uint8_t *input_ =
-        (uint8_t *)aom_memalign(16, stride * (max_h + 32) * sizeof(uint8_t));
+        (uint8_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint8_t));
     uint8_t *output_ = (uint8_t *)aom_memalign(
-        16, out_stride * (max_h + 32) * sizeof(uint8_t));
+        32, out_stride * (max_h + 32) * sizeof(uint8_t));
     uint8_t *output2_ = (uint8_t *)aom_memalign(
-        16, out_stride * (max_h + 32) * sizeof(uint8_t));
-    int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+        32, out_stride * (max_h + 32) * sizeof(uint8_t));
+    int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
 
     uint8_t *input = input_ + stride * 16 + 16;
     uint8_t *output = output_ + out_stride * 16 + 16;
@@ -126,12 +156,10 @@ class AV1SelfguidedFilterTest
         for (k = -16; k < max_w + 16; ++k)
           input[j * stride + k] = rnd.Rand16() & 0xFF;
 
-      int xqd[2] = {
-        SGRPROJ_PRJ_MIN0 +
-            rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
-        SGRPROJ_PRJ_MIN1 +
-            rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
-      };
+      int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
+                                                          SGRPROJ_PRJ_MIN0),
+                     SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
+                                                          SGRPROJ_PRJ_MIN1) };
       int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
 
       // Test various tile sizes around 256x256
@@ -145,17 +173,12 @@ class AV1SelfguidedFilterTest
           uint8_t *input_p = input + k * stride + j;
           uint8_t *output_p = output + k * out_stride + j;
           uint8_t *output2_p = output2 + k * out_stride + j;
-          apply_selfguided_restoration(input_p, w, h, stride, eps, xqd,
-                                       output_p, out_stride, tmpbuf);
+          tst_fun_(input_p, w, h, stride, eps, xqd, output_p, out_stride,
+                   tmpbuf, 8, 0);
           apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd,
-                                         output2_p, out_stride, tmpbuf);
+                                         output2_p, out_stride, tmpbuf, 8, 0);
         }
-      /*
-      apply_selfguided_restoration(input, test_w, test_h, stride, eps, xqd,
-                                   output, out_stride, tmpbuf);
-      apply_selfguided_restoration_c(input, test_w, test_h, stride, eps, xqd,
-                                     output2, out_stride, tmpbuf);
-                                     */
+
       for (j = 0; j < test_h; ++j)
         for (k = 0; k < test_w; ++k) {
           ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
@@ -167,20 +190,27 @@ class AV1SelfguidedFilterTest
     aom_free(output2_);
     aom_free(tmpbuf);
   }
+
+ private:
+  SgrFunc tst_fun_;
 };
 
-TEST_P(AV1SelfguidedFilterTest, SpeedTest) { RunSpeedTest(); }
+TEST_P(AV1SelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); }
 TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
 
 #if HAVE_SSE4_1
-const FilterTestParam params[] = { make_tuple() };
 INSTANTIATE_TEST_CASE_P(SSE4_1, AV1SelfguidedFilterTest,
-                        ::testing::ValuesIn(params));
+                        ::testing::Values(apply_selfguided_restoration_sse4_1));
 #endif
 
-#if CONFIG_HIGHBITDEPTH
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(AVX2, AV1SelfguidedFilterTest,
+                        ::testing::Values(apply_selfguided_restoration_avx2));
+#endif
 
-typedef tuple<int> HighbdFilterTestParam;
+// Test parameter list:
+//  <tst_fun_, bit_depth>
+typedef tuple<SgrFunc, int> HighbdFilterTestParam;
 
 class AV1HighbdSelfguidedFilterTest
     : public ::testing::TestWithParam<HighbdFilterTestParam> {
@@ -192,19 +222,20 @@ class AV1HighbdSelfguidedFilterTest
 
  protected:
   void RunSpeedTest() {
+    tst_fun_ = GET_PARAM(0);
     const int pu_width = RESTORATION_PROC_UNIT_SIZE;
     const int pu_height = RESTORATION_PROC_UNIT_SIZE;
     const int width = 256, height = 256, stride = 288, out_stride = 288;
     const int NUM_ITERS = 2000;
     int i, j, k;
-    int bit_depth = GET_PARAM(0);
+    int bit_depth = GET_PARAM(1);
     int mask = (1 << bit_depth) - 1;
 
     uint16_t *input_ =
-        (uint16_t *)aom_memalign(16, stride * (height + 32) * sizeof(uint16_t));
+        (uint16_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint16_t));
     uint16_t *output_ = (uint16_t *)aom_memalign(
-        16, out_stride * (height + 32) * sizeof(uint16_t));
-    int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+        32, out_stride * (height + 32) * sizeof(uint16_t));
+    int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
     uint16_t *input = input_ + stride * 16 + 16;
     uint16_t *output = output_ + out_stride * 16 + 16;
 
@@ -214,19 +245,18 @@ class AV1HighbdSelfguidedFilterTest
       for (j = -16; j < width + 16; ++j)
         input[i * stride + j] = rnd.Rand16() & mask;
 
-    int xqd[2] = {
-      SGRPROJ_PRJ_MIN0 +
-          rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
-      SGRPROJ_PRJ_MIN1 +
-          rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
-    };
+    int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
+                                                        SGRPROJ_PRJ_MIN0),
+                   SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
+                                                        SGRPROJ_PRJ_MIN1) };
     // Fix a parameter set, since the speed depends slightly on r.
     // Change this to test different combinations of values of r.
     int eps = 15;
 
     av1_loop_restoration_precal();
 
-    std::clock_t start = std::clock();
+    aom_usec_timer ref_timer;
+    aom_usec_timer_start(&ref_timer);
     for (i = 0; i < NUM_ITERS; ++i) {
       for (k = 0; k < height; k += pu_height)
         for (j = 0; j < width; j += pu_width) {
@@ -234,16 +264,39 @@ class AV1HighbdSelfguidedFilterTest
           int h = AOMMIN(pu_height, height - k);
           uint16_t *input_p = input + k * stride + j;
           uint16_t *output_p = output + k * out_stride + j;
-          apply_selfguided_restoration_highbd(input_p, w, h, stride, bit_depth,
-                                              eps, xqd, output_p, out_stride,
-                                              tmpbuf);
+          apply_selfguided_restoration_c(
+              CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
+              CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, 1);
         }
     }
-    std::clock_t end = std::clock();
-    double elapsed = ((end - start) / (double)CLOCKS_PER_SEC);
+    aom_usec_timer_mark(&ref_timer);
+    const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
 
-    printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, width,
-           height, elapsed, elapsed * 1000000. / NUM_ITERS);
+    aom_usec_timer tst_timer;
+    aom_usec_timer_start(&tst_timer);
+    for (i = 0; i < NUM_ITERS; ++i) {
+      for (k = 0; k < height; k += pu_height)
+        for (j = 0; j < width; j += pu_width) {
+          int w = AOMMIN(pu_width, width - j);
+          int h = AOMMIN(pu_height, height - k);
+          uint16_t *input_p = input + k * stride + j;
+          uint16_t *output_p = output + k * out_stride + j;
+          tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
+                   CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth,
+                   1);
+        }
+    }
+    aom_usec_timer_mark(&tst_timer);
+    const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+    std::cout << "[          ] C time = " << ref_time / 1000
+              << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+    EXPECT_GT(ref_time, tst_time)
+        << "Error: AV1HighbdSelfguidedFilterTest.SpeedTest, SIMD slower than "
+           "C.\n"
+        << "C time: " << ref_time << " us\n"
+        << "SIMD time: " << tst_time << " us\n";
 
     aom_free(input_);
     aom_free(output_);
@@ -251,6 +304,7 @@ class AV1HighbdSelfguidedFilterTest
   }
 
   void RunCorrectnessTest() {
+    tst_fun_ = GET_PARAM(0);
     const int pu_width = RESTORATION_PROC_UNIT_SIZE;
     const int pu_height = RESTORATION_PROC_UNIT_SIZE;
     // Set the maximum width/height to test here. We actually test a small
@@ -259,16 +313,16 @@ class AV1HighbdSelfguidedFilterTest
     const int max_w = 260, max_h = 260, stride = 672, out_stride = 672;
     const int NUM_ITERS = 81;
     int i, j, k;
-    int bit_depth = GET_PARAM(0);
+    int bit_depth = GET_PARAM(1);
     int mask = (1 << bit_depth) - 1;
 
     uint16_t *input_ =
-        (uint16_t *)aom_memalign(16, stride * (max_h + 32) * sizeof(uint16_t));
+        (uint16_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint16_t));
     uint16_t *output_ = (uint16_t *)aom_memalign(
-        16, out_stride * (max_h + 32) * sizeof(uint16_t));
+        32, out_stride * (max_h + 32) * sizeof(uint16_t));
     uint16_t *output2_ = (uint16_t *)aom_memalign(
-        16, out_stride * (max_h + 32) * sizeof(uint16_t));
-    int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+        32, out_stride * (max_h + 32) * sizeof(uint16_t));
+    int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
 
     uint16_t *input = input_ + stride * 16 + 16;
     uint16_t *output = output_ + out_stride * 16 + 16;
@@ -283,12 +337,10 @@ class AV1HighbdSelfguidedFilterTest
         for (k = -16; k < max_w + 16; ++k)
           input[j * stride + k] = rnd.Rand16() & mask;
 
-      int xqd[2] = {
-        SGRPROJ_PRJ_MIN0 +
-            rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
-        SGRPROJ_PRJ_MIN1 +
-            rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
-      };
+      int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
+                                                          SGRPROJ_PRJ_MIN0),
+                     SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
+                                                          SGRPROJ_PRJ_MIN1) };
       int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
 
       // Test various tile sizes around 256x256
@@ -302,22 +354,14 @@ class AV1HighbdSelfguidedFilterTest
           uint16_t *input_p = input + k * stride + j;
           uint16_t *output_p = output + k * out_stride + j;
           uint16_t *output2_p = output2 + k * out_stride + j;
-          apply_selfguided_restoration_highbd(input_p, w, h, stride, bit_depth,
-                                              eps, xqd, output_p, out_stride,
-                                              tmpbuf);
-          apply_selfguided_restoration_highbd_c(input_p, w, h, stride,
-                                                bit_depth, eps, xqd, output2_p,
-                                                out_stride, tmpbuf);
+          tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
+                   CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth,
+                   1);
+          apply_selfguided_restoration_c(
+              CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
+              CONVERT_TO_BYTEPTR(output2_p), out_stride, tmpbuf, bit_depth, 1);
         }
 
-      /*
-      apply_selfguided_restoration_highbd(input, test_w, test_h, stride,
-                                          bit_depth, eps, xqd, output,
-                                          out_stride, tmpbuf);
-      apply_selfguided_restoration_highbd_c(input, test_w, test_h, stride,
-                                            bit_depth, eps, xqd, output2,
-                                            out_stride, tmpbuf);
-                                            */
       for (j = 0; j < test_h; ++j)
         for (k = 0; k < test_w; ++k)
           ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
@@ -328,17 +372,28 @@ class AV1HighbdSelfguidedFilterTest
     aom_free(output2_);
     aom_free(tmpbuf);
   }
+
+ private:
+  SgrFunc tst_fun_;
 };
 
-TEST_P(AV1HighbdSelfguidedFilterTest, SpeedTest) { RunSpeedTest(); }
+TEST_P(AV1HighbdSelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); }
 TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
 
 #if HAVE_SSE4_1
-const HighbdFilterTestParam highbd_params[] = { make_tuple(8), make_tuple(10),
-                                                make_tuple(12) };
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdSelfguidedFilterTest,
-                        ::testing::ValuesIn(highbd_params));
+const int highbd_params_sse4_1[] = { 8, 10, 12 };
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, AV1HighbdSelfguidedFilterTest,
+    ::testing::Combine(::testing::Values(apply_selfguided_restoration_sse4_1),
+                       ::testing::ValuesIn(highbd_params_sse4_1)));
 #endif
+
+#if HAVE_AVX2
+const int highbd_params_avx2[] = { 8, 10, 12 };
+INSTANTIATE_TEST_CASE_P(
+    AVX2, AV1HighbdSelfguidedFilterTest,
+    ::testing::Combine(::testing::Values(apply_selfguided_restoration_avx2),
+                       ::testing::ValuesIn(highbd_params_avx2)));
 #endif
 
 }  // namespace
diff --git a/third_party/aom/test/simd_avx2_test.cc b/third_party/aom/test/simd_avx2_test.cc
index d54d201b9a..8a012bff88 100644
--- a/third_party/aom/test/simd_avx2_test.cc
+++ b/third_party/aom/test/simd_avx2_test.cc
@@ -12,4 +12,4 @@
 #define ARCH AVX2
 #define ARCH_POSTFIX(name) name##_avx2
 #define SIMD_NAMESPACE simd_test_avx2
-#include "./simd_impl.h"
+#include "test/simd_impl.h"
diff --git a/third_party/aom/test/simd_cmp_avx2.cc b/third_party/aom/test/simd_cmp_avx2.cc
index 47ae11c620..cda632bcdf 100644
--- a/third_party/aom/test/simd_cmp_avx2.cc
+++ b/third_party/aom/test/simd_cmp_avx2.cc
@@ -12,4 +12,4 @@
 #define ARCH AVX2
 #define ARCH_POSTFIX(name) name##_avx2
 #define SIMD_NAMESPACE simd_test_avx2
-#include "./simd_cmp_impl.h"
+#include "test/simd_cmp_impl.h"
diff --git a/third_party/aom/test/simd_cmp_impl.h b/third_party/aom/test/simd_cmp_impl.h
index 03fe703d9d..b98af9aade 100644
--- a/third_party/aom/test/simd_cmp_impl.h
+++ b/third_party/aom/test/simd_cmp_impl.h
@@ -7,11 +7,13 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <assert.h>
 #include <string>
-#include "./aom_dsp_rtcd.h"
+
+#include "config/aom_dsp_rtcd.h"
+
 #include "test/acm_random.h"
 #include "aom_dsp/aom_simd.h"
 #undef SIMD_INLINE
@@ -22,6 +24,14 @@
 // simd_cmp_sse2.cc, simd_cmp_ssse3.cc etc which define the macros
 // ARCH (=neon, sse2, ssse3, etc), SIMD_NAMESPACE and ARCH_POSTFIX().
 
+#ifdef _MSC_VER
+// Disable "value of intrinsic immediate argument 'value' is out of range
+// 'lowerbound - upperbound'" warning. Visual Studio emits this warning though
+// the parameters are conditionally checked in e.g., v256_shr_n_byte. Adding a
+// mask doesn't always appear to be sufficient.
+#pragma warning(disable : 4556)
+#endif
+
 using libaom_test::ACMRandom;
 
 namespace SIMD_NAMESPACE {
@@ -171,6 +181,18 @@ v128 imm_v128_shr_n_s32(v128 a) {
   return v128_shr_n_s32(a, shift);
 }
 template <int shift>
+v128 imm_v128_shl_n_64(v128 a) {
+  return v128_shl_n_64(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_u64(v128 a) {
+  return v128_shr_n_u64(a, shift);
+}
+template <int shift>
+v128 imm_v128_shr_n_s64(v128 a) {
+  return v128_shr_n_s64(a, shift);
+}
+template <int shift>
 v128 imm_v128_align(v128 a, v128 b) {
   return v128_align(a, b, shift);
 }
@@ -220,11 +242,31 @@ c_v128 c_imm_v128_shr_n_s32(c_v128 a) {
   return c_v128_shr_n_s32(a, shift);
 }
 template <int shift>
+c_v128 c_imm_v128_shl_n_64(c_v128 a) {
+  return c_v128_shl_n_64(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_u64(c_v128 a) {
+  return c_v128_shr_n_u64(a, shift);
+}
+template <int shift>
+c_v128 c_imm_v128_shr_n_s64(c_v128 a) {
+  return c_v128_shr_n_s64(a, shift);
+}
+template <int shift>
 c_v128 c_imm_v128_align(c_v128 a, c_v128 b) {
   return c_v128_align(a, b, shift);
 }
 
 template <int shift>
+v256 imm_v256_shl_n_word(v256 a) {
+  return v256_shl_n_word(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_word(v256 a) {
+  return v256_shr_n_word(a, shift);
+}
+template <int shift>
 v256 imm_v256_shl_n_byte(v256 a) {
   return v256_shl_n_byte(a, shift);
 }
@@ -269,11 +311,31 @@ v256 imm_v256_shr_n_s32(v256 a) {
   return v256_shr_n_s32(a, shift);
 }
 template <int shift>
+v256 imm_v256_shl_n_64(v256 a) {
+  return v256_shl_n_64(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_u64(v256 a) {
+  return v256_shr_n_u64(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_s64(v256 a) {
+  return v256_shr_n_s64(a, shift);
+}
+template <int shift>
 v256 imm_v256_align(v256 a, v256 b) {
   return v256_align(a, b, shift);
 }
 
 template <int shift>
+c_v256 c_imm_v256_shl_n_word(c_v256 a) {
+  return c_v256_shl_n_word(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_word(c_v256 a) {
+  return c_v256_shr_n_word(a, shift);
+}
+template <int shift>
 c_v256 c_imm_v256_shl_n_byte(c_v256 a) {
   return c_v256_shl_n_byte(a, shift);
 }
@@ -318,6 +380,18 @@ c_v256 c_imm_v256_shr_n_s32(c_v256 a) {
   return c_v256_shr_n_s32(a, shift);
 }
 template <int shift>
+c_v256 c_imm_v256_shl_n_64(c_v256 a) {
+  return c_v256_shl_n_64(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_u64(c_v256 a) {
+  return c_v256_shr_n_u64(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_s64(c_v256 a) {
+  return c_v256_shr_n_s64(a, shift);
+}
+template <int shift>
 c_v256 c_imm_v256_align(c_v256 a, c_v256 b) {
   return c_v256_align(a, b, shift);
 }
@@ -348,6 +422,18 @@ uint32_t c_v128_sad_u8(c_v128 a, c_v128 b) {
 uint32_t c_v128_ssd_u8(c_v128 a, c_v128 b) {
   return c_v128_ssd_u8_sum(::c_v128_ssd_u8(c_v128_ssd_u8_init(), a, b));
 }
+uint32_t v128_sad_u16(v128 a, v128 b) {
+  return v128_sad_u16_sum(::v128_sad_u16(v128_sad_u16_init(), a, b));
+}
+uint64_t v128_ssd_s16(v128 a, v128 b) {
+  return v128_ssd_s16_sum(::v128_ssd_s16(v128_ssd_s16_init(), a, b));
+}
+uint32_t c_v128_sad_u16(c_v128 a, c_v128 b) {
+  return c_v128_sad_u16_sum(::c_v128_sad_u16(c_v128_sad_u16_init(), a, b));
+}
+uint64_t c_v128_ssd_s16(c_v128 a, c_v128 b) {
+  return c_v128_ssd_s16_sum(::c_v128_ssd_s16(c_v128_ssd_s16_init(), a, b));
+}
 uint32_t v256_sad_u8(v256 a, v256 b) {
   return v256_sad_u8_sum(::v256_sad_u8(v256_sad_u8_init(), a, b));
 }
@@ -360,6 +446,18 @@ uint32_t c_v256_sad_u8(c_v256 a, c_v256 b) {
 uint32_t c_v256_ssd_u8(c_v256 a, c_v256 b) {
   return c_v256_ssd_u8_sum(::c_v256_ssd_u8(c_v256_ssd_u8_init(), a, b));
 }
+uint32_t v256_sad_u16(v256 a, v256 b) {
+  return v256_sad_u16_sum(::v256_sad_u16(v256_sad_u16_init(), a, b));
+}
+uint64_t v256_ssd_s16(v256 a, v256 b) {
+  return v256_ssd_s16_sum(::v256_ssd_s16(v256_ssd_s16_init(), a, b));
+}
+uint32_t c_v256_sad_u16(c_v256 a, c_v256 b) {
+  return c_v256_sad_u16_sum(::c_v256_sad_u16(c_v256_sad_u16_init(), a, b));
+}
+uint64_t c_v256_ssd_s16(c_v256 a, c_v256 b) {
+  return c_v256_ssd_s16_sum(::c_v256_ssd_s16(c_v256_ssd_s16_init(), a, b));
+}
 
 namespace {
 
@@ -371,16 +469,18 @@ typedef struct {
   fptr simd;
 } mapping;
 
-#define MAP(name)                                                              \
-  {                                                                            \
-    #name,                                                                     \
-        reinterpret_cast < fptr > (c_##name), reinterpret_cast < fptr > (name) \
+#define MAP(name)                                \
+  {                                              \
+    #name, reinterpret_cast < fptr > (c_##name), \
+        reinterpret_cast < fptr > (name)         \
   }
 
 const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v64_ssd_u8),
                       MAP(v64_add_8),
                       MAP(v64_add_16),
+                      MAP(v64_sadd_s8),
+                      MAP(v64_sadd_u8),
                       MAP(v64_sadd_s16),
                       MAP(v64_add_32),
                       MAP(v64_sub_8),
@@ -396,6 +496,7 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v64_ziphi_16),
                       MAP(v64_ziplo_32),
                       MAP(v64_ziphi_32),
+                      MAP(v64_pack_s32_u16),
                       MAP(v64_pack_s32_s16),
                       MAP(v64_pack_s16_u8),
                       MAP(v64_pack_s16_s8),
@@ -414,6 +515,7 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v64_madd_us8),
                       MAP(v64_avg_u8),
                       MAP(v64_rdavg_u8),
+                      MAP(v64_rdavg_u16),
                       MAP(v64_avg_u16),
                       MAP(v64_min_u8),
                       MAP(v64_max_u8),
@@ -554,10 +656,15 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v64_from_16),
                       MAP(v128_sad_u8),
                       MAP(v128_ssd_u8),
+                      MAP(v128_sad_u16),
+                      MAP(v128_ssd_s16),
                       MAP(v128_add_8),
                       MAP(v128_add_16),
+                      MAP(v128_sadd_s8),
+                      MAP(v128_sadd_u8),
                       MAP(v128_sadd_s16),
                       MAP(v128_add_32),
+                      MAP(v128_add_64),
                       MAP(v128_sub_8),
                       MAP(v128_ssub_u8),
                       MAP(v128_ssub_s8),
@@ -565,6 +672,7 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v128_ssub_s16),
                       MAP(v128_ssub_u16),
                       MAP(v128_sub_32),
+                      MAP(v128_sub_64),
                       MAP(v128_ziplo_8),
                       MAP(v128_ziphi_8),
                       MAP(v128_ziplo_16),
@@ -579,6 +687,7 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v128_unziplo_16),
                       MAP(v128_unziphi_32),
                       MAP(v128_unziplo_32),
+                      MAP(v128_pack_s32_u16),
                       MAP(v128_pack_s32_s16),
                       MAP(v128_pack_s16_u8),
                       MAP(v128_pack_s16_s8),
@@ -593,6 +702,7 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v128_madd_us8),
                       MAP(v128_avg_u8),
                       MAP(v128_rdavg_u8),
+                      MAP(v128_rdavg_u16),
                       MAP(v128_avg_u16),
                       MAP(v128_min_u8),
                       MAP(v128_max_u8),
@@ -600,12 +710,17 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v128_max_s8),
                       MAP(v128_min_s16),
                       MAP(v128_max_s16),
+                      MAP(v128_min_s32),
+                      MAP(v128_max_s32),
                       MAP(v128_cmpgt_s8),
                       MAP(v128_cmplt_s8),
                       MAP(v128_cmpeq_8),
                       MAP(v128_cmpgt_s16),
                       MAP(v128_cmpeq_16),
                       MAP(v128_cmplt_s16),
+                      MAP(v128_cmpgt_s32),
+                      MAP(v128_cmpeq_32),
+                      MAP(v128_cmplt_s32),
                       MAP(v128_shuffle_8),
                       MAP(imm_v128_align<1>),
                       MAP(imm_v128_align<2>),
@@ -624,6 +739,7 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(imm_v128_align<15>),
                       MAP(v128_abs_s8),
                       MAP(v128_abs_s16),
+                      MAP(v128_padd_u8),
                       MAP(v128_padd_s16),
                       MAP(v128_unpacklo_u16_s32),
                       MAP(v128_unpacklo_s16_s32),
@@ -728,6 +844,54 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(imm_v128_shr_n_s32<20>),
                       MAP(imm_v128_shr_n_s32<24>),
                       MAP(imm_v128_shr_n_s32<28>),
+                      MAP(imm_v128_shl_n_64<1>),
+                      MAP(imm_v128_shl_n_64<4>),
+                      MAP(imm_v128_shl_n_64<8>),
+                      MAP(imm_v128_shl_n_64<12>),
+                      MAP(imm_v128_shl_n_64<16>),
+                      MAP(imm_v128_shl_n_64<20>),
+                      MAP(imm_v128_shl_n_64<24>),
+                      MAP(imm_v128_shl_n_64<28>),
+                      MAP(imm_v128_shl_n_64<32>),
+                      MAP(imm_v128_shl_n_64<36>),
+                      MAP(imm_v128_shl_n_64<40>),
+                      MAP(imm_v128_shl_n_64<44>),
+                      MAP(imm_v128_shl_n_64<48>),
+                      MAP(imm_v128_shl_n_64<52>),
+                      MAP(imm_v128_shl_n_64<56>),
+                      MAP(imm_v128_shl_n_64<60>),
+                      MAP(imm_v128_shr_n_u64<1>),
+                      MAP(imm_v128_shr_n_u64<4>),
+                      MAP(imm_v128_shr_n_u64<8>),
+                      MAP(imm_v128_shr_n_u64<12>),
+                      MAP(imm_v128_shr_n_u64<16>),
+                      MAP(imm_v128_shr_n_u64<20>),
+                      MAP(imm_v128_shr_n_u64<24>),
+                      MAP(imm_v128_shr_n_u64<28>),
+                      MAP(imm_v128_shr_n_u64<32>),
+                      MAP(imm_v128_shr_n_u64<36>),
+                      MAP(imm_v128_shr_n_u64<40>),
+                      MAP(imm_v128_shr_n_u64<44>),
+                      MAP(imm_v128_shr_n_u64<48>),
+                      MAP(imm_v128_shr_n_u64<52>),
+                      MAP(imm_v128_shr_n_u64<56>),
+                      MAP(imm_v128_shr_n_u64<60>),
+                      MAP(imm_v128_shr_n_s64<1>),
+                      MAP(imm_v128_shr_n_s64<4>),
+                      MAP(imm_v128_shr_n_s64<8>),
+                      MAP(imm_v128_shr_n_s64<12>),
+                      MAP(imm_v128_shr_n_s64<16>),
+                      MAP(imm_v128_shr_n_s64<20>),
+                      MAP(imm_v128_shr_n_s64<24>),
+                      MAP(imm_v128_shr_n_s64<28>),
+                      MAP(imm_v128_shr_n_s64<32>),
+                      MAP(imm_v128_shr_n_s64<36>),
+                      MAP(imm_v128_shr_n_s64<40>),
+                      MAP(imm_v128_shr_n_s64<44>),
+                      MAP(imm_v128_shr_n_s64<48>),
+                      MAP(imm_v128_shr_n_s64<52>),
+                      MAP(imm_v128_shr_n_s64<56>),
+                      MAP(imm_v128_shr_n_s64<60>),
                       MAP(v128_from_v64),
                       MAP(v128_zip_8),
                       MAP(v128_zip_16),
@@ -746,21 +910,29 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v128_shl_32),
                       MAP(v128_shr_u32),
                       MAP(v128_shr_s32),
+                      MAP(v128_shl_64),
+                      MAP(v128_shr_u64),
+                      MAP(v128_shr_s64),
                       MAP(v128_hadd_u8),
+                      MAP(v128_dotp_su8),
                       MAP(v128_dotp_s16),
+                      MAP(v128_dotp_s32),
                       MAP(v128_low_u32),
                       MAP(v128_low_v64),
                       MAP(v128_high_v64),
                       MAP(v128_from_64),
                       MAP(v128_from_32),
+                      MAP(v128_movemask_8),
                       MAP(v128_zero),
                       MAP(v128_dup_8),
                       MAP(v128_dup_16),
                       MAP(v128_dup_32),
+                      MAP(v128_dup_64),
                       MAP(v128_unpacklo_u8_s16),
                       MAP(v128_unpackhi_u8_s16),
                       MAP(v128_unpacklo_s8_s16),
                       MAP(v128_unpackhi_s8_s16),
+                      MAP(v128_blend_8),
                       MAP(u32_load_unaligned),
                       MAP(u32_store_unaligned),
                       MAP(v64_load_unaligned),
@@ -769,12 +941,20 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v128_store_unaligned),
                       MAP(v256_sad_u8),
                       MAP(v256_ssd_u8),
+                      MAP(v256_sad_u16),
+                      MAP(v256_ssd_s16),
                       MAP(v256_hadd_u8),
+                      MAP(v256_low_u64),
+                      MAP(v256_dotp_su8),
                       MAP(v256_dotp_s16),
+                      MAP(v256_dotp_s32),
                       MAP(v256_add_8),
                       MAP(v256_add_16),
+                      MAP(v256_sadd_s8),
+                      MAP(v256_sadd_u8),
                       MAP(v256_sadd_s16),
                       MAP(v256_add_32),
+                      MAP(v256_add_64),
                       MAP(v256_sub_8),
                       MAP(v256_ssub_u8),
                       MAP(v256_ssub_s8),
@@ -782,6 +962,7 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v256_ssub_u16),
                       MAP(v256_ssub_s16),
                       MAP(v256_sub_32),
+                      MAP(v256_sub_64),
                       MAP(v256_ziplo_8),
                       MAP(v256_ziphi_8),
                       MAP(v256_ziplo_16),
@@ -796,6 +977,9 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v256_unziplo_16),
                       MAP(v256_unziphi_32),
                       MAP(v256_unziplo_32),
+                      MAP(v256_unziphi_64),
+                      MAP(v256_unziplo_64),
+                      MAP(v256_pack_s32_u16),
                       MAP(v256_pack_s32_s16),
                       MAP(v256_pack_s16_u8),
                       MAP(v256_pack_s16_s8),
@@ -810,6 +994,7 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v256_madd_us8),
                       MAP(v256_avg_u8),
                       MAP(v256_rdavg_u8),
+                      MAP(v256_rdavg_u16),
                       MAP(v256_avg_u16),
                       MAP(v256_min_u8),
                       MAP(v256_max_u8),
@@ -817,14 +1002,20 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v256_max_s8),
                       MAP(v256_min_s16),
                       MAP(v256_max_s16),
+                      MAP(v256_min_s32),
+                      MAP(v256_max_s32),
                       MAP(v256_cmpgt_s8),
                       MAP(v256_cmplt_s8),
                       MAP(v256_cmpeq_8),
                       MAP(v256_cmpgt_s16),
                       MAP(v256_cmplt_s16),
                       MAP(v256_cmpeq_16),
+                      MAP(v256_cmpgt_s32),
+                      MAP(v256_cmplt_s32),
+                      MAP(v256_cmpeq_32),
                       MAP(v256_shuffle_8),
                       MAP(v256_pshuffle_8),
+                      MAP(v256_wideshuffle_8),
                       MAP(imm_v256_align<1>),
                       MAP(imm_v256_align<2>),
                       MAP(imm_v256_align<3>),
@@ -874,13 +1065,47 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v256_shl_32),
                       MAP(v256_shr_u32),
                       MAP(v256_shr_s32),
+                      MAP(v256_shl_64),
+                      MAP(v256_shr_u64),
+                      MAP(v256_shr_s64),
                       MAP(v256_abs_s8),
                       MAP(v256_abs_s16),
+                      MAP(v256_padd_u8),
                       MAP(v256_padd_s16),
                       MAP(v256_unpacklo_u16_s32),
                       MAP(v256_unpacklo_s16_s32),
                       MAP(v256_unpackhi_u16_s32),
                       MAP(v256_unpackhi_s16_s32),
+                      MAP(imm_v256_shr_n_word<1>),
+                      MAP(imm_v256_shr_n_word<2>),
+                      MAP(imm_v256_shr_n_word<3>),
+                      MAP(imm_v256_shr_n_word<4>),
+                      MAP(imm_v256_shr_n_word<5>),
+                      MAP(imm_v256_shr_n_word<6>),
+                      MAP(imm_v256_shr_n_word<7>),
+                      MAP(imm_v256_shr_n_word<8>),
+                      MAP(imm_v256_shr_n_word<9>),
+                      MAP(imm_v256_shr_n_word<10>),
+                      MAP(imm_v256_shr_n_word<11>),
+                      MAP(imm_v256_shr_n_word<12>),
+                      MAP(imm_v256_shr_n_word<13>),
+                      MAP(imm_v256_shr_n_word<14>),
+                      MAP(imm_v256_shr_n_word<15>),
+                      MAP(imm_v256_shl_n_word<1>),
+                      MAP(imm_v256_shl_n_word<2>),
+                      MAP(imm_v256_shl_n_word<3>),
+                      MAP(imm_v256_shl_n_word<4>),
+                      MAP(imm_v256_shl_n_word<5>),
+                      MAP(imm_v256_shl_n_word<6>),
+                      MAP(imm_v256_shl_n_word<7>),
+                      MAP(imm_v256_shl_n_word<8>),
+                      MAP(imm_v256_shl_n_word<9>),
+                      MAP(imm_v256_shl_n_word<10>),
+                      MAP(imm_v256_shl_n_word<11>),
+                      MAP(imm_v256_shl_n_word<12>),
+                      MAP(imm_v256_shl_n_word<13>),
+                      MAP(imm_v256_shl_n_word<14>),
+                      MAP(imm_v256_shl_n_word<15>),
                       MAP(imm_v256_shr_n_byte<1>),
                       MAP(imm_v256_shr_n_byte<2>),
                       MAP(imm_v256_shr_n_byte<3>),
@@ -1012,10 +1237,60 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(imm_v256_shr_n_s32<20>),
                       MAP(imm_v256_shr_n_s32<24>),
                       MAP(imm_v256_shr_n_s32<28>),
+                      MAP(imm_v256_shl_n_64<1>),
+                      MAP(imm_v256_shl_n_64<4>),
+                      MAP(imm_v256_shl_n_64<8>),
+                      MAP(imm_v256_shl_n_64<12>),
+                      MAP(imm_v256_shl_n_64<16>),
+                      MAP(imm_v256_shl_n_64<20>),
+                      MAP(imm_v256_shl_n_64<24>),
+                      MAP(imm_v256_shl_n_64<28>),
+                      MAP(imm_v256_shl_n_64<32>),
+                      MAP(imm_v256_shl_n_64<36>),
+                      MAP(imm_v256_shl_n_64<40>),
+                      MAP(imm_v256_shl_n_64<44>),
+                      MAP(imm_v256_shl_n_64<48>),
+                      MAP(imm_v256_shl_n_64<52>),
+                      MAP(imm_v256_shl_n_64<56>),
+                      MAP(imm_v256_shl_n_64<60>),
+                      MAP(imm_v256_shr_n_u64<1>),
+                      MAP(imm_v256_shr_n_u64<4>),
+                      MAP(imm_v256_shr_n_u64<8>),
+                      MAP(imm_v256_shr_n_u64<12>),
+                      MAP(imm_v256_shr_n_u64<16>),
+                      MAP(imm_v256_shr_n_u64<20>),
+                      MAP(imm_v256_shr_n_u64<24>),
+                      MAP(imm_v256_shr_n_u64<28>),
+                      MAP(imm_v256_shr_n_u64<32>),
+                      MAP(imm_v256_shr_n_u64<36>),
+                      MAP(imm_v256_shr_n_u64<40>),
+                      MAP(imm_v256_shr_n_u64<44>),
+                      MAP(imm_v256_shr_n_u64<48>),
+                      MAP(imm_v256_shr_n_u64<52>),
+                      MAP(imm_v256_shr_n_u64<56>),
+                      MAP(imm_v256_shr_n_u64<60>),
+                      MAP(imm_v256_shr_n_s64<1>),
+                      MAP(imm_v256_shr_n_s64<4>),
+                      MAP(imm_v256_shr_n_s64<8>),
+                      MAP(imm_v256_shr_n_s64<12>),
+                      MAP(imm_v256_shr_n_s64<16>),
+                      MAP(imm_v256_shr_n_s64<20>),
+                      MAP(imm_v256_shr_n_s64<24>),
+                      MAP(imm_v256_shr_n_s64<28>),
+                      MAP(imm_v256_shr_n_s64<32>),
+                      MAP(imm_v256_shr_n_s64<36>),
+                      MAP(imm_v256_shr_n_s64<40>),
+                      MAP(imm_v256_shr_n_s64<44>),
+                      MAP(imm_v256_shr_n_s64<48>),
+                      MAP(imm_v256_shr_n_s64<52>),
+                      MAP(imm_v256_shr_n_s64<56>),
+                      MAP(imm_v256_shr_n_s64<60>),
+                      MAP(v256_movemask_8),
                       MAP(v256_zero),
                       MAP(v256_dup_8),
                       MAP(v256_dup_16),
                       MAP(v256_dup_32),
+                      MAP(v256_dup_64),
                       MAP(v256_low_u32),
                       MAP(v256_low_v64),
                       MAP(v256_from_64),
@@ -1026,6 +1301,7 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(v256_unpackhi_u8_s16),
                       MAP(v256_unpacklo_s8_s16),
                       MAP(v256_unpackhi_s8_s16),
+                      MAP(v256_blend_8),
                       { NULL, NULL, NULL } };
 #undef MAP
 
@@ -1042,7 +1318,7 @@ void Map(const char *name, fptr *ref, fptr *simd) {
   *simd = m[i].simd;
 }
 
-// Used for printing errors in TestSimd1Arg and TestSimd2Args
+// Used for printing errors in TestSimd1Arg, TestSimd2Args and TestSimd3Args
 std::string Print(const uint8_t *a, int size) {
   std::string text = "0x";
   for (int i = 0; i < size; i++) {
@@ -1055,7 +1331,8 @@ std::string Print(const uint8_t *a, int size) {
   return text;
 }
 
-// Used in TestSimd1Arg and TestSimd2Args to restrict argument ranges
+// Used in TestSimd1Arg, TestSimd2Args and TestSimd3Args to restrict argument
+// ranges
 void SetMask(uint8_t *s, int size, uint32_t mask, uint32_t maskwidth) {
   switch (maskwidth) {
     case 0: {
@@ -1133,16 +1410,16 @@ uint8_t c_u8_load_aligned(const void *p) {
   return *(reinterpret_cast<const uint8_t *>(p));
 }
 
-// CompareSimd1Arg and CompareSimd2Args compare intrinsics taking 1 or
-// 2 arguments respectively with their corresponding C reference.
-// Ideally, the loads and stores should have gone into the template
-// parameter list, but v64 and v128 could be typedef'ed to the same
-// type (which is the case on x86) and then we can't instantiate both
-// v64 and v128, so the function return and argument types, including
-// the always differing types in the C equivalent are used instead.
-// The function arguments must be void pointers and then go through a
-// cast to avoid matching errors in the branches eliminated by the
-// typeid tests in the calling function.
+// CompareSimd1Arg, CompareSimd2Args and CompareSimd3Args compare
+// intrinsics taking 1, 2 or 3 arguments respectively with their
+// corresponding C reference.  Ideally, the loads and stores should
+// have gone into the template parameter list, but v64 and v128 could
+// be typedef'ed to the same type (which is the case on x86) and then
+// we can't instantiate both v64 and v128, so the function return and
+// argument types, including the always differing types in the C
+// equivalent are used instead.  The function arguments must be void
+// pointers and then go through a cast to avoid matching errors in the
+// branches eliminated by the typeid tests in the calling function.
 template <typename Ret, typename Arg, typename CRet, typename CArg>
 int CompareSimd1Arg(fptr store, fptr load, fptr simd, void *d, fptr c_store,
                     fptr c_load, fptr c_simd, void *ref_d, const void *a) {
@@ -1185,6 +1462,35 @@ int CompareSimd2Args(fptr store, fptr load1, fptr load2, fptr simd, void *d,
   return memcmp(ref_d, d, sizeof(CRet));
 }
 
+template <typename Ret, typename Arg1, typename Arg2, typename Arg3,
+          typename CRet, typename CArg1, typename CArg2, typename CArg3>
+int CompareSimd3Args(fptr store, fptr load1, fptr load2, fptr load3, fptr simd,
+                     void *d, fptr c_store, fptr c_load1, fptr c_load2,
+                     fptr c_load3, fptr c_simd, void *ref_d, const void *a,
+                     const void *b, const void *c) {
+  void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
+  Arg1 (*const my_load1)(const void *) = (Arg1(*const)(const void *))load1;
+  Arg2 (*const my_load2)(const void *) = (Arg2(*const)(const void *))load2;
+  Arg3 (*const my_load3)(const void *) = (Arg3(*const)(const void *))load3;
+  Ret (*const my_simd)(Arg1, Arg2, Arg3) = (Ret(*const)(Arg1, Arg2, Arg3))simd;
+  void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
+  CArg1 (*const my_c_load1)(const void *) =
+      (CArg1(*const)(const void *))c_load1;
+  CArg2 (*const my_c_load2)(const void *) =
+      (CArg2(*const)(const void *))c_load2;
+  CArg2 (*const my_c_load3)(const void *) =
+      (CArg2(*const)(const void *))c_load3;
+  CRet (*const my_c_simd)(CArg1, CArg2, CArg3) =
+      (CRet(*const)(CArg1, CArg2, CArg3))c_simd;
+
+  // Call reference and intrinsic
+  my_c_store(ref_d, my_c_simd(my_c_load1(a), my_c_load2(b), my_c_load3(c)));
+  my_store(d, my_simd(my_load1(a), my_load2(b), my_load3(c)));
+
+  // Compare results
+  return memcmp(ref_d, d, sizeof(CRet));
+}
+
 }  // namespace
 
 template <typename CRet, typename CArg>
@@ -1194,9 +1500,10 @@ void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
   fptr ref_simd;
   fptr simd;
   int error = 0;
-  DECLARE_ALIGNED(32, uint8_t, s[sizeof(CArg)]);
-  DECLARE_ALIGNED(32, uint8_t, d[sizeof(CRet)]);
-  DECLARE_ALIGNED(32, uint8_t, ref_d[sizeof(CRet)]);
+  DECLARE_ALIGNED(32, uint8_t, s[32]);
+  DECLARE_ALIGNED(32, uint8_t, d[32]);
+  DECLARE_ALIGNED(32, uint8_t, ref_d[32]);
+  assert(sizeof(CArg) <= 32 && sizeof(CRet) <= 32);
   memset(ref_d, 0, sizeof(ref_d));
   memset(d, 0, sizeof(d));
 
@@ -1347,6 +1654,14 @@ void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
           reinterpret_cast<fptr>(u32_load_aligned), simd, d,
           reinterpret_cast<fptr>(c_v128_store_aligned),
           reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v128) &&
+               typeid(CArg) == typeid(uint64_t)) {
+      // V128_U64
+      error = CompareSimd1Arg<v128, uint64_t, CRet, CArg>(
+          reinterpret_cast<fptr>(v128_store_aligned),
+          reinterpret_cast<fptr>(u64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v128_store_aligned),
+          reinterpret_cast<fptr>(c_u64_load_aligned), ref_simd, ref_d, s);
     } else if (typeid(CRet) == typeid(c_v256) &&
                typeid(CArg) == typeid(c_v256)) {
       // V256_V256
@@ -1387,6 +1702,14 @@ void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
           reinterpret_cast<fptr>(u32_load_aligned), simd, d,
           reinterpret_cast<fptr>(c_v256_store_aligned),
           reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg) == typeid(uint64_t)) {
+      // V256_U64
+      error = CompareSimd1Arg<v256, uint64_t, CRet, CArg>(
+          reinterpret_cast<fptr>(v256_store_aligned),
+          reinterpret_cast<fptr>(u64_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_v256_store_aligned),
+          reinterpret_cast<fptr>(c_u64_load_aligned), ref_simd, ref_d, s);
     } else if (typeid(CRet) == typeid(uint32_t) &&
                typeid(CArg) == typeid(c_v256)) {
       // U32_V256
@@ -1422,10 +1745,11 @@ void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
   fptr ref_simd;
   fptr simd;
   int error = 0;
-  DECLARE_ALIGNED(32, uint8_t, s1[sizeof(CArg1)]);
-  DECLARE_ALIGNED(32, uint8_t, s2[sizeof(CArg2)]);
-  DECLARE_ALIGNED(32, uint8_t, d[sizeof(CRet)]);
-  DECLARE_ALIGNED(32, uint8_t, ref_d[sizeof(CRet)]);
+  DECLARE_ALIGNED(32, uint8_t, s1[32]);
+  DECLARE_ALIGNED(32, uint8_t, s2[32]);
+  DECLARE_ALIGNED(32, uint8_t, d[32]);
+  DECLARE_ALIGNED(32, uint8_t, ref_d[32]);
+  assert(sizeof(CArg1) <= 32 && sizeof(CArg2) <= 32 && sizeof(CRet) <= 32);
   memset(ref_d, 0, sizeof(ref_d));
   memset(d, 0, sizeof(d));
 
@@ -1525,6 +1849,18 @@ void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
           reinterpret_cast<fptr>(c_v128_load_aligned),
           reinterpret_cast<fptr>(c_v128_load_aligned),
           reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(uint64_t) &&
+               typeid(CArg1) == typeid(c_v128) &&
+               typeid(CArg2) == typeid(c_v128)) {
+      // U64_V128V128
+      error = CompareSimd2Args<uint64_t, v128, v128, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(u64_store_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned),
+          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u64_store_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(c_v128_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
     } else if (typeid(CRet) == typeid(int64_t) &&
                typeid(CArg1) == typeid(c_v128) &&
                typeid(CArg2) == typeid(c_v128)) {
@@ -1585,6 +1921,18 @@ void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
           reinterpret_cast<fptr>(c_v256_load_aligned),
           reinterpret_cast<fptr>(c_v256_load_aligned),
           reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+    } else if (typeid(CRet) == typeid(uint64_t) &&
+               typeid(CArg1) == typeid(c_v256) &&
+               typeid(CArg2) == typeid(c_v256)) {
+      // U64_V256V256
+      error = CompareSimd2Args<uint64_t, v256, v256, CRet, CArg1, CArg2>(
+          reinterpret_cast<fptr>(u64_store_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned),
+          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+          reinterpret_cast<fptr>(c_u64_store_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(c_v256_load_aligned),
+          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
     } else if (typeid(CRet) == typeid(int64_t) &&
                typeid(CArg1) == typeid(c_v256) &&
                typeid(CArg2) == typeid(c_v256)) {
@@ -1647,6 +1995,83 @@ void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                       << Print(ref_d, sizeof(ref_d)) << " (ref)";
 }
 
+template <typename CRet, typename CArg1, typename CArg2, typename CArg3>
+void TestSimd3Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
+                   const char *name) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  fptr ref_simd;
+  fptr simd;
+  int error = 0;
+  DECLARE_ALIGNED(32, uint8_t, s1[32]);
+  DECLARE_ALIGNED(32, uint8_t, s2[32]);
+  DECLARE_ALIGNED(32, uint8_t, s3[32]);
+  DECLARE_ALIGNED(32, uint8_t, d[32]);
+  DECLARE_ALIGNED(32, uint8_t, ref_d[32]);
+  assert(sizeof(CArg1) <= 32 && sizeof(CArg2) <= 32 && sizeof(CArg3) <= 32 &&
+         sizeof(CRet) <= 32);
+  memset(ref_d, 0, sizeof(ref_d));
+  memset(d, 0, sizeof(d));
+
+  Map(name, &ref_simd, &simd);
+  if (simd == NULL || ref_simd == NULL) {
+    FAIL() << "Internal error: Unknown intrinsic function " << name;
+  }
+
+  for (unsigned int count = 0;
+       count < iterations && !error && !testing::Test::HasFailure(); count++) {
+    for (unsigned int c = 0; c < sizeof(CArg1); c++) s1[c] = rnd.Rand8();
+
+    for (unsigned int c = 0; c < sizeof(CArg2); c++) s2[c] = rnd.Rand8();
+
+    for (unsigned int c = 0; c < sizeof(CArg3); c++) s3[c] = rnd.Rand8();
+
+    if (maskwidth) SetMask(s3, sizeof(CArg3), mask, maskwidth);
+
+    if (typeid(CRet) == typeid(c_v128) && typeid(CArg1) == typeid(c_v128) &&
+        typeid(CArg2) == typeid(c_v128) && typeid(CArg3) == typeid(c_v128)) {
+      // V128_V128V128V128
+      error =
+          CompareSimd3Args<v128, v128, v128, v128, CRet, CArg1, CArg2, CArg3>(
+              reinterpret_cast<fptr>(v128_store_aligned),
+              reinterpret_cast<fptr>(v128_load_aligned),
+              reinterpret_cast<fptr>(v128_load_aligned),
+              reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+              reinterpret_cast<fptr>(c_v128_store_aligned),
+              reinterpret_cast<fptr>(c_v128_load_aligned),
+              reinterpret_cast<fptr>(c_v128_load_aligned),
+              reinterpret_cast<fptr>(c_v128_load_aligned),
+              reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2, s3);
+    } else if (typeid(CRet) == typeid(c_v256) &&
+               typeid(CArg1) == typeid(c_v256) &&
+               typeid(CArg2) == typeid(c_v256) &&
+               typeid(CArg3) == typeid(c_v256)) {
+      // V256_V256V256V256
+      error =
+          CompareSimd3Args<v256, v256, v256, v256, CRet, CArg1, CArg2, CArg3>(
+              reinterpret_cast<fptr>(v256_store_aligned),
+              reinterpret_cast<fptr>(v256_load_aligned),
+              reinterpret_cast<fptr>(v256_load_aligned),
+              reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+              reinterpret_cast<fptr>(c_v256_store_aligned),
+              reinterpret_cast<fptr>(c_v256_load_aligned),
+              reinterpret_cast<fptr>(c_v256_load_aligned),
+              reinterpret_cast<fptr>(c_v256_load_aligned),
+              reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2, s3);
+    } else {
+      FAIL() << "Internal error: Unknown intrinsic function "
+             << typeid(CRet).name() << " " << name << "("
+             << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ", "
+             << typeid(CArg3).name() << ")";
+    }
+  }
+
+  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
+                      << Print(s1, sizeof(s1)) << ", " << Print(s2, sizeof(s2))
+                      << ", " << Print(s3, sizeof(s3)) << ") -> "
+                      << Print(d, sizeof(d)) << " (simd), "
+                      << Print(ref_d, sizeof(ref_d)) << " (ref)";
+}
+
 // Instantiations to make the functions callable from another files
 template void TestSimd1Arg<c_v64, uint8_t>(uint32_t, uint32_t, uint32_t,
                                            const char *);
@@ -1682,6 +2107,8 @@ template void TestSimd1Arg<c_v128, uint16_t>(uint32_t, uint32_t, uint32_t,
                                              const char *);
 template void TestSimd1Arg<c_v128, uint32_t>(uint32_t, uint32_t, uint32_t,
                                              const char *);
+template void TestSimd1Arg<c_v128, uint64_t>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
 template void TestSimd1Arg<c_v128, c_v64>(uint32_t, uint32_t, uint32_t,
                                           const char *);
 template void TestSimd1Arg<uint32_t, c_v128>(uint32_t, uint32_t, uint32_t,
@@ -1698,10 +2125,15 @@ template void TestSimd2Args<c_v128, uint64_t, uint64_t>(uint32_t, uint32_t,
                                                         uint32_t, const char *);
 template void TestSimd2Args<c_v128, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                                   const char *);
+template void TestSimd2Args<uint64_t, c_v128, c_v128>(uint32_t, uint32_t,
+                                                      uint32_t, const char *);
 template void TestSimd2Args<int64_t, c_v128, c_v128>(uint32_t, uint32_t,
                                                      uint32_t, const char *);
 template void TestSimd2Args<uint32_t, c_v128, c_v128>(uint32_t, uint32_t,
                                                       uint32_t, const char *);
+template void TestSimd3Args<c_v128, c_v128, c_v128, c_v128>(uint32_t, uint32_t,
+                                                            uint32_t,
+                                                            const char *);
 template void TestSimd1Arg<c_v256, c_v128>(uint32_t, uint32_t, uint32_t,
                                            const char *);
 template void TestSimd1Arg<c_v256, c_v256>(uint32_t, uint32_t, uint32_t,
@@ -1714,6 +2146,8 @@ template void TestSimd1Arg<c_v256, uint16_t>(uint32_t, uint32_t, uint32_t,
                                              const char *);
 template void TestSimd1Arg<c_v256, uint32_t>(uint32_t, uint32_t, uint32_t,
                                              const char *);
+template void TestSimd1Arg<c_v256, uint64_t>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
 template void TestSimd1Arg<uint32_t, c_v256>(uint32_t, uint32_t, uint32_t,
                                              const char *);
 template void TestSimd1Arg<c_v64, c_v256>(uint32_t, uint32_t, uint32_t,
@@ -1724,9 +2158,14 @@ template void TestSimd2Args<c_v256, c_v256, c_v256>(uint32_t, uint32_t,
                                                     uint32_t, const char *);
 template void TestSimd2Args<c_v256, c_v256, uint32_t>(uint32_t, uint32_t,
                                                       uint32_t, const char *);
+template void TestSimd2Args<uint64_t, c_v256, c_v256>(uint32_t, uint32_t,
+                                                      uint32_t, const char *);
 template void TestSimd2Args<int64_t, c_v256, c_v256>(uint32_t, uint32_t,
                                                      uint32_t, const char *);
 template void TestSimd2Args<uint32_t, c_v256, c_v256>(uint32_t, uint32_t,
                                                       uint32_t, const char *);
+template void TestSimd3Args<c_v256, c_v256, c_v256, c_v256>(uint32_t, uint32_t,
+                                                            uint32_t,
+                                                            const char *);
 
 }  // namespace SIMD_NAMESPACE
diff --git a/third_party/aom/test/simd_cmp_neon.cc b/third_party/aom/test/simd_cmp_neon.cc
index c8004cc8b1..53c1e2a07f 100644
--- a/third_party/aom/test/simd_cmp_neon.cc
+++ b/third_party/aom/test/simd_cmp_neon.cc
@@ -13,5 +13,5 @@
 #define ARCH NEON
 #define ARCH_POSTFIX(name) name##_neon
 #define SIMD_NAMESPACE simd_test_neon
-#include "./simd_cmp_impl.h"
+#include "test/simd_cmp_impl.h"
 #endif
diff --git a/third_party/aom/test/simd_cmp_sse2.cc b/third_party/aom/test/simd_cmp_sse2.cc
index 67cb43c101..f7827a7fa1 100644
--- a/third_party/aom/test/simd_cmp_sse2.cc
+++ b/third_party/aom/test/simd_cmp_sse2.cc
@@ -14,5 +14,5 @@
 #define ARCH SSE2
 #define ARCH_POSTFIX(name) name##_sse2
 #define SIMD_NAMESPACE simd_test_sse2
-#include "./simd_cmp_impl.h"
+#include "test/simd_cmp_impl.h"
 #endif
diff --git a/third_party/aom/test/simd_cmp_sse4.cc b/third_party/aom/test/simd_cmp_sse4.cc
index ba826d8983..3566764b64 100644
--- a/third_party/aom/test/simd_cmp_sse4.cc
+++ b/third_party/aom/test/simd_cmp_sse4.cc
@@ -14,5 +14,5 @@
 #define ARCH SSE4_1
 #define ARCH_POSTFIX(name) name##_sse4_1
 #define SIMD_NAMESPACE simd_test_sse4_1
-#include "./simd_cmp_impl.h"
+#include "test/simd_cmp_impl.h"
 #endif
diff --git a/third_party/aom/test/simd_cmp_ssse3.cc b/third_party/aom/test/simd_cmp_ssse3.cc
index a6c7000fd3..57bf135ddb 100644
--- a/third_party/aom/test/simd_cmp_ssse3.cc
+++ b/third_party/aom/test/simd_cmp_ssse3.cc
@@ -14,5 +14,5 @@
 #define ARCH SSSE3
 #define ARCH_POSTFIX(name) name##_ssse3
 #define SIMD_NAMESPACE simd_test_ssse3
-#include "./simd_cmp_impl.h"
+#include "test/simd_cmp_impl.h"
 #endif
diff --git a/third_party/aom/test/simd_impl.h b/third_party/aom/test/simd_impl.h
index c3dfbc400d..fd06f67fdf 100644
--- a/third_party/aom/test/simd_impl.h
+++ b/third_party/aom/test/simd_impl.h
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #define SIMD_CHECK 1
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
@@ -23,9 +23,9 @@ class TestIntrinsic : public ::testing::TestWithParam<param_signature> {
  public:
   virtual ~TestIntrinsic() {}
   virtual void SetUp() {
-    mask = std::tr1::get<0>(this->GetParam());
-    maskwidth = std::tr1::get<1>(this->GetParam());
-    name = std::tr1::get<2>(this->GetParam());
+    mask = ::testing::get<0>(this->GetParam());
+    maskwidth = ::testing::get<1>(this->GetParam());
+    name = ::testing::get<2>(this->GetParam());
   }
 
   virtual void TearDown() { libaom_test::ClearSystemState(); }
@@ -36,8 +36,8 @@ class TestIntrinsic : public ::testing::TestWithParam<param_signature> {
 };
 
 // Create one typedef for each function signature
-#define TYPEDEF_SIMD(name)                                                  \
-  typedef TestIntrinsic<std::tr1::tuple<uint32_t, uint32_t, const char *> > \
+#define TYPEDEF_SIMD(name)                                                    \
+  typedef TestIntrinsic< ::testing::tuple<uint32_t, uint32_t, const char *> > \
       ARCH_POSTFIX(name)
 
 TYPEDEF_SIMD(V64_U8);
@@ -61,23 +61,29 @@ TYPEDEF_SIMD(V64_V128);
 TYPEDEF_SIMD(V128_U8);
 TYPEDEF_SIMD(V128_U16);
 TYPEDEF_SIMD(V128_U32);
+TYPEDEF_SIMD(V128_U64);
 TYPEDEF_SIMD(V128_U64U64);
 TYPEDEF_SIMD(V128_V64V64);
 TYPEDEF_SIMD(V128_V128V128);
+TYPEDEF_SIMD(V128_V128V128V128);
 TYPEDEF_SIMD(S64_V128V128);
 TYPEDEF_SIMD(V128_V128U32);
 TYPEDEF_SIMD(U32_V128V128);
+TYPEDEF_SIMD(U64_V128V128);
 TYPEDEF_SIMD(V256_V128);
 TYPEDEF_SIMD(V256_V256);
 TYPEDEF_SIMD(U64_V256);
 TYPEDEF_SIMD(V256_V128V128);
 TYPEDEF_SIMD(V256_V256V256);
+TYPEDEF_SIMD(V256_V256V256V256);
+TYPEDEF_SIMD(U64_V256V256);
 TYPEDEF_SIMD(S64_V256V256);
 TYPEDEF_SIMD(V256_V256U32);
 TYPEDEF_SIMD(U32_V256V256);
 TYPEDEF_SIMD(V256_U8);
 TYPEDEF_SIMD(V256_U16);
 TYPEDEF_SIMD(V256_U32);
+TYPEDEF_SIMD(V256_U64);
 TYPEDEF_SIMD(U32_V256);
 TYPEDEF_SIMD(V64_V256);
 
@@ -86,9 +92,12 @@ typedef ARCH_POSTFIX(V64_V64) ARCH_POSTFIX(V64_V64_Part2);
 typedef ARCH_POSTFIX(V64_V64V64) ARCH_POSTFIX(V64_V64V64_Part2);
 typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part2);
 typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part3);
+typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part4);
 typedef ARCH_POSTFIX(V128_V128V128) ARCH_POSTFIX(V128_V128V128_Part2);
 typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part2);
 typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part3);
+typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part4);
+typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part5);
 typedef ARCH_POSTFIX(V256_V256V256) ARCH_POSTFIX(V256_V256V256_Part2);
 
 // These functions are machine tuned located elsewhere
@@ -100,6 +109,10 @@ template <typename c_ret, typename c_arg1, typename c_arg2>
 void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                    const char *name);
 
+template <typename c_ret, typename c_arg1, typename c_arg2, typename c_arg3>
+void TestSimd3Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
+                   const char *name);
+
 const int kIterations = 65536;
 
 // Add a macro layer since TEST_P will quote the name so we need to
@@ -195,6 +208,10 @@ MY_TEST_P(ARCH_POSTFIX(V128_U32), TestIntrinsics) {
   TestSimd1Arg<c_v128, uint32_t>(kIterations, mask, maskwidth, name);
 }
 
+MY_TEST_P(ARCH_POSTFIX(V128_U64), TestIntrinsics) {
+  TestSimd1Arg<c_v128, uint64_t>(kIterations, mask, maskwidth, name);
+}
+
 MY_TEST_P(ARCH_POSTFIX(V128_V64), TestIntrinsics) {
   TestSimd1Arg<c_v128, c_v64>(kIterations, mask, maskwidth, name);
 }
@@ -203,10 +220,19 @@ MY_TEST_P(ARCH_POSTFIX(V128_V128V128), TestIntrinsics) {
   TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name);
 }
 
+MY_TEST_P(ARCH_POSTFIX(V128_V128V128V128), TestIntrinsics) {
+  TestSimd3Args<c_v128, c_v128, c_v128, c_v128>(kIterations, mask, maskwidth,
+                                                name);
+}
+
 MY_TEST_P(ARCH_POSTFIX(U32_V128V128), TestIntrinsics) {
   TestSimd2Args<uint32_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
 }
 
+MY_TEST_P(ARCH_POSTFIX(U64_V128V128), TestIntrinsics) {
+  TestSimd2Args<uint64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
 MY_TEST_P(ARCH_POSTFIX(S64_V128V128), TestIntrinsics) {
   TestSimd2Args<int64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
 }
@@ -235,6 +261,10 @@ MY_TEST_P(ARCH_POSTFIX(V128_V128_Part3), TestIntrinsics) {
   TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
 }
 
+MY_TEST_P(ARCH_POSTFIX(V128_V128_Part4), TestIntrinsics) {
+  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
+}
+
 MY_TEST_P(ARCH_POSTFIX(U64_V256), TestIntrinsics) {
   TestSimd1Arg<uint64_t, c_v256>(kIterations, mask, maskwidth, name);
 }
@@ -251,6 +281,11 @@ MY_TEST_P(ARCH_POSTFIX(V256_V256V256), TestIntrinsics) {
   TestSimd2Args<c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, name);
 }
 
+MY_TEST_P(ARCH_POSTFIX(V256_V256V256V256), TestIntrinsics) {
+  TestSimd3Args<c_v256, c_v256, c_v256, c_v256>(kIterations, mask, maskwidth,
+                                                name);
+}
+
 MY_TEST_P(ARCH_POSTFIX(V256_V128V128), TestIntrinsics) {
   TestSimd2Args<c_v256, c_v128, c_v128>(kIterations, mask, maskwidth, name);
 }
@@ -259,6 +294,10 @@ MY_TEST_P(ARCH_POSTFIX(U32_V256V256), TestIntrinsics) {
   TestSimd2Args<uint32_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
 }
 
+MY_TEST_P(ARCH_POSTFIX(U64_V256V256), TestIntrinsics) {
+  TestSimd2Args<uint64_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
 MY_TEST_P(ARCH_POSTFIX(S64_V256V256), TestIntrinsics) {
   TestSimd2Args<int64_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
 }
@@ -279,6 +318,14 @@ MY_TEST_P(ARCH_POSTFIX(V256_V256_Part3), TestIntrinsics) {
   TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
 }
 
+MY_TEST_P(ARCH_POSTFIX(V256_V256_Part4), TestIntrinsics) {
+  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
+MY_TEST_P(ARCH_POSTFIX(V256_V256_Part5), TestIntrinsics) {
+  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
+}
+
 MY_TEST_P(ARCH_POSTFIX(V256_U8), TestIntrinsics) {
   TestSimd1Arg<c_v256, uint8_t>(kIterations, mask, maskwidth, name);
 }
@@ -291,6 +338,10 @@ MY_TEST_P(ARCH_POSTFIX(V256_U32), TestIntrinsics) {
   TestSimd1Arg<c_v256, uint32_t>(kIterations, mask, maskwidth, name);
 }
 
+MY_TEST_P(ARCH_POSTFIX(V256_U64), TestIntrinsics) {
+  TestSimd1Arg<c_v256, uint64_t>(kIterations, mask, maskwidth, name);
+}
+
 MY_TEST_P(ARCH_POSTFIX(U32_V256), TestIntrinsics) {
   TestSimd1Arg<uint32_t, c_v256>(kIterations, mask, maskwidth, name);
 }
@@ -305,7 +356,7 @@ MY_TEST_P(ARCH_POSTFIX(V64_V256), TestIntrinsics) {
   INSTANTIATE_TEST_CASE_P(name, type, ::testing::Values(__VA_ARGS__))
 
 #define SIMD_TUPLE(name, mask, maskwidth) \
-  std::tr1::make_tuple(mask, maskwidth, static_cast<const char *>(#name))
+  ::testing::make_tuple(mask, maskwidth, static_cast<const char *>(#name))
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64V64),
             (SIMD_TUPLE(v64_sad_u8, 0U, 0U), SIMD_TUPLE(v64_ssd_u8, 0U, 0U)));
@@ -339,6 +390,8 @@ INSTANTIATE(
 
 INSTANTIATE(
     ARCH, ARCH_POSTFIX(V64_V64V64_Part2), SIMD_TUPLE(v64_shuffle_8, 7U, 8U),
+    SIMD_TUPLE(v64_pack_s32_u16, 0U, 0U), SIMD_TUPLE(v64_rdavg_u16, 0U, 0U),
+    SIMD_TUPLE(v64_sadd_s8, 0U, 0U), SIMD_TUPLE(v64_sadd_u8, 0U, 0U),
     SIMD_TUPLE(imm_v64_align<1>, 0U, 0U), SIMD_TUPLE(imm_v64_align<2>, 0U, 0U),
     SIMD_TUPLE(imm_v64_align<3>, 0U, 0U), SIMD_TUPLE(imm_v64_align<4>, 0U, 0U),
     SIMD_TUPLE(imm_v64_align<5>, 0U, 0U), SIMD_TUPLE(imm_v64_align<6>, 0U, 0U),
@@ -470,7 +523,8 @@ INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32), SIMD_TUPLE(v64_dup_32, 0U, 0U));
 INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32U32), SIMD_TUPLE(v64_from_32, 0U, 0U));
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128V128), SIMD_TUPLE(v128_sad_u8, 0U, 0U),
-            SIMD_TUPLE(v128_ssd_u8, 0U, 0U));
+            SIMD_TUPLE(v128_ssd_u8, 0U, 0U), SIMD_TUPLE(v128_sad_u16, 0U, 0U));
+INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128V128), SIMD_TUPLE(v128_ssd_s16, 0U, 0U));
 
 INSTANTIATE(
     ARCH, ARCH_POSTFIX(V128_V128V128), SIMD_TUPLE(v128_add_8, 0U, 0U),
@@ -501,9 +555,16 @@ INSTANTIATE(
     SIMD_TUPLE(v128_cmpgt_s16, 0U, 0U));
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128_Part2),
-            SIMD_TUPLE(v128_cmpeq_16, 0U, 0U),
+            SIMD_TUPLE(v128_pack_s32_u16, 0U, 0U),
+            SIMD_TUPLE(v128_rdavg_u16, 0U, 0U), SIMD_TUPLE(v128_add_64, 0U, 0U),
+            SIMD_TUPLE(v128_sub_64, 0U, 0U), SIMD_TUPLE(v128_sadd_s8, 0U, 0U),
+            SIMD_TUPLE(v128_sadd_u8, 0U, 0U), SIMD_TUPLE(v128_cmpeq_16, 0U, 0U),
             SIMD_TUPLE(v128_cmplt_s16, 0U, 0U),
+            SIMD_TUPLE(v128_cmplt_s32, 0U, 0U),
+            SIMD_TUPLE(v128_cmpeq_32, 0U, 0U),
+            SIMD_TUPLE(v128_cmpgt_s32, 0U, 0U),
             SIMD_TUPLE(v128_shuffle_8, 15U, 8U),
+            SIMD_TUPLE(v128_min_s32, 0U, 0U), SIMD_TUPLE(v128_max_s32, 0U, 0U),
             SIMD_TUPLE(imm_v128_align<1>, 0U, 0U),
             SIMD_TUPLE(imm_v128_align<2>, 0U, 0U),
             SIMD_TUPLE(imm_v128_align<3>, 0U, 0U),
@@ -520,6 +581,9 @@ INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128_Part2),
             SIMD_TUPLE(imm_v128_align<14>, 0U, 0U),
             SIMD_TUPLE(imm_v128_align<15>, 0U, 0U));
 
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128V128),
+            SIMD_TUPLE(v128_blend_8, 0U, 0U));
+
 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128), SIMD_TUPLE(v128_abs_s8, 0U, 0U),
             SIMD_TUPLE(v128_abs_s16, 0U, 0U), SIMD_TUPLE(v128_padd_s16, 0U, 0U),
             SIMD_TUPLE(v128_unpacklo_u8_s16, 0U, 0U),
@@ -634,6 +698,57 @@ INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part3),
             SIMD_TUPLE(imm_v128_shr_n_s32<24>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_s32<28>, 0U, 0U));
 
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part4),
+            SIMD_TUPLE(imm_v128_shl_n_64<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<32>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<36>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<40>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<44>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<48>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<52>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<56>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shl_n_64<60>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<32>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<36>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<40>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<44>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<48>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<52>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<56>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u64<60>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<32>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<36>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<40>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<44>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<48>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<52>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<56>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_s64<60>, 0U, 0U),
+            SIMD_TUPLE(v128_padd_u8, 0U, 0U));
+
 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64V64), SIMD_TUPLE(v128_from_v64, 0U, 0U),
             SIMD_TUPLE(v128_zip_8, 0U, 0U), SIMD_TUPLE(v128_zip_16, 0U, 0U),
             SIMD_TUPLE(v128_zip_32, 0U, 0U), SIMD_TUPLE(v128_mul_s16, 0U, 0U));
@@ -646,16 +761,17 @@ INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64),
             SIMD_TUPLE(v128_unpack_u16_s32, 0U, 0U),
             SIMD_TUPLE(v128_unpack_s16_s32, 0U, 0U));
 
-INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128U32), SIMD_TUPLE(v128_shl_8, 7U, 32U),
-            SIMD_TUPLE(v128_shr_u8, 7U, 32U), SIMD_TUPLE(v128_shr_s8, 7U, 32U),
-            SIMD_TUPLE(v128_shl_16, 15U, 32U),
-            SIMD_TUPLE(v128_shr_u16, 15U, 32U),
-            SIMD_TUPLE(v128_shr_s16, 15U, 32U),
-            SIMD_TUPLE(v128_shl_32, 31U, 32U),
-            SIMD_TUPLE(v128_shr_u32, 31U, 32U),
-            SIMD_TUPLE(v128_shr_s32, 31U, 32U));
+INSTANTIATE(
+    ARCH, ARCH_POSTFIX(V128_V128U32), SIMD_TUPLE(v128_shl_8, 7U, 32U),
+    SIMD_TUPLE(v128_shr_u8, 7U, 32U), SIMD_TUPLE(v128_shr_s8, 7U, 32U),
+    SIMD_TUPLE(v128_shl_16, 15U, 32U), SIMD_TUPLE(v128_shr_u16, 15U, 32U),
+    SIMD_TUPLE(v128_shr_s16, 15U, 32U), SIMD_TUPLE(v128_shl_32, 31U, 32U),
+    SIMD_TUPLE(v128_shr_u32, 31U, 32U), SIMD_TUPLE(v128_shr_s32, 31U, 32U),
+    SIMD_TUPLE(v128_shl_64, 63U, 32U), SIMD_TUPLE(v128_shr_u64, 63U, 32U),
+    SIMD_TUPLE(v128_shr_s64, 63U, 32U));
 
-INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128), SIMD_TUPLE(v128_low_u32, 0U, 0U));
+INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128), SIMD_TUPLE(v128_low_u32, 0U, 0U),
+            SIMD_TUPLE(v128_movemask_8, 0U, 0U));
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128), SIMD_TUPLE(v128_hadd_u8, 0U, 0U));
 
@@ -668,16 +784,23 @@ INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U16), SIMD_TUPLE(v128_dup_16, 0U, 0U));
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U32), SIMD_TUPLE(v128_dup_32, 0U, 0U));
 
-INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V128V128),
-            SIMD_TUPLE(v128_dotp_s16, 0U, 0U));
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U64), SIMD_TUPLE(v128_dup_64, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V128V128), SIMD_TUPLE(v128_dotp_s16, 0U, 0U),
+            SIMD_TUPLE(v128_dotp_s32, 0U, 0U),
+            SIMD_TUPLE(v128_dotp_su8, 0U, 0U));
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256V256), SIMD_TUPLE(v256_sad_u8, 0U, 0U),
-            SIMD_TUPLE(v256_ssd_u8, 0U, 0U));
+            SIMD_TUPLE(v256_ssd_u8, 0U, 0U), SIMD_TUPLE(v256_sad_u16, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256), SIMD_TUPLE(v256_hadd_u8, 0U, 0U),
+            SIMD_TUPLE(v256_low_u64, 0U, 0U));
 
-INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256), SIMD_TUPLE(v256_hadd_u8, 0U, 0U));
+INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V256V256), SIMD_TUPLE(v256_dotp_s16, 0U, 0U),
+            SIMD_TUPLE(v256_dotp_s32, 0U, 0U),
+            SIMD_TUPLE(v256_dotp_su8, 0U, 0U));
 
-INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V256V256),
-            SIMD_TUPLE(v256_dotp_s16, 0U, 0U));
+INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256V256), SIMD_TUPLE(v256_ssd_s16, 0U, 0U));
 
 INSTANTIATE(
     ARCH, ARCH_POSTFIX(V256_V256V256), SIMD_TUPLE(v256_add_8, 0U, 0U),
@@ -709,10 +832,16 @@ INSTANTIATE(
 
 INSTANTIATE(
     ARCH, ARCH_POSTFIX(V256_V256V256_Part2), SIMD_TUPLE(v256_cmpeq_8, 0U, 0U),
+    SIMD_TUPLE(v256_min_s32, 0U, 0U), SIMD_TUPLE(v256_max_s32, 0U, 0U),
+    SIMD_TUPLE(v256_add_64, 0U, 0U), SIMD_TUPLE(v256_sub_64, 0U, 0U),
     SIMD_TUPLE(v256_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v256_cmplt_s16, 0U, 0U),
-    SIMD_TUPLE(v256_cmpeq_16, 0U, 0U), SIMD_TUPLE(v256_shuffle_8, 15U, 8U),
-    SIMD_TUPLE(v256_pshuffle_8, 15U, 8U), SIMD_TUPLE(imm_v256_align<1>, 0U, 0U),
-    SIMD_TUPLE(imm_v256_align<2>, 0U, 0U),
+    SIMD_TUPLE(v256_cmpeq_16, 0U, 0U), SIMD_TUPLE(v256_cmpgt_s32, 0U, 0U),
+    SIMD_TUPLE(v256_cmplt_s32, 0U, 0U), SIMD_TUPLE(v256_cmpeq_32, 0U, 0U),
+    SIMD_TUPLE(v256_shuffle_8, 31U, 8U), SIMD_TUPLE(v256_pshuffle_8, 15U, 8U),
+    SIMD_TUPLE(imm_v256_align<1>, 0U, 0U), SIMD_TUPLE(v256_sadd_s8, 0U, 0U),
+    SIMD_TUPLE(v256_sadd_u8, 0U, 0U), SIMD_TUPLE(v256_pack_s32_u16, 0U, 0U),
+    SIMD_TUPLE(v256_rdavg_u16, 0U, 0U), SIMD_TUPLE(imm_v256_align<2>, 0U, 0U),
+    SIMD_TUPLE(v256_unziphi_64, 0U, 0U), SIMD_TUPLE(v256_unziplo_64, 0U, 0U),
     SIMD_TUPLE(imm_v256_align<3>, 0U, 0U),
     SIMD_TUPLE(imm_v256_align<4>, 0U, 0U),
     SIMD_TUPLE(imm_v256_align<5>, 0U, 0U),
@@ -754,14 +883,14 @@ INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V128),
             SIMD_TUPLE(v256_unpack_u16_s32, 0U, 0U),
             SIMD_TUPLE(v256_unpack_s16_s32, 0U, 0U));
 
-INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256U32), SIMD_TUPLE(v256_shl_8, 7U, 32U),
-            SIMD_TUPLE(v256_shr_u8, 7U, 32U), SIMD_TUPLE(v256_shr_s8, 7U, 32U),
-            SIMD_TUPLE(v256_shl_16, 15U, 32U),
-            SIMD_TUPLE(v256_shr_u16, 15U, 32U),
-            SIMD_TUPLE(v256_shr_s16, 15U, 32U),
-            SIMD_TUPLE(v256_shl_32, 31U, 32U),
-            SIMD_TUPLE(v256_shr_u32, 31U, 32U),
-            SIMD_TUPLE(v256_shr_s32, 31U, 32U));
+INSTANTIATE(
+    ARCH, ARCH_POSTFIX(V256_V256U32), SIMD_TUPLE(v256_shl_8, 7U, 32U),
+    SIMD_TUPLE(v256_shr_u8, 7U, 32U), SIMD_TUPLE(v256_shr_s8, 7U, 32U),
+    SIMD_TUPLE(v256_shl_16, 15U, 32U), SIMD_TUPLE(v256_shr_u16, 15U, 32U),
+    SIMD_TUPLE(v256_shr_s16, 15U, 32U), SIMD_TUPLE(v256_shl_32, 31U, 32U),
+    SIMD_TUPLE(v256_shr_u32, 31U, 32U), SIMD_TUPLE(v256_shr_s32, 31U, 32U),
+    SIMD_TUPLE(v256_shl_64, 63U, 32U), SIMD_TUPLE(v256_shr_u64, 63U, 32U),
+    SIMD_TUPLE(v256_shr_s64, 63U, 32U));
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256), SIMD_TUPLE(v256_abs_s8, 0U, 0U),
             SIMD_TUPLE(v256_abs_s16, 0U, 0U), SIMD_TUPLE(v256_padd_s16, 0U, 0U),
@@ -909,13 +1038,103 @@ INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part3),
             SIMD_TUPLE(imm_v256_shr_n_s32<24>, 0U, 0U),
             SIMD_TUPLE(imm_v256_shr_n_s32<28>, 0U, 0U));
 
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part4),
+            SIMD_TUPLE(imm_v256_shl_n_64<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<32>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<36>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<40>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<44>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<48>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<52>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<56>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_64<60>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<32>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<36>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<40>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<44>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<48>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<52>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<56>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_u64<60>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<16>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<20>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<24>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<28>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<32>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<36>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<40>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<44>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<48>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<52>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<56>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_s64<60>, 0U, 0U),
+            SIMD_TUPLE(v256_padd_u8, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part5),
+            SIMD_TUPLE(imm_v256_shr_n_word<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<9>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<11>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<13>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shr_n_word<15>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<1>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<3>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<4>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<5>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<6>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<7>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<9>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<11>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<12>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<13>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<14>, 0U, 0U),
+            SIMD_TUPLE(imm_v256_shl_n_word<15>, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256V256V256),
+            SIMD_TUPLE(v256_blend_8, 0U, 0U),
+            SIMD_TUPLE(v256_wideshuffle_8, 63U, 8U));
+
 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U8), SIMD_TUPLE(v256_dup_8, 0U, 0U));
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U16), SIMD_TUPLE(v256_dup_16, 0U, 0U));
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U32), SIMD_TUPLE(v256_dup_32, 0U, 0U));
 
-INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256), SIMD_TUPLE(v256_low_u32, 0U, 0U));
+INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U64), SIMD_TUPLE(v256_dup_64, 0U, 0U));
+
+INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256), SIMD_TUPLE(v256_low_u32, 0U, 0U),
+            SIMD_TUPLE(v256_movemask_8, 0U, 0U));
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V256), SIMD_TUPLE(v256_low_v64, 0U, 0U));
 
diff --git a/third_party/aom/test/simd_neon_test.cc b/third_party/aom/test/simd_neon_test.cc
index 0565fb4e2a..b67b188959 100644
--- a/third_party/aom/test/simd_neon_test.cc
+++ b/third_party/aom/test/simd_neon_test.cc
@@ -13,5 +13,5 @@
 #define ARCH NEON
 #define ARCH_POSTFIX(name) name##_neon
 #define SIMD_NAMESPACE simd_test_neon
-#include "./simd_impl.h"
+#include "test/simd_impl.h"
 #endif
diff --git a/third_party/aom/test/simd_sse2_test.cc b/third_party/aom/test/simd_sse2_test.cc
index a0b49d77e6..b37a931b38 100644
--- a/third_party/aom/test/simd_sse2_test.cc
+++ b/third_party/aom/test/simd_sse2_test.cc
@@ -14,5 +14,5 @@
 #define ARCH SSE2
 #define ARCH_POSTFIX(name) name##_sse2
 #define SIMD_NAMESPACE simd_test_sse2
-#include "./simd_impl.h"
+#include "test/simd_impl.h"
 #endif
diff --git a/third_party/aom/test/simd_sse4_test.cc b/third_party/aom/test/simd_sse4_test.cc
index 73c96427f2..b1c9d5cd88 100644
--- a/third_party/aom/test/simd_sse4_test.cc
+++ b/third_party/aom/test/simd_sse4_test.cc
@@ -14,5 +14,5 @@
 #define ARCH SSE4_1
 #define ARCH_POSTFIX(name) name##_sse4_1
 #define SIMD_NAMESPACE simd_test_sse4_1
-#include "./simd_impl.h"
+#include "test/simd_impl.h"
 #endif
diff --git a/third_party/aom/test/simd_ssse3_test.cc b/third_party/aom/test/simd_ssse3_test.cc
index 9ebeeef1b5..d95c26fb5e 100644
--- a/third_party/aom/test/simd_ssse3_test.cc
+++ b/third_party/aom/test/simd_ssse3_test.cc
@@ -14,5 +14,5 @@
 #define ARCH SSSE3
 #define ARCH_POSTFIX(name) name##_ssse3
 #define SIMD_NAMESPACE simd_test_ssse3
-#include "./simd_impl.h"
+#include "test/simd_impl.h"
 #endif
diff --git a/third_party/aom/test/simple_decoder.sh b/third_party/aom/test/simple_decoder.sh
index ac3a07b189..5f39ad206e 100755
--- a/third_party/aom/test/simple_decoder.sh
+++ b/third_party/aom/test/simple_decoder.sh
@@ -25,7 +25,7 @@ simple_decoder_verify_environment() {
 # Runs simple_decoder using $1 as input file. $2 is the codec name, and is used
 # solely to name the output file.
 simple_decoder() {
-  local decoder="${LIBAOM_BIN_PATH}/simple_decoder${AOM_TEST_EXE_SUFFIX}"
+  local decoder="$(aom_tool_path simple_decoder)"
   local input_file="$1"
   local codec="$2"
   local output_file="${AOM_TEST_OUTPUT_DIR}/simple_decoder_${codec}.raw"
diff --git a/third_party/aom/test/subtract_test.cc b/third_party/aom/test/subtract_test.cc
index 725a6a2c65..7dcedf56de 100644
--- a/third_party/aom/test/subtract_test.cc
+++ b/third_party/aom/test/subtract_test.cc
@@ -7,24 +7,21 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-#if CONFIG_AV1
 #include "av1/common/blockd.h"
-#endif
 #include "aom_mem/aom_mem.h"
 #include "aom_ports/mem.h"
 
-#define USE_SPEED_TEST (0)
-
 typedef void (*SubtractFunc)(int rows, int cols, int16_t *diff_ptr,
                              ptrdiff_t diff_stride, const uint8_t *src_ptr,
                              ptrdiff_t src_stride, const uint8_t *pred_ptr,
@@ -112,14 +109,13 @@ typedef void (*HBDSubtractFunc)(int rows, int cols, int16_t *diff_ptr,
                                 ptrdiff_t src_stride, const uint8_t *pred_ptr,
                                 ptrdiff_t pred_stride, int bd);
 
-using ::std::tr1::get;
-using ::std::tr1::make_tuple;
-using ::std::tr1::tuple;
+using ::testing::get;
+using ::testing::make_tuple;
+using ::testing::tuple;
 
 // <width, height, bit_dpeth, subtract>
 typedef tuple<int, int, int, HBDSubtractFunc> Params;
 
-#if CONFIG_HIGHBITDEPTH
 class AV1HBDSubtractBlockTest : public ::testing::TestWithParam<Params> {
  public:
   virtual void SetUp() {
@@ -130,11 +126,7 @@ class AV1HBDSubtractBlockTest : public ::testing::TestWithParam<Params> {
 
     rnd_.Reset(ACMRandom::DeterministicSeed());
 
-#if CONFIG_EXT_PARTITION
     const size_t max_width = 128;
-#else
-    const size_t max_width = 64;
-#endif
     const size_t max_block_size = max_width * max_width;
     src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
         aom_memalign(16, max_block_size * sizeof(uint16_t))));
@@ -167,11 +159,7 @@ class AV1HBDSubtractBlockTest : public ::testing::TestWithParam<Params> {
 
 void AV1HBDSubtractBlockTest::CheckResult() {
   const int test_num = 100;
-#if CONFIG_EXT_PARTITION
   const size_t max_width = 128;
-#else
-  const size_t max_width = 64;
-#endif
   const int max_block_size = max_width * max_width;
   const int mask = (1 << bit_depth_) - 1;
   int i, j;
@@ -200,11 +188,7 @@ TEST_P(AV1HBDSubtractBlockTest, CheckResult) { CheckResult(); }
 
 void AV1HBDSubtractBlockTest::RunForSpeed() {
   const int test_num = 200000;
-#if CONFIG_EXT_PARTITION
   const size_t max_width = 128;
-#else
-  const size_t max_width = 64;
-#endif
   const int max_block_size = max_width * max_width;
   const int mask = (1 << bit_depth_) - 1;
   int i, j;
@@ -251,18 +235,15 @@ const Params kAV1HBDSubtractBlock_sse2[] = {
   make_tuple(64, 32, 12, &aom_highbd_subtract_block_c),
   make_tuple(64, 64, 12, &aom_highbd_subtract_block_sse2),
   make_tuple(64, 64, 12, &aom_highbd_subtract_block_c),
-#if CONFIG_EXT_PARTITION
   make_tuple(64, 128, 12, &aom_highbd_subtract_block_sse2),
   make_tuple(64, 128, 12, &aom_highbd_subtract_block_c),
   make_tuple(128, 64, 12, &aom_highbd_subtract_block_sse2),
   make_tuple(128, 64, 12, &aom_highbd_subtract_block_c),
   make_tuple(128, 128, 12, &aom_highbd_subtract_block_sse2),
   make_tuple(128, 128, 12, &aom_highbd_subtract_block_c)
-#endif  // CONFIG_EXT_PARTITION
 };
 
 INSTANTIATE_TEST_CASE_P(SSE2, AV1HBDSubtractBlockTest,
                         ::testing::ValuesIn(kAV1HBDSubtractBlock_sse2));
 #endif  // HAVE_SSE2
-#endif  // CONFIG_HIGHBITDEPTH
 }  // namespace
diff --git a/third_party/aom/test/sum_squares_test.cc b/third_party/aom/test/sum_squares_test.cc
index b8701c1964..c03ebad4af 100644
--- a/third_party/aom/test/sum_squares_test.cc
+++ b/third_party/aom/test/sum_squares_test.cc
@@ -15,8 +15,9 @@
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
 #include "aom_ports/mem.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
@@ -39,89 +40,82 @@ typedef libaom_test::FuncParam<SSI16Func> TestFuncs;
 class SumSquaresTest : public ::testing::TestWithParam<TestFuncs> {
  public:
   virtual ~SumSquaresTest() {}
-  virtual void SetUp() { params_ = this->GetParam(); }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  TestFuncs params_;
-};
-
-TEST_P(SumSquaresTest, OperationCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, int16_t, src[256 * 256]);
-
-  int failed = 0;
-
-  const int msb = 11;  // Up to 12 bit input
-  const int limit = 1 << (msb + 1);
+  virtual void SetUp() {
+    params_ = this->GetParam();
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src_ = reinterpret_cast<int16_t *>(aom_memalign(16, 256 * 256 * 2));
+    ASSERT_TRUE(src_ != NULL);
+  }
 
-  for (int k = 0; k < kNumIterations; k++) {
-    int width = 4 * rnd(32);   // Up to 128x128
-    int height = 4 * rnd(32);  // Up to 128x128
-    int stride = 4 << rnd(7);  // Up to 256 stride
-    while (stride < width) {   // Make sure it's valid
-      stride = 4 << rnd(7);
-    }
+  virtual void TearDown() {
+    libaom_test::ClearSystemState();
+    aom_free(src_);
+  }
+  void RunTest(int isRandom);
 
+  void GenRandomData(int width, int height, int stride) {
+    const int msb = 11;  // Up to 12 bit input
+    const int limit = 1 << (msb + 1);
     for (int ii = 0; ii < height; ii++) {
       for (int jj = 0; jj < width; jj++) {
-        src[ii * stride + jj] = rnd(2) ? rnd(limit) : -rnd(limit);
+        src_[ii * stride + jj] = rnd_(2) ? rnd_(limit) : -rnd_(limit);
       }
     }
+  }
 
-    const uint64_t res_ref = params_.ref_func(src, stride, width, height);
-    uint64_t res_tst;
-    ASM_REGISTER_STATE_CHECK(res_tst =
-                                 params_.tst_func(src, stride, width, height));
-
-    if (!failed) {
-      failed = res_ref != res_tst;
-      EXPECT_EQ(res_ref, res_tst)
-          << "Error: Sum Squares Test"
-          << " C output does not match optimized output.";
+  void GenExtremeData(int width, int height, int stride) {
+    const int msb = 11;  // Up to 12 bit input
+    const int limit = 1 << (msb + 1);
+    const int val = rnd_(2) ? limit - 1 : -(limit - 1);
+    for (int ii = 0; ii < height; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        src_[ii * stride + jj] = val;
+      }
     }
   }
-}
 
-TEST_P(SumSquaresTest, ExtremeValues) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, int16_t, src[256 * 256]);
+ protected:
+  TestFuncs params_;
+  int16_t *src_;
+  ACMRandom rnd_;
+};
 
+void SumSquaresTest::RunTest(int isRandom) {
   int failed = 0;
-
-  const int msb = 11;  // Up to 12 bit input
-  const int limit = 1 << (msb + 1);
-
   for (int k = 0; k < kNumIterations; k++) {
-    int width = 4 * rnd(32);   // Up to 128x128
-    int height = 4 * rnd(32);  // Up to 128x128
-    int stride = 4 << rnd(7);  // Up to 256 stride
-    while (stride < width) {   // Make sure it's valid
-      stride = 4 << rnd(7);
+    const int width = 4 * (rnd_(31) + 1);   // Up to 128x128
+    const int height = 4 * (rnd_(31) + 1);  // Up to 128x128
+    int stride = 4 << rnd_(7);              // Up to 256 stride
+    while (stride < width) {                // Make sure it's valid
+      stride = 4 << rnd_(7);
     }
-
-    int val = rnd(2) ? limit - 1 : -(limit - 1);
-    for (int ii = 0; ii < height; ii++) {
-      for (int jj = 0; jj < width; jj++) {
-        src[ii * stride + jj] = val;
-      }
+    if (isRandom) {
+      GenRandomData(width, height, stride);
+    } else {
+      GenExtremeData(width, height, stride);
     }
-
-    const uint64_t res_ref = params_.ref_func(src, stride, width, height);
+    const uint64_t res_ref = params_.ref_func(src_, stride, width, height);
     uint64_t res_tst;
     ASM_REGISTER_STATE_CHECK(res_tst =
-                                 params_.tst_func(src, stride, width, height));
+                                 params_.tst_func(src_, stride, width, height));
 
     if (!failed) {
       failed = res_ref != res_tst;
       EXPECT_EQ(res_ref, res_tst)
-          << "Error: Sum Squares Test"
-          << " C output does not match optimized output.";
+          << "Error: Sum Squares Test [" << width << "x" << height
+          << "] C output does not match optimized output.";
     }
   }
 }
 
+TEST_P(SumSquaresTest, OperationCheck) {
+  RunTest(1);  // GenRandomData
+}
+
+TEST_P(SumSquaresTest, ExtremeValues) {
+  RunTest(0);  // GenExtremeData
+}
+
 #if HAVE_SSE2
 
 INSTANTIATE_TEST_CASE_P(
diff --git a/third_party/aom/test/superframe_test.cc b/third_party/aom/test/superframe_test.cc
index c8f663e5c7..7be18f72a3 100644
--- a/third_party/aom/test/superframe_test.cc
+++ b/third_party/aom/test/superframe_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <climits>
 #include <vector>
@@ -23,7 +23,7 @@ const int kTestMode = 0;
 const int kTileCols = 1;
 const int kTileRows = 2;
 
-typedef std::tr1::tuple<libaom_test::TestMode, int, int> SuperframeTestParam;
+typedef ::testing::tuple<libaom_test::TestMode, int, int> SuperframeTestParam;
 
 class SuperframeTest
     : public ::libaom_test::CodecTestWithParam<SuperframeTestParam>,
@@ -35,12 +35,12 @@ class SuperframeTest
   virtual void SetUp() {
     InitializeConfig();
     const SuperframeTestParam input = GET_PARAM(1);
-    const libaom_test::TestMode mode = std::tr1::get<kTestMode>(input);
+    const libaom_test::TestMode mode = ::testing::get<kTestMode>(input);
     SetMode(mode);
     sf_count_ = 0;
     sf_count_max_ = INT_MAX;
-    n_tile_cols_ = std::tr1::get<kTileCols>(input);
-    n_tile_rows_ = std::tr1::get<kTileRows>(input);
+    n_tile_cols_ = ::testing::get<kTileCols>(input);
+    n_tile_rows_ = ::testing::get<kTileRows>(input);
   }
 
   virtual void PreEncodeFrameHook(libaom_test::VideoSource *video,
@@ -50,9 +50,6 @@ class SuperframeTest
       encoder->Control(AOME_SET_CPUUSED, 2);
       encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_);
       encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_);
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
-      encoder->Control(AV1E_SET_TILE_LOOPFILTER, 0);
-#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
     }
   }
 
@@ -100,31 +97,13 @@ class SuperframeTest
 TEST_P(SuperframeTest, TestSuperframeIndexIsOptional) {
   sf_count_max_ = 0;  // early exit on successful test.
   cfg_.g_lag_in_frames = 25;
-#if CONFIG_EXT_TILE
   cfg_.large_scale_tile = 1;
-#endif  // CONFIG_EXT_TILE
   ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                        30, 1, 0, 40);
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-#if CONFIG_EXT_REFS
   // NOTE: The use of BWDREF_FRAME will enable the coding of more non-show
   //       frames besides ALTREF_FRAME.
   EXPECT_GE(sf_count_, 1);
-#else
-  EXPECT_EQ(sf_count_, 1);
-#endif  // CONFIG_EXT_REFS
 }
 
-// The superframe index is currently mandatory with both ANS and DAALA_EC due
-// to the decoder starting at the end of the buffer.
-#if CONFIG_EXT_TILE
-// Single tile does not work with ANS (see comment above).
-const int tile_col_values[] = { 1, 2 };
-const int tile_row_values[] = { 1, 2, 32 };
-AV1_INSTANTIATE_TEST_CASE(
-    SuperframeTest,
-    ::testing::Combine(::testing::Values(::libaom_test::kTwoPassGood),
-                       ::testing::ValuesIn(tile_col_values),
-                       ::testing::ValuesIn(tile_row_values)));
-#endif  // CONFIG_EXT_TILE
 }  // namespace
diff --git a/third_party/aom/test/test-data.mk b/third_party/aom/test/test-data.mk
deleted file mode 100644
index d82033e3bd..0000000000
--- a/third_party/aom/test/test-data.mk
+++ /dev/null
@@ -1,49 +0,0 @@
-LIBAOM_TEST_SRCS-yes += test-data.mk
-
-# Encoder test source
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += hantro_collage_w352h288.yuv
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += hantro_odd.yuv
-
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_10_420.y4m
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_10_422.y4m
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_10_444.y4m
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_10_440.yuv
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_12_420.y4m
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_12_422.y4m
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_12_444.y4m
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_12_440.yuv
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_8_420_a10-1.y4m
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_8_420.y4m
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_8_422.y4m
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_8_444.y4m
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += park_joy_90p_8_440.yuv
-
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += desktop_credits.y4m
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += niklas_1280_720_30.y4m
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += rush_hour_444.y4m
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += screendata.y4m
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += niklas_640_480_30.yuv
-
-ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
-# Encode / Decode test
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += niklas_1280_720_30.yuv
-endif  # CONFIG_DECODE_PERF_TESTS
-
-ifeq ($(CONFIG_ENCODE_PERF_TESTS),yes)
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += desktop_640_360_30.yuv
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += kirland_640_480_30.yuv
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += macmarcomoving_640_480_30.yuv
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += macmarcostationary_640_480_30.yuv
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += niklas_1280_720_30.yuv
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += tacomanarrows_640_480_30.yuv
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += tacomasmallcameramovement_640_480_30.yuv
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += thaloundeskmtg_640_480_30.yuv
-endif  # CONFIG_ENCODE_PERF_TESTS
-
-ifeq ($(CONFIG_EXT_TILE),yes)
-LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += vase10x10.yuv
-endif  # CONFIG_EXT_TILE
-
-# sort and remove duplicates
-LIBAOM_TEST_DATA-yes := $(sort $(LIBAOM_TEST_DATA-yes))
-
diff --git a/third_party/aom/test/test-data.sha1 b/third_party/aom/test/test-data.sha1
index 0caf21e1ea..67aeb52082 100644
--- a/third_party/aom/test/test-data.sha1
+++ b/third_party/aom/test/test-data.sha1
@@ -1,5 +1,7 @@
 d5dfb0151c9051f8c85999255645d7a23916d3c0 *hantro_collage_w352h288.yuv
 b87815bf86020c592ccc7a846ba2e28ec8043902 *hantro_odd.yuv
+26b7f64399b84db4b4c9c915d743ec5c2619d4b9 *invalid-bug-1814.ivf
+d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-bug-1814.ivf.res
 a432f96ff0a787268e2f94a8092ab161a18d1b06 *park_joy_90p_10_420.y4m
 0b194cc312c3a2e84d156a221b0a5eb615dfddc5 *park_joy_90p_10_422.y4m
 ff0e0a21dc2adc95b8c1b37902713700655ced17 *park_joy_90p_10_444.y4m
@@ -27,3 +29,331 @@ e7d315dbf4f3928779e0dc624311196d44491d32 *niklas_1280_720_30.yuv
 9cfc855459e7549fd015c79e8eca512b2f2cb7e3 *niklas_1280_720_30.y4m
 5b5763b388b1b52a81bb82b39f7ec25c4bd3d0e1 *desktop_credits.y4m
 36ddab9b99eb7545aa0bf362d6f498212d596516 *vase10x10.yuv
+c2e1ec9936b95254187a359e94aa32a9f3dad1b7 *av1-1-b8-00-quantizer-00.ivf
+26cd2a0321d01d9db5f6dace8b43a40cd5b9d58d *av1-1-b8-00-quantizer-00.ivf.md5
+a56dd02c0258d4afea1ee358a22b54e99e39d5e1 *av1-1-b8-00-quantizer-01.ivf
+b3d24124d81f1fbb26f5eb0036accb54f3ec69b2 *av1-1-b8-00-quantizer-01.ivf.md5
+3466327cb842a91d69839b11ef930a74f086f4c6 *av1-1-b8-00-quantizer-02.ivf
+c111dce946100efeaad34203080eee1d55464df6 *av1-1-b8-00-quantizer-02.ivf.md5
+d3f1f32de5e2c0c19a58bb8ef096108388c6a820 *av1-1-b8-00-quantizer-03.ivf
+6265321b31130545b4454982ca93e412a56845b8 *av1-1-b8-00-quantizer-03.ivf.md5
+f37c393ebe73266a5ec8508a2ca33c586ff28e64 *av1-1-b8-00-quantizer-04.ivf
+c6e979da71aecc593c0abb40135dd304152b00dd *av1-1-b8-00-quantizer-04.ivf.md5
+ac9c5e93cb19942a9be259d0567ec96c54dcdc7c *av1-1-b8-00-quantizer-05.ivf
+49e35a7399568a0e4f015ce323d5a45ea780ca87 *av1-1-b8-00-quantizer-05.ivf.md5
+461142b1b50ae74c6b698d23f5ed3b764eadfb89 *av1-1-b8-00-quantizer-06.ivf
+6477ff260624e0f76c94ac872d1e7d5576af4177 *av1-1-b8-00-quantizer-06.ivf.md5
+7f8113cd13d8faaa06fdbaaa50dc328daf037e6d *av1-1-b8-00-quantizer-07.ivf
+b26795c6cb408487c20737977cd6b77311772bf7 *av1-1-b8-00-quantizer-07.ivf.md5
+4218f7945a172e1fe4f9e77ec35085a394eda9f4 *av1-1-b8-00-quantizer-08.ivf
+ea5d7d501e9a69d805251e4871515d28468d8676 *av1-1-b8-00-quantizer-08.ivf.md5
+837f3bcadfe56cf302db2ebaf9a990446fb35801 *av1-1-b8-00-quantizer-09.ivf
+eede995cdac5fd01a411da2e74e86e8394138be1 *av1-1-b8-00-quantizer-09.ivf.md5
+adc229b3780a4968c18ded1bcbe72e3f04643833 *av1-1-b8-00-quantizer-10.ivf
+0799b7e54e54ee97bf0e8aad2b75509ce59c7097 *av1-1-b8-00-quantizer-10.ivf.md5
+44bac8247160a8d9a0ab19f890fc89cc9298de1d *av1-1-b8-00-quantizer-11.ivf
+cc6b2bf167e114599b242aba574e8c6f1fa2f047 *av1-1-b8-00-quantizer-11.ivf.md5
+ebb3af7dfc15567188bcb617021cdc95ebc560e3 *av1-1-b8-00-quantizer-12.ivf
+b716ae29d56cd0c052dbfa1b5dcf850cd0fa8ca7 *av1-1-b8-00-quantizer-12.ivf.md5
+46159641f981a26fb9c374a5ca41e44f0ce0a9f0 *av1-1-b8-00-quantizer-13.ivf
+c6db1b8b4a74f83e4a0647e053cea0fc00f6abab *av1-1-b8-00-quantizer-13.ivf.md5
+fadc909d18eb640760fbb075f922fb050e715470 *av1-1-b8-00-quantizer-14.ivf
+e36bb6b23273633ba3ef7d28160a7258840a1476 *av1-1-b8-00-quantizer-14.ivf.md5
+8befbd9cc1601dcd36ec6911613855f68e6fd40e *av1-1-b8-00-quantizer-15.ivf
+cfc2334b76fb5e7aa9d8607e89d37cbc7716d62e *av1-1-b8-00-quantizer-15.ivf.md5
+ca42e00ae27c6b7f684fe3d2a787d50d2827cb3f *av1-1-b8-00-quantizer-16.ivf
+f11278218a7c3c73cfaab2332bab55f06cedcc81 *av1-1-b8-00-quantizer-16.ivf.md5
+05270d365bdc067f9446eda3029a6f41571a5229 *av1-1-b8-00-quantizer-17.ivf
+fb6482f35e7ad04bf231ea1806226760abcb3c26 *av1-1-b8-00-quantizer-17.ivf.md5
+617bc72037165efbff478d5a0d342b3c20ffcafd *av1-1-b8-00-quantizer-18.ivf
+1ff68d5424f91322123fe0d58f436b8e49cfa99d *av1-1-b8-00-quantizer-18.ivf.md5
+821c3b1ae6054c7a91b2f64428806e57f1157ca6 *av1-1-b8-00-quantizer-19.ivf
+f2fd118e786697553d6987f786660a2bb9f00680 *av1-1-b8-00-quantizer-19.ivf.md5
+48bcf17c27d9a4eb73632a68c09f42eff9f9af99 *av1-1-b8-00-quantizer-20.ivf
+64d55e4c858414bc2837c9c3e2d5fb6d2208c4b8 *av1-1-b8-00-quantizer-20.ivf.md5
+d61ecdd4f0950bc5c8bae1270b22e711bdd22763 *av1-1-b8-00-quantizer-21.ivf
+9d447938596096704fd5f4d41bcdf6fabf9cdfb9 *av1-1-b8-00-quantizer-21.ivf.md5
+59b4b65d8e56ccdd1bddff26a03e991a63409334 *av1-1-b8-00-quantizer-22.ivf
+aa1be0c7c7622d612af85f9bf96a212f6fe5ab56 *av1-1-b8-00-quantizer-22.ivf.md5
+95ed96988eb9916cad956db9b929718769de49f1 *av1-1-b8-00-quantizer-23.ivf
+596b8a3aea468996d609624367465c412751f52b *av1-1-b8-00-quantizer-23.ivf.md5
+e6c2dc4ce725003152797b3d7b34d7eb34da50c8 *av1-1-b8-00-quantizer-24.ivf
+1cd3d7e8b3813a9e5591b94eaeb72d471780e64a *av1-1-b8-00-quantizer-24.ivf.md5
+6734e353008824e523939d1a18daa3f2ab2d8ec6 *av1-1-b8-00-quantizer-25.ivf
+c45cf440a05802c1f9e29472175ed397d130d988 *av1-1-b8-00-quantizer-25.ivf.md5
+3372b1c69fb39811156adcea4f6dba802c0918c2 *av1-1-b8-00-quantizer-26.ivf
+b1751d55bb3fb788751fe28fb7434bee153bda68 *av1-1-b8-00-quantizer-26.ivf.md5
+e7ddb19a6e2a798d6a4e7dfdfc10b4df777b60e3 *av1-1-b8-00-quantizer-27.ivf
+0e19d6b79cd71de69d03e0455349568af979b170 *av1-1-b8-00-quantizer-27.ivf.md5
+7f1c90a35543d6b673e353b3702baf3aa1caeaa7 *av1-1-b8-00-quantizer-28.ivf
+d9a4f9cb88103249a05a7e6aa616bf0c16bf9c95 *av1-1-b8-00-quantizer-28.ivf.md5
+28d741b923011c7fcc50a7318256a638d3110a07 *av1-1-b8-00-quantizer-29.ivf
+c68cacf2b2ff2694945a99ad836dcf1ee3961c09 *av1-1-b8-00-quantizer-29.ivf.md5
+9a5d9ea4bc76dd40d04e92f33f45e9c2e120e85d *av1-1-b8-00-quantizer-30.ivf
+eb02bb8c16c4c0368ddff83e05e516e84ec9eaf3 *av1-1-b8-00-quantizer-30.ivf.md5
+20193c372f44f522e094c2c05fc7e4aaa0717fa8 *av1-1-b8-00-quantizer-31.ivf
+a4c1a4ac332f4911f0d5abbd826ebecfb8432d6c *av1-1-b8-00-quantizer-31.ivf.md5
+9617bbd691f093d259dbc8a642a57a153c1fc00c *av1-1-b8-00-quantizer-32.ivf
+73d60a348454b126ea6368ea604954bc23f210ae *av1-1-b8-00-quantizer-32.ivf.md5
+d9aea9d72a686c59b60584d827f60ca1ee8eee26 *av1-1-b8-00-quantizer-33.ivf
+fbf64de376a63d2d3051da83b0e4e56579b55c0a *av1-1-b8-00-quantizer-33.ivf.md5
+791aaf067f125e5cf4a247cf06a2e29ab071ec90 *av1-1-b8-00-quantizer-34.ivf
+8e2e6efe4c069e54844da19125c4280b95990c69 *av1-1-b8-00-quantizer-34.ivf.md5
+01ba67bba5cbf7c94c65da8f4c9bd6e7db24cf3a *av1-1-b8-00-quantizer-35.ivf
+0c5e60704a4a6bd27e67b6fd72ca7d2cf7fff50f *av1-1-b8-00-quantizer-35.ivf.md5
+3e255b4a320c9522dcec539fef770b6920b9a102 *av1-1-b8-00-quantizer-36.ivf
+1241aab865fd7b4bae73736cbeec1866ea9c90ec *av1-1-b8-00-quantizer-36.ivf.md5
+44fa6fca109747d8f43f6c6aa46d782e5d476d54 *av1-1-b8-00-quantizer-37.ivf
+947f0f887c5ac9149cf85e8114a709d6f410fc32 *av1-1-b8-00-quantizer-37.ivf.md5
+8319ac1ddd6ce3279da5780175dff7a3a5fa1054 *av1-1-b8-00-quantizer-38.ivf
+5f571b7f88678eab9e54f162cc9898f14e437770 *av1-1-b8-00-quantizer-38.ivf.md5
+5975e7056e17608593a8c40619b68e6576d373d9 *av1-1-b8-00-quantizer-39.ivf
+7c870192d6eb70ce5367147a3d2c6a52e11f7bec *av1-1-b8-00-quantizer-39.ivf.md5
+47da942f1e455f1422fc65f06dd57304541d16ac *av1-1-b8-00-quantizer-40.ivf
+6ea7116c9ce3a1641c7060bab2f5e06fd0910d61 *av1-1-b8-00-quantizer-40.ivf.md5
+ab35c15dfde21c2572b14e04dbfd5fac1adae449 *av1-1-b8-00-quantizer-41.ivf
+19596f9849653b913186b9d6b7072984ede96177 *av1-1-b8-00-quantizer-41.ivf.md5
+23a5fa6c3d0eaffaf13f6402465f5dd33d8ea7f1 *av1-1-b8-00-quantizer-42.ivf
+5a2726f0d1b1799d4f70883f1bfe5c9d976c6cf5 *av1-1-b8-00-quantizer-42.ivf.md5
+86cddfc463d2b186ec5a1aa25c4562c05201e3c3 *av1-1-b8-00-quantizer-43.ivf
+674c64ec8487ee774ad09350380fa6ac43815807 *av1-1-b8-00-quantizer-43.ivf.md5
+6894c154eb56c4f3fe44d54fc4f9af468b03d175 *av1-1-b8-00-quantizer-44.ivf
+eca679a2781eb894d18b3d578e3aaf4f48019a15 *av1-1-b8-00-quantizer-44.ivf.md5
+0960bf018ada4224b8344519cf091850d50a57bd *av1-1-b8-00-quantizer-45.ivf
+291bb43b9e1ab167040b51019daf1ccf94fd1e50 *av1-1-b8-00-quantizer-45.ivf.md5
+ea644a4732f1a2534332802c2fa5073344f3c356 *av1-1-b8-00-quantizer-46.ivf
+4c7915382b1d6d08709c95525b04ab8830f20ca1 *av1-1-b8-00-quantizer-46.ivf.md5
+d1f8832d33234e2c74a2280090850153ea24ea82 *av1-1-b8-00-quantizer-47.ivf
+90eb9959e612602934dcc512fe6f54abf0c88d9c *av1-1-b8-00-quantizer-47.ivf.md5
+69c93f760e8b666eb5b98f510e09d90f9230ac9b *av1-1-b8-00-quantizer-48.ivf
+931f869e14bd455de9dac2101b383c29e7d6f04c *av1-1-b8-00-quantizer-48.ivf.md5
+8b660c577d95c031d6711c1134b8d115097f8d7e *av1-1-b8-00-quantizer-49.ivf
+0e3fe8b49d497050dc1a0eac5f3ad60f5fe068fe *av1-1-b8-00-quantizer-49.ivf.md5
+d40bb21448a6da0fc9b88cbcf76d2f4226573acb *av1-1-b8-00-quantizer-50.ivf
+bcd2a9c9a021ba44fc5dc74ae02194fe49ca76a4 *av1-1-b8-00-quantizer-50.ivf.md5
+3b5a1d464aa89b0f1a6ad4f5a03602292b826172 *av1-1-b8-00-quantizer-51.ivf
+49bcde0c56cf8b7fbe429336981be22d39025b74 *av1-1-b8-00-quantizer-51.ivf.md5
+38970a02fb38ddb4954fe4240164cb75de5fc744 *av1-1-b8-00-quantizer-52.ivf
+fd02b034d79d4be150efb02bd4349edfd0e41311 *av1-1-b8-00-quantizer-52.ivf.md5
+2fde7a7cf3014d5196d011c47de4a144227ed122 *av1-1-b8-00-quantizer-53.ivf
+0cb66e6d8fbb29962a69ae1703e22da50db2c92b *av1-1-b8-00-quantizer-53.ivf.md5
+89a69e9b9a601e40cb491ac3a1d32491f2468ac8 *av1-1-b8-00-quantizer-54.ivf
+2f8af51acc73c99b5af81db2bdd1883b611ad311 *av1-1-b8-00-quantizer-54.ivf.md5
+31ee4f56fcb0043e95fff7af49e4ef82aafa5543 *av1-1-b8-00-quantizer-55.ivf
+04a7104e02bdd0fa38c118202dbbecdbd11ace02 *av1-1-b8-00-quantizer-55.ivf.md5
+f262f0b234006a2652fceb77b1a8711aa53abb54 *av1-1-b8-00-quantizer-56.ivf
+bdd54dc25bc5a147c76163af0bced45c56435d79 *av1-1-b8-00-quantizer-56.ivf.md5
+1ef00617091db4b2b839de623bd6b4fb0b2f5f83 *av1-1-b8-00-quantizer-57.ivf
+714c65363a87ed5e6e4ad75c79ddb6af57d41fd9 *av1-1-b8-00-quantizer-57.ivf.md5
+43c9b02feccbb3c709d96015f126b7e3d4c24c64 *av1-1-b8-00-quantizer-58.ivf
+bae22b8d6377862bff8219470c0d87205d186a68 *av1-1-b8-00-quantizer-58.ivf.md5
+ca5f780abe4c02e48cceb9c804f3625723c359bf *av1-1-b8-00-quantizer-59.ivf
+c60a20bbf60b0b0a442ef3f7b682979053909d6e *av1-1-b8-00-quantizer-59.ivf.md5
+1f6f047e9f0e1da22fb514370d92c3c7c66dcf89 *av1-1-b8-00-quantizer-60.ivf
+86dc7fa59d363cf1ae4b027a57b119bda893c1c1 *av1-1-b8-00-quantizer-60.ivf.md5
+bcf0c3353568c47a043f2dc34c9abd3fc04eebd4 *av1-1-b8-00-quantizer-61.ivf
+66fc4f729c5915aa19939d1b6e28e5b398e747bb *av1-1-b8-00-quantizer-61.ivf.md5
+ac8d3c54451b52cf557ef435d33e7638088d66df *av1-1-b8-00-quantizer-62.ivf
+b57f4e1276ead626a3662339a86111ae6fda49d2 *av1-1-b8-00-quantizer-62.ivf.md5
+2a8aa33513d8e01ae9410c4bf5fe1e471b775482 *av1-1-b8-00-quantizer-63.ivf
+9f646ec35a168f495e144c64ba7ce9aeb41cd0a2 *av1-1-b8-00-quantizer-63.ivf.md5
+838388fbda4a1d91be81ff62694c3bf13c460d38 *av1-1-b8-01-size-16x16.ivf
+4229c1caf8e25eb3073456fb90ceed206753901e *av1-1-b8-01-size-16x16.ivf.md5
+23f4253bf71e02b2e8ead66da4b3de875e879ef2 *av1-1-b8-01-size-18x16.ivf
+af125644436d4b6897dade68336cedad663b6610 *av1-1-b8-01-size-18x16.ivf.md5
+94e4a75bd93052f79998e9e08e6b5dd73dc27e50 *av1-1-b8-01-size-32x16.ivf
+e7b3fbc5e4b2469838e7ae36512bd3ce0a81040c *av1-1-b8-01-size-32x16.ivf.md5
+f297bde01c05ec5c07ff8118a0280bd36c52b246 *av1-1-b8-01-size-34x16.ivf
+f6bbd94d6063c689de3c7cf94afa2c68b969d12c *av1-1-b8-01-size-34x16.ivf.md5
+1e18bdf68bab7e7282aacc77e423bc7d93d04a8e *av1-1-b8-01-size-64x16.ivf
+de75732fccfb385294b23c17f0f1a57b455edcf7 *av1-1-b8-01-size-64x16.ivf.md5
+26b1f6ae80b161e971468085778cc1ece502b330 *av1-1-b8-01-size-66x16.ivf
+48bd99813557c314d398e6952da78da07c79d416 *av1-1-b8-01-size-66x16.ivf.md5
+ff213ecf31b982a3a7f009c9739f64e066e1ffe9 *av1-1-b8-01-size-16x18.ivf
+86b20a13b1939dc5f678e80491f190d376233d58 *av1-1-b8-01-size-16x18.ivf.md5
+c90bd878c59263a15c6a6f515d1c7e071f141559 *av1-1-b8-01-size-18x18.ivf
+6f659036ffcd3dd380cf970cf1a06f7755e0b2de *av1-1-b8-01-size-18x18.ivf.md5
+e16a1411381b34817a4c0d8e5eeaeb8cddcc9c46 *av1-1-b8-01-size-32x18.ivf
+fdb1c4ec56f5aa690eadbe897340fee86a06ae2f *av1-1-b8-01-size-32x18.ivf.md5
+fac7052b39bd2d0ae107e0e94050226712c770c2 *av1-1-b8-01-size-34x18.ivf
+adb0d5a99228027eaa3b016963df447c9818c447 *av1-1-b8-01-size-34x18.ivf.md5
+b8be5e55d9be42746c2b547d0e26e80b21c9802a *av1-1-b8-01-size-64x18.ivf
+8f8f6da34cdf78c5a6551c637e1afe279cc3884e *av1-1-b8-01-size-64x18.ivf.md5
+9e066bdcc2cd789cdf551bd4c9c85c178887b880 *av1-1-b8-01-size-66x18.ivf
+e8ec6effa936423ae2eec2b60a3160720d2de912 *av1-1-b8-01-size-66x18.ivf.md5
+6ebe45085cdeebc2acd6da5abd542a59312c0ff4 *av1-1-b8-01-size-16x32.ivf
+044695669103dbf158591dce9c649317a177d5f6 *av1-1-b8-01-size-16x32.ivf.md5
+9fabb4f60641b8c7995d1dc451419165d41258ff *av1-1-b8-01-size-18x32.ivf
+7263764680dfec864c3fad5df824ab1973489a14 *av1-1-b8-01-size-18x32.ivf.md5
+3f72841a24a13e601d79cf029aa1fdb02970ce0b *av1-1-b8-01-size-32x32.ivf
+bbe1ae2888d291ec6bc98cd0784937580c554103 *av1-1-b8-01-size-32x32.ivf.md5
+392131a7c7609acd0dba88fee14f1ed042d23ab1 *av1-1-b8-01-size-34x32.ivf
+eea68165ebe9acd28693374bf2266374b9c77786 *av1-1-b8-01-size-34x32.ivf.md5
+78afdd96265811ab9466e906347b57161e5c010d *av1-1-b8-01-size-64x32.ivf
+47b317af582700b67f6e77659db1dfaa26c8cde6 *av1-1-b8-01-size-64x32.ivf.md5
+2b4d01f2c9f23044c0d886482c7073bd4d5d37d1 *av1-1-b8-01-size-66x32.ivf
+3ad5a58a0ee5086af370b22ab2b5b7592a4f33e7 *av1-1-b8-01-size-66x32.ivf.md5
+78ddae04eb8277ae605bd7017ad7ad27bfc82d39 *av1-1-b8-01-size-16x34.ivf
+d0c18e679f1fc51e4f7409831321eed9c4858f6f *av1-1-b8-01-size-16x34.ivf.md5
+38d8ed885f46aead6ec1271d8a5d4aee79b8eb68 *av1-1-b8-01-size-18x34.ivf
+097ddbd69b8f54826a35efeb0b8b07ec198bba6b *av1-1-b8-01-size-18x34.ivf.md5
+91a42720bc2e7ba701f4d97b463a098b6707cdbd *av1-1-b8-01-size-32x34.ivf
+c590d43d37095bd2e8f8d12c9278477419b72d1a *av1-1-b8-01-size-32x34.ivf.md5
+4cc2a437dba56e8878113d9b390b980522542028 *av1-1-b8-01-size-34x34.ivf
+57eeb971f00e64abde25be69dbcb4e3ce5065a57 *av1-1-b8-01-size-34x34.ivf.md5
+b36fee1b6ad69d1206466615d69c05e0a4407939 *av1-1-b8-01-size-64x34.ivf
+a78aea0250d0b32657dc0eaf2d8394bc766c0e35 *av1-1-b8-01-size-64x34.ivf.md5
+10e441209262e082e31fef8c15b51579c9e81509 *av1-1-b8-01-size-66x34.ivf
+558b46f6ef1662c208012d0b66d1857eeff3244e *av1-1-b8-01-size-66x34.ivf.md5
+dd44aad500c7ca0fc97e3d8f0abed3c83b24c79c *av1-1-b8-01-size-16x64.ivf
+a5b64e8063abcf3e4872dc4baf1c32384dc5cf83 *av1-1-b8-01-size-16x64.ivf.md5
+aa849f0d09bcb2ead44719d63043536932d5c9f2 *av1-1-b8-01-size-18x64.ivf
+bcdf2dea3590c7031158ffe7b907d9ee35e2fe57 *av1-1-b8-01-size-18x64.ivf.md5
+36e856d30e160ba2fbb00510296202f61afaae49 *av1-1-b8-01-size-32x64.ivf
+99299f75b82c40c13f168adf2d124f57044a39a2 *av1-1-b8-01-size-32x64.ivf.md5
+e3e03ec5d38eb25e97e4ec3adc6ed40ecdebd278 *av1-1-b8-01-size-34x64.ivf
+84625abf8a200a7d20dd3dd3b277b50b3d62ce32 *av1-1-b8-01-size-34x64.ivf.md5
+7d017daebef2d39ed42a505a8e6103ab0c0988c1 *av1-1-b8-01-size-64x64.ivf
+1ff38d5ecba82fb2e6ac3b09c29c9fe74885ac29 *av1-1-b8-01-size-64x64.ivf.md5
+e1b58ba0b462508593399a2ed84db5f1c59ffcd2 *av1-1-b8-01-size-66x64.ivf
+a6b2c84c94fe79ab0373d157d1203f8d66de0706 *av1-1-b8-01-size-66x64.ivf.md5
+7b4faa7eb7b73392b62de6613282a98dddc13bb6 *av1-1-b8-01-size-16x66.ivf
+a2dacf2bae3c4ab352af66a9600946d29ab9a6ee *av1-1-b8-01-size-16x66.ivf.md5
+0f97805fa30497d4cf39665150f00dfdea52d862 *av1-1-b8-01-size-18x66.ivf
+33d8ea0765953250f998da3fe161f2a8cfca2353 *av1-1-b8-01-size-18x66.ivf.md5
+c8bb00256de973e3b3ee31b924f554336d310cdb *av1-1-b8-01-size-32x66.ivf
+6a6588e6edc68ff7739968a9e7cc6d9eaaeed356 *av1-1-b8-01-size-32x66.ivf.md5
+75ec54fec5c36eecde6d0a16e0389a5f7ad8ec22 *av1-1-b8-01-size-34x66.ivf
+36101dfa9495c18696c0d7d61f25e748f4de7425 *av1-1-b8-01-size-34x66.ivf.md5
+7e5491716e70f8199156b8843513c935667b281e *av1-1-b8-01-size-64x66.ivf
+da38755bb0c9ef56b81617835ddf1340242c6dce *av1-1-b8-01-size-64x66.ivf.md5
+68b47b386f61d67cb5b824a7e6bf87c8b9c2bf7b *av1-1-b8-01-size-66x66.ivf
+25974893956ebd92df474325946130c34f880ea7 *av1-1-b8-01-size-66x66.ivf.md5
+9f386d19c87dbfd6ac84a06d2393dd88863ac003 *av1-1-b8-01-size-196x196.ivf
+788f77f655f55de3db94dd69870316134c149116 *av1-1-b8-01-size-196x196.ivf.md5
+ed3bb2bb52a9d1786e233ef38142b15b85097875 *av1-1-b8-01-size-198x196.ivf
+3bb6b6721ad9b2838b2d07e47b29d6c0117526b1 *av1-1-b8-01-size-198x196.ivf.md5
+49461772caaaa7b824d48f4e9c77a906b0dc02d5 *av1-1-b8-01-size-200x196.ivf
+f1cba00c36909c56097c8785df476d42bc91f259 *av1-1-b8-01-size-200x196.ivf.md5
+44a656a22958e26ed169a69deb8f373117224f06 *av1-1-b8-01-size-202x196.ivf
+69be876b52fe42811bba52d36d0bcc88d6c25b3f *av1-1-b8-01-size-202x196.ivf.md5
+0a6fe9b478363faedbfd465a75790b4c2661b9ba *av1-1-b8-01-size-208x196.ivf
+fc8e95a6860a8a37ccdf1dfe49828502fcf96a08 *av1-1-b8-01-size-208x196.ivf.md5
+8e05b5a20ec95afd92bb615a7daa2e17a7ef55a8 *av1-1-b8-01-size-210x196.ivf
+0add512bffbda3300d8f684a53b13b996fe2e46d *av1-1-b8-01-size-210x196.ivf.md5
+a15f12652c6b4d0c30f13a439c941bfc4a431d1a *av1-1-b8-01-size-224x196.ivf
+b904b93252175f79e0e2b28896131ce93d5fc925 *av1-1-b8-01-size-224x196.ivf.md5
+1a57b913443b267f4a31a6925c39f5b58022f550 *av1-1-b8-01-size-226x196.ivf
+7cf3087de5804763a82d2a798243a66459664772 *av1-1-b8-01-size-226x196.ivf.md5
+2cc28541a2a72e8b45a368f71e70fc294e2de3ab *av1-1-b8-01-size-196x198.ivf
+bb736eedb4bd1e39bf9d60435b4b27a12842e112 *av1-1-b8-01-size-196x198.ivf.md5
+c4ebf93fbf3ae52108fd7b39ddef3afae48188ea *av1-1-b8-01-size-198x198.ivf
+fa4de6881511728bafa15b5f441a0cfdf683cc75 *av1-1-b8-01-size-198x198.ivf.md5
+55fce983186d454b0eb15527393bb2465ba41c6b *av1-1-b8-01-size-200x198.ivf
+1ac8fb1ee622cbc4aa1b83cb46b4731c85efae62 *av1-1-b8-01-size-200x198.ivf.md5
+67d276c67886f0a91a7ee06751a64f95eeb7bc1f *av1-1-b8-01-size-202x198.ivf
+1633b62d9e4ea41737c42f70cbde9a5671da0cef *av1-1-b8-01-size-202x198.ivf.md5
+081cb3f29d3956d4d858d9661fd3d62c94b68867 *av1-1-b8-01-size-208x198.ivf
+871d1c99167408dd32fa7603a7296c9b99ccda15 *av1-1-b8-01-size-208x198.ivf.md5
+b2d80b42468d5f296ae240cfb1fc0b3dd3d96bbc *av1-1-b8-01-size-210x198.ivf
+6a3382656cb17b532a97b1061697f9a878fc58d1 *av1-1-b8-01-size-210x198.ivf.md5
+84d7994fa20fcf6c1d8dbd4c2060c988a6fce831 *av1-1-b8-01-size-224x198.ivf
+42ea12e15de81f2e8617b6de7bae76de2da4d648 *av1-1-b8-01-size-224x198.ivf.md5
+c74a9281cf98c597121df6bff0ac5312b887f969 *av1-1-b8-01-size-226x198.ivf
+4133aae0001804e2bbc7928fc065517a6dd8b288 *av1-1-b8-01-size-226x198.ivf.md5
+27adbf148c63f807bd617cfd78aeaedb8b0f2304 *av1-1-b8-01-size-196x200.ivf
+9253e525e6207ef1ce0839b8f88ea781e9abe41e *av1-1-b8-01-size-196x200.ivf.md5
+21c9ea4d882e48353d3df66fcde0e4746168163f *av1-1-b8-01-size-198x200.ivf
+3d5ee59fde9194f0eaff736051cfd1d7b7daeff1 *av1-1-b8-01-size-198x200.ivf.md5
+c27b0b57667910847122a0309c703315e444110f *av1-1-b8-01-size-200x200.ivf
+7b2a15a17b421ef07e285ca4e8a224f0512c434d *av1-1-b8-01-size-200x200.ivf.md5
+780de549e4163a52590f7c0f488e027a8a4aa053 *av1-1-b8-01-size-202x200.ivf
+cb0ec0969522ca60d79a639e9b9509363468ffd0 *av1-1-b8-01-size-202x200.ivf.md5
+2c59821904863e264ae61401cbd494a79bc04f13 *av1-1-b8-01-size-208x200.ivf
+9963955966a52b65cdd13465c9fb2ba3b5356755 *av1-1-b8-01-size-208x200.ivf.md5
+ff63121611ea9c0628c7e5af13de5e7786611ca6 *av1-1-b8-01-size-210x200.ivf
+2a5993be234e3af2af6d185b2a6f3aaf1979b83a *av1-1-b8-01-size-210x200.ivf.md5
+b8485ada95440d78b51153227231b1aced1a8273 *av1-1-b8-01-size-224x200.ivf
+9c3cd32ea6c006a91eb37d69dbeccf878de5d214 *av1-1-b8-01-size-224x200.ivf.md5
+1aa0ce3e3a74f9b600a146e98b05547a0b454c48 *av1-1-b8-01-size-226x200.ivf
+e045be96c3af16a9ddc10a9933e8ddfb3319d716 *av1-1-b8-01-size-226x200.ivf.md5
+e92b76480f4339855d998b97182f36b28deadcfa *av1-1-b8-01-size-196x202.ivf
+480c707abcd2a650e2160ec397f8348cecb45770 *av1-1-b8-01-size-196x202.ivf.md5
+137b9c0d10a3bdbdf6f97b3e6331f3e8acaf8f91 *av1-1-b8-01-size-198x202.ivf
+7429642146d0da55161ab13024a261094ee2ce87 *av1-1-b8-01-size-198x202.ivf.md5
+9cea71c44ad015ac702d675bacca17876e65cb1a *av1-1-b8-01-size-200x202.ivf
+76b1ec6c42da55f47e389a561590d1a7c713e495 *av1-1-b8-01-size-200x202.ivf.md5
+26dffdcd0dac9becf68d12e31fcd91eddf1f7154 *av1-1-b8-01-size-202x202.ivf
+ddb75e99123fed4ef05d9b85200cefd8985bc84c *av1-1-b8-01-size-202x202.ivf.md5
+04007e83bb66ba547d09f8926ea5bfc7fd9e4b2a *av1-1-b8-01-size-208x202.ivf
+5b72eb58db22087ad416c499119f41e718395b52 *av1-1-b8-01-size-208x202.ivf.md5
+721ff7c0ae0e2ed896b5acac230113f1404e769c *av1-1-b8-01-size-210x202.ivf
+187d2ef939fc26e1a1c7de65abe8e058d8aae17a *av1-1-b8-01-size-210x202.ivf.md5
+dba41421cc938bcf0234254f96be0325ab66186e *av1-1-b8-01-size-224x202.ivf
+58856038c1eb13a7bf0353a30b1affe844cd31b1 *av1-1-b8-01-size-224x202.ivf.md5
+55eba14878d25dcc351ee5e92fa06e559035b409 *av1-1-b8-01-size-226x202.ivf
+e295b3d791d40d7c1fff2c40a260078dccaef24a *av1-1-b8-01-size-226x202.ivf.md5
+6c777223990ddfd92040a8526646ed0f39299b0d *av1-1-b8-01-size-196x208.ivf
+5210daff766cddaf3945610ee05ff242aef8175a *av1-1-b8-01-size-196x208.ivf.md5
+252831abfb9f4a9a8556c21cc3bf60adfe88210f *av1-1-b8-01-size-198x208.ivf
+35ed9601e608a829980cec81e41b7bd3e5f4c2ce *av1-1-b8-01-size-198x208.ivf.md5
+e800ed893a88704a4576d4984957f3664560daa9 *av1-1-b8-01-size-200x208.ivf
+82c038f9072a2fcf8d55fb4a474fdd791ba9a290 *av1-1-b8-01-size-200x208.ivf.md5
+9ce7bb932dd99f86da8ff2ab89fa4d3089a78da8 *av1-1-b8-01-size-202x208.ivf
+0611bf0179abe3c820a447a2bd3a04c3790f3a87 *av1-1-b8-01-size-202x208.ivf.md5
+e5900d9150c8bebc49776227afd3b0a21f5a6ac6 *av1-1-b8-01-size-208x208.ivf
+86d6b9a3840aa0a77938547c905bd6f45d069681 *av1-1-b8-01-size-208x208.ivf.md5
+2758ba5dad16f4a91334f2ed07a4a037201bb873 *av1-1-b8-01-size-210x208.ivf
+78453b1fda2ccc6f35e0d762567807757bcddb16 *av1-1-b8-01-size-210x208.ivf.md5
+fff88fb8e833f6b4ad64cb591b219c7cceb7f2d2 *av1-1-b8-01-size-224x208.ivf
+87266fc34aaed82cdb98cbc309b221ad52eccd81 *av1-1-b8-01-size-224x208.ivf.md5
+dec839fe64046461015b56cda191835284f42a52 *av1-1-b8-01-size-226x208.ivf
+d7a15264fc3fd55d3aec0ccfaa7c434c6d90969f *av1-1-b8-01-size-226x208.ivf.md5
+584782e93ed1cb7797a90fece44becdd1e23bf0d *av1-1-b8-01-size-196x210.ivf
+ed76ec841b18a457853e368576967c4768fc2730 *av1-1-b8-01-size-196x210.ivf.md5
+dab625599b9f01398b593e865d9a4a95a029d60f *av1-1-b8-01-size-198x210.ivf
+b90e8d96a1f5b329b088b467a11fed2d055d74ca *av1-1-b8-01-size-198x210.ivf.md5
+6774bee17b9e50d2d8630e2e1afc30ded67e662d *av1-1-b8-01-size-200x210.ivf
+343a86bd54eb3dd5e9902eb62a3d776dcff2f4f3 *av1-1-b8-01-size-200x210.ivf.md5
+0456c3b8e242eeee019ca97d155f81124de62c90 *av1-1-b8-01-size-202x210.ivf
+5a6a6428c9858a0d3561db42ceaf981c143fe479 *av1-1-b8-01-size-202x210.ivf.md5
+6a3a8f65bf806b1be7726b983427880f772c9986 *av1-1-b8-01-size-208x210.ivf
+5563ea6d8c65887553ff3000addc6418913f1650 *av1-1-b8-01-size-208x210.ivf.md5
+5a8b69489f8e9b917ea7718ad2645101cdbe5644 *av1-1-b8-01-size-210x210.ivf
+f4b01604036fa23000d44fbf42097ae1181bcd62 *av1-1-b8-01-size-210x210.ivf.md5
+fb6f5b08a048698cfe324557ee8cd840c4a3f6ce *av1-1-b8-01-size-224x210.ivf
+3ce5c404e3ca09c8e994b3043bad42cd555b00c0 *av1-1-b8-01-size-224x210.ivf.md5
+2e9fc8510d2131b2f3c9a93bececac985e4426d2 *av1-1-b8-01-size-226x210.ivf
+897c537e259331ca86cdd6e4d2bd343f8538402e *av1-1-b8-01-size-226x210.ivf.md5
+8300512106fce3424eb74b5d4bc0f4f19f7c9af8 *av1-1-b8-01-size-196x224.ivf
+43662ea025ea79afe4964fd4d12a77f4aa4e565e *av1-1-b8-01-size-196x224.ivf.md5
+640f8fda7ade8f2850e2275a9f5e233e33a0ba8d *av1-1-b8-01-size-198x224.ivf
+9ac690bdbbce47d7b169128b568f955e70076f8c *av1-1-b8-01-size-198x224.ivf.md5
+ce2e9379c72fc924e364d5727605394a1438a211 *av1-1-b8-01-size-200x224.ivf
+1ec35a53d88072b96b255202f678178bc7e5bb20 *av1-1-b8-01-size-200x224.ivf.md5
+5d3af7921623deccb578115c8ce207c019f97f50 *av1-1-b8-01-size-202x224.ivf
+14eafd55b0cda3a3476cae7ad500dbd5ee899dd5 *av1-1-b8-01-size-202x224.ivf.md5
+6b6d78e466cf94a5ef8dfe252caa0948dd2ec175 *av1-1-b8-01-size-208x224.ivf
+e178b0c272dfcfe614c6b49cb28dad11781af0b6 *av1-1-b8-01-size-208x224.ivf.md5
+dd2232b9e18971d7e19650a1e3218aef1010247f *av1-1-b8-01-size-210x224.ivf
+40a66198c47820f5fa2d2e389ec0c1191ea4ffcc *av1-1-b8-01-size-210x224.ivf.md5
+9ec028b81a5ea311683328d856f436e6d0b0e6a0 *av1-1-b8-01-size-224x224.ivf
+143b9530ce722385db2c2d883daa649ed42b8d40 *av1-1-b8-01-size-224x224.ivf.md5
+bf833947e62935c54e1e727ccb36157f7c1e9e5d *av1-1-b8-01-size-226x224.ivf
+ca4f3b44463106e4f0bb54e490c3bd457d7d780b *av1-1-b8-01-size-226x224.ivf.md5
+5525f7e312ec073f480ed5a2be5bdc4f0ce51a09 *av1-1-b8-01-size-196x226.ivf
+062d4b240741184458d2d2abd243ed7877631de8 *av1-1-b8-01-size-196x226.ivf.md5
+e6b911142394b94c23191eaa63c9eb41a00f80b0 *av1-1-b8-01-size-198x226.ivf
+3b580d903dddf47082f5e055bfb01a4f05c09b7d *av1-1-b8-01-size-198x226.ivf.md5
+70feb5efeb28df25f7d1a661c73bf013c5ada9b4 *av1-1-b8-01-size-200x226.ivf
+f0b894e7f787e62f1492be62f3dedeb065062160 *av1-1-b8-01-size-200x226.ivf.md5
+7f9a10831e2389b31497fad50080b4d5452d6e91 *av1-1-b8-01-size-202x226.ivf
+45b7194eba9367c8059403c23ca4ae49e988dfaf *av1-1-b8-01-size-202x226.ivf.md5
+967837a2cfbf9aa3131f73aec6a52dcdd82926c7 *av1-1-b8-01-size-208x226.ivf
+c8baedb48fd5d4c956aa8d73fd957370f718f047 *av1-1-b8-01-size-208x226.ivf.md5
+9c926226b9f6b015501d8ac1e3f95e8570283a05 *av1-1-b8-01-size-210x226.ivf
+57d4837667fd4c5a7aeb908626d701b632852c60 *av1-1-b8-01-size-210x226.ivf.md5
+25a4940922761239809d82c45c2be1c5e4f48785 *av1-1-b8-01-size-224x226.ivf
+87ae7e7558241bf3575a333f56fbad4dfdade8ff *av1-1-b8-01-size-224x226.ivf.md5
+40dd208eb525cd90d7c0674cf787097fb909afae *av1-1-b8-01-size-226x226.ivf
+34bdef682a4eae0e0a05e4486a968af1df8b220a *av1-1-b8-01-size-226x226.ivf.md5
+\ No newline at end of file
diff --git a/third_party/aom/test/test.cmake b/third_party/aom/test/test.cmake
index 26937c96ad..8594d059cf 100644
--- a/third_party/aom/test/test.cmake
+++ b/third_party/aom/test/test.cmake
@@ -1,14 +1,16 @@
-##
-## Copyright (c) 2017, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-if (NOT AOM_TEST_TEST_CMAKE_)
+#
+# Copyright (c) 2017, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and the
+# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
+# not distributed with this source code in the LICENSE file, you can obtain it
+# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
+# License 1.0 was not distributed with this source code in the PATENTS file, you
+# can obtain it at www.aomedia.org/license/patent.
+#
+if(AOM_TEST_TEST_CMAKE_)
+  return()
+endif() # AOM_TEST_TEST_CMAKE_
 set(AOM_TEST_TEST_CMAKE_ 1)
 
 include(FindPythonInterp)
@@ -18,507 +20,408 @@ include("${AOM_ROOT}/test/test_data_util.cmake")
 
 set(AOM_UNIT_TEST_DATA_LIST_FILE "${AOM_ROOT}/test/test-data.sha1")
 
-set(AOM_UNIT_TEST_WRAPPER_SOURCES
-    "${AOM_CONFIG_DIR}/usage_exit.c"
-    "${AOM_ROOT}/test/test_libaom.cc")
-
-set(AOM_UNIT_TEST_COMMON_SOURCES
-    "${AOM_ROOT}/test/acm_random.h"
-    "${AOM_ROOT}/test/clear_system_state.h"
-    "${AOM_ROOT}/test/codec_factory.h"
-    "${AOM_ROOT}/test/decode_test_driver.cc"
-    "${AOM_ROOT}/test/decode_test_driver.h"
-    "${AOM_ROOT}/test/function_equivalence_test.h"
-    "${AOM_ROOT}/test/md5_helper.h"
-    "${AOM_ROOT}/test/register_state_check.h"
-    "${AOM_ROOT}/test/transform_test_base.h"
-    "${AOM_ROOT}/test/util.h"
-    "${AOM_ROOT}/test/video_source.h")
-
-if (NOT BUILD_SHARED_LIBS)
-  set(AOM_UNIT_TEST_COMMON_SOURCES
-      ${AOM_UNIT_TEST_COMMON_SOURCES}
-      "${AOM_ROOT}/test/convolve_test.cc"
-      "${AOM_ROOT}/test/simd_impl.h")
-
-  if (HAVE_NEON)
-    set(AOM_UNIT_TEST_COMMON_SOURCES
-        ${AOM_UNIT_TEST_COMMON_SOURCES}
-        "${AOM_ROOT}/test/simd_neon_test.cc")
-  endif ()
-  if (HAVE_SSE2)
-    set(AOM_UNIT_TEST_COMMON_SOURCES
-        ${AOM_UNIT_TEST_COMMON_SOURCES}
-        "${AOM_ROOT}/test/simd_sse2_test.cc")
-  endif ()
-  if (HAVE_SSSE3)
-    set(AOM_UNIT_TEST_COMMON_SOURCES
-        ${AOM_UNIT_TEST_COMMON_SOURCES}
-        "${AOM_ROOT}/test/simd_ssse3_test.cc")
-  endif ()
-  if (HAVE_SSE4)
-    set(AOM_UNIT_TEST_COMMON_SOURCES
-        ${AOM_UNIT_TEST_COMMON_SOURCES}
-        "${AOM_ROOT}/test/simd_sse4_test.cc")
-  endif ()
-  if (HAVE_AVX2)
-    set(AOM_UNIT_TEST_COMMON_SOURCES
-        ${AOM_UNIT_TEST_COMMON_SOURCES}
-        "${AOM_ROOT}/test/simd_avx2_test.cc")
-  endif ()
-
-  if (CONFIG_ACCOUNTING)
-    set(AOM_UNIT_TEST_COMMON_SOURCES
-        ${AOM_UNIT_TEST_COMMON_SOURCES}
-        "${AOM_ROOT}/test/accounting_test.cc")
-  endif ()
-
-  if (CONFIG_ADAPT_SCAN)
-    set(AOM_UNIT_TEST_COMMON_SOURCES
-        ${AOM_UNIT_TEST_COMMON_SOURCES}
-        "${AOM_ROOT}/test/scan_test.cc")
-  endif ()
-
-  if (CONFIG_GLOBAL_MOTION OR CONFIG_WARPED_MOTION)
-    if (HAVE_SSE2)
-      set(AOM_UNIT_TEST_COMMON_SOURCES
-          ${AOM_UNIT_TEST_COMMON_SOURCES}
-          "${AOM_ROOT}/test/warp_filter_test.cc"
-          "${AOM_ROOT}/test/warp_filter_test_util.cc"
-          "${AOM_ROOT}/test/warp_filter_test_util.h")
-    endif ()
-  endif ()
-endif ()
-
-set(AOM_UNIT_TEST_DECODER_SOURCES
-    "${AOM_ROOT}/test/decode_api_test.cc"
-    "${AOM_ROOT}/test/ivf_video_source.h")
-
-set(AOM_UNIT_TEST_ENCODER_SOURCES
-    "${AOM_ROOT}/test/altref_test.cc"
-    "${AOM_ROOT}/test/aq_segment_test.cc"
-    "${AOM_ROOT}/test/datarate_test.cc"
-    "${AOM_ROOT}/test/encode_api_test.cc"
-    "${AOM_ROOT}/test/encode_test_driver.cc"
-    "${AOM_ROOT}/test/encode_test_driver.h"
-    "${AOM_ROOT}/test/error_resilience_test.cc"
-    "${AOM_ROOT}/test/i420_video_source.h"
-    "${AOM_ROOT}/test/resize_test.cc"
-    "${AOM_ROOT}/test/y4m_test.cc"
-    "${AOM_ROOT}/test/y4m_video_source.h"
-    "${AOM_ROOT}/test/yuv_video_source.h")
-
-if (NOT BUILD_SHARED_LIBS)
-  set(AOM_UNIT_TEST_ENCODER_SOURCES
-      ${AOM_UNIT_TEST_ENCODER_SOURCES}
-      "${AOM_ROOT}/test/dct16x16_test.cc"
-      "${AOM_ROOT}/test/dct32x32_test.cc"
-      "${AOM_ROOT}/test/sad_test.cc")
-endif ()
-
-set(AOM_DECODE_PERF_TEST_SOURCES "${AOM_ROOT}/test/decode_perf_test.cc")
-set(AOM_ENCODE_PERF_TEST_SOURCES "${AOM_ROOT}/test/encode_perf_test.cc")
-set(AOM_UNIT_TEST_WEBM_SOURCES "${AOM_ROOT}/test/webm_video_source.h")
-
-set(AOM_TEST_INTRA_PRED_SPEED_SOURCES
-    "${AOM_CONFIG_DIR}/usage_exit.c"
-    "${AOM_ROOT}/test/test_intra_pred_speed.cc")
-
-if (NOT BUILD_SHARED_LIBS)
-  if (CONFIG_AV1_DECODER OR CONFIG_AV1_ENCODER)
-    set(AOM_UNIT_TEST_COMMON_SOURCES
-        ${AOM_UNIT_TEST_COMMON_SOURCES}
-        "${AOM_ROOT}/test/av1_convolve_optimz_test.cc"
-        "${AOM_ROOT}/test/av1_convolve_test.cc"
-        "${AOM_ROOT}/test/av1_txfm_test.cc"
-        "${AOM_ROOT}/test/av1_txfm_test.h"
-        "${AOM_ROOT}/test/intrapred_test.cc"
-        "${AOM_ROOT}/test/lpf_8_test.cc"
-        "${AOM_ROOT}/test/simd_cmp_impl.h")
-
-    set(AOM_UNIT_TEST_ENCODER_SOURCES
-        ${AOM_UNIT_TEST_ENCODER_SOURCES}
-        "${AOM_ROOT}/test/motion_vector_test.cc")
-
-    if (CONFIG_CDEF)
-      if (CONFIG_CDEF_SINGLEPASS)
-        set(AOM_UNIT_TEST_COMMON_SOURCES
-            ${AOM_UNIT_TEST_COMMON_SOURCES}
-            "${AOM_ROOT}/test/cdef_test.cc")
-      else ()
-        set(AOM_UNIT_TEST_COMMON_SOURCES
-            ${AOM_UNIT_TEST_COMMON_SOURCES}
-            "${AOM_ROOT}/test/clpf_test.cc"
-            "${AOM_ROOT}/test/dering_test.cc")
-      endif ()
-    endif ()
-
-    # Omit 4-tap filter intra predictor test-- currently a 3-tap filter is in
-    # use.
-    #if (CONFIG_FILTER_INTRA)
-    #  if (HAVE_SSE4_1)
-    #    set(AOM_UNIT_TEST_COMMON_SOURCES
-    #        ${AOM_UNIT_TEST_COMMON_SOURCES}
-    #        "${AOM_ROOT}/test/filterintra_predictors_test.cc")
-    #  endif ()
-    #endif ()
-
-    if (CONFIG_INTRABC)
-        set(AOM_UNIT_TEST_COMMON_SOURCES
-            ${AOM_UNIT_TEST_COMMON_SOURCES}
-            "${AOM_ROOT}/test/intrabc_test.cc")
-    endif ()
-
-    if (CONFIG_LOOP_RESTORATION)
-      set(AOM_UNIT_TEST_COMMON_SOURCES
-          ${AOM_UNIT_TEST_COMMON_SOURCES}
-          "${AOM_ROOT}/test/selfguided_filter_test.cc")
-
-      if (HAVE_SSE2)
-        set(AOM_UNIT_TEST_COMMON_SOURCES
-            ${AOM_UNIT_TEST_COMMON_SOURCES}
-            "${AOM_ROOT}/test/hiprec_convolve_test.cc"
-            "${AOM_ROOT}/test/hiprec_convolve_test_util.cc"
-            "${AOM_ROOT}/test/hiprec_convolve_test_util.h")
-      endif ()
-    endif ()
-
-    set(AOM_UNIT_TEST_COMMON_INTRIN_NEON
-        ${AOM_UNIT_TEST_COMMON_INTRIN_NEON}
-        "${AOM_ROOT}/test/simd_cmp_neon.cc")
-    set(AOM_UNIT_TEST_COMMON_INTRIN_SSE2
-        ${AOM_UNIT_TEST_COMMON_INTRIN_SSE2}
-        "${AOM_ROOT}/test/simd_cmp_sse2.cc")
-    set(AOM_UNIT_TEST_COMMON_INTRIN_SSSE3
-        ${AOM_UNIT_TEST_COMMON_INTRIN_SSSE3}
-        "${AOM_ROOT}/test/simd_cmp_ssse3.cc")
-    set(AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1
-        ${AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1}
-        "${AOM_ROOT}/test/simd_cmp_sse4.cc")
-    set(AOM_UNIT_TEST_COMMON_INTRIN_AVX2
-        ${AOM_UNIT_TEST_COMMON_INTRIN_AVX2}
-        "${AOM_ROOT}/test/simd_cmp_avx2.cc")
-  endif ()
-endif ()
-
-if (CONFIG_AV1_ENCODER)
-  set(AOM_UNIT_TEST_ENCODER_SOURCES
-      ${AOM_UNIT_TEST_ENCODER_SOURCES}
-      "${AOM_ROOT}/test/active_map_test.cc"
-      "${AOM_ROOT}/test/borders_test.cc"
-      "${AOM_ROOT}/test/cpu_speed_test.cc"
-      "${AOM_ROOT}/test/end_to_end_test.cc"
-      "${AOM_ROOT}/test/frame_size_tests.cc"
-      "${AOM_ROOT}/test/lossless_test.cc")
-
-  if (NOT BUILD_SHARED_LIBS)
-    set(AOM_UNIT_TEST_ENCODER_SOURCES
-        ${AOM_UNIT_TEST_ENCODER_SOURCES}
-        "${AOM_ROOT}/test/arf_freq_test.cc"
-        "${AOM_ROOT}/test/av1_dct_test.cc"
-        "${AOM_ROOT}/test/av1_fht16x16_test.cc"
-        "${AOM_ROOT}/test/av1_fht32x32_test.cc"
-        "${AOM_ROOT}/test/av1_fht8x8_test.cc"
-        "${AOM_ROOT}/test/av1_fwd_txfm1d_test.cc"
-        "${AOM_ROOT}/test/av1_fwd_txfm2d_test.cc"
-        "${AOM_ROOT}/test/av1_inv_txfm1d_test.cc"
-        "${AOM_ROOT}/test/av1_inv_txfm2d_test.cc"
-        "${AOM_ROOT}/test/av1_inv_txfm_test.cc"
-        "${AOM_ROOT}/test/av1_wedge_utils_test.cc"
-        "${AOM_ROOT}/test/avg_test.cc"
-        "${AOM_ROOT}/test/blend_a64_mask_1d_test.cc"
-        "${AOM_ROOT}/test/blend_a64_mask_test.cc"
-        "${AOM_ROOT}/test/error_block_test.cc"
-        "${AOM_ROOT}/test/fdct4x4_test.cc"
-        "${AOM_ROOT}/test/fdct8x8_test.cc"
-        "${AOM_ROOT}/test/hadamard_test.cc"
-        "${AOM_ROOT}/test/masked_sad_test.cc"
-        "${AOM_ROOT}/test/masked_variance_test.cc"
-        "${AOM_ROOT}/test/minmax_test.cc"
-        "${AOM_ROOT}/test/subtract_test.cc"
-        "${AOM_ROOT}/test/sum_squares_test.cc"
-        "${AOM_ROOT}/test/variance_test.cc")
-
-    if (NOT CONFIG_AOM_QM AND NOT CONFIG_NEW_QUANT)
-      set(AOM_UNIT_TEST_ENCODER_SOURCES
-          ${AOM_UNIT_TEST_ENCODER_SOURCES}
-          "${AOM_ROOT}/test/quantize_func_test.cc")
-    endif ()
-
-    if (CONFIG_CONVOLVE_ROUND)
-      set(AOM_UNIT_TEST_ENCODER_SOURCES
-          ${AOM_UNIT_TEST_ENCODER_SOURCES}
-          "${AOM_ROOT}/test/convolve_round_test.cc")
-      if (HAVE_SSE2)
-        set(AOM_UNIT_TEST_ENCODER_SOURCES
-            ${AOM_UNIT_TEST_ENCODER_SOURCES}
-            "${AOM_ROOT}/test/av1_convolve_2d_test.cc"
-            "${AOM_ROOT}/test/av1_convolve_2d_test_util.cc"
-            "${AOM_ROOT}/test/av1_convolve_2d_test_util.h")
-      endif ()
-      if (NOT CONFIG_COMPOUND_ROUND)
-        if (HAVE_SSE4_1)
-          set(AOM_UNIT_TEST_ENCODER_SOURCES
-              ${AOM_UNIT_TEST_ENCODER_SOURCES}
-              "${AOM_ROOT}/test/av1_convolve_scale_test.cc")
-        endif ()
-      endif ()
-    endif ()
-
-    if (CONFIG_EXT_TX)
-      set(AOM_UNIT_TEST_ENCODER_SOURCES
-          ${AOM_UNIT_TEST_ENCODER_SOURCES}
-          "${AOM_ROOT}/test/av1_fht16x32_test.cc"
-          "${AOM_ROOT}/test/av1_fht16x8_test.cc"
-          "${AOM_ROOT}/test/av1_fht32x16_test.cc"
-          "${AOM_ROOT}/test/av1_fht4x4_test.cc"
-          "${AOM_ROOT}/test/av1_fht4x8_test.cc"
-          "${AOM_ROOT}/test/av1_fht8x16_test.cc"
-          "${AOM_ROOT}/test/av1_fht8x4_test.cc")
-    endif ()
-
-    if (CONFIG_GLOBAL_MOTION)
-      set(AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1
-          ${AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1}
-          "${AOM_ROOT}/test/corner_match_test.cc")
-    endif ()
-
-    if (CONFIG_MOTION_VAR)
-      set(AOM_UNIT_TEST_ENCODER_SOURCES
-          ${AOM_UNIT_TEST_ENCODER_SOURCES}
-          "${AOM_ROOT}/test/obmc_sad_test.cc"
-          "${AOM_ROOT}/test/obmc_variance_test.cc")
-    endif ()
-
-    if (CONFIG_TX64X64)
-      set(AOM_UNIT_TEST_ENCODER_SOURCES
-          ${AOM_UNIT_TEST_ENCODER_SOURCES}
-          "${AOM_ROOT}/test/av1_fht64x64_test.cc")
-    endif ()
-  endif ()
-endif ()
-
-if (NOT BUILD_SHARED_LIBS)
-  if (CONFIG_AV1_DECODER AND CONFIG_AV1_ENCODER)
-    set(AOM_UNIT_TEST_COMMON_SOURCES
-        ${AOM_UNIT_TEST_COMMON_SOURCES}
-        "${AOM_ROOT}/test/divu_small_test.cc"
-        "${AOM_ROOT}/test/ethread_test.cc"
-        "${AOM_ROOT}/test/coding_path_sync.cc"
-        "${AOM_ROOT}/test/idct8x8_test.cc"
-        "${AOM_ROOT}/test/partial_idct_test.cc"
-        "${AOM_ROOT}/test/superframe_test.cc"
-        "${AOM_ROOT}/test/tile_independence_test.cc")
-
-    if (CONFIG_ANS)
-      set(AOM_UNIT_TEST_COMMON_SOURCES
-          ${AOM_UNIT_TEST_COMMON_SOURCES}
-          "${AOM_ROOT}/test/ans_codec_test.cc"
-          "${AOM_ROOT}/test/ans_test.cc")
-    else ()
-      set(AOM_UNIT_TEST_COMMON_SOURCES
-          ${AOM_UNIT_TEST_COMMON_SOURCES}
-          "${AOM_ROOT}/test/binary_codes_test.cc"
-          "${AOM_ROOT}/test/boolcoder_test.cc")
-    endif ()
-
-    if (CONFIG_EXT_TILE)
-      set(AOM_UNIT_TEST_COMMON_SOURCES
-          ${AOM_UNIT_TEST_COMMON_SOURCES}
-          "${AOM_ROOT}/test/av1_ext_tile_test.cc")
-    endif ()
-  endif ()
-endif ()
-
-if (CONFIG_HIGHBITDEPTH)
-  if (CONFIG_AV1_ENCODER AND NOT BUILD_SHARED_LIBS)
-    set(AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1
-        ${AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1}
-        "${AOM_ROOT}/test/av1_highbd_iht_test.cc"
-        "${AOM_ROOT}/test/av1_quantize_test.cc")
-  endif ()
-
-  if (CONFIG_INTERNAL_STATS)
-    set(AOM_UNIT_TEST_COMMON_SOURCES
-        ${AOM_UNIT_TEST_COMMON_SOURCES}
-        "${AOM_ROOT}/test/hbd_metrics_test.cc")
-  endif ()
-endif ()
-
-if (CONFIG_UNIT_TESTS)
+list(APPEND AOM_UNIT_TEST_WRAPPER_SOURCES "${AOM_CONFIG_DIR}/usage_exit.c"
+            "${AOM_ROOT}/test/test_libaom.cc")
+
+list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+            "${AOM_ROOT}/test/acm_random.h"
+            "${AOM_ROOT}/test/aom_integer_test.cc"
+            "${AOM_ROOT}/test/clear_system_state.h"
+            "${AOM_ROOT}/test/codec_factory.h"
+            "${AOM_ROOT}/test/decode_test_driver.cc"
+            "${AOM_ROOT}/test/decode_test_driver.h"
+            "${AOM_ROOT}/test/function_equivalence_test.h"
+            "${AOM_ROOT}/test/md5_helper.h"
+            "${AOM_ROOT}/test/register_state_check.h"
+            "${AOM_ROOT}/test/test_vectors.cc"
+            "${AOM_ROOT}/test/test_vectors.h"
+            "${AOM_ROOT}/test/transform_test_base.h"
+            "${AOM_ROOT}/test/util.h"
+            "${AOM_ROOT}/test/video_source.h")
+
+if(CONFIG_INTERNAL_STATS)
+  list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+              "${AOM_ROOT}/test/hbd_metrics_test.cc")
+endif()
+
+list(APPEND AOM_UNIT_TEST_DECODER_SOURCES "${AOM_ROOT}/test/decode_api_test.cc"
+            "${AOM_ROOT}/test/invalid_file_test.cc"
+            "${AOM_ROOT}/test/test_vector_test.cc"
+            "${AOM_ROOT}/test/ivf_video_source.h")
+
+list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
+            "${AOM_ROOT}/test/active_map_test.cc"
+            "${AOM_ROOT}/test/altref_test.cc"
+            "${AOM_ROOT}/test/aq_segment_test.cc"
+            "${AOM_ROOT}/test/av1_txfm_test.cc"
+            "${AOM_ROOT}/test/av1_txfm_test.h"
+            "${AOM_ROOT}/test/borders_test.cc"
+            "${AOM_ROOT}/test/cpu_speed_test.cc"
+            "${AOM_ROOT}/test/datarate_test.cc"
+            "${AOM_ROOT}/test/encode_api_test.cc"
+            "${AOM_ROOT}/test/encode_test_driver.cc"
+            "${AOM_ROOT}/test/encode_test_driver.h"
+            "${AOM_ROOT}/test/end_to_end_test.cc"
+            "${AOM_ROOT}/test/error_resilience_test.cc"
+            "${AOM_ROOT}/test/frame_size_tests.cc"
+            "${AOM_ROOT}/test/horz_superres_test.cc"
+            "${AOM_ROOT}/test/i420_video_source.h"
+            "${AOM_ROOT}/test/lossless_test.cc"
+            "${AOM_ROOT}/test/monochrome_test.cc"
+            "${AOM_ROOT}/test/qm_test.cc"
+            "${AOM_ROOT}/test/resize_test.cc"
+            "${AOM_ROOT}/test/scalability_test.cc"
+            "${AOM_ROOT}/test/y4m_test.cc"
+            "${AOM_ROOT}/test/y4m_video_source.h"
+            "${AOM_ROOT}/test/yuv_video_source.h")
+
+list(APPEND AOM_DECODE_PERF_TEST_SOURCES "${AOM_ROOT}/test/decode_perf_test.cc")
+list(APPEND AOM_ENCODE_PERF_TEST_SOURCES "${AOM_ROOT}/test/encode_perf_test.cc")
+list(APPEND AOM_UNIT_TEST_WEBM_SOURCES "${AOM_ROOT}/test/webm_video_source.h")
+list(APPEND AOM_TEST_INTRA_PRED_SPEED_SOURCES "${AOM_CONFIG_DIR}/usage_exit.c"
+            "${AOM_ROOT}/test/test_intra_pred_speed.cc")
+
+if(NOT BUILD_SHARED_LIBS)
+  list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+              "${AOM_ROOT}/test/cdef_test.cc"
+              "${AOM_ROOT}/test/cfl_test.cc"
+              "${AOM_ROOT}/test/convolve_test.cc"
+              "${AOM_ROOT}/test/hiprec_convolve_test.cc"
+              "${AOM_ROOT}/test/hiprec_convolve_test_util.cc"
+              "${AOM_ROOT}/test/hiprec_convolve_test_util.h"
+              "${AOM_ROOT}/test/intrabc_test.cc"
+              "${AOM_ROOT}/test/intrapred_test.cc"
+              "${AOM_ROOT}/test/lpf_test.cc"
+              "${AOM_ROOT}/test/onyxc_int_test.cc"
+              "${AOM_ROOT}/test/scan_test.cc"
+              "${AOM_ROOT}/test/selfguided_filter_test.cc"
+              "${AOM_ROOT}/test/simd_cmp_impl.h"
+              "${AOM_ROOT}/test/simd_impl.h")
+
+  if(CONFIG_ACCOUNTING)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/accounting_test.cc")
+  endif()
+
+  if(CONFIG_AV1_DECODER AND CONFIG_AV1_ENCODER)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/av1_ext_tile_test.cc"
+                "${AOM_ROOT}/test/binary_codes_test.cc"
+                "${AOM_ROOT}/test/boolcoder_test.cc"
+                "${AOM_ROOT}/test/coding_path_sync.cc"
+                "${AOM_ROOT}/test/decode_multithreaded_test.cc"
+                "${AOM_ROOT}/test/divu_small_test.cc"
+                "${AOM_ROOT}/test/dr_prediction_test.cc"
+                "${AOM_ROOT}/test/ec_test.cc"
+                "${AOM_ROOT}/test/ethread_test.cc"
+                "${AOM_ROOT}/test/film_grain_table_test.cc"
+                "${AOM_ROOT}/test/segment_binarization_sync.cc"
+                "${AOM_ROOT}/test/superframe_test.cc"
+                "${AOM_ROOT}/test/tile_independence_test.cc")
+  endif()
+
+  list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_NEON
+              "${AOM_ROOT}/test/simd_cmp_neon.cc")
+  if(HAVE_NEON)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/simd_neon_test.cc")
+  endif()
+
+  list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_SSE2
+              "${AOM_ROOT}/test/simd_cmp_sse2.cc")
+  if(HAVE_SSE2)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/simd_sse2_test.cc")
+  endif()
+
+  list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_SSSE3
+              "${AOM_ROOT}/test/simd_cmp_ssse3.cc")
+  if(HAVE_SSSE3)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/simd_ssse3_test.cc")
+  endif()
+
+  if(HAVE_SSE4)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/simd_sse4_test.cc")
+  endif()
+
+  if(HAVE_SSE4_1)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/filterintra_test.cc")
+  endif()
+
+  list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_AVX2
+              "${AOM_ROOT}/test/simd_cmp_avx2.cc")
+  if(HAVE_AVX2)
+    list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
+                "${AOM_ROOT}/test/simd_avx2_test.cc")
+  endif()
+
+  list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
+              "${AOM_ROOT}/test/arf_freq_test.cc"
+              "${AOM_ROOT}/test/av1_convolve_2d_test.cc"
+              "${AOM_ROOT}/test/av1_convolve_2d_test_util.cc"
+              "${AOM_ROOT}/test/av1_convolve_2d_test_util.h"
+              "${AOM_ROOT}/test/av1_fwd_txfm1d_test.cc"
+              "${AOM_ROOT}/test/av1_fwd_txfm2d_test.cc"
+              "${AOM_ROOT}/test/av1_inv_txfm1d_test.cc"
+              "${AOM_ROOT}/test/av1_inv_txfm2d_test.cc"
+              "${AOM_ROOT}/test/av1_round_shift_array_test.cc"
+              "${AOM_ROOT}/test/av1_wedge_utils_test.cc"
+              "${AOM_ROOT}/test/blend_a64_mask_1d_test.cc"
+              "${AOM_ROOT}/test/blend_a64_mask_test.cc"
+              "${AOM_ROOT}/test/comp_avg_pred_test.cc"
+              "${AOM_ROOT}/test/comp_avg_pred_test.h"
+              "${AOM_ROOT}/test/comp_mask_variance_test.cc"
+              "${AOM_ROOT}/test/encodetxb_test.cc"
+              "${AOM_ROOT}/test/error_block_test.cc"
+              "${AOM_ROOT}/test/fft_test.cc"
+              "${AOM_ROOT}/test/fwht4x4_test.cc"
+              "${AOM_ROOT}/test/masked_sad_test.cc"
+              "${AOM_ROOT}/test/masked_variance_test.cc"
+              "${AOM_ROOT}/test/motion_vector_test.cc"
+              "${AOM_ROOT}/test/noise_model_test.cc"
+              "${AOM_ROOT}/test/obmc_sad_test.cc"
+              "${AOM_ROOT}/test/obmc_variance_test.cc"
+              "${AOM_ROOT}/test/sad_test.cc"
+              "${AOM_ROOT}/test/subtract_test.cc"
+              "${AOM_ROOT}/test/reconinter_test.cc"
+              "${AOM_ROOT}/test/sum_squares_test.cc"
+              "${AOM_ROOT}/test/variance_test.cc")
+
+  list(APPEND AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1
+              "${AOM_ROOT}/test/av1_highbd_iht_test.cc"
+              "${AOM_ROOT}/test/av1_quantize_test.cc"
+              "${AOM_ROOT}/test/corner_match_test.cc"
+              "${AOM_ROOT}/test/quantize_func_test.cc"
+              "${AOM_ROOT}/test/simd_cmp_sse4.cc")
+
+  if(HAVE_SSE4_1)
+    list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
+                "${AOM_ROOT}/test/av1_convolve_scale_test.cc"
+                "${AOM_ROOT}/test/av1_horz_only_frame_superres_test.cc"
+                "${AOM_ROOT}/test/intra_edge_test.cc"
+                "${AOM_ROOT}/test/warp_filter_test.cc"
+                "${AOM_ROOT}/test/warp_filter_test_util.cc"
+                "${AOM_ROOT}/test/warp_filter_test_util.h")
+  endif()
+
+  if(HAVE_SSE4_2)
+    list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES "${AOM_ROOT}/test/hash_test.cc")
+  endif()
+
+endif()
+
+if(ENABLE_TESTS)
   find_package(PythonInterp)
-  if (NOT PYTHONINTERP_FOUND)
-    message(WARNING "--- Unit tests disabled: Python not found.")
-    set(CONFIG_UNIT_TESTS 0)
-  endif ()
-
-  if (MSVC)
-    # Force static run time to avoid collisions with googletest.
+  if(NOT PYTHONINTERP_FOUND)
+    message(FATAL_ERROR
+              "--- Unit tests require Python, rerun cmake with "
+              "-DENABLE_TESTS=0 to avoid this error, or install Python and "
+              "make sure it's in your PATH.")
+  endif()
+
+  if(MSVC) # Force static run time to avoid collisions with googletest.
     include("${AOM_ROOT}/build/cmake/msvc_runtime.cmake")
-  endif ()
+  endif()
 
-  if (BUILD_SHARED_LIBS AND APPLE)
-    # Silence an RPATH warning.
+  if(BUILD_SHARED_LIBS AND APPLE) # Silence an RPATH warning.
     set(CMAKE_MACOSX_RPATH 1)
-  endif ()
+  endif()
 
   include_directories(
-    "${AOM_ROOT}/third_party/googletest/src/googletest/src"
     "${AOM_ROOT}/third_party/googletest/src/googletest/include")
 
-  if (AOM_DISABLE_GTEST_CMAKE)
+  if(AOM_DISABLE_GTEST_CMAKE)
     include_directories("${AOM_ROOT}/third_party/googletest/src/googletest")
-    add_library(gtest STATIC
+    add_library(
+      gtest
+      STATIC
       "${AOM_ROOT}/third_party/googletest/src/googletest/src/gtest-all.cc")
-  else ()
+  else()
     add_subdirectory("${AOM_ROOT}/third_party/googletest/src/googletest"
                      EXCLUDE_FROM_ALL)
-  endif ()
-
-endif ()
-
-# Setup the targets for CONFIG_UNIT_TESTS. The libaom and app util targets must
-# exist before this function is called.
-function (setup_aom_test_targets)
+  endif()
+endif()
+
+# Setup testdata download targets, test build targets, and test run targets. The
+# libaom and app util targets must exist before this function is called.
+function(setup_aom_test_targets)
+
+  # TODO(tomfinegan): Build speed optimization. $AOM_UNIT_TEST_COMMON_SOURCES
+  # and $AOM_UNIT_TEST_ENCODER_SOURCES are very large. The build of test targets
+  # could be sped up (on multicore build machines) by compiling sources in each
+  # list into separate object library targets, and then linking them into
+  # test_libaom.
   add_library(test_aom_common OBJECT ${AOM_UNIT_TEST_COMMON_SOURCES})
   add_dependencies(test_aom_common aom)
 
-  if (CONFIG_AV1_DECODER)
+  if(CONFIG_AV1_DECODER)
     add_library(test_aom_decoder OBJECT ${AOM_UNIT_TEST_DECODER_SOURCES})
     add_dependencies(test_aom_decoder aom)
-  endif ()
+  endif()
 
-  if (CONFIG_AV1_ENCODER)
+  if(CONFIG_AV1_ENCODER)
     add_library(test_aom_encoder OBJECT ${AOM_UNIT_TEST_ENCODER_SOURCES})
     add_dependencies(test_aom_encoder aom)
-  endif ()
+  endif()
 
   add_executable(test_libaom ${AOM_UNIT_TEST_WRAPPER_SOURCES}
                  $<TARGET_OBJECTS:aom_common_app_util>
                  $<TARGET_OBJECTS:test_aom_common>)
-  set(AOM_APP_TARGETS ${AOM_APP_TARGETS} test_libaom)
+  list(APPEND AOM_APP_TARGETS test_libaom)
 
-  if (CONFIG_AV1_DECODER)
-    target_sources(test_libaom PRIVATE
-                   $<TARGET_OBJECTS:aom_decoder_app_util>
+  if(CONFIG_AV1_DECODER)
+    target_sources(test_libaom PRIVATE $<TARGET_OBJECTS:aom_decoder_app_util>
                    $<TARGET_OBJECTS:test_aom_decoder>)
 
-    if (CONFIG_DECODE_PERF_TESTS AND CONFIG_WEBM_IO)
+    if(ENABLE_DECODE_PERF_TESTS AND CONFIG_WEBM_IO)
       target_sources(test_libaom PRIVATE ${AOM_DECODE_PERF_TEST_SOURCES})
-    endif ()
-  endif ()
+    endif()
+  endif()
 
-  if (CONFIG_AV1_ENCODER)
-    target_sources(test_libaom PRIVATE
-                   $<TARGET_OBJECTS:test_aom_encoder>
+  if(CONFIG_AV1_ENCODER)
+    target_sources(test_libaom PRIVATE $<TARGET_OBJECTS:test_aom_encoder>
                    $<TARGET_OBJECTS:aom_encoder_app_util>)
 
-    if (CONFIG_ENCODE_PERF_TESTS)
+    if(ENABLE_ENCODE_PERF_TESTS)
       target_sources(test_libaom PRIVATE ${AOM_ENCODE_PERF_TEST_SOURCES})
-    endif ()
+    endif()
 
-    if (NOT BUILD_SHARED_LIBS)
-      add_executable(test_intra_pred_speed
-                     ${AOM_TEST_INTRA_PRED_SPEED_SOURCES}
+    if(NOT BUILD_SHARED_LIBS)
+      add_executable(test_intra_pred_speed ${AOM_TEST_INTRA_PRED_SPEED_SOURCES}
                      $<TARGET_OBJECTS:aom_common_app_util>)
-      target_link_libraries(test_intra_pred_speed ${AOM_LIB_LINK_TYPE}
-                            aom gtest)
-      set(AOM_APP_TARGETS ${AOM_APP_TARGETS} test_intra_pred_speed)
-    endif ()
-  endif ()
+      target_link_libraries(test_intra_pred_speed ${AOM_LIB_LINK_TYPE} aom
+                            gtest)
+      list(APPEND AOM_APP_TARGETS test_intra_pred_speed)
+    endif()
+  endif()
 
   target_link_libraries(test_libaom ${AOM_LIB_LINK_TYPE} aom gtest)
 
-  if (CONFIG_LIBYUV)
+  if(CONFIG_LIBYUV)
     target_sources(test_libaom PRIVATE $<TARGET_OBJECTS:yuv>)
-  endif ()
-  if (CONFIG_WEBM_IO)
+  endif()
+  if(CONFIG_WEBM_IO)
     target_sources(test_libaom PRIVATE $<TARGET_OBJECTS:webm>)
-  endif ()
-  if (HAVE_SSE2)
+  endif()
+  if(HAVE_SSE2)
     add_intrinsics_source_to_target("-msse2" "test_libaom"
                                     "AOM_UNIT_TEST_COMMON_INTRIN_SSE2")
-  endif ()
-  if (HAVE_SSSE3)
+  endif()
+  if(HAVE_SSSE3)
     add_intrinsics_source_to_target("-mssse3" "test_libaom"
                                     "AOM_UNIT_TEST_COMMON_INTRIN_SSSE3")
-  endif ()
-  if (HAVE_SSE4_1)
+  endif()
+  if(HAVE_SSE4_1)
     add_intrinsics_source_to_target("-msse4.1" "test_libaom"
                                     "AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1")
-    if (CONFIG_AV1_ENCODER)
-      if (AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1)
+    if(CONFIG_AV1_ENCODER)
+      if(AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1)
         add_intrinsics_source_to_target("-msse4.1" "test_libaom"
                                         "AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1")
-      endif ()
-    endif ()
-  endif ()
-  if (HAVE_AVX2)
+      endif()
+    endif()
+  endif()
+  if(HAVE_AVX2)
     add_intrinsics_source_to_target("-mavx2" "test_libaom"
                                     "AOM_UNIT_TEST_COMMON_INTRIN_AVX2")
-  endif ()
-  if (HAVE_NEON)
+  endif()
+  if(HAVE_NEON)
     add_intrinsics_source_to_target("${AOM_NEON_INTRIN_FLAG}" "test_libaom"
                                     "AOM_UNIT_TEST_COMMON_INTRIN_NEON")
-  endif ()
-
-  if (NOT ENABLE_IDE_TEST_HOSTING)
-    if (MSVC OR XCODE)
-      # Skip creation of test data download and test run targets when generating
-      # for Visual Studio and Xcode unless the user explicitly requests IDE test
-      # hosting. This is done to make build cycles in the IDE tolerable when the
-      # IDE command for build project is used to build AOM. Default behavior in
-      # IDEs is to build all targets, and the test run takes hours.
-      return ()
-    endif ()
-  endif ()
-
-  make_test_data_lists("${AOM_UNIT_TEST_DATA_LIST_FILE}"
-                       test_files test_file_checksums)
-  list(LENGTH test_files num_test_files)
-  list(LENGTH test_file_checksums num_test_file_checksums)
-
-  math(EXPR max_file_index "${num_test_files} - 1")
-  foreach (test_index RANGE ${max_file_index})
-    list(GET test_files ${test_index} test_file)
-    list(GET test_file_checksums ${test_index} test_file_checksum)
-    add_custom_target(testdata_${test_index}
-                      COMMAND ${CMAKE_COMMAND}
-                        -DAOM_CONFIG_DIR="${AOM_CONFIG_DIR}"
-                        -DAOM_ROOT="${AOM_ROOT}"
-                        -DAOM_TEST_FILE="${test_file}"
-                        -DAOM_TEST_CHECKSUM=${test_file_checksum}
-                        -P "${AOM_ROOT}/test/test_data_download_worker.cmake")
-    set(testdata_targets ${testdata_targets} testdata_${test_index})
-  endforeach ()
-
-  # Create a custom build target for running each test data download target.
-  add_custom_target(testdata)
-  add_dependencies(testdata ${testdata_targets})
-
-  # Pick a reasonable number of targets (this controls parallelization).
-  ProcessorCount(num_test_targets)
-  if (num_test_targets EQUAL 0)
-    # Just default to 10 targets when there's no processor count available.
-    set(num_test_targets 10)
-  endif ()
-
-  math(EXPR max_shard_index "${num_test_targets} - 1")
-  foreach (shard_index RANGE ${max_shard_index})
-    set(test_name "test_${shard_index}")
-    add_custom_target(${test_name}
-                      COMMAND ${CMAKE_COMMAND}
-                      -DGTEST_SHARD_INDEX=${shard_index}
-                      -DGTEST_TOTAL_SHARDS=${num_test_targets}
-                      -DTEST_LIBAOM=$<TARGET_FILE:test_libaom>
-                      -P "${AOM_ROOT}/test/test_runner.cmake"
-                      DEPENDS testdata test_libaom)
-    set(test_targets ${test_targets} ${test_name})
-  endforeach ()
-  add_custom_target(runtests)
-  add_dependencies(runtests ${test_targets})
+  endif()
+
+  if(ENABLE_TESTDATA)
+    make_test_data_lists("${AOM_UNIT_TEST_DATA_LIST_FILE}" test_files
+                         test_file_checksums)
+    list(LENGTH test_files num_test_files)
+    list(LENGTH test_file_checksums num_test_file_checksums)
+
+    math(EXPR max_file_index "${num_test_files} - 1")
+    foreach(test_index RANGE ${max_file_index})
+      list(GET test_files ${test_index} test_file)
+      list(GET test_file_checksums ${test_index} test_file_checksum)
+      add_custom_target(testdata_${test_index}
+                        COMMAND
+                          ${CMAKE_COMMAND} -DAOM_CONFIG_DIR="${AOM_CONFIG_DIR}"
+                          -DAOM_ROOT="${AOM_ROOT}"
+                          -DAOM_TEST_FILE="${test_file}"
+                          -DAOM_TEST_CHECKSUM=${test_file_checksum} -P
+                          "${AOM_ROOT}/test/test_data_download_worker.cmake")
+      list(APPEND testdata_targets testdata_${test_index})
+    endforeach()
+
+    # Create a custom build target for running each test data download target.
+    add_custom_target(testdata)
+    add_dependencies(testdata ${testdata_targets})
+
+    # Skip creation of test run targets when generating for Visual Studio and
+    # Xcode unless the user explicitly requests IDE test hosting. This is done
+    # to make build cycles in the IDE tolerable when the IDE command for build
+    # project is used to build AOM. Default behavior in IDEs is to build all
+    # targets, and the test run takes hours.
+    if(((NOT MSVC) AND (NOT XCODE)) OR ENABLE_IDE_TEST_HOSTING)
+
+      # Pick a reasonable number of targets (this controls parallelization).
+      processorcount(num_test_targets)
+      if(num_test_targets EQUAL 0) # Just default to 10 targets when there's no
+                                   # processor count available.
+        set(num_test_targets 10)
+      endif()
+
+      math(EXPR max_shard_index "${num_test_targets} - 1")
+      foreach(shard_index RANGE ${max_shard_index})
+        set(test_name "test_${shard_index}")
+        add_custom_target(${test_name}
+                          COMMAND ${CMAKE_COMMAND}
+                                  -DGTEST_SHARD_INDEX=${shard_index}
+                                  -DGTEST_TOTAL_SHARDS=${num_test_targets}
+                                  -DTEST_LIBAOM=$<TARGET_FILE:test_libaom> -P
+                                  "${AOM_ROOT}/test/test_runner.cmake"
+                          DEPENDS testdata test_libaom)
+        list(APPEND test_targets ${test_name})
+      endforeach()
+      add_custom_target(runtests)
+      add_dependencies(runtests ${test_targets})
+    endif()
+  endif()
+
+  # Collect all variables containing libaom test source files.
+  get_cmake_property(all_cmake_vars VARIABLES)
+  foreach(var ${all_cmake_vars})
+
+    # https://github.com/cheshirekow/cmake_format/issues/34
+# cmake-format: off
+    if (("${var}" MATCHES "_TEST_" AND NOT
+         "${var}" MATCHES
+         "_DATA_\|_CMAKE_\|INTRA_PRED\|_COMPILED\|_HOSTING\|_PERF_\|CODER_")
+        OR (CONFIG_AV1_ENCODER AND ENABLE_ENCODE_PERF_TESTS AND
+            "${var}" MATCHES "_ENCODE_PERF_TEST_")
+        OR (CONFIG_AV1_DECODER AND ENABLE_DECODE_PERF_TESTS AND
+            "${var}" MATCHES "_DECODE_PERF_TEST_")
+        OR (CONFIG_AV1_ENCODER AND "${var}" MATCHES "_TEST_ENCODER_")
+        OR (CONFIG_AV1_DECODER AND  "${var}" MATCHES "_TEST_DECODER_"))
+      list(APPEND aom_test_source_vars ${var})
+    endif()
+    # cmake-format:on
+  endforeach()
+
+  # Libaom_test_srcs.txt generation.
+  set(libaom_test_srcs_txt_file "${AOM_CONFIG_DIR}/libaom_test_srcs.txt")
+  file(WRITE "${libaom_test_srcs_txt_file}"
+             "# This file is generated. DO NOT EDIT.\n")
+
+  # Static source file list first.
+  foreach(aom_test_source_var ${aom_test_source_vars})
+    foreach(file ${${aom_test_source_var}})
+      if(NOT "${file}" MATCHES "${AOM_CONFIG_DIR}")
+        string(REPLACE "${AOM_ROOT}/" "" file "${file}")
+        file(APPEND "${libaom_test_srcs_txt_file}" "${file}\n")
+      endif()
+    endforeach()
+  endforeach()
 
   set(AOM_APP_TARGETS ${AOM_APP_TARGETS} PARENT_SCOPE)
-endfunction ()
-
-endif ()  # AOM_TEST_TEST_CMAKE_
+endfunction()
diff --git a/third_party/aom/test/test.mk b/third_party/aom/test/test.mk
deleted file mode 100644
index e6b0c534cc..0000000000
--- a/third_party/aom/test/test.mk
+++ /dev/null
@@ -1,268 +0,0 @@
-##
-## Copyright (c) 2017, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-
-LIBAOM_TEST_SRCS-yes += acm_random.h
-LIBAOM_TEST_SRCS-yes += clear_system_state.h
-LIBAOM_TEST_SRCS-yes += codec_factory.h
-LIBAOM_TEST_SRCS-yes += md5_helper.h
-LIBAOM_TEST_SRCS-yes += register_state_check.h
-LIBAOM_TEST_SRCS-yes += test.mk
-LIBAOM_TEST_SRCS-yes += test_libaom.cc
-LIBAOM_TEST_SRCS-yes += util.h
-LIBAOM_TEST_SRCS-yes += video_source.h
-LIBAOM_TEST_SRCS-yes += transform_test_base.h
-LIBAOM_TEST_SRCS-yes += function_equivalence_test.h
-
-##
-## BLACK BOX TESTS
-##
-## Black box tests only use the public API.
-##
-LIBAOM_TEST_SRCS-yes                   += ../md5_utils.h ../md5_utils.c
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER)    += ivf_video_source.h
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += ../y4minput.h ../y4minput.c
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += altref_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += aq_segment_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += datarate_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += encode_api_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += error_resilience_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += i420_video_source.h
-#LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += realtime_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += resize_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += y4m_video_source.h
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += yuv_video_source.h
-
-#LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += level_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += active_map_refresh_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += active_map_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += borders_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += cpu_speed_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += frame_size_tests.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += lossless_test.cc
-
-LIBAOM_TEST_SRCS-yes                   += decode_test_driver.cc
-LIBAOM_TEST_SRCS-yes                   += decode_test_driver.h
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += encode_test_driver.cc
-LIBAOM_TEST_SRCS-yes                   += encode_test_driver.h
-
-## IVF writing.
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += ../ivfenc.c ../ivfenc.h
-
-## Y4m parsing.
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER)    += y4m_test.cc ../y4menc.c ../y4menc.h
-
-## WebM Parsing
-ifeq ($(CONFIG_WEBM_IO), yes)
-LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.cc
-LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.cc
-LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.h
-LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.h
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER) += $(LIBWEBM_PARSER_SRCS)
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER) += ../tools_common.h
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER) += ../webmdec.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER) += ../webmdec.h
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER) += webm_video_source.h
-endif
-
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER) += decode_api_test.cc
-
-# Currently we only support decoder perf tests for av1. Also they read from WebM
-# files, so WebM IO is required.
-ifeq ($(CONFIG_DECODE_PERF_TESTS)$(CONFIG_AV1_DECODER)$(CONFIG_WEBM_IO), \
-      yesyesyes)
-LIBAOM_TEST_SRCS-yes                   += decode_perf_test.cc
-endif
-
-ifeq ($(CONFIG_ENCODE_PERF_TESTS)$(CONFIG_AV1_ENCODER), yesyes)
-LIBAOM_TEST_SRCS-yes += encode_perf_test.cc
-endif
-
-## Multi-codec / unconditional black box tests.
-ifeq ($(findstring yes,$(CONFIG_AV1_ENCODER)),yes)
-LIBAOM_TEST_SRCS-yes += active_map_refresh_test.cc
-LIBAOM_TEST_SRCS-yes += active_map_test.cc
-LIBAOM_TEST_SRCS-yes += end_to_end_test.cc
-endif
-
-##
-## WHITE BOX TESTS
-##
-## Whitebox tests invoke functions not exposed via the public API. Certain
-## shared library builds don't make these functions accessible.
-##
-ifeq ($(CONFIG_SHARED),)
-
-## AV1
-ifeq ($(CONFIG_AV1),yes)
-
-# These tests require both the encoder and decoder to be built.
-ifeq ($(CONFIG_AV1_ENCODER)$(CONFIG_AV1_DECODER),yesyes)
-# IDCT test currently depends on FDCT function
-LIBAOM_TEST_SRCS-yes                   += coding_path_sync.cc
-LIBAOM_TEST_SRCS-yes                   += idct8x8_test.cc
-LIBAOM_TEST_SRCS-yes                   += partial_idct_test.cc
-LIBAOM_TEST_SRCS-yes                   += superframe_test.cc
-LIBAOM_TEST_SRCS-yes                   += tile_independence_test.cc
-LIBAOM_TEST_SRCS-yes                   += ethread_test.cc
-LIBAOM_TEST_SRCS-yes                   += motion_vector_test.cc
-ifneq ($(CONFIG_ANS),yes)
-LIBAOM_TEST_SRCS-yes                   += binary_codes_test.cc
-endif
-ifeq ($(CONFIG_EXT_TILE),yes)
-LIBAOM_TEST_SRCS-yes                   += av1_ext_tile_test.cc
-endif
-ifeq ($(CONFIG_ANS),yes)
-LIBAOM_TEST_SRCS-yes                   += ans_test.cc
-LIBAOM_TEST_SRCS-yes                   += ans_codec_test.cc
-else
-LIBAOM_TEST_SRCS-yes                   += boolcoder_test.cc
-ifeq ($(CONFIG_ACCOUNTING),yes)
-LIBAOM_TEST_SRCS-yes                   += accounting_test.cc
-endif
-endif
-LIBAOM_TEST_SRCS-yes                   += divu_small_test.cc
-#LIBAOM_TEST_SRCS-yes                   += encoder_parms_get_to_decoder.cc
-endif
-
-LIBAOM_TEST_SRCS-$(CONFIG_ADAPT_SCAN)  += scan_test.cc
-LIBAOM_TEST_SRCS-yes                   += convolve_test.cc
-LIBAOM_TEST_SRCS-yes                   += lpf_8_test.cc
-ifeq ($(CONFIG_CDEF_SINGLEPASS),yes)
-LIBAOM_TEST_SRCS-$(CONFIG_CDEF)        += cdef_test.cc
-else
-LIBAOM_TEST_SRCS-$(CONFIG_CDEF)        += dering_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_CDEF)        += clpf_test.cc
-endif
-LIBAOM_TEST_SRCS-yes                   += simd_cmp_impl.h
-LIBAOM_TEST_SRCS-$(HAVE_SSE2)          += simd_cmp_sse2.cc
-LIBAOM_TEST_SRCS-$(HAVE_SSSE3)         += simd_cmp_ssse3.cc
-LIBAOM_TEST_SRCS-$(HAVE_SSE4_1)        += simd_cmp_sse4.cc
-LIBAOM_TEST_SRCS-$(HAVE_AVX2)          += simd_cmp_avx2.cc
-LIBAOM_TEST_SRCS-$(HAVE_NEON)          += simd_cmp_neon.cc
-LIBAOM_TEST_SRCS-yes                   += simd_impl.h
-LIBAOM_TEST_SRCS-$(HAVE_SSE2)          += simd_sse2_test.cc
-LIBAOM_TEST_SRCS-$(HAVE_SSSE3)         += simd_ssse3_test.cc
-LIBAOM_TEST_SRCS-$(HAVE_SSE4_1)        += simd_sse4_test.cc
-LIBAOM_TEST_SRCS-$(HAVE_AVX2)          += simd_avx2_test.cc
-LIBAOM_TEST_SRCS-$(HAVE_NEON)          += simd_neon_test.cc
-LIBAOM_TEST_SRCS-yes                   += intrapred_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_INTRABC)     += intrabc_test.cc
-#LIBAOM_TEST_SRCS-$(CONFIG_AV1_DECODER) += av1_thread_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += dct16x16_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += dct32x32_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += fdct4x4_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += fdct8x8_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += hadamard_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += minmax_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += variance_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += error_block_test.cc
-#LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_quantize_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += subtract_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += arf_freq_test.cc
-ifneq ($(CONFIG_NEW_QUANT), yes)
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += quantize_func_test.cc
-endif
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += block_error_test.cc
-
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_dct_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht4x4_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht8x8_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht16x16_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht32x32_test.cc
-ifeq ($(CONFIG_TX64X64),yes)
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht64x64_test.cc
-endif
-ifeq ($(CONFIG_EXT_TX),yes)
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht4x8_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht8x4_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht8x16_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht16x8_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht16x32_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fht32x16_test.cc
-endif
-
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += sum_squares_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += subtract_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += blend_a64_mask_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += blend_a64_mask_1d_test.cc
-
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += masked_variance_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += masked_sad_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_wedge_utils_test.cc
-
-## Skip the unit test written for 4-tap filter intra predictor, because we
-## revert to 3-tap filter.
-## ifeq ($(CONFIG_FILTER_INTRA),yes)
-## LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += filterintra_predictors_test.cc
-## endif
-
-ifeq ($(CONFIG_MOTION_VAR),yes)
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += obmc_sad_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += obmc_variance_test.cc
-endif
-
-ifeq ($(CONFIG_HIGHBITDEPTH),yes)
-ifeq ($(CONFIG_AV1_ENCODER),yes)
-LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += av1_quantize_test.cc
-LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += av1_highbd_iht_test.cc
-endif
-endif # CONFIG_HIGHBITDEPTH
-endif # AV1
-
-## Multi-codec / unconditional whitebox tests.
-
-ifeq ($(CONFIG_AV1_ENCODER),yes)
-LIBAOM_TEST_SRCS-yes += avg_test.cc
-endif
-ifeq ($(CONFIG_INTERNAL_STATS),yes)
-LIBAOM_TEST_SRCS-$(CONFIG_HIGHBITDEPTH) += hbd_metrics_test.cc
-endif
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += sad_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_txfm_test.h
-LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_txfm_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fwd_txfm1d_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm1d_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_fwd_txfm2d_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm2d_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_convolve_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1) += av1_convolve_optimz_test.cc
-ifneq ($(findstring yes,$(CONFIG_GLOBAL_MOTION)$(CONFIG_WARPED_MOTION)),)
-LIBAOM_TEST_SRCS-$(HAVE_SSE2) += warp_filter_test_util.h
-LIBAOM_TEST_SRCS-$(HAVE_SSE2) += warp_filter_test.cc warp_filter_test_util.cc
-endif
-ifeq ($(CONFIG_LOOP_RESTORATION),yes)
-LIBAOM_TEST_SRCS-$(HAVE_SSE2) += hiprec_convolve_test_util.h
-LIBAOM_TEST_SRCS-$(HAVE_SSE2) += hiprec_convolve_test.cc
-LIBAOM_TEST_SRCS-$(HAVE_SSE2) += hiprec_convolve_test_util.cc
-LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += selfguided_filter_test.cc
-endif
-ifeq ($(CONFIG_CONVOLVE_ROUND),yes)
-LIBAOM_TEST_SRCS-$(HAVE_SSE2) += av1_convolve_2d_test_util.h
-LIBAOM_TEST_SRCS-$(HAVE_SSE2) += av1_convolve_2d_test.cc
-LIBAOM_TEST_SRCS-$(HAVE_SSE2) += av1_convolve_2d_test_util.cc
-LIBAOM_TEST_SRCS-yes          += convolve_round_test.cc
-endif
-
-ifeq (yesx,$(CONFIG_CONVOLVE_ROUND)x$(CONFIG_COMPOUND_ROUND))
-LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += av1_convolve_scale_test.cc
-endif
-
-ifeq ($(CONFIG_GLOBAL_MOTION)$(CONFIG_AV1_ENCODER),yesyes)
-LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += corner_match_test.cc
-endif
-
-TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc
-TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c
-
-endif # CONFIG_SHARED
-
-include $(SRC_PATH_BARE)/test/test-data.mk
diff --git a/third_party/aom/test/test_data_download_worker.cmake b/third_party/aom/test/test_data_download_worker.cmake
index b252dd9602..dc803497da 100644
--- a/third_party/aom/test/test_data_download_worker.cmake
+++ b/third_party/aom/test/test_data_download_worker.cmake
@@ -1,43 +1,46 @@
-##
-## Copyright (c) 2017, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
+#
+# Copyright (c) 2017, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and the
+# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
+# not distributed with this source code in the LICENSE file, you can obtain it
+# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
+# License 1.0 was not distributed with this source code in the PATENTS file, you
+# can obtain it at www.aomedia.org/license/patent.
+#
 include("${AOM_ROOT}/test/test_data_util.cmake")
 
+# https://github.com/cheshirekow/cmake_format/issues/34
+# cmake-format: off
 if (NOT AOM_ROOT OR NOT AOM_CONFIG_DIR OR NOT AOM_TEST_FILE
     OR NOT AOM_TEST_CHECKSUM)
   message(FATAL_ERROR
           "AOM_ROOT, AOM_CONFIG_DIR, AOM_TEST_FILE and AOM_TEST_CHECKSUM must be
           defined.")
 endif ()
+# cmake-format: on
 
-set(AOM_TEST_DATA_URL
-    "https://storage.googleapis.com/downloads.webmproject.org/test_data/libvpx")
+set(AOM_TEST_DATA_URL "http://storage.googleapis.com/aom-test-data")
 
-if (NOT AOM_TEST_DATA_PATH)
+if(NOT AOM_TEST_DATA_PATH)
   set(AOM_TEST_DATA_PATH "$ENV{LIBAOM_TEST_DATA_PATH}")
-endif ()
+endif()
 
-if ("${AOM_TEST_DATA_PATH}" STREQUAL "")
-  message(WARNING "Writing test data to ${AOM_CONFIG_DIR}, set "
-          "$LIBAOM_TEST_DATA_PATH in your environment to avoid this warning.")
+if("${AOM_TEST_DATA_PATH}" STREQUAL "")
+  message(WARNING
+            "Writing test data to ${AOM_CONFIG_DIR}, set "
+            "$LIBAOM_TEST_DATA_PATH in your environment to avoid this warning.")
   set(AOM_TEST_DATA_PATH "${AOM_CONFIG_DIR}")
-endif ()
+endif()
 
-if (NOT EXISTS "${AOM_TEST_DATA_PATH}")
+if(NOT EXISTS "${AOM_TEST_DATA_PATH}")
   file(MAKE_DIRECTORY "${AOM_TEST_DATA_PATH}")
-endif ()
+endif()
 
 expand_test_file_paths("AOM_TEST_FILE" "${AOM_TEST_DATA_PATH}" "filepath")
 expand_test_file_paths("AOM_TEST_FILE" "${AOM_TEST_DATA_URL}" "url")
 
 check_file("${filepath}" "${AOM_TEST_CHECKSUM}" "needs_download")
-if (needs_download)
+if(needs_download)
   download_test_file("${url}" "${AOM_TEST_CHECKSUM}" "${filepath}")
-endif ()
+endif()
diff --git a/third_party/aom/test/test_data_util.cmake b/third_party/aom/test/test_data_util.cmake
index 3904734b5a..bbdd5f4a2d 100644
--- a/third_party/aom/test/test_data_util.cmake
+++ b/third_party/aom/test/test_data_util.cmake
@@ -1,68 +1,394 @@
-##
-## Copyright (c) 2017, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-
-set(AOM_TEST_DATA_FILE_NAMES
-    "hantro_collage_w352h288.yuv"
-    "hantro_odd.yuv"
-    "park_joy_90p_10_420.y4m"
-    "park_joy_90p_10_422.y4m"
-    "park_joy_90p_10_444.y4m"
-    "park_joy_90p_10_440.yuv"
-    "park_joy_90p_12_420.y4m"
-    "park_joy_90p_12_422.y4m"
-    "park_joy_90p_12_444.y4m"
-    "park_joy_90p_12_440.yuv"
-    "park_joy_90p_8_420_a10-1.y4m"
-    "park_joy_90p_8_420.y4m"
-    "park_joy_90p_8_422.y4m"
-    "park_joy_90p_8_444.y4m"
-    "park_joy_90p_8_440.yuv"
-    "desktop_credits.y4m"
-    "niklas_1280_720_30.y4m"
-    "rush_hour_444.y4m"
-    "screendata.y4m"
-    "niklas_640_480_30.yuv")
-
-if (CONFIG_DECODE_PERF_TESTS AND CONFIG_AV1_ENCODER)
-  set(AOM_TEST_DATA_FILE_NAMES
-      ${AOM_TEST_DATA_FILE_NAMES}
-      "niklas_1280_720_30.yuv")
-endif ()
-
-if (CONFIG_ENCODE_PERF_TESTS AND CONFIG_AV1_ENCODER)
-  set(AOM_TEST_DATA_FILE_NAMES
-      ${AOM_TEST_DATA_FILE_NAMES}
-      "desktop_640_360_30.yuv"
-      "kirland_640_480_30.yuv"
-      "macmarcomoving_640_480_30.yuv"
-      "macmarcostationary_640_480_30.yuv"
-      "niklas_1280_720_30.yuv"
-      "tacomanarrows_640_480_30.yuv"
-      "tacomasmallcameramovement_640_480_30.yuv"
-      "thaloundeskmtg_640_480_30.yuv")
-endif ()
+#
+# Copyright (c) 2017, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and the
+# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
+# not distributed with this source code in the LICENSE file, you can obtain it
+# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
+# License 1.0 was not distributed with this source code in the PATENTS file, you
+# can obtain it at www.aomedia.org/license/patent.
+#
+
+list(APPEND AOM_TEST_DATA_FILE_NAMES
+            "hantro_collage_w352h288.yuv"
+            "hantro_odd.yuv"
+            "invalid-bug-1814.ivf"
+            "invalid-bug-1814.ivf.res"
+            "park_joy_90p_10_420.y4m"
+            "park_joy_90p_10_422.y4m"
+            "park_joy_90p_10_444.y4m"
+            "park_joy_90p_12_420.y4m"
+            "park_joy_90p_12_422.y4m"
+            "park_joy_90p_12_444.y4m"
+            "park_joy_90p_8_420_a10-1.y4m"
+            "park_joy_90p_8_420.y4m"
+            "park_joy_90p_8_422.y4m"
+            "park_joy_90p_8_444.y4m"
+            "desktop_credits.y4m"
+            "niklas_1280_720_30.y4m"
+            "rush_hour_444.y4m"
+            "screendata.y4m"
+            "niklas_640_480_30.yuv"
+            "vase10x10.yuv")
+
+if(ENABLE_DECODE_PERF_TESTS AND CONFIG_AV1_ENCODER)
+  list(APPEND AOM_TEST_DATA_FILE_NAMES "niklas_1280_720_30.yuv")
+endif()
+
+if(CONFIG_AV1_DECODER)
+  list(APPEND AOM_TEST_DATA_FILE_NAMES
+              "av1-1-b8-00-quantizer-00.ivf"
+              "av1-1-b8-00-quantizer-00.ivf.md5"
+              "av1-1-b8-00-quantizer-01.ivf"
+              "av1-1-b8-00-quantizer-01.ivf.md5"
+              "av1-1-b8-00-quantizer-02.ivf"
+              "av1-1-b8-00-quantizer-02.ivf.md5"
+              "av1-1-b8-00-quantizer-03.ivf"
+              "av1-1-b8-00-quantizer-03.ivf.md5"
+              "av1-1-b8-00-quantizer-04.ivf"
+              "av1-1-b8-00-quantizer-04.ivf.md5"
+              "av1-1-b8-00-quantizer-05.ivf"
+              "av1-1-b8-00-quantizer-05.ivf.md5"
+              "av1-1-b8-00-quantizer-06.ivf"
+              "av1-1-b8-00-quantizer-06.ivf.md5"
+              "av1-1-b8-00-quantizer-07.ivf"
+              "av1-1-b8-00-quantizer-07.ivf.md5"
+              "av1-1-b8-00-quantizer-08.ivf"
+              "av1-1-b8-00-quantizer-08.ivf.md5"
+              "av1-1-b8-00-quantizer-09.ivf"
+              "av1-1-b8-00-quantizer-09.ivf.md5"
+              "av1-1-b8-00-quantizer-10.ivf"
+              "av1-1-b8-00-quantizer-10.ivf.md5"
+              "av1-1-b8-00-quantizer-11.ivf"
+              "av1-1-b8-00-quantizer-11.ivf.md5"
+              "av1-1-b8-00-quantizer-12.ivf"
+              "av1-1-b8-00-quantizer-12.ivf.md5"
+              "av1-1-b8-00-quantizer-13.ivf"
+              "av1-1-b8-00-quantizer-13.ivf.md5"
+              "av1-1-b8-00-quantizer-14.ivf"
+              "av1-1-b8-00-quantizer-14.ivf.md5"
+              "av1-1-b8-00-quantizer-15.ivf"
+              "av1-1-b8-00-quantizer-15.ivf.md5"
+              "av1-1-b8-00-quantizer-16.ivf"
+              "av1-1-b8-00-quantizer-16.ivf.md5"
+              "av1-1-b8-00-quantizer-17.ivf"
+              "av1-1-b8-00-quantizer-17.ivf.md5"
+              "av1-1-b8-00-quantizer-18.ivf"
+              "av1-1-b8-00-quantizer-18.ivf.md5"
+              "av1-1-b8-00-quantizer-19.ivf"
+              "av1-1-b8-00-quantizer-19.ivf.md5"
+              "av1-1-b8-00-quantizer-20.ivf"
+              "av1-1-b8-00-quantizer-20.ivf.md5"
+              "av1-1-b8-00-quantizer-21.ivf"
+              "av1-1-b8-00-quantizer-21.ivf.md5"
+              "av1-1-b8-00-quantizer-22.ivf"
+              "av1-1-b8-00-quantizer-22.ivf.md5"
+              "av1-1-b8-00-quantizer-23.ivf"
+              "av1-1-b8-00-quantizer-23.ivf.md5"
+              "av1-1-b8-00-quantizer-24.ivf"
+              "av1-1-b8-00-quantizer-24.ivf.md5"
+              "av1-1-b8-00-quantizer-25.ivf"
+              "av1-1-b8-00-quantizer-25.ivf.md5"
+              "av1-1-b8-00-quantizer-26.ivf"
+              "av1-1-b8-00-quantizer-26.ivf.md5"
+              "av1-1-b8-00-quantizer-27.ivf"
+              "av1-1-b8-00-quantizer-27.ivf.md5"
+              "av1-1-b8-00-quantizer-28.ivf"
+              "av1-1-b8-00-quantizer-28.ivf.md5"
+              "av1-1-b8-00-quantizer-29.ivf"
+              "av1-1-b8-00-quantizer-29.ivf.md5"
+              "av1-1-b8-00-quantizer-30.ivf"
+              "av1-1-b8-00-quantizer-30.ivf.md5"
+              "av1-1-b8-00-quantizer-31.ivf"
+              "av1-1-b8-00-quantizer-31.ivf.md5"
+              "av1-1-b8-00-quantizer-32.ivf"
+              "av1-1-b8-00-quantizer-32.ivf.md5"
+              "av1-1-b8-00-quantizer-33.ivf"
+              "av1-1-b8-00-quantizer-33.ivf.md5"
+              "av1-1-b8-00-quantizer-34.ivf"
+              "av1-1-b8-00-quantizer-34.ivf.md5"
+              "av1-1-b8-00-quantizer-35.ivf"
+              "av1-1-b8-00-quantizer-35.ivf.md5"
+              "av1-1-b8-00-quantizer-36.ivf"
+              "av1-1-b8-00-quantizer-36.ivf.md5"
+              "av1-1-b8-00-quantizer-37.ivf"
+              "av1-1-b8-00-quantizer-37.ivf.md5"
+              "av1-1-b8-00-quantizer-38.ivf"
+              "av1-1-b8-00-quantizer-38.ivf.md5"
+              "av1-1-b8-00-quantizer-39.ivf"
+              "av1-1-b8-00-quantizer-39.ivf.md5"
+              "av1-1-b8-00-quantizer-40.ivf"
+              "av1-1-b8-00-quantizer-40.ivf.md5"
+              "av1-1-b8-00-quantizer-41.ivf"
+              "av1-1-b8-00-quantizer-41.ivf.md5"
+              "av1-1-b8-00-quantizer-42.ivf"
+              "av1-1-b8-00-quantizer-42.ivf.md5"
+              "av1-1-b8-00-quantizer-43.ivf"
+              "av1-1-b8-00-quantizer-43.ivf.md5"
+              "av1-1-b8-00-quantizer-44.ivf"
+              "av1-1-b8-00-quantizer-44.ivf.md5"
+              "av1-1-b8-00-quantizer-45.ivf"
+              "av1-1-b8-00-quantizer-45.ivf.md5"
+              "av1-1-b8-00-quantizer-46.ivf"
+              "av1-1-b8-00-quantizer-46.ivf.md5"
+              "av1-1-b8-00-quantizer-47.ivf"
+              "av1-1-b8-00-quantizer-47.ivf.md5"
+              "av1-1-b8-00-quantizer-48.ivf"
+              "av1-1-b8-00-quantizer-48.ivf.md5"
+              "av1-1-b8-00-quantizer-49.ivf"
+              "av1-1-b8-00-quantizer-49.ivf.md5"
+              "av1-1-b8-00-quantizer-50.ivf"
+              "av1-1-b8-00-quantizer-50.ivf.md5"
+              "av1-1-b8-00-quantizer-51.ivf"
+              "av1-1-b8-00-quantizer-51.ivf.md5"
+              "av1-1-b8-00-quantizer-52.ivf"
+              "av1-1-b8-00-quantizer-52.ivf.md5"
+              "av1-1-b8-00-quantizer-53.ivf"
+              "av1-1-b8-00-quantizer-53.ivf.md5"
+              "av1-1-b8-00-quantizer-54.ivf"
+              "av1-1-b8-00-quantizer-54.ivf.md5"
+              "av1-1-b8-00-quantizer-55.ivf"
+              "av1-1-b8-00-quantizer-55.ivf.md5"
+              "av1-1-b8-00-quantizer-56.ivf"
+              "av1-1-b8-00-quantizer-56.ivf.md5"
+              "av1-1-b8-00-quantizer-57.ivf"
+              "av1-1-b8-00-quantizer-57.ivf.md5"
+              "av1-1-b8-00-quantizer-58.ivf"
+              "av1-1-b8-00-quantizer-58.ivf.md5"
+              "av1-1-b8-00-quantizer-59.ivf"
+              "av1-1-b8-00-quantizer-59.ivf.md5"
+              "av1-1-b8-00-quantizer-60.ivf"
+              "av1-1-b8-00-quantizer-60.ivf.md5"
+              "av1-1-b8-00-quantizer-61.ivf"
+              "av1-1-b8-00-quantizer-61.ivf.md5"
+              "av1-1-b8-00-quantizer-62.ivf"
+              "av1-1-b8-00-quantizer-62.ivf.md5"
+              "av1-1-b8-00-quantizer-63.ivf"
+              "av1-1-b8-00-quantizer-63.ivf.md5"
+              "av1-1-b8-01-size-16x16.ivf"
+              "av1-1-b8-01-size-16x16.ivf.md5"
+              "av1-1-b8-01-size-16x18.ivf"
+              "av1-1-b8-01-size-16x18.ivf.md5"
+              "av1-1-b8-01-size-16x32.ivf"
+              "av1-1-b8-01-size-16x32.ivf.md5"
+              "av1-1-b8-01-size-16x34.ivf"
+              "av1-1-b8-01-size-16x34.ivf.md5"
+              "av1-1-b8-01-size-16x64.ivf"
+              "av1-1-b8-01-size-16x64.ivf.md5"
+              "av1-1-b8-01-size-16x66.ivf"
+              "av1-1-b8-01-size-16x66.ivf.md5"
+              "av1-1-b8-01-size-18x16.ivf"
+              "av1-1-b8-01-size-18x16.ivf.md5"
+              "av1-1-b8-01-size-18x18.ivf"
+              "av1-1-b8-01-size-18x18.ivf.md5"
+              "av1-1-b8-01-size-18x32.ivf"
+              "av1-1-b8-01-size-18x32.ivf.md5"
+              "av1-1-b8-01-size-18x34.ivf"
+              "av1-1-b8-01-size-18x34.ivf.md5"
+              "av1-1-b8-01-size-18x64.ivf"
+              "av1-1-b8-01-size-18x64.ivf.md5"
+              "av1-1-b8-01-size-18x66.ivf"
+              "av1-1-b8-01-size-18x66.ivf.md5"
+              "av1-1-b8-01-size-196x196.ivf"
+              "av1-1-b8-01-size-196x196.ivf.md5"
+              "av1-1-b8-01-size-196x198.ivf"
+              "av1-1-b8-01-size-196x198.ivf.md5"
+              "av1-1-b8-01-size-196x200.ivf"
+              "av1-1-b8-01-size-196x200.ivf.md5"
+              "av1-1-b8-01-size-196x202.ivf"
+              "av1-1-b8-01-size-196x202.ivf.md5"
+              "av1-1-b8-01-size-196x208.ivf"
+              "av1-1-b8-01-size-196x208.ivf.md5"
+              "av1-1-b8-01-size-196x210.ivf"
+              "av1-1-b8-01-size-196x210.ivf.md5"
+              "av1-1-b8-01-size-196x224.ivf"
+              "av1-1-b8-01-size-196x224.ivf.md5"
+              "av1-1-b8-01-size-196x226.ivf"
+              "av1-1-b8-01-size-196x226.ivf.md5"
+              "av1-1-b8-01-size-198x196.ivf"
+              "av1-1-b8-01-size-198x196.ivf.md5"
+              "av1-1-b8-01-size-198x198.ivf"
+              "av1-1-b8-01-size-198x198.ivf.md5"
+              "av1-1-b8-01-size-198x200.ivf"
+              "av1-1-b8-01-size-198x200.ivf.md5"
+              "av1-1-b8-01-size-198x202.ivf"
+              "av1-1-b8-01-size-198x202.ivf.md5"
+              "av1-1-b8-01-size-198x208.ivf"
+              "av1-1-b8-01-size-198x208.ivf.md5"
+              "av1-1-b8-01-size-198x210.ivf"
+              "av1-1-b8-01-size-198x210.ivf.md5"
+              "av1-1-b8-01-size-198x224.ivf"
+              "av1-1-b8-01-size-198x224.ivf.md5"
+              "av1-1-b8-01-size-198x226.ivf"
+              "av1-1-b8-01-size-198x226.ivf.md5"
+              "av1-1-b8-01-size-200x196.ivf"
+              "av1-1-b8-01-size-200x196.ivf.md5"
+              "av1-1-b8-01-size-200x198.ivf"
+              "av1-1-b8-01-size-200x198.ivf.md5"
+              "av1-1-b8-01-size-200x200.ivf"
+              "av1-1-b8-01-size-200x200.ivf.md5"
+              "av1-1-b8-01-size-200x202.ivf"
+              "av1-1-b8-01-size-200x202.ivf.md5"
+              "av1-1-b8-01-size-200x208.ivf"
+              "av1-1-b8-01-size-200x208.ivf.md5"
+              "av1-1-b8-01-size-200x210.ivf"
+              "av1-1-b8-01-size-200x210.ivf.md5"
+              "av1-1-b8-01-size-200x224.ivf"
+              "av1-1-b8-01-size-200x224.ivf.md5"
+              "av1-1-b8-01-size-200x226.ivf"
+              "av1-1-b8-01-size-200x226.ivf.md5"
+              "av1-1-b8-01-size-202x196.ivf"
+              "av1-1-b8-01-size-202x196.ivf.md5"
+              "av1-1-b8-01-size-202x198.ivf"
+              "av1-1-b8-01-size-202x198.ivf.md5"
+              "av1-1-b8-01-size-202x200.ivf"
+              "av1-1-b8-01-size-202x200.ivf.md5"
+              "av1-1-b8-01-size-202x202.ivf"
+              "av1-1-b8-01-size-202x202.ivf.md5"
+              "av1-1-b8-01-size-202x208.ivf"
+              "av1-1-b8-01-size-202x208.ivf.md5"
+              "av1-1-b8-01-size-202x210.ivf"
+              "av1-1-b8-01-size-202x210.ivf.md5"
+              "av1-1-b8-01-size-202x224.ivf"
+              "av1-1-b8-01-size-202x224.ivf.md5"
+              "av1-1-b8-01-size-202x226.ivf"
+              "av1-1-b8-01-size-202x226.ivf.md5"
+              "av1-1-b8-01-size-208x196.ivf"
+              "av1-1-b8-01-size-208x196.ivf.md5"
+              "av1-1-b8-01-size-208x198.ivf"
+              "av1-1-b8-01-size-208x198.ivf.md5"
+              "av1-1-b8-01-size-208x200.ivf"
+              "av1-1-b8-01-size-208x200.ivf.md5"
+              "av1-1-b8-01-size-208x202.ivf"
+              "av1-1-b8-01-size-208x202.ivf.md5"
+              "av1-1-b8-01-size-208x208.ivf"
+              "av1-1-b8-01-size-208x208.ivf.md5"
+              "av1-1-b8-01-size-208x210.ivf"
+              "av1-1-b8-01-size-208x210.ivf.md5"
+              "av1-1-b8-01-size-208x224.ivf"
+              "av1-1-b8-01-size-208x224.ivf.md5"
+              "av1-1-b8-01-size-208x226.ivf"
+              "av1-1-b8-01-size-208x226.ivf.md5"
+              "av1-1-b8-01-size-210x196.ivf"
+              "av1-1-b8-01-size-210x196.ivf.md5"
+              "av1-1-b8-01-size-210x198.ivf"
+              "av1-1-b8-01-size-210x198.ivf.md5"
+              "av1-1-b8-01-size-210x200.ivf"
+              "av1-1-b8-01-size-210x200.ivf.md5"
+              "av1-1-b8-01-size-210x202.ivf"
+              "av1-1-b8-01-size-210x202.ivf.md5"
+              "av1-1-b8-01-size-210x208.ivf"
+              "av1-1-b8-01-size-210x208.ivf.md5"
+              "av1-1-b8-01-size-210x210.ivf"
+              "av1-1-b8-01-size-210x210.ivf.md5"
+              "av1-1-b8-01-size-210x224.ivf"
+              "av1-1-b8-01-size-210x224.ivf.md5"
+              "av1-1-b8-01-size-210x226.ivf"
+              "av1-1-b8-01-size-210x226.ivf.md5"
+              "av1-1-b8-01-size-224x196.ivf"
+              "av1-1-b8-01-size-224x196.ivf.md5"
+              "av1-1-b8-01-size-224x198.ivf"
+              "av1-1-b8-01-size-224x198.ivf.md5"
+              "av1-1-b8-01-size-224x200.ivf"
+              "av1-1-b8-01-size-224x200.ivf.md5"
+              "av1-1-b8-01-size-224x202.ivf"
+              "av1-1-b8-01-size-224x202.ivf.md5"
+              "av1-1-b8-01-size-224x208.ivf"
+              "av1-1-b8-01-size-224x208.ivf.md5"
+              "av1-1-b8-01-size-224x210.ivf"
+              "av1-1-b8-01-size-224x210.ivf.md5"
+              "av1-1-b8-01-size-224x224.ivf"
+              "av1-1-b8-01-size-224x224.ivf.md5"
+              "av1-1-b8-01-size-224x226.ivf"
+              "av1-1-b8-01-size-224x226.ivf.md5"
+              "av1-1-b8-01-size-226x196.ivf"
+              "av1-1-b8-01-size-226x196.ivf.md5"
+              "av1-1-b8-01-size-226x198.ivf"
+              "av1-1-b8-01-size-226x198.ivf.md5"
+              "av1-1-b8-01-size-226x200.ivf"
+              "av1-1-b8-01-size-226x200.ivf.md5"
+              "av1-1-b8-01-size-226x202.ivf"
+              "av1-1-b8-01-size-226x202.ivf.md5"
+              "av1-1-b8-01-size-226x208.ivf"
+              "av1-1-b8-01-size-226x208.ivf.md5"
+              "av1-1-b8-01-size-226x210.ivf"
+              "av1-1-b8-01-size-226x210.ivf.md5"
+              "av1-1-b8-01-size-226x224.ivf"
+              "av1-1-b8-01-size-226x224.ivf.md5"
+              "av1-1-b8-01-size-226x226.ivf"
+              "av1-1-b8-01-size-226x226.ivf.md5"
+              "av1-1-b8-01-size-32x16.ivf"
+              "av1-1-b8-01-size-32x16.ivf.md5"
+              "av1-1-b8-01-size-32x18.ivf"
+              "av1-1-b8-01-size-32x18.ivf.md5"
+              "av1-1-b8-01-size-32x32.ivf"
+              "av1-1-b8-01-size-32x32.ivf.md5"
+              "av1-1-b8-01-size-32x34.ivf"
+              "av1-1-b8-01-size-32x34.ivf.md5"
+              "av1-1-b8-01-size-32x64.ivf"
+              "av1-1-b8-01-size-32x64.ivf.md5"
+              "av1-1-b8-01-size-32x66.ivf"
+              "av1-1-b8-01-size-32x66.ivf.md5"
+              "av1-1-b8-01-size-34x16.ivf"
+              "av1-1-b8-01-size-34x16.ivf.md5"
+              "av1-1-b8-01-size-34x18.ivf"
+              "av1-1-b8-01-size-34x18.ivf.md5"
+              "av1-1-b8-01-size-34x32.ivf"
+              "av1-1-b8-01-size-34x32.ivf.md5"
+              "av1-1-b8-01-size-34x34.ivf"
+              "av1-1-b8-01-size-34x34.ivf.md5"
+              "av1-1-b8-01-size-34x64.ivf"
+              "av1-1-b8-01-size-34x64.ivf.md5"
+              "av1-1-b8-01-size-34x66.ivf"
+              "av1-1-b8-01-size-34x66.ivf.md5"
+              "av1-1-b8-01-size-64x16.ivf"
+              "av1-1-b8-01-size-64x16.ivf.md5"
+              "av1-1-b8-01-size-64x18.ivf"
+              "av1-1-b8-01-size-64x18.ivf.md5"
+              "av1-1-b8-01-size-64x32.ivf"
+              "av1-1-b8-01-size-64x32.ivf.md5"
+              "av1-1-b8-01-size-64x34.ivf"
+              "av1-1-b8-01-size-64x34.ivf.md5"
+              "av1-1-b8-01-size-64x64.ivf"
+              "av1-1-b8-01-size-64x64.ivf.md5"
+              "av1-1-b8-01-size-64x66.ivf"
+              "av1-1-b8-01-size-64x66.ivf.md5"
+              "av1-1-b8-01-size-66x16.ivf"
+              "av1-1-b8-01-size-66x16.ivf.md5"
+              "av1-1-b8-01-size-66x18.ivf"
+              "av1-1-b8-01-size-66x18.ivf.md5"
+              "av1-1-b8-01-size-66x32.ivf"
+              "av1-1-b8-01-size-66x32.ivf.md5"
+              "av1-1-b8-01-size-66x34.ivf"
+              "av1-1-b8-01-size-66x34.ivf.md5"
+              "av1-1-b8-01-size-66x64.ivf"
+              "av1-1-b8-01-size-66x64.ivf.md5"
+              "av1-1-b8-01-size-66x66.ivf"
+              "av1-1-b8-01-size-66x66.ivf.md5")
+endif()
+
+if(ENABLE_ENCODE_PERF_TESTS AND CONFIG_AV1_ENCODER)
+  list(APPEND AOM_TEST_DATA_FILE_NAMES "desktop_640_360_30.yuv"
+              "kirland_640_480_30.yuv" "macmarcomoving_640_480_30.yuv"
+              "macmarcostationary_640_480_30.yuv" "niklas_1280_720_30.yuv"
+              "tacomanarrows_640_480_30.yuv"
+              "tacomasmallcameramovement_640_480_30.yuv"
+              "thaloundeskmtg_640_480_30.yuv")
+endif()
 
 # Parses test/test-data.sha1 and writes captured file names and checksums to
 # $out_files and $out_checksums as lists.
-function (make_test_data_lists test_data_file out_files out_checksums)
-  if (NOT test_data_file OR NOT EXISTS "${test_data_file}")
+function(make_test_data_lists test_data_file out_files out_checksums)
+  if(NOT test_data_file OR NOT EXISTS "${test_data_file}")
     message(FATAL_ERROR "Test info file missing or empty (${test_data_file})")
-  endif ()
+  endif()
 
   # Read $test_data_file into $files_and_checksums. $files_and_checksums becomes
   # a list with an entry for each line from $test_data_file.
   file(STRINGS "${test_data_file}" files_and_checksums)
 
   # Iterate over the list of lines and split it into $checksums and $filenames.
-  foreach (line ${files_and_checksums})
+  foreach(line ${files_and_checksums})
     string(FIND "${line}" " *" delim_pos)
 
     math(EXPR filename_pos "${delim_pos} + 2")
@@ -70,55 +396,55 @@ function (make_test_data_lists test_data_file out_files out_checksums)
     string(SUBSTRING "${line}" ${filename_pos} -1 filename)
 
     list(FIND AOM_TEST_DATA_FILE_NAMES ${filename} list_index)
-    if (NOT ${list_index} EQUAL -1)
+    if(NOT ${list_index} EQUAL -1)
+
       # Include the name and checksum in output only when the file is needed.
       set(checksums ${checksums} ${checksum})
       set(filenames ${filenames} ${filename})
-    endif ()
-  endforeach ()
+    endif()
+  endforeach()
 
   list(LENGTH filenames num_files)
   list(LENGTH checksums num_checksums)
-  if (NOT checksums OR NOT filenames OR NOT num_files EQUAL num_checksums)
+  if(NOT checksums OR NOT filenames OR NOT num_files EQUAL num_checksums)
     message(FATAL_ERROR "Parsing of ${test_data_file} failed.")
-  endif ()
+  endif()
 
   set(${out_checksums} ${checksums} PARENT_SCOPE)
   set(${out_files} ${filenames} PARENT_SCOPE)
-endfunction ()
+endfunction()
 
 # Appends each file name in $test_files to $test_dir and adds the result path to
 # $out_path_list.
-function (expand_test_file_paths test_files test_dir out_path_list)
-  foreach (filename ${${test_files}})
+function(expand_test_file_paths test_files test_dir out_path_list)
+  foreach(filename ${${test_files}})
     set(path_list ${path_list} "${test_dir}/${filename}")
-  endforeach ()
+  endforeach()
   set(${out_path_list} ${path_list} PARENT_SCOPE)
-endfunction ()
+endfunction()
 
-function (check_file local_path expected_checksum out_needs_update)
-  if (EXISTS "${local_path}")
+function(check_file local_path expected_checksum out_needs_update)
+  if(EXISTS "${local_path}")
     file(SHA1 "${local_path}" file_checksum)
-  else ()
+  else()
     set(${out_needs_update} 1 PARENT_SCOPE)
-    return ()
-  endif ()
+    return()
+  endif()
 
-  if ("${file_checksum}" STREQUAL "${expected_checksum}")
+  if("${file_checksum}" STREQUAL "${expected_checksum}")
     unset(${out_needs_update} PARENT_SCOPE)
-  else ()
+  else()
     set(${out_needs_update} 1 PARENT_SCOPE)
-    return ()
-  endif ()
+    return()
+  endif()
   message("${local_path} up to date.")
-endfunction ()
+endfunction()
 
 # Downloads data from $file_url, confirms that $file_checksum matches, and
 # writes it to $local_path.
-function (download_test_file file_url file_checksum local_path)
+function(download_test_file file_url file_checksum local_path)
   message("Downloading ${file_url} ...")
-  file(DOWNLOAD "${file_url}" "${local_path}"
-       SHOW_PROGRESS
+  file(DOWNLOAD "${file_url}" "${local_path}" SHOW_PROGRESS
        EXPECTED_HASH SHA1=${file_checksum})
   message("Download of ${file_url} complete.")
-endfunction ()
+endfunction()
diff --git a/third_party/aom/test/test_intra_pred_speed.cc b/third_party/aom/test/test_intra_pred_speed.cc
index 25289446f6..b72ac11674 100644
--- a/third_party/aom/test/test_intra_pred_speed.cc
+++ b/third_party/aom/test/test_intra_pred_speed.cc
@@ -7,54 +7,68 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 //  Test and time AOM intra-predictor functions
 
 #include <stdio.h>
-#include <string.h>
+#include <string>
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_dsp_rtcd.h"
+
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/md5_helper.h"
 #include "aom/aom_integer.h"
 #include "aom_ports/mem.h"
 #include "aom_ports/aom_timer.h"
+#include "av1/common/common_data.h"
 
 // -----------------------------------------------------------------------------
 
 namespace {
 
+// Note:
+// APPLY_UNIT_TESTS
+// 1: Do unit tests
+// 0: Generate MD5 array as required
+#define APPLY_UNIT_TESTS 1
+
 typedef void (*AvxPredFunc)(uint8_t *dst, ptrdiff_t y_stride,
                             const uint8_t *above, const uint8_t *left);
 
-const int kBPS = 32;
+const int kBPS = 64;
 const int kTotalPixels = kBPS * kBPS;
-const int kNumAv1IntraFuncs = INTRA_MODES + 3;  // 4 DC predictor variants.
+// 4 DC variants, V, H, PAETH, SMOOTH, SMOOTH_V, SMOOTH_H
+const int kNumAv1IntraFuncs = 10;
+
+#if APPLY_UNIT_TESTS
 const char *kAv1IntraPredNames[kNumAv1IntraFuncs] = {
-  "DC_PRED",       "DC_LEFT_PRED",  "DC_TOP_PRED", "DC_128_PRED", "V_PRED",
-  "H_PRED",        "D45_PRED",      "D135_PRED",   "D117_PRED",   "D153_PRED",
-  "D207_PRED",     "D63_PRED",      "TM_PRED",     "SMOOTH_PRED",
-#if CONFIG_SMOOTH_HV
-  "SMOOTH_V_PRED", "SMOOTH_H_PRED",
-#endif  // CONFIG_SMOOTH_HV
+  "DC_PRED", "DC_LEFT_PRED", "DC_TOP_PRED", "DC_128_PRED",   "V_PRED",
+  "H_PRED",  "PAETH_PRED",   "SMOOTH_PRED", "SMOOTH_V_PRED", "SMOOTH_H_PRED",
 };
+#endif  // APPLY_UNIT_TESTS
 
 template <typename Pixel>
 struct IntraPredTestMem {
-  void Init(int block_width, int bd) {
+  void Init(int block_width, int block_height, int bd) {
+    ASSERT_LE(block_width, kBPS);
+    ASSERT_LE(block_height, kBPS);
+    // Note: for blocks having width <= 32 and height <= 32, we generate 32x32
+    // random pixels as before to avoid having to recalculate all hashes again.
+    const int block_size_upto_32 = (block_width <= 32) && (block_height <= 32);
+    stride = block_size_upto_32 ? 32 : kBPS;
+    num_pixels = stride * stride;
     libaom_test::ACMRandom rnd(libaom_test::ACMRandom::DeterministicSeed());
-    Pixel *const above = above_mem + 16;
+    above = above_mem + 16;
     const int mask = (1 << bd) - 1;
-    for (int i = 0; i < kTotalPixels; ++i) ref_src[i] = rnd.Rand16() & mask;
-    for (int i = 0; i < kBPS; ++i) left[i] = rnd.Rand16() & mask;
-    for (int i = -1; i < kBPS; ++i) above[i] = rnd.Rand16() & mask;
+    for (int i = 0; i < num_pixels; ++i) ref_src[i] = rnd.Rand16() & mask;
+    for (int i = 0; i < stride; ++i) left[i] = rnd.Rand16() & mask;
+    for (int i = -1; i < stride; ++i) above[i] = rnd.Rand16() & mask;
 
-    ASSERT_LE(block_width, kBPS);
-    for (int i = kBPS; i < 2 * kBPS; ++i) {
+    for (int i = stride; i < 2 * stride; ++i) {
       left[i] = rnd.Rand16() & mask;
       above[i] = rnd.Rand16() & mask;
     }
@@ -63,6 +77,11 @@ struct IntraPredTestMem {
   DECLARE_ALIGNED(16, Pixel, src[kTotalPixels]);
   DECLARE_ALIGNED(16, Pixel, ref_src[kTotalPixels]);
   DECLARE_ALIGNED(16, Pixel, left[2 * kBPS]);
+  Pixel *above;
+  int stride;
+  int num_pixels;
+
+ private:
   DECLARE_ALIGNED(16, Pixel, above_mem[2 * kBPS + 16]);
 };
 
@@ -71,36 +90,40 @@ struct IntraPredTestMem {
 
 typedef IntraPredTestMem<uint8_t> Av1IntraPredTestMem;
 
-// Note:
-// APPLY_UNIT_TESTS
-// 1: Do unit tests
-// 0: Generate MD5 array as required
-#define APPLY_UNIT_TESTS 1
+static const char *const kTxSizeStrings[TX_SIZES_ALL] = {
+  "4X4",  "8X8",  "16X16", "32X32", "64X64", "4X8",   "8X4",
+  "8X16", "16X8", "16X32", "32X16", "32X64", "64X32", "4X16",
+  "16X4", "8X32", "32X8",  "16X64", "64X16",
+};
 
-void CheckMd5Signature(const char name[], const char *const signatures[],
-                       const void *data, size_t data_size, int elapsed_time,
-                       int idx) {
+void CheckMd5Signature(TX_SIZE tx_size, bool is_hbd,
+                       const char *const signatures[], const void *data,
+                       size_t data_size, int elapsed_time, int idx) {
+  const std::string hbd_str = is_hbd ? "Hbd " : "";
+  const std::string name_str = hbd_str + "Intra" + kTxSizeStrings[tx_size];
   libaom_test::MD5 md5;
   md5.Add(reinterpret_cast<const uint8_t *>(data), data_size);
 #if APPLY_UNIT_TESTS
-  printf("Mode %s[%13s]: %5d ms     MD5: %s\n", name, kAv1IntraPredNames[idx],
-         elapsed_time, md5.Get());
+  printf("Mode %s[%13s]: %5d ms     MD5: %s\n", name_str.c_str(),
+         kAv1IntraPredNames[idx], elapsed_time, md5.Get());
   EXPECT_STREQ(signatures[idx], md5.Get());
 #else
+  (void)signatures;
+  (void)elapsed_time;
+  (void)idx;
   printf("\"%s\",\n", md5.Get());
 #endif
 }
 
-void TestIntraPred(const char name[], AvxPredFunc const *pred_funcs,
-                   const char *const signatures[], int block_width,
-                   int block_height) {
+void TestIntraPred(TX_SIZE tx_size, AvxPredFunc const *pred_funcs,
+                   const char *const signatures[]) {
+  const int block_width = tx_size_wide[tx_size];
+  const int block_height = tx_size_high[tx_size];
   const int num_pixels_per_test =
       block_width * block_height * kNumAv1IntraFuncs;
   const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test);
   Av1IntraPredTestMem intra_pred_test_mem;
-  const uint8_t *const above = intra_pred_test_mem.above_mem + 16;
-
-  intra_pred_test_mem.Init(block_width, 8);
+  intra_pred_test_mem.Init(block_width, block_height, 8);
 
   for (int k = 0; k < kNumAv1IntraFuncs; ++k) {
     if (pred_funcs[k] == NULL) continue;
@@ -109,715 +132,754 @@ void TestIntraPred(const char name[], AvxPredFunc const *pred_funcs,
     aom_usec_timer timer;
     aom_usec_timer_start(&timer);
     for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
-      pred_funcs[k](intra_pred_test_mem.src, kBPS, above,
-                    intra_pred_test_mem.left);
+      pred_funcs[k](intra_pred_test_mem.src, intra_pred_test_mem.stride,
+                    intra_pred_test_mem.above, intra_pred_test_mem.left);
     }
     libaom_test::ClearSystemState();
     aom_usec_timer_mark(&timer);
     const int elapsed_time =
         static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
-    CheckMd5Signature(name, signatures, intra_pred_test_mem.src,
-                      sizeof(intra_pred_test_mem.src), elapsed_time, k);
-  }
-}
-
-void TestIntraPred4(const char *block_name, AvxPredFunc const *pred_funcs) {
-  static const char *const kSignatures4x4[kNumAv1IntraFuncs] = {
-    "e7ed7353c3383fff942e500e9bfe82fe",
-    "2a4a26fcc6ce005eadc08354d196c8a9",
-    "269d92eff86f315d9c38fe7640d85b15",
-    "ae2960eea9f71ee3dabe08b282ec1773",
-    "6c1abcc44e90148998b51acd11144e9c",
-    "f7bb3186e1ef8a2b326037ff898cad8e",
-    "87e72798518d62e84bcc77dcb17d0f3b",
-    "141624072a4a56773f68fadbdd07c4a7",
-    "7be49b08687a5f24df3a2c612fca3876",
-    "459bb5d9fd5b238348179c9a22108cd6",
-    "3d98810f418a9de92acfe2c68909c61c",
-    "6310eecda3cc9496987ca10186255558",
-    "59fc0e923a08cfac0a493fb38988e2bb",
-    "9ff8bb37d9c830e6ab8ecb0c435d3c91",
-#if CONFIG_SMOOTH_HV
-    "de6937fca02354f2874dbc5dbec5d5b3",
-    "723cf948137f7d8c7860d814e55ae67d",
-#endif  // CONFIG_SMOOTH_HV
-  };
-  static const char *const kSignatures4x8[kNumAv1IntraFuncs] = {
-    "d9fbebdc85f71ab1e18461b2db4a2adc",
-    "5ccb2a68284bc9714d94b8a06ccadbb2",
-    "735d059abc2744f3ff3f9590f7191b37",
-    "d9fbebdc85f71ab1e18461b2db4a2adc",
-    "6819497c44cd0ace120add83672996ee",
-    "7e3244f5a2d3edf81c7e962a842b97f9",
-    "3fa52ee9acf5a25594cac684be263f32",
-    "c18dd23d57def4df4c6147c572dfc827",
-    "d007fbf7e43cb8f49702daa20f0c9153",
-    "5c0226c44c5df285728296b80cc6de4b",
-    "b55d7b558bebc8c2042dfac58b3c4688",
-    "6549362baa389b8faa2d954926b64e2f",
-    "809350f164cd4d1650850bb0f59c3260",
-    "1b60a394331eeab6927a6f8aaff57040",
-#if CONFIG_SMOOTH_HV
-    "5307de1bd7329ba6b281d2c1b0b457f9",
-    "24c58a8138339846d95568efb91751db",
-#endif
-  };
-  if (!strcmp(block_name, "intra4x4")) {
-    TestIntraPred(block_name, pred_funcs, kSignatures4x4, 4, 4);
-  }
-  if (!strcmp(block_name, "intra4x8")) {
-    TestIntraPred(block_name, pred_funcs, kSignatures4x8, 4, 8);
-  }
-}
-
-void TestIntraPred8(const char *block_name, AvxPredFunc const *pred_funcs) {
-  static const char *const kSignatures8x8[kNumAv1IntraFuncs] = {
-    "d8bbae5d6547cfc17e4f5f44c8730e88",
-    "373bab6d931868d41a601d9d88ce9ac3",
-    "6fdd5ff4ff79656c14747598ca9e3706",
-    "d9661c2811d6a73674f40ffb2b841847",
-    "7c722d10b19ccff0b8c171868e747385",
-    "f81dd986eb2b50f750d3a7da716b7e27",
-    "e0b1292448f3350bf1c92ca283ca872a",
-    "0e3523f9cab2142dd37fd07ec0760bce",
-    "79ac4efe907f0a0f1885d43066cfedee",
-    "19ecf2432ac305057de3b6578474eec6",
-    "7ae38292cbe47b4aa0807c3bd5a543df",
-    "d0ecffec1bb01f4b61ab5738164695c4",
-    "064404361748dd111a890a1470d7f0ea",
-    "dc29b7e1f78cc8e7525d5ea4c0ab9b78",
-#if CONFIG_SMOOTH_HV
-    "97111eb1bc26bade6272015df829f1ae",
-    "d19a8a73cc46b807f2c5e817576cc1e1",
-#endif  // CONFIG_SMOOTH_HV
-  };
-  static const char *const kSignatures8x4[kNumAv1IntraFuncs] = {
-    "23f9fc11344426c9bee2e06d57dfd628",
-    "2d71a26d1bae1fb34734de7b42fc5eb7",
-    "5af9c1b2fd9d5721fad67b67b3f7c816",
-    "00d71b17be662753813d515f197d145e",
-    "bef10ec984427e28f4390f43809d10af",
-    "77773cdfb7ed6bc882ab202a64b0a470",
-    "cba356970f6b9a1b6024e1dbe4a66f9b",
-    "c58c21efc804242848e6f29a93a7984d",
-    "dc92cc45a51c7a397506cab19f74e66d",
-    "391f6a12224f81a3719ea09a2cf7a5ad",
-    "b74b8b11f7eb2bbf723b25f381104ca9",
-    "2234aaa06ca245624211cf53a0261017",
-    "2cc48bd66d6b0121b5221d52ccd732af",
-    "b302155e1c9eeeafe2ba2bf68e807a46",
-#if CONFIG_SMOOTH_HV
-    "561bc8d0e76d5041ebd5168fc6a115e1",
-    "81d0113fb1d0a9a24ffd6f1987b77948",
-#endif
-  };
-  static const char *const kSignatures8x16[kNumAv1IntraFuncs] = {
-    "c849de88b24f773dfcdd1d48d1209796",
-    "6cb807c1897b94866a0f3d3c56ed8695",
-    "d56db05a8ac7981762f5b877f486c4ef",
-    "b4bc01eb6e59a40922ad17715cafb04b",
-    "09d178439534f4062ae687c351f66d64",
-    "644501399cf73080ac606e5cef7ca09b",
-    "0e8e968fa177204d7e73d7e04ce69ebb",
-    "1d25f9287fdf7ba48a5105f1529b7e75",
-    "02cacccf3752451763a6a6e2e784494f",
-    "6044a1416d53e324ddc012d2e7763339",
-    "57ac6e8f3ab5e943c9280043eeb174b8",
-    "d51b9d65471194d9caebc7d67e75ef10",
-    "278076495180e17c065a95ab7278539a",
-    "9dd7f324816f242be408ffeb0c673732",
-#if CONFIG_SMOOTH_HV
-    "f520c4a20acfa0bea1d253c6f0f040fd",
-    "85f38df809df2c2d7c8b4a157a65cd44",
-#endif
-  };
-  if (!strcmp(block_name, "intra8x8")) {
-    TestIntraPred(block_name, pred_funcs, kSignatures8x8, 8, 8);
-  }
-  if (!strcmp(block_name, "intra8x4")) {
-    TestIntraPred(block_name, pred_funcs, kSignatures8x4, 8, 4);
-  }
-  if (!strcmp(block_name, "intra8x16")) {
-    TestIntraPred(block_name, pred_funcs, kSignatures8x16, 8, 16);
-  }
-}
-
-void TestIntraPred16(const char *block_name, AvxPredFunc const *pred_funcs) {
-  static const char *const kSignatures16x16[kNumAv1IntraFuncs] = {
-    "50971c07ce26977d30298538fffec619",
-    "527a6b9e0dc5b21b98cf276305432bef",
-    "7eff2868f80ebc2c43a4f367281d80f7",
-    "67cd60512b54964ef6aff1bd4816d922",
-    "48371c87dc95c08a33b2048f89cf6468",
-    "b0acf2872ee411d7530af6d2625a7084",
-    "31d901ab2289d1e61e704e40240382a7",
-    "dae208f3dca583529cff49b73f7c4183",
-    "7af66a2f4c8e0b4908e40f047e60c47c",
-    "125e3ab6ab9bc961f183ec366a7afa88",
-    "ff230677e800977757d14b85a9eba404",
-    "eb42dc39140515dd4f3ab1afe6c3e71b",
-    "93d6b5352b571805ab16a55e1bbed86a",
-    "03764e4c0aebbc180e4e2c68fb06df2b",
-#if CONFIG_SMOOTH_HV
-    "bb6c74c9076c9f266ab11fb57060d8e6",
-    "0c5162bc28489756ddb847b5678e6f07",
-#endif  // CONFIG_SMOOTH_HV
-  };
-  static const char *const kSignatures16x8[kNumAv1IntraFuncs] = {
-    "b4cbdbdf10ce13300b4063a3daf99e04",
-    "3731e1e6202064a9d0604d7c293ecee4",
-    "6c856188c4256a06452f0d5d70cac436",
-    "1f2192b4c8c497589484ea7bf9c944e8",
-    "84011bd4b7f565119d06787840e333a0",
-    "0e48949f7a6aa36f0d76b5d01f91124a",
-    "58114c06f6b9d8285e5020c7afd834ab",
-    "e37afe84a8b3c5e0f048d4652ecbe09e",
-    "c216348473fb029b45f8fb4f2862a7bd",
-    "0b7385155dcef742cc456d5741ae93a3",
-    "d55fadb221f0ea20266e57cd413e7b94",
-    "9bd6eb226c7e169b8d53cf70aea98b3a",
-    "60eff8064634b6c73b10681356baeee9",
-    "1559aeb081a9c0c71111d6093c2ff9fd",
-#if CONFIG_SMOOTH_HV
-    "c15479b739713773e5cabb748451987b",
-    "72e33ec12c9b67aea26d8d005fb82de2",
-#endif
-  };
-  static const char *const kSignatures16x32[kNumAv1IntraFuncs] = {
-    "abe5233d189cdbf79424721571bbaa7b",
-    "282759f81e3cfb2e2d396fe406b72a8b",
-    "e2224926c264f6f174cbc3167a233168",
-    "6814e85c2b33f8c9415d62e80394b47b",
-    "99cbbb60459c08a3061d72c4e4f6276a",
-    "1d1567d40b8e816f8c1f71e576fe0f87",
-    "5e989f9c748a0d2cd8c4ebf9d3fe1278",
-    "7135a2f419452a3a192a35156f68b019",
-    "06e10af5a726d2c81b8f8c708204f9fb",
-    "c0882f0e7ba1ffa0aeef6d5c751df6de",
-    "8477429e17d39a423f30e2082f651549",
-    "ba35068a30c2d1d10901e4bfabd02a11",
-    "36fdd371b624a075814d497c4832ec85",
-    "8ab8da61b727442b6ff692b40d0df018",
-#if CONFIG_SMOOTH_HV
-    "e35a10ad7fdf2327e821504a90f6a6eb",
-    "1f7211e727dc1de7d6a55d082fbdd821",
-#endif
-  };
-  if (!strcmp(block_name, "intra16x16")) {
-    TestIntraPred(block_name, pred_funcs, kSignatures16x16, 16, 16);
-  }
-  if (!strcmp(block_name, "intra16x8")) {
-    TestIntraPred(block_name, pred_funcs, kSignatures16x8, 16, 8);
-  }
-  if (!strcmp(block_name, "intra16x32")) {
-    TestIntraPred(block_name, pred_funcs, kSignatures16x32, 16, 32);
+    CheckMd5Signature(
+        tx_size, false, signatures, intra_pred_test_mem.src,
+        intra_pred_test_mem.num_pixels * sizeof(*intra_pred_test_mem.src),
+        elapsed_time, k);
   }
 }
 
-void TestIntraPred32(const char *block_name, AvxPredFunc const *pred_funcs) {
-  static const char *const kSignatures32x32[kNumAv1IntraFuncs] = {
-    "a0a618c900e65ae521ccc8af789729f2",
-    "985aaa7c72b4a6c2fb431d32100cf13a",
-    "10662d09febc3ca13ee4e700120daeb5",
-    "b3b01379ba08916ef6b1b35f7d9ad51c",
-    "9f4261755795af97e34679c333ec7004",
-    "bc2c9da91ad97ef0d1610fb0a9041657",
-    "f524b1a7e31c7bb9bfb2487fac3e16d8",
-    "4039bb7da0f6860090d3c57b5c85468f",
-    "b29fff7b61804e68383e3a609b33da58",
-    "e1aa5e49067fd8dba66c2eb8d07b7a89",
-    "db217e7891581cf93895ef5974bebb21",
-    "beb6cdc52b52c8976b4d2407ec8d2313",
-    "ef1653982b69e1f64bee3759f3e1ec45",
-    "1a51a675deba2c83282142eb48d3dc3d",
-#if CONFIG_SMOOTH_HV
-    "866c224746dc260cda861a7b1b383fb3",
-    "cea23799fc3526e1b6a6ff02b42b82af",
-#endif  // CONFIG_SMOOTH_HV
-  };
-  static const char *const kSignatures32x16[kNumAv1IntraFuncs] = {
-    "d1aeb8d5fdcfd3307922af01a798a4dc",
-    "b0bcb514ebfbee065faea9d34c12ae75",
-    "d6a18c63b4e909871c0137ca652fad23",
-    "fd047f2fc1b8ffb95d0eeef3e8796a45",
-    "645ab60779ea348fd93c81561c31bab9",
-    "4409633c9db8dff41ade4292a3a56e7f",
-    "b9b2935b2287a9a461ac5c11251ac706",
-    "43b05f808c0ac4fe8accd84d293b0488",
-    "1d2cb43872d20c205ffb185102bcd22a",
-    "2c1551b5e99592fd21053b5d14e397d9",
-    "cd499ef0dd41e2e38d5dac3319dfdd97",
-    "cd2610426637003f3b5d3984cb3320d5",
-    "5e36a11e069b31c2a739f3a9c7b37c24",
-    "e83b9483d702cfae496991c3c7fa92c0",
-#if CONFIG_SMOOTH_HV
-    "12f6ddf98c7f30a277307f1ea935b030",
-    "354321d6c32bbdb0739e4fa2acbf41e1",
-#endif
-  };
-  if (!strcmp(block_name, "intra32x32")) {
-    TestIntraPred(block_name, pred_funcs, kSignatures32x32, 32, 32);
-  }
-  if (!strcmp(block_name, "intra32x16")) {
-    TestIntraPred(block_name, pred_funcs, kSignatures32x16, 32, 16);
-  }
-}
+static const char *const kSignatures[TX_SIZES_ALL][kNumAv1IntraFuncs] = {
+  {
+      // 4X4
+      "e7ed7353c3383fff942e500e9bfe82fe",
+      "2a4a26fcc6ce005eadc08354d196c8a9",
+      "269d92eff86f315d9c38fe7640d85b15",
+      "ae2960eea9f71ee3dabe08b282ec1773",
+      "6c1abcc44e90148998b51acd11144e9c",
+      "f7bb3186e1ef8a2b326037ff898cad8e",
+      "59fc0e923a08cfac0a493fb38988e2bb",
+      "9ff8bb37d9c830e6ab8ecb0c435d3c91",
+      "de6937fca02354f2874dbc5dbec5d5b3",
+      "723cf948137f7d8c7860d814e55ae67d",
+  },
+  {
+      // 8X8
+      "d8bbae5d6547cfc17e4f5f44c8730e88",
+      "373bab6d931868d41a601d9d88ce9ac3",
+      "6fdd5ff4ff79656c14747598ca9e3706",
+      "d9661c2811d6a73674f40ffb2b841847",
+      "7c722d10b19ccff0b8c171868e747385",
+      "f81dd986eb2b50f750d3a7da716b7e27",
+      "064404361748dd111a890a1470d7f0ea",
+      "dc29b7e1f78cc8e7525d5ea4c0ab9b78",
+      "97111eb1bc26bade6272015df829f1ae",
+      "d19a8a73cc46b807f2c5e817576cc1e1",
+  },
+  {
+      // 16X16
+      "50971c07ce26977d30298538fffec619",
+      "527a6b9e0dc5b21b98cf276305432bef",
+      "7eff2868f80ebc2c43a4f367281d80f7",
+      "67cd60512b54964ef6aff1bd4816d922",
+      "48371c87dc95c08a33b2048f89cf6468",
+      "b0acf2872ee411d7530af6d2625a7084",
+      "93d6b5352b571805ab16a55e1bbed86a",
+      "03764e4c0aebbc180e4e2c68fb06df2b",
+      "bb6c74c9076c9f266ab11fb57060d8e6",
+      "0c5162bc28489756ddb847b5678e6f07",
+  },
+  {
+      // 32X32
+      "a0a618c900e65ae521ccc8af789729f2",
+      "985aaa7c72b4a6c2fb431d32100cf13a",
+      "10662d09febc3ca13ee4e700120daeb5",
+      "b3b01379ba08916ef6b1b35f7d9ad51c",
+      "9f4261755795af97e34679c333ec7004",
+      "bc2c9da91ad97ef0d1610fb0a9041657",
+      "ef1653982b69e1f64bee3759f3e1ec45",
+      "1a51a675deba2c83282142eb48d3dc3d",
+      "866c224746dc260cda861a7b1b383fb3",
+      "cea23799fc3526e1b6a6ff02b42b82af",
+  },
+  {
+      // 64X64
+      "6e1094fa7b50bc813aa2ba29f5df8755",
+      "afe020786b83b793c2bbd9468097ff6e",
+      "be91585259bc37bf4dc1651936e90b3e",
+      "a1650dbcd56e10288c3e269eca37967d",
+      "9e5c34f3797e0cdd3cd9d4c05b0d8950",
+      "bc87be7ac899cc6a28f399d7516c49fe",
+      "9811fd0d2dd515f06122f5d1bd18b784",
+      "3c140e466f2c2c0d9cb7d2157ab8dc27",
+      "9543de76c925a8f6adc884cc7f98dc91",
+      "df1df0376cc944afe7e74e94f53e575a",
+  },
+  {
+      // 4X8
+      "d9fbebdc85f71ab1e18461b2db4a2adc",
+      "5ccb2a68284bc9714d94b8a06ccadbb2",
+      "735d059abc2744f3ff3f9590f7191b37",
+      "d9fbebdc85f71ab1e18461b2db4a2adc",
+      "6819497c44cd0ace120add83672996ee",
+      "7e3244f5a2d3edf81c7e962a842b97f9",
+      "809350f164cd4d1650850bb0f59c3260",
+      "1b60a394331eeab6927a6f8aaff57040",
+      "5307de1bd7329ba6b281d2c1b0b457f9",
+      "24c58a8138339846d95568efb91751db",
+  },
+  {
+      // 8X4
+      "23f9fc11344426c9bee2e06d57dfd628",
+      "2d71a26d1bae1fb34734de7b42fc5eb7",
+      "5af9c1b2fd9d5721fad67b67b3f7c816",
+      "00d71b17be662753813d515f197d145e",
+      "bef10ec984427e28f4390f43809d10af",
+      "77773cdfb7ed6bc882ab202a64b0a470",
+      "2cc48bd66d6b0121b5221d52ccd732af",
+      "b302155e1c9eeeafe2ba2bf68e807a46",
+      "561bc8d0e76d5041ebd5168fc6a115e1",
+      "81d0113fb1d0a9a24ffd6f1987b77948",
+  },
+  {
+      // 8X16
+      "c849de88b24f773dfcdd1d48d1209796",
+      "6cb807c1897b94866a0f3d3c56ed8695",
+      "d56db05a8ac7981762f5b877f486c4ef",
+      "b4bc01eb6e59a40922ad17715cafb04b",
+      "09d178439534f4062ae687c351f66d64",
+      "644501399cf73080ac606e5cef7ca09b",
+      "278076495180e17c065a95ab7278539a",
+      "9dd7f324816f242be408ffeb0c673732",
+      "f520c4a20acfa0bea1d253c6f0f040fd",
+      "85f38df809df2c2d7c8b4a157a65cd44",
+  },
+  {
+      // 16X8
+      "b4cbdbdf10ce13300b4063a3daf99e04",
+      "3731e1e6202064a9d0604d7c293ecee4",
+      "6c856188c4256a06452f0d5d70cac436",
+      "1f2192b4c8c497589484ea7bf9c944e8",
+      "84011bd4b7f565119d06787840e333a0",
+      "0e48949f7a6aa36f0d76b5d01f91124a",
+      "60eff8064634b6c73b10681356baeee9",
+      "1559aeb081a9c0c71111d6093c2ff9fd",
+      "c15479b739713773e5cabb748451987b",
+      "72e33ec12c9b67aea26d8d005fb82de2",
+  },
+  {
+      // 16X32
+      "abe5233d189cdbf79424721571bbaa7b",
+      "282759f81e3cfb2e2d396fe406b72a8b",
+      "e2224926c264f6f174cbc3167a233168",
+      "6814e85c2b33f8c9415d62e80394b47b",
+      "99cbbb60459c08a3061d72c4e4f6276a",
+      "1d1567d40b8e816f8c1f71e576fe0f87",
+      "36fdd371b624a075814d497c4832ec85",
+      "8ab8da61b727442b6ff692b40d0df018",
+      "e35a10ad7fdf2327e821504a90f6a6eb",
+      "1f7211e727dc1de7d6a55d082fbdd821",
+  },
+  {
+      // 32X16
+      "d1aeb8d5fdcfd3307922af01a798a4dc",
+      "b0bcb514ebfbee065faea9d34c12ae75",
+      "d6a18c63b4e909871c0137ca652fad23",
+      "fd047f2fc1b8ffb95d0eeef3e8796a45",
+      "645ab60779ea348fd93c81561c31bab9",
+      "4409633c9db8dff41ade4292a3a56e7f",
+      "5e36a11e069b31c2a739f3a9c7b37c24",
+      "e83b9483d702cfae496991c3c7fa92c0",
+      "12f6ddf98c7f30a277307f1ea935b030",
+      "354321d6c32bbdb0739e4fa2acbf41e1",
+  },
+  {
+      // 32X64
+      "0ce332b343934b34cd4417725faa85cb",
+      "4e2a2cfd8f56f15939bdfc753145b303",
+      "0f46d124ba9f48cdd5d5290acf786d6d",
+      "e1e8ed803236367821981500a3d9eebe",
+      "1d2f8e48e3adb7c448be05d9f66f4954",
+      "9fb2e176636a5689b26f73ca73fcc512",
+      "e720ebccae7e25e36f23da53ae5b5d6a",
+      "86fe4364734169aaa4520d799890d530",
+      "b1870290764bb1b100d1974e2bd70f1d",
+      "ce5b238e19d85ef69d85badfab4e63ae",
+  },
+  {
+      // 64X32
+      "a6c5aeb722615089efbca80b02951ceb",
+      "538424b24bd0830f21788e7238ca762f",
+      "80c15b303235f9bc2259027bb92dfdc4",
+      "e48e1ac15e97191a8fda08d62fff343e",
+      "12604b37875533665078405ef4582e35",
+      "0048afa17bd3e1632d68b96048836530",
+      "07a0cfcb56a5eed50c4bd6c26814336b",
+      "529d8a070de5bc6531fa3ee8f450c233",
+      "33c50a11c7d78f72434064f634305e95",
+      "e0ef7f0559c1a50ec5a8c12011b962f7",
+  },
+  {
+      // 4X16
+      "750491056568eb8fe15387b86bdf06b8",
+      "3a52dae9f599f08cfb3bd1b910dc0e11",
+      "af79f71e3e03dbeca44e2e13561f70c7",
+      "ca7dfd7624afc0c06fb5552f44398535",
+      "b591af115444bf43140c29c269f68fb2",
+      "483d942ae36e69e62f31eb215331416f",
+      "f14b58525e81870bc5d95c7ac71a347f",
+      "371208bb4027d9badb04095d1590bbc4",
+      "c7049c21b2924d70c7c12784d6b6b796",
+      "7d87233f4b5b0f12086045e5d7b2d4c2",
+  },
+  {
+      // 16X4
+      "7c6e325a65e77e732b3adbe237e045e4",
+      "24478f93ffcec47852e004d0fe948464",
+      "258d042c67d4ba3ecfa667f0adc9aebf",
+      "b2cd21d06959f159a1f3c4d9768ee7fb",
+      "b4e1f38157bf8410e7c3da02f687a343",
+      "869e703729eb0fc0711c254944ff5d5a",
+      "9638dd77105a640b146a8201ea7a0801",
+      "919d932c6af8a1cc7486e8ce996dd487",
+      "e1c9be493b6714c7ae48f30044c43140",
+      "bf0fe3889d654b2f6eb98c8fc751f9e4",
+  },
+  {
+      // 8X32
+      "8dfac4319fe0bd40013ffb3102da8c72",
+      "feb46b6dc4e2ca0a09533bfc51d4dcb0",
+      "850837ec714c37262216527aaf4cbbe9",
+      "4603c7800fb08361f163daca876e8bda",
+      "1ff95e7d2debc27b05806fb25abfd624",
+      "d81b9a51a062b23ca7823804cb7bec22",
+      "f1d8978158766f46335203608cb807e7",
+      "f3527096256258c0878d644a9d7d53ca",
+      "cbde98ac8b009953eb112807ad2ea29e",
+      "654fb1153415747feae599f538122af5",
+  },
+  {
+      // 32X8
+      "3d4ee16fab374357474f60b845327bc7",
+      "bc17c5059473a476df4e85f56395ad55",
+      "3d4ee16fab374357474f60b845327bc7",
+      "c14b8db34dc2355b84e3735c9ba16c7f",
+      "a71d25b5d47a92a8b9223c98f18458ee",
+      "6c1cfe2b1893f4576a80675687cb6426",
+      "92d11bbef8b85bb48d799bb055de3514",
+      "bcf81d1db8ae5cc03360467f44f498ec",
+      "79f8c564163555592e808e145eaf5c60",
+      "46fff139cef2ef773938bcc8b0e5abb8",
+  },
+  {
+      // 16X64
+      "3b2a053ee8b05a8ac35ad23b0422a151",
+      "12b0c69595328c465e0b25e0c9e3e9fc",
+      "f77c544ac8035e01920deae40cee7b07",
+      "727797ef15ccd8d325476fe8f12006a3",
+      "f3be77c0fe67eb5d9d515e92bec21eb7",
+      "f1ece6409e01e9dd98b800d49628247d",
+      "efd2ec9bfbbd4fd1f6604ea369df1894",
+      "ec703de918422b9e03197ba0ed60a199",
+      "739418efb89c07f700895deaa5d0b3e3",
+      "9943ae1bbeeebfe1d3a92dc39e049d63",
+  },
+  {
+      // 64X16
+      "821b76b1494d4f84d20817840f719a1a",
+      "69e462c3338a9aaf993c3f7cfbc15649",
+      "516d8f6eb054d74d150e7b444185b6b9",
+      "de1b736e9d99129609d6ef3a491507a0",
+      "fd9b4276e7affe1e0e4ce4f428058994",
+      "cd82fd361a4767ac29a9f406b480b8f3",
+      "2792c2f810157a4a6cb13c28529ff779",
+      "1220442d90c4255ba0969d28b91e93a6",
+      "c7253e10b45f7f67dfee3256c9b94825",
+      "879792198071c7e0b50b9b5010d8c18f",
+  },
+};
 
 }  // namespace
 
 // Defines a test case for |arch| (e.g., C, SSE2, ...) passing the predictors
-// to |test_func|. The test name is 'arch.test_func', e.g., C.TestIntraPred4.
-#define INTRA_PRED_TEST(arch, test_func, blk, dc, dc_left, dc_top, dc_128, v, \
-                        h, d45e, d135, d117, d153, d207e, d63e, tm, smooth,   \
-                        smooth_v, smooth_h)                                   \
-  TEST(arch, DISABLED_##test_func) {                                          \
-    static const AvxPredFunc aom_intra_pred[] = {                             \
-      dc,   dc_left, dc_top, dc_128, v,  h,      d45e,     d135,              \
-      d117, d153,    d207e,  d63e,   tm, smooth, smooth_v, smooth_h           \
-    };                                                                        \
-    test_func(blk, aom_intra_pred);                                           \
+// to TestIntraPred. The test name is 'arch.TestIntraPred_tx_size', e.g.,
+// C.TestIntraPred.0
+#define INTRA_PRED_TEST(arch, tx_size, dc, dc_left, dc_top, dc_128, v, h,  \
+                        paeth, smooth, smooth_v, smooth_h)                 \
+  TEST(arch, DISABLED_##TestIntraPred_##tx_size) {                         \
+    static const AvxPredFunc aom_intra_pred[] = {                          \
+      dc, dc_left, dc_top, dc_128, v, h, paeth, smooth, smooth_v, smooth_h \
+    };                                                                     \
+    TestIntraPred(tx_size, aom_intra_pred, kSignatures[tx_size]);          \
   }
 
 // -----------------------------------------------------------------------------
-// 4x4
-
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_smooth_v_predictor_4x4_c
-#define smooth_h_pred_func aom_smooth_h_predictor_4x4_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
+// 4x4, 4x8, 4x16
 
-INTRA_PRED_TEST(C_1, TestIntraPred4, "intra4x4", aom_dc_predictor_4x4_c,
+INTRA_PRED_TEST(C_1, TX_4X4, aom_dc_predictor_4x4_c,
                 aom_dc_left_predictor_4x4_c, aom_dc_top_predictor_4x4_c,
                 aom_dc_128_predictor_4x4_c, aom_v_predictor_4x4_c,
-                aom_h_predictor_4x4_c, aom_d45e_predictor_4x4_c,
-                aom_d135_predictor_4x4_c, aom_d117_predictor_4x4_c,
-                aom_d153_predictor_4x4_c, aom_d207e_predictor_4x4_c,
-                aom_d63e_predictor_4x4_c, aom_paeth_predictor_4x4_c,
-                aom_smooth_predictor_4x4_c, smooth_v_pred_func,
-                smooth_h_pred_func)
-
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
-
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_smooth_v_predictor_4x8_c
-#define smooth_h_pred_func aom_smooth_h_predictor_4x8_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
+                aom_h_predictor_4x4_c, aom_paeth_predictor_4x4_c,
+                aom_smooth_predictor_4x4_c, aom_smooth_v_predictor_4x4_c,
+                aom_smooth_h_predictor_4x4_c)
 
-INTRA_PRED_TEST(C_2, TestIntraPred4, "intra4x8", aom_dc_predictor_4x8_c,
+INTRA_PRED_TEST(C_2, TX_4X8, aom_dc_predictor_4x8_c,
                 aom_dc_left_predictor_4x8_c, aom_dc_top_predictor_4x8_c,
                 aom_dc_128_predictor_4x8_c, aom_v_predictor_4x8_c,
-                aom_h_predictor_4x8_c, aom_d45e_predictor_4x8_c,
-                aom_d135_predictor_4x8_c, aom_d117_predictor_4x8_c,
-                aom_d153_predictor_4x8_c, aom_d207e_predictor_4x8_c,
-                aom_d63e_predictor_4x8_c, aom_paeth_predictor_4x8_c,
-                aom_smooth_predictor_4x8_c, smooth_v_pred_func,
-                smooth_h_pred_func)
+                aom_h_predictor_4x8_c, aom_paeth_predictor_4x8_c,
+                aom_smooth_predictor_4x8_c, aom_smooth_v_predictor_4x8_c,
+                aom_smooth_h_predictor_4x8_c)
 
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
+INTRA_PRED_TEST(C_3, TX_4X16, aom_dc_predictor_4x16_c,
+                aom_dc_left_predictor_4x16_c, aom_dc_top_predictor_4x16_c,
+                aom_dc_128_predictor_4x16_c, aom_v_predictor_4x16_c,
+                aom_h_predictor_4x16_c, aom_paeth_predictor_4x16_c,
+                aom_smooth_predictor_4x16_c, aom_smooth_v_predictor_4x16_c,
+                aom_smooth_h_predictor_4x16_c)
 
 #if HAVE_SSE2
-INTRA_PRED_TEST(SSE2_1, TestIntraPred4, "intra4x4", aom_dc_predictor_4x4_sse2,
+INTRA_PRED_TEST(SSE2_1, TX_4X4, aom_dc_predictor_4x4_sse2,
                 aom_dc_left_predictor_4x4_sse2, aom_dc_top_predictor_4x4_sse2,
                 aom_dc_128_predictor_4x4_sse2, aom_v_predictor_4x4_sse2,
-                aom_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_2, TestIntraPred4, "intra4x8", aom_dc_predictor_4x8_sse2,
+                aom_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_2, TX_4X8, aom_dc_predictor_4x8_sse2,
                 aom_dc_left_predictor_4x8_sse2, aom_dc_top_predictor_4x8_sse2,
                 aom_dc_128_predictor_4x8_sse2, aom_v_predictor_4x8_sse2,
-                aom_h_predictor_4x8_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
+                aom_h_predictor_4x8_sse2, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_3, TX_4X16, aom_dc_predictor_4x16_sse2,
+                aom_dc_left_predictor_4x16_sse2, aom_dc_top_predictor_4x16_sse2,
+                aom_dc_128_predictor_4x16_sse2, aom_v_predictor_4x16_sse2,
+                aom_h_predictor_4x16_sse2, NULL, NULL, NULL, NULL)
 #endif  // HAVE_SSE2
 
 #if HAVE_SSSE3
-INTRA_PRED_TEST(SSSE3_1, TestIntraPred4, "intra4x4", NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, aom_d153_predictor_4x4_ssse3,
-                NULL, aom_d63e_predictor_4x4_ssse3,
+INTRA_PRED_TEST(SSSE3_1, TX_4X4, NULL, NULL, NULL, NULL, NULL, NULL,
                 aom_paeth_predictor_4x4_ssse3, aom_smooth_predictor_4x4_ssse3,
-                NULL, NULL)
-INTRA_PRED_TEST(SSSE3_2, TestIntraPred4, "intra4x8", NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_smooth_v_predictor_4x4_ssse3,
+                aom_smooth_h_predictor_4x4_ssse3)
+INTRA_PRED_TEST(SSSE3_2, TX_4X8, NULL, NULL, NULL, NULL, NULL, NULL,
                 aom_paeth_predictor_4x8_ssse3, aom_smooth_predictor_4x8_ssse3,
-                NULL, NULL)
+                aom_smooth_v_predictor_4x8_ssse3,
+                aom_smooth_h_predictor_4x8_ssse3)
+INTRA_PRED_TEST(SSSE3_3, TX_4X16, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_paeth_predictor_4x16_ssse3, aom_smooth_predictor_4x16_ssse3,
+                aom_smooth_v_predictor_4x16_ssse3,
+                aom_smooth_h_predictor_4x16_ssse3)
 #endif  // HAVE_SSSE3
 
 #if HAVE_DSPR2
-INTRA_PRED_TEST(DSPR2, TestIntraPred4, "intra4x4", aom_dc_predictor_4x4_dspr2,
-                NULL, NULL, NULL, NULL, aom_h_predictor_4x4_dspr2, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(DSPR2, TX_4X4, aom_dc_predictor_4x4_dspr2, NULL, NULL, NULL,
+                NULL, aom_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL)
 #endif  // HAVE_DSPR2
 
 #if HAVE_NEON
-INTRA_PRED_TEST(NEON, TestIntraPred4, "intra4x4", aom_dc_predictor_4x4_neon,
+INTRA_PRED_TEST(NEON, TX_4X4, aom_dc_predictor_4x4_neon,
                 aom_dc_left_predictor_4x4_neon, aom_dc_top_predictor_4x4_neon,
                 aom_dc_128_predictor_4x4_neon, aom_v_predictor_4x4_neon,
-                aom_h_predictor_4x4_neon, NULL, aom_d135_predictor_4x4_neon,
-                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+                aom_h_predictor_4x4_neon, NULL, NULL, NULL, NULL)
 #endif  // HAVE_NEON
 
 #if HAVE_MSA
-INTRA_PRED_TEST(MSA, TestIntraPred4, "intra4x4", aom_dc_predictor_4x4_msa,
+INTRA_PRED_TEST(MSA, TX_4X4, aom_dc_predictor_4x4_msa,
                 aom_dc_left_predictor_4x4_msa, aom_dc_top_predictor_4x4_msa,
                 aom_dc_128_predictor_4x4_msa, aom_v_predictor_4x4_msa,
-                aom_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
+                aom_h_predictor_4x4_msa, NULL, NULL, NULL, NULL)
 #endif  // HAVE_MSA
 
 // -----------------------------------------------------------------------------
-// 8x8
+// 8x8, 8x4, 8x16, 8x32
 
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_smooth_v_predictor_8x8_c
-#define smooth_h_pred_func aom_smooth_h_predictor_8x8_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
-INTRA_PRED_TEST(C_1, TestIntraPred8, "intra8x8", aom_dc_predictor_8x8_c,
+INTRA_PRED_TEST(C_1, TX_8X8, aom_dc_predictor_8x8_c,
                 aom_dc_left_predictor_8x8_c, aom_dc_top_predictor_8x8_c,
                 aom_dc_128_predictor_8x8_c, aom_v_predictor_8x8_c,
-                aom_h_predictor_8x8_c, aom_d45e_predictor_8x8_c,
-                aom_d135_predictor_8x8_c, aom_d117_predictor_8x8_c,
-                aom_d153_predictor_8x8_c, aom_d207e_predictor_8x8_c,
-                aom_d63e_predictor_8x8_c, aom_paeth_predictor_8x8_c,
-                aom_smooth_predictor_8x8_c, smooth_v_pred_func,
-                smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
-
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_smooth_v_predictor_8x4_c
-#define smooth_h_pred_func aom_smooth_h_predictor_8x4_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
-INTRA_PRED_TEST(C_2, TestIntraPred8, "intra8x4", aom_dc_predictor_8x4_c,
+                aom_h_predictor_8x8_c, aom_paeth_predictor_8x8_c,
+                aom_smooth_predictor_8x8_c, aom_smooth_v_predictor_8x8_c,
+                aom_smooth_h_predictor_8x8_c)
+
+INTRA_PRED_TEST(C_2, TX_8X4, aom_dc_predictor_8x4_c,
                 aom_dc_left_predictor_8x4_c, aom_dc_top_predictor_8x4_c,
                 aom_dc_128_predictor_8x4_c, aom_v_predictor_8x4_c,
-                aom_h_predictor_8x4_c, aom_d45e_predictor_8x4_c,
-                aom_d135_predictor_8x4_c, aom_d117_predictor_8x4_c,
-                aom_d153_predictor_8x4_c, aom_d207e_predictor_8x4_c,
-                aom_d63e_predictor_8x4_c, aom_paeth_predictor_8x4_c,
-                aom_smooth_predictor_8x4_c, smooth_v_pred_func,
-                smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
-
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_smooth_v_predictor_8x16_c
-#define smooth_h_pred_func aom_smooth_h_predictor_8x16_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
-INTRA_PRED_TEST(C_3, TestIntraPred8, "intra8x16", aom_dc_predictor_8x16_c,
+                aom_h_predictor_8x4_c, aom_paeth_predictor_8x4_c,
+                aom_smooth_predictor_8x4_c, aom_smooth_v_predictor_8x4_c,
+                aom_smooth_h_predictor_8x4_c)
+
+INTRA_PRED_TEST(C_3, TX_8X16, aom_dc_predictor_8x16_c,
                 aom_dc_left_predictor_8x16_c, aom_dc_top_predictor_8x16_c,
                 aom_dc_128_predictor_8x16_c, aom_v_predictor_8x16_c,
-                aom_h_predictor_8x16_c, aom_d45e_predictor_8x16_c,
-                aom_d135_predictor_8x16_c, aom_d117_predictor_8x16_c,
-                aom_d153_predictor_8x16_c, aom_d207e_predictor_8x16_c,
-                aom_d63e_predictor_8x16_c, aom_paeth_predictor_8x16_c,
-                aom_smooth_predictor_8x16_c, smooth_v_pred_func,
-                smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
+                aom_h_predictor_8x16_c, aom_paeth_predictor_8x16_c,
+                aom_smooth_predictor_8x16_c, aom_smooth_v_predictor_8x16_c,
+                aom_smooth_h_predictor_8x16_c)
+
+INTRA_PRED_TEST(C_4, TX_8X32, aom_dc_predictor_8x32_c,
+                aom_dc_left_predictor_8x32_c, aom_dc_top_predictor_8x32_c,
+                aom_dc_128_predictor_8x32_c, aom_v_predictor_8x32_c,
+                aom_h_predictor_8x32_c, aom_paeth_predictor_8x32_c,
+                aom_smooth_predictor_8x32_c, aom_smooth_v_predictor_8x32_c,
+                aom_smooth_h_predictor_8x32_c)
 
 #if HAVE_SSE2
-INTRA_PRED_TEST(SSE2_1, TestIntraPred8, "intra8x8", aom_dc_predictor_8x8_sse2,
+INTRA_PRED_TEST(SSE2_1, TX_8X8, aom_dc_predictor_8x8_sse2,
                 aom_dc_left_predictor_8x8_sse2, aom_dc_top_predictor_8x8_sse2,
                 aom_dc_128_predictor_8x8_sse2, aom_v_predictor_8x8_sse2,
-                aom_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_2, TestIntraPred8, "intra8x4", aom_dc_predictor_8x4_sse2,
+                aom_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_2, TX_8X4, aom_dc_predictor_8x4_sse2,
                 aom_dc_left_predictor_8x4_sse2, aom_dc_top_predictor_8x4_sse2,
                 aom_dc_128_predictor_8x4_sse2, aom_v_predictor_8x4_sse2,
-                aom_h_predictor_8x4_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_3, TestIntraPred8, "intra8x16", aom_dc_predictor_8x16_sse2,
+                aom_h_predictor_8x4_sse2, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_3, TX_8X16, aom_dc_predictor_8x16_sse2,
                 aom_dc_left_predictor_8x16_sse2, aom_dc_top_predictor_8x16_sse2,
                 aom_dc_128_predictor_8x16_sse2, aom_v_predictor_8x16_sse2,
-                aom_h_predictor_8x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
+                aom_h_predictor_8x16_sse2, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_4, TX_8X32, aom_dc_predictor_8x32_sse2,
+                aom_dc_left_predictor_8x32_sse2, aom_dc_top_predictor_8x32_sse2,
+                aom_dc_128_predictor_8x32_sse2, aom_v_predictor_8x32_sse2,
+                aom_h_predictor_8x32_sse2, NULL, NULL, NULL, NULL)
 #endif  // HAVE_SSE2
 
 #if HAVE_SSSE3
-INTRA_PRED_TEST(SSSE3_1, TestIntraPred8, "intra8x8", NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, aom_d153_predictor_8x8_ssse3,
-                NULL, NULL, aom_paeth_predictor_8x8_ssse3,
-                aom_smooth_predictor_8x8_ssse3, NULL, NULL)
-INTRA_PRED_TEST(SSSE3_2, TestIntraPred8, "intra8x4", NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+INTRA_PRED_TEST(SSSE3_1, TX_8X8, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_paeth_predictor_8x8_ssse3, aom_smooth_predictor_8x8_ssse3,
+                aom_smooth_v_predictor_8x8_ssse3,
+                aom_smooth_h_predictor_8x8_ssse3)
+INTRA_PRED_TEST(SSSE3_2, TX_8X4, NULL, NULL, NULL, NULL, NULL, NULL,
                 aom_paeth_predictor_8x4_ssse3, aom_smooth_predictor_8x4_ssse3,
-                NULL, NULL)
-INTRA_PRED_TEST(SSSE3_3, TestIntraPred8, "intra8x16", NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_smooth_v_predictor_8x4_ssse3,
+                aom_smooth_h_predictor_8x4_ssse3)
+INTRA_PRED_TEST(SSSE3_3, TX_8X16, NULL, NULL, NULL, NULL, NULL, NULL,
                 aom_paeth_predictor_8x16_ssse3, aom_smooth_predictor_8x16_ssse3,
-                NULL, NULL)
+                aom_smooth_v_predictor_8x16_ssse3,
+                aom_smooth_h_predictor_8x16_ssse3)
+INTRA_PRED_TEST(SSSE3_4, TX_8X32, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_paeth_predictor_8x32_ssse3, aom_smooth_predictor_8x32_ssse3,
+                aom_smooth_v_predictor_8x32_ssse3,
+                aom_smooth_h_predictor_8x32_ssse3)
 #endif  // HAVE_SSSE3
 
 #if HAVE_DSPR2
-INTRA_PRED_TEST(DSPR2, TestIntraPred8, "intra8x8", aom_dc_predictor_8x8_dspr2,
-                NULL, NULL, NULL, NULL, aom_h_predictor_8x8_dspr2, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(DSPR2, TX_8X8, aom_dc_predictor_8x8_dspr2, NULL, NULL, NULL,
+                NULL, aom_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL)
 #endif  // HAVE_DSPR2
 
 #if HAVE_NEON
-INTRA_PRED_TEST(NEON, TestIntraPred8, "intra8x8", aom_dc_predictor_8x8_neon,
+INTRA_PRED_TEST(NEON, TX_8X8, aom_dc_predictor_8x8_neon,
                 aom_dc_left_predictor_8x8_neon, aom_dc_top_predictor_8x8_neon,
                 aom_dc_128_predictor_8x8_neon, aom_v_predictor_8x8_neon,
-                aom_h_predictor_8x8_neon, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
+                aom_h_predictor_8x8_neon, NULL, NULL, NULL, NULL)
 #endif  // HAVE_NEON
 
 #if HAVE_MSA
-INTRA_PRED_TEST(MSA, TestIntraPred8, "intra8x8", aom_dc_predictor_8x8_msa,
+INTRA_PRED_TEST(MSA, TX_8X8, aom_dc_predictor_8x8_msa,
                 aom_dc_left_predictor_8x8_msa, aom_dc_top_predictor_8x8_msa,
                 aom_dc_128_predictor_8x8_msa, aom_v_predictor_8x8_msa,
-                aom_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
+                aom_h_predictor_8x8_msa, NULL, NULL, NULL, NULL)
 #endif  // HAVE_MSA
 
 // -----------------------------------------------------------------------------
-// 16x16
+// 16x16, 16x8, 16x32, 16x4, 16x64
 
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_smooth_v_predictor_16x16_c
-#define smooth_h_pred_func aom_smooth_h_predictor_16x16_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
-INTRA_PRED_TEST(C_1, TestIntraPred16, "intra16x16", aom_dc_predictor_16x16_c,
+INTRA_PRED_TEST(C_1, TX_16X16, aom_dc_predictor_16x16_c,
                 aom_dc_left_predictor_16x16_c, aom_dc_top_predictor_16x16_c,
                 aom_dc_128_predictor_16x16_c, aom_v_predictor_16x16_c,
-                aom_h_predictor_16x16_c, aom_d45e_predictor_16x16_c,
-                aom_d135_predictor_16x16_c, aom_d117_predictor_16x16_c,
-                aom_d153_predictor_16x16_c, aom_d207e_predictor_16x16_c,
-                aom_d63e_predictor_16x16_c, aom_paeth_predictor_16x16_c,
-                aom_smooth_predictor_16x16_c, smooth_v_pred_func,
-                smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
-
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_smooth_v_predictor_16x8_c
-#define smooth_h_pred_func aom_smooth_h_predictor_16x8_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
-INTRA_PRED_TEST(C_2, TestIntraPred16, "intra16x8", aom_dc_predictor_16x8_c,
+                aom_h_predictor_16x16_c, aom_paeth_predictor_16x16_c,
+                aom_smooth_predictor_16x16_c, aom_smooth_v_predictor_16x16_c,
+                aom_smooth_h_predictor_16x16_c)
+
+INTRA_PRED_TEST(C_2, TX_16X8, aom_dc_predictor_16x8_c,
                 aom_dc_left_predictor_16x8_c, aom_dc_top_predictor_16x8_c,
                 aom_dc_128_predictor_16x8_c, aom_v_predictor_16x8_c,
-                aom_h_predictor_16x8_c, aom_d45e_predictor_16x8_c,
-                aom_d135_predictor_16x8_c, aom_d117_predictor_16x8_c,
-                aom_d153_predictor_16x8_c, aom_d207e_predictor_16x8_c,
-                aom_d63e_predictor_16x8_c, aom_paeth_predictor_16x8_c,
-                aom_smooth_predictor_16x8_c, smooth_v_pred_func,
-                smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
-
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_smooth_v_predictor_16x32_c
-#define smooth_h_pred_func aom_smooth_h_predictor_16x32_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
-INTRA_PRED_TEST(C_3, TestIntraPred16, "intra16x32", aom_dc_predictor_16x32_c,
+                aom_h_predictor_16x8_c, aom_paeth_predictor_16x8_c,
+                aom_smooth_predictor_16x8_c, aom_smooth_v_predictor_16x8_c,
+                aom_smooth_h_predictor_16x8_c)
+
+INTRA_PRED_TEST(C_3, TX_16X32, aom_dc_predictor_16x32_c,
                 aom_dc_left_predictor_16x32_c, aom_dc_top_predictor_16x32_c,
                 aom_dc_128_predictor_16x32_c, aom_v_predictor_16x32_c,
-                aom_h_predictor_16x32_c, aom_d45e_predictor_16x32_c,
-                aom_d135_predictor_16x32_c, aom_d117_predictor_16x32_c,
-                aom_d153_predictor_16x32_c, aom_d207e_predictor_16x32_c,
-                aom_d63e_predictor_16x32_c, aom_paeth_predictor_16x32_c,
-                aom_smooth_predictor_16x32_c, smooth_v_pred_func,
-                smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
+                aom_h_predictor_16x32_c, aom_paeth_predictor_16x32_c,
+                aom_smooth_predictor_16x32_c, aom_smooth_v_predictor_16x32_c,
+                aom_smooth_h_predictor_16x32_c)
+
+INTRA_PRED_TEST(C_4, TX_16X4, aom_dc_predictor_16x4_c,
+                aom_dc_left_predictor_16x4_c, aom_dc_top_predictor_16x4_c,
+                aom_dc_128_predictor_16x4_c, aom_v_predictor_16x4_c,
+                aom_h_predictor_16x4_c, aom_paeth_predictor_16x4_c,
+                aom_smooth_predictor_16x4_c, aom_smooth_v_predictor_16x4_c,
+                aom_smooth_h_predictor_16x4_c)
+
+INTRA_PRED_TEST(C_5, TX_16X64, aom_dc_predictor_16x64_c,
+                aom_dc_left_predictor_16x64_c, aom_dc_top_predictor_16x64_c,
+                aom_dc_128_predictor_16x64_c, aom_v_predictor_16x64_c,
+                aom_h_predictor_16x64_c, aom_paeth_predictor_16x64_c,
+                aom_smooth_predictor_16x64_c, aom_smooth_v_predictor_16x64_c,
+                aom_smooth_h_predictor_16x64_c)
 
 #if HAVE_SSE2
-INTRA_PRED_TEST(SSE2_1, TestIntraPred16, "intra16x16",
-                aom_dc_predictor_16x16_sse2, aom_dc_left_predictor_16x16_sse2,
+INTRA_PRED_TEST(SSE2_1, TX_16X16, aom_dc_predictor_16x16_sse2,
+                aom_dc_left_predictor_16x16_sse2,
                 aom_dc_top_predictor_16x16_sse2,
                 aom_dc_128_predictor_16x16_sse2, aom_v_predictor_16x16_sse2,
-                aom_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_2, TestIntraPred16, "intra16x8",
-                aom_dc_predictor_16x8_sse2, aom_dc_left_predictor_16x8_sse2,
-                aom_dc_top_predictor_16x8_sse2, aom_dc_128_predictor_16x8_sse2,
-                aom_v_predictor_16x8_sse2, aom_h_predictor_16x8_sse2, NULL,
-                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_3, TestIntraPred16, "intra16x32",
-                aom_dc_predictor_16x32_sse2, aom_dc_left_predictor_16x32_sse2,
+                aom_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_2, TX_16X8, aom_dc_predictor_16x8_sse2,
+                aom_dc_left_predictor_16x8_sse2, aom_dc_top_predictor_16x8_sse2,
+                aom_dc_128_predictor_16x8_sse2, aom_v_predictor_16x8_sse2,
+                aom_h_predictor_16x8_sse2, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_3, TX_16X32, aom_dc_predictor_16x32_sse2,
+                aom_dc_left_predictor_16x32_sse2,
                 aom_dc_top_predictor_16x32_sse2,
                 aom_dc_128_predictor_16x32_sse2, aom_v_predictor_16x32_sse2,
-                aom_h_predictor_16x32_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
+                aom_h_predictor_16x32_sse2, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_4, TX_16X64, aom_dc_predictor_16x64_sse2,
+                aom_dc_left_predictor_16x64_sse2,
+                aom_dc_top_predictor_16x64_sse2,
+                aom_dc_128_predictor_16x64_sse2, aom_v_predictor_16x64_sse2,
+                aom_h_predictor_16x64_sse2, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_5, TX_16X4, aom_dc_predictor_16x4_sse2,
+                aom_dc_left_predictor_16x4_sse2, aom_dc_top_predictor_16x4_sse2,
+                aom_dc_128_predictor_16x4_sse2, aom_v_predictor_16x4_sse2,
+                aom_h_predictor_16x4_sse2, NULL, NULL, NULL, NULL)
 #endif  // HAVE_SSE2
 
 #if HAVE_SSSE3
-INTRA_PRED_TEST(SSSE3_1, TestIntraPred16, "intra16x16", NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, aom_d153_predictor_16x16_ssse3,
-                NULL, NULL, aom_paeth_predictor_16x16_ssse3,
-                aom_smooth_predictor_16x16_ssse3, NULL, NULL)
-INTRA_PRED_TEST(SSSE3_2, TestIntraPred16, "intra16x8", NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+INTRA_PRED_TEST(SSSE3_1, TX_16X16, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_paeth_predictor_16x16_ssse3,
+                aom_smooth_predictor_16x16_ssse3,
+                aom_smooth_v_predictor_16x16_ssse3,
+                aom_smooth_h_predictor_16x16_ssse3)
+INTRA_PRED_TEST(SSSE3_2, TX_16X8, NULL, NULL, NULL, NULL, NULL, NULL,
                 aom_paeth_predictor_16x8_ssse3, aom_smooth_predictor_16x8_ssse3,
-                NULL, NULL)
-INTRA_PRED_TEST(SSSE3_3, TestIntraPred16, "intra16x32", NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_smooth_v_predictor_16x8_ssse3,
+                aom_smooth_h_predictor_16x8_ssse3)
+INTRA_PRED_TEST(SSSE3_3, TX_16X32, NULL, NULL, NULL, NULL, NULL, NULL,
                 aom_paeth_predictor_16x32_ssse3,
-                aom_smooth_predictor_16x32_ssse3, NULL, NULL)
+                aom_smooth_predictor_16x32_ssse3,
+                aom_smooth_v_predictor_16x32_ssse3,
+                aom_smooth_h_predictor_16x32_ssse3)
+INTRA_PRED_TEST(SSSE3_4, TX_16X64, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_paeth_predictor_16x64_ssse3,
+                aom_smooth_predictor_16x64_ssse3,
+                aom_smooth_v_predictor_16x64_ssse3,
+                aom_smooth_h_predictor_16x64_ssse3)
+INTRA_PRED_TEST(SSSE3_5, TX_16X4, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_paeth_predictor_16x4_ssse3, aom_smooth_predictor_16x4_ssse3,
+                aom_smooth_v_predictor_16x4_ssse3,
+                aom_smooth_h_predictor_16x4_ssse3)
 #endif  // HAVE_SSSE3
 
 #if HAVE_AVX2
-INTRA_PRED_TEST(AVX2_1, TestIntraPred16, "intra16x16", NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+INTRA_PRED_TEST(AVX2_1, TX_16X16, NULL, NULL, NULL, NULL, NULL, NULL,
                 aom_paeth_predictor_16x16_avx2, NULL, NULL, NULL)
-INTRA_PRED_TEST(AVX2_2, TestIntraPred16, "intra16x8", NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+INTRA_PRED_TEST(AVX2_2, TX_16X8, NULL, NULL, NULL, NULL, NULL, NULL,
                 aom_paeth_predictor_16x8_avx2, NULL, NULL, NULL)
-INTRA_PRED_TEST(AVX2_3, TestIntraPred16, "intra16x32", NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+INTRA_PRED_TEST(AVX2_3, TX_16X32, NULL, NULL, NULL, NULL, NULL, NULL,
                 aom_paeth_predictor_16x32_avx2, NULL, NULL, NULL)
+INTRA_PRED_TEST(AVX2_4, TX_16X64, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_paeth_predictor_16x64_avx2, NULL, NULL, NULL)
 #endif  // HAVE_AVX2
 
 #if HAVE_DSPR2
-INTRA_PRED_TEST(DSPR2, TestIntraPred16, "intra16x16",
-                aom_dc_predictor_16x16_dspr2, NULL, NULL, NULL, NULL,
-                aom_h_predictor_16x16_dspr2, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(DSPR2, TX_16X16, aom_dc_predictor_16x16_dspr2, NULL, NULL, NULL,
+                NULL, aom_h_predictor_16x16_dspr2, NULL, NULL, NULL, NULL)
 #endif  // HAVE_DSPR2
 
 #if HAVE_NEON
-INTRA_PRED_TEST(NEON, TestIntraPred16, "intra16x16",
-                aom_dc_predictor_16x16_neon, aom_dc_left_predictor_16x16_neon,
+INTRA_PRED_TEST(NEON, TX_16X16, aom_dc_predictor_16x16_neon,
+                aom_dc_left_predictor_16x16_neon,
                 aom_dc_top_predictor_16x16_neon,
                 aom_dc_128_predictor_16x16_neon, aom_v_predictor_16x16_neon,
-                aom_h_predictor_16x16_neon, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
+                aom_h_predictor_16x16_neon, NULL, NULL, NULL, NULL)
 #endif  // HAVE_NEON
 
 #if HAVE_MSA
-INTRA_PRED_TEST(MSA, TestIntraPred16, "intra16x16", aom_dc_predictor_16x16_msa,
+INTRA_PRED_TEST(MSA, TX_16X16, aom_dc_predictor_16x16_msa,
                 aom_dc_left_predictor_16x16_msa, aom_dc_top_predictor_16x16_msa,
                 aom_dc_128_predictor_16x16_msa, aom_v_predictor_16x16_msa,
-                aom_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
+                aom_h_predictor_16x16_msa, NULL, NULL, NULL, NULL)
 #endif  // HAVE_MSA
 
 // -----------------------------------------------------------------------------
-// 32x32
+// 32x32, 32x16, 32x64, 32x8
 
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_smooth_v_predictor_32x32_c
-#define smooth_h_pred_func aom_smooth_h_predictor_32x32_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
-INTRA_PRED_TEST(C_1, TestIntraPred32, "intra32x32", aom_dc_predictor_32x32_c,
+INTRA_PRED_TEST(C_1, TX_32X32, aom_dc_predictor_32x32_c,
                 aom_dc_left_predictor_32x32_c, aom_dc_top_predictor_32x32_c,
                 aom_dc_128_predictor_32x32_c, aom_v_predictor_32x32_c,
-                aom_h_predictor_32x32_c, aom_d45e_predictor_32x32_c,
-                aom_d135_predictor_32x32_c, aom_d117_predictor_32x32_c,
-                aom_d153_predictor_32x32_c, aom_d207e_predictor_32x32_c,
-                aom_d63e_predictor_32x32_c, aom_paeth_predictor_32x32_c,
-                aom_smooth_predictor_32x32_c, smooth_v_pred_func,
-                smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
-
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_smooth_v_predictor_32x16_c
-#define smooth_h_pred_func aom_smooth_h_predictor_32x16_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
-INTRA_PRED_TEST(C_2, TestIntraPred32, "intra32x16", aom_dc_predictor_32x16_c,
+                aom_h_predictor_32x32_c, aom_paeth_predictor_32x32_c,
+                aom_smooth_predictor_32x32_c, aom_smooth_v_predictor_32x32_c,
+                aom_smooth_h_predictor_32x32_c)
+
+INTRA_PRED_TEST(C_2, TX_32X16, aom_dc_predictor_32x16_c,
                 aom_dc_left_predictor_32x16_c, aom_dc_top_predictor_32x16_c,
                 aom_dc_128_predictor_32x16_c, aom_v_predictor_32x16_c,
-                aom_h_predictor_32x16_c, aom_d45e_predictor_32x16_c,
-                aom_d135_predictor_32x16_c, aom_d117_predictor_32x16_c,
-                aom_d153_predictor_32x16_c, aom_d207e_predictor_32x16_c,
-                aom_d63e_predictor_32x16_c, aom_paeth_predictor_32x16_c,
-                aom_smooth_predictor_32x16_c, smooth_v_pred_func,
-                smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
+                aom_h_predictor_32x16_c, aom_paeth_predictor_32x16_c,
+                aom_smooth_predictor_32x16_c, aom_smooth_v_predictor_32x16_c,
+                aom_smooth_h_predictor_32x16_c)
+
+INTRA_PRED_TEST(C_3, TX_32X64, aom_dc_predictor_32x64_c,
+                aom_dc_left_predictor_32x64_c, aom_dc_top_predictor_32x64_c,
+                aom_dc_128_predictor_32x64_c, aom_v_predictor_32x64_c,
+                aom_h_predictor_32x64_c, aom_paeth_predictor_32x64_c,
+                aom_smooth_predictor_32x64_c, aom_smooth_v_predictor_32x64_c,
+                aom_smooth_h_predictor_32x64_c)
+
+INTRA_PRED_TEST(C_4, TX_32X8, aom_dc_predictor_32x8_c,
+                aom_dc_left_predictor_32x8_c, aom_dc_top_predictor_32x8_c,
+                aom_dc_128_predictor_32x8_c, aom_v_predictor_32x8_c,
+                aom_h_predictor_32x8_c, aom_paeth_predictor_32x8_c,
+                aom_smooth_predictor_32x8_c, aom_smooth_v_predictor_32x8_c,
+                aom_smooth_h_predictor_32x8_c)
 
 #if HAVE_SSE2
-INTRA_PRED_TEST(SSE2_1, TestIntraPred32, "intra32x32",
-                aom_dc_predictor_32x32_sse2, aom_dc_left_predictor_32x32_sse2,
+INTRA_PRED_TEST(SSE2_1, TX_32X32, aom_dc_predictor_32x32_sse2,
+                aom_dc_left_predictor_32x32_sse2,
                 aom_dc_top_predictor_32x32_sse2,
                 aom_dc_128_predictor_32x32_sse2, aom_v_predictor_32x32_sse2,
-                aom_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_2, TestIntraPred32, "intra32x16",
-                aom_dc_predictor_32x16_sse2, aom_dc_left_predictor_32x16_sse2,
+                aom_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_2, TX_32X16, aom_dc_predictor_32x16_sse2,
+                aom_dc_left_predictor_32x16_sse2,
                 aom_dc_top_predictor_32x16_sse2,
                 aom_dc_128_predictor_32x16_sse2, aom_v_predictor_32x16_sse2,
-                aom_h_predictor_32x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
+                aom_h_predictor_32x16_sse2, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_3, TX_32X64, aom_dc_predictor_32x64_sse2,
+                aom_dc_left_predictor_32x64_sse2,
+                aom_dc_top_predictor_32x64_sse2,
+                aom_dc_128_predictor_32x64_sse2, aom_v_predictor_32x64_sse2,
+                aom_h_predictor_32x64_sse2, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_4, TX_32X8, aom_dc_predictor_32x8_sse2,
+                aom_dc_left_predictor_32x8_sse2, aom_dc_top_predictor_32x8_sse2,
+                aom_dc_128_predictor_32x8_sse2, aom_v_predictor_32x8_sse2,
+                aom_h_predictor_32x8_sse2, NULL, NULL, NULL, NULL)
 #endif  // HAVE_SSE2
 
 #if HAVE_SSSE3
-INTRA_PRED_TEST(SSSE3_1, TestIntraPred32, "intra32x32", NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, aom_d153_predictor_32x32_ssse3,
-                NULL, NULL, aom_paeth_predictor_32x32_ssse3,
-                aom_smooth_predictor_32x32_ssse3, NULL, NULL)
-INTRA_PRED_TEST(SSSE3_2, TestIntraPred32, "intra32x16", NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+INTRA_PRED_TEST(SSSE3_1, TX_32X32, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_paeth_predictor_32x32_ssse3,
+                aom_smooth_predictor_32x32_ssse3,
+                aom_smooth_v_predictor_32x32_ssse3,
+                aom_smooth_h_predictor_32x32_ssse3)
+INTRA_PRED_TEST(SSSE3_2, TX_32X16, NULL, NULL, NULL, NULL, NULL, NULL,
                 aom_paeth_predictor_32x16_ssse3,
-                aom_smooth_predictor_32x16_ssse3, NULL, NULL)
+                aom_smooth_predictor_32x16_ssse3,
+                aom_smooth_v_predictor_32x16_ssse3,
+                aom_smooth_h_predictor_32x16_ssse3)
+INTRA_PRED_TEST(SSSE3_3, TX_32X64, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_paeth_predictor_32x64_ssse3,
+                aom_smooth_predictor_32x64_ssse3,
+                aom_smooth_v_predictor_32x64_ssse3,
+                aom_smooth_h_predictor_32x64_ssse3)
+INTRA_PRED_TEST(SSSE3_4, TX_32X8, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_paeth_predictor_32x8_ssse3, aom_smooth_predictor_32x8_ssse3,
+                aom_smooth_v_predictor_32x8_ssse3,
+                aom_smooth_h_predictor_32x8_ssse3)
 #endif  // HAVE_SSSE3
 
 #if HAVE_AVX2
-INTRA_PRED_TEST(AVX2_1, TestIntraPred32, "intra32x32",
-                aom_dc_predictor_32x32_avx2, aom_dc_left_predictor_32x32_avx2,
+INTRA_PRED_TEST(AVX2_1, TX_32X32, aom_dc_predictor_32x32_avx2,
+                aom_dc_left_predictor_32x32_avx2,
                 aom_dc_top_predictor_32x32_avx2,
                 aom_dc_128_predictor_32x32_avx2, aom_v_predictor_32x32_avx2,
-                aom_h_predictor_32x32_avx2, NULL, NULL, NULL, NULL, NULL, NULL,
-                aom_paeth_predictor_32x32_avx2, NULL, NULL, NULL)
-INTRA_PRED_TEST(AVX2_2, TestIntraPred32, "intra32x16",
-                aom_dc_predictor_32x16_avx2, aom_dc_left_predictor_32x16_avx2,
+                aom_h_predictor_32x32_avx2, aom_paeth_predictor_32x32_avx2,
+                NULL, NULL, NULL)
+INTRA_PRED_TEST(AVX2_2, TX_32X16, aom_dc_predictor_32x16_avx2,
+                aom_dc_left_predictor_32x16_avx2,
                 aom_dc_top_predictor_32x16_avx2,
                 aom_dc_128_predictor_32x16_avx2, aom_v_predictor_32x16_avx2,
-                NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-                aom_paeth_predictor_32x16_avx2, NULL, NULL, NULL)
+                NULL, aom_paeth_predictor_32x16_avx2, NULL, NULL, NULL)
+INTRA_PRED_TEST(AVX2_3, TX_32X64, aom_dc_predictor_32x64_avx2,
+                aom_dc_left_predictor_32x64_avx2,
+                aom_dc_top_predictor_32x64_avx2,
+                aom_dc_128_predictor_32x64_avx2, aom_v_predictor_32x64_avx2,
+                NULL, aom_paeth_predictor_32x64_avx2, NULL, NULL, NULL)
 #endif  // HAVE_AVX2
 
 #if HAVE_NEON
-INTRA_PRED_TEST(NEON, TestIntraPred32, "intra32x32",
-                aom_dc_predictor_32x32_neon, aom_dc_left_predictor_32x32_neon,
+INTRA_PRED_TEST(NEON, TX_32X32, aom_dc_predictor_32x32_neon,
+                aom_dc_left_predictor_32x32_neon,
                 aom_dc_top_predictor_32x32_neon,
                 aom_dc_128_predictor_32x32_neon, aom_v_predictor_32x32_neon,
-                aom_h_predictor_32x32_neon, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
+                aom_h_predictor_32x32_neon, NULL, NULL, NULL, NULL)
 #endif  // HAVE_NEON
 
 #if HAVE_MSA
-INTRA_PRED_TEST(MSA, TestIntraPred32, "intra32x32", aom_dc_predictor_32x32_msa,
+INTRA_PRED_TEST(MSA, TX_32X32, aom_dc_predictor_32x32_msa,
                 aom_dc_left_predictor_32x32_msa, aom_dc_top_predictor_32x32_msa,
                 aom_dc_128_predictor_32x32_msa, aom_v_predictor_32x32_msa,
-                aom_h_predictor_32x32_msa, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
+                aom_h_predictor_32x32_msa, NULL, NULL, NULL, NULL)
 #endif  // HAVE_MSA
 
 // -----------------------------------------------------------------------------
+// 64x64, 64x32, 64x16
+
+INTRA_PRED_TEST(C_1, TX_64X64, aom_dc_predictor_64x64_c,
+                aom_dc_left_predictor_64x64_c, aom_dc_top_predictor_64x64_c,
+                aom_dc_128_predictor_64x64_c, aom_v_predictor_64x64_c,
+                aom_h_predictor_64x64_c, aom_paeth_predictor_64x64_c,
+                aom_smooth_predictor_64x64_c, aom_smooth_v_predictor_64x64_c,
+                aom_smooth_h_predictor_64x64_c)
+
+INTRA_PRED_TEST(C_2, TX_64X32, aom_dc_predictor_64x32_c,
+                aom_dc_left_predictor_64x32_c, aom_dc_top_predictor_64x32_c,
+                aom_dc_128_predictor_64x32_c, aom_v_predictor_64x32_c,
+                aom_h_predictor_64x32_c, aom_paeth_predictor_64x32_c,
+                aom_smooth_predictor_64x32_c, aom_smooth_v_predictor_64x32_c,
+                aom_smooth_h_predictor_64x32_c)
+
+INTRA_PRED_TEST(C_3, TX_64X16, aom_dc_predictor_64x16_c,
+                aom_dc_left_predictor_64x16_c, aom_dc_top_predictor_64x16_c,
+                aom_dc_128_predictor_64x16_c, aom_v_predictor_64x16_c,
+                aom_h_predictor_64x16_c, aom_paeth_predictor_64x16_c,
+                aom_smooth_predictor_64x16_c, aom_smooth_v_predictor_64x16_c,
+                aom_smooth_h_predictor_64x16_c)
+
+#if HAVE_SSE2
+INTRA_PRED_TEST(SSE2_4, TX_64X64, aom_dc_predictor_64x64_sse2,
+                aom_dc_left_predictor_64x64_sse2,
+                aom_dc_top_predictor_64x64_sse2,
+                aom_dc_128_predictor_64x64_sse2, aom_v_predictor_64x64_sse2,
+                aom_h_predictor_64x64_sse2, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_5, TX_64X32, aom_dc_predictor_64x32_sse2,
+                aom_dc_left_predictor_64x32_sse2,
+                aom_dc_top_predictor_64x32_sse2,
+                aom_dc_128_predictor_64x32_sse2, aom_v_predictor_64x32_sse2,
+                aom_h_predictor_64x32_sse2, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_6, TX_64X16, aom_dc_predictor_64x16_sse2,
+                aom_dc_left_predictor_64x16_sse2,
+                aom_dc_top_predictor_64x16_sse2,
+                aom_dc_128_predictor_64x16_sse2, aom_v_predictor_64x16_sse2,
+                aom_h_predictor_64x16_sse2, NULL, NULL, NULL, NULL)
+#endif
+
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3_4, TX_64X64, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_paeth_predictor_64x64_ssse3,
+                aom_smooth_predictor_64x64_ssse3,
+                aom_smooth_v_predictor_64x64_ssse3,
+                aom_smooth_h_predictor_64x64_ssse3)
+INTRA_PRED_TEST(SSSE3_5, TX_64X32, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_paeth_predictor_64x32_ssse3,
+                aom_smooth_predictor_64x32_ssse3,
+                aom_smooth_v_predictor_64x32_ssse3,
+                aom_smooth_h_predictor_64x32_ssse3)
+INTRA_PRED_TEST(SSSE3_6, TX_64X16, NULL, NULL, NULL, NULL, NULL, NULL,
+                aom_paeth_predictor_64x16_ssse3,
+                aom_smooth_predictor_64x16_ssse3,
+                aom_smooth_v_predictor_64x16_ssse3,
+                aom_smooth_h_predictor_64x16_ssse3)
+#endif
+
+#if HAVE_AVX2
+INTRA_PRED_TEST(AVX2_4, TX_64X64, aom_dc_predictor_64x64_avx2,
+                aom_dc_left_predictor_64x64_avx2,
+                aom_dc_top_predictor_64x64_avx2,
+                aom_dc_128_predictor_64x64_avx2, aom_v_predictor_64x64_avx2,
+                NULL, aom_paeth_predictor_64x64_avx2, NULL, NULL, NULL)
+INTRA_PRED_TEST(AVX2_5, TX_64X32, aom_dc_predictor_64x32_avx2,
+                aom_dc_left_predictor_64x32_avx2,
+                aom_dc_top_predictor_64x32_avx2,
+                aom_dc_128_predictor_64x32_avx2, aom_v_predictor_64x32_avx2,
+                NULL, aom_paeth_predictor_64x32_avx2, NULL, NULL, NULL)
+INTRA_PRED_TEST(AVX2_6, TX_64X16, aom_dc_predictor_64x16_avx2,
+                aom_dc_left_predictor_64x16_avx2,
+                aom_dc_top_predictor_64x16_avx2,
+                aom_dc_128_predictor_64x16_avx2, aom_v_predictor_64x16_avx2,
+                NULL, aom_paeth_predictor_64x16_avx2, NULL, NULL, NULL)
+#endif
+// -----------------------------------------------------------------------------
 // High Bitdepth
-#if CONFIG_HIGHBITDEPTH
 namespace {
 
 typedef void (*AvxHighbdPredFunc)(uint16_t *dst, ptrdiff_t y_stride,
@@ -826,17 +888,16 @@ typedef void (*AvxHighbdPredFunc)(uint16_t *dst, ptrdiff_t y_stride,
 
 typedef IntraPredTestMem<uint16_t> Av1HighbdIntraPredTestMem;
 
-void TestHighbdIntraPred(const char name[], AvxHighbdPredFunc const *pred_funcs,
-                         const char *const signatures[], int block_width,
-                         int block_height) {
+void TestHighbdIntraPred(TX_SIZE tx_size, AvxHighbdPredFunc const *pred_funcs,
+                         const char *const signatures[]) {
+  const int block_width = tx_size_wide[tx_size];
+  const int block_height = tx_size_high[tx_size];
   const int num_pixels_per_test =
       block_width * block_height * kNumAv1IntraFuncs;
   const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test);
   Av1HighbdIntraPredTestMem intra_pred_test_mem;
-  const uint16_t *const above = intra_pred_test_mem.above_mem + 16;
   const int bd = 12;
-
-  intra_pred_test_mem.Init(block_width, bd);
+  intra_pred_test_mem.Init(block_width, block_height, bd);
 
   for (int k = 0; k < kNumAv1IntraFuncs; ++k) {
     if (pred_funcs[k] == NULL) continue;
@@ -845,646 +906,559 @@ void TestHighbdIntraPred(const char name[], AvxHighbdPredFunc const *pred_funcs,
     aom_usec_timer timer;
     aom_usec_timer_start(&timer);
     for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
-      pred_funcs[k](intra_pred_test_mem.src, kBPS, above,
-                    intra_pred_test_mem.left, bd);
+      pred_funcs[k](intra_pred_test_mem.src, intra_pred_test_mem.stride,
+                    intra_pred_test_mem.above, intra_pred_test_mem.left, bd);
     }
     libaom_test::ClearSystemState();
     aom_usec_timer_mark(&timer);
     const int elapsed_time =
         static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
-    CheckMd5Signature(name, signatures, intra_pred_test_mem.src,
-                      sizeof(intra_pred_test_mem.src), elapsed_time, k);
-  }
-}
-
-void TestHighbdIntraPred4(const char *block_name,
-                          AvxHighbdPredFunc const *pred_funcs) {
-  static const char *const kSignatures4x4[kNumAv1IntraFuncs] = {
-    "11f74af6c5737df472f3275cbde062fa",
-    "51bea056b6447c93f6eb8f6b7e8f6f71",
-    "27e97f946766331795886f4de04c5594",
-    "53ab15974b049111fb596c5168ec7e3f",
-    "f0b640bb176fbe4584cf3d32a9b0320a",
-    "729783ca909e03afd4b47111c80d967b",
-    "d631a8544ccc87702db3e98fac494657",
-    "293fc903254a33754133314c6cdba81f",
-    "f8074d704233e73dfd35b458c6092374",
-    "aa6363d08544a1ec4da33d7a0be5640d",
-    "0bdc21a3acdebc393bc2c22e71bbeada",
-    "a48f7a484ba4ad3916055c7160665b56",
-    "6e30009c45474a22032678b1bd579c8f",
-    "e57cba016d808aa8a35619df2a65f049",
-#if CONFIG_SMOOTH_HV
-    "55a6c37f39afcbbf5abca4a985b96459",
-    "a623d45b37dafec1f8a75c4c5218913d",
-#endif  // CONFIG_SMOOTH_HV
-  };
-  static const char *const kSignatures4x8[kNumAv1IntraFuncs] = {
-    "22d519b796d59644043466320e4ccd14",
-    "09513a738c49b3f9542d27f34abbe1d5",
-    "807ae5e8813443ff01e71be6efacfb69",
-    "cbfa18d0293430b6e9708b0be1fd2394",
-    "346c354c34ec7fa780b576db355dab88",
-    "f97dae85c35359632380b09ca98d611e",
-    "aed1beef71de33856c814ff7d63dd9db",
-    "49c47c04dd3d23d6fc5cc32bf9d40ae4",
-    "a24aade6e22b323ee28c8bf08aa2d234",
-    "aefef502f9e144e71cd27dc7383b3c28",
-    "b284ae5277b85ebdd16b5952149f7458",
-    "8dc5791167271f6f347582e07379f580",
-    "698ae351d8896d89ed9e4e67b6e53eda",
-    "dcc197034a9c45a3d8238bf085835f4e",
-#if CONFIG_SMOOTH_HV
-    "7a35e2c42ffdc2efc2d6d1d75a100fc7",
-    "41ab6cebd4516c87a91b2a593e2c2506",
-#endif
-  };
-
-  if (!strcmp(block_name, "Hbd Intra4x4")) {
-    TestHighbdIntraPred(block_name, pred_funcs, kSignatures4x4, 4, 4);
-  }
-  if (!strcmp(block_name, "Hbd Intra4x8")) {
-    TestHighbdIntraPred(block_name, pred_funcs, kSignatures4x8, 4, 8);
-  }
-}
-
-void TestHighbdIntraPred8(const char *block_name,
-                          AvxHighbdPredFunc const *pred_funcs) {
-  static const char *const kSignatures8x8[kNumAv1IntraFuncs] = {
-    "03da8829fe94663047fd108c5fcaa71d",
-    "ecdb37b8120a2d3a4c706b016bd1bfd7",
-    "1d4543ed8d2b9368cb96898095fe8a75",
-    "f791c9a67b913cbd82d9da8ecede30e2",
-    "065c70646f4dbaff913282f55a45a441",
-    "51f87123616662ef7c35691497dfd0ba",
-    "4f53cf8e5f43894dc0759f43c7081f60",
-    "9ffe186a6bc7db95275f1bbddd6f7aba",
-    "a3258a2eae2e2bd55cb8f71351b22998",
-    "8d909f0a2066e39b3216092c6289ece4",
-    "6751f60655aba44aff78aaaf4e967377",
-    "d31a449872fab968a8d41de578338780",
-    "85c01ba03df68f9ece7bd3fa0f8980e6",
-    "ad19b7dac092f56df6d054e1f67f21e7",
-#if CONFIG_SMOOTH_HV
-    "0edc415b5dd7299f7a34fb9f71d31d78",
-    "2bc8ec19e9f4b77a64b8a0a1f6aec7e7",
-#endif  // CONFIG_SMOOTH_HV
-  };
-  static const char *const kSignatures8x4[kNumAv1IntraFuncs] = {
-    "d58cd4c4bf3b7bbaa5db5e1a5622ec78",
-    "6e572c35aa782d00cafcb99e9ea047ea",
-    "e8c22a3702b416dc9ab974505afbed09",
-    "aaa4e4762a795aad7ad74de0c662c4e4",
-    "a19f9101967383c3dcbd516dc317a291",
-    "9ab8cb91f1a595b9ebe3fe8de58031aa",
-    "c6c7d65264397d4d31e378e1f1cfd921",
-    "5804158e463ff794b6b8a623f5d2c10d",
-    "c342cdeb39aae4c4f7be10e057029298",
-    "c1bbbcfe4b25f6b8eca6ad2f7ee793d3",
-    "98d1dab8b949859b9c65298ee9f105f8",
-    "396e803aaf6d7a03a231edc48b396051",
-    "2cf9021d5f1169268699807ee118b65f",
-    "ee9605fcbd6fb871f1c5cd81a6989327",
-#if CONFIG_SMOOTH_HV
-    "0edc415b5dd7299f7a34fb9f71d31d78",
-    "2bc8ec19e9f4b77a64b8a0a1f6aec7e7",
-#endif
-  };
-  static const char *const kSignatures8x16[kNumAv1IntraFuncs] = {
-    "4562de1d0336610880fdd5685498a9ec",
-    "16310fa7076394f16fc85c4b149d89c9",
-    "0e94af88e1dc573b6f0f499cddd1f530",
-    "dfd245ee20d091c67809160340365aa9",
-    "d3562504327f70c096c5be23fd8a3747",
-    "601b853558502acbb5135eadd2da117a",
-    "e83f9a8bc16b507d2ed0b6b31a25d6f5",
-    "fc8427d942246e8cba81247bb294afb5",
-    "89cde712e4c1ef675ea156ad679c62c7",
-    "0a68c2b28c3b171ad797cf76a7058f10",
-    "e70724010e12d8f374cedd3910ceb0d5",
-    "ad7987e91267503ba6fd3e8be42eb48c",
-    "3c624345a723a1b2b1bea05a6a08bc99",
-    "2a9c781de609e0184cc7ab442050f4e5",
-#if CONFIG_SMOOTH_HV
-    "0ddc5035c22252747126b61fc238c74d",
-    "e43f5d83bab759af69c7b6773fc8f9b2",
-#endif
-  };
-  if (!strcmp(block_name, "Hbd Intra8x8")) {
-    TestHighbdIntraPred(block_name, pred_funcs, kSignatures8x8, 8, 8);
-  }
-  if (!strcmp(block_name, "Hbd Intra8x4")) {
-    TestHighbdIntraPred(block_name, pred_funcs, kSignatures8x4, 8, 4);
-  }
-  if (!strcmp(block_name, "Hbd Intra8x16")) {
-    TestHighbdIntraPred(block_name, pred_funcs, kSignatures8x16, 8, 16);
-  }
-}
-
-void TestHighbdIntraPred16(const char *block_name,
-                           AvxHighbdPredFunc const *pred_funcs) {
-  static const char *const kSignatures16x16[kNumAv1IntraFuncs] = {
-    "e33cb3f56a878e2fddb1b2fc51cdd275",
-    "c7bff6f04b6052c8ab335d726dbbd52d",
-    "d0b0b47b654a9bcc5c6008110a44589b",
-    "78f5da7b10b2b9ab39f114a33b6254e9",
-    "c78e31d23831abb40d6271a318fdd6f3",
-    "90d1347f4ec9198a0320daecb6ff90b8",
-    "e38e12830e2ee5a01a064ec5998d5948",
-    "cf28bd387b81ad3e5f1a1c779a4b70a0",
-    "24c304330431ddeaf630f6ce94af2eac",
-    "91a329798036bf64e8e00a87b131b8b1",
-    "e536338d1a8ee192b9e591855db1a222",
-    "54ecd47737f71c62d24e3779585113f2",
-    "e63ded54ab3d0e8728b6f24d4f01e53f",
-    "35ce21fbe0ea114c089fc3489a78155d",
-#if CONFIG_SMOOTH_HV
-    "f277f6ef8e4d717f1f0dfe2706ac197d",
-    "e8014d3f41256976c02e0f1e622ba2b9",
-#endif  // CONFIG_SMOOTH_HV
-  };
-  static const char *const kSignatures16x8[kNumAv1IntraFuncs] = {
-    "a57d6b5a9bfd30c29591d8717ace9c51",
-    "f5907ba97ee6c53e339e953fc8d845ee",
-    "ea3aa727913ce45af06f89dd1808db5f",
-    "408af4f23e48d14b48ee35ae094fcd18",
-    "85c41cbcb5d744f7961e8950026fbffe",
-    "8a4e588a837638887ba671f8d4910485",
-    "caae3cc3d419bbd28aa389dbe4febee1",
-    "ea67fb80d71b6471467c79662af1186c",
-    "c83f7252412dd1ad2fc6af848e7f6be8",
-    "f45af3d697f42f1b9b8def4e46bac78c",
-    "dca4a2aaf5f63db387e264ba5963943a",
-    "d01b1bcc50b4b66c1231142eae628cd3",
-    "b792d8826b67a21757ea7097cff9e05b",
-    "f94ce7101bb87fd3bb9312112527dbf4",
-#if CONFIG_SMOOTH_HV
-    "688c6660a6dc6fa61fa1aa38e708c209",
-    "0cdf641b4f81d69509c92ae0b93ef5ff",
-#endif
-  };
-  static const char *const kSignatures16x32[kNumAv1IntraFuncs] = {
-    "aee4b3b0e3cc02d48e2c40d77f807927",
-    "8baef2b2e789f79c8df9d90ad10f34a4",
-    "038c38ee3c4f090bb8d736eab136aafc",
-    "1a3de2aaeaffd68a9fd6c7f6557b83f3",
-    "385c6e0ea29421dd81011a2934641e26",
-    "6cf96c285d1a2d4787f955dad715b08c",
-    "21f82421fda1c3afca8baca0dc048a52",
-    "eac3734852c99a051f6d15a921d9e7b9",
-    "c81f7ffec79508bf78d0f2c67d8abe96",
-    "14b8c62304f65a06653b9b35dfe12d97",
-    "e0893310042511275ae04e5186ee5326",
-    "b4f05903a6191093be719794417ac6fd",
-    "2d7f75dcd73b9528c8396279ff09ff3a",
-    "5a63cd1841e4ed470e4ca5ef845f2281",
-#if CONFIG_SMOOTH_HV
-    "610d899ca945fbead33287d4335a8b32",
-    "6bafaad81fce37be46730187e78d8b11",
-#endif
-  };
-  if (!strcmp(block_name, "Hbd Intra16x16")) {
-    TestHighbdIntraPred(block_name, pred_funcs, kSignatures16x16, 16, 16);
-  }
-  if (!strcmp(block_name, "Hbd Intra16x8")) {
-    TestHighbdIntraPred(block_name, pred_funcs, kSignatures16x8, 16, 8);
-  }
-  if (!strcmp(block_name, "Hbd Intra16x32")) {
-    TestHighbdIntraPred(block_name, pred_funcs, kSignatures16x32, 16, 32);
+    CheckMd5Signature(
+        tx_size, true, signatures, intra_pred_test_mem.src,
+        intra_pred_test_mem.num_pixels * sizeof(*intra_pred_test_mem.src),
+        elapsed_time, k);
   }
 }
 
-void TestHighbdIntraPred32(const char *block_name,
-                           AvxHighbdPredFunc const *pred_funcs) {
-  static const char *const kSignatures32x32[kNumAv1IntraFuncs] = {
-    "a3e8056ba7e36628cce4917cd956fedd",
-    "cc7d3024fe8748b512407edee045377e",
-    "2aab0a0f330a1d3e19b8ecb8f06387a3",
-    "a547bc3fb7b06910bf3973122a426661",
-    "26f712514da95042f93d6e8dc8e431dc",
-    "bb08c6e16177081daa3d936538dbc2e3",
-    "4e10f10b082a5b4265080c102d34eb47",
-    "42867c8553285e94ee8e4df7abafbda8",
-    "6496bdee96100667833f546e1be3d640",
-    "2ebfa25bf981377e682e580208504300",
-    "1788695b10a6f82ae1a56686dcbcd0a9",
-    "c3b9c506604a7132bbb5f4e97bdb03f0",
-    "84bf83f94a51b33654ca940c6f8bc057",
-    "7168b03fc31bf29596a344d6a35d007c",
-#if CONFIG_SMOOTH_HV
-    "b073a70d3672f1282236994f5d12e94b",
-    "c51607aebad5dcb3c1e3b58ef9e5b84e",
-#endif  // CONFIG_SMOOTH_HV
-  };
-  static const char *const kSignatures32x16[kNumAv1IntraFuncs] = {
-    "290b23c9f5a1de7905bfa71a942da29b",
-    "701e7b82593c66da5052fc4b6afd79ce",
-    "4da828c5455cd246735a663fbb204989",
-    "e3fbeaf234efece8dbd752b77226200c",
-    "4d1d8c969f05155a7e7e84cf7aad021b",
-    "c22e4877c2c946d5bdc0d542e29e70cf",
-    "ffd86b234d65c2e1386a5b5b5c188a69",
-    "50aaaa7d90e300b635ab18cdd73e189b",
-    "a945dc7429df168e2169d81b58a15859",
-    "66725070d7fad02dee78730ba0843e19",
-    "33d873cb05d45df2af4ff59033833db7",
-    "0dd783695b69271f65d56f5516fa6dc0",
-    "8ac1ce815e7780500f842b0beb0bb980",
-    "9fee2e2502b507f25bfad30a55b0b610",
-#if CONFIG_SMOOTH_HV
-    "4ced9c212ec6f9956e27f68a91b59fef",
-    "4a7a0b93f138bb0863e4e465b01ec0b1",
-#endif
-  };
-  if (!strcmp(block_name, "Hbd Intra32x32")) {
-    TestHighbdIntraPred(block_name, pred_funcs, kSignatures32x32, 32, 32);
-  }
-  if (!strcmp(block_name, "Hbd Intra32x16")) {
-    TestHighbdIntraPred(block_name, pred_funcs, kSignatures32x16, 32, 16);
-  }
-}
+static const char *const kHighbdSignatures[TX_SIZES_ALL][kNumAv1IntraFuncs] = {
+  {
+      // 4X4
+      "11f74af6c5737df472f3275cbde062fa",
+      "51bea056b6447c93f6eb8f6b7e8f6f71",
+      "27e97f946766331795886f4de04c5594",
+      "53ab15974b049111fb596c5168ec7e3f",
+      "f0b640bb176fbe4584cf3d32a9b0320a",
+      "729783ca909e03afd4b47111c80d967b",
+      "6e30009c45474a22032678b1bd579c8f",
+      "e57cba016d808aa8a35619df2a65f049",
+      "55a6c37f39afcbbf5abca4a985b96459",
+      "a623d45b37dafec1f8a75c4c5218913d",
+  },
+  {
+      // 8X8
+      "03da8829fe94663047fd108c5fcaa71d",
+      "ecdb37b8120a2d3a4c706b016bd1bfd7",
+      "1d4543ed8d2b9368cb96898095fe8a75",
+      "f791c9a67b913cbd82d9da8ecede30e2",
+      "065c70646f4dbaff913282f55a45a441",
+      "51f87123616662ef7c35691497dfd0ba",
+      "85c01ba03df68f9ece7bd3fa0f8980e6",
+      "ad19b7dac092f56df6d054e1f67f21e7",
+      "0edc415b5dd7299f7a34fb9f71d31d78",
+      "2bc8ec19e9f4b77a64b8a0a1f6aec7e7",
+  },
+  {
+      // 16X16
+      "e33cb3f56a878e2fddb1b2fc51cdd275",
+      "c7bff6f04b6052c8ab335d726dbbd52d",
+      "d0b0b47b654a9bcc5c6008110a44589b",
+      "78f5da7b10b2b9ab39f114a33b6254e9",
+      "c78e31d23831abb40d6271a318fdd6f3",
+      "90d1347f4ec9198a0320daecb6ff90b8",
+      "e63ded54ab3d0e8728b6f24d4f01e53f",
+      "35ce21fbe0ea114c089fc3489a78155d",
+      "f277f6ef8e4d717f1f0dfe2706ac197d",
+      "e8014d3f41256976c02e0f1e622ba2b9",
+  },
+  {
+      // 32X32
+      "a3e8056ba7e36628cce4917cd956fedd",
+      "cc7d3024fe8748b512407edee045377e",
+      "2aab0a0f330a1d3e19b8ecb8f06387a3",
+      "a547bc3fb7b06910bf3973122a426661",
+      "26f712514da95042f93d6e8dc8e431dc",
+      "bb08c6e16177081daa3d936538dbc2e3",
+      "84bf83f94a51b33654ca940c6f8bc057",
+      "7168b03fc31bf29596a344d6a35d007c",
+      "b073a70d3672f1282236994f5d12e94b",
+      "c51607aebad5dcb3c1e3b58ef9e5b84e",
+  },
+  {
+      // 64X64
+      "a6baa0d4bfb2269a94c7a38f86a4bccf",
+      "3f1ef5f473a49eba743f17a3324adf9d",
+      "12ac11889ae5f55b7781454efd706a6a",
+      "d9a906c0e692b22e1b4414e71a704b7e",
+      "47d4cadd56f70c11ff8f3e5d8df81161",
+      "de997744cf24c16c5ac2a36b02b351cc",
+      "23781211ae178ddeb6c4bb97a6bd7d83",
+      "a79d2e28340ca34b9e37daabbf030f63",
+      "0372bd3ddfc258750a6ac106b70587f4",
+      "228ef625d9460cbf6fa253a16a730976",
+  },
+  {
+      // 4X8
+      "22d519b796d59644043466320e4ccd14",
+      "09513a738c49b3f9542d27f34abbe1d5",
+      "807ae5e8813443ff01e71be6efacfb69",
+      "cbfa18d0293430b6e9708b0be1fd2394",
+      "346c354c34ec7fa780b576db355dab88",
+      "f97dae85c35359632380b09ca98d611e",
+      "698ae351d8896d89ed9e4e67b6e53eda",
+      "dcc197034a9c45a3d8238bf085835f4e",
+      "7a35e2c42ffdc2efc2d6d1d75a100fc7",
+      "41ab6cebd4516c87a91b2a593e2c2506",
+  },
+  {
+      // 8X4
+      "d58cd4c4bf3b7bbaa5db5e1a5622ec78",
+      "6e572c35aa782d00cafcb99e9ea047ea",
+      "e8c22a3702b416dc9ab974505afbed09",
+      "aaa4e4762a795aad7ad74de0c662c4e4",
+      "a19f9101967383c3dcbd516dc317a291",
+      "9ab8cb91f1a595b9ebe3fe8de58031aa",
+      "2cf9021d5f1169268699807ee118b65f",
+      "ee9605fcbd6fb871f1c5cd81a6989327",
+      "b4871af8316089e3e23522175df7e93f",
+      "d33301e1c2cb173be46792a22d19881a",
+  },
+  {
+      // 8X16
+      "4562de1d0336610880fdd5685498a9ec",
+      "16310fa7076394f16fc85c4b149d89c9",
+      "0e94af88e1dc573b6f0f499cddd1f530",
+      "dfd245ee20d091c67809160340365aa9",
+      "d3562504327f70c096c5be23fd8a3747",
+      "601b853558502acbb5135eadd2da117a",
+      "3c624345a723a1b2b1bea05a6a08bc99",
+      "2a9c781de609e0184cc7ab442050f4e5",
+      "0ddc5035c22252747126b61fc238c74d",
+      "e43f5d83bab759af69c7b6773fc8f9b2",
+  },
+  {
+      // 16X8
+      "a57d6b5a9bfd30c29591d8717ace9c51",
+      "f5907ba97ee6c53e339e953fc8d845ee",
+      "ea3aa727913ce45af06f89dd1808db5f",
+      "408af4f23e48d14b48ee35ae094fcd18",
+      "85c41cbcb5d744f7961e8950026fbffe",
+      "8a4e588a837638887ba671f8d4910485",
+      "b792d8826b67a21757ea7097cff9e05b",
+      "f94ce7101bb87fd3bb9312112527dbf4",
+      "688c6660a6dc6fa61fa1aa38e708c209",
+      "0cdf641b4f81d69509c92ae0b93ef5ff",
+  },
+  {
+      // 16X32
+      "aee4b3b0e3cc02d48e2c40d77f807927",
+      "8baef2b2e789f79c8df9d90ad10f34a4",
+      "038c38ee3c4f090bb8d736eab136aafc",
+      "1a3de2aaeaffd68a9fd6c7f6557b83f3",
+      "385c6e0ea29421dd81011a2934641e26",
+      "6cf96c285d1a2d4787f955dad715b08c",
+      "2d7f75dcd73b9528c8396279ff09ff3a",
+      "5a63cd1841e4ed470e4ca5ef845f2281",
+      "610d899ca945fbead33287d4335a8b32",
+      "6bafaad81fce37be46730187e78d8b11",
+  },
+  {
+      // 32X16
+      "290b23c9f5a1de7905bfa71a942da29b",
+      "701e7b82593c66da5052fc4b6afd79ce",
+      "4da828c5455cd246735a663fbb204989",
+      "e3fbeaf234efece8dbd752b77226200c",
+      "4d1d8c969f05155a7e7e84cf7aad021b",
+      "c22e4877c2c946d5bdc0d542e29e70cf",
+      "8ac1ce815e7780500f842b0beb0bb980",
+      "9fee2e2502b507f25bfad30a55b0b610",
+      "4ced9c212ec6f9956e27f68a91b59fef",
+      "4a7a0b93f138bb0863e4e465b01ec0b1",
+  },
+  {
+      // 32X64
+      "ad9cfc395a5c5644a21d958c7274ac14",
+      "f29d6d03c143ddf96fef04c19f2c8333",
+      "a8bdc852ef704dd4975c61893e8fbc3f",
+      "7d0bd7dea26226741dbca9a97f27fa74",
+      "45c27c5cca9a91b6ae8379feb0881c9f",
+      "8a0b78df1e001b85c874d686eac4aa1b",
+      "ce9fa75fac54a3f6c0cc3f2083b938f1",
+      "c0dca10d88762c954af18dc9e3791a39",
+      "61df229eddfccab913b8fda4bb02f9ac",
+      "4f4df6bc8d50a5600b573f0e44d70e66",
+  },
+  {
+      // 64X32
+      "db9d82921fd88b24fdff6f849f2f9c87",
+      "5ecc7fdc52d2f575ad4f2d0e9e6b1e11",
+      "b4581311a0a73d95dfac7f8f44591032",
+      "68bd283cfd1a125f6b2ee47cee874d36",
+      "804179f05c032908a5e36077bb87c994",
+      "fc5fd041a8ee779015394d0c066ee43c",
+      "68f5579ccadfe9a1baafb158334a3db2",
+      "fe237e45e215ab06d79046da9ad71e84",
+      "9a8a938a6824551bf7d21b8fd1d70ea1",
+      "eb7332f2017cd96882c76e7136aeaf53",
+  },
+  {
+      // 4X16
+      "7bafa307d507747b8132e7735b7f1c73",
+      "e58bc2d8213a97d1fea9cfb73d7a9633",
+      "435f8a8e8bbf14dbf2fe16b2be9e97aa",
+      "1d0e767b68d84acbfb50b7a04e633836",
+      "5f713bd7b324fe73bb7063e35ee14e5e",
+      "0dac4e1fa3d59814202715468c01ed56",
+      "47709d1db4a330c7a8900f450e6fddd1",
+      "258e0b930bb27db28f05da9cf7d1ee7c",
+      "36cf030fbae767912593efea045bfff5",
+      "248d7aceabb7499febae663fae41a920",
+  },
+  {
+      // 16X4
+      "04dde98e632670e393704742c89f9067",
+      "8c72543f1664651ae1fa08e2ac0adb9b",
+      "2354a2cdc2773aa2df8ab4010db1be39",
+      "6300ad3221c26da39b10e0e6d87ee3be",
+      "8ea30b661c6ba60b28d3167f19e449b8",
+      "fb6c1e4ff101a371cede63c2955cdb7e",
+      "a517c06433d6d7927b16a72184a23e92",
+      "393828be5d62ab6c48668bea5e2f801a",
+      "b1e510c542013eb9d6fb188dea2ce90a",
+      "569a8f2fe01679ca216535ecbcdccb62",
+  },
+  {
+      // 8X32
+      "9d541865c185ca7607852852613ac1fc",
+      "b96be67f08c6b5fa5ebd3411299c2f7c",
+      "75a2dcf50004b9d188849b048239767e",
+      "429492ff415c9fd9b050d73b2ad500f8",
+      "64b3606c1ccd036bd766bd5711392cf4",
+      "cb59844a0f01660ac955bae3511f1100",
+      "3e076155b7a70e8828618e3f33b51e3d",
+      "ed2d1f597ab7c50beff690f737cf9726",
+      "7909c6a26aaf20c59d996d3e5b5f9c29",
+      "965798807240c98c6f7cc9b457ed0773",
+  },
+  {
+      // 32X8
+      "36f391aa31619eec1f4d9ee95ea454cc",
+      "b82648f14eeba2527357cb50bc3223cb",
+      "7a7b2adf429125e8bee9d1d00a66e13f",
+      "4198e4d6ba503b7cc2d7e96bb845f661",
+      "96c160d2ec1be9fe0cdea9682f14d257",
+      "19a450bcebaa75afb4fc6bd1fd6434af",
+      "2bd2e35967d43d0ec1c6587a36f204d5",
+      "49799a99aa4ccfbd989bee92a99422f1",
+      "955530e99813812a74659edeac3f5475",
+      "f0316b84e378a19cd11b19a6e40b2914",
+  },
+  {
+      // 16X64
+      "8cba1b70a0bde29e8ef235cedc5faa7d",
+      "96d00ddc7537bf7f196006591b733b4e",
+      "cbf69d5d157c9f3355a4757b1d6e3414",
+      "3ac1f642019493dec1b737d7a3a1b4e5",
+      "35f9ee300d7fa3c97338e81a6f21dcd4",
+      "aae335442e77c8ebc280f16ea50ba9c7",
+      "a6140fdac2278644328be094d88731db",
+      "2df93621b6ff100f7008432d509f4161",
+      "c77bf5aee39e7ed4a3dd715f816f452a",
+      "02109bd63557d90225c32a8f1338258e",
+  },
+  {
+      // 64X16
+      "a5e2f9fb685d5f4a048e9a96affd25a4",
+      "1348f249690d9eefe09d9ad7ead2c801",
+      "525da4b187acd81b1ff1116b60461141",
+      "e99d072de858094c98b01bd4a6772634",
+      "873bfa9dc24693f19721f7c8d527f7d3",
+      "0acfc6507bd3468e9679efc127d6e4b9",
+      "57d03f8d079c7264854e22ac1157cfae",
+      "6c2c4036f70c7d957a9399b5436c0774",
+      "42b8e4a97b7f8416c72a5148c031c0b1",
+      "a38a2c5f79993dfae8530e9e25800893",
+  },
+};
 
 }  // namespace
 
-#define HIGHBD_INTRA_PRED_TEST(arch, test_func, block_size, dc, dc_left,     \
-                               dc_top, dc_128, v, h, d45e, d135, d117, d153, \
-                               d207e, d63e, tm, smooth, smooth_v, smooth_h)  \
-  TEST(arch, DISABLED_##test_func) {                                         \
-    static const AvxHighbdPredFunc aom_intra_pred[] = {                      \
-      dc,   dc_left, dc_top, dc_128, v,  h,      d45e,     d135,             \
-      d117, d153,    d207e,  d63e,   tm, smooth, smooth_v, smooth_h          \
-    };                                                                       \
-    test_func(block_size, aom_intra_pred);                                   \
+#define HIGHBD_INTRA_PRED_TEST(arch, tx_size, dc, dc_left, dc_top, dc_128, v, \
+                               h, paeth, smooth, smooth_v, smooth_h)          \
+  TEST(arch, DISABLED_##TestHighbdIntraPred_##tx_size) {                      \
+    static const AvxHighbdPredFunc aom_intra_pred[] = {                       \
+      dc, dc_left, dc_top, dc_128, v, h, paeth, smooth, smooth_v, smooth_h    \
+    };                                                                        \
+    TestHighbdIntraPred(tx_size, aom_intra_pred, kHighbdSignatures[tx_size]); \
   }
 
 // -----------------------------------------------------------------------------
-// 4x4
-
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_highbd_smooth_v_predictor_4x4_c
-#define smooth_h_pred_func aom_highbd_smooth_h_predictor_4x4_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
+// 4x4, 4x8, 4x16
 
 HIGHBD_INTRA_PRED_TEST(
-    C_1, TestHighbdIntraPred4, "Hbd Intra4x4", aom_highbd_dc_predictor_4x4_c,
+    C_1, TX_4X4, aom_highbd_dc_predictor_4x4_c,
     aom_highbd_dc_left_predictor_4x4_c, aom_highbd_dc_top_predictor_4x4_c,
     aom_highbd_dc_128_predictor_4x4_c, aom_highbd_v_predictor_4x4_c,
-    aom_highbd_h_predictor_4x4_c, aom_highbd_d45e_predictor_4x4_c,
-    aom_highbd_d135_predictor_4x4_c, aom_highbd_d117_predictor_4x4_c,
-    aom_highbd_d153_predictor_4x4_c, aom_highbd_d207e_predictor_4x4_c,
-    aom_highbd_d63e_predictor_4x4_c, aom_highbd_paeth_predictor_4x4_c,
-    aom_highbd_smooth_predictor_4x4_c, smooth_v_pred_func, smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
+    aom_highbd_h_predictor_4x4_c, aom_highbd_paeth_predictor_4x4_c,
+    aom_highbd_smooth_predictor_4x4_c, aom_highbd_smooth_v_predictor_4x4_c,
+    aom_highbd_smooth_h_predictor_4x4_c)
+
+HIGHBD_INTRA_PRED_TEST(
+    C_2, TX_4X8, aom_highbd_dc_predictor_4x8_c,
+    aom_highbd_dc_left_predictor_4x8_c, aom_highbd_dc_top_predictor_4x8_c,
+    aom_highbd_dc_128_predictor_4x8_c, aom_highbd_v_predictor_4x8_c,
+    aom_highbd_h_predictor_4x8_c, aom_highbd_paeth_predictor_4x8_c,
+    aom_highbd_smooth_predictor_4x8_c, aom_highbd_smooth_v_predictor_4x8_c,
+    aom_highbd_smooth_h_predictor_4x8_c)
 
-#if HAVE_SSE2
 HIGHBD_INTRA_PRED_TEST(
-    SSE2_1, TestHighbdIntraPred4, "Hbd Intra4x4",
-    aom_highbd_dc_predictor_4x4_sse2, aom_highbd_dc_left_predictor_4x4_sse2,
-    aom_highbd_dc_top_predictor_4x4_sse2, aom_highbd_dc_128_predictor_4x4_sse2,
-    aom_highbd_v_predictor_4x4_sse2, aom_highbd_h_predictor_4x4_sse2,
-    aom_highbd_d45e_predictor_4x4_sse2, aom_highbd_d135_predictor_4x4_sse2,
-    aom_highbd_d117_predictor_4x4_sse2, aom_highbd_d153_predictor_4x4_sse2,
-    NULL, NULL, NULL, NULL, NULL, NULL)
-
-HIGHBD_INTRA_PRED_TEST(SSE2_2, TestHighbdIntraPred4, "Hbd Intra4x8",
-                       aom_highbd_dc_predictor_4x8_sse2,
+    C_3, TX_4X16, aom_highbd_dc_predictor_4x16_c,
+    aom_highbd_dc_left_predictor_4x16_c, aom_highbd_dc_top_predictor_4x16_c,
+    aom_highbd_dc_128_predictor_4x16_c, aom_highbd_v_predictor_4x16_c,
+    aom_highbd_h_predictor_4x16_c, aom_highbd_paeth_predictor_4x16_c,
+    aom_highbd_smooth_predictor_4x16_c, aom_highbd_smooth_v_predictor_4x16_c,
+    aom_highbd_smooth_h_predictor_4x16_c)
+
+#if HAVE_SSE2
+HIGHBD_INTRA_PRED_TEST(SSE2_1, TX_4X4, aom_highbd_dc_predictor_4x4_sse2,
+                       aom_highbd_dc_left_predictor_4x4_sse2,
+                       aom_highbd_dc_top_predictor_4x4_sse2,
+                       aom_highbd_dc_128_predictor_4x4_sse2,
+                       aom_highbd_v_predictor_4x4_sse2,
+                       aom_highbd_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL)
+
+HIGHBD_INTRA_PRED_TEST(SSE2_2, TX_4X8, aom_highbd_dc_predictor_4x8_sse2,
                        aom_highbd_dc_left_predictor_4x8_sse2,
                        aom_highbd_dc_top_predictor_4x8_sse2,
                        aom_highbd_dc_128_predictor_4x8_sse2,
                        aom_highbd_v_predictor_4x8_sse2,
-                       aom_highbd_h_predictor_4x8_sse2,
-                       aom_highbd_d45e_predictor_4x8_sse2, NULL, NULL, NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL)
+                       aom_highbd_h_predictor_4x8_sse2, NULL, NULL, NULL, NULL)
 #endif
 
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_highbd_smooth_v_predictor_4x8_c
-#define smooth_h_pred_func aom_highbd_smooth_h_predictor_4x8_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
+// -----------------------------------------------------------------------------
+// 8x8, 8x4, 8x16, 8x32
 
 HIGHBD_INTRA_PRED_TEST(
-    C_2, TestHighbdIntraPred4, "Hbd Intra4x8", aom_highbd_dc_predictor_4x8_c,
-    aom_highbd_dc_left_predictor_4x8_c, aom_highbd_dc_top_predictor_4x8_c,
-    aom_highbd_dc_128_predictor_4x8_c, aom_highbd_v_predictor_4x8_c,
-    aom_highbd_h_predictor_4x8_c, aom_highbd_d45e_predictor_4x8_c,
-    aom_highbd_d135_predictor_4x8_c, aom_highbd_d117_predictor_4x8_c,
-    aom_highbd_d153_predictor_4x8_c, aom_highbd_d207e_predictor_4x8_c,
-    aom_highbd_d63e_predictor_4x8_c, aom_highbd_paeth_predictor_4x8_c,
-    aom_highbd_smooth_predictor_4x8_c, smooth_v_pred_func, smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
+    C_1, TX_8X8, aom_highbd_dc_predictor_8x8_c,
+    aom_highbd_dc_left_predictor_8x8_c, aom_highbd_dc_top_predictor_8x8_c,
+    aom_highbd_dc_128_predictor_8x8_c, aom_highbd_v_predictor_8x8_c,
+    aom_highbd_h_predictor_8x8_c, aom_highbd_paeth_predictor_8x8_c,
+    aom_highbd_smooth_predictor_8x8_c, aom_highbd_smooth_v_predictor_8x8_c,
+    aom_highbd_smooth_h_predictor_8x8_c)
 
-// -----------------------------------------------------------------------------
-// 8x8
+HIGHBD_INTRA_PRED_TEST(
+    C_2, TX_8X4, aom_highbd_dc_predictor_8x4_c,
+    aom_highbd_dc_left_predictor_8x4_c, aom_highbd_dc_top_predictor_8x4_c,
+    aom_highbd_dc_128_predictor_8x4_c, aom_highbd_v_predictor_8x4_c,
+    aom_highbd_h_predictor_8x4_c, aom_highbd_paeth_predictor_8x4_c,
+    aom_highbd_smooth_predictor_8x4_c, aom_highbd_smooth_v_predictor_8x4_c,
+    aom_highbd_smooth_h_predictor_8x4_c)
 
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_highbd_smooth_v_predictor_8x8_c
-#define smooth_h_pred_func aom_highbd_smooth_h_predictor_8x8_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
+HIGHBD_INTRA_PRED_TEST(
+    C_3, TX_8X16, aom_highbd_dc_predictor_8x16_c,
+    aom_highbd_dc_left_predictor_8x16_c, aom_highbd_dc_top_predictor_8x16_c,
+    aom_highbd_dc_128_predictor_8x16_c, aom_highbd_v_predictor_8x16_c,
+    aom_highbd_h_predictor_8x16_c, aom_highbd_paeth_predictor_8x16_c,
+    aom_highbd_smooth_predictor_8x16_c, aom_highbd_smooth_v_predictor_8x16_c,
+    aom_highbd_smooth_h_predictor_8x16_c)
 
 HIGHBD_INTRA_PRED_TEST(
-    C_1, TestHighbdIntraPred8, "Hbd Intra8x8", aom_highbd_dc_predictor_8x8_c,
-    aom_highbd_dc_left_predictor_8x8_c, aom_highbd_dc_top_predictor_8x8_c,
-    aom_highbd_dc_128_predictor_8x8_c, aom_highbd_v_predictor_8x8_c,
-    aom_highbd_h_predictor_8x8_c, aom_highbd_d45e_predictor_8x8_c,
-    aom_highbd_d135_predictor_8x8_c, aom_highbd_d117_predictor_8x8_c,
-    aom_highbd_d153_predictor_8x8_c, aom_highbd_d207e_predictor_8x8_c,
-    aom_highbd_d63e_predictor_8x8_c, aom_highbd_paeth_predictor_8x8_c,
-    aom_highbd_smooth_predictor_8x8_c, smooth_v_pred_func, smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
+    C_4, TX_8X32, aom_highbd_dc_predictor_8x32_c,
+    aom_highbd_dc_left_predictor_8x32_c, aom_highbd_dc_top_predictor_8x32_c,
+    aom_highbd_dc_128_predictor_8x32_c, aom_highbd_v_predictor_8x32_c,
+    aom_highbd_h_predictor_8x32_c, aom_highbd_paeth_predictor_8x32_c,
+    aom_highbd_smooth_predictor_8x32_c, aom_highbd_smooth_v_predictor_8x32_c,
+    aom_highbd_smooth_h_predictor_8x32_c)
 
 #if HAVE_SSE2
-HIGHBD_INTRA_PRED_TEST(SSE2_1, TestHighbdIntraPred8, "Hbd Intra8x8",
-                       aom_highbd_dc_predictor_8x8_sse2,
+HIGHBD_INTRA_PRED_TEST(SSE2_1, TX_8X8, aom_highbd_dc_predictor_8x8_sse2,
                        aom_highbd_dc_left_predictor_8x8_sse2,
                        aom_highbd_dc_top_predictor_8x8_sse2,
                        aom_highbd_dc_128_predictor_8x8_sse2,
                        aom_highbd_v_predictor_8x8_sse2,
-                       aom_highbd_h_predictor_8x8_sse2,
-                       aom_highbd_d45e_predictor_8x8_sse2, NULL, NULL, NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL)
-HIGHBD_INTRA_PRED_TEST(SSE2_2, TestHighbdIntraPred8, "Hbd Intra8x4",
-                       aom_highbd_dc_predictor_8x4_sse2,
+                       aom_highbd_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL)
+HIGHBD_INTRA_PRED_TEST(SSE2_2, TX_8X4, aom_highbd_dc_predictor_8x4_sse2,
                        aom_highbd_dc_left_predictor_8x4_sse2,
                        aom_highbd_dc_top_predictor_8x4_sse2,
                        aom_highbd_dc_128_predictor_8x4_sse2,
                        aom_highbd_v_predictor_8x4_sse2,
-                       aom_highbd_h_predictor_8x4_sse2,
-                       aom_highbd_d45e_predictor_8x4_sse2, NULL, NULL, NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL)
-HIGHBD_INTRA_PRED_TEST(SSE2_3, TestHighbdIntraPred8, "Hbd Intra8x16",
-                       aom_highbd_dc_predictor_8x16_sse2,
+                       aom_highbd_h_predictor_8x4_sse2, NULL, NULL, NULL, NULL)
+HIGHBD_INTRA_PRED_TEST(SSE2_3, TX_8X16, aom_highbd_dc_predictor_8x16_sse2,
                        aom_highbd_dc_left_predictor_8x16_sse2,
                        aom_highbd_dc_top_predictor_8x16_sse2,
                        aom_highbd_dc_128_predictor_8x16_sse2,
                        aom_highbd_v_predictor_8x16_sse2,
-                       aom_highbd_h_predictor_8x16_sse2,
-                       aom_highbd_d45e_predictor_8x16_sse2, NULL, NULL, NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL)
+                       aom_highbd_h_predictor_8x16_sse2, NULL, NULL, NULL, NULL)
 #endif
 
 #if HAVE_SSSE3
-HIGHBD_INTRA_PRED_TEST(SSSE3, TestHighbdIntraPred8, "Hbd Intra8x8", NULL, NULL,
-                       NULL, NULL, NULL, NULL, NULL,
-                       aom_highbd_d135_predictor_8x8_ssse3,
-                       aom_highbd_d117_predictor_8x8_ssse3,
-                       aom_highbd_d153_predictor_8x8_ssse3, NULL, NULL, NULL,
+HIGHBD_INTRA_PRED_TEST(SSSE3, TX_8X8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                        NULL, NULL, NULL)
 #endif
 
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_highbd_smooth_v_predictor_8x4_c
-#define smooth_h_pred_func aom_highbd_smooth_h_predictor_8x4_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
+// -----------------------------------------------------------------------------
+// 16x16, 16x8, 16x32, 16x4, 16x64
 
 HIGHBD_INTRA_PRED_TEST(
-    C_2, TestHighbdIntraPred8, "Hbd Intra8x4", aom_highbd_dc_predictor_8x4_c,
-    aom_highbd_dc_left_predictor_8x4_c, aom_highbd_dc_top_predictor_8x4_c,
-    aom_highbd_dc_128_predictor_8x4_c, aom_highbd_v_predictor_8x4_c,
-    aom_highbd_h_predictor_8x4_c, aom_highbd_d45e_predictor_8x4_c,
-    aom_highbd_d135_predictor_8x4_c, aom_highbd_d117_predictor_8x4_c,
-    aom_highbd_d153_predictor_8x4_c, aom_highbd_d207e_predictor_8x4_c,
-    aom_highbd_d63e_predictor_8x4_c, aom_highbd_paeth_predictor_8x4_c,
-    aom_highbd_smooth_predictor_8x4_c, smooth_v_pred_func, smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
-
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_highbd_smooth_v_predictor_8x16_c
-#define smooth_h_pred_func aom_highbd_smooth_h_predictor_8x16_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
+    C_1, TX_16X16, aom_highbd_dc_predictor_16x16_c,
+    aom_highbd_dc_left_predictor_16x16_c, aom_highbd_dc_top_predictor_16x16_c,
+    aom_highbd_dc_128_predictor_16x16_c, aom_highbd_v_predictor_16x16_c,
+    aom_highbd_h_predictor_16x16_c, aom_highbd_paeth_predictor_16x16_c,
+    aom_highbd_smooth_predictor_16x16_c, aom_highbd_smooth_v_predictor_16x16_c,
+    aom_highbd_smooth_h_predictor_16x16_c)
 
 HIGHBD_INTRA_PRED_TEST(
-    C_3, TestHighbdIntraPred8, "Hbd Intra8x16", aom_highbd_dc_predictor_8x16_c,
-    aom_highbd_dc_left_predictor_8x16_c, aom_highbd_dc_top_predictor_8x16_c,
-    aom_highbd_dc_128_predictor_8x16_c, aom_highbd_v_predictor_8x16_c,
-    aom_highbd_h_predictor_8x16_c, aom_highbd_d45e_predictor_8x16_c,
-    aom_highbd_d135_predictor_8x16_c, aom_highbd_d117_predictor_8x16_c,
-    aom_highbd_d153_predictor_8x16_c, aom_highbd_d207e_predictor_8x16_c,
-    aom_highbd_d63e_predictor_8x16_c, aom_highbd_paeth_predictor_8x16_c,
-    aom_highbd_smooth_predictor_8x16_c, smooth_v_pred_func, smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
+    C_2, TX_16X8, aom_highbd_dc_predictor_16x8_c,
+    aom_highbd_dc_left_predictor_16x8_c, aom_highbd_dc_top_predictor_16x8_c,
+    aom_highbd_dc_128_predictor_16x8_c, aom_highbd_v_predictor_16x8_c,
+    aom_highbd_h_predictor_16x8_c, aom_highbd_paeth_predictor_16x8_c,
+    aom_highbd_smooth_predictor_16x8_c, aom_highbd_smooth_v_predictor_16x8_c,
+    aom_highbd_smooth_h_predictor_16x8_c)
 
-// -----------------------------------------------------------------------------
-// 16x16
+HIGHBD_INTRA_PRED_TEST(
+    C_3, TX_16X32, aom_highbd_dc_predictor_16x32_c,
+    aom_highbd_dc_left_predictor_16x32_c, aom_highbd_dc_top_predictor_16x32_c,
+    aom_highbd_dc_128_predictor_16x32_c, aom_highbd_v_predictor_16x32_c,
+    aom_highbd_h_predictor_16x32_c, aom_highbd_paeth_predictor_16x32_c,
+    aom_highbd_smooth_predictor_16x32_c, aom_highbd_smooth_v_predictor_16x32_c,
+    aom_highbd_smooth_h_predictor_16x32_c)
 
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_highbd_smooth_v_predictor_16x16_c
-#define smooth_h_pred_func aom_highbd_smooth_h_predictor_16x16_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
+HIGHBD_INTRA_PRED_TEST(
+    C_4, TX_16X4, aom_highbd_dc_predictor_16x4_c,
+    aom_highbd_dc_left_predictor_16x4_c, aom_highbd_dc_top_predictor_16x4_c,
+    aom_highbd_dc_128_predictor_16x4_c, aom_highbd_v_predictor_16x4_c,
+    aom_highbd_h_predictor_16x4_c, aom_highbd_paeth_predictor_16x4_c,
+    aom_highbd_smooth_predictor_16x4_c, aom_highbd_smooth_v_predictor_16x4_c,
+    aom_highbd_smooth_h_predictor_16x4_c)
 
 HIGHBD_INTRA_PRED_TEST(
-    C_1, TestHighbdIntraPred16, "Hbd Intra16x16",
-    aom_highbd_dc_predictor_16x16_c, aom_highbd_dc_left_predictor_16x16_c,
-    aom_highbd_dc_top_predictor_16x16_c, aom_highbd_dc_128_predictor_16x16_c,
-    aom_highbd_v_predictor_16x16_c, aom_highbd_h_predictor_16x16_c,
-    aom_highbd_d45e_predictor_16x16_c, aom_highbd_d135_predictor_16x16_c,
-    aom_highbd_d117_predictor_16x16_c, aom_highbd_d153_predictor_16x16_c,
-    aom_highbd_d207e_predictor_16x16_c, aom_highbd_d63e_predictor_16x16_c,
-    aom_highbd_paeth_predictor_16x16_c, aom_highbd_smooth_predictor_16x16_c,
-    smooth_v_pred_func, smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
+    C_5, TX_16X64, aom_highbd_dc_predictor_16x64_c,
+    aom_highbd_dc_left_predictor_16x64_c, aom_highbd_dc_top_predictor_16x64_c,
+    aom_highbd_dc_128_predictor_16x64_c, aom_highbd_v_predictor_16x64_c,
+    aom_highbd_h_predictor_16x64_c, aom_highbd_paeth_predictor_16x64_c,
+    aom_highbd_smooth_predictor_16x64_c, aom_highbd_smooth_v_predictor_16x64_c,
+    aom_highbd_smooth_h_predictor_16x64_c)
 
 #if HAVE_SSE2
-HIGHBD_INTRA_PRED_TEST(SSE2_1, TestHighbdIntraPred16, "Hbd Intra16x16",
-                       aom_highbd_dc_predictor_16x16_sse2,
+HIGHBD_INTRA_PRED_TEST(SSE2_1, TX_16X16, aom_highbd_dc_predictor_16x16_sse2,
                        aom_highbd_dc_left_predictor_16x16_sse2,
                        aom_highbd_dc_top_predictor_16x16_sse2,
                        aom_highbd_dc_128_predictor_16x16_sse2,
                        aom_highbd_v_predictor_16x16_sse2,
                        aom_highbd_h_predictor_16x16_sse2, NULL, NULL, NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL, NULL)
-HIGHBD_INTRA_PRED_TEST(SSE2_2, TestHighbdIntraPred16, "Hbd Intra16x8",
-                       aom_highbd_dc_predictor_16x8_sse2,
+                       NULL)
+HIGHBD_INTRA_PRED_TEST(SSE2_2, TX_16X8, aom_highbd_dc_predictor_16x8_sse2,
                        aom_highbd_dc_left_predictor_16x8_sse2,
                        aom_highbd_dc_top_predictor_16x8_sse2,
                        aom_highbd_dc_128_predictor_16x8_sse2,
                        aom_highbd_v_predictor_16x8_sse2,
-                       aom_highbd_h_predictor_16x8_sse2, NULL, NULL, NULL, NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL)
-HIGHBD_INTRA_PRED_TEST(SSE2_3, TestHighbdIntraPred16, "Hbd Intra16x32",
-                       aom_highbd_dc_predictor_16x32_sse2,
+                       aom_highbd_h_predictor_16x8_sse2, NULL, NULL, NULL, NULL)
+HIGHBD_INTRA_PRED_TEST(SSE2_3, TX_16X32, aom_highbd_dc_predictor_16x32_sse2,
                        aom_highbd_dc_left_predictor_16x32_sse2,
                        aom_highbd_dc_top_predictor_16x32_sse2,
                        aom_highbd_dc_128_predictor_16x32_sse2,
                        aom_highbd_v_predictor_16x32_sse2,
                        aom_highbd_h_predictor_16x32_sse2, NULL, NULL, NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+                       NULL)
 #endif
 
 #if HAVE_SSSE3
-HIGHBD_INTRA_PRED_TEST(SSSE3_1, TestHighbdIntraPred16, "Hbd Intra16x16", NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL,
-                       aom_highbd_d135_predictor_16x16_ssse3,
-                       aom_highbd_d117_predictor_16x16_ssse3,
-                       aom_highbd_d153_predictor_16x16_ssse3, NULL, NULL, NULL,
-                       NULL, NULL, NULL)
+HIGHBD_INTRA_PRED_TEST(SSSE3_1, TX_16X16, NULL, NULL, NULL, NULL, NULL, NULL,
+                       NULL, NULL, NULL, NULL)
 #endif
 
 #if HAVE_AVX2
-HIGHBD_INTRA_PRED_TEST(AVX2_1, TestHighbdIntraPred16, "Hbd Intra16x16", NULL,
-                       NULL, NULL, NULL, NULL, NULL,
-                       aom_highbd_d45e_predictor_16x16_avx2, NULL, NULL, NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL)
-
-HIGHBD_INTRA_PRED_TEST(AVX2_2, TestHighbdIntraPred16, "Hbd Intra16x8", NULL,
-                       NULL, NULL, NULL, NULL, NULL,
-                       aom_highbd_d45e_predictor_16x8_avx2, NULL, NULL, NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL)
-
-HIGHBD_INTRA_PRED_TEST(AVX2_3, TestHighbdIntraPred16, "Hbd Intra16x32", NULL,
-                       NULL, NULL, NULL, NULL, NULL,
-                       aom_highbd_d45e_predictor_16x32_avx2, NULL, NULL, NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL)
+HIGHBD_INTRA_PRED_TEST(AVX2_1, TX_16X16, NULL, NULL, NULL, NULL, NULL, NULL,
+                       NULL, NULL, NULL, NULL)
+
+HIGHBD_INTRA_PRED_TEST(AVX2_2, TX_16X8, NULL, NULL, NULL, NULL, NULL, NULL,
+                       NULL, NULL, NULL, NULL)
+
+HIGHBD_INTRA_PRED_TEST(AVX2_3, TX_16X32, NULL, NULL, NULL, NULL, NULL, NULL,
+                       NULL, NULL, NULL, NULL)
 #endif
 
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_highbd_smooth_v_predictor_16x8_c
-#define smooth_h_pred_func aom_highbd_smooth_h_predictor_16x8_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
+// -----------------------------------------------------------------------------
+// 32x32, 32x16, 32x64, 32x8
 
 HIGHBD_INTRA_PRED_TEST(
-    C_2, TestHighbdIntraPred16, "Hbd Intra16x8", aom_highbd_dc_predictor_16x8_c,
-    aom_highbd_dc_left_predictor_16x8_c, aom_highbd_dc_top_predictor_16x8_c,
-    aom_highbd_dc_128_predictor_16x8_c, aom_highbd_v_predictor_16x8_c,
-    aom_highbd_h_predictor_16x8_c, aom_highbd_d45e_predictor_16x8_c,
-    aom_highbd_d135_predictor_16x8_c, aom_highbd_d117_predictor_16x8_c,
-    aom_highbd_d153_predictor_16x8_c, aom_highbd_d207e_predictor_16x8_c,
-    aom_highbd_d63e_predictor_16x8_c, aom_highbd_paeth_predictor_16x8_c,
-    aom_highbd_smooth_predictor_16x8_c, smooth_v_pred_func, smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
-
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_highbd_smooth_v_predictor_16x32_c
-#define smooth_h_pred_func aom_highbd_smooth_h_predictor_16x32_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
+    C_1, TX_32X32, aom_highbd_dc_predictor_32x32_c,
+    aom_highbd_dc_left_predictor_32x32_c, aom_highbd_dc_top_predictor_32x32_c,
+    aom_highbd_dc_128_predictor_32x32_c, aom_highbd_v_predictor_32x32_c,
+    aom_highbd_h_predictor_32x32_c, aom_highbd_paeth_predictor_32x32_c,
+    aom_highbd_smooth_predictor_32x32_c, aom_highbd_smooth_v_predictor_32x32_c,
+    aom_highbd_smooth_h_predictor_32x32_c)
 
 HIGHBD_INTRA_PRED_TEST(
-    C_3, TestHighbdIntraPred16, "Hbd Intra16x32",
-    aom_highbd_dc_predictor_16x32_c, aom_highbd_dc_left_predictor_16x32_c,
-    aom_highbd_dc_top_predictor_16x32_c, aom_highbd_dc_128_predictor_16x32_c,
-    aom_highbd_v_predictor_16x32_c, aom_highbd_h_predictor_16x32_c,
-    aom_highbd_d45e_predictor_16x32_c, aom_highbd_d135_predictor_16x32_c,
-    aom_highbd_d117_predictor_16x32_c, aom_highbd_d153_predictor_16x32_c,
-    aom_highbd_d207e_predictor_16x32_c, aom_highbd_d63e_predictor_16x32_c,
-    aom_highbd_paeth_predictor_16x32_c, aom_highbd_smooth_predictor_16x32_c,
-    smooth_v_pred_func, smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
+    C_2, TX_32X16, aom_highbd_dc_predictor_32x16_c,
+    aom_highbd_dc_left_predictor_32x16_c, aom_highbd_dc_top_predictor_32x16_c,
+    aom_highbd_dc_128_predictor_32x16_c, aom_highbd_v_predictor_32x16_c,
+    aom_highbd_h_predictor_32x16_c, aom_highbd_paeth_predictor_32x16_c,
+    aom_highbd_smooth_predictor_32x16_c, aom_highbd_smooth_v_predictor_32x16_c,
+    aom_highbd_smooth_h_predictor_32x16_c)
 
-// -----------------------------------------------------------------------------
-// 32x32
-
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_highbd_smooth_v_predictor_32x32_c
-#define smooth_h_pred_func aom_highbd_smooth_h_predictor_32x32_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
+HIGHBD_INTRA_PRED_TEST(
+    C_3, TX_32X64, aom_highbd_dc_predictor_32x64_c,
+    aom_highbd_dc_left_predictor_32x64_c, aom_highbd_dc_top_predictor_32x64_c,
+    aom_highbd_dc_128_predictor_32x64_c, aom_highbd_v_predictor_32x64_c,
+    aom_highbd_h_predictor_32x64_c, aom_highbd_paeth_predictor_32x64_c,
+    aom_highbd_smooth_predictor_32x64_c, aom_highbd_smooth_v_predictor_32x64_c,
+    aom_highbd_smooth_h_predictor_32x64_c)
 
 HIGHBD_INTRA_PRED_TEST(
-    C_1, TestHighbdIntraPred32, "Hbd Intra32x32",
-    aom_highbd_dc_predictor_32x32_c, aom_highbd_dc_left_predictor_32x32_c,
-    aom_highbd_dc_top_predictor_32x32_c, aom_highbd_dc_128_predictor_32x32_c,
-    aom_highbd_v_predictor_32x32_c, aom_highbd_h_predictor_32x32_c,
-    aom_highbd_d45e_predictor_32x32_c, aom_highbd_d135_predictor_32x32_c,
-    aom_highbd_d117_predictor_32x32_c, aom_highbd_d153_predictor_32x32_c,
-    aom_highbd_d207e_predictor_32x32_c, aom_highbd_d63e_predictor_32x32_c,
-    aom_highbd_paeth_predictor_32x32_c, aom_highbd_smooth_predictor_32x32_c,
-    smooth_v_pred_func, smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
+    C_4, TX_32X8, aom_highbd_dc_predictor_32x8_c,
+    aom_highbd_dc_left_predictor_32x8_c, aom_highbd_dc_top_predictor_32x8_c,
+    aom_highbd_dc_128_predictor_32x8_c, aom_highbd_v_predictor_32x8_c,
+    aom_highbd_h_predictor_32x8_c, aom_highbd_paeth_predictor_32x8_c,
+    aom_highbd_smooth_predictor_32x8_c, aom_highbd_smooth_v_predictor_32x8_c,
+    aom_highbd_smooth_h_predictor_32x8_c)
 
 #if HAVE_SSE2
-HIGHBD_INTRA_PRED_TEST(SSE2_1, TestHighbdIntraPred32, "Hbd Intra32x32",
-                       aom_highbd_dc_predictor_32x32_sse2,
+HIGHBD_INTRA_PRED_TEST(SSE2_1, TX_32X32, aom_highbd_dc_predictor_32x32_sse2,
                        aom_highbd_dc_left_predictor_32x32_sse2,
                        aom_highbd_dc_top_predictor_32x32_sse2,
                        aom_highbd_dc_128_predictor_32x32_sse2,
                        aom_highbd_v_predictor_32x32_sse2,
                        aom_highbd_h_predictor_32x32_sse2, NULL, NULL, NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL, NULL)
-HIGHBD_INTRA_PRED_TEST(SSE2_2, TestHighbdIntraPred32, "Hbd Intra32x16",
-                       aom_highbd_dc_predictor_32x16_sse2,
+                       NULL)
+HIGHBD_INTRA_PRED_TEST(SSE2_2, TX_32X16, aom_highbd_dc_predictor_32x16_sse2,
                        aom_highbd_dc_left_predictor_32x16_sse2,
                        aom_highbd_dc_top_predictor_32x16_sse2,
                        aom_highbd_dc_128_predictor_32x16_sse2,
                        aom_highbd_v_predictor_32x16_sse2,
                        aom_highbd_h_predictor_32x16_sse2, NULL, NULL, NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+                       NULL)
 #endif
 
 #if HAVE_SSSE3
-HIGHBD_INTRA_PRED_TEST(SSSE3_1, TestHighbdIntraPred32, "Hbd Intra32x32", NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL,
-                       aom_highbd_d135_predictor_32x32_ssse3,
-                       aom_highbd_d117_predictor_32x32_ssse3,
-                       aom_highbd_d153_predictor_32x32_ssse3, NULL, NULL, NULL,
-                       NULL, NULL, NULL)
+HIGHBD_INTRA_PRED_TEST(SSSE3_1, TX_32X32, NULL, NULL, NULL, NULL, NULL, NULL,
+                       NULL, NULL, NULL, NULL)
 #endif
 
 #if HAVE_AVX2
-HIGHBD_INTRA_PRED_TEST(AVX2_1, TestHighbdIntraPred32, "Hbd Intra32x32", NULL,
-                       NULL, NULL, NULL, NULL, NULL,
-                       aom_highbd_d45e_predictor_32x32_avx2, NULL, NULL, NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL)
-
-HIGHBD_INTRA_PRED_TEST(AVX2_2, TestHighbdIntraPred32, "Hbd Intra32x16", NULL,
-                       NULL, NULL, NULL, NULL, NULL,
-                       aom_highbd_d45e_predictor_32x16_avx2, NULL, NULL, NULL,
-                       NULL, NULL, NULL, NULL, NULL, NULL)
+HIGHBD_INTRA_PRED_TEST(AVX2_1, TX_32X32, NULL, NULL, NULL, NULL, NULL, NULL,
+                       NULL, NULL, NULL, NULL)
+
+HIGHBD_INTRA_PRED_TEST(AVX2_2, TX_32X16, NULL, NULL, NULL, NULL, NULL, NULL,
+                       NULL, NULL, NULL, NULL)
 #endif
 
-#if CONFIG_SMOOTH_HV
-#define smooth_v_pred_func aom_highbd_smooth_v_predictor_32x16_c
-#define smooth_h_pred_func aom_highbd_smooth_h_predictor_32x16_c
-#else
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif  // CONFIG_SMOOTH_HV
+// -----------------------------------------------------------------------------
+// 64x64, 64x32, 64x16
+
+HIGHBD_INTRA_PRED_TEST(
+    C_1, TX_64X64, aom_highbd_dc_predictor_64x64_c,
+    aom_highbd_dc_left_predictor_64x64_c, aom_highbd_dc_top_predictor_64x64_c,
+    aom_highbd_dc_128_predictor_64x64_c, aom_highbd_v_predictor_64x64_c,
+    aom_highbd_h_predictor_64x64_c, aom_highbd_paeth_predictor_64x64_c,
+    aom_highbd_smooth_predictor_64x64_c, aom_highbd_smooth_v_predictor_64x64_c,
+    aom_highbd_smooth_h_predictor_64x64_c)
 
 HIGHBD_INTRA_PRED_TEST(
-    C_2, TestHighbdIntraPred32, "Hbd Intra32x16",
-    aom_highbd_dc_predictor_32x16_c, aom_highbd_dc_left_predictor_32x16_c,
-    aom_highbd_dc_top_predictor_32x16_c, aom_highbd_dc_128_predictor_32x16_c,
-    aom_highbd_v_predictor_32x16_c, aom_highbd_h_predictor_32x16_c,
-    aom_highbd_d45e_predictor_32x16_c, aom_highbd_d135_predictor_32x16_c,
-    aom_highbd_d117_predictor_32x16_c, aom_highbd_d153_predictor_32x16_c,
-    aom_highbd_d207e_predictor_32x16_c, aom_highbd_d63e_predictor_32x16_c,
-    aom_highbd_paeth_predictor_32x16_c, aom_highbd_smooth_predictor_32x16_c,
-    smooth_v_pred_func, smooth_h_pred_func)
-#undef smooth_v_pred_func
-#undef smooth_h_pred_func
-#endif  // CONFIG_HIGHBITDEPTH
+    C_2, TX_64X32, aom_highbd_dc_predictor_64x32_c,
+    aom_highbd_dc_left_predictor_64x32_c, aom_highbd_dc_top_predictor_64x32_c,
+    aom_highbd_dc_128_predictor_64x32_c, aom_highbd_v_predictor_64x32_c,
+    aom_highbd_h_predictor_64x32_c, aom_highbd_paeth_predictor_64x32_c,
+    aom_highbd_smooth_predictor_64x32_c, aom_highbd_smooth_v_predictor_64x32_c,
+    aom_highbd_smooth_h_predictor_64x32_c)
+
+HIGHBD_INTRA_PRED_TEST(
+    C_3, TX_64X16, aom_highbd_dc_predictor_64x16_c,
+    aom_highbd_dc_left_predictor_64x16_c, aom_highbd_dc_top_predictor_64x16_c,
+    aom_highbd_dc_128_predictor_64x16_c, aom_highbd_v_predictor_64x16_c,
+    aom_highbd_h_predictor_64x16_c, aom_highbd_paeth_predictor_64x16_c,
+    aom_highbd_smooth_predictor_64x16_c, aom_highbd_smooth_v_predictor_64x16_c,
+    aom_highbd_smooth_h_predictor_64x16_c)
+
+// -----------------------------------------------------------------------------
 
 #include "test/test_libaom.cc"
diff --git a/third_party/aom/test/test_libaom.cc b/third_party/aom/test/test_libaom.cc
index 6d83ce66e7..b55d762373 100644
--- a/third_party/aom/test/test_libaom.cc
+++ b/third_party/aom/test/test_libaom.cc
@@ -7,20 +7,21 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
+
+#include <string.h>
 
 #include <string>
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
 #if ARCH_X86 || ARCH_X86_64
 #include "aom_ports/x86.h"
 #endif
 extern "C" {
-#if CONFIG_AV1
 extern void av1_rtcd();
-#endif  // CONFIG_AV1
 extern void aom_dsp_rtcd();
 extern void aom_scale_rtcd();
 }
@@ -30,7 +31,17 @@ static void append_negative_gtest_filter(const char *str) {
   std::string filter = ::testing::FLAGS_gtest_filter;
   // Negative patterns begin with one '-' followed by a ':' separated list.
   if (filter.find('-') == std::string::npos) filter += '-';
-  filter += str;
+  // OPT.* matches TEST() functions
+  // OPT/* matches TEST_P() functions
+  // OPT_* matches tests which have been manually sharded.
+  // We do not match OPT* because of SSE/SSE2 collisions.
+  const char *search_terminators = "./_";
+  for (size_t pos = 0; pos < strlen(search_terminators); ++pos) {
+    filter += ":";
+    filter += str;
+    filter += search_terminators[pos];
+    filter += "*";
+  }
   ::testing::FLAGS_gtest_filter = filter;
 }
 #endif  // ARCH_X86 || ARCH_X86_64
@@ -40,25 +51,21 @@ int main(int argc, char **argv) {
 
 #if ARCH_X86 || ARCH_X86_64
   const int simd_caps = x86_simd_caps();
-  if (!(simd_caps & HAS_MMX)) append_negative_gtest_filter(":MMX.*:MMX/*");
-  if (!(simd_caps & HAS_SSE)) append_negative_gtest_filter(":SSE.*:SSE/*");
-  if (!(simd_caps & HAS_SSE2)) append_negative_gtest_filter(":SSE2.*:SSE2/*");
-  if (!(simd_caps & HAS_SSE3)) append_negative_gtest_filter(":SSE3.*:SSE3/*");
-  if (!(simd_caps & HAS_SSSE3))
-    append_negative_gtest_filter(":SSSE3.*:SSSE3/*");
-  if (!(simd_caps & HAS_SSE4_1))
-    append_negative_gtest_filter(":SSE4_1.*:SSE4_1/*");
-  if (!(simd_caps & HAS_AVX)) append_negative_gtest_filter(":AVX.*:AVX/*");
-  if (!(simd_caps & HAS_AVX2)) append_negative_gtest_filter(":AVX2.*:AVX2/*");
+  if (!(simd_caps & HAS_MMX)) append_negative_gtest_filter("MMX");
+  if (!(simd_caps & HAS_SSE)) append_negative_gtest_filter("SSE");
+  if (!(simd_caps & HAS_SSE2)) append_negative_gtest_filter("SSE2");
+  if (!(simd_caps & HAS_SSE3)) append_negative_gtest_filter("SSE3");
+  if (!(simd_caps & HAS_SSSE3)) append_negative_gtest_filter("SSSE3");
+  if (!(simd_caps & HAS_SSE4_1)) append_negative_gtest_filter("SSE4_1");
+  if (!(simd_caps & HAS_SSE4_2)) append_negative_gtest_filter("SSE4_2");
+  if (!(simd_caps & HAS_AVX)) append_negative_gtest_filter("AVX");
+  if (!(simd_caps & HAS_AVX2)) append_negative_gtest_filter("AVX2");
 #endif  // ARCH_X86 || ARCH_X86_64
 
+// Shared library builds don't support whitebox tests that exercise internal
+// symbols.
 #if !CONFIG_SHARED
-// Shared library builds don't support whitebox tests
-// that exercise internal symbols.
-
-#if CONFIG_AV1
   av1_rtcd();
-#endif  // CONFIG_AV1
   aom_dsp_rtcd();
   aom_scale_rtcd();
 #endif  // !CONFIG_SHARED
diff --git a/third_party/aom/test/test_runner.cmake b/third_party/aom/test/test_runner.cmake
index a1f399642f..d3747b1e30 100644
--- a/third_party/aom/test/test_runner.cmake
+++ b/third_party/aom/test/test_runner.cmake
@@ -1,22 +1,28 @@
-##
-## Copyright (c) 2017, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-if (NOT GTEST_TOTAL_SHARDS OR "${GTEST_SHARD_INDEX}" STREQUAL ""
-    OR NOT TEST_LIBAOM)
-  message(FATAL_ERROR
-          "The variables GTEST_SHARD_INDEX, GTEST_TOTAL_SHARDS and TEST_LIBAOM
-          must be defined.")
-endif ()
+#
+# Copyright (c) 2017, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and the
+# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
+# not distributed with this source code in the LICENSE file, you can obtain it
+# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
+# License 1.0 was not distributed with this source code in the PATENTS file, you
+# can obtain it at www.aomedia.org/license/patent.
+#
+if(NOT GTEST_TOTAL_SHARDS OR "${GTEST_SHARD_INDEX}" STREQUAL "" OR NOT
+   TEST_LIBAOM)
+  message(
+    FATAL_ERROR
+      "The variables GTEST_SHARD_INDEX, GTEST_TOTAL_SHARDS and TEST_LIBAOM
+          must be defined."
+    )
+endif()
 
 set($ENV{GTEST_SHARD_INDEX} ${GTEST_SHARD_INDEX})
 set($ENV{GTEST_TOTAL_SHARDS} ${GTEST_TOTAL_SHARDS})
 execute_process(COMMAND ${TEST_LIBAOM} RESULT_VARIABLE test_result)
 set(test_message "Test shard ${GTEST_SHARD_INDEX}/${GTEST_TOTAL_SHARDS} result")
 message("${test_message}: ${test_result}")
+
+if(NOT "${test_result}" STREQUAL "0")
+  message(FATAL_ERROR "${test_message}: FAILED, non-zero exit code.")
+endif()
diff --git a/third_party/aom/test/test_vector_test.cc b/third_party/aom/test/test_vector_test.cc
new file mode 100644
index 0000000000..85223177cf
--- /dev/null
+++ b/third_party/aom/test/test_vector_test.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <cstdio>
+#include <cstdlib>
+#include <set>
+#include <string>
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "common/tools_common.h"
+#include "config/aom_config.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/ivf_video_source.h"
+#include "test/md5_helper.h"
+#include "test/test_vectors.h"
+#include "test/util.h"
+#if CONFIG_WEBM_IO
+#include "test/webm_video_source.h"
+#endif
+
+namespace {
+
+const int kThreads = 0;
+const int kFileName = 1;
+
+typedef ::testing::tuple<int, const char *> DecodeParam;
+
+class TestVectorTest : public ::libaom_test::DecoderTest,
+                       public ::libaom_test::CodecTestWithParam<DecodeParam> {
+ protected:
+  TestVectorTest() : DecoderTest(GET_PARAM(0)), md5_file_(NULL) {}
+
+  virtual ~TestVectorTest() {
+    if (md5_file_) fclose(md5_file_);
+  }
+
+  void OpenMD5File(const std::string &md5_file_name_) {
+    md5_file_ = libaom_test::OpenTestDataFile(md5_file_name_);
+    ASSERT_TRUE(md5_file_ != NULL)
+        << "Md5 file open failed. Filename: " << md5_file_name_;
+  }
+
+  virtual void DecompressedFrameHook(const aom_image_t &img,
+                                     const unsigned int frame_number) {
+    ASSERT_TRUE(md5_file_ != NULL);
+    char expected_md5[33];
+    char junk[128];
+
+    // Read correct md5 checksums.
+    const int res = fscanf(md5_file_, "%s  %s", expected_md5, junk);
+    ASSERT_NE(res, EOF) << "Read md5 data failed";
+    expected_md5[32] = '\0';
+
+    ::libaom_test::MD5 md5_res;
+    md5_res.Add(&img);
+    const char *actual_md5 = md5_res.Get();
+
+    // Check md5 match.
+    ASSERT_STREQ(expected_md5, actual_md5)
+        << "Md5 checksums don't match: frame number = " << frame_number;
+  }
+
+ private:
+  FILE *md5_file_;
+};
+
+// This test runs through the whole set of test vectors, and decodes them.
+// The md5 checksums are computed for each frame in the video file. If md5
+// checksums match the correct md5 data, then the test is passed. Otherwise,
+// the test failed.
+TEST_P(TestVectorTest, MD5Match) {
+  const DecodeParam input = GET_PARAM(1);
+  const std::string filename = ::testing::get<kFileName>(input);
+  aom_codec_flags_t flags = 0;
+  aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
+  char str[256];
+
+  cfg.threads = ::testing::get<kThreads>(input);
+
+  snprintf(str, sizeof(str) / sizeof(str[0]) - 1, "file: %s threads: %d",
+           filename.c_str(), cfg.threads);
+  SCOPED_TRACE(str);
+
+  // Open compressed video file.
+  testing::internal::scoped_ptr<libaom_test::CompressedVideoSource> video;
+  if (filename.substr(filename.length() - 3, 3) == "ivf") {
+    video.reset(new libaom_test::IVFVideoSource(filename));
+  } else if (filename.substr(filename.length() - 4, 4) == "webm") {
+#if CONFIG_WEBM_IO
+    video.reset(new libaom_test::WebMVideoSource(filename));
+#else
+    fprintf(stderr, "WebM IO is disabled, skipping test vector %s\n",
+            filename.c_str());
+    return;
+#endif
+  }
+  ASSERT_TRUE(video.get() != NULL);
+  video->Init();
+
+  // Construct md5 file name.
+  const std::string md5_filename = filename + ".md5";
+  OpenMD5File(md5_filename);
+
+  // Set decode config and flags.
+  cfg.allow_lowbitdepth = CONFIG_LOWBITDEPTH;
+  set_cfg(cfg);
+  set_flags(flags);
+
+  // Decode frame, and check the md5 matching.
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get(), cfg));
+}
+
+// TODO(yaowu): Current md5 check works only when CONFIG_LOWBITDEPTH is enabled,
+// remove CONFIG_LOWBITDEPTH when md5 check is reworked to be compatible with
+// CONFIG_LOWBITDEPTH = 0
+#if CONFIG_AV1_DECODER && CONFIG_LOWBITDEPTH
+AV1_INSTANTIATE_TEST_CASE(
+    TestVectorTest,
+    ::testing::Combine(
+        ::testing::Values(1),  // Single thread.
+        ::testing::ValuesIn(libaom_test::kAV1TestVectors,
+                            libaom_test::kAV1TestVectors +
+                                libaom_test::kNumAV1TestVectors)));
+
+// Test AV1 decode in with different numbers of threads.
+INSTANTIATE_TEST_CASE_P(
+    AV1MultiThreaded, TestVectorTest,
+    ::testing::Combine(
+        ::testing::Values(
+            static_cast<const libaom_test::CodecFactory *>(&libaom_test::kAV1)),
+        ::testing::Combine(
+            ::testing::Range(2, 9),  // With 2 ~ 8 threads.
+            ::testing::ValuesIn(libaom_test::kAV1TestVectors,
+                                libaom_test::kAV1TestVectors +
+                                    libaom_test::kNumAV1TestVectors))));
+
+#endif  // CONFIG_AV1_DECODER
+
+}  // namespace
diff --git a/third_party/aom/test/test_vectors.cc b/third_party/aom/test/test_vectors.cc
new file mode 100644
index 0000000000..a9edf75205
--- /dev/null
+++ b/third_party/aom/test/test_vectors.cc
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "test/test_vectors.h"
+
+namespace libaom_test {
+
+#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0]))
+
+#if CONFIG_AV1_DECODER
+const char *const kAV1TestVectors[] = {
+  "av1-1-b8-00-quantizer-00.ivf", "av1-1-b8-00-quantizer-01.ivf",
+  "av1-1-b8-00-quantizer-02.ivf", "av1-1-b8-00-quantizer-03.ivf",
+  "av1-1-b8-00-quantizer-04.ivf", "av1-1-b8-00-quantizer-05.ivf",
+  "av1-1-b8-00-quantizer-06.ivf", "av1-1-b8-00-quantizer-07.ivf",
+  "av1-1-b8-00-quantizer-08.ivf", "av1-1-b8-00-quantizer-09.ivf",
+  "av1-1-b8-00-quantizer-10.ivf", "av1-1-b8-00-quantizer-11.ivf",
+  "av1-1-b8-00-quantizer-12.ivf", "av1-1-b8-00-quantizer-13.ivf",
+  "av1-1-b8-00-quantizer-14.ivf", "av1-1-b8-00-quantizer-15.ivf",
+  "av1-1-b8-00-quantizer-16.ivf", "av1-1-b8-00-quantizer-17.ivf",
+  "av1-1-b8-00-quantizer-18.ivf", "av1-1-b8-00-quantizer-19.ivf",
+  "av1-1-b8-00-quantizer-20.ivf", "av1-1-b8-00-quantizer-21.ivf",
+  "av1-1-b8-00-quantizer-22.ivf", "av1-1-b8-00-quantizer-23.ivf",
+  "av1-1-b8-00-quantizer-24.ivf", "av1-1-b8-00-quantizer-25.ivf",
+  "av1-1-b8-00-quantizer-26.ivf", "av1-1-b8-00-quantizer-27.ivf",
+  "av1-1-b8-00-quantizer-28.ivf", "av1-1-b8-00-quantizer-29.ivf",
+  "av1-1-b8-00-quantizer-30.ivf", "av1-1-b8-00-quantizer-31.ivf",
+  "av1-1-b8-00-quantizer-32.ivf", "av1-1-b8-00-quantizer-33.ivf",
+  "av1-1-b8-00-quantizer-34.ivf", "av1-1-b8-00-quantizer-35.ivf",
+  "av1-1-b8-00-quantizer-36.ivf", "av1-1-b8-00-quantizer-37.ivf",
+  "av1-1-b8-00-quantizer-38.ivf", "av1-1-b8-00-quantizer-39.ivf",
+  "av1-1-b8-00-quantizer-40.ivf", "av1-1-b8-00-quantizer-41.ivf",
+  "av1-1-b8-00-quantizer-42.ivf", "av1-1-b8-00-quantizer-43.ivf",
+  "av1-1-b8-00-quantizer-44.ivf", "av1-1-b8-00-quantizer-45.ivf",
+  "av1-1-b8-00-quantizer-46.ivf", "av1-1-b8-00-quantizer-47.ivf",
+  "av1-1-b8-00-quantizer-48.ivf", "av1-1-b8-00-quantizer-49.ivf",
+  "av1-1-b8-00-quantizer-50.ivf", "av1-1-b8-00-quantizer-51.ivf",
+  "av1-1-b8-00-quantizer-52.ivf", "av1-1-b8-00-quantizer-53.ivf",
+  "av1-1-b8-00-quantizer-54.ivf", "av1-1-b8-00-quantizer-55.ivf",
+  "av1-1-b8-00-quantizer-56.ivf", "av1-1-b8-00-quantizer-57.ivf",
+  "av1-1-b8-00-quantizer-58.ivf", "av1-1-b8-00-quantizer-59.ivf",
+  "av1-1-b8-00-quantizer-60.ivf", "av1-1-b8-00-quantizer-61.ivf",
+  "av1-1-b8-00-quantizer-62.ivf", "av1-1-b8-00-quantizer-63.ivf",
+  "av1-1-b8-01-size-16x16.ivf",   "av1-1-b8-01-size-16x18.ivf",
+  "av1-1-b8-01-size-16x32.ivf",   "av1-1-b8-01-size-16x34.ivf",
+  "av1-1-b8-01-size-16x64.ivf",   "av1-1-b8-01-size-16x66.ivf",
+  "av1-1-b8-01-size-18x16.ivf",   "av1-1-b8-01-size-18x18.ivf",
+  "av1-1-b8-01-size-18x32.ivf",   "av1-1-b8-01-size-18x34.ivf",
+  "av1-1-b8-01-size-18x64.ivf",   "av1-1-b8-01-size-18x66.ivf",
+  "av1-1-b8-01-size-196x196.ivf", "av1-1-b8-01-size-196x198.ivf",
+  "av1-1-b8-01-size-196x200.ivf", "av1-1-b8-01-size-196x202.ivf",
+  "av1-1-b8-01-size-196x208.ivf", "av1-1-b8-01-size-196x210.ivf",
+  "av1-1-b8-01-size-196x224.ivf", "av1-1-b8-01-size-196x226.ivf",
+  "av1-1-b8-01-size-198x196.ivf", "av1-1-b8-01-size-198x198.ivf",
+  "av1-1-b8-01-size-198x200.ivf", "av1-1-b8-01-size-198x202.ivf",
+  "av1-1-b8-01-size-198x208.ivf", "av1-1-b8-01-size-198x210.ivf",
+  "av1-1-b8-01-size-198x224.ivf", "av1-1-b8-01-size-198x226.ivf",
+  "av1-1-b8-01-size-200x196.ivf", "av1-1-b8-01-size-200x198.ivf",
+  "av1-1-b8-01-size-200x200.ivf", "av1-1-b8-01-size-200x202.ivf",
+  "av1-1-b8-01-size-200x208.ivf", "av1-1-b8-01-size-200x210.ivf",
+  "av1-1-b8-01-size-200x224.ivf", "av1-1-b8-01-size-200x226.ivf",
+  "av1-1-b8-01-size-202x196.ivf", "av1-1-b8-01-size-202x198.ivf",
+  "av1-1-b8-01-size-202x200.ivf", "av1-1-b8-01-size-202x202.ivf",
+  "av1-1-b8-01-size-202x208.ivf", "av1-1-b8-01-size-202x210.ivf",
+  "av1-1-b8-01-size-202x224.ivf", "av1-1-b8-01-size-202x226.ivf",
+  "av1-1-b8-01-size-208x196.ivf", "av1-1-b8-01-size-208x198.ivf",
+  "av1-1-b8-01-size-208x200.ivf", "av1-1-b8-01-size-208x202.ivf",
+  "av1-1-b8-01-size-208x208.ivf", "av1-1-b8-01-size-208x210.ivf",
+  "av1-1-b8-01-size-208x224.ivf", "av1-1-b8-01-size-208x226.ivf",
+  "av1-1-b8-01-size-210x196.ivf", "av1-1-b8-01-size-210x198.ivf",
+  "av1-1-b8-01-size-210x200.ivf", "av1-1-b8-01-size-210x202.ivf",
+  "av1-1-b8-01-size-210x208.ivf", "av1-1-b8-01-size-210x210.ivf",
+  "av1-1-b8-01-size-210x224.ivf", "av1-1-b8-01-size-210x226.ivf",
+  "av1-1-b8-01-size-224x196.ivf", "av1-1-b8-01-size-224x198.ivf",
+  "av1-1-b8-01-size-224x200.ivf", "av1-1-b8-01-size-224x202.ivf",
+  "av1-1-b8-01-size-224x208.ivf", "av1-1-b8-01-size-224x210.ivf",
+  "av1-1-b8-01-size-224x224.ivf", "av1-1-b8-01-size-224x226.ivf",
+  "av1-1-b8-01-size-226x196.ivf", "av1-1-b8-01-size-226x198.ivf",
+  "av1-1-b8-01-size-226x200.ivf", "av1-1-b8-01-size-226x202.ivf",
+  "av1-1-b8-01-size-226x208.ivf", "av1-1-b8-01-size-226x210.ivf",
+  "av1-1-b8-01-size-226x224.ivf", "av1-1-b8-01-size-226x226.ivf",
+  "av1-1-b8-01-size-32x16.ivf",   "av1-1-b8-01-size-32x18.ivf",
+  "av1-1-b8-01-size-32x32.ivf",   "av1-1-b8-01-size-32x34.ivf",
+  "av1-1-b8-01-size-32x64.ivf",   "av1-1-b8-01-size-32x66.ivf",
+  "av1-1-b8-01-size-34x16.ivf",   "av1-1-b8-01-size-34x18.ivf",
+  "av1-1-b8-01-size-34x32.ivf",   "av1-1-b8-01-size-34x34.ivf",
+  "av1-1-b8-01-size-34x64.ivf",   "av1-1-b8-01-size-34x66.ivf",
+  "av1-1-b8-01-size-64x16.ivf",   "av1-1-b8-01-size-64x18.ivf",
+  "av1-1-b8-01-size-64x32.ivf",   "av1-1-b8-01-size-64x34.ivf",
+  "av1-1-b8-01-size-64x64.ivf",   "av1-1-b8-01-size-64x66.ivf",
+  "av1-1-b8-01-size-66x16.ivf",   "av1-1-b8-01-size-66x18.ivf",
+  "av1-1-b8-01-size-66x32.ivf",   "av1-1-b8-01-size-66x34.ivf",
+  "av1-1-b8-01-size-66x64.ivf",   "av1-1-b8-01-size-66x66.ivf",
+};
+const int kNumAV1TestVectors = NELEMENTS(kAV1TestVectors);
+#endif  // CONFIG_AV1_DECODER
+
+}  // namespace libaom_test
diff --git a/third_party/aom/test/test_vectors.h b/third_party/aom/test/test_vectors.h
new file mode 100644
index 0000000000..229f063a62
--- /dev/null
+++ b/third_party/aom/test/test_vectors.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef TEST_TEST_VECTORS_H_
+#define TEST_TEST_VECTORS_H_
+
+#include "config/aom_config.h"
+
+namespace libaom_test {
+
+#if CONFIG_AV1_DECODER
+extern const int kNumAV1TestVectors;
+extern const char *const kAV1TestVectors[];
+#endif
+
+}  // namespace libaom_test
+
+#endif  // TEST_TEST_VECTORS_H_
diff --git a/third_party/aom/test/tile_independence_test.cc b/third_party/aom/test/tile_independence_test.cc
index 832227fb8a..e8b2e1fe4f 100644
--- a/third_party/aom/test/tile_independence_test.cc
+++ b/third_party/aom/test/tile_independence_test.cc
@@ -7,7 +7,7 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <cstdio>
 #include <cstdlib>
@@ -22,30 +22,29 @@
 
 namespace {
 class TileIndependenceTest
-    : public ::libaom_test::CodecTestWith2Params<int, int>,
+    : public ::libaom_test::CodecTestWith3Params<int, int, int>,
       public ::libaom_test::EncoderTest {
  protected:
   TileIndependenceTest()
       : EncoderTest(GET_PARAM(0)), md5_fw_order_(), md5_inv_order_(),
-        n_tile_cols_(GET_PARAM(1)), n_tile_rows_(GET_PARAM(2)) {
+        n_tile_cols_(GET_PARAM(1)), n_tile_rows_(GET_PARAM(2)),
+        n_tile_groups_(GET_PARAM(3)) {
     init_flags_ = AOM_CODEC_USE_PSNR;
     aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
     cfg.w = 704;
-    cfg.h = 144;
+    cfg.h = 576;
     cfg.threads = 1;
     cfg.allow_lowbitdepth = 1;
     fw_dec_ = codec_->CreateDecoder(cfg, 0);
     inv_dec_ = codec_->CreateDecoder(cfg, 0);
     inv_dec_->Control(AV1_INVERT_TILE_DECODE_ORDER, 1);
 
-#if CONFIG_AV1
     if (fw_dec_->IsAV1() && inv_dec_->IsAV1()) {
       fw_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1);
       fw_dec_->Control(AV1_SET_DECODE_TILE_COL, -1);
       inv_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1);
       inv_dec_->Control(AV1_SET_DECODE_TILE_COL, -1);
     }
-#endif
   }
 
   virtual ~TileIndependenceTest() {
@@ -63,10 +62,9 @@ class TileIndependenceTest
     if (video->frame() == 1) {
       encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_);
       encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_);
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
-      encoder->Control(AV1E_SET_TILE_LOOPFILTER, 0);
-#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
       SetCpuUsed(encoder);
+    } else if (video->frame() == 3) {
+      encoder->Control(AV1E_SET_NUM_TG, n_tile_groups_);
     }
   }
 
@@ -114,15 +112,16 @@ class TileIndependenceTest
  private:
   int n_tile_cols_;
   int n_tile_rows_;
+  int n_tile_groups_;
 };
 
 // run an encode with 2 or 4 tiles, and do the decode both in normal and
 // inverted tile ordering. Ensure that the MD5 of the output in both cases
 // is identical. If so, tiles are considered independent and the test passes.
 TEST_P(TileIndependenceTest, MD5Match) {
-#if CONFIG_EXT_TILE
   cfg_.large_scale_tile = 0;
-#endif  // CONFIG_EXT_TILE
+  fw_dec_->Control(AV1_SET_TILE_MODE, 0);
+  inv_dec_->Control(AV1_SET_TILE_MODE, 0);
   DoTest();
 }
 
@@ -134,36 +133,38 @@ class TileIndependenceTestLarge : public TileIndependenceTest {
 };
 
 TEST_P(TileIndependenceTestLarge, MD5Match) {
-#if CONFIG_EXT_TILE
   cfg_.large_scale_tile = 0;
-#endif  // CONFIG_EXT_TILE
+  fw_dec_->Control(AV1_SET_TILE_MODE, 0);
+  inv_dec_->Control(AV1_SET_TILE_MODE, 0);
   DoTest();
 }
 
 AV1_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Values(0, 1),
-                          ::testing::Values(0, 1));
+                          ::testing::Values(0, 1), ::testing::Values(1, 2, 4));
 AV1_INSTANTIATE_TEST_CASE(TileIndependenceTestLarge, ::testing::Values(0, 1),
-                          ::testing::Values(0, 1));
+                          ::testing::Values(0, 1), ::testing::Values(1, 2, 4));
 
-#if CONFIG_EXT_TILE
 class TileIndependenceLSTest : public TileIndependenceTest {};
 
-TEST_P(TileIndependenceLSTest, MD5Match) {
+TEST_P(TileIndependenceLSTest, DISABLED_MD5Match) {
   cfg_.large_scale_tile = 1;
+  fw_dec_->Control(AV1_SET_TILE_MODE, 1);
+  inv_dec_->Control(AV1_SET_TILE_MODE, 1);
   DoTest();
 }
 
 class TileIndependenceLSTestLarge : public TileIndependenceTestLarge {};
 
-TEST_P(TileIndependenceLSTestLarge, MD5Match) {
+TEST_P(TileIndependenceLSTestLarge, DISABLED_MD5Match) {
   cfg_.large_scale_tile = 1;
+  fw_dec_->Control(AV1_SET_TILE_MODE, 1);
+  inv_dec_->Control(AV1_SET_TILE_MODE, 1);
   DoTest();
 }
 
 AV1_INSTANTIATE_TEST_CASE(TileIndependenceLSTest, ::testing::Values(1, 2, 32),
-                          ::testing::Values(1, 2, 32));
+                          ::testing::Values(1, 2, 32), ::testing::Values(1));
 AV1_INSTANTIATE_TEST_CASE(TileIndependenceLSTestLarge,
                           ::testing::Values(1, 2, 32),
-                          ::testing::Values(1, 2, 32));
-#endif  // CONFIG_EXT_TILE
+                          ::testing::Values(1, 2, 32), ::testing::Values(1));
 }  // namespace
diff --git a/third_party/aom/test/tools_common.sh b/third_party/aom/test/tools_common.sh
index c1262b6c88..21a6b9b8e9 100755
--- a/third_party/aom/test/tools_common.sh
+++ b/third_party/aom/test/tools_common.sh
@@ -76,61 +76,46 @@ cleanup() {
   if [ -n "${AOM_TOOL_TEST}" ] && [ "${AOM_TOOL_TEST}" != '<unset>' ]; then
     echo "FAIL: $AOM_TOOL_TEST"
   fi
+  if [ "${AOM_TEST_PRESERVE_OUTPUT}" = "yes" ]; then
+    return
+  fi
   if [ -n "${AOM_TEST_OUTPUT_DIR}" ] && [ -d "${AOM_TEST_OUTPUT_DIR}" ]; then
     rm -rf "${AOM_TEST_OUTPUT_DIR}"
   fi
 }
 
-# Echoes the git hash portion of the VERSION_STRING variable defined in
-# $LIBAOM_CONFIG_PATH/config.mk to stdout, or the version number string when
-# no git hash is contained in VERSION_STRING.
-config_hash() {
-  aom_config_mk="${LIBAOM_CONFIG_PATH}/config.mk"
-  if [ ! -f "${aom_config_mk}" ]; then
-    aom_config_c="${LIBAOM_CONFIG_PATH}/aom_config.c"
-    # Clean up the aom_git_hash pointer line from aom_config.c.
-    # 1. Run grep on aom_config.c for aom_git_hash and limit results to 1.
-    # 2. Split the line using ' = "' as separator.
-    # 3. Abuse sed to consume the trailing "; from the assignment to the
-    #    aom_git_hash pointer.
-    awk -F ' = "' '/aom_git_hash/ { print $NF; exit }' "${aom_config_c}" \
-      | sed s/\"\;//
-    return
-  fi
+# Echoes the version string assigned to the VERSION_STRING_NOSP variable defined
+# in $LIBAOM_CONFIG_PATH/config/aom_version.h to stdout.
+cmake_version() {
+  aom_version_h="${LIBAOM_CONFIG_PATH}/config/aom_version.h"
 
-  # Find VERSION_STRING line, split it with "-g" and print the last field to
-  # output the git hash to stdout.
-  aom_version=$(awk -F -g '/VERSION_STRING/ {print $NF}' "${aom_config_mk}")
-  # Handle two situations here:
-  # 1. The default case: $aom_version is a git hash, so echo it unchanged.
-  # 2. When being run a non-dev tree, the -g portion is not present in the
-  #    version string: It's only the version number.
-  #    In this case $aom_version is something like 'VERSION_STRING=v1.3.0', so
-  #    we echo only what is after the '='.
-  echo "${aom_version##*=}"
+  # Find VERSION_STRING_NOSP line, split it with '"' and print the next to last
+  # field to output the version string to stdout.
+  aom_version=$(awk -F \" '/VERSION_STRING_NOSP/ {print $(NF-1)}' \
+    "${aom_version_h}")
+  echo "v${aom_version}"
 }
 
-# Echoes the short form of the current git hash.
-current_hash() {
+# Echoes current git version as reported by running 'git describe', or the
+# version used by the cmake build when git is unavailable.
+source_version() {
   if git --version > /dev/null 2>&1; then
     (cd "$(dirname "${0}")"
-    git rev-parse HEAD)
+    git describe)
   else
-    # Return the config hash if git is unavailable: Fail silently, git hashes
-    # are used only for warnings.
-    config_hash
+    cmake_version
   fi
 }
 
-# Echoes warnings to stdout when git hash in aom_config.h does not match the
-# current git hash.
-check_git_hashes() {
-  hash_at_configure_time=$(config_hash)
-  hash_now=$(current_hash)
+# Echoes warnings to stdout when source version and CMake build generated
+# version are out of sync.
+check_version_strings() {
+  cmake_version=$(cmake_version)
+  source_version=$(source_version)
 
-  if [ "${hash_at_configure_time}" != "${hash_now}" ]; then
-    echo "Warning: git hash has changed since last configure."
-    vlog "  config hash: ${hash_at_configure_time} hash now: ${hash_now}"
+  if [ "${cmake_version}" != "${source_version}" ]; then
+    echo "Warning: version has changed since last cmake run."
+    vlog "  cmake version: ${cmake_version} version now: ${source_version}"
   fi
 }
 
@@ -159,7 +144,7 @@ verify_aom_test_environment() {
 # is available.
 aom_config_option_enabled() {
   aom_config_option="${1}"
-  aom_config_file="${LIBAOM_CONFIG_PATH}/aom_config.h"
+  aom_config_file="${LIBAOM_CONFIG_PATH}/config/aom_config.h"
   config_line=$(grep "${aom_config_option}" "${aom_config_file}")
   if echo "${config_line}" | egrep -q '1$'; then
     echo yes
@@ -174,22 +159,29 @@ is_windows_target() {
   fi
 }
 
-# Echoes path to $1 when it's executable and exists in ${LIBAOM_BIN_PATH}, or an
-# empty string. Caller is responsible for testing the string once the function
-# returns.
+# Echoes path to $1 when it's executable and exists in one of the directories
+# included in $tool_paths, or an empty string. Caller is responsible for testing
+# the string once the function returns.
 aom_tool_path() {
   local readonly tool_name="$1"
-  local tool_path="${LIBAOM_BIN_PATH}/${tool_name}${AOM_TEST_EXE_SUFFIX}"
-  if [ ! -x "${tool_path}" ]; then
-    # Try one directory up: when running via examples.sh the tool could be in
-    # the parent directory of $LIBAOM_BIN_PATH.
-    tool_path="${LIBAOM_BIN_PATH}/../${tool_name}${AOM_TEST_EXE_SUFFIX}"
-  fi
+  local readonly root_path="${LIBAOM_BIN_PATH}"
+  local readonly suffix="${AOM_TEST_EXE_SUFFIX}"
+  local readonly tool_paths="\
+    ${root_path}/${tool_name}${suffix} \
+    ${root_path}/../${tool_name}${suffix} \
+    ${root_path}/tools/${tool_name}${suffix} \
+    ${root_path}/../tools/${tool_name}${suffix}"
+
+  local toolpath=""
+
+  for tool_path in ${tool_paths}; do
+    if [ -x "${tool_path}" ] && [ -f "${tool_path}" ]; then
+      echo "${tool_path}"
+      return 0
+    fi
+  done
 
-  if [ ! -x "${tool_path}" ]; then
-    tool_path=""
-  fi
-  echo "${tool_path}"
+  return 1
 }
 
 # Echoes yes to stdout when the file named by positional parameter one exists
@@ -212,6 +204,14 @@ av1_encode_available() {
   [ "$(aom_config_option_enabled CONFIG_AV1_ENCODER)" = "yes" ] && echo yes
 }
 
+# Echoes "fast" encode params for use with aomenc.
+aomenc_encode_test_fast_params() {
+  echo "--cpu-used=1
+        --limit=${AV1_ENCODE_TEST_FRAME_LIMIT}
+        --lag-in-frames=0
+        --test-decode=fatal"
+}
+
 # Echoes yes to stdout when aom_config_option_enabled() reports yes for
 # CONFIG_WEBM_IO.
 webm_io_available() {
@@ -285,7 +285,7 @@ run_tests() {
   # Combine environment and actual tests.
   local tests_to_run="${env_tests} ${tests_to_filter}"
 
-  check_git_hashes
+  check_version_strings
 
   # Run tests.
   for test in ${tests_to_run}; do
@@ -296,7 +296,7 @@ run_tests() {
     test_end "${test}"
   done
 
-  local tested_config="$(test_configuration_target) @ $(current_hash)"
+  local tested_config="$(test_configuration_target) @ $(source_version)"
   echo "${test_name}: Done, all tests pass for ${tested_config}."
 }
 
@@ -352,10 +352,9 @@ encode_yuv_raw_input_av1() {
     local readonly encoder="$(aom_tool_path aomenc)"
     shift
     eval "${encoder}" $(yuv_raw_input) \
-      --codec=av1 \
-      $@ \
-      --limit=5 \
+      $(aomenc_encode_test_fast_params) \
       --output="${output}" \
+      $@ \
       ${devnull}
 
     if [ ! -e "${output}" ]; then
@@ -427,7 +426,7 @@ else
   AOM_TEST_TEMP_ROOT=/tmp
 fi
 
-AOM_TEST_OUTPUT_DIR="${AOM_TEST_TEMP_ROOT}/aom_test_$$"
+AOM_TEST_OUTPUT_DIR="${AOM_TEST_OUTPUT_DIR:-${AOM_TEST_TEMP_ROOT}/aom_test_$$}"
 
 if ! mkdir -p "${AOM_TEST_OUTPUT_DIR}" || \
    [ ! -d "${AOM_TEST_OUTPUT_DIR}" ]; then
@@ -436,17 +435,19 @@ if ! mkdir -p "${AOM_TEST_OUTPUT_DIR}" || \
   exit 1
 fi
 
+AOM_TEST_PRESERVE_OUTPUT=${AOM_TEST_PRESERVE_OUTPUT:-no}
+
 if [ "$(is_windows_target)" = "yes" ]; then
   AOM_TEST_EXE_SUFFIX=".exe"
 fi
 
 # Variables shared by tests.
-VP8_IVF_FILE="${LIBAOM_TEST_DATA_PATH}/vp80-00-comprehensive-001.ivf"
-AV1_IVF_FILE="${LIBAOM_TEST_DATA_PATH}/vp90-2-09-subpixel-00.ivf"
-
-AV1_WEBM_FILE="${LIBAOM_TEST_DATA_PATH}/vp90-2-00-quantizer-00.webm"
-AV1_FPM_WEBM_FILE="${LIBAOM_TEST_DATA_PATH}/vp90-2-07-frame_parallel-1.webm"
-AV1_LT_50_FRAMES_WEBM_FILE="${LIBAOM_TEST_DATA_PATH}/vp90-2-02-size-32x08.webm"
+AV1_ENCODE_CPU_USED=${AV1_ENCODE_CPU_USED:-1}
+AV1_ENCODE_TEST_FRAME_LIMIT=${AV1_ENCODE_TEST_FRAME_LIMIT:-5}
+AV1_IVF_FILE="${AV1_IVF_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.ivf}"
+AV1_OBU_ANNEXB_FILE="${AV1_OBU_ANNEXB_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.annexb.obu}"
+AV1_OBU_SEC5_FILE="${AV1_OBU_SEC5_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.section5.obu}"
+AV1_WEBM_FILE="${AV1_WEBM_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.webm}"
 
 YUV_RAW_INPUT="${LIBAOM_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
 YUV_RAW_INPUT_WIDTH=352
@@ -462,18 +463,22 @@ vlog "$(basename "${0%.*}") test configuration:
   LIBAOM_BIN_PATH=${LIBAOM_BIN_PATH}
   LIBAOM_CONFIG_PATH=${LIBAOM_CONFIG_PATH}
   LIBAOM_TEST_DATA_PATH=${LIBAOM_TEST_DATA_PATH}
-  AOM_IVF_FILE=${AOM_IVF_FILE}
-  AV1_IVF_FILE=${AV1_IVF_FILE}
-  AV1_WEBM_FILE=${AV1_WEBM_FILE}
   AOM_TEST_EXE_SUFFIX=${AOM_TEST_EXE_SUFFIX}
   AOM_TEST_FILTER=${AOM_TEST_FILTER}
   AOM_TEST_LIST_TESTS=${AOM_TEST_LIST_TESTS}
   AOM_TEST_OUTPUT_DIR=${AOM_TEST_OUTPUT_DIR}
   AOM_TEST_PREFIX=${AOM_TEST_PREFIX}
+  AOM_TEST_PRESERVE_OUTPUT=${AOM_TEST_PRESERVE_OUTPUT}
   AOM_TEST_RUN_DISABLED_TESTS=${AOM_TEST_RUN_DISABLED_TESTS}
   AOM_TEST_SHOW_PROGRAM_OUTPUT=${AOM_TEST_SHOW_PROGRAM_OUTPUT}
   AOM_TEST_TEMP_ROOT=${AOM_TEST_TEMP_ROOT}
   AOM_TEST_VERBOSE_OUTPUT=${AOM_TEST_VERBOSE_OUTPUT}
+  AV1_ENCODE_CPU_USED=${AV1_ENCODE_CPU_USED}
+  AV1_ENCODE_TEST_FRAME_LIMIT=${AV1_ENCODE_TEST_FRAME_LIMIT}
+  AV1_IVF_FILE=${AV1_IVF_FILE}
+  AV1_OBU_ANNEXB_FILE=${AV1_OBU_ANNEXB_FILE}
+  AV1_OBU_SEC5_FILE=${AV1_OBU_SEC5_FILE}
+  AV1_WEBM_FILE=${AV1_WEBM_FILE}
   YUV_RAW_INPUT=${YUV_RAW_INPUT}
   YUV_RAW_INPUT_WIDTH=${YUV_RAW_INPUT_WIDTH}
   YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}
diff --git a/third_party/aom/test/transform_test_base.h b/third_party/aom/test/transform_test_base.h
index 21441beef0..67e8faf33b 100644
--- a/third_party/aom/test/transform_test_base.h
+++ b/third_party/aom/test/transform_test_base.h
@@ -12,7 +12,8 @@
 #ifndef TEST_TRANSFORM_TEST_BASE_H_
 #define TEST_TRANSFORM_TEST_BASE_H_
 
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
 #include "aom_mem/aom_mem.h"
 #include "aom/aom_codec.h"
 #include "aom_dsp/txfm_common.h"
@@ -57,12 +58,10 @@ class TransformTestBase {
         aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
     uint8_t *src = reinterpret_cast<uint8_t *>(
         aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
-#if CONFIG_HIGHBITDEPTH
     uint16_t *dst16 = reinterpret_cast<uint16_t *>(
         aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
     uint16_t *src16 = reinterpret_cast<uint16_t *>(
         aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
-#endif
 
     for (int i = 0; i < count_test_block; ++i) {
       // Initialize a test block with input range [-255, 255].
@@ -71,12 +70,10 @@ class TransformTestBase {
           src[j] = rnd.Rand8();
           dst[j] = rnd.Rand8();
           test_input_block[j] = src[j] - dst[j];
-#if CONFIG_HIGHBITDEPTH
         } else {
           src16[j] = rnd.Rand16() & mask_;
           dst16[j] = rnd.Rand16() & mask_;
           test_input_block[j] = src16[j] - dst16[j];
-#endif
         }
       }
 
@@ -84,21 +81,14 @@ class TransformTestBase {
           RunFwdTxfm(test_input_block, test_temp_block, pitch_));
       if (bit_depth_ == AOM_BITS_8) {
         ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
-#if CONFIG_HIGHBITDEPTH
       } else {
         ASM_REGISTER_STATE_CHECK(
             RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
-#endif
       }
 
       for (int j = 0; j < num_coeffs_; ++j) {
-#if CONFIG_HIGHBITDEPTH
         const int diff =
             bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
-#else
-        ASSERT_EQ(AOM_BITS_8, bit_depth_);
-        const int diff = dst[j] - src[j];
-#endif
         const uint32_t error = diff * diff;
         if (max_error < error) max_error = error;
         total_error += error;
@@ -119,10 +109,8 @@ class TransformTestBase {
     aom_free(test_temp_block);
     aom_free(dst);
     aom_free(src);
-#if CONFIG_HIGHBITDEPTH
     aom_free(dst16);
     aom_free(src16);
-#endif
   }
 
   void RunCoeffCheck() {
@@ -149,11 +137,9 @@ class TransformTestBase {
           input_block[in_idx] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
           if (bit_depth_ == AOM_BITS_8) {
             output_block[out_idx] = output_ref_block[out_idx] = rnd.Rand8();
-#if CONFIG_HIGHBITDEPTH
           } else {
             output_block[out_idx] = output_ref_block[out_idx] =
                 rnd.Rand16() & mask_;
-#endif
           }
         }
       }
@@ -255,7 +241,7 @@ class TransformTestBase {
       int row_length = FindRowLength();
       // The minimum quant value is 4.
       for (int j = 0; j < num_coeffs_; ++j) {
-        EXPECT_EQ(output_block[j], output_ref_block[j])
+        ASSERT_EQ(output_block[j], output_ref_block[j])
             << "Not bit-exact at test index: " << i << ", "
             << "j = " << j << std::endl;
         EXPECT_GE(row_length * kDctMaxValue << (bit_depth_ - 8),
@@ -281,12 +267,10 @@ class TransformTestBase {
     uint8_t *src = reinterpret_cast<uint8_t *>(
         aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
 
-#if CONFIG_HIGHBITDEPTH
     uint16_t *dst16 = reinterpret_cast<uint16_t *>(
         aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
     uint16_t *src16 = reinterpret_cast<uint16_t *>(
         aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
-#endif
 
     for (int i = 0; i < count_test_block; ++i) {
       // Initialize a test block with input range [-mask_, mask_].
@@ -295,12 +279,10 @@ class TransformTestBase {
           src[j] = rnd.Rand8();
           dst[j] = rnd.Rand8();
           in[j] = src[j] - dst[j];
-#if CONFIG_HIGHBITDEPTH
         } else {
           src16[j] = rnd.Rand16() & mask_;
           dst16[j] = rnd.Rand16() & mask_;
           in[j] = src16[j] - dst16[j];
-#endif
         }
       }
 
@@ -308,22 +290,16 @@ class TransformTestBase {
 
       if (bit_depth_ == AOM_BITS_8) {
         ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
-#if CONFIG_HIGHBITDEPTH
       } else {
         ASM_REGISTER_STATE_CHECK(
             RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
-#endif
       }
 
       for (int j = 0; j < num_coeffs_; ++j) {
-#if CONFIG_HIGHBITDEPTH
         const int diff =
             bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
-#else
-        const int diff = dst[j] - src[j];
-#endif
         const uint32_t error = diff * diff;
-        EXPECT_GE(static_cast<uint32_t>(limit), error)
+        ASSERT_GE(static_cast<uint32_t>(limit), error)
             << "Error: 4x4 IDCT has error " << error << " at index " << j;
       }
     }
@@ -331,10 +307,8 @@ class TransformTestBase {
     aom_free(coeff);
     aom_free(dst);
     aom_free(src);
-#if CONFIG_HIGHBITDEPTH
     aom_free(src16);
     aom_free(dst16);
-#endif
   }
 
   int pitch_;
diff --git a/third_party/aom/test/twopass_encoder.sh b/third_party/aom/test/twopass_encoder.sh
index 3abb7628b3..cca44ced8a 100755
--- a/third_party/aom/test/twopass_encoder.sh
+++ b/third_party/aom/test/twopass_encoder.sh
@@ -26,7 +26,7 @@ twopass_encoder_verify_environment() {
 # Runs twopass_encoder using the codec specified by $1 with a frame limit of
 # 100.
 twopass_encoder() {
-  local encoder="${LIBAOM_BIN_PATH}/twopass_encoder${AOM_TEST_EXE_SUFFIX}"
+  local encoder="$(aom_tool_path twopass_encoder)"
   local codec="$1"
   local output_file="${AOM_TEST_OUTPUT_DIR}/twopass_encoder_${codec}.ivf"
   local limit=7
diff --git a/third_party/aom/test/user_priv_test.cc b/third_party/aom/test/user_priv_test.cc
deleted file mode 100644
index da289c9901..0000000000
--- a/third_party/aom/test/user_priv_test.cc
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "./aom_config.h"
-#include "test/acm_random.h"
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/ivf_video_source.h"
-#include "test/md5_helper.h"
-#include "test/util.h"
-#if CONFIG_WEBM_IO
-#include "test/webm_video_source.h"
-#endif
-#include "aom_mem/aom_mem.h"
-#include "aom/aom.h"
-
-namespace {
-
-using std::string;
-using libaom_test::ACMRandom;
-
-#if CONFIG_WEBM_IO
-
-void CheckUserPrivateData(void *user_priv, int *target) {
-  // actual pointer value should be the same as expected.
-  EXPECT_EQ(reinterpret_cast<void *>(target), user_priv)
-      << "user_priv pointer value does not match.";
-}
-
-// Decodes |filename|. Passes in user_priv data when calling DecodeFrame and
-// compares the user_priv from return img with the original user_priv to see if
-// they match. Both the pointer values and the values inside the addresses
-// should match.
-string DecodeFile(const string &filename) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  libaom_test::WebMVideoSource video(filename);
-  video.Init();
-
-  aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
-  cfg.allow_lowbitdepth = 1;
-  libaom_test::AV1Decoder decoder(cfg, 0);
-
-  libaom_test::MD5 md5;
-  int frame_num = 0;
-  for (video.Begin(); !::testing::Test::HasFailure() && video.cxdata();
-       video.Next()) {
-    void *user_priv = reinterpret_cast<void *>(&frame_num);
-    const aom_codec_err_t res =
-        decoder.DecodeFrame(video.cxdata(), video.frame_size(),
-                            (frame_num == 0) ? NULL : user_priv);
-    if (res != AOM_CODEC_OK) {
-      EXPECT_EQ(AOM_CODEC_OK, res) << decoder.DecodeError();
-      break;
-    }
-    libaom_test::DxDataIterator dec_iter = decoder.GetDxData();
-    const aom_image_t *img = NULL;
-
-    // Get decompressed data.
-    while ((img = dec_iter.Next())) {
-      if (frame_num == 0) {
-        CheckUserPrivateData(img->user_priv, NULL);
-      } else {
-        CheckUserPrivateData(img->user_priv, &frame_num);
-
-        // Also test ctrl_get_reference api.
-        struct av1_ref_frame ref;
-        // Randomly fetch a reference frame.
-        ref.idx = rnd.Rand8() % 3;
-        decoder.Control(AV1_GET_REFERENCE, &ref);
-
-        CheckUserPrivateData(ref.img.user_priv, NULL);
-      }
-      md5.Add(img);
-    }
-
-    frame_num++;
-  }
-  return string(md5.Get());
-}
-
-TEST(UserPrivTest, VideoDecode) {
-  // no tiles or frame parallel; this exercises the decoding to test the
-  // user_priv.
-  EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc",
-               DecodeFile("av10-2-03-size-226x226.webm").c_str());
-}
-
-#endif  // CONFIG_WEBM_IO
-
-}  // namespace
diff --git a/third_party/aom/test/util.h b/third_party/aom/test/util.h
index d1587b6eac..db00875efd 100644
--- a/third_party/aom/test/util.h
+++ b/third_party/aom/test/util.h
@@ -17,9 +17,10 @@
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "aom/aom_integer.h"
 #include "aom/aom_image.h"
+#include "aom_ports/aom_timer.h"
 
 // Macros
-#define GET_PARAM(k) std::tr1::get<k>(GetParam())
+#define GET_PARAM(k) ::testing::get<k>(GetParam())
 
 inline double compute_psnr(const aom_image_t *img1, const aom_image_t *img2) {
   assert((img1->fmt == img2->fmt) && (img1->d_w == img2->d_w) &&
@@ -44,4 +45,9 @@ inline double compute_psnr(const aom_image_t *img1, const aom_image_t *img2) {
   return psnr;
 }
 
+static INLINE double get_time_mark(aom_usec_timer *t) {
+  aom_usec_timer_mark(t);
+  return static_cast<double>(aom_usec_timer_elapsed(t));
+}
+
 #endif  // TEST_UTIL_H_
diff --git a/third_party/aom/test/variance_test.cc b/third_party/aom/test/variance_test.cc
index 2f5c222833..eb801b442f 100644
--- a/third_party/aom/test/variance_test.cc
+++ b/third_party/aom/test/variance_test.cc
@@ -7,15 +7,16 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <cstdlib>
 #include <new>
 
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
@@ -41,6 +42,10 @@ typedef unsigned int (*SubpixAvgVarMxNFunc)(const uint8_t *a, int a_stride,
 typedef unsigned int (*Get4x4SseFunc)(const uint8_t *a, int a_stride,
                                       const uint8_t *b, int b_stride);
 typedef unsigned int (*SumOfSquaresFunction)(const int16_t *src);
+typedef unsigned int (*JntSubpixAvgVarMxNFunc)(
+    const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
+    int b_stride, uint32_t *sse, const uint8_t *second_pred,
+    const JNT_COMP_PARAMS *jcp_param);
 
 using libaom_test::ACMRandom;
 
@@ -89,13 +94,11 @@ static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w,
         diff = src[y * src_stride + x] - ref[y * ref_stride + x];
         se += diff;
         sse += diff * diff;
-#if CONFIG_HIGHBITDEPTH
       } else {
         diff = CONVERT_TO_SHORTPTR(src)[y * src_stride + x] -
                CONVERT_TO_SHORTPTR(ref)[y * ref_stride + x];
         se += diff;
         sse += diff * diff;
-#endif  // CONFIG_HIGHBITDEPTH
       }
     }
   }
@@ -136,7 +139,6 @@ static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
         const int diff = r - src[w * y + x];
         se += diff;
         sse += diff * diff;
-#if CONFIG_HIGHBITDEPTH
       } else {
         uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
         uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
@@ -150,7 +152,6 @@ static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
         const int diff = r - src16[w * y + x];
         se += diff;
         sse += diff * diff;
-#endif  // CONFIG_HIGHBITDEPTH
       }
     }
   }
@@ -188,7 +189,6 @@ static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src,
             ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
         se += diff;
         sse += diff * diff;
-#if CONFIG_HIGHBITDEPTH
       } else {
         const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
         const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
@@ -203,7 +203,64 @@ static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src,
         const int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x];
         se += diff;
         sse += diff * diff;
-#endif  // CONFIG_HIGHBITDEPTH
+      }
+    }
+  }
+  RoundHighBitDepth(bit_depth, &se, &sse);
+  *sse_ptr = static_cast<uint32_t>(sse);
+  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
+}
+
+static uint32_t jnt_subpel_avg_variance_ref(
+    const uint8_t *ref, const uint8_t *src, const uint8_t *second_pred, int l2w,
+    int l2h, int xoff, int yoff, uint32_t *sse_ptr, bool use_high_bit_depth,
+    aom_bit_depth_t bit_depth, JNT_COMP_PARAMS *jcp_param) {
+  int64_t se = 0;
+  uint64_t sse = 0;
+  const int w = 1 << l2w;
+  const int h = 1 << l2h;
+
+  xoff <<= 1;
+  yoff <<= 1;
+
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      // bilinear interpolation at a 16th pel step
+      if (!use_high_bit_depth) {
+        const int a1 = ref[(w + 0) * (y + 0) + x + 0];
+        const int a2 = ref[(w + 0) * (y + 0) + x + 1];
+        const int b1 = ref[(w + 0) * (y + 1) + x + 0];
+        const int b2 = ref[(w + 0) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int avg = ROUND_POWER_OF_TWO(
+            r * jcp_param->fwd_offset +
+                second_pred[w * y + x] * jcp_param->bck_offset,
+            DIST_PRECISION_BITS);
+        const int diff = avg - src[w * y + x];
+
+        se += diff;
+        sse += diff * diff;
+      } else {
+        const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
+        const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
+        const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
+        const int a1 = ref16[(w + 0) * (y + 0) + x + 0];
+        const int a2 = ref16[(w + 0) * (y + 0) + x + 1];
+        const int b1 = ref16[(w + 0) * (y + 1) + x + 0];
+        const int b2 = ref16[(w + 0) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int avg =
+            ROUND_POWER_OF_TWO(r * jcp_param->fwd_offset +
+                                   sec16[w * y + x] * jcp_param->bck_offset,
+                               DIST_PRECISION_BITS);
+        const int diff = avg - src16[w * y + x];
+
+        se += diff;
+        sse += diff * diff;
       }
     }
   }
@@ -287,7 +344,7 @@ struct TestParams {
 
 template <typename Func>
 std::ostream &operator<<(std::ostream &os, const TestParams<Func> &p) {
-  return os << "log2width/height:" << p.log2width << "/" << p.log2height
+  return os << "width/height:" << p.width << "/" << p.height
             << " function:" << reinterpret_cast<const void *>(p.func)
             << " bit-depth:" << p.bit_depth;
 }
@@ -307,23 +364,19 @@ class MainTestClass
     ref_ = new uint8_t[block_size() * unit];
     ASSERT_TRUE(src_ != NULL);
     ASSERT_TRUE(ref_ != NULL);
-#if CONFIG_HIGHBITDEPTH
     if (use_high_bit_depth()) {
       // TODO(skal): remove!
       src_ = CONVERT_TO_BYTEPTR(src_);
       ref_ = CONVERT_TO_BYTEPTR(ref_);
     }
-#endif
   }
 
   virtual void TearDown() {
-#if CONFIG_HIGHBITDEPTH
     if (use_high_bit_depth()) {
       // TODO(skal): remove!
       src_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(src_));
       ref_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(ref_));
     }
-#endif
 
     aom_free(src_);
     delete[] ref_;
@@ -343,6 +396,7 @@ class MainTestClass
   void RefTest();
   void RefStrideTest();
   void OneQuarterTest();
+  void SpeedTest();
 
   // MSE/SSE tests
   void RefTestMse();
@@ -398,11 +452,9 @@ void MainTestClass<VarianceFunctionType>::RefTest() {
       if (!use_high_bit_depth()) {
         src_[j] = rnd_.Rand8();
         ref_[j] = rnd_.Rand8();
-#if CONFIG_HIGHBITDEPTH
       } else {
         CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
         CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
-#endif  // CONFIG_HIGHBITDEPTH
       }
     }
     unsigned int sse1, sse2, var1, var2;
@@ -428,11 +480,9 @@ void MainTestClass<VarianceFunctionType>::RefStrideTest() {
       if (!use_high_bit_depth()) {
         src_[src_ind] = rnd_.Rand8();
         ref_[ref_ind] = rnd_.Rand8();
-#if CONFIG_HIGHBITDEPTH
       } else {
         CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask();
         CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask();
-#endif  // CONFIG_HIGHBITDEPTH
       }
     }
     unsigned int sse1, sse2;
@@ -455,12 +505,10 @@ void MainTestClass<VarianceFunctionType>::OneQuarterTest() {
     memset(src_, 255, block_size());
     memset(ref_, 255, half);
     memset(ref_ + half, 0, half);
-#if CONFIG_HIGHBITDEPTH
   } else {
     aom_memset16(CONVERT_TO_SHORTPTR(src_), 255 << byte_shift(), block_size());
     aom_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << byte_shift(), half);
     aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half);
-#endif  // CONFIG_HIGHBITDEPTH
   }
   unsigned int sse, var, expected;
   ASM_REGISTER_STATE_CHECK(
@@ -469,6 +517,31 @@ void MainTestClass<VarianceFunctionType>::OneQuarterTest() {
   EXPECT_EQ(expected, var);
 }
 
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::SpeedTest() {
+  for (int j = 0; j < block_size(); j++) {
+    if (!use_high_bit_depth()) {
+      src_[j] = rnd_.Rand8();
+      ref_[j] = rnd_.Rand8();
+    } else {
+      CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
+      CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
+    }
+  }
+  unsigned int sse1, sse2, var1, var2;
+  const int stride = width();
+  int run_time = 1000000000 / block_size();
+
+  ASM_REGISTER_STATE_CHECK(var1 =
+                               params_.func(src_, stride, ref_, stride, &sse1));
+  for (int i = 0; i < run_time; ++i) {
+    ASM_REGISTER_STATE_CHECK(
+        var2 = params_.func(src_, stride, ref_, stride, &sse2));
+  }
+  EXPECT_EQ(var1, var2);
+  EXPECT_EQ(sse1, sse2);
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Tests related to MSE / SSE.
 
@@ -527,9 +600,9 @@ void MainTestClass<FunctionType>::MaxTestSse() {
 
 ////////////////////////////////////////////////////////////////////////////////
 
-using ::std::tr1::get;
-using ::std::tr1::make_tuple;
-using ::std::tr1::tuple;
+using ::testing::get;
+using ::testing::make_tuple;
+using ::testing::tuple;
 
 template <typename FunctionType>
 class SubpelVarianceTest
@@ -540,18 +613,17 @@ class SubpelVarianceTest
 
     rnd_.Reset(ACMRandom::DeterministicSeed());
     if (!use_high_bit_depth()) {
-      src_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size()));
-      sec_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size()));
-      ref_ = new uint8_t[block_size() + width() + height() + 1];
-#if CONFIG_HIGHBITDEPTH
+      src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
+      sec_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
+      ref_ = reinterpret_cast<uint8_t *>(
+          aom_memalign(32, block_size() + width() + height() + 1));
     } else {
       src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
-          aom_memalign(16, block_size() * sizeof(uint16_t))));
+          aom_memalign(32, block_size() * sizeof(uint16_t))));
       sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
-          aom_memalign(16, block_size() * sizeof(uint16_t))));
+          aom_memalign(32, block_size() * sizeof(uint16_t))));
       ref_ = CONVERT_TO_BYTEPTR(aom_memalign(
-          16, (block_size() + width() + height() + 1) * sizeof(uint16_t)));
-#endif  // CONFIG_HIGHBITDEPTH
+          32, (block_size() + width() + height() + 1) * sizeof(uint16_t)));
     }
     ASSERT_TRUE(src_ != NULL);
     ASSERT_TRUE(sec_ != NULL);
@@ -561,14 +633,12 @@ class SubpelVarianceTest
   virtual void TearDown() {
     if (!use_high_bit_depth()) {
       aom_free(src_);
-      delete[] ref_;
+      aom_free(ref_);
       aom_free(sec_);
-#if CONFIG_HIGHBITDEPTH
     } else {
       aom_free(CONVERT_TO_SHORTPTR(src_));
       aom_free(CONVERT_TO_SHORTPTR(ref_));
       aom_free(CONVERT_TO_SHORTPTR(sec_));
-#endif  // CONFIG_HIGHBITDEPTH
     }
     libaom_test::ClearSystemState();
   }
@@ -582,6 +652,7 @@ class SubpelVarianceTest
   uint8_t *ref_;
   uint8_t *sec_;
   TestParams<FunctionType> params_;
+  JNT_COMP_PARAMS jcp_param_;
 
   // some relay helpers
   bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
@@ -603,7 +674,6 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
           ref_[j] = rnd_.Rand8();
         }
-#if CONFIG_HIGHBITDEPTH
       } else {
         for (int j = 0; j < block_size(); j++) {
           CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
@@ -611,7 +681,6 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
           CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
         }
-#endif  // CONFIG_HIGHBITDEPTH
       }
       unsigned int sse1, sse2;
       unsigned int var1;
@@ -639,14 +708,12 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
         memset(src_ + half, 255, half);
         memset(ref_, 255, half);
         memset(ref_ + half, 0, half + width() + height() + 1);
-#if CONFIG_HIGHBITDEPTH
       } else {
         aom_memset16(CONVERT_TO_SHORTPTR(src_), mask(), half);
         aom_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half);
         aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half);
         aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask(),
                      half + width() + height() + 1);
-#endif  // CONFIG_HIGHBITDEPTH
       }
       unsigned int sse1, sse2;
       unsigned int var1;
@@ -673,7 +740,6 @@ void SubpelVarianceTest<SubpixAvgVarMxNFunc>::RefTest() {
         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
           ref_[j] = rnd_.Rand8();
         }
-#if CONFIG_HIGHBITDEPTH
       } else {
         for (int j = 0; j < block_size(); j++) {
           CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
@@ -682,7 +748,6 @@ void SubpelVarianceTest<SubpixAvgVarMxNFunc>::RefTest() {
         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
           CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
         }
-#endif  // CONFIG_HIGHBITDEPTH
       }
       uint32_t sse1, sse2;
       uint32_t var1, var2;
@@ -697,11 +762,53 @@ void SubpelVarianceTest<SubpixAvgVarMxNFunc>::RefTest() {
   }
 }
 
+template <>
+void SubpelVarianceTest<JntSubpixAvgVarMxNFunc>::RefTest() {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
+      if (!use_high_bit_depth()) {
+        for (int j = 0; j < block_size(); j++) {
+          src_[j] = rnd_.Rand8();
+          sec_[j] = rnd_.Rand8();
+        }
+        for (int j = 0; j < block_size() + width() + height() + 1; j++) {
+          ref_[j] = rnd_.Rand8();
+        }
+      } else {
+        for (int j = 0; j < block_size(); j++) {
+          CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
+          CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask();
+        }
+        for (int j = 0; j < block_size() + width() + height() + 1; j++) {
+          CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
+        }
+      }
+      for (int x0 = 0; x0 < 2; ++x0) {
+        for (int y0 = 0; y0 < 4; ++y0) {
+          uint32_t sse1, sse2;
+          uint32_t var1, var2;
+          jcp_param_.fwd_offset = quant_dist_lookup_table[x0][y0][0];
+          jcp_param_.bck_offset = quant_dist_lookup_table[x0][y0][1];
+          ASM_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 0, x, y,
+                                                       src_, width(), &sse1,
+                                                       sec_, &jcp_param_));
+          var2 = jnt_subpel_avg_variance_ref(
+              ref_, src_, sec_, params_.log2width, params_.log2height, x, y,
+              &sse2, use_high_bit_depth(), params_.bit_depth, &jcp_param_);
+          EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
+          EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+        }
+      }
+    }
+  }
+}
+
 typedef MainTestClass<Get4x4SseFunc> AvxSseTest;
 typedef MainTestClass<VarianceMxNFunc> AvxMseTest;
 typedef MainTestClass<VarianceMxNFunc> AvxVarianceTest;
 typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxSubpelVarianceTest;
 typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxSubpelAvgVarianceTest;
+typedef SubpelVarianceTest<JntSubpixAvgVarMxNFunc> AvxJntSubpelAvgVarianceTest;
 
 TEST_P(AvxSseTest, RefSse) { RefTestSse(); }
 TEST_P(AvxSseTest, MaxSse) { MaxTestSse(); }
@@ -711,11 +818,13 @@ TEST_P(AvxVarianceTest, Zero) { ZeroTest(); }
 TEST_P(AvxVarianceTest, Ref) { RefTest(); }
 TEST_P(AvxVarianceTest, RefStride) { RefStrideTest(); }
 TEST_P(AvxVarianceTest, OneQuarter) { OneQuarterTest(); }
+TEST_P(AvxVarianceTest, DISABLED_Speed) { SpeedTest(); }
 TEST_P(SumOfSquaresTest, Const) { ConstTest(); }
 TEST_P(SumOfSquaresTest, Ref) { RefTest(); }
 TEST_P(AvxSubpelVarianceTest, Ref) { RefTest(); }
 TEST_P(AvxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
 TEST_P(AvxSubpelAvgVarianceTest, Ref) { RefTest(); }
+TEST_P(AvxJntSubpelAvgVarianceTest, Ref) { RefTest(); }
 
 INSTANTIATE_TEST_CASE_P(C, SumOfSquaresTest,
                         ::testing::Values(aom_get_mb_ss_c));
@@ -735,7 +844,10 @@ INSTANTIATE_TEST_CASE_P(C, AvxMseTest,
 typedef TestParams<VarianceMxNFunc> VarianceParams;
 INSTANTIATE_TEST_CASE_P(
     C, AvxVarianceTest,
-    ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_c),
+    ::testing::Values(VarianceParams(7, 7, &aom_variance128x128_c),
+                      VarianceParams(7, 6, &aom_variance128x64_c),
+                      VarianceParams(6, 7, &aom_variance64x128_c),
+                      VarianceParams(6, 6, &aom_variance64x64_c),
                       VarianceParams(6, 5, &aom_variance64x32_c),
                       VarianceParams(5, 6, &aom_variance32x64_c),
                       VarianceParams(5, 5, &aom_variance32x32_c),
@@ -753,6 +865,9 @@ typedef TestParams<SubpixVarMxNFunc> SubpelVarianceParams;
 INSTANTIATE_TEST_CASE_P(
     C, AvxSubpelVarianceTest,
     ::testing::Values(
+        SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_c, 0),
+        SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_c, 0),
+        SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_c, 0),
         SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_c, 0),
         SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_c, 0),
         SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_c, 0),
@@ -771,6 +886,9 @@ typedef TestParams<SubpixAvgVarMxNFunc> SubpelAvgVarianceParams;
 INSTANTIATE_TEST_CASE_P(
     C, AvxSubpelAvgVarianceTest,
     ::testing::Values(
+        SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_c, 0),
+        SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_c, 0),
+        SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_c, 0),
         SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_c, 0),
         SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_c, 0),
         SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_c, 0),
@@ -785,7 +903,37 @@ INSTANTIATE_TEST_CASE_P(
         SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_c, 0),
         SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_c, 0)));
 
-#if CONFIG_HIGHBITDEPTH
+typedef TestParams<JntSubpixAvgVarMxNFunc> JntSubpelAvgVarianceParams;
+INSTANTIATE_TEST_CASE_P(
+    C, AvxJntSubpelAvgVarianceTest,
+    ::testing::Values(
+        JntSubpelAvgVarianceParams(6, 6, &aom_jnt_sub_pixel_avg_variance64x64_c,
+                                   0),
+        JntSubpelAvgVarianceParams(6, 5, &aom_jnt_sub_pixel_avg_variance64x32_c,
+                                   0),
+        JntSubpelAvgVarianceParams(5, 6, &aom_jnt_sub_pixel_avg_variance32x64_c,
+                                   0),
+        JntSubpelAvgVarianceParams(5, 5, &aom_jnt_sub_pixel_avg_variance32x32_c,
+                                   0),
+        JntSubpelAvgVarianceParams(5, 4, &aom_jnt_sub_pixel_avg_variance32x16_c,
+                                   0),
+        JntSubpelAvgVarianceParams(4, 5, &aom_jnt_sub_pixel_avg_variance16x32_c,
+                                   0),
+        JntSubpelAvgVarianceParams(4, 4, &aom_jnt_sub_pixel_avg_variance16x16_c,
+                                   0),
+        JntSubpelAvgVarianceParams(4, 3, &aom_jnt_sub_pixel_avg_variance16x8_c,
+                                   0),
+        JntSubpelAvgVarianceParams(3, 4, &aom_jnt_sub_pixel_avg_variance8x16_c,
+                                   0),
+        JntSubpelAvgVarianceParams(3, 3, &aom_jnt_sub_pixel_avg_variance8x8_c,
+                                   0),
+        JntSubpelAvgVarianceParams(3, 2, &aom_jnt_sub_pixel_avg_variance8x4_c,
+                                   0),
+        JntSubpelAvgVarianceParams(2, 3, &aom_jnt_sub_pixel_avg_variance4x8_c,
+                                   0),
+        JntSubpelAvgVarianceParams(2, 2, &aom_jnt_sub_pixel_avg_variance4x4_c,
+                                   0)));
+
 typedef MainTestClass<VarianceMxNFunc> AvxHBDMseTest;
 typedef MainTestClass<VarianceMxNFunc> AvxHBDVarianceTest;
 typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxHBDSubpelVarianceTest;
@@ -819,11 +967,9 @@ INSTANTIATE_TEST_CASE_P(
 */
 
 const VarianceParams kArrayHBDVariance_c[] = {
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   VarianceParams(7, 7, &aom_highbd_12_variance128x128_c, 12),
   VarianceParams(7, 6, &aom_highbd_12_variance128x64_c, 12),
   VarianceParams(6, 7, &aom_highbd_12_variance64x128_c, 12),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   VarianceParams(6, 6, &aom_highbd_12_variance64x64_c, 12),
   VarianceParams(6, 5, &aom_highbd_12_variance64x32_c, 12),
   VarianceParams(5, 6, &aom_highbd_12_variance32x64_c, 12),
@@ -837,11 +983,9 @@ const VarianceParams kArrayHBDVariance_c[] = {
   VarianceParams(3, 2, &aom_highbd_12_variance8x4_c, 12),
   VarianceParams(2, 3, &aom_highbd_12_variance4x8_c, 12),
   VarianceParams(2, 2, &aom_highbd_12_variance4x4_c, 12),
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   VarianceParams(7, 7, &aom_highbd_10_variance128x128_c, 10),
   VarianceParams(7, 6, &aom_highbd_10_variance128x64_c, 10),
   VarianceParams(6, 7, &aom_highbd_10_variance64x128_c, 10),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   VarianceParams(6, 6, &aom_highbd_10_variance64x64_c, 10),
   VarianceParams(6, 5, &aom_highbd_10_variance64x32_c, 10),
   VarianceParams(5, 6, &aom_highbd_10_variance32x64_c, 10),
@@ -855,11 +999,9 @@ const VarianceParams kArrayHBDVariance_c[] = {
   VarianceParams(3, 2, &aom_highbd_10_variance8x4_c, 10),
   VarianceParams(2, 3, &aom_highbd_10_variance4x8_c, 10),
   VarianceParams(2, 2, &aom_highbd_10_variance4x4_c, 10),
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   VarianceParams(7, 7, &aom_highbd_8_variance128x128_c, 8),
   VarianceParams(7, 6, &aom_highbd_8_variance128x64_c, 8),
   VarianceParams(6, 7, &aom_highbd_8_variance64x128_c, 8),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   VarianceParams(6, 6, &aom_highbd_8_variance64x64_c, 8),
   VarianceParams(6, 5, &aom_highbd_8_variance64x32_c, 8),
   VarianceParams(5, 6, &aom_highbd_8_variance32x64_c, 8),
@@ -877,21 +1019,19 @@ const VarianceParams kArrayHBDVariance_c[] = {
 INSTANTIATE_TEST_CASE_P(C, AvxHBDVarianceTest,
                         ::testing::ValuesIn(kArrayHBDVariance_c));
 
-#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+#if HAVE_SSE4_1
 INSTANTIATE_TEST_CASE_P(
     SSE4_1, AvxHBDVarianceTest,
     ::testing::Values(
         VarianceParams(2, 2, &aom_highbd_8_variance4x4_sse4_1, 8),
         VarianceParams(2, 2, &aom_highbd_10_variance4x4_sse4_1, 10),
         VarianceParams(2, 2, &aom_highbd_12_variance4x4_sse4_1, 12)));
-#endif  // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+#endif  // HAVE_SSE4_1
 
 const SubpelVarianceParams kArrayHBDSubpelVariance_c[] = {
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_c, 8),
   SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_c, 8),
   SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_c, 8),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_c, 8),
   SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_c, 8),
   SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_c, 8),
@@ -905,11 +1045,9 @@ const SubpelVarianceParams kArrayHBDSubpelVariance_c[] = {
   SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_c, 8),
   SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_c, 8),
   SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_c, 8),
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_c, 10),
   SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_c, 10),
   SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_c, 10),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_c, 10),
   SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_c, 10),
   SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_c, 10),
@@ -923,11 +1061,9 @@ const SubpelVarianceParams kArrayHBDSubpelVariance_c[] = {
   SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_c, 10),
   SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_c, 10),
   SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_c, 10),
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_c, 12),
   SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_c, 12),
   SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_c, 12),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_c, 12),
   SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_c, 12),
   SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_c, 12),
@@ -946,14 +1082,12 @@ INSTANTIATE_TEST_CASE_P(C, AvxHBDSubpelVarianceTest,
                         ::testing::ValuesIn(kArrayHBDSubpelVariance_c));
 
 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_c[] = {
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   SubpelAvgVarianceParams(7, 7, &aom_highbd_8_sub_pixel_avg_variance128x128_c,
                           8),
   SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_c,
                           8),
   SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_c,
                           8),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_c, 8),
   SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_c, 8),
   SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_c, 8),
@@ -967,14 +1101,12 @@ const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_c[] = {
   SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_c, 8),
   SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_c, 8),
   SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_c, 8),
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   SubpelAvgVarianceParams(7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_c,
                           10),
   SubpelAvgVarianceParams(7, 6, &aom_highbd_10_sub_pixel_avg_variance128x64_c,
                           10),
   SubpelAvgVarianceParams(6, 7, &aom_highbd_10_sub_pixel_avg_variance64x128_c,
                           10),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_c,
                           10),
   SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_c,
@@ -997,14 +1129,12 @@ const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_c[] = {
   SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_c, 10),
   SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_c, 10),
   SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_c, 10),
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
   SubpelAvgVarianceParams(7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_c,
                           12),
   SubpelAvgVarianceParams(7, 6, &aom_highbd_12_sub_pixel_avg_variance128x64_c,
                           12),
   SubpelAvgVarianceParams(6, 7, &aom_highbd_12_sub_pixel_avg_variance64x128_c,
                           12),
-#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
   SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_c,
                           12),
   SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_c,
@@ -1030,7 +1160,6 @@ const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_c[] = {
 };
 INSTANTIATE_TEST_CASE_P(C, AvxHBDSubpelAvgVarianceTest,
                         ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c));
-#endif  // CONFIG_HIGHBITDEPTH
 
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(SSE2, SumOfSquaresTest,
@@ -1044,23 +1173,35 @@ INSTANTIATE_TEST_CASE_P(SSE2, AvxMseTest,
 
 INSTANTIATE_TEST_CASE_P(
     SSE2, AvxVarianceTest,
-    ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_sse2),
+    ::testing::Values(VarianceParams(7, 7, &aom_variance128x128_sse2),
+                      VarianceParams(7, 6, &aom_variance128x64_sse2),
+                      VarianceParams(6, 7, &aom_variance64x128_sse2),
+                      VarianceParams(6, 6, &aom_variance64x64_sse2),
                       VarianceParams(6, 5, &aom_variance64x32_sse2),
+                      VarianceParams(6, 4, &aom_variance64x16_sse2),
                       VarianceParams(5, 6, &aom_variance32x64_sse2),
                       VarianceParams(5, 5, &aom_variance32x32_sse2),
                       VarianceParams(5, 4, &aom_variance32x16_sse2),
+                      VarianceParams(5, 3, &aom_variance32x8_sse2),
+                      VarianceParams(4, 6, &aom_variance16x64_sse2),
                       VarianceParams(4, 5, &aom_variance16x32_sse2),
                       VarianceParams(4, 4, &aom_variance16x16_sse2),
                       VarianceParams(4, 3, &aom_variance16x8_sse2),
+                      VarianceParams(4, 2, &aom_variance16x4_sse2),
+                      VarianceParams(3, 5, &aom_variance8x32_sse2),
                       VarianceParams(3, 4, &aom_variance8x16_sse2),
                       VarianceParams(3, 3, &aom_variance8x8_sse2),
                       VarianceParams(3, 2, &aom_variance8x4_sse2),
+                      VarianceParams(2, 4, &aom_variance4x16_sse2),
                       VarianceParams(2, 3, &aom_variance4x8_sse2),
                       VarianceParams(2, 2, &aom_variance4x4_sse2)));
 
 INSTANTIATE_TEST_CASE_P(
     SSE2, AvxSubpelVarianceTest,
     ::testing::Values(
+        SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_sse2, 0),
+        SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_sse2, 0),
+        SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_sse2, 0),
         SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_sse2, 0),
         SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_sse2, 0),
         SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_sse2, 0),
@@ -1078,6 +1219,12 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
     SSE2, AvxSubpelAvgVarianceTest,
     ::testing::Values(
+        SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_sse2,
+                                0),
+        SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_sse2,
+                                0),
+        SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_sse2,
+                                0),
         SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_sse2, 0),
         SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_sse2, 0),
         SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_sse2, 0),
@@ -1092,7 +1239,7 @@ INSTANTIATE_TEST_CASE_P(
         SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_sse2, 0),
         SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_sse2, 0)));
 
-#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+#if HAVE_SSE4_1
 INSTANTIATE_TEST_CASE_P(
     SSE4_1, AvxSubpelVarianceTest,
     ::testing::Values(
@@ -1115,9 +1262,8 @@ INSTANTIATE_TEST_CASE_P(
         SubpelAvgVarianceParams(2, 2,
                                 &aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1,
                                 12)));
-#endif  // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+#endif  // HAVE_SSE4_1
 
-#if CONFIG_HIGHBITDEPTH
 /* TODO(debargha): This test does not support the highbd version
 INSTANTIATE_TEST_CASE_P(
     SSE2, AvxHBDMseTest,
@@ -1278,13 +1424,15 @@ const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_sse2[] = {
 
 INSTANTIATE_TEST_CASE_P(SSE2, AvxHBDSubpelAvgVarianceTest,
                         ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_sse2));
-#endif  // CONFIG_HIGHBITDEPTH
 #endif  // HAVE_SSE2
 
 #if HAVE_SSSE3
 INSTANTIATE_TEST_CASE_P(
     SSSE3, AvxSubpelVarianceTest,
     ::testing::Values(
+        SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_ssse3, 0),
+        SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_ssse3, 0),
+        SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_ssse3, 0),
         SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_ssse3, 0),
         SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_ssse3, 0),
         SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_ssse3, 0),
@@ -1302,6 +1450,12 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
     SSSE3, AvxSubpelAvgVarianceTest,
     ::testing::Values(
+        SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_ssse3,
+                                0),
+        SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_ssse3,
+                                0),
+        SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_ssse3,
+                                0),
         SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_ssse3,
                                 0),
         SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_ssse3,
@@ -1323,6 +1477,46 @@ INSTANTIATE_TEST_CASE_P(
         SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_ssse3, 0),
         SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_ssse3,
                                 0)));
+
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, AvxJntSubpelAvgVarianceTest,
+    ::testing::Values(
+        JntSubpelAvgVarianceParams(6, 6,
+                                   &aom_jnt_sub_pixel_avg_variance64x64_ssse3,
+                                   0),
+        JntSubpelAvgVarianceParams(6, 5,
+                                   &aom_jnt_sub_pixel_avg_variance64x32_ssse3,
+                                   0),
+        JntSubpelAvgVarianceParams(5, 6,
+                                   &aom_jnt_sub_pixel_avg_variance32x64_ssse3,
+                                   0),
+        JntSubpelAvgVarianceParams(5, 5,
+                                   &aom_jnt_sub_pixel_avg_variance32x32_ssse3,
+                                   0),
+        JntSubpelAvgVarianceParams(5, 4,
+                                   &aom_jnt_sub_pixel_avg_variance32x16_ssse3,
+                                   0),
+        JntSubpelAvgVarianceParams(4, 5,
+                                   &aom_jnt_sub_pixel_avg_variance16x32_ssse3,
+                                   0),
+        JntSubpelAvgVarianceParams(4, 4,
+                                   &aom_jnt_sub_pixel_avg_variance16x16_ssse3,
+                                   0),
+        JntSubpelAvgVarianceParams(4, 3,
+                                   &aom_jnt_sub_pixel_avg_variance16x8_ssse3,
+                                   0),
+        JntSubpelAvgVarianceParams(3, 4,
+                                   &aom_jnt_sub_pixel_avg_variance8x16_ssse3,
+                                   0),
+        JntSubpelAvgVarianceParams(3, 3,
+                                   &aom_jnt_sub_pixel_avg_variance8x8_ssse3, 0),
+        JntSubpelAvgVarianceParams(3, 2,
+                                   &aom_jnt_sub_pixel_avg_variance8x4_ssse3, 0),
+        JntSubpelAvgVarianceParams(2, 3,
+                                   &aom_jnt_sub_pixel_avg_variance4x8_ssse3, 0),
+        JntSubpelAvgVarianceParams(2, 2,
+                                   &aom_jnt_sub_pixel_avg_variance4x4_ssse3,
+                                   0)));
 #endif  // HAVE_SSSE3
 
 #if HAVE_AVX2
@@ -1331,23 +1525,48 @@ INSTANTIATE_TEST_CASE_P(AVX2, AvxMseTest,
 
 INSTANTIATE_TEST_CASE_P(
     AVX2, AvxVarianceTest,
-    ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_avx2),
+    ::testing::Values(VarianceParams(7, 7, &aom_variance128x128_avx2),
+                      VarianceParams(7, 6, &aom_variance128x64_avx2),
+                      VarianceParams(6, 7, &aom_variance64x128_avx2),
+                      VarianceParams(6, 6, &aom_variance64x64_avx2),
                       VarianceParams(6, 5, &aom_variance64x32_avx2),
+                      VarianceParams(6, 4, &aom_variance64x16_avx2),
+                      VarianceParams(5, 6, &aom_variance32x64_avx2),
                       VarianceParams(5, 5, &aom_variance32x32_avx2),
                       VarianceParams(5, 4, &aom_variance32x16_avx2),
-                      VarianceParams(4, 4, &aom_variance16x16_avx2)));
+                      VarianceParams(5, 3, &aom_variance32x8_avx2),
+                      VarianceParams(4, 6, &aom_variance16x64_avx2),
+                      VarianceParams(4, 5, &aom_variance16x32_avx2),
+                      VarianceParams(4, 4, &aom_variance16x16_avx2),
+                      VarianceParams(4, 3, &aom_variance16x8_avx2),
+                      VarianceParams(4, 2, &aom_variance16x4_avx2)));
 
 INSTANTIATE_TEST_CASE_P(
     AVX2, AvxSubpelVarianceTest,
     ::testing::Values(
+        SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_avx2, 0),
+        SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_avx2, 0),
+        SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_avx2, 0),
         SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_avx2, 0),
-        SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_avx2, 0)));
+        SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_avx2, 0),
+        SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_avx2, 0),
+        SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_avx2, 0),
+        SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_avx2, 0)));
 
 INSTANTIATE_TEST_CASE_P(
     AVX2, AvxSubpelAvgVarianceTest,
     ::testing::Values(
+        SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_avx2,
+                                0),
+        SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_avx2,
+                                0),
+        SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_avx2,
+                                0),
         SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_avx2, 0),
-        SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_avx2,
+        SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_avx2, 0),
+        SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_avx2, 0),
+        SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0),
+        SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_avx2,
                                 0)));
 #endif  // HAVE_AVX2
 
diff --git a/third_party/aom/test/video_source.h b/third_party/aom/test/video_source.h
index e986ffb373..dc39b5a801 100644
--- a/third_party/aom/test/video_source.h
+++ b/third_party/aom/test/video_source.h
@@ -71,7 +71,10 @@ static FILE *GetTempOutFile(std::string *file_name) {
   }
   return NULL;
 #else
-  return tmpfile();
+  char name_template[] = "/tmp/libaomtest.XXXXXX";
+  const int fd = mkstemp(name_template);
+  *file_name = name_template;
+  return fdopen(fd, "wb+");
 #endif
 }
 
diff --git a/third_party/aom/test/visual_metrics.py b/third_party/aom/test/visual_metrics.py
new file mode 100755
index 0000000000..9055feb334
--- /dev/null
+++ b/third_party/aom/test/visual_metrics.py
@@ -0,0 +1,466 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2016, Alliance for Open Media. All rights reserved
+#
+# This source code is subject to the terms of the BSD 2 Clause License and
+# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+# was not distributed with this source code in the LICENSE file, you can
+# obtain it at www.aomedia.org/license/software. If the Alliance for Open
+# Media Patent License 1.0 was not distributed with this source code in the
+# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+#
+
+"""Converts video encoding result data from text files to visualization
+data source."""
+
+__author__ = "jzern@google.com (James Zern),"
+__author__ += "jimbankoski@google.com (Jim Bankoski)"
+
+import fnmatch
+import numpy as np
+import scipy as sp
+import scipy.interpolate
+import os
+import re
+import string
+import sys
+import math
+import warnings
+
+import gviz_api
+
+from os.path import basename
+from os.path import splitext
+
+warnings.simplefilter('ignore', np.RankWarning)
+warnings.simplefilter('ignore', RuntimeWarning)
+
+def bdsnr2(metric_set1, metric_set2):
+  """
+  BJONTEGAARD    Bjontegaard metric calculation adapted
+  Bjontegaard's snr metric allows to compute the average % saving in decibels
+  between two rate-distortion curves [1].  This is an adaptation of that
+  method that fixes inconsistencies when the curve fit operation goes awry
+  by replacing the curve fit function with a Piecewise Cubic Hermite
+  Interpolating Polynomial and then integrating that by evaluating that
+  function at small intervals using the trapezoid method to calculate
+  the integral.
+
+  metric_set1 - list of tuples ( bitrate,  metric ) for first graph
+  metric_set2 - list of tuples ( bitrate,  metric ) for second graph
+  """
+
+  if not metric_set1 or not metric_set2:
+    return 0.0
+
+  try:
+
+    # pchip_interlopate requires keys sorted by x axis. x-axis will
+    # be our metric not the bitrate so sort by metric.
+    metric_set1.sort()
+    metric_set2.sort()
+
+    # Pull the log of the rate and clamped psnr from metric_sets.
+    log_rate1 = [math.log(x[0]) for x in metric_set1]
+    metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
+    log_rate2 = [math.log(x[0]) for x in metric_set2]
+    metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]
+
+    # Integration interval.  This metric only works on the area that's
+    # overlapping.   Extrapolation of these things is sketchy so we avoid.
+    min_int = max([min(log_rate1), min(log_rate2)])
+    max_int = min([max(log_rate1), max(log_rate2)])
+
+    # No overlap means no sensible metric possible.
+    if max_int <= min_int:
+      return 0.0
+
+    # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
+    # create 100 new samples points separated by interval.
+    lin = np.linspace(min_int, max_int, num=100, retstep=True)
+    interval = lin[1]
+    samples = lin[0]
+    v1 = scipy.interpolate.pchip_interpolate(log_rate1, metric1, samples)
+    v2 = scipy.interpolate.pchip_interpolate(log_rate2, metric2, samples)
+
+    # Calculate the integral using the trapezoid method on the samples.
+    int_v1 = np.trapz(v1, dx=interval)
+    int_v2 = np.trapz(v2, dx=interval)
+
+    # Calculate the average improvement.
+    avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)
+
+  except (TypeError, ZeroDivisionError, ValueError, np.RankWarning) as e:
+    return 0
+
+  return avg_exp_diff
+
+def bdrate2(metric_set1, metric_set2):
+  """
+  BJONTEGAARD    Bjontegaard metric calculation adapted
+  Bjontegaard's metric allows to compute the average % saving in bitrate
+  between two rate-distortion curves [1].  This is an adaptation of that
+  method that fixes inconsistencies when the curve fit operation goes awry
+  by replacing the curve fit function with a Piecewise Cubic Hermite
+  Interpolating Polynomial and then integrating that by evaluating that
+  function at small intervals using the trapezoid method to calculate
+  the integral.
+
+  metric_set1 - list of tuples ( bitrate,  metric ) for first graph
+  metric_set2 - list of tuples ( bitrate,  metric ) for second graph
+  """
+
+  if not metric_set1 or not metric_set2:
+    return 0.0
+
+  try:
+
+    # pchip_interlopate requires keys sorted by x axis. x-axis will
+    # be our metric not the bitrate so sort by metric.
+    metric_set1.sort(key=lambda tup: tup[1])
+    metric_set2.sort(key=lambda tup: tup[1])
+
+    # Pull the log of the rate and clamped psnr from metric_sets.
+    log_rate1 = [math.log(x[0]) for x in metric_set1]
+    metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
+    log_rate2 = [math.log(x[0]) for x in metric_set2]
+    metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]
+
+    # Integration interval.  This metric only works on the area that's
+    # overlapping.   Extrapolation of these things is sketchy so we avoid.
+    min_int = max([min(metric1), min(metric2)])
+    max_int = min([max(metric1), max(metric2)])
+
+    # No overlap means no sensible metric possible.
+    if max_int <= min_int:
+      return 0.0
+
+    # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
+    # create 100 new samples points separated by interval.
+    lin = np.linspace(min_int, max_int, num=100, retstep=True)
+    interval = lin[1]
+    samples = lin[0]
+    v1 = scipy.interpolate.pchip_interpolate(metric1, log_rate1, samples)
+    v2 = scipy.interpolate.pchip_interpolate(metric2, log_rate2, samples)
+
+    # Calculate the integral using the trapezoid method on the samples.
+    int_v1 = np.trapz(v1, dx=interval)
+    int_v2 = np.trapz(v2, dx=interval)
+
+    # Calculate the average improvement.
+    avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)
+
+  except (TypeError, ZeroDivisionError, ValueError, np.RankWarning) as e:
+    return 0
+
+  # Convert to a percentage.
+  avg_diff = (math.exp(avg_exp_diff) - 1) * 100
+
+  return avg_diff
+
+
+
+def FillForm(string_for_substitution, dictionary_of_vars):
+  """
+  This function substitutes all matches of the command string //%% ... %%//
+  with the variable represented by ...  .
+  """
+  return_string = string_for_substitution
+  for i in re.findall("//%%(.*)%%//", string_for_substitution):
+    return_string = re.sub("//%%" + i + "%%//", dictionary_of_vars[i],
+                           return_string)
+  return return_string
+
+
+def HasMetrics(line):
+  """
+  The metrics files produced by aomenc are started with a B for headers.
+  """
+  # If the first char of the first word on the line is a digit
+  if len(line) == 0:
+    return False
+  if len(line.split()) == 0:
+    return False
+  if line.split()[0][0:1].isdigit():
+    return True
+  return False
+
+def GetMetrics(file_name):
+  metric_file = open(file_name, "r")
+  return metric_file.readline().split();
+
+def ParseMetricFile(file_name, metric_column):
+  metric_set1 = set([])
+  metric_file = open(file_name, "r")
+  for line in metric_file:
+    metrics = string.split(line)
+    if HasMetrics(line):
+      if metric_column < len(metrics):
+        try:
+          tuple = float(metrics[0]), float(metrics[metric_column])
+        except:
+          tuple = float(metrics[0]), 0
+      else:
+        tuple = float(metrics[0]), 0
+      metric_set1.add(tuple)
+  metric_set1_sorted = sorted(metric_set1)
+  return metric_set1_sorted
+
+
+def FileBetter(file_name_1, file_name_2, metric_column, method):
+  """
+  Compares two data files and determines which is better and by how
+  much. Also produces a histogram of how much better, by PSNR.
+  metric_column is the metric.
+  """
+  # Store and parse our two files into lists of unique tuples.
+
+  # Read the two files, parsing out lines starting with bitrate.
+  metric_set1_sorted = ParseMetricFile(file_name_1, metric_column)
+  metric_set2_sorted = ParseMetricFile(file_name_2, metric_column)
+
+
+  def GraphBetter(metric_set1_sorted, metric_set2_sorted, base_is_set_2):
+    """
+    Search through the sorted metric file for metrics on either side of
+    the metric from file 1.  Since both lists are sorted we really
+    should not have to search through the entire range, but these
+    are small files."""
+    total_bitrate_difference_ratio = 0.0
+    count = 0
+    for bitrate, metric in metric_set1_sorted:
+      if bitrate == 0:
+        continue
+      for i in range(len(metric_set2_sorted) - 1):
+        s2_bitrate_0, s2_metric_0 = metric_set2_sorted[i]
+        s2_bitrate_1, s2_metric_1 = metric_set2_sorted[i + 1]
+        # We have a point on either side of our metric range.
+        if metric > s2_metric_0 and metric <= s2_metric_1:
+
+          # Calculate a slope.
+          if s2_metric_1 - s2_metric_0 != 0:
+            metric_slope = ((s2_bitrate_1 - s2_bitrate_0) /
+                            (s2_metric_1 - s2_metric_0))
+          else:
+            metric_slope = 0
+
+          estimated_s2_bitrate = (s2_bitrate_0 + (metric - s2_metric_0) *
+                                  metric_slope)
+
+          if estimated_s2_bitrate == 0:
+            continue
+          # Calculate percentage difference as given by base.
+          if base_is_set_2 == 0:
+            bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
+                                        bitrate)
+          else:
+            bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
+                                        estimated_s2_bitrate)
+
+          total_bitrate_difference_ratio += bitrate_difference_ratio
+          count += 1
+          break
+
+    # Calculate the average improvement between graphs.
+    if count != 0:
+      avg = total_bitrate_difference_ratio / count
+
+    else:
+      avg = 0.0
+
+    return avg
+
+  # Be fair to both graphs by testing all the points in each.
+  if method == 'avg':
+    avg_improvement = 50 * (
+                       GraphBetter(metric_set1_sorted, metric_set2_sorted, 1) -
+                       GraphBetter(metric_set2_sorted, metric_set1_sorted, 0))
+  elif method == 'dsnr':
+      avg_improvement = bdsnr2(metric_set1_sorted, metric_set2_sorted)
+  else:
+      avg_improvement = bdrate2(metric_set2_sorted, metric_set1_sorted)
+
+  return avg_improvement
+
+
+def HandleFiles(variables):
+  """
+  This script creates html for displaying metric data produced from data
+  in a video stats file,  as created by the AOM project when enable_psnr
+  is turned on:
+
+  Usage: visual_metrics.py template.html pattern base_dir sub_dir [ sub_dir2 ..]
+
+  The script parses each metrics file [see below] that matches the
+  statfile_pattern  in the baseline directory and looks for the file that
+  matches that same file in each of the sub_dirs, and compares the resultant
+  metrics bitrate, avg psnr, glb psnr, and ssim. "
+
+  It provides a table in which each row is a file in the line directory,
+  and a column for each subdir, with the cells representing how that clip
+  compares to baseline for that subdir.   A graph is given for each which
+  compares filesize to that metric.  If you click on a point in the graph it
+  zooms in on that point.
+
+  a SAMPLE metrics file:
+
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+   25.911   38.242   38.104   38.258   38.121   75.790    14103
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+   49.982   41.264   41.129   41.255   41.122   83.993    19817
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+   74.967   42.911   42.767   42.899   42.756   87.928    17332
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+  100.012   43.983   43.838   43.881   43.738   89.695    25389
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+  149.980   45.338   45.203   45.184   45.043   91.591    25438
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+  199.852   46.225   46.123   46.113   45.999   92.679    28302
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+  249.922   46.864   46.773   46.777   46.673   93.334    27244
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+  299.998   47.366   47.281   47.317   47.220   93.844    27137
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+  349.769   47.746   47.677   47.722   47.648   94.178    32226
+  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
+  399.773   48.032   47.971   48.013   47.946   94.362    36203
+
+  sample use:
+  visual_metrics.py template.html "*stt" aom aom_b aom_c > metrics.html
+  """
+
+  # The template file is the html file into which we will write the
+  # data from the stats file, formatted correctly for the gviz_api.
+  template_file = open(variables[1], "r")
+  page_template = template_file.read()
+  template_file.close()
+
+  # This is the path match pattern for finding stats files amongst
+  # all the other files it could be.  eg: *.stt
+  file_pattern = variables[2]
+
+  # This is the directory with files that we will use to do the comparison
+  # against.
+  baseline_dir = variables[3]
+  snrs = ''
+  filestable = {}
+
+  filestable['dsnr'] = ''
+  filestable['drate'] = ''
+  filestable['avg'] = ''
+
+  # Dirs is directories after the baseline to compare to the base.
+  dirs = variables[4:len(variables)]
+
+  # Find the metric files in the baseline directory.
+  dir_list = sorted(fnmatch.filter(os.listdir(baseline_dir), file_pattern))
+
+  metrics = GetMetrics(baseline_dir + "/" + dir_list[0])
+
+  metrics_js = 'metrics = ["' + '", "'.join(metrics) + '"];'
+
+  for column in range(1, len(metrics)):
+
+    for metric in ['avg','dsnr','drate']:
+      description = {"file": ("string", "File")}
+
+      # Go through each directory and add a column header to our description.
+      countoverall = {}
+      sumoverall = {}
+
+      for directory in dirs:
+        description[directory] = ("number", directory)
+        countoverall[directory] = 0
+        sumoverall[directory] = 0
+
+      # Data holds the data for the visualization, name given comes from
+      # gviz_api sample code.
+      data = []
+      for filename in dir_list:
+        row = {'file': splitext(basename(filename))[0] }
+        baseline_file_name = baseline_dir + "/" + filename
+
+        # Read the metric file from each of the directories in our list.
+        for directory in dirs:
+          metric_file_name = directory + "/" + filename
+
+          # If there is a metric file in the current directory, open it
+          # and calculate its overall difference between it and the baseline
+          # directory's metric file.
+          if os.path.isfile(metric_file_name):
+            overall = FileBetter(baseline_file_name, metric_file_name,
+                                 column, metric)
+            row[directory] = overall
+
+            sumoverall[directory] += overall
+            countoverall[directory] += 1
+
+        data.append(row)
+
+      # Add the overall numbers.
+      row = {"file": "OVERALL" }
+      for directory in dirs:
+        row[directory] = sumoverall[directory] / countoverall[directory]
+      data.append(row)
+
+      # write the tables out
+      data_table = gviz_api.DataTable(description)
+      data_table.LoadData(data)
+
+      filestable[metric] = ( filestable[metric] + "filestable_" + metric +
+                             "[" + str(column) + "]=" +
+                             data_table.ToJSon(columns_order=["file"]+dirs) + "\n" )
+
+    filestable_avg = filestable['avg']
+    filestable_dpsnr = filestable['dsnr']
+    filestable_drate = filestable['drate']
+
+    # Now we collect all the data for all the graphs.  First the column
+    # headers which will be Datarate and then each directory.
+    columns = ("datarate",baseline_dir)
+    description = {"datarate":("number", "Datarate")}
+    for directory in dirs:
+      description[directory] = ("number", directory)
+
+    description[baseline_dir] = ("number", baseline_dir)
+
+    snrs = snrs + "snrs[" + str(column) + "] = ["
+
+    # Now collect the data for the graphs, file by file.
+    for filename in dir_list:
+
+      data = []
+
+      # Collect the file in each directory and store all of its metrics
+      # in the associated gviz metrics table.
+      all_dirs = dirs + [baseline_dir]
+      for directory in all_dirs:
+
+        metric_file_name = directory + "/" + filename
+        if not os.path.isfile(metric_file_name):
+          continue
+
+        # Read and parse the metrics file storing it to the data we'll
+        # use for the gviz_api.Datatable.
+        metrics = ParseMetricFile(metric_file_name, column)
+        for bitrate, metric in metrics:
+          data.append({"datarate": bitrate, directory: metric})
+
+      data_table = gviz_api.DataTable(description)
+      data_table.LoadData(data)
+      snrs = snrs + "'" + data_table.ToJSon(
+         columns_order=tuple(["datarate",baseline_dir]+dirs)) + "',"
+
+    snrs = snrs + "]\n"
+
+    formatters = ""
+    for i in range(len(dirs)):
+      formatters = "%s   formatter.format(better, %d);" % (formatters, i+1)
+
+  print FillForm(page_template, vars())
+  return
+
+if len(sys.argv) < 3:
+  print HandleFiles.__doc__
+else:
+  HandleFiles(sys.argv)
diff --git a/third_party/aom/test/warp_filter_test.cc b/third_party/aom/test/warp_filter_test.cc
index 0353c074ec..15f8a285c1 100644
--- a/third_party/aom/test/warp_filter_test.cc
+++ b/third_party/aom/test/warp_filter_test.cc
@@ -8,39 +8,29 @@
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
-
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "test/warp_filter_test_util.h"
-
-using std::tr1::tuple;
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
+using ::testing::tuple;
 using libaom_test::ACMRandom;
-using libaom_test::AV1WarpFilter::AV1WarpFilterTest;
-#if CONFIG_HIGHBITDEPTH
 using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest;
-#endif
+using libaom_test::AV1WarpFilter::AV1WarpFilterTest;
 
 namespace {
-
+#if HAVE_SSE4_1
 TEST_P(AV1WarpFilterTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
+TEST_P(AV1WarpFilterTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(3)); }
 
 INSTANTIATE_TEST_CASE_P(
-    SSE2, AV1WarpFilterTest,
-    libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_sse2));
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, AV1WarpFilterTest,
-    libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_ssse3));
-#endif
+    SSE4_1, AV1WarpFilterTest,
+    libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_sse4_1));
 
-#if CONFIG_HIGHBITDEPTH && HAVE_SSSE3
-TEST_P(AV1HighbdWarpFilterTest, CheckOutput) {
-  RunCheckOutput(av1_highbd_warp_affine_ssse3);
-}
+TEST_P(AV1HighbdWarpFilterTest, CheckOutput) { RunCheckOutput(GET_PARAM(4)); }
+TEST_P(AV1HighbdWarpFilterTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(4)); }
 
-INSTANTIATE_TEST_CASE_P(SSSE3, AV1HighbdWarpFilterTest,
-                        libaom_test::AV1HighbdWarpFilter::GetDefaultParams());
-#endif
+INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdWarpFilterTest,
+                        libaom_test::AV1HighbdWarpFilter::BuildParams(
+                            av1_highbd_warp_affine_sse4_1));
 
+#endif  // HAVE_SSE4_1
 }  // namespace
diff --git a/third_party/aom/test/warp_filter_test_util.cc b/third_party/aom/test/warp_filter_test_util.cc
index 47ce6c371d..b341cd0c29 100644
--- a/third_party/aom/test/warp_filter_test_util.cc
+++ b/third_party/aom/test/warp_filter_test_util.cc
@@ -8,55 +8,38 @@
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
-
+#include "aom_ports/aom_timer.h"
 #include "test/warp_filter_test_util.h"
 
-using std::tr1::tuple;
-using std::tr1::make_tuple;
+using ::testing::make_tuple;
+using ::testing::tuple;
 
 namespace libaom_test {
 
-namespace AV1WarpFilter {
-
-::testing::internal::ParamGenerator<WarpTestParam> BuildParams(
-    warp_affine_func filter) {
-  const WarpTestParam params[] = {
-    make_tuple(4, 4, 100, filter),   make_tuple(8, 8, 100, filter),
-    make_tuple(64, 64, 100, filter), make_tuple(4, 16, 100, filter),
-    make_tuple(32, 8, 100, filter),
-  };
-  return ::testing::ValuesIn(params);
-}
-
-AV1WarpFilterTest::~AV1WarpFilterTest() {}
-void AV1WarpFilterTest::SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
-
-void AV1WarpFilterTest::TearDown() { libaom_test::ClearSystemState(); }
-
-int32_t AV1WarpFilterTest::random_param(int bits) {
+int32_t random_warped_param(libaom_test::ACMRandom *rnd, int bits) {
   // 1 in 8 chance of generating zero (arbitrarily chosen)
-  if (((rnd_.Rand8()) & 7) == 0) return 0;
+  if (((rnd->Rand8()) & 7) == 0) return 0;
   // Otherwise, enerate uniform values in the range
   // [-(1 << bits), 1] U [1, 1<<bits]
-  int32_t v = 1 + (rnd_.Rand16() & ((1 << bits) - 1));
-  if ((rnd_.Rand8()) & 1) return -v;
+  int32_t v = 1 + (rnd->Rand16() & ((1 << bits) - 1));
+  if ((rnd->Rand8()) & 1) return -v;
   return v;
 }
 
-void AV1WarpFilterTest::generate_model(int32_t *mat, int16_t *alpha,
-                                       int16_t *beta, int16_t *gamma,
-                                       int16_t *delta) {
+void generate_warped_model(libaom_test::ACMRandom *rnd, int32_t *mat,
+                           int16_t *alpha, int16_t *beta, int16_t *gamma,
+                           int16_t *delta) {
   while (1) {
-    mat[0] = random_param(WARPEDMODEL_PREC_BITS + 6);
-    mat[1] = random_param(WARPEDMODEL_PREC_BITS + 6);
-    mat[2] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
+    mat[0] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS + 6);
+    mat[1] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS + 6);
+    mat[2] = (random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3)) +
              (1 << WARPEDMODEL_PREC_BITS);
-    mat[3] = random_param(WARPEDMODEL_PREC_BITS - 3);
+    mat[3] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3);
     // 50/50 chance of generating ROTZOOM vs. AFFINE models
-    if (rnd_.Rand8() & 1) {
+    if (rnd->Rand8() & 1) {
       // AFFINE
-      mat[4] = random_param(WARPEDMODEL_PREC_BITS - 3);
-      mat[5] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
+      mat[4] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3);
+      mat[5] = (random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3)) +
                (1 << WARPEDMODEL_PREC_BITS);
     } else {
       mat[4] = -mat[3];
@@ -94,13 +77,29 @@ void AV1WarpFilterTest::generate_model(int32_t *mat, int16_t *alpha,
   }
 }
 
-void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
+namespace AV1WarpFilter {
+::testing::internal::ParamGenerator<WarpTestParam> BuildParams(
+    warp_affine_func filter) {
+  const WarpTestParam params[] = {
+    make_tuple(4, 4, 50000, filter),  make_tuple(8, 8, 50000, filter),
+    make_tuple(64, 64, 1000, filter), make_tuple(4, 16, 20000, filter),
+    make_tuple(32, 8, 10000, filter),
+  };
+  return ::testing::ValuesIn(params);
+}
+
+AV1WarpFilterTest::~AV1WarpFilterTest() {}
+void AV1WarpFilterTest::SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+
+void AV1WarpFilterTest::TearDown() { libaom_test::ClearSystemState(); }
+
+void AV1WarpFilterTest::RunSpeedTest(warp_affine_func test_impl) {
   const int w = 128, h = 128;
   const int border = 16;
   const int stride = w + 2 * border;
   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
-  const int num_iters = GET_PARAM(2);
-  int i, j, sub_x, sub_y;
+  int sub_x, sub_y;
+  const int bd = 8;
 
   uint8_t *input_ = new uint8_t[h * stride];
   uint8_t *input = input_ + border;
@@ -109,14 +108,66 @@ void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
   // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
   int output_n = ((out_w + 7) & ~7) * out_h;
   uint8_t *output = new uint8_t[output_n];
+  int32_t mat[8];
+  int16_t alpha, beta, gamma, delta;
+  ConvolveParams conv_params = get_conv_params(0, 0, 0, bd);
+  CONV_BUF_TYPE *dsta = new CONV_BUF_TYPE[output_n];
+
+  generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta);
+
+  for (int r = 0; r < h; ++r)
+    for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand8();
+  for (int r = 0; r < h; ++r) {
+    memset(input + r * stride - border, input[r * stride], border);
+    memset(input + r * stride + w, input[r * stride + (w - 1)], border);
+  }
+
+  sub_x = 0;
+  sub_y = 0;
+  int do_average = 0;
+
+  conv_params = get_conv_params_no_round(0, do_average, 0, dsta, out_w, 1, bd);
+  conv_params.use_jnt_comp_avg = 0;
+
+  const int num_loops = 1000000000 / (out_w + out_h);
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
+  for (int i = 0; i < num_loops; ++i)
+    test_impl(mat, input, w, h, stride, output, 32, 32, out_w, out_h, out_w,
+              sub_x, sub_y, &conv_params, alpha, beta, gamma, delta);
+
+  aom_usec_timer_mark(&timer);
+  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+  printf("warp %3dx%-3d: %7.2f ns\n", out_w, out_h,
+         1000.0 * elapsed_time / num_loops);
+
+  delete[] input_;
+  delete[] output;
+  delete[] dsta;
+}
+
+void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
+  const int w = 128, h = 128;
+  const int border = 16;
+  const int stride = w + 2 * border;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int num_iters = GET_PARAM(2);
+  int i, j, sub_x, sub_y;
+  const int bd = 8;
+
+  // The warp functions always write rows with widths that are multiples of 8.
+  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
+  int output_n = ((out_w + 7) & ~7) * out_h;
+  uint8_t *input_ = new uint8_t[h * stride];
+  uint8_t *input = input_ + border;
+  uint8_t *output = new uint8_t[output_n];
   uint8_t *output2 = new uint8_t[output_n];
   int32_t mat[8];
   int16_t alpha, beta, gamma, delta;
-  ConvolveParams conv_params = get_conv_params(0, 0, 0);
-#if CONFIG_CONVOLVE_ROUND
-  int32_t *dsta = new int32_t[output_n];
-  int32_t *dstb = new int32_t[output_n];
-#endif
+  ConvolveParams conv_params = get_conv_params(0, 0, 0, bd);
+  CONV_BUF_TYPE *dsta = new CONV_BUF_TYPE[output_n];
+  CONV_BUF_TYPE *dstb = new CONV_BUF_TYPE[output_n];
+  for (int i = 0; i < output_n; ++i) output[i] = output2[i] = rnd_.Rand8();
 
   for (i = 0; i < num_iters; ++i) {
     // Generate an input block and extend its borders horizontally
@@ -126,81 +177,88 @@ void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
       memset(input + r * stride - border, input[r * stride], border);
       memset(input + r * stride + w, input[r * stride + (w - 1)], border);
     }
-#if CONFIG_CONVOLVE_ROUND
     const int use_no_round = rnd_.Rand8() & 1;
-#endif
     for (sub_x = 0; sub_x < 2; ++sub_x)
       for (sub_y = 0; sub_y < 2; ++sub_y) {
-        generate_model(mat, &alpha, &beta, &gamma, &delta);
-#if CONFIG_CONVOLVE_ROUND
-        if (use_no_round) {
-          // Prepare two copies of the destination
-          for (j = 0; j < out_w * out_h; ++j) {
-            int32_t v = rnd_.Rand16();
-            dsta[j] = v;
-            dstb[j] = v;
+        generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta);
+        for (int ii = 0; ii < 2; ++ii) {
+          for (int jj = 0; jj < 5; ++jj) {
+            for (int do_average = 0; do_average <= 1; ++do_average) {
+              if (use_no_round) {
+                conv_params = get_conv_params_no_round(0, do_average, 0, dsta,
+                                                       out_w, 1, bd);
+              } else {
+                conv_params = get_conv_params(0, 0, 0, bd);
+              }
+              if (jj >= 4) {
+                conv_params.use_jnt_comp_avg = 0;
+              } else {
+                conv_params.use_jnt_comp_avg = 1;
+                conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
+                conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
+              }
+              av1_warp_affine_c(mat, input, w, h, stride, output, 32, 32, out_w,
+                                out_h, out_w, sub_x, sub_y, &conv_params, alpha,
+                                beta, gamma, delta);
+              if (use_no_round) {
+                conv_params = get_conv_params_no_round(0, do_average, 0, dstb,
+                                                       out_w, 1, bd);
+              }
+              if (jj >= 4) {
+                conv_params.use_jnt_comp_avg = 0;
+              } else {
+                conv_params.use_jnt_comp_avg = 1;
+                conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
+                conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
+              }
+              test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
+                        out_w, sub_x, sub_y, &conv_params, alpha, beta, gamma,
+                        delta);
+              if (use_no_round) {
+                for (j = 0; j < out_w * out_h; ++j)
+                  ASSERT_EQ(dsta[j], dstb[j])
+                      << "Pixel mismatch at index " << j << " = ("
+                      << (j % out_w) << ", " << (j / out_w) << ") on iteration "
+                      << i;
+                for (j = 0; j < out_w * out_h; ++j)
+                  ASSERT_EQ(output[j], output2[j])
+                      << "Pixel mismatch at index " << j << " = ("
+                      << (j % out_w) << ", " << (j / out_w) << ") on iteration "
+                      << i;
+              } else {
+                for (j = 0; j < out_w * out_h; ++j)
+                  ASSERT_EQ(output[j], output2[j])
+                      << "Pixel mismatch at index " << j << " = ("
+                      << (j % out_w) << ", " << (j / out_w) << ") on iteration "
+                      << i;
+              }
+            }
           }
-          conv_params = get_conv_params_no_round(0, 0, 0, dsta, out_w);
-        } else {
-          conv_params = get_conv_params(0, 0, 0);
-        }
-#endif
-        av1_warp_affine_c(mat, input, w, h, stride, output, 32, 32, out_w,
-                          out_h, out_w, sub_x, sub_y, &conv_params, alpha, beta,
-                          gamma, delta);
-#if CONFIG_CONVOLVE_ROUND
-        if (use_no_round) {
-          conv_params = get_conv_params_no_round(0, 0, 0, dstb, out_w);
         }
-#endif
-        test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
-                  out_w, sub_x, sub_y, &conv_params, alpha, beta, gamma, delta);
-
-#if CONFIG_CONVOLVE_ROUND
-        if (use_no_round) {
-          for (j = 0; j < out_w * out_h; ++j)
-            ASSERT_EQ(dsta[j], dstb[j])
-                << "Pixel mismatch at index " << j << " = (" << (j % out_w)
-                << ", " << (j / out_w) << ") on iteration " << i;
-        } else {
-          for (j = 0; j < out_w * out_h; ++j)
-            ASSERT_EQ(output[j], output2[j])
-                << "Pixel mismatch at index " << j << " = (" << (j % out_w)
-                << ", " << (j / out_w) << ") on iteration " << i;
-        }
-#else
-        for (j = 0; j < out_w * out_h; ++j)
-          ASSERT_EQ(output[j], output2[j])
-              << "Pixel mismatch at index " << j << " = (" << (j % out_w)
-              << ", " << (j / out_w) << ") on iteration " << i;
-#endif
       }
   }
   delete[] input_;
   delete[] output;
   delete[] output2;
-#if CONFIG_CONVOLVE_ROUND
   delete[] dsta;
   delete[] dstb;
-#endif
 }
 }  // namespace AV1WarpFilter
 
-#if CONFIG_HIGHBITDEPTH
 namespace AV1HighbdWarpFilter {
-
-::testing::internal::ParamGenerator<HighbdWarpTestParam> GetDefaultParams() {
-  const HighbdWarpTestParam defaultParams[] = {
-    make_tuple(4, 4, 100, 8),    make_tuple(8, 8, 100, 8),
-    make_tuple(64, 64, 100, 8),  make_tuple(4, 16, 100, 8),
-    make_tuple(32, 8, 100, 8),   make_tuple(4, 4, 100, 10),
-    make_tuple(8, 8, 100, 10),   make_tuple(64, 64, 100, 10),
-    make_tuple(4, 16, 100, 10),  make_tuple(32, 8, 100, 10),
-    make_tuple(4, 4, 100, 12),   make_tuple(8, 8, 100, 12),
-    make_tuple(64, 64, 100, 12), make_tuple(4, 16, 100, 12),
-    make_tuple(32, 8, 100, 12),
+::testing::internal::ParamGenerator<HighbdWarpTestParam> BuildParams(
+    highbd_warp_affine_func filter) {
+  const HighbdWarpTestParam params[] = {
+    make_tuple(4, 4, 100, 8, filter),    make_tuple(8, 8, 100, 8, filter),
+    make_tuple(64, 64, 100, 8, filter),  make_tuple(4, 16, 100, 8, filter),
+    make_tuple(32, 8, 100, 8, filter),   make_tuple(4, 4, 100, 10, filter),
+    make_tuple(8, 8, 100, 10, filter),   make_tuple(64, 64, 100, 10, filter),
+    make_tuple(4, 16, 100, 10, filter),  make_tuple(32, 8, 100, 10, filter),
+    make_tuple(4, 4, 100, 12, filter),   make_tuple(8, 8, 100, 12, filter),
+    make_tuple(64, 64, 100, 12, filter), make_tuple(4, 16, 100, 12, filter),
+    make_tuple(32, 8, 100, 12, filter),
   };
-  return ::testing::ValuesIn(defaultParams);
+  return ::testing::ValuesIn(params);
 }
 
 AV1HighbdWarpFilterTest::~AV1HighbdWarpFilterTest() {}
@@ -210,65 +268,59 @@ void AV1HighbdWarpFilterTest::SetUp() {
 
 void AV1HighbdWarpFilterTest::TearDown() { libaom_test::ClearSystemState(); }
 
-int32_t AV1HighbdWarpFilterTest::random_param(int bits) {
-  // 1 in 8 chance of generating zero (arbitrarily chosen)
-  if (((rnd_.Rand8()) & 7) == 0) return 0;
-  // Otherwise, enerate uniform values in the range
-  // [-(1 << bits), 1] U [1, 1<<bits]
-  int32_t v = 1 + (rnd_.Rand16() & ((1 << bits) - 1));
-  if ((rnd_.Rand8()) & 1) return -v;
-  return v;
-}
+void AV1HighbdWarpFilterTest::RunSpeedTest(highbd_warp_affine_func test_impl) {
+  const int w = 128, h = 128;
+  const int border = 16;
+  const int stride = w + 2 * border;
+  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
+  const int bd = GET_PARAM(3);
+  const int mask = (1 << bd) - 1;
+  int sub_x, sub_y;
 
-void AV1HighbdWarpFilterTest::generate_model(int32_t *mat, int16_t *alpha,
-                                             int16_t *beta, int16_t *gamma,
-                                             int16_t *delta) {
-  while (1) {
-    mat[0] = random_param(WARPEDMODEL_PREC_BITS + 6);
-    mat[1] = random_param(WARPEDMODEL_PREC_BITS + 6);
-    mat[2] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
-             (1 << WARPEDMODEL_PREC_BITS);
-    mat[3] = random_param(WARPEDMODEL_PREC_BITS - 3);
-    // 50/50 chance of generating ROTZOOM vs. AFFINE models
-    if (rnd_.Rand8() & 1) {
-      // AFFINE
-      mat[4] = random_param(WARPEDMODEL_PREC_BITS - 3);
-      mat[5] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
-               (1 << WARPEDMODEL_PREC_BITS);
-    } else {
-      mat[4] = -mat[3];
-      mat[5] = mat[2];
+  // The warp functions always write rows with widths that are multiples of 8.
+  // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
+  int output_n = ((out_w + 7) & ~7) * out_h;
+  uint16_t *input_ = new uint16_t[h * stride];
+  uint16_t *input = input_ + border;
+  uint16_t *output = new uint16_t[output_n];
+  int32_t mat[8];
+  int16_t alpha, beta, gamma, delta;
+  ConvolveParams conv_params = get_conv_params(0, 0, 0, bd);
+  CONV_BUF_TYPE *dsta = new CONV_BUF_TYPE[output_n];
+
+  generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta);
+  // Generate an input block and extend its borders horizontally
+  for (int r = 0; r < h; ++r)
+    for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand16() & mask;
+  for (int r = 0; r < h; ++r) {
+    for (int c = 0; c < border; ++c) {
+      input[r * stride - border + c] = input[r * stride];
+      input[r * stride + w + c] = input[r * stride + (w - 1)];
     }
+  }
 
-    // Calculate the derived parameters and check that they are suitable
-    // for the warp filter.
-    assert(mat[2] != 0);
+  sub_x = 0;
+  sub_y = 0;
+  int do_average = 0;
+  conv_params.use_jnt_comp_avg = 0;
+  conv_params = get_conv_params_no_round(0, do_average, 0, dsta, out_w, 1, bd);
 
-    *alpha = clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
-    *beta = clamp(mat[3], INT16_MIN, INT16_MAX);
-    *gamma = clamp(((int64_t)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) / mat[2],
-                   INT16_MIN, INT16_MAX);
-    *delta =
-        clamp(mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
-                  (1 << WARPEDMODEL_PREC_BITS),
-              INT16_MIN, INT16_MAX);
+  const int num_loops = 1000000000 / (out_w + out_h);
+  aom_usec_timer timer;
+  aom_usec_timer_start(&timer);
 
-    if ((4 * abs(*alpha) + 7 * abs(*beta) >= (1 << WARPEDMODEL_PREC_BITS)) ||
-        (4 * abs(*gamma) + 4 * abs(*delta) >= (1 << WARPEDMODEL_PREC_BITS)))
-      continue;
+  for (int i = 0; i < num_loops; ++i)
+    test_impl(mat, input, w, h, stride, output, 32, 32, out_w, out_h, out_w,
+              sub_x, sub_y, bd, &conv_params, alpha, beta, gamma, delta);
 
-    *alpha = ROUND_POWER_OF_TWO_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS) *
-             (1 << WARP_PARAM_REDUCE_BITS);
-    *beta = ROUND_POWER_OF_TWO_SIGNED(*beta, WARP_PARAM_REDUCE_BITS) *
-            (1 << WARP_PARAM_REDUCE_BITS);
-    *gamma = ROUND_POWER_OF_TWO_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS) *
-             (1 << WARP_PARAM_REDUCE_BITS);
-    *delta = ROUND_POWER_OF_TWO_SIGNED(*delta, WARP_PARAM_REDUCE_BITS) *
-             (1 << WARP_PARAM_REDUCE_BITS);
+  aom_usec_timer_mark(&timer);
+  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
+  printf("highbd warp %3dx%-3d: %7.2f ns\n", out_w, out_h,
+         1000.0 * elapsed_time / num_loops);
 
-    // We have a valid model, so finish
-    return;
-  }
+  delete[] input_;
+  delete[] output;
+  delete[] dsta;
 }
 
 void AV1HighbdWarpFilterTest::RunCheckOutput(
@@ -291,11 +343,10 @@ void AV1HighbdWarpFilterTest::RunCheckOutput(
   uint16_t *output2 = new uint16_t[output_n];
   int32_t mat[8];
   int16_t alpha, beta, gamma, delta;
-  ConvolveParams conv_params = get_conv_params(0, 0, 0);
-#if CONFIG_CONVOLVE_ROUND
-  int32_t *dsta = new int32_t[output_n];
-  int32_t *dstb = new int32_t[output_n];
-#endif
+  ConvolveParams conv_params = get_conv_params(0, 0, 0, bd);
+  CONV_BUF_TYPE *dsta = new CONV_BUF_TYPE[output_n];
+  CONV_BUF_TYPE *dstb = new CONV_BUF_TYPE[output_n];
+  for (int i = 0; i < output_n; ++i) output[i] = output2[i] = rnd_.Rand16();
 
   for (i = 0; i < num_iters; ++i) {
     // Generate an input block and extend its borders horizontally
@@ -307,68 +358,76 @@ void AV1HighbdWarpFilterTest::RunCheckOutput(
         input[r * stride + w + c] = input[r * stride + (w - 1)];
       }
     }
-#if CONFIG_CONVOLVE_ROUND
     const int use_no_round = rnd_.Rand8() & 1;
-#endif
     for (sub_x = 0; sub_x < 2; ++sub_x)
       for (sub_y = 0; sub_y < 2; ++sub_y) {
-        generate_model(mat, &alpha, &beta, &gamma, &delta);
-#if CONFIG_CONVOLVE_ROUND
-        if (use_no_round) {
-          // Prepare two copies of the destination
-          for (j = 0; j < out_w * out_h; ++j) {
-            int32_t v = rnd_.Rand16();
-            dsta[j] = v;
-            dstb[j] = v;
+        generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta);
+        for (int ii = 0; ii < 2; ++ii) {
+          for (int jj = 0; jj < 5; ++jj) {
+            for (int do_average = 0; do_average <= 1; ++do_average) {
+              if (use_no_round) {
+                conv_params = get_conv_params_no_round(0, do_average, 0, dsta,
+                                                       out_w, 1, bd);
+              } else {
+                conv_params = get_conv_params(0, 0, 0, bd);
+              }
+              if (jj >= 4) {
+                conv_params.use_jnt_comp_avg = 0;
+              } else {
+                conv_params.use_jnt_comp_avg = 1;
+                conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
+                conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
+              }
+
+              av1_highbd_warp_affine_c(mat, input, w, h, stride, output, 32, 32,
+                                       out_w, out_h, out_w, sub_x, sub_y, bd,
+                                       &conv_params, alpha, beta, gamma, delta);
+              if (use_no_round) {
+                // TODO(angiebird): Change this to test_impl once we have SIMD
+                // implementation
+                conv_params = get_conv_params_no_round(0, do_average, 0, dstb,
+                                                       out_w, 1, bd);
+              }
+              if (jj >= 4) {
+                conv_params.use_jnt_comp_avg = 0;
+              } else {
+                conv_params.use_jnt_comp_avg = 1;
+                conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
+                conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
+              }
+              test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
+                        out_w, sub_x, sub_y, bd, &conv_params, alpha, beta,
+                        gamma, delta);
+
+              if (use_no_round) {
+                for (j = 0; j < out_w * out_h; ++j)
+                  ASSERT_EQ(dsta[j], dstb[j])
+                      << "Pixel mismatch at index " << j << " = ("
+                      << (j % out_w) << ", " << (j / out_w) << ") on iteration "
+                      << i;
+                for (j = 0; j < out_w * out_h; ++j)
+                  ASSERT_EQ(output[j], output2[j])
+                      << "Pixel mismatch at index " << j << " = ("
+                      << (j % out_w) << ", " << (j / out_w) << ") on iteration "
+                      << i;
+              } else {
+                for (j = 0; j < out_w * out_h; ++j)
+                  ASSERT_EQ(output[j], output2[j])
+                      << "Pixel mismatch at index " << j << " = ("
+                      << (j % out_w) << ", " << (j / out_w) << ") on iteration "
+                      << i;
+              }
+            }
           }
-          conv_params = get_conv_params_no_round(0, 0, 0, dsta, out_w);
-        } else {
-          conv_params = get_conv_params(0, 0, 0);
-        }
-#endif
-        av1_highbd_warp_affine_c(mat, input, w, h, stride, output, 32, 32,
-                                 out_w, out_h, out_w, sub_x, sub_y, bd,
-                                 &conv_params, alpha, beta, gamma, delta);
-#if CONFIG_CONVOLVE_ROUND
-        if (use_no_round) {
-          // TODO(angiebird): Change this to test_impl once we have SIMD
-          // implementation
-          conv_params = get_conv_params_no_round(0, 0, 0, dstb, out_w);
-        }
-#endif
-        test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
-                  out_w, sub_x, sub_y, bd, &conv_params, alpha, beta, gamma,
-                  delta);
-
-#if CONFIG_CONVOLVE_ROUND
-        if (use_no_round) {
-          for (j = 0; j < out_w * out_h; ++j)
-            ASSERT_EQ(dsta[j], dstb[j])
-                << "Pixel mismatch at index " << j << " = (" << (j % out_w)
-                << ", " << (j / out_w) << ") on iteration " << i;
-        } else {
-          for (j = 0; j < out_w * out_h; ++j)
-            ASSERT_EQ(output[j], output2[j])
-                << "Pixel mismatch at index " << j << " = (" << (j % out_w)
-                << ", " << (j / out_w) << ") on iteration " << i;
         }
-#else
-        for (j = 0; j < out_w * out_h; ++j)
-          ASSERT_EQ(output[j], output2[j])
-              << "Pixel mismatch at index " << j << " = (" << (j % out_w)
-              << ", " << (j / out_w) << ") on iteration " << i;
-#endif
       }
   }
 
   delete[] input_;
   delete[] output;
   delete[] output2;
-#if CONFIG_CONVOLVE_ROUND
   delete[] dsta;
   delete[] dstb;
-#endif
 }
 }  // namespace AV1HighbdWarpFilter
-#endif  // CONFIG_HIGHBITDEPTH
 }  // namespace libaom_test
diff --git a/third_party/aom/test/warp_filter_test_util.h b/third_party/aom/test/warp_filter_test_util.h
index 4828f37852..cf72d9db6d 100644
--- a/third_party/aom/test/warp_filter_test_util.h
+++ b/third_party/aom/test/warp_filter_test_util.h
@@ -12,18 +12,24 @@
 #ifndef TEST_WARP_FILTER_TEST_UTIL_H_
 #define TEST_WARP_FILTER_TEST_UTIL_H_
 
+#include "config/av1_rtcd.h"
+#include "config/aom_dsp_rtcd.h"
+
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "test/acm_random.h"
 #include "test/util.h"
-#include "./av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 
 #include "av1/common/mv.h"
+#include "av1/common/common_data.h"
 
 namespace libaom_test {
 
+void generate_warped_model(libaom_test::ACMRandom *rnd, int32_t *mat,
+                           int16_t *alpha, int16_t *beta, int16_t *gamma,
+                           int16_t *delta);
+
 namespace AV1WarpFilter {
 
 typedef void (*warp_affine_func)(const int32_t *mat, const uint8_t *ref,
@@ -34,7 +40,7 @@ typedef void (*warp_affine_func)(const int32_t *mat, const uint8_t *ref,
                                  ConvolveParams *conv_params, int16_t alpha,
                                  int16_t beta, int16_t gamma, int16_t delta);
 
-typedef std::tr1::tuple<int, int, int, warp_affine_func> WarpTestParam;
+typedef ::testing::tuple<int, int, int, warp_affine_func> WarpTestParam;
 
 ::testing::internal::ParamGenerator<WarpTestParam> BuildParams(
     warp_affine_func filter);
@@ -47,18 +53,14 @@ class AV1WarpFilterTest : public ::testing::TestWithParam<WarpTestParam> {
   virtual void TearDown();
 
  protected:
-  int32_t random_param(int bits);
-  void generate_model(int32_t *mat, int16_t *alpha, int16_t *beta,
-                      int16_t *gamma, int16_t *delta);
-
   void RunCheckOutput(warp_affine_func test_impl);
+  void RunSpeedTest(warp_affine_func test_impl);
 
   libaom_test::ACMRandom rnd_;
 };
 
 }  // namespace AV1WarpFilter
 
-#if CONFIG_HIGHBITDEPTH
 namespace AV1HighbdWarpFilter {
 typedef void (*highbd_warp_affine_func)(const int32_t *mat, const uint16_t *ref,
                                         int width, int height, int stride,
@@ -69,9 +71,11 @@ typedef void (*highbd_warp_affine_func)(const int32_t *mat, const uint16_t *ref,
                                         int16_t alpha, int16_t beta,
                                         int16_t gamma, int16_t delta);
 
-typedef std::tr1::tuple<int, int, int, int> HighbdWarpTestParam;
+typedef ::testing::tuple<int, int, int, int, highbd_warp_affine_func>
+    HighbdWarpTestParam;
 
-::testing::internal::ParamGenerator<HighbdWarpTestParam> GetDefaultParams();
+::testing::internal::ParamGenerator<HighbdWarpTestParam> BuildParams(
+    highbd_warp_affine_func filter);
 
 class AV1HighbdWarpFilterTest
     : public ::testing::TestWithParam<HighbdWarpTestParam> {
@@ -82,17 +86,13 @@ class AV1HighbdWarpFilterTest
   virtual void TearDown();
 
  protected:
-  int32_t random_param(int bits);
-  void generate_model(int32_t *mat, int16_t *alpha, int16_t *beta,
-                      int16_t *gamma, int16_t *delta);
-
   void RunCheckOutput(highbd_warp_affine_func test_impl);
+  void RunSpeedTest(highbd_warp_affine_func test_impl);
 
   libaom_test::ACMRandom rnd_;
 };
 
 }  // namespace AV1HighbdWarpFilter
-#endif  // CONFIG_HIGHBITDEPTH
 
 }  // namespace libaom_test
 
diff --git a/third_party/aom/test/webm_video_source.h b/third_party/aom/test/webm_video_source.h
index b6c9980421..482f5dea2d 100644
--- a/third_party/aom/test/webm_video_source.h
+++ b/third_party/aom/test/webm_video_source.h
@@ -15,8 +15,8 @@
 #include <cstdlib>
 #include <new>
 #include <string>
-#include "../tools_common.h"
-#include "../webmdec.h"
+#include "common/tools_common.h"
+#include "common/webmdec.h"
 #include "test/video_source.h"
 
 namespace libaom_test {
@@ -27,8 +27,8 @@ class WebMVideoSource : public CompressedVideoSource {
  public:
   explicit WebMVideoSource(const std::string &file_name)
       : file_name_(file_name), aom_ctx_(new AvxInputContext()),
-        webm_ctx_(new WebmInputContext()), buf_(NULL), buf_sz_(0), frame_(0),
-        end_of_file_(false) {}
+        webm_ctx_(new WebmInputContext()), buf_(NULL), buf_sz_(0), frame_sz_(0),
+        frame_number_(0), end_of_file_(false) {}
 
   virtual ~WebMVideoSource() {
     if (aom_ctx_->file != NULL) fclose(aom_ctx_->file);
@@ -50,13 +50,13 @@ class WebMVideoSource : public CompressedVideoSource {
   }
 
   virtual void Next() {
-    ++frame_;
+    ++frame_number_;
     FillFrame();
   }
 
   void FillFrame() {
     ASSERT_TRUE(aom_ctx_->file != NULL);
-    const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_);
+    const int status = webm_read_frame(webm_ctx_, &buf_, &frame_sz_, &buf_sz_);
     ASSERT_GE(status, 0) << "webm_read_frame failed";
     if (status == 1) {
       end_of_file_ = true;
@@ -66,9 +66,10 @@ class WebMVideoSource : public CompressedVideoSource {
   void SeekToNextKeyFrame() {
     ASSERT_TRUE(aom_ctx_->file != NULL);
     do {
-      const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_);
+      const int status =
+          webm_read_frame(webm_ctx_, &buf_, &frame_sz_, &buf_sz_);
       ASSERT_GE(status, 0) << "webm_read_frame failed";
-      ++frame_;
+      ++frame_number_;
       if (status == 1) {
         end_of_file_ = true;
       }
@@ -76,16 +77,17 @@ class WebMVideoSource : public CompressedVideoSource {
   }
 
   virtual const uint8_t *cxdata() const { return end_of_file_ ? NULL : buf_; }
-  virtual size_t frame_size() const { return buf_sz_; }
-  virtual unsigned int frame_number() const { return frame_; }
+  virtual size_t frame_size() const { return frame_sz_; }
+  virtual unsigned int frame_number() const { return frame_number_; }
 
  protected:
   std::string file_name_;
   AvxInputContext *aom_ctx_;
   WebmInputContext *webm_ctx_;
-  uint8_t *buf_;
+  uint8_t *buf_;  // Owned by webm_ctx_ and freed when webm_ctx_ is freed.
   size_t buf_sz_;
-  unsigned int frame_;
+  size_t frame_sz_;
+  unsigned int frame_number_;
   bool end_of_file_;
 };
 
diff --git a/third_party/aom/test/y4m_test.cc b/third_party/aom/test/y4m_test.cc
index fc9fff5142..b8011935de 100644
--- a/third_party/aom/test/y4m_test.cc
+++ b/third_party/aom/test/y4m_test.cc
@@ -7,17 +7,17 @@
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
+ */
 
 #include <string>
 
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "config/aom_config.h"
 
-#include "./aom_config.h"
-#include "./y4menc.h"
+#include "common/y4menc.h"
 #include "test/md5_helper.h"
 #include "test/util.h"
 #include "test/y4m_video_source.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 
 namespace {
 
diff --git a/third_party/aom/test/y4m_video_source.h b/third_party/aom/test/y4m_video_source.h
index f70c30be64..277ded9eb5 100644
--- a/third_party/aom/test/y4m_video_source.h
+++ b/third_party/aom/test/y4m_video_source.h
@@ -13,8 +13,8 @@
 #include <algorithm>
 #include <string>
 
+#include "common/y4minput.h"
 #include "test/video_source.h"
-#include "./y4minput.h"
 
 namespace libaom_test {
 
diff --git a/third_party/aom/test/yuv_video_source.h b/third_party/aom/test/yuv_video_source.h
index 88cabd5bb2..51554af6f9 100644
--- a/third_party/aom/test/yuv_video_source.h
+++ b/third_party/aom/test/yuv_video_source.h
@@ -86,11 +86,9 @@ class YUVVideoSource : public VideoSource {
       switch (format) {
         case AOM_IMG_FMT_I420: raw_size_ = width * height * 3 / 2; break;
         case AOM_IMG_FMT_I422: raw_size_ = width * height * 2; break;
-        case AOM_IMG_FMT_I440: raw_size_ = width * height * 2; break;
         case AOM_IMG_FMT_I444: raw_size_ = width * height * 3; break;
         case AOM_IMG_FMT_I42016: raw_size_ = width * height * 3; break;
         case AOM_IMG_FMT_I42216: raw_size_ = width * height * 4; break;
-        case AOM_IMG_FMT_I44016: raw_size_ = width * height * 4; break;
         case AOM_IMG_FMT_I44416: raw_size_ = width * height * 6; break;
         default: ASSERT_TRUE(0);
       }
author	trav90 <travawine@palemoon.org>	2018-10-19 21:52:15 -0500
committer	trav90 <travawine@palemoon.org>	2018-10-19 21:52:20 -0500
commit	bbcc64772580c8a979288791afa02d30bc476d2e (patch)
tree	437ce94c3fdd7497508e5b55de06c6d011678597 /third_party/aom/test
parent	14805f6ddbfb173c327768fff9f81f40ce5e81b0 (diff)
download	uxp-bbcc64772580c8a979288791afa02d30bc476d2e.tar.gz