Add Opus audio to WebM media decoder

author: Pale Moon <git-repo@palemoon.org> 2014-08-24 23:58:03 +0200
committer: Pale Moon <git-repo@palemoon.org> 2014-08-25 16:00:34 +0200
commit: 29d3bdf86c0cf49343a9c5919cce22ffe71b54ee (patch)
tree: 448e209251be7f12ac5dbd705add685b06d4f501 /content/media
parent: 1906840da474dbe6ec32ae7fa43f4d2e3444b860 (diff)
download: palemoon-gre-29d3bdf86c0cf49343a9c5919cce22ffe71b54ee.tar.gz
10 files changed, 619 insertions, 219 deletions
diff --git a/content/media/DecoderTraits.cpp b/content/media/DecoderTraits.cpp
index 4ae3619a6..90d3f4fec 100644
--- a/content/media/DecoderTraits.cpp
+++ b/content/media/DecoderTraits.cpp
@@ -158,10 +158,11 @@ static const char* const gWebMTypes[3] = {
   nullptr
 };
 
-static char const *const gWebMCodecs[4] = {
+static char const *const gWebMCodecs[5] = {
   "vp8",
   "vp8.0",
   "vorbis",
+  "opus",
   nullptr
 };
 
diff --git a/content/media/ogg/OggCodecState.cpp b/content/media/ogg/OggCodecState.cpp
index d23071609..f8676f581 100644
--- a/content/media/ogg/OggCodecState.cpp
+++ b/content/media/ogg/OggCodecState.cpp
@@ -815,18 +815,7 @@ nsresult VorbisState::ReconstructVorbisGranulepos()
 #ifdef MOZ_OPUS
 OpusState::OpusState(ogg_page* aBosPage) :
   OggCodecState(aBosPage, true),
-  mRate(0),
-  mNominalRate(0),
-  mChannels(0),
-  mPreSkip(0),
-#ifdef MOZ_SAMPLE_TYPE_FLOAT32
-  mGain(1.0f),
-#else
-  mGain_Q16(65536),
-#endif
-  mChannelMapping(0),
-  mStreams(0),
-  mCoupledStreams(0),
+  mParser(NULL),
   mDecoder(NULL),
   mSkip(0),
   mPrevPacketGranulepos(0),
@@ -858,7 +847,7 @@ nsresult OpusState::Reset(bool aStart)
     // Reset the decoder.
     opus_multistream_decoder_ctl(mDecoder, OPUS_RESET_STATE);
     // Let the seek logic handle pre-roll if we're not seeking to the start.
-    mSkip = aStart ? mPreSkip : 0;
+    mSkip = aStart ? mParser->mPreSkip : 0;
     // This lets us distinguish the first page being the last page vs. just
     // not having processed the previous page when we encounter the last page.
     mPrevPageGranulepos = aStart ? 0 : -1;
@@ -884,14 +873,14 @@ bool OpusState::Init(void)
 
   NS_ASSERTION(mDecoder == NULL, "leaking OpusDecoder");
 
-  mDecoder = opus_multistream_decoder_create(mRate,
-                                             mChannels,
-                                             mStreams,
-                                             mCoupledStreams,
-                                             mMappingTable,
+  mDecoder = opus_multistream_decoder_create(mParser->mRate,
+                                             mParser->mChannels,
+                                             mParser->mStreams,
+                                             mParser->mCoupledStreams,
+                                             mParser->mMappingTable,
                                              &error);
 
-  mSkip = mPreSkip;
+  mSkip = mParser->mPreSkip;
 
   LOG(PR_LOG_DEBUG, ("Opus decoder init, to skip %d", mSkip));
 
@@ -904,147 +893,26 @@ bool OpusState::DecodeHeader(ogg_packet* aPacket)
   switch(mPacketCount++) {
     // Parse the id header.
     case 0: {
-      if (aPacket->bytes < 19 || memcmp(aPacket->packet, "OpusHead", 8)) {
-        LOG(PR_LOG_DEBUG, ("Invalid Opus file: unrecognized header"));
-        return false;
-      }
-
-      mRate = 48000; // The Opus decoder runs at 48 kHz regardless.
-
-      int version = aPacket->packet[8];
-      // Accept file format versions 0.x.
-      if ((version & 0xf0) != 0) {
-        LOG(PR_LOG_DEBUG, ("Rejecting unknown Opus file version %d", version));
-        return false;
-      }
-
-      mChannels = aPacket->packet[9];
-      if (mChannels<1) {
-        LOG(PR_LOG_DEBUG, ("Invalid Opus file: Number of channels %d", mChannels));
-        return false;
-      }
-      mPreSkip = LEUint16(aPacket->packet + 10);
-      mNominalRate = LEUint32(aPacket->packet + 12);
-      double gain_dB = LEInt16(aPacket->packet + 16) / 256.0;
-#ifdef MOZ_SAMPLE_TYPE_FLOAT32
-      mGain = static_cast<float>(pow(10,0.05*gain_dB));
-#else
-      mGain_Q16 = static_cast<int32_t>(std::min(65536*pow(10,0.05*gain_dB)+0.5,
-                                              static_cast<double>(INT32_MAX)));
-#endif
-      mChannelMapping = aPacket->packet[18];
-
-      if (mChannelMapping == 0) {
-        // Mapping family 0 only allows two channels
-        if (mChannels>2) {
-          LOG(PR_LOG_DEBUG, ("Invalid Opus file: too many channels (%d) for"
-                             " mapping family 0.", mChannels));
+        mParser = new OpusParser;
+        if(!mParser->DecodeHeader(aPacket->packet, aPacket->bytes)) {
           return false;
         }
-        mStreams = 1;
-        mCoupledStreams = mChannels - 1;
-        mMappingTable[0] = 0;
-        mMappingTable[1] = 1;
-      } else if (mChannelMapping == 1) {
-        // Currently only up to 8 channels are defined for mapping family 1
-        if (mChannels>8) {
-          LOG(PR_LOG_DEBUG, ("Invalid Opus file: too many channels (%d) for"
-                             " mapping family 1.", mChannels));
-          return false;
-        }
-        if (aPacket->bytes>20+mChannels) {
-          mStreams = aPacket->packet[19];
-          mCoupledStreams = aPacket->packet[20];
-          int i;
-          for (i=0; i<mChannels; i++)
-            mMappingTable[i] = aPacket->packet[21+i];
-        } else {
-          LOG(PR_LOG_DEBUG, ("Invalid Opus file: channel mapping %d,"
-                             " but no channel mapping table", mChannelMapping));
-          return false;
-        }
-      } else {
-        LOG(PR_LOG_DEBUG, ("Invalid Opus file: unsupported channel mapping "
-                           "family %d", mChannelMapping));
-        return false;
-      }
-      if (mStreams < 1) {
-        LOG(PR_LOG_DEBUG, ("Invalid Opus file: no streams"));
-        return false;
-      }
-      if (mCoupledStreams > mStreams) {
-        LOG(PR_LOG_DEBUG, ("Invalid Opus file: more coupled streams (%d) than "
-                           "total streams (%d)", mCoupledStreams, mStreams));
-        return false;
-      }
-
-#ifdef DEBUG
-      LOG(PR_LOG_DEBUG, ("Opus stream header:"));
-      LOG(PR_LOG_DEBUG, (" channels: %d", mChannels));
-      LOG(PR_LOG_DEBUG, ("  preskip: %d", mPreSkip));
-      LOG(PR_LOG_DEBUG, (" original: %d Hz", mNominalRate));
-      LOG(PR_LOG_DEBUG, ("     gain: %.2f dB", gain_dB));
-      LOG(PR_LOG_DEBUG, ("Channel Mapping:"));
-      LOG(PR_LOG_DEBUG, ("   family: %d", mChannelMapping));
-      LOG(PR_LOG_DEBUG, ("  streams: %d", mStreams));
+        mRate = mParser->mRate;
+        mChannels = mParser->mChannels;
+        mPreSkip = mParser->mPreSkip;
+#ifdef MOZ_SAMPLE_TYPE_FLOAT32
+        mGain = mParser->mGain;
+#else
+        mGain_Q16 = mParser->mGain_Q16;
 #endif
     }
     break;
 
     // Parse the metadata header.
     case 1: {
-      if (aPacket->bytes < 16 || memcmp(aPacket->packet, "OpusTags", 8))
-        return false;
-
-      // Copy out the raw comment lines, but only do basic validation
-      // checks against the string packing: too little data, too many
-      // comments, or comments that are too long. Rejecting these cases
-      // helps reduce the propagation of broken files.
-      // We do not ensure they are valid UTF-8 here, nor do we validate
-      // the required ASCII_TAG=value format of the user comments.
-      const unsigned char* buf = aPacket->packet + 8;
-      uint32_t bytes = aPacket->bytes - 8;
-      uint32_t len;
-      // Read the vendor string.
-      len = LEUint32(buf);
-      buf += 4;
-      bytes -= 4;
-      if (len > bytes)
-        return false;
-      mVendorString = nsCString(reinterpret_cast<const char*>(buf), len);
-      buf += len;
-      bytes -= len;
-      // Read the user comments.
-      if (bytes < 4)
-        return false;
-      uint32_t ncomments = LEUint32(buf);
-      buf += 4;
-      bytes -= 4;
-      // If there are so many comments even their length fields
-      // won't fit in the packet, stop reading now.
-      if (ncomments > (bytes>>2))
-        return false;
-      uint32_t i;
-      for (i = 0; i < ncomments; i++) {
-        if (bytes < 4)
+        if(!mParser->DecodeTags(aPacket->packet, aPacket->bytes)) {
           return false;
-        len = LEUint32(buf);
-        buf += 4;
-        bytes -= 4;
-        if (len > bytes)
-          return false;
-        mTags.AppendElement(nsCString(reinterpret_cast<const char*>(buf), len));
-        buf += len;
-        bytes -= len;
-      }
-
-#ifdef DEBUG
-      LOG(PR_LOG_DEBUG, ("Opus metadata header:"));
-      LOG(PR_LOG_DEBUG, ("  vendor: %s", mVendorString.get()));
-      for (uint32_t i = 0; i < mTags.Length(); i++) {
-        LOG(PR_LOG_DEBUG, (" %s", mTags[i].get()));
-      }
-#endif
+        }
     }
     break;
 
@@ -1067,8 +935,8 @@ MetadataTags* OpusState::GetTags()
 
   tags = new MetadataTags;
   tags->Init();
-  for (uint32_t i = 0; i < mTags.Length(); i++) {
-    AddVorbisComment(tags, mTags[i].Data(), mTags[i].Length());
+  for (uint32_t i = 0; i < mParser->mTags.Length(); i++) {
+    AddVorbisComment(tags, mParser->mTags[i].Data(), mParser->mTags[i].Length());
   }
 
   return tags;
@@ -1080,7 +948,7 @@ int64_t OpusState::Time(int64_t aGranulepos)
   if (!mActive)
     return -1;
 
-  return Time(mPreSkip, aGranulepos);
+  return Time(mParser->mPreSkip, aGranulepos);
 }
 
 int64_t OpusState::Time(int aPreSkip, int64_t aGranulepos)
diff --git a/content/media/ogg/OggCodecState.h b/content/media/ogg/OggCodecState.h
index ff06f9425..c598cbc9c 100644
--- a/content/media/ogg/OggCodecState.h
+++ b/content/media/ogg/OggCodecState.h
@@ -36,6 +36,8 @@
 #include <map>
 #endif
 
+#include "OpusParser.h"
+
 namespace mozilla {
 
 // Deallocates a packet, used in OggPacketQueue below.
@@ -341,7 +343,6 @@ public:
 
   // Various fields from the Ogg Opus header.
   int mRate;        // Sample rate the decoder uses (always 48 kHz).
-  uint32_t mNominalRate; // Original sample rate of the data (informational).
   int mChannels;    // Number of channels the stream encodes.
   uint16_t mPreSkip; // Number of samples to strip after decoder reset.
 #ifdef MOZ_SAMPLE_TYPE_FLOAT32
@@ -349,11 +350,8 @@ public:
 #else
   int32_t mGain_Q16; // Gain to apply to the decoder output.
 #endif
-  int mChannelMapping; // Channel mapping family.
-  int mStreams;     // Number of packed streams in each packet.
-  int mCoupledStreams; // Number of packed coupled streams in each packet.
-  unsigned char mMappingTable[255]; // Channel mapping table.
 
+  nsAutoPtr<OpusParser> mParser;
   OpusMSDecoder *mDecoder;
 
   int mSkip;        // Number of samples left to trim before playback.
@@ -366,9 +364,6 @@ public:
 
 private:
 
-  nsCString mVendorString;   // Encoder vendor string from the header.
-  nsTArray<nsCString> mTags; // Unparsed comment strings from the header.
-
   // Reconstructs the granulepos of Opus packets stored in the
   // mUnstamped array. mUnstamped must be filled with consecutive packets from
   // the stream, with the last packet having a known granulepos. Using this
diff --git a/content/media/ogg/OggReader.cpp b/content/media/ogg/OggReader.cpp
index 4874f2c06..8b2f7cc9b 100644
--- a/content/media/ogg/OggReader.cpp
+++ b/content/media/ogg/OggReader.cpp
@@ -589,6 +589,61 @@ nsresult OggReader::DecodeOpus(ogg_packet* aPacket) {
 }
 #endif /* MOZ_OPUS */
 
+// Downmixes |frames| frames of interleaved 3..8 channel audio in |buffer| to
+// stereo. On return |buffer| holds the stereo samples and |channels| is 2.
+void OggReader::DownmixToStereo(nsAutoArrayPtr<AudioDataValue>& buffer,
+                              uint32_t& channels, int32_t frames)
+{
+  // dmatrix only has rows for 3..8 channels (channels-3 wraps below 3): bail.
+  if (channels < 3 || channels > 8)
+    return;
+  const uint32_t out_channels = 2; // dBuffer stores the downmixed samples.
+  nsAutoArrayPtr<AudioDataValue> dBuffer(new AudioDataValue[frames * out_channels]);
+#ifdef MOZ_SAMPLE_TYPE_FLOAT32
+  // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows 5-8.
+  static const float dmatrix[6][8][2]= {
+      /*3*/{{0.5858f,0},{0.4142f,0.4142f},{0,     0.5858f}},
+      /*4*/{{0.4226f,0},{0,      0.4226f},{0.366f,0.2114f},{0.2114f,0.366f}},
+      /*5*/{{0.6510f,0},{0.4600f,0.4600f},{0,     0.6510f},{0.5636f,0.3254f},{0.3254f,0.5636f}},
+      /*6*/{{0.5290f,0},{0.3741f,0.3741f},{0,     0.5290f},{0.4582f,0.2645f},{0.2645f,0.4582f},{0.3741f,0.3741f}},
+      /*7*/{{0.4553f,0},{0.3220f,0.3220f},{0,     0.4553f},{0.3943f,0.2277f},{0.2277f,0.3943f},{0.2788f,0.2788f},{0.3220f,0.3220f}},
+      /*8*/{{0.3886f,0},{0.2748f,0.2748f},{0,     0.3886f},{0.3366f,0.1943f},{0.1943f,0.3366f},{0.3366f,0.1943f},{0.1943f,0.3366f},{0.2748f,0.2748f}},
+  };
+  for (int32_t i = 0; i < frames; i++) {
+    float sampL = 0.0, sampR = 0.0;
+    for (uint32_t j = 0; j < channels; j++) {
+      sampL+=buffer[i*channels+j]*dmatrix[channels-3][j][0];
+      sampR+=buffer[i*channels+j]*dmatrix[channels-3][j][1];
+    }
+    dBuffer[i*out_channels]=sampL;
+    dBuffer[i*out_channels+1]=sampR;
+  }
+#else
+  // Downmix matrix in Q14. Per-row normalization 1 for rows 3,4 and 2 for rows 5-8.
+  static const int16_t dmatrix[6][8][2]= {
+      /*3*/{{9598, 0},{6786,6786},{0,   9598}},
+      /*4*/{{6925, 0},{0,   6925},{5997,3462},{3462,5997}},
+      /*5*/{{10663,0},{7540,7540},{0,  10663},{9234,5331},{5331,9234}},
+      /*6*/{{8668, 0},{6129,6129},{0,   8668},{7507,4335},{4335,7507},{6129,6129}},
+      /*7*/{{7459, 0},{5275,5275},{0,   7459},{6460,3731},{3731,6460},{4568,4568},{5275,5275}},
+      /*8*/{{6368, 0},{4502,4502},{0,   6368},{5514,3184},{3184,5514},{5514,3184},{3184,5514},{4502,4502}}
+  };
+  for (int32_t i = 0; i < frames; i++) {
+    int32_t sampL = 0, sampR = 0;
+    for (uint32_t j = 0; j < channels; j++) {
+      sampL+=buffer[i*channels+j]*dmatrix[channels-3][j][0];
+      sampR+=buffer[i*channels+j]*dmatrix[channels-3][j][1];
+    }
+    sampL = (sampL + 8192)>>14;
+    dBuffer[i*out_channels]   = static_cast<AudioDataValue>(MOZ_CLIP_TO_15(sampL));
+    sampR = (sampR + 8192)>>14;
+    dBuffer[i*out_channels+1] = static_cast<AudioDataValue>(MOZ_CLIP_TO_15(sampR));
+  }
+#endif
+  channels = out_channels;
+  buffer = dBuffer;
+}
+
 bool OggReader::DecodeAudioData()
 {
   NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");
diff --git a/content/media/ogg/OggReader.h b/content/media/ogg/OggReader.h
index 76171c824..76d3894b1 100644
--- a/content/media/ogg/OggReader.h
+++ b/content/media/ogg/OggReader.h
@@ -74,6 +74,13 @@ public:
                                 MetadataTags** aTags);
   virtual nsresult Seek(int64_t aTime, int64_t aStartTime, int64_t aEndTime, int64_t aCurrentTime);
   virtual nsresult GetBuffered(dom::TimeRanges* aBuffered, int64_t aStartTime);
+  
+  // Downmixes multichannel audio samples to stereo.
+  // It is used by the Vorbis and Opus decoders.
+  // Inputs are the buffer containing the multichannel data, the number of
+  // channels and the number of frames; buffer and channel are updated in place.
+  static void DownmixToStereo(nsAutoArrayPtr<AudioDataValue>& buffer,
+                     uint32_t& channel, int32_t frames);
 
 private:
   // This monitor should be taken when reading or writing to mIsChained.
diff --git a/content/media/ogg/OpusParser.cpp b/content/media/ogg/OpusParser.cpp
new file mode 100644
index 000000000..6658ad492
--- /dev/null
+++ b/content/media/ogg/OpusParser.cpp
@@ -0,0 +1,197 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <string.h>
+
+#include "mozilla/DebugOnly.h"
+#include "mozilla/Endian.h"
+#include <stdint.h>
+
+#include "OpusParser.h"
+
+#include "nsDebug.h"
+#include "MediaDecoderReader.h"
+#include "VideoUtils.h"
+#include <algorithm>
+
+#include "opus/opus.h"
+extern "C" {
+#include "opus/opus_multistream.h"
+}
+
+namespace mozilla {
+
+#ifdef PR_LOGGING
+extern PRLogModuleInfo* gMediaDecoderLog;
+#define OPUS_LOG(type, msg) PR_LOG(gMediaDecoderLog, type, msg)
+#else
+#define OPUS_LOG(type, msg)
+#endif
+
+// Start from neutral defaults: nothing parsed yet, unity gain, granulepos 0.
+OpusParser::OpusParser():
+  mRate(0), mNominalRate(0), mChannels(0), mPreSkip(0),
+#ifdef MOZ_SAMPLE_TYPE_FLOAT32
+  mGain(1.0f),          // Unity gain.
+#else
+  mGain_Q16(65536),     // Unity gain in Q16 (1 << 16).
+#endif
+  mChannelMapping(0),
+  mStreams(0),
+  mCoupledStreams(0),
+  // Previously left uninitialised. mMappingTable is filled in by DecodeHeader.
+  mPrevPacketGranulepos(0)
+{ }
+
+// Parses the "OpusHead" identification header in aData[0..aLength) into the
+// members of this object. Returns false if the header is rejected.
+bool OpusParser::DecodeHeader(unsigned char* aData, size_t aLength)
+{
+  if (aLength < 19 || memcmp(aData, "OpusHead", 8) != 0) {
+    OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: unrecognized header"));
+    return false;
+  }
+  mRate = 48000; // The Opus decoder runs at 48 kHz regardless.
+  // Accept file format versions 0.x.
+  const int version = aData[8];
+  if (version & 0xf0) {
+    OPUS_LOG(PR_LOG_DEBUG, ("Rejecting unknown Opus file version %d", version));
+    return false;
+  }
+  mChannels = aData[9];
+  if (mChannels < 1) {
+    OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: Number of channels %d", mChannels));
+    return false;
+  }
+  // Little-endian fields at fixed offsets; the gain is stored in 1/256 dB.
+  mPreSkip = LittleEndian::readUint16(aData + 10);
+  mNominalRate = LittleEndian::readUint32(aData + 12);
+  const double gain_dB = LittleEndian::readInt16(aData + 16) / 256.0;
+#ifdef MOZ_SAMPLE_TYPE_FLOAT32
+  mGain = static_cast<float>(pow(10,0.05*gain_dB));
+#else
+  mGain_Q16 = static_cast<int32_t>(std::min(65536*pow(10,0.05*gain_dB)+0.5,
+                                          static_cast<double>(INT32_MAX)));
+#endif
+
+  mChannelMapping = aData[18];
+  switch (mChannelMapping) {
+    case 0:
+      // Mapping family 0 only allows two channels
+      if (mChannels > 2) {
+        OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: too many channels (%d) for"
+                           " mapping family 0.", mChannels));
+        return false;
+      }
+      mStreams = 1;
+      mCoupledStreams = mChannels - 1;
+      mMappingTable[0] = 0;
+      mMappingTable[1] = 1;
+      break;
+    case 1:
+      // Currently only up to 8 channels are defined for mapping family 1
+      if (mChannels > 8) {
+        OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: too many channels (%d) for"
+                           " mapping family 1.", mChannels));
+        return false;
+      }
+      if (aLength <= static_cast<unsigned>(20+mChannels)) {
+        OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: channel mapping %d,"
+                           " but no channel mapping table", mChannelMapping));
+        return false;
+      }
+      mStreams = aData[19];
+      mCoupledStreams = aData[20];
+      memcpy(mMappingTable, aData + 21, mChannels); // One table byte per channel.
+      break;
+    default:
+      OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: unsupported channel mapping "
+                         "family %d", mChannelMapping));
+      return false;
+  }
+
+  if (mStreams < 1) {
+    OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: no streams"));
+    return false;
+  }
+  if (mCoupledStreams > mStreams) {
+    OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: more coupled streams (%d) than "
+                       "total streams (%d)", mCoupledStreams, mStreams));
+    return false;
+  }
+
+#ifdef DEBUG
+  OPUS_LOG(PR_LOG_DEBUG, ("Opus stream header:"));
+  OPUS_LOG(PR_LOG_DEBUG, (" channels: %d", mChannels));
+  OPUS_LOG(PR_LOG_DEBUG, ("  preskip: %d", mPreSkip));
+  OPUS_LOG(PR_LOG_DEBUG, (" original: %d Hz", mNominalRate));
+  OPUS_LOG(PR_LOG_DEBUG, ("     gain: %.2f dB", gain_dB));
+  OPUS_LOG(PR_LOG_DEBUG, ("Channel Mapping:"));
+  OPUS_LOG(PR_LOG_DEBUG, ("   family: %d", mChannelMapping));
+  OPUS_LOG(PR_LOG_DEBUG, ("  streams: %d", mStreams));
+#endif
+  return true;
+}
+
+// Parses the "OpusTags" comment header in aData[0..aLength): stores the vendor
+// string and appends every raw user comment to mTags. False if malformed.
+bool OpusParser::DecodeTags(unsigned char* aData, size_t aLength)
+{
+  if (aLength < 16 || memcmp(aData, "OpusTags", 8) != 0)
+    return false;
+
+  // The comment strings are copied out raw. Only the string packing is
+  // validated: too little data, too many comments, or comments that
+  // are too long. Rejecting these cases helps reduce the propagation
+  // of broken files.
+  // Neither UTF-8 validity nor the required ASCII_TAG=value format of
+  // the user comments is checked here.
+  const unsigned char* cursor = aData + 8;
+  uint32_t remaining = aLength - 8;
+  // Read the vendor string.
+  const uint32_t vendorLen = LittleEndian::readUint32(cursor);
+  cursor += 4;
+  remaining -= 4;
+  if (vendorLen > remaining)
+    return false;
+  mVendorString = nsCString(reinterpret_cast<const char*>(cursor), vendorLen);
+  cursor += vendorLen;
+  remaining -= vendorLen;
+  // Read the user comments.
+  if (remaining < 4)
+    return false;
+  uint32_t ncomments = LittleEndian::readUint32(cursor);
+  cursor += 4;
+  remaining -= 4;
+  // If there are so many comments even their length fields
+  // won't fit in the packet, stop reading now.
+  if (ncomments > remaining / 4)
+    return false;
+  for (; ncomments > 0; ncomments--) {
+    if (remaining < 4)
+      return false;
+    const uint32_t commentLen = LittleEndian::readUint32(cursor);
+    cursor += 4;
+    remaining -= 4;
+    if (commentLen > remaining)
+      return false;
+    mTags.AppendElement(nsCString(reinterpret_cast<const char*>(cursor), commentLen));
+    cursor += commentLen;
+    remaining -= commentLen;
+  }
+
+#ifdef DEBUG
+  OPUS_LOG(PR_LOG_DEBUG, ("Opus metadata header:"));
+  OPUS_LOG(PR_LOG_DEBUG, ("  vendor: %s", mVendorString.get()));
+  for (uint32_t t = 0; t < mTags.Length(); t++) {
+    OPUS_LOG(PR_LOG_DEBUG, (" %s", mTags[t].get()));
+  }
+#endif
+  return true;
+}
+
+} // namespace mozilla
+
diff --git a/content/media/ogg/OpusParser.h b/content/media/ogg/OpusParser.h
new file mode 100644
index 000000000..4e9ce0c5b
--- /dev/null
+++ b/content/media/ogg/OpusParser.h
@@ -0,0 +1,54 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#if !defined(OpusParser_h_)
+#define OpusParser_h_
+
+#include <stdint.h>
+
+#include <opus/opus.h>
+#include "opus/opus_multistream.h"
+
+#include "nsTArray.h"
+#include "nsString.h"
+
+namespace mozilla {
+
+class OpusParser // Parses the Opus "OpusHead" and "OpusTags" header packets.
+{
+public:
+  OpusParser();
+
+  bool DecodeHeader(unsigned char* aData, size_t aLength); // "OpusHead"; false if rejected.
+  bool DecodeTags(unsigned char* aData, size_t aLength); // "OpusTags"; false if malformed.
+
+  // Fields read from the Opus identification header by DecodeHeader().
+  int mRate;        // Sample rate the decoder uses (always 48 kHz).
+  uint32_t mNominalRate; // Original sample rate of the data (informational).
+  int mChannels;    // Number of channels the stream encodes.
+  uint16_t mPreSkip; // Number of samples to strip after decoder reset.
+#ifdef MOZ_SAMPLE_TYPE_FLOAT32
+  float mGain;      // Linear gain to apply to the decoder output.
+#else
+  int32_t mGain_Q16; // Gain to apply to the decoder output, in Q16.
+#endif
+  int mChannelMapping; // Channel mapping family.
+  int mStreams;     // Number of packed streams in each packet.
+  int mCoupledStreams; // Number of packed coupled streams in each packet.
+  unsigned char mMappingTable[255]; // Channel mapping table.
+
+  // Granule position (end sample) of the last decoded Opus packet, used to work out the trim for the
+  // last packet. NOTE(review): DecodeHeader/DecodeTags never update it; presumably the owner does.
+  int64_t mPrevPacketGranulepos;
+
+  nsTArray<nsCString> mTags; // Unparsed comment strings, appended by DecodeTags().
+
+  nsCString mVendorString;   // Encoder vendor string, set by DecodeTags().
+
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/content/media/ogg/moz.build b/content/media/ogg/moz.build
index 7b2a72174..b418a7a08 100644
--- a/content/media/ogg/moz.build
+++ b/content/media/ogg/moz.build
@@ -11,6 +11,7 @@ EXPORTS += [
     'OggDecoder.h',
     'OggReader.h',
     'OggWriter.h',
+    'OpusParser.h',
 ]
 
 CPP_SOURCES += [
@@ -18,6 +19,7 @@ CPP_SOURCES += [
     'OggDecoder.cpp',
     'OggReader.cpp',
     'OggWriter.cpp',
+    'OpusParser.cpp',
 ]
 
 LIBRARY_NAME = 'gkconogg_s'
diff --git a/content/media/webm/WebMReader.cpp b/content/media/webm/WebMReader.cpp
index e0441ca53..d2ca2edff 100644
--- a/content/media/webm/WebMReader.cpp
+++ b/content/media/webm/WebMReader.cpp
@@ -17,6 +17,8 @@
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"
 
+#include "OggReader.h"
+
 using mozilla::NesteggPacketHolder;
 
 template <>
@@ -153,6 +155,11 @@ WebMReader::WebMReader(AbstractMediaDecoder* aDecoder)
   mContext(nullptr),
   mPacketCount(0),
   mChannels(0),
+#ifdef MOZ_OPUS
+  mOpusParser(nullptr),
+  mOpusDecoder(nullptr),
+  mSkip(0),
+#endif
   mVideoTrack(0),
   mAudioTrack(0),
   mAudioStartUsec(-1),
@@ -199,6 +206,11 @@ WebMReader::~WebMReader()
   vorbis_info_clear(&mVorbisInfo);
   vorbis_comment_clear(&mVorbisComment);
 
+  if (mOpusDecoder) {
+    opus_multistream_decoder_destroy(mOpusDecoder);
+    mOpusDecoder = nullptr;
+  }
+
   MOZ_COUNT_DTOR(WebMReader);
 }
 
@@ -383,51 +395,83 @@ nsresult WebMReader::ReadMetadata(VideoInfo* aInfo,
       mAudioTrack = track;
       mHasAudio = true;
       mInfo.mHasAudio = true;
+      mAudioCodec = nestegg_track_codec_id(mContext, track);
+      mCodecDelay = params.codec_delay;
+
+      if (mAudioCodec == NESTEGG_CODEC_VORBIS) {
+        // Get the Vorbis header data
+        unsigned int nheaders = 0;
+        r = nestegg_track_codec_data_count(mContext, track, &nheaders);
+        if (r == -1 || nheaders != 3) {
+          Cleanup();
+          return NS_ERROR_FAILURE;
+        }
 
-      // Get the Vorbis header data
-      unsigned int nheaders = 0;
-      r = nestegg_track_codec_data_count(mContext, track, &nheaders);
-      if (r == -1 || nheaders != 3) {
-        Cleanup();
-        return NS_ERROR_FAILURE;
-      }
+        for (uint32_t header = 0; header < nheaders; ++header) {
+          unsigned char* data = 0;
+          size_t length = 0;
+
+          r = nestegg_track_codec_data(mContext, track, header, &data, &length);
+          if (r == -1) {
+            Cleanup();
+            return NS_ERROR_FAILURE;
+          }
+          ogg_packet opacket = InitOggPacket(data, length, header == 0, false, 0);
+
+          r = vorbis_synthesis_headerin(&mVorbisInfo,
+                                        &mVorbisComment,
+                                        &opacket);
+          if (r != 0) {
+            Cleanup();
+            return NS_ERROR_FAILURE;
+          }
+        }
+
+        r = vorbis_synthesis_init(&mVorbisDsp, &mVorbisInfo);
+        if (r != 0) {
+          Cleanup();
+          return NS_ERROR_FAILURE;
+        }
+
+        r = vorbis_block_init(&mVorbisDsp, &mVorbisBlock);
+        if (r != 0) {
+          Cleanup();
+          return NS_ERROR_FAILURE;
+        }
 
-      for (uint32_t header = 0; header < nheaders; ++header) {
+        mInfo.mAudioRate = mVorbisDsp.vi->rate;
+        mInfo.mAudioChannels = mVorbisDsp.vi->channels;
+        mChannels = mInfo.mAudioChannels;
+#ifdef MOZ_OPUS
+      } else if (mAudioCodec == NESTEGG_CODEC_OPUS) {
         unsigned char* data = 0;
         size_t length = 0;
-
-        r = nestegg_track_codec_data(mContext, track, header, &data, &length);
+        r = nestegg_track_codec_data(mContext, track, 0, &data, &length);
         if (r == -1) {
           Cleanup();
           return NS_ERROR_FAILURE;
         }
 
-        ogg_packet opacket = InitOggPacket(data, length, header == 0, false, 0);
+        mOpusParser = new OpusParser;
+        if (!mOpusParser->DecodeHeader(data, length)) {
+          Cleanup();
+          return NS_ERROR_FAILURE;
+        }
 
-        r = vorbis_synthesis_headerin(&mVorbisInfo,
-                                      &mVorbisComment,
-                                      &opacket);
-        if (r != 0) {
+        if (!InitOpusDecoder()) {
           Cleanup();
           return NS_ERROR_FAILURE;
         }
-      }
 
-      r = vorbis_synthesis_init(&mVorbisDsp, &mVorbisInfo);
-      if (r != 0) {
-        Cleanup();
-        return NS_ERROR_FAILURE;
-      }
+        mInfo.mAudioRate = mOpusParser->mRate;
 
-      r = vorbis_block_init(&mVorbisDsp, &mVorbisBlock);
-      if (r != 0) {
+        mInfo.mAudioChannels = mOpusParser->mChannels;
+        mInfo.mAudioChannels = mInfo.mAudioChannels > 2 ? 2 : mInfo.mAudioChannels;
+#endif
+      } else {
         Cleanup();
         return NS_ERROR_FAILURE;
       }
-
-      mInfo.mAudioRate = mVorbisDsp.vi->rate;
-      mInfo.mAudioChannels = mVorbisDsp.vi->channels;
-      mChannels = mInfo.mAudioChannels;
     }
   }
 
@@ -484,6 +528,25 @@ nsresult WebMReader::ReadMetadata(VideoInfo* aInfo,
   return NS_OK;
 }
 
+#ifdef MOZ_OPUS
+// Creates mOpusDecoder from the stream layout parsed into mOpusParser and
+// primes mSkip with the header's pre-skip. Returns true on OPUS_OK.
+bool WebMReader::InitOpusDecoder()
+{
+  NS_ASSERTION(mOpusDecoder == nullptr, "leaking OpusDecoder");
+
+  mSkip = mOpusParser->mPreSkip;
+  int status;
+  mOpusDecoder = opus_multistream_decoder_create(mOpusParser->mRate,
+                                                 mOpusParser->mChannels,
+                                                 mOpusParser->mStreams,
+                                                 mOpusParser->mCoupledStreams,
+                                                 mOpusParser->mMappingTable,
+                                                 &status);
+  return status == OPUS_OK;
+}
+#endif
+
 ogg_packet WebMReader::InitOggPacket(unsigned char* aData,
                                        size_t aLength,
                                        bool aBOS,
@@ -517,7 +580,7 @@ bool WebMReader::DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset)
     return false;
   }
 
-  const uint32_t rate = mVorbisDsp.vi->rate;
+  const uint32_t rate = mInfo.mAudioRate;
   uint64_t tstamp_usecs = tstamp / NS_PER_USEC;
   if (mAudioStartUsec == -1) {
     // This is the first audio chunk. Assume the start time of our decode
@@ -559,27 +622,167 @@ bool WebMReader::DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset)
     if (r == -1) {
       return false;
     }
+    if (mAudioCodec == NESTEGG_CODEC_VORBIS) {
+      ogg_packet opacket = InitOggPacket(data, length, false, false, -1);
+
+      if (vorbis_synthesis(&mVorbisBlock, &opacket) != 0) {
+        return false;
+      }
 
-    ogg_packet opacket = InitOggPacket(data, length, false, false, -1);
+      if (vorbis_synthesis_blockin(&mVorbisDsp,
+                                   &mVorbisBlock) != 0) {
+        return false;
+      }
 
-    if (vorbis_synthesis(&mVorbisBlock, &opacket) != 0) {
-      return false;
-    }
+      VorbisPCMValue** pcm = 0;
+      int32_t frames = 0;
+      while ((frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm)) > 0) {
+        nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frames * mChannels]);
+        for (uint32_t j = 0; j < mChannels; ++j) {
+          VorbisPCMValue* channel = pcm[j];
+          for (uint32_t i = 0; i < uint32_t(frames); ++i) {
+            buffer[i*mChannels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[i]);
+          }
+        }
 
-    if (vorbis_synthesis_blockin(&mVorbisDsp,
-                                 &mVorbisBlock) != 0) {
-      return false;
-    }
+        CheckedInt64 duration = FramesToUsecs(frames, rate);
+        if (!duration.isValid()) {
+          NS_WARNING("Int overflow converting WebM audio duration");
+          return false;
+        }
+        CheckedInt64 total_duration = FramesToUsecs(total_frames, rate);
+        if (!total_duration.isValid()) {
+          NS_WARNING("Int overflow converting WebM audio total_duration");
+          return false;
+        }
+
+        CheckedInt64 time = total_duration + tstamp_usecs;
+        if (!time.isValid()) {
+          NS_WARNING("Int overflow adding total_duration and tstamp_usecs");
+          nestegg_free_packet(aPacket);
+          return false;
+        };
+
+        total_frames += frames;
+        AudioQueue().Push(new AudioData(aOffset,
+                                       time.value(),
+                                       duration.value(),
+                                       frames,
+                                       buffer.forget(),
+                                       mChannels));
+        mAudioFrames += frames;
+        if (vorbis_synthesis_read(&mVorbisDsp, frames) != 0) {
+          return false;
+        }
+      }
+    } else if (mAudioCodec == NESTEGG_CODEC_OPUS) {
+#ifdef MOZ_OPUS
+      uint32_t channels = mOpusParser->mChannels;
+
+      // Maximum value is 63*2880, so there's no chance of overflow.
+      int32_t frames_number = opus_packet_get_nb_frames(data, length);
+
+      if (frames_number <= 0)
+        return false; // Invalid packet header.
+      int32_t samples = opus_packet_get_samples_per_frame(data,
+                                                          (opus_int32) rate);
+      int32_t frames = frames_number*samples;
+
+      // A valid Opus packet must be between 2.5 and 120 ms long.
+      if (frames < 120 || frames > 5760)
+        return false;
+      nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frames * channels]);
 
-    VorbisPCMValue** pcm = 0;
-    int32_t frames = 0;
-    while ((frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm)) > 0) {
-      nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frames * mChannels]);
-      for (uint32_t j = 0; j < mChannels; ++j) {
-        VorbisPCMValue* channel = pcm[j];
-        for (uint32_t i = 0; i < uint32_t(frames); ++i) {
-          buffer[i*mChannels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[i]);
+      // Decode to the appropriate sample type.
+#ifdef MOZ_SAMPLE_TYPE_FLOAT32
+      int ret = opus_multistream_decode_float(mOpusDecoder,
+                                              data, length,
+                                              buffer, frames, false);
+#else
+      int ret = opus_multistream_decode(mOpusDecoder,
+                                        data, length,
+                                        buffer, frames, false);
+#endif
+      if (ret < 0)
+        return false;
+      NS_ASSERTION(ret == frames, "Opus decoded too few audio samples");
+
+      // Trim the initial frames while the decoder is settling.
+      if (mSkip > 0) {
+        int32_t skipFrames = std::min(mSkip, frames);
+        if (skipFrames == frames) {
+          // discard the whole packet
+          mSkip -= frames;
+          LOG(PR_LOG_DEBUG, ("Opus decoder skipping %d frames"
+                             " (whole packet)", frames));
+          return true;
         }
+        int32_t keepFrames = frames - skipFrames;
+        int samples = keepFrames * channels;
+        nsAutoArrayPtr<AudioDataValue> trimBuffer(new AudioDataValue[samples]);
+        for (int i = 0; i < samples; i++)
+          trimBuffer[i] = buffer[skipFrames*channels + i];
+
+        frames = keepFrames;
+        buffer = trimBuffer;
+
+        mSkip -= skipFrames;
+        LOG(PR_LOG_DEBUG, ("Opus decoder skipping %d frames", skipFrames));
+      }
+
+      int64_t discardPadding = 0;  // Trailing padding for this packet, in ns.
+      r = nestegg_packet_discard_padding(aPacket, &discardPadding);
+      if (r == -1) {
+        return false;
+      }
+      if (discardPadding > 0) {  // ns -> usecs is a division, as for tstamp.
+        CheckedInt64 discardFrames = UsecsToFrames(discardPadding / NS_PER_USEC, rate);
+        if (!discardFrames.isValid()) {
+          NS_WARNING("Int overflow in DiscardPadding");
+          return false;
+        }
+        int64_t keepFrames = frames - discardFrames.value();  // 64-bit: no wrap.
+        if (keepFrames > 0) {
+          int samples = static_cast<int>(keepFrames * channels);
+          nsAutoArrayPtr<AudioDataValue> trimBuffer(new AudioDataValue[samples]);
+          for (int i = 0; i < samples; i++)
+            trimBuffer[i] = buffer[i];  // Keep only the leading frames.
+          frames = static_cast<int32_t>(keepFrames);  // <= old frames, so it fits.
+          buffer = trimBuffer;
+        } else {
+          LOG(PR_LOG_DEBUG, ("Opus decoder discarding whole packet"
+                             " ( %d frames) as padding", frames));
+          return true;
+        }
+      }
+
+      // Apply the header gain if one was specified.
+#ifdef MOZ_SAMPLE_TYPE_FLOAT32
+      if (mOpusParser->mGain != 1.0f) {
+        float gain = mOpusParser->mGain;
+        int samples = frames * channels;
+        for (int i = 0; i < samples; i++) {
+          buffer[i] *= gain;
+        }
+      }
+#else
+      if (mOpusParser->mGain_Q16 != 65536) {
+        int64_t gain_Q16 = mOpusParser->mGain_Q16;
+        int samples = frames * channels;
+        for (int i = 0; i < samples; i++) {
+          int32_t val = static_cast<int32_t>((gain_Q16*buffer[i] + 32768)>>16);
+          buffer[i] = static_cast<AudioDataValue>(MOZ_CLIP_TO_15(val));
+        }
+      }
+#endif
+
+      // More than 2 decoded channels must be downmixed to stereo.
+      if (channels > 2) {
+        // Opus doesn't provide a channel mapping for more than 8 channels,
+        // so we can't downmix more than that.
+        if (channels > 8)
+          return false;
+        OggReader::DownmixToStereo(buffer, channels, frames);
       }
 
       CheckedInt64 duration = FramesToUsecs(frames, rate);
@@ -587,30 +790,25 @@ bool WebMReader::DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset)
         NS_WARNING("Int overflow converting WebM audio duration");
         return false;
       }
-      CheckedInt64 total_duration = FramesToUsecs(total_frames, rate);
-      if (!total_duration.isValid()) {
-        NS_WARNING("Int overflow converting WebM audio total_duration");
-        return false;
-      }
-      
-      CheckedInt64 time = total_duration + tstamp_usecs;
+
+      CheckedInt64 time = tstamp_usecs;
       if (!time.isValid()) {
         NS_WARNING("Int overflow adding total_duration and tstamp_usecs");
         nestegg_free_packet(aPacket);
         return false;
       };
 
-      total_frames += frames;
-      AudioQueue().Push(new AudioData(aOffset,
+      AudioQueue().Push(new AudioData(mDecoder->GetResource()->Tell(),
                                      time.value(),
                                      duration.value(),
                                      frames,
                                      buffer.forget(),
                                      mChannels));
+
       mAudioFrames += frames;
-      if (vorbis_synthesis_read(&mVorbisDsp, frames) != 0) {
-        return false;
-      }
+#else
+      return false;
+#endif /* MOZ_OPUS */
     }
   }
 
diff --git a/content/media/webm/WebMReader.h b/content/media/webm/WebMReader.h
index 5cac84aa8..83823cc8b 100644
--- a/content/media/webm/WebMReader.h
+++ b/content/media/webm/WebMReader.h
@@ -26,6 +26,10 @@
 #include "DASHRepReader.h"
 #endif
 
+#ifdef MOZ_OPUS
+#include "OpusParser.h"
+#endif
+
 namespace mozilla {
 
 class WebMBufferedState;
@@ -244,6 +248,11 @@ protected:
                            bool aEOS,
                            int64_t aGranulepos);
 
+#ifdef MOZ_OPUS
+  // Setup opus decoder
+  bool InitOpusDecoder();
+#endif
+
   // Decode a nestegg packet of audio data. Push the audio data on the
   // audio queue. Returns true when there's more audio to decode,
   // false if the audio is finished, end of file has been reached,
@@ -272,6 +281,14 @@ private:
   uint32_t mPacketCount;
   uint32_t mChannels;
 
+
+#ifdef MOZ_OPUS
+  // Opus decoder state
+  nsAutoPtr<OpusParser> mOpusParser;
+  OpusMSDecoder *mOpusDecoder;
+  int mSkip;        // Number of samples left to trim before playback.
+#endif
+
   // Queue of video and audio packets that have been read but not decoded. These
   // must only be accessed from the state machine thread.
   WebMPacketQueue mVideoPackets;
@@ -287,6 +304,9 @@ private:
   // Number of audio frames we've decoded since decoding began at mAudioStartMs.
   uint64_t mAudioFrames;
 
+  // Number of nanoseconds that must be discarded from the start of the Stream.
+  uint64_t mCodecDelay;
+
   // Parser state and computed offset-time mappings.  Shared by multiple
   // readers when decoder has been cloned.  Main thread only.
   nsRefPtr<WebMBufferedState> mBufferedState;
@@ -302,6 +322,9 @@ private:
   bool mHasVideo;
   bool mHasAudio;
 
+  // Codec ID of audio track
+  int mAudioCodec;
+ 
 #ifdef MOZ_DASH
   // Byte range for initialisation data; e.g. specified in DASH manifest.
   MediaByteRange mInitByteRange;
author	Pale Moon <git-repo@palemoon.org>	2014-08-24 23:58:03 +0200
committer	Pale Moon <git-repo@palemoon.org>	2014-08-25 16:00:34 +0200
commit	29d3bdf86c0cf49343a9c5919cce22ffe71b54ee (patch)
tree	448e209251be7f12ac5dbd705add685b06d4f501 /content/media
parent	1906840da474dbe6ec32ae7fa43f4d2e3444b860 (diff)
download	palemoon-gre-29d3bdf86c0cf49343a9c5919cce22ffe71b54ee.tar.gz