diff options
author | Pale Moon <git-repo@palemoon.org> | 2014-08-24 23:58:03 +0200 |
---|---|---|
committer | Pale Moon <git-repo@palemoon.org> | 2014-08-25 16:00:34 +0200 |
commit | 29d3bdf86c0cf49343a9c5919cce22ffe71b54ee (patch) | |
tree | 448e209251be7f12ac5dbd705add685b06d4f501 /content/media | |
parent | 1906840da474dbe6ec32ae7fa43f4d2e3444b860 (diff) | |
download | palemoon-gre-29d3bdf86c0cf49343a9c5919cce22ffe71b54ee.tar.gz |
Add Opus audio to WebM media decoder
Diffstat (limited to 'content/media')
-rw-r--r-- | content/media/DecoderTraits.cpp | 3 | ||||
-rw-r--r-- | content/media/ogg/OggCodecState.cpp | 176 | ||||
-rw-r--r-- | content/media/ogg/OggCodecState.h | 11 | ||||
-rw-r--r-- | content/media/ogg/OggReader.cpp | 55 | ||||
-rw-r--r-- | content/media/ogg/OggReader.h | 7 | ||||
-rw-r--r-- | content/media/ogg/OpusParser.cpp | 197 | ||||
-rw-r--r-- | content/media/ogg/OpusParser.h | 54 | ||||
-rw-r--r-- | content/media/ogg/moz.build | 2 | ||||
-rw-r--r-- | content/media/webm/WebMReader.cpp | 310 | ||||
-rw-r--r-- | content/media/webm/WebMReader.h | 23 |
10 files changed, 619 insertions, 219 deletions
diff --git a/content/media/DecoderTraits.cpp b/content/media/DecoderTraits.cpp index 4ae3619a6..90d3f4fec 100644 --- a/content/media/DecoderTraits.cpp +++ b/content/media/DecoderTraits.cpp @@ -158,10 +158,11 @@ static const char* const gWebMTypes[3] = { nullptr }; -static char const *const gWebMCodecs[4] = { +static char const *const gWebMCodecs[5] = { "vp8", "vp8.0", "vorbis", + "opus", nullptr }; diff --git a/content/media/ogg/OggCodecState.cpp b/content/media/ogg/OggCodecState.cpp index d23071609..f8676f581 100644 --- a/content/media/ogg/OggCodecState.cpp +++ b/content/media/ogg/OggCodecState.cpp @@ -815,18 +815,7 @@ nsresult VorbisState::ReconstructVorbisGranulepos() #ifdef MOZ_OPUS OpusState::OpusState(ogg_page* aBosPage) : OggCodecState(aBosPage, true), - mRate(0), - mNominalRate(0), - mChannels(0), - mPreSkip(0), -#ifdef MOZ_SAMPLE_TYPE_FLOAT32 - mGain(1.0f), -#else - mGain_Q16(65536), -#endif - mChannelMapping(0), - mStreams(0), - mCoupledStreams(0), + mParser(NULL), mDecoder(NULL), mSkip(0), mPrevPacketGranulepos(0), @@ -858,7 +847,7 @@ nsresult OpusState::Reset(bool aStart) // Reset the decoder. opus_multistream_decoder_ctl(mDecoder, OPUS_RESET_STATE); // Let the seek logic handle pre-roll if we're not seeking to the start. - mSkip = aStart ? mPreSkip : 0; + mSkip = aStart ? mParser->mPreSkip : 0; // This lets us distinguish the first page being the last page vs. just // not having processed the previous page when we encounter the last page. mPrevPageGranulepos = aStart ? 0 : -1; @@ -884,14 +873,14 @@ bool OpusState::Init(void) NS_ASSERTION(mDecoder == NULL, "leaking OpusDecoder"); - mDecoder = opus_multistream_decoder_create(mRate, - mChannels, - mStreams, - mCoupledStreams, - mMappingTable, + mDecoder = opus_multistream_decoder_create(mParser->mRate, + mParser->mChannels, + mParser->mStreams, + mParser->mCoupledStreams, + mParser->mMappingTable, &error); - mSkip = mPreSkip; + mSkip = mParser->mPreSkip; LOG(PR_LOG_DEBUG, ("Opus decoder init, to skip %d", mSkip)); @@ -904,147 +893,26 @@ bool OpusState::DecodeHeader(ogg_packet* aPacket) switch(mPacketCount++) { // Parse the id header. case 0: { - if (aPacket->bytes < 19 || memcmp(aPacket->packet, "OpusHead", 8)) { - LOG(PR_LOG_DEBUG, ("Invalid Opus file: unrecognized header")); - return false; - } - - mRate = 48000; // The Opus decoder runs at 48 kHz regardless. - - int version = aPacket->packet[8]; - // Accept file format versions 0.x. - if ((version & 0xf0) != 0) { - LOG(PR_LOG_DEBUG, ("Rejecting unknown Opus file version %d", version)); - return false; - } - - mChannels = aPacket->packet[9]; - if (mChannels<1) { - LOG(PR_LOG_DEBUG, ("Invalid Opus file: Number of channels %d", mChannels)); - return false; - } - mPreSkip = LEUint16(aPacket->packet + 10); - mNominalRate = LEUint32(aPacket->packet + 12); - double gain_dB = LEInt16(aPacket->packet + 16) / 256.0; -#ifdef MOZ_SAMPLE_TYPE_FLOAT32 - mGain = static_cast<float>(pow(10,0.05*gain_dB)); -#else - mGain_Q16 = static_cast<int32_t>(std::min(65536*pow(10,0.05*gain_dB)+0.5, - static_cast<double>(INT32_MAX))); -#endif - mChannelMapping = aPacket->packet[18]; - - if (mChannelMapping == 0) { - // Mapping family 0 only allows two channels - if (mChannels>2) { - LOG(PR_LOG_DEBUG, ("Invalid Opus file: too many channels (%d) for" - " mapping family 0.", mChannels)); + mParser = new OpusParser; + if(!mParser->DecodeHeader(aPacket->packet, aPacket->bytes)) { return false; } - mStreams = 1; - mCoupledStreams = mChannels - 1; - mMappingTable[0] = 0; - mMappingTable[1] = 1; - } else if (mChannelMapping == 1) { - // Currently only up to 8 channels are defined for mapping family 1 - if (mChannels>8) { - LOG(PR_LOG_DEBUG, ("Invalid Opus file: too many channels (%d) for" - " mapping family 1.", mChannels)); - return false; - } - if (aPacket->bytes>20+mChannels) { - mStreams = aPacket->packet[19]; - mCoupledStreams = aPacket->packet[20]; - int i; - for (i=0; i<mChannels; i++) - mMappingTable[i] = aPacket->packet[21+i]; - } else { - LOG(PR_LOG_DEBUG, ("Invalid Opus file: channel mapping %d," - " but no channel mapping table", mChannelMapping)); - return false; - } - } else { - LOG(PR_LOG_DEBUG, ("Invalid Opus file: unsupported channel mapping " - "family %d", mChannelMapping)); - return false; - } - if (mStreams < 1) { - LOG(PR_LOG_DEBUG, ("Invalid Opus file: no streams")); - return false; - } - if (mCoupledStreams > mStreams) { - LOG(PR_LOG_DEBUG, ("Invalid Opus file: more coupled streams (%d) than " - "total streams (%d)", mCoupledStreams, mStreams)); - return false; - } - -#ifdef DEBUG - LOG(PR_LOG_DEBUG, ("Opus stream header:")); - LOG(PR_LOG_DEBUG, (" channels: %d", mChannels)); - LOG(PR_LOG_DEBUG, (" preskip: %d", mPreSkip)); - LOG(PR_LOG_DEBUG, (" original: %d Hz", mNominalRate)); - LOG(PR_LOG_DEBUG, (" gain: %.2f dB", gain_dB)); - LOG(PR_LOG_DEBUG, ("Channel Mapping:")); - LOG(PR_LOG_DEBUG, (" family: %d", mChannelMapping)); - LOG(PR_LOG_DEBUG, (" streams: %d", mStreams)); + mRate = mParser->mRate; + mChannels = mParser->mChannels; + mPreSkip = mParser->mPreSkip; +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + mGain = mParser->mGain; +#else + mGain_Q16 = mParser->mGain_Q16; #endif } break; // Parse the metadata header. case 1: { - if (aPacket->bytes < 16 || memcmp(aPacket->packet, "OpusTags", 8)) - return false; - - // Copy out the raw comment lines, but only do basic validation - // checks against the string packing: too little data, too many - // comments, or comments that are too long. Rejecting these cases - // helps reduce the propagation of broken files. - // We do not ensure they are valid UTF-8 here, nor do we validate - // the required ASCII_TAG=value format of the user comments. - const unsigned char* buf = aPacket->packet + 8; - uint32_t bytes = aPacket->bytes - 8; - uint32_t len; - // Read the vendor string. - len = LEUint32(buf); - buf += 4; - bytes -= 4; - if (len > bytes) - return false; - mVendorString = nsCString(reinterpret_cast<const char*>(buf), len); - buf += len; - bytes -= len; - // Read the user comments. - if (bytes < 4) - return false; - uint32_t ncomments = LEUint32(buf); - buf += 4; - bytes -= 4; - // If there are so many comments even their length fields - // won't fit in the packet, stop reading now. - if (ncomments > (bytes>>2)) - return false; - uint32_t i; - for (i = 0; i < ncomments; i++) { - if (bytes < 4) + if(!mParser->DecodeTags(aPacket->packet, aPacket->bytes)) { return false; - len = LEUint32(buf); - buf += 4; - bytes -= 4; - if (len > bytes) - return false; - mTags.AppendElement(nsCString(reinterpret_cast<const char*>(buf), len)); - buf += len; - bytes -= len; - } - -#ifdef DEBUG - LOG(PR_LOG_DEBUG, ("Opus metadata header:")); - LOG(PR_LOG_DEBUG, (" vendor: %s", mVendorString.get())); - for (uint32_t i = 0; i < mTags.Length(); i++) { - LOG(PR_LOG_DEBUG, (" %s", mTags[i].get())); - } -#endif + } } break; @@ -1067,8 +935,8 @@ MetadataTags* OpusState::GetTags() tags = new MetadataTags; tags->Init(); - for (uint32_t i = 0; i < mTags.Length(); i++) { - AddVorbisComment(tags, mTags[i].Data(), mTags[i].Length()); + for (uint32_t i = 0; i < mParser->mTags.Length(); i++) { + AddVorbisComment(tags, mParser->mTags[i].Data(), mParser->mTags[i].Length()); } return tags; @@ -1080,7 +948,7 @@ int64_t OpusState::Time(int64_t aGranulepos) if (!mActive) return -1; - return Time(mPreSkip, aGranulepos); + return Time(mParser->mPreSkip, aGranulepos); } int64_t OpusState::Time(int aPreSkip, int64_t aGranulepos) diff --git a/content/media/ogg/OggCodecState.h b/content/media/ogg/OggCodecState.h index ff06f9425..c598cbc9c 100644 --- a/content/media/ogg/OggCodecState.h +++ b/content/media/ogg/OggCodecState.h @@ -36,6 +36,8 @@ #include <map> #endif +#include "OpusParser.h" + namespace mozilla { // Deallocates a packet, used in OggPacketQueue below. @@ -341,7 +343,6 @@ public: // Various fields from the Ogg Opus header. int mRate; // Sample rate the decoder uses (always 48 kHz). - uint32_t mNominalRate; // Original sample rate of the data (informational). int mChannels; // Number of channels the stream encodes. uint16_t mPreSkip; // Number of samples to strip after decoder reset. #ifdef MOZ_SAMPLE_TYPE_FLOAT32 @@ -349,11 +350,8 @@ public: #else int32_t mGain_Q16; // Gain to apply to the decoder output. #endif - int mChannelMapping; // Channel mapping family. - int mStreams; // Number of packed streams in each packet. - int mCoupledStreams; // Number of packed coupled streams in each packet. - unsigned char mMappingTable[255]; // Channel mapping table. + nsAutoPtr<OpusParser> mParser; OpusMSDecoder *mDecoder; int mSkip; // Number of samples left to trim before playback. @@ -366,9 +364,6 @@ public: private: - nsCString mVendorString; // Encoder vendor string from the header. - nsTArray<nsCString> mTags; // Unparsed comment strings from the header. - // Reconstructs the granulepos of Opus packets stored in the // mUnstamped array. mUnstamped must be filled with consecutive packets from // the stream, with the last packet having a known granulepos. Using this diff --git a/content/media/ogg/OggReader.cpp b/content/media/ogg/OggReader.cpp index 4874f2c06..8b2f7cc9b 100644 --- a/content/media/ogg/OggReader.cpp +++ b/content/media/ogg/OggReader.cpp @@ -589,6 +589,61 @@ nsresult OggReader::DecodeOpus(ogg_packet* aPacket) { } #endif /* MOZ_OPUS */ +void OggReader::DownmixToStereo(nsAutoArrayPtr<AudioDataValue>& buffer, + uint32_t& channels, int32_t frames) +{ + uint32_t out_channels; + out_channels = 2; + // dBuffer stores the downmixed samples. + nsAutoArrayPtr<AudioDataValue> dBuffer(new AudioDataValue[frames * out_channels]); +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows 5-8. + static const float dmatrix[6][8][2]= { + /*3*/{{0.5858f,0},{0.4142f,0.4142f},{0, 0.5858f}}, + /*4*/{{0.4226f,0},{0, 0.4226f},{0.366f,0.2114f},{0.2114f,0.366f}}, + /*5*/{{0.6510f,0},{0.4600f,0.4600f},{0, 0.6510f},{0.5636f,0.3254f},{0.3254f,0.5636f}}, + /*6*/{{0.5290f,0},{0.3741f,0.3741f},{0, 0.5290f},{0.4582f,0.2645f},{0.2645f,0.4582f},{0.3741f,0.3741f}}, + /*7*/{{0.4553f,0},{0.3220f,0.3220f},{0, 0.4553f},{0.3943f,0.2277f},{0.2277f,0.3943f},{0.2788f,0.2788f},{0.3220f,0.3220f}}, + /*8*/{{0.3886f,0},{0.2748f,0.2748f},{0, 0.3886f},{0.3366f,0.1943f},{0.1943f,0.3366f},{0.3366f,0.1943f},{0.1943f,0.3366f},{0.2748f,0.2748f}}, + }; + for (int32_t i = 0; i < frames; i++) { + float sampL = 0.0; + float sampR = 0.0; + for (uint32_t j = 0; j < channels; j++) { + sampL+=buffer[i*channels+j]*dmatrix[channels-3][j][0]; + sampR+=buffer[i*channels+j]*dmatrix[channels-3][j][1]; + } + dBuffer[i*out_channels]=sampL; + dBuffer[i*out_channels+1]=sampR; + } +#else + // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows 5-8. + // Coefficients in Q14. + static const int16_t dmatrix[6][8][2]= { + /*3*/{{9598, 0},{6786,6786},{0, 9598}}, + /*4*/{{6925, 0},{0, 6925},{5997,3462},{3462,5997}}, + /*5*/{{10663,0},{7540,7540},{0, 10663},{9234,5331},{5331,9234}}, + /*6*/{{8668, 0},{6129,6129},{0, 8668},{7507,4335},{4335,7507},{6129,6129}}, + /*7*/{{7459, 0},{5275,5275},{0, 7459},{6460,3731},{3731,6460},{4568,4568},{5275,5275}}, + /*8*/{{6368, 0},{4502,4502},{0, 6368},{5514,3184},{3184,5514},{5514,3184},{3184,5514},{4502,4502}} + }; + for (int32_t i = 0; i < frames; i++) { + int32_t sampL = 0; + int32_t sampR = 0; + for (uint32_t j = 0; j < channels; j++) { + sampL+=buffer[i*channels+j]*dmatrix[channels-3][j][0]; + sampR+=buffer[i*channels+j]*dmatrix[channels-3][j][1]; + } + sampL = (sampL + 8192)>>14; + dBuffer[i*out_channels] = static_cast<AudioDataValue>(MOZ_CLIP_TO_15(sampL)); + sampR = (sampR + 8192)>>14; + dBuffer[i*out_channels+1] = static_cast<AudioDataValue>(MOZ_CLIP_TO_15(sampR)); + } +#endif + channels = out_channels; + buffer = dBuffer; +} + bool OggReader::DecodeAudioData() { NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread."); diff --git a/content/media/ogg/OggReader.h b/content/media/ogg/OggReader.h index 76171c824..76d3894b1 100644 --- a/content/media/ogg/OggReader.h +++ b/content/media/ogg/OggReader.h @@ -74,6 +74,13 @@ public: MetadataTags** aTags); virtual nsresult Seek(int64_t aTime, int64_t aStartTime, int64_t aEndTime, int64_t aCurrentTime); virtual nsresult GetBuffered(dom::TimeRanges* aBuffered, int64_t aStartTime); + + // Downmix multichannel Audio samples to Stereo. + // It is used from Vorbis and Opus decoders. + // Input are the buffer contains multichannel data, + // the number of channels and the number of frames. + static void DownmixToStereo(nsAutoArrayPtr<AudioDataValue>& buffer, + uint32_t& channel, int32_t frames); private: // This monitor should be taken when reading or writing to mIsChained. diff --git a/content/media/ogg/OpusParser.cpp b/content/media/ogg/OpusParser.cpp new file mode 100644 index 000000000..6658ad492 --- /dev/null +++ b/content/media/ogg/OpusParser.cpp @@ -0,0 +1,197 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <string.h> + +#include "mozilla/DebugOnly.h" +#include "mozilla/Endian.h" +#include <stdint.h> + +#include "OpusParser.h" + +#include "nsDebug.h" +#include "MediaDecoderReader.h" +#include "VideoUtils.h" +#include <algorithm> + +#include "opus/opus.h" +extern "C" { +#include "opus/opus_multistream.h" +} + +namespace mozilla { + +#ifdef PR_LOGGING +extern PRLogModuleInfo* gMediaDecoderLog; +#define OPUS_LOG(type, msg) PR_LOG(gMediaDecoderLog, type, msg) +#else +#define OPUS_LOG(type, msg) +#endif + +OpusParser::OpusParser(): + mRate(0), + mNominalRate(0), + mChannels(0), + mPreSkip(0), +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + mGain(1.0f), +#else + mGain_Q16(65536), +#endif + mChannelMapping(0), + mStreams(0), + mCoupledStreams(0) +{ } + +bool OpusParser::DecodeHeader(unsigned char* aData, size_t aLength) +{ + if (aLength < 19 || memcmp(aData, "OpusHead", 8)) { + OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: unrecognized header")); + return false; + } + + mRate = 48000; // The Opus decoder runs at 48 kHz regardless. + + int version = aData[8]; + // Accept file format versions 0.x. + if ((version & 0xf0) != 0) { + OPUS_LOG(PR_LOG_DEBUG, ("Rejecting unknown Opus file version %d", version)); + return false; + } + + mChannels = aData[9]; + if (mChannels<1) { + OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: Number of channels %d", mChannels)); + return false; + } + + mPreSkip = LittleEndian::readUint16(aData + 10); + mNominalRate = LittleEndian::readUint32(aData + 12); + double gain_dB = LittleEndian::readInt16(aData + 16) / 256.0; +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + mGain = static_cast<float>(pow(10,0.05*gain_dB)); +#else + mGain_Q16 = static_cast<int32_t>(std::min(65536*pow(10,0.05*gain_dB)+0.5, + static_cast<double>(INT32_MAX))); +#endif + mChannelMapping = aData[18]; + + if (mChannelMapping == 0) { + // Mapping family 0 only allows two channels + if (mChannels>2) { + OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: too many channels (%d) for" + " mapping family 0.", mChannels)); + return false; + } + mStreams = 1; + mCoupledStreams = mChannels - 1; + mMappingTable[0] = 0; + mMappingTable[1] = 1; + } else if (mChannelMapping == 1) { + // Currently only up to 8 channels are defined for mapping family 1 + if (mChannels>8) { + OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: too many channels (%d) for" + " mapping family 1.", mChannels)); + return false; + } + if (aLength>static_cast<unsigned>(20+mChannels)) { + mStreams = aData[19]; + mCoupledStreams = aData[20]; + int i; + for (i=0; i<mChannels; i++) + mMappingTable[i] = aData[21+i]; + } else { + OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: channel mapping %d," + " but no channel mapping table", mChannelMapping)); + return false; + } + } else { + OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: unsupported channel mapping " + "family %d", mChannelMapping)); + return false; + } + if (mStreams < 1) { + OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: no streams")); + return false; + } + if (mCoupledStreams > mStreams) { + OPUS_LOG(PR_LOG_DEBUG, ("Invalid Opus file: more coupled streams (%d) than " + "total streams (%d)", mCoupledStreams, mStreams)); + return false; + } + +#ifdef DEBUG + OPUS_LOG(PR_LOG_DEBUG, ("Opus stream header:")); + OPUS_LOG(PR_LOG_DEBUG, (" channels: %d", mChannels)); + OPUS_LOG(PR_LOG_DEBUG, (" preskip: %d", mPreSkip)); + OPUS_LOG(PR_LOG_DEBUG, (" original: %d Hz", mNominalRate)); + OPUS_LOG(PR_LOG_DEBUG, (" gain: %.2f dB", gain_dB)); + OPUS_LOG(PR_LOG_DEBUG, ("Channel Mapping:")); + OPUS_LOG(PR_LOG_DEBUG, (" family: %d", mChannelMapping)); + OPUS_LOG(PR_LOG_DEBUG, (" streams: %d", mStreams)); +#endif + return true; +} + +bool OpusParser::DecodeTags(unsigned char* aData, size_t aLength) +{ + if (aLength < 16 || memcmp(aData, "OpusTags", 8)) + return false; + + // Copy out the raw comment lines, but only do basic validation + // checks against the string packing: too little data, too many + // comments, or comments that are too long. Rejecting these cases + // helps reduce the propagation of broken files. + // We do not ensure they are valid UTF-8 here, nor do we validate + // the required ASCII_TAG=value format of the user comments. + const unsigned char* buf = aData + 8; + uint32_t bytes = aLength - 8; + uint32_t len; + // Read the vendor string. + len = LittleEndian::readUint32(buf); + buf += 4; + bytes -= 4; + if (len > bytes) + return false; + mVendorString = nsCString(reinterpret_cast<const char*>(buf), len); + buf += len; + bytes -= len; + // Read the user comments. + if (bytes < 4) + return false; + uint32_t ncomments = LittleEndian::readUint32(buf); + buf += 4; + bytes -= 4; + // If there are so many comments even their length fields + // won't fit in the packet, stop reading now. + if (ncomments > (bytes>>2)) + return false; + uint32_t i; + for (i = 0; i < ncomments; i++) { + if (bytes < 4) + return false; + len = LittleEndian::readUint32(buf); + buf += 4; + bytes -= 4; + if (len > bytes) + return false; + mTags.AppendElement(nsCString(reinterpret_cast<const char*>(buf), len)); + buf += len; + bytes -= len; + } + +#ifdef DEBUG + OPUS_LOG(PR_LOG_DEBUG, ("Opus metadata header:")); + OPUS_LOG(PR_LOG_DEBUG, (" vendor: %s", mVendorString.get())); + for (uint32_t i = 0; i < mTags.Length(); i++) { + OPUS_LOG(PR_LOG_DEBUG, (" %s", mTags[i].get())); + } +#endif + return true; +} + +} // namespace mozilla + diff --git a/content/media/ogg/OpusParser.h b/content/media/ogg/OpusParser.h new file mode 100644 index 000000000..4e9ce0c5b --- /dev/null +++ b/content/media/ogg/OpusParser.h @@ -0,0 +1,54 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(OpusParser_h_) +#define OpusParser_h_ + +#include <stdint.h> + +#include <opus/opus.h> +#include "opus/opus_multistream.h" + +#include "nsTArray.h" +#include "nsString.h" + +namespace mozilla { + +class OpusParser +{ +public: + OpusParser(); + + bool DecodeHeader(unsigned char* aData, size_t aLength); + bool DecodeTags(unsigned char* aData, size_t aLength); + + // Various fields from the Ogg Opus header. + int mRate; // Sample rate the decoder uses (always 48 kHz). + uint32_t mNominalRate; // Original sample rate of the data (informational). + int mChannels; // Number of channels the stream encodes. + uint16_t mPreSkip; // Number of samples to strip after decoder reset. +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + float mGain; // Gain to apply to decoder output. +#else + int32_t mGain_Q16; // Gain to apply to the decoder output. +#endif + int mChannelMapping; // Channel mapping family. + int mStreams; // Number of packed streams in each packet. + int mCoupledStreams; // Number of packed coupled streams in each packet. + unsigned char mMappingTable[255]; // Channel mapping table. + + // Granule position (end sample) of the last decoded Opus packet. This is + // used to calculate the amount we should trim from the last packet. + int64_t mPrevPacketGranulepos; + + nsTArray<nsCString> mTags; // Unparsed comment strings from the header. + + nsCString mVendorString; // Encoder vendor string from the header. + +}; + +} // namespace mozilla + +#endif diff --git a/content/media/ogg/moz.build b/content/media/ogg/moz.build index 7b2a72174..b418a7a08 100644 --- a/content/media/ogg/moz.build +++ b/content/media/ogg/moz.build @@ -11,6 +11,7 @@ EXPORTS += [ 'OggDecoder.h', 'OggReader.h', 'OggWriter.h', + 'OpusParser.h', ] CPP_SOURCES += [ @@ -18,6 +19,7 @@ CPP_SOURCES += [ 'OggDecoder.cpp', 'OggReader.cpp', 'OggWriter.cpp', + 'OpusParser.cpp', ] LIBRARY_NAME = 'gkconogg_s' diff --git a/content/media/webm/WebMReader.cpp b/content/media/webm/WebMReader.cpp index e0441ca53..d2ca2edff 100644 --- a/content/media/webm/WebMReader.cpp +++ b/content/media/webm/WebMReader.cpp @@ -17,6 +17,8 @@ #include "vpx/vp8dx.h" #include "vpx/vpx_decoder.h" +#include "OggReader.h" + using mozilla::NesteggPacketHolder; template <> @@ -153,6 +155,11 @@ WebMReader::WebMReader(AbstractMediaDecoder* aDecoder) mContext(nullptr), mPacketCount(0), mChannels(0), +#ifdef MOZ_OPUS + mOpusParser(nullptr), + mOpusDecoder(nullptr), + mSkip(0), +#endif mVideoTrack(0), mAudioTrack(0), mAudioStartUsec(-1), @@ -199,6 +206,11 @@ WebMReader::~WebMReader() vorbis_info_clear(&mVorbisInfo); vorbis_comment_clear(&mVorbisComment); + if (mOpusDecoder) { + opus_multistream_decoder_destroy(mOpusDecoder); + mOpusDecoder = nullptr; + } + MOZ_COUNT_DTOR(WebMReader); } @@ -383,51 +395,83 @@ nsresult WebMReader::ReadMetadata(VideoInfo* aInfo, mAudioTrack = track; mHasAudio = true; mInfo.mHasAudio = true; + mAudioCodec = nestegg_track_codec_id(mContext, track); + mCodecDelay = params.codec_delay; + + if (mAudioCodec == NESTEGG_CODEC_VORBIS) { + // Get the Vorbis header data + unsigned int nheaders = 0; + r = nestegg_track_codec_data_count(mContext, track, &nheaders); + if (r == -1 || nheaders != 3) { + Cleanup(); + return NS_ERROR_FAILURE; + } - // Get the Vorbis header data - unsigned int nheaders = 0; - r = nestegg_track_codec_data_count(mContext, track, &nheaders); - if (r == -1 || nheaders != 3) { - Cleanup(); - return NS_ERROR_FAILURE; - } + for (uint32_t header = 0; header < nheaders; ++header) { + unsigned char* data = 0; + size_t length = 0; + + r = nestegg_track_codec_data(mContext, track, header, &data, &length); + if (r == -1) { + Cleanup(); + return NS_ERROR_FAILURE; + } + ogg_packet opacket = InitOggPacket(data, length, header == 0, false, 0); + + r = vorbis_synthesis_headerin(&mVorbisInfo, + &mVorbisComment, + &opacket); + if (r != 0) { + Cleanup(); + return NS_ERROR_FAILURE; + } + } + + r = vorbis_synthesis_init(&mVorbisDsp, &mVorbisInfo); + if (r != 0) { + Cleanup(); + return NS_ERROR_FAILURE; + } + + r = vorbis_block_init(&mVorbisDsp, &mVorbisBlock); + if (r != 0) { + Cleanup(); + return NS_ERROR_FAILURE; + } - for (uint32_t header = 0; header < nheaders; ++header) { + mInfo.mAudioRate = mVorbisDsp.vi->rate; + mInfo.mAudioChannels = mVorbisDsp.vi->channels; + mChannels = mInfo.mAudioChannels; +#ifdef MOZ_OPUS + } else if (mAudioCodec == NESTEGG_CODEC_OPUS) { unsigned char* data = 0; size_t length = 0; - - r = nestegg_track_codec_data(mContext, track, header, &data, &length); + r = nestegg_track_codec_data(mContext, track, 0, &data, &length); if (r == -1) { Cleanup(); return NS_ERROR_FAILURE; } - ogg_packet opacket = InitOggPacket(data, length, header == 0, false, 0); + mOpusParser = new OpusParser; + if (!mOpusParser->DecodeHeader(data, length)) { + Cleanup(); + return NS_ERROR_FAILURE; + } - r = vorbis_synthesis_headerin(&mVorbisInfo, - &mVorbisComment, - &opacket); - if (r != 0) { + if (!InitOpusDecoder()) { Cleanup(); return NS_ERROR_FAILURE; } - } - r = vorbis_synthesis_init(&mVorbisDsp, &mVorbisInfo); - if (r != 0) { - Cleanup(); - return NS_ERROR_FAILURE; - } + mInfo.mAudioRate = mOpusParser->mRate; - r = vorbis_block_init(&mVorbisDsp, &mVorbisBlock); - if (r != 0) { + mInfo.mAudioChannels = mOpusParser->mChannels; + mInfo.mAudioChannels = mInfo.mAudioChannels > 2 ? 2 : mInfo.mAudioChannels; +#endif + } else { Cleanup(); return NS_ERROR_FAILURE; } - - mInfo.mAudioRate = mVorbisDsp.vi->rate; - mInfo.mAudioChannels = mVorbisDsp.vi->channels; - mChannels = mInfo.mAudioChannels; } } @@ -484,6 +528,25 @@ nsresult WebMReader::ReadMetadata(VideoInfo* aInfo, return NS_OK; } +#ifdef MOZ_OPUS +bool WebMReader::InitOpusDecoder() +{ + int r; + + NS_ASSERTION(mOpusDecoder == nullptr, "leaking OpusDecoder"); + + mOpusDecoder = opus_multistream_decoder_create(mOpusParser->mRate, + mOpusParser->mChannels, + mOpusParser->mStreams, + mOpusParser->mCoupledStreams, + mOpusParser->mMappingTable, + &r); + mSkip = mOpusParser->mPreSkip; + + return r == OPUS_OK; +} +#endif + ogg_packet WebMReader::InitOggPacket(unsigned char* aData, size_t aLength, bool aBOS, @@ -517,7 +580,7 @@ bool WebMReader::DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset) return false; } - const uint32_t rate = mVorbisDsp.vi->rate; + const uint32_t rate = mInfo.mAudioRate; uint64_t tstamp_usecs = tstamp / NS_PER_USEC; if (mAudioStartUsec == -1) { // This is the first audio chunk. Assume the start time of our decode @@ -559,27 +622,167 @@ bool WebMReader::DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset) if (r == -1) { return false; } + if (mAudioCodec == NESTEGG_CODEC_VORBIS) { + ogg_packet opacket = InitOggPacket(data, length, false, false, -1); + + if (vorbis_synthesis(&mVorbisBlock, &opacket) != 0) { + return false; + } - ogg_packet opacket = InitOggPacket(data, length, false, false, -1); + if (vorbis_synthesis_blockin(&mVorbisDsp, + &mVorbisBlock) != 0) { + return false; + } - if (vorbis_synthesis(&mVorbisBlock, &opacket) != 0) { - return false; - } + VorbisPCMValue** pcm = 0; + int32_t frames = 0; + while ((frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm)) > 0) { + nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frames * mChannels]); + for (uint32_t j = 0; j < mChannels; ++j) { + VorbisPCMValue* channel = pcm[j]; + for (uint32_t i = 0; i < uint32_t(frames); ++i) { + buffer[i*mChannels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[i]); + } + } - if (vorbis_synthesis_blockin(&mVorbisDsp, - &mVorbisBlock) != 0) { - return false; - } + CheckedInt64 duration = FramesToUsecs(frames, rate); + if (!duration.isValid()) { + NS_WARNING("Int overflow converting WebM audio duration"); + return false; + } + CheckedInt64 total_duration = FramesToUsecs(total_frames, rate); + if (!total_duration.isValid()) { + NS_WARNING("Int overflow converting WebM audio total_duration"); + return false; + } + + CheckedInt64 time = total_duration + tstamp_usecs; + if (!time.isValid()) { + NS_WARNING("Int overflow adding total_duration and tstamp_usecs"); + nestegg_free_packet(aPacket); + return false; + }; + + total_frames += frames; + AudioQueue().Push(new AudioData(aOffset, + time.value(), + duration.value(), + frames, + buffer.forget(), + mChannels)); + mAudioFrames += frames; + if (vorbis_synthesis_read(&mVorbisDsp, frames) != 0) { + return false; + } + } + } else if (mAudioCodec == NESTEGG_CODEC_OPUS) { +#ifdef MOZ_OPUS + uint32_t channels = mOpusParser->mChannels; + + // Maximum value is 63*2880, so there's no chance of overflow. + int32_t frames_number = opus_packet_get_nb_frames(data, length); + + if (frames_number <= 0) + return false; // Invalid packet header. + int32_t samples = opus_packet_get_samples_per_frame(data, + (opus_int32) rate); + int32_t frames = frames_number*samples; + + // A valid Opus packet must be between 2.5 and 120 ms long. + if (frames < 120 || frames > 5760) + return false; + nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frames * channels]); - VorbisPCMValue** pcm = 0; - int32_t frames = 0; - while ((frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm)) > 0) { - nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frames * mChannels]); - for (uint32_t j = 0; j < mChannels; ++j) { - VorbisPCMValue* channel = pcm[j]; - for (uint32_t i = 0; i < uint32_t(frames); ++i) { - buffer[i*mChannels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[i]); + // Decode to the appropriate sample type. +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + int ret = opus_multistream_decode_float(mOpusDecoder, + data, length, + buffer, frames, false); +#else + int ret = opus_multistream_decode(mOpusDecoder, + data, length, + buffer, frames, false); +#endif + if (ret < 0) + return false; + NS_ASSERTION(ret == frames, "Opus decoded too few audio samples"); + + // Trim the initial frames while the decoder is settling. + if (mSkip > 0) { + int32_t skipFrames = std::min(mSkip, frames); + if (skipFrames == frames) { + // discard the whole packet + mSkip -= frames; + LOG(PR_LOG_DEBUG, ("Opus decoder skipping %d frames" + " (whole packet)", frames)); + return true; } + int32_t keepFrames = frames - skipFrames; + int samples = keepFrames * channels; + nsAutoArrayPtr<AudioDataValue> trimBuffer(new AudioDataValue[samples]); + for (int i = 0; i < samples; i++) + trimBuffer[i] = buffer[skipFrames*channels + i]; + + frames = keepFrames; + buffer = trimBuffer; + + mSkip -= skipFrames; + LOG(PR_LOG_DEBUG, ("Opus decoder skipping %d frames", skipFrames)); + } + + int64_t discardPadding = 0; + r = nestegg_packet_discard_padding(aPacket, &discardPadding); + if (r == -1) { + return false; + } + if (discardPadding > 0) { + CheckedInt64 discardFrames = UsecsToFrames(discardPadding * NS_PER_USEC, rate); + if (!discardFrames.isValid()) { + NS_WARNING("Int overflow in DiscardPadding"); + return false; + } + int32_t keepFrames = frames - discardFrames.value(); + if (keepFrames > 0) { + int samples = keepFrames * channels; + nsAutoArrayPtr<AudioDataValue> trimBuffer(new AudioDataValue[samples]); + for (int i = 0; i < samples; i++) + trimBuffer[i] = buffer[i]; + frames = keepFrames; + buffer = trimBuffer; + } else { + LOG(PR_LOG_DEBUG, ("Opus decoder discarding whole packet" + " ( %d frames) as padding", frames)); + return true; + } + } + + // Apply the header gain if one was specified. +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + if (mOpusParser->mGain != 1.0f) { + float gain = mOpusParser->mGain; + int samples = frames * channels; + for (int i = 0; i < samples; i++) { + buffer[i] *= gain; + } + } +#else + if (mOpusParser->mGain_Q16 != 65536) { + int64_t gain_Q16 = mOpusParser->mGain_Q16; + int samples = frames * channels; + for (int i = 0; i < samples; i++) { + int32_t val = static_cast<int32_t>((gain_Q16*buffer[i] + 32768)>>16); + buffer[i] = static_cast<AudioDataValue>(MOZ_CLIP_TO_15(val)); + } + } +#endif + + // More than 2 decoded channels must be downmixed to stereo. + if (channels > 2) { + // Opus doesn't provide a channel mapping for more than 8 channels, + // so we can't downmix more than that. + if (channels > 8) + return false; + OggReader::DownmixToStereo(buffer, channels, frames); } CheckedInt64 duration = FramesToUsecs(frames, rate); @@ -587,30 +790,25 @@ bool WebMReader::DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset) NS_WARNING("Int overflow converting WebM audio duration"); return false; } - CheckedInt64 total_duration = FramesToUsecs(total_frames, rate); - if (!total_duration.isValid()) { - NS_WARNING("Int overflow converting WebM audio total_duration"); - return false; - } - - CheckedInt64 time = total_duration + tstamp_usecs; + + CheckedInt64 time = tstamp_usecs; if (!time.isValid()) { NS_WARNING("Int overflow adding total_duration and tstamp_usecs"); nestegg_free_packet(aPacket); return false; }; - total_frames += frames; - AudioQueue().Push(new AudioData(aOffset, + AudioQueue().Push(new AudioData(mDecoder->GetResource()->Tell(), time.value(), duration.value(), frames, buffer.forget(), mChannels)); + mAudioFrames += frames; - if (vorbis_synthesis_read(&mVorbisDsp, frames) != 0) { - return false; - } +#else + return false; +#endif /* MOZ_OPUS */ } } diff --git a/content/media/webm/WebMReader.h b/content/media/webm/WebMReader.h index 5cac84aa8..83823cc8b 100644 --- a/content/media/webm/WebMReader.h +++ b/content/media/webm/WebMReader.h @@ -26,6 +26,10 @@ #include "DASHRepReader.h" #endif +#ifdef MOZ_OPUS +#include "OpusParser.h" +#endif + namespace mozilla { class WebMBufferedState; @@ -244,6 +248,11 @@ protected: bool aEOS, int64_t aGranulepos); +#ifdef MOZ_OPUS + // Setup opus decoder + bool InitOpusDecoder(); +#endif + // Decode a nestegg packet of audio data. Push the audio data on the // audio queue. Returns true when there's more audio to decode, // false if the audio is finished, end of file has been reached, @@ -272,6 +281,14 @@ private: uint32_t mPacketCount; uint32_t mChannels; + +#ifdef MOZ_OPUS + // Opus decoder state + nsAutoPtr<OpusParser> mOpusParser; + OpusMSDecoder *mOpusDecoder; + int mSkip; // Number of samples left to trim before playback. +#endif + // Queue of video and audio packets that have been read but not decoded. These // must only be accessed from the state machine thread. WebMPacketQueue mVideoPackets; @@ -287,6 +304,9 @@ private: // Number of audio frames we've decoded since decoding began at mAudioStartMs. uint64_t mAudioFrames; + // Number of nanoseconds that must be discarded from the start of the Stream. + uint64_t mCodecDelay; + // Parser state and computed offset-time mappings. Shared by multiple // readers when decoder has been cloned. Main thread only. nsRefPtr<WebMBufferedState> mBufferedState; @@ -302,6 +322,9 @@ private: bool mHasVideo; bool mHasAudio; + // Codec ID of audio track + int mAudioCodec; + #ifdef MOZ_DASH // Byte range for initialisation data; e.g. specified in DASH manifest. MediaByteRange mInitByteRange; |