diff options
author | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
---|---|---|
committer | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
commit | 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch) | |
tree | 10027f336435511475e392454359edea8e25895d /dom/media/ogg | |
parent | 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff) | |
download | uxp-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz |
Add m-esr52 at 52.6.0
Diffstat (limited to 'dom/media/ogg')
-rw-r--r-- | dom/media/ogg/OggCodecState.cpp | 1839 | ||||
-rw-r--r-- | dom/media/ogg/OggCodecState.h | 644 | ||||
-rw-r--r-- | dom/media/ogg/OggCodecStore.cpp | 37 | ||||
-rw-r--r-- | dom/media/ogg/OggCodecStore.h | 38 | ||||
-rw-r--r-- | dom/media/ogg/OggDecoder.cpp | 79 | ||||
-rw-r--r-- | dom/media/ogg/OggDecoder.h | 61 | ||||
-rw-r--r-- | dom/media/ogg/OggDemuxer.cpp | 2194 | ||||
-rw-r--r-- | dom/media/ogg/OggDemuxer.h | 387 | ||||
-rw-r--r-- | dom/media/ogg/OggWriter.cpp | 214 | ||||
-rw-r--r-- | dom/media/ogg/OggWriter.h | 52 | ||||
-rw-r--r-- | dom/media/ogg/OpusParser.cpp | 185 | ||||
-rw-r--r-- | dom/media/ogg/OpusParser.h | 48 | ||||
-rw-r--r-- | dom/media/ogg/moz.build | 25 |
13 files changed, 5803 insertions, 0 deletions
diff --git a/dom/media/ogg/OggCodecState.cpp b/dom/media/ogg/OggCodecState.cpp new file mode 100644 index 0000000000..6830639e89 --- /dev/null +++ b/dom/media/ogg/OggCodecState.cpp @@ -0,0 +1,1839 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <string.h> + +#include "mozilla/EndianUtils.h" +#include <stdint.h> + +#include "nsDebug.h" +#include "OggCodecState.h" +#include "OpusParser.h" +#include "VideoUtils.h" +#include <algorithm> + +#include <opus/opus.h> +#include "opus/opus_multistream.h" + +// On Android JellyBean, the hardware.h header redefines version_major and +// version_minor, which breaks our build. See: +// https://bugzilla.mozilla.org/show_bug.cgi?id=912702#c6 +#ifdef MOZ_WIDGET_GONK +#ifdef version_major +#undef version_major +#endif +#ifdef version_minor +#undef version_minor +#endif +#endif + +namespace mozilla { + +extern LazyLogModule gMediaDecoderLog; +#define LOG(type, msg) MOZ_LOG(gMediaDecoderLog, type, msg) + +/** Decoder base class for Ogg-encapsulated streams. 
*/ +OggCodecState* +OggCodecState::Create(ogg_page* aPage) +{ + NS_ASSERTION(ogg_page_bos(aPage), "Only call on BOS page!"); + nsAutoPtr<OggCodecState> codecState; + if (aPage->body_len > 6 && memcmp(aPage->body+1, "theora", 6) == 0) { + codecState = new TheoraState(aPage); + } else if (aPage->body_len > 6 && memcmp(aPage->body+1, "vorbis", 6) == 0) { + codecState = new VorbisState(aPage); + } else if (aPage->body_len > 8 && memcmp(aPage->body, "OpusHead", 8) == 0) { + codecState = new OpusState(aPage); + } else if (aPage->body_len > 8 && memcmp(aPage->body, "fishead\0", 8) == 0) { + codecState = new SkeletonState(aPage); + } else if (aPage->body_len > 5 && memcmp(aPage->body, "\177FLAC", 5) == 0) { + codecState = new FlacState(aPage); + } else { + codecState = new OggCodecState(aPage, false); + } + return codecState->OggCodecState::InternalInit() ? codecState.forget() : nullptr; +} + +OggCodecState::OggCodecState(ogg_page* aBosPage, bool aActive) + : mPacketCount(0) + , mSerial(ogg_page_serialno(aBosPage)) + , mActive(aActive) + , mDoneReadingHeaders(!aActive) +{ + MOZ_COUNT_CTOR(OggCodecState); + memset(&mState, 0, sizeof(ogg_stream_state)); +} + +OggCodecState::~OggCodecState() +{ + MOZ_COUNT_DTOR(OggCodecState); + Reset(); +#ifdef DEBUG + int ret = +#endif + ogg_stream_clear(&mState); + NS_ASSERTION(ret == 0, "ogg_stream_clear failed"); +} + +nsresult +OggCodecState::Reset() +{ + if (ogg_stream_reset(&mState) != 0) { + return NS_ERROR_FAILURE; + } + mPackets.Erase(); + ClearUnstamped(); + return NS_OK; +} + +void +OggCodecState::ClearUnstamped() +{ + for (uint32_t i = 0; i < mUnstamped.Length(); ++i) { + OggCodecState::ReleasePacket(mUnstamped[i]); + } + mUnstamped.Clear(); +} + +bool +OggCodecState::InternalInit() +{ + int ret = ogg_stream_init(&mState, mSerial); + return ret == 0; +} + +bool +OggCodecState::IsValidVorbisTagName(nsCString& aName) +{ + // Tag names must consist of ASCII 0x20 through 0x7D, + // excluding 0x3D '=' which is the separator. 
+ uint32_t length = aName.Length(); + const char* data = aName.Data(); + for (uint32_t i = 0; i < length; i++) { + if (data[i] < 0x20 || data[i] > 0x7D || data[i] == '=') { + return false; + } + } + return true; +} + +bool +OggCodecState::AddVorbisComment(MetadataTags* aTags, + const char* aComment, + uint32_t aLength) +{ + const char* div = (const char*)memchr(aComment, '=', aLength); + if (!div) { + LOG(LogLevel::Debug, ("Skipping comment: no separator")); + return false; + } + nsCString key = nsCString(aComment, div-aComment); + if (!IsValidVorbisTagName(key)) { + LOG(LogLevel::Debug, ("Skipping comment: invalid tag name")); + return false; + } + uint32_t valueLength = aLength - (div-aComment); + nsCString value = nsCString(div + 1, valueLength); + if (!IsUTF8(value)) { + LOG(LogLevel::Debug, ("Skipping comment: invalid UTF-8 in value")); + return false; + } + aTags->Put(key, value); + return true; +} + +void +VorbisState::RecordVorbisPacketSamples(ogg_packet* aPacket, long aSamples) +{ +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION + mVorbisPacketSamples[aPacket] = aSamples; +#endif +} + +void +VorbisState::ValidateVorbisPacketSamples(ogg_packet* aPacket, long aSamples) +{ +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION + NS_ASSERTION(mVorbisPacketSamples[aPacket] == aSamples, + "Decoded samples for Vorbis packet don't match expected!"); + mVorbisPacketSamples.erase(aPacket); +#endif +} + +void +VorbisState::AssertHasRecordedPacketSamples(ogg_packet* aPacket) +{ +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION + NS_ASSERTION(mVorbisPacketSamples.count(aPacket) == 1, + "Must have recorded packet samples"); +#endif +} + +static ogg_packet* +Clone(ogg_packet* aPacket) +{ + ogg_packet* p = new ogg_packet(); + memcpy(p, aPacket, sizeof(ogg_packet)); + p->packet = new unsigned char[p->bytes]; + memcpy(p->packet, aPacket->packet, p->bytes); + return p; +} + +void +OggCodecState::ReleasePacket(ogg_packet* aPacket) +{ + if (aPacket) + delete [] aPacket->packet; + delete aPacket; +} + 
+void +OggPacketQueue::Append(ogg_packet* aPacket) +{ + nsDeque::Push(aPacket); +} + +bool +OggCodecState::IsPacketReady() +{ + return !mPackets.IsEmpty(); +} + +ogg_packet* +OggCodecState::PacketOut() +{ + if (mPackets.IsEmpty()) { + return nullptr; + } + return mPackets.PopFront(); +} + +ogg_packet* +OggCodecState::PacketPeek() +{ + if (mPackets.IsEmpty()) { + return nullptr; + } + return mPackets.PeekFront(); +} + +void +OggCodecState::PushFront(OggPacketQueue &&aOther) +{ + while (!aOther.IsEmpty()) { + mPackets.PushFront(aOther.Pop()); + } +} + +already_AddRefed<MediaRawData> +OggCodecState::PacketOutAsMediaRawData() +{ + ogg_packet* packet = PacketOut(); + if (!packet) { + return nullptr; + } + + NS_ASSERTION(!IsHeader(packet), "PacketOutAsMediaRawData can only be called on non-header packets"); + RefPtr<MediaRawData> sample = new MediaRawData(packet->packet, packet->bytes); + if (packet->bytes && !sample->Data()) { + // OOM. + ReleasePacket(packet); + return nullptr; + } + + int64_t end_tstamp = Time(packet->granulepos); + NS_ASSERTION(end_tstamp >= 0, "timestamp invalid"); + + int64_t duration = PacketDuration(packet); + NS_ASSERTION(duration >= 0, "duration invalid"); + + sample->mTimecode = packet->granulepos; + sample->mTime = end_tstamp - duration; + sample->mDuration = duration; + sample->mKeyframe = IsKeyframe(packet); + sample->mEOS = packet->e_o_s; + + ReleasePacket(packet); + + return sample.forget(); +} + +nsresult +OggCodecState::PageIn(ogg_page* aPage) +{ + if (!mActive) { + return NS_OK; + } + NS_ASSERTION(static_cast<uint32_t>(ogg_page_serialno(aPage)) == mSerial, + "Page must be for this stream!"); + if (ogg_stream_pagein(&mState, aPage) == -1) { + return NS_ERROR_FAILURE; + } + int r; + do { + ogg_packet packet; + r = ogg_stream_packetout(&mState, &packet); + if (r == 1) { + mPackets.Append(Clone(&packet)); + } + } while (r != 0); + if (ogg_stream_check(&mState)) { + NS_WARNING("Unrecoverable error in ogg_stream_packetout"); + return 
NS_ERROR_FAILURE; + } + return NS_OK; +} + +nsresult +OggCodecState::PacketOutUntilGranulepos(bool& aFoundGranulepos) +{ + int r; + aFoundGranulepos = false; + // Extract packets from the sync state until either no more packets + // come out, or we get a data packet with non -1 granulepos. + do { + ogg_packet packet; + r = ogg_stream_packetout(&mState, &packet); + if (r == 1) { + ogg_packet* clone = Clone(&packet); + if (IsHeader(&packet)) { + // Header packets go straight into the packet queue. + mPackets.Append(clone); + } else { + // We buffer data packets until we encounter a granulepos. We'll + // then use the granulepos to figure out the granulepos of the + // preceeding packets. + mUnstamped.AppendElement(clone); + aFoundGranulepos = packet.granulepos > 0; + } + } + } while (r != 0 && !aFoundGranulepos); + if (ogg_stream_check(&mState)) { + NS_WARNING("Unrecoverable error in ogg_stream_packetout"); + return NS_ERROR_FAILURE; + } + return NS_OK; +} + +TheoraState::TheoraState(ogg_page* aBosPage) + : OggCodecState(aBosPage, true) + , mSetup(0) + , mCtx(0) + , mPixelAspectRatio(0) +{ + MOZ_COUNT_CTOR(TheoraState); + th_info_init(&mInfo); + th_comment_init(&mComment); +} + +TheoraState::~TheoraState() +{ + MOZ_COUNT_DTOR(TheoraState); + th_setup_free(mSetup); + th_decode_free(mCtx); + th_comment_clear(&mComment); + th_info_clear(&mInfo); +} + +bool +TheoraState::Init() +{ + if (!mActive) { + return false; + } + + int64_t n = mInfo.aspect_numerator; + int64_t d = mInfo.aspect_denominator; + + mPixelAspectRatio = (n == 0 || d == 0) + ? 1.0f : static_cast<float>(n) / static_cast<float>(d); + + // Ensure the frame and picture regions aren't larger than our prescribed + // maximum, or zero sized. 
+ nsIntSize frame(mInfo.frame_width, mInfo.frame_height); + nsIntRect picture(mInfo.pic_x, mInfo.pic_y, mInfo.pic_width, mInfo.pic_height); + if (!IsValidVideoRegion(frame, picture, frame)) { + return mActive = false; + } + + mCtx = th_decode_alloc(&mInfo, mSetup); + if (!mCtx) { + return mActive = false; + } + + return true; +} + +bool +TheoraState::DecodeHeader(ogg_packet* aPacket) +{ + nsAutoRef<ogg_packet> autoRelease(aPacket); + mPacketCount++; + int ret = th_decode_headerin(&mInfo, + &mComment, + &mSetup, + aPacket); + + // We must determine when we've read the last header packet. + // th_decode_headerin() does not tell us when it's read the last header, so + // we must keep track of the headers externally. + // + // There are 3 header packets, the Identification, Comment, and Setup + // headers, which must be in that order. If they're out of order, the file + // is invalid. If we've successfully read a header, and it's the setup + // header, then we're done reading headers. The first byte of each packet + // determines it's type as follows: + // 0x80 -> Identification header + // 0x81 -> Comment header + // 0x82 -> Setup header + // See http://www.theora.org/doc/Theora.pdf Chapter 6, "Bitstream Headers", + // for more details of the Ogg/Theora containment scheme. + bool isSetupHeader = aPacket->bytes > 0 && aPacket->packet[0] == 0x82; + if (ret < 0 || mPacketCount > 3) { + // We've received an error, or the first three packets weren't valid + // header packets. Assume bad input. + // Our caller will deactivate the bitstream. + return false; + } else if (ret > 0 && isSetupHeader && mPacketCount == 3) { + // Successfully read the three header packets. 
+ mDoneReadingHeaders = true; + } + return true; +} + +int64_t +TheoraState::Time(int64_t granulepos) +{ + if (!mActive) { + return -1; + } + return TheoraState::Time(&mInfo, granulepos); +} + +bool +TheoraState::IsHeader(ogg_packet* aPacket) +{ + return th_packet_isheader(aPacket); +} + +# define TH_VERSION_CHECK(_info,_maj,_min,_sub) \ + (((_info)->version_major>(_maj)||(_info)->version_major==(_maj))&& \ + (((_info)->version_minor>(_min)||(_info)->version_minor==(_min))&& \ + (_info)->version_subminor>=(_sub))) + +int64_t +TheoraState::Time(th_info* aInfo, int64_t aGranulepos) +{ + if (aGranulepos < 0 || aInfo->fps_numerator == 0) { + return -1; + } + // Implementation of th_granule_frame inlined here to operate + // on the th_info structure instead of the theora_state. + int shift = aInfo->keyframe_granule_shift; + ogg_int64_t iframe = aGranulepos >> shift; + ogg_int64_t pframe = aGranulepos - (iframe << shift); + int64_t frameno = iframe + pframe - TH_VERSION_CHECK(aInfo, 3, 2, 1); + CheckedInt64 t = + ((CheckedInt64(frameno) + 1) * USECS_PER_S) * aInfo->fps_denominator; + if (!t.isValid()) { + return -1; + } + t /= aInfo->fps_numerator; + return t.isValid() ? t.value() : -1; +} + +int64_t TheoraState::StartTime(int64_t granulepos) +{ + if (granulepos < 0 || !mActive || mInfo.fps_numerator == 0) { + return -1; + } + CheckedInt64 t = + (CheckedInt64(th_granule_frame(mCtx, granulepos)) * USECS_PER_S) + * mInfo.fps_denominator; + if (!t.isValid()) { + return -1; + } + return t.value() / mInfo.fps_numerator; +} + +int64_t +TheoraState::PacketDuration(ogg_packet* aPacket) +{ + if (!mActive || mInfo.fps_numerator == 0) { + return -1; + } + CheckedInt64 t = + SaferMultDiv(mInfo.fps_denominator, USECS_PER_S, mInfo.fps_numerator); + return t.isValid() ? t.value() : -1; +} + +int64_t +TheoraState::MaxKeyframeOffset() +{ + // Determine the maximum time in microseconds by which a key frame could + // offset for the theora bitstream. 
Theora granulepos encode time as: + // ((key_frame_number << granule_shift) + frame_offset). + // Therefore the maximum possible time by which any frame could be offset + // from a keyframe is the duration of (1 << granule_shift) - 1) frames. + int64_t frameDuration; + + // Max number of frames keyframe could possibly be offset. + int64_t keyframeDiff = (1 << mInfo.keyframe_granule_shift) - 1; + + // Length of frame in usecs. + frameDuration = (mInfo.fps_denominator * USECS_PER_S) / mInfo.fps_numerator; + + // Total time in usecs keyframe can be offset from any given frame. + return frameDuration * keyframeDiff; +} + +bool +TheoraState::IsKeyframe(ogg_packet* pkt) +{ + // first bit of packet is 1 for header, 0 for data + // second bit of packet is 1 for inter frame, 0 for intra frame + return (pkt->bytes >= 1 && (pkt->packet[0] & 0x40) == 0x00); +} + +nsresult +TheoraState::PageIn(ogg_page* aPage) +{ + if (!mActive) + return NS_OK; + NS_ASSERTION(static_cast<uint32_t>(ogg_page_serialno(aPage)) == mSerial, + "Page must be for this stream!"); + if (ogg_stream_pagein(&mState, aPage) == -1) + return NS_ERROR_FAILURE; + bool foundGp; + nsresult res = PacketOutUntilGranulepos(foundGp); + if (NS_FAILED(res)) + return res; + if (foundGp && mDoneReadingHeaders) { + // We've found a packet with a granulepos, and we've loaded our metadata + // and initialized our decoder. Determine granulepos of buffered packets. + ReconstructTheoraGranulepos(); + for (uint32_t i = 0; i < mUnstamped.Length(); ++i) { + ogg_packet* packet = mUnstamped[i]; +#ifdef DEBUG + NS_ASSERTION(!IsHeader(packet), "Don't try to recover header packet gp"); + NS_ASSERTION(packet->granulepos != -1, "Packet must have gp by now"); +#endif + mPackets.Append(packet); + } + mUnstamped.Clear(); + } + return NS_OK; +} + +// Returns 1 if the Theora info struct is decoding a media of Theora +// version (maj,min,sub) or later, otherwise returns 0. 
+int +TheoraVersion(th_info* info, + unsigned char maj, + unsigned char min, + unsigned char sub) +{ + ogg_uint32_t ver = (maj << 16) + (min << 8) + sub; + ogg_uint32_t th_ver = (info->version_major << 16) + + (info->version_minor << 8) + + info->version_subminor; + return (th_ver >= ver) ? 1 : 0; +} + +void +TheoraState::ReconstructTheoraGranulepos() +{ + if (mUnstamped.Length() == 0) { + return; + } + ogg_int64_t lastGranulepos = mUnstamped[mUnstamped.Length() - 1]->granulepos; + NS_ASSERTION(lastGranulepos != -1, "Must know last granulepos"); + + // Reconstruct the granulepos (and thus timestamps) of the decoded + // frames. Granulepos are stored as ((keyframe<<shift)+offset). We + // know the granulepos of the last frame in the list, so we can infer + // the granulepos of the intermediate frames using their frame numbers. + ogg_int64_t shift = mInfo.keyframe_granule_shift; + ogg_int64_t version_3_2_1 = TheoraVersion(&mInfo,3,2,1); + ogg_int64_t lastFrame = th_granule_frame(mCtx, + lastGranulepos) + version_3_2_1; + ogg_int64_t firstFrame = lastFrame - mUnstamped.Length() + 1; + + // Until we encounter a keyframe, we'll assume that the "keyframe" + // segment of the granulepos is the first frame, or if that causes + // the "offset" segment to overflow, we assume the required + // keyframe is maximumally offset. Until we encounter a keyframe + // the granulepos will probably be wrong, but we can't decode the + // frame anyway (since we don't have its keyframe) so it doesn't really + // matter. + ogg_int64_t keyframe = lastGranulepos >> shift; + + // The lastFrame, firstFrame, keyframe variables, as well as the frame + // variable in the loop below, store the frame number for Theora + // version >= 3.2.1 streams, and store the frame index for Theora + // version < 3.2.1 streams. 
+ for (uint32_t i = 0; i < mUnstamped.Length() - 1; ++i) { + ogg_int64_t frame = firstFrame + i; + ogg_int64_t granulepos; + ogg_packet* packet = mUnstamped[i]; + bool isKeyframe = th_packet_iskeyframe(packet) == 1; + + if (isKeyframe) { + granulepos = frame << shift; + keyframe = frame; + } else if (frame >= keyframe && + frame - keyframe < ((ogg_int64_t)1 << shift)) + { + // (frame - keyframe) won't overflow the "offset" segment of the + // granulepos, so it's safe to calculate the granulepos. + granulepos = (keyframe << shift) + (frame - keyframe); + } else { + // (frame - keyframeno) will overflow the "offset" segment of the + // granulepos, so we take "keyframe" to be the max possible offset + // frame instead. + ogg_int64_t k = std::max(frame - (((ogg_int64_t)1 << shift) - 1), version_3_2_1); + granulepos = (k << shift) + (frame - k); + } + // Theora 3.2.1+ granulepos store frame number [1..N], so granulepos + // should be > 0. + // Theora 3.2.0 granulepos store the frame index [0..(N-1)], so + // granulepos should be >= 0. + NS_ASSERTION(granulepos >= version_3_2_1, + "Invalid granulepos for Theora version"); + + // Check that the frame's granule number is one more than the + // previous frame's. + NS_ASSERTION(i == 0 || + th_granule_frame(mCtx, granulepos) == + th_granule_frame(mCtx, mUnstamped[i-1]->granulepos) + 1, + "Granulepos calculation is incorrect!"); + + packet->granulepos = granulepos; + } + + // Check that the second to last frame's granule number is one less than + // the last frame's (the known granule number). If not our granulepos + // recovery missed a beat. 
+ NS_ASSERTION(mUnstamped.Length() < 2 || + th_granule_frame(mCtx, mUnstamped[mUnstamped.Length()-2]->granulepos) + 1 == + th_granule_frame(mCtx, lastGranulepos), + "Granulepos recovery should catch up with packet->granulepos!"); +} + +nsresult +VorbisState::Reset() +{ + nsresult res = NS_OK; + if (mActive && vorbis_synthesis_restart(&mDsp) != 0) { + res = NS_ERROR_FAILURE; + } + if (NS_FAILED(OggCodecState::Reset())) { + return NS_ERROR_FAILURE; + } + + mGranulepos = 0; + mPrevVorbisBlockSize = 0; + + return res; +} + +VorbisState::VorbisState(ogg_page* aBosPage) + : OggCodecState(aBosPage, true) + , mPrevVorbisBlockSize(0) + , mGranulepos(0) +{ + MOZ_COUNT_CTOR(VorbisState); + vorbis_info_init(&mInfo); + vorbis_comment_init(&mComment); + memset(&mDsp, 0, sizeof(vorbis_dsp_state)); + memset(&mBlock, 0, sizeof(vorbis_block)); +} + +VorbisState::~VorbisState() +{ + MOZ_COUNT_DTOR(VorbisState); + Reset(); + vorbis_block_clear(&mBlock); + vorbis_dsp_clear(&mDsp); + vorbis_info_clear(&mInfo); + vorbis_comment_clear(&mComment); +} + +bool +VorbisState::DecodeHeader(ogg_packet* aPacket) +{ + nsAutoRef<ogg_packet> autoRelease(aPacket); + mPacketCount++; + int ret = vorbis_synthesis_headerin(&mInfo, + &mComment, + aPacket); + // We must determine when we've read the last header packet. + // vorbis_synthesis_headerin() does not tell us when it's read the last + // header, so we must keep track of the headers externally. + // + // There are 3 header packets, the Identification, Comment, and Setup + // headers, which must be in that order. If they're out of order, the file + // is invalid. If we've successfully read a header, and it's the setup + // header, then we're done reading headers. 
The first byte of each packet + // determines it's type as follows: + // 0x1 -> Identification header + // 0x3 -> Comment header + // 0x5 -> Setup header + // For more details of the Vorbis/Ogg containment scheme, see the Vorbis I + // Specification, Chapter 4, Codec Setup and Packet Decode: + // http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-580004 + + bool isSetupHeader = aPacket->bytes > 0 && aPacket->packet[0] == 0x5; + + if (ret < 0 || mPacketCount > 3) { + // We've received an error, or the first three packets weren't valid + // header packets. Assume bad input. Our caller will deactivate the + // bitstream. + return false; + } else if (ret == 0 && isSetupHeader && mPacketCount == 3) { + // Successfully read the three header packets. + // The bitstream remains active. + mDoneReadingHeaders = true; + } + return true; +} + +bool +VorbisState::Init() +{ + if (!mActive) { + return false; + } + + int ret = vorbis_synthesis_init(&mDsp, &mInfo); + if (ret != 0) { + NS_WARNING("vorbis_synthesis_init() failed initializing vorbis bitstream"); + return mActive = false; + } + ret = vorbis_block_init(&mDsp, &mBlock); + if (ret != 0) { + NS_WARNING("vorbis_block_init() failed initializing vorbis bitstream"); + if (mActive) { + vorbis_dsp_clear(&mDsp); + } + return mActive = false; + } + return true; +} + +int64_t +VorbisState::Time(int64_t granulepos) +{ + if (!mActive) { + return -1; + } + + return VorbisState::Time(&mInfo, granulepos); +} + +int64_t +VorbisState::Time(vorbis_info* aInfo, int64_t aGranulepos) +{ + if (aGranulepos == -1 || aInfo->rate == 0) { + return -1; + } + CheckedInt64 t = SaferMultDiv(aGranulepos, USECS_PER_S, aInfo->rate); + return t.isValid() ? 
t.value() : 0; +} + +int64_t +VorbisState::PacketDuration(ogg_packet* aPacket) +{ + if (!mActive) { + return -1; + } + if (aPacket->granulepos == -1) { + return -1; + } + // @FIXME store these in a more stable place + if (mVorbisPacketSamples.count(aPacket) == 0) { + // We haven't seen this packet, don't know its size? + return -1; + } + + long samples = mVorbisPacketSamples[aPacket]; + return Time(samples); +} + +bool +VorbisState::IsHeader(ogg_packet* aPacket) +{ + // The first byte in each Vorbis header packet is either 0x01, 0x03, or 0x05, + // i.e. the first bit is odd. Audio data packets have their first bit as 0x0. + // Any packet with its first bit set cannot be a data packet, it's a + // (possibly invalid) header packet. + // See: http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-610004.2.1 + return aPacket->bytes > 0 ? (aPacket->packet[0] & 0x1) : false; +} + +MetadataTags* +VorbisState::GetTags() +{ + MetadataTags* tags; + NS_ASSERTION(mComment.user_comments, "no vorbis comment strings!"); + NS_ASSERTION(mComment.comment_lengths, "no vorbis comment lengths!"); + tags = new MetadataTags; + for (int i = 0; i < mComment.comments; i++) { + AddVorbisComment(tags, mComment.user_comments[i], + mComment.comment_lengths[i]); + } + return tags; +} + +nsresult +VorbisState::PageIn(ogg_page* aPage) +{ + if (!mActive) { + return NS_OK; + } + NS_ASSERTION(static_cast<uint32_t>(ogg_page_serialno(aPage)) == mSerial, + "Page must be for this stream!"); + if (ogg_stream_pagein(&mState, aPage) == -1) + return NS_ERROR_FAILURE; + bool foundGp; + nsresult res = PacketOutUntilGranulepos(foundGp); + if (NS_FAILED(res)) { + return res; + } + if (foundGp && mDoneReadingHeaders) { + // We've found a packet with a granulepos, and we've loaded our metadata + // and initialized our decoder. Determine granulepos of buffered packets. 
+ ReconstructVorbisGranulepos(); + for (uint32_t i = 0; i < mUnstamped.Length(); ++i) { + ogg_packet* packet = mUnstamped[i]; + AssertHasRecordedPacketSamples(packet); + NS_ASSERTION(!IsHeader(packet), "Don't try to recover header packet gp"); + NS_ASSERTION(packet->granulepos != -1, "Packet must have gp by now"); + mPackets.Append(packet); + } + mUnstamped.Clear(); + } + return NS_OK; +} + +nsresult +VorbisState::ReconstructVorbisGranulepos() +{ + // The number of samples in a Vorbis packet is: + // window_blocksize(previous_packet)/4+window_blocksize(current_packet)/4 + // See: http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-230001.3.2 + // So we maintain mPrevVorbisBlockSize, the block size of the last packet + // encountered. We also maintain mGranulepos, which is the granulepos of + // the last encountered packet. This enables us to give granulepos to + // packets when the last packet in mUnstamped doesn't have a granulepos + // (for example if the stream was truncated). + // + // We validate our prediction of the number of samples decoded when + // VALIDATE_VORBIS_SAMPLE_CALCULATION is defined by recording the predicted + // number of samples, and verifing we extract that many when decoding + // each packet. + + NS_ASSERTION(mUnstamped.Length() > 0, "Length must be > 0"); + ogg_packet* last = mUnstamped.LastElement(); + NS_ASSERTION(last->e_o_s || last->granulepos >= 0, + "Must know last granulepos!"); + if (mUnstamped.Length() == 1) { + ogg_packet* packet = mUnstamped[0]; + long blockSize = vorbis_packet_blocksize(&mInfo, packet); + if (blockSize < 0) { + // On failure vorbis_packet_blocksize returns < 0. If we've got + // a bad packet, we just assume that decode will have to skip this + // packet, i.e. assume 0 samples are decodable from this packet. 
+ blockSize = 0; + mPrevVorbisBlockSize = 0; + } + long samples = mPrevVorbisBlockSize / 4 + blockSize / 4; + mPrevVorbisBlockSize = blockSize; + if (packet->granulepos == -1) { + packet->granulepos = mGranulepos + samples; + } + + // Account for a partial last frame + if (packet->e_o_s && packet->granulepos >= mGranulepos) { + samples = packet->granulepos - mGranulepos; + } + + mGranulepos = packet->granulepos; + RecordVorbisPacketSamples(packet, samples); + return NS_OK; + } + + bool unknownGranulepos = last->granulepos == -1; + int totalSamples = 0; + for (int32_t i = mUnstamped.Length() - 1; i > 0; i--) { + ogg_packet* packet = mUnstamped[i]; + ogg_packet* prev = mUnstamped[i-1]; + ogg_int64_t granulepos = packet->granulepos; + NS_ASSERTION(granulepos != -1, "Must know granulepos!"); + long prevBlockSize = vorbis_packet_blocksize(&mInfo, prev); + long blockSize = vorbis_packet_blocksize(&mInfo, packet); + + if (blockSize < 0 || prevBlockSize < 0) { + // On failure vorbis_packet_blocksize returns < 0. If we've got + // a bad packet, we just assume that decode will have to skip this + // packet, i.e. assume 0 samples are decodable from this packet. + blockSize = 0; + prevBlockSize = 0; + } + + long samples = prevBlockSize / 4 + blockSize / 4; + totalSamples += samples; + prev->granulepos = granulepos - samples; + RecordVorbisPacketSamples(packet, samples); + } + + if (unknownGranulepos) { + for (uint32_t i = 0; i < mUnstamped.Length(); i++) { + ogg_packet* packet = mUnstamped[i]; + packet->granulepos += mGranulepos + totalSamples + 1; + } + } + + ogg_packet* first = mUnstamped[0]; + long blockSize = vorbis_packet_blocksize(&mInfo, first); + if (blockSize < 0) { + mPrevVorbisBlockSize = 0; + blockSize = 0; + } + + long samples = (mPrevVorbisBlockSize == 0) ? 
0 : + mPrevVorbisBlockSize / 4 + blockSize / 4; + int64_t start = first->granulepos - samples; + RecordVorbisPacketSamples(first, samples); + + if (last->e_o_s && start < mGranulepos) { + // We've calculated that there are more samples in this page than its + // granulepos claims, and it's the last page in the stream. This is legal, + // and we will need to prune the trailing samples when we come to decode it. + // We must correct the timestamps so that they follow the last Vorbis page's + // samples. + int64_t pruned = mGranulepos - start; + for (uint32_t i = 0; i < mUnstamped.Length() - 1; i++) { + mUnstamped[i]->granulepos += pruned; + } +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION + mVorbisPacketSamples[last] -= pruned; +#endif + } + + mPrevVorbisBlockSize = vorbis_packet_blocksize(&mInfo, last); + mPrevVorbisBlockSize = std::max(static_cast<long>(0), mPrevVorbisBlockSize); + mGranulepos = last->granulepos; + + return NS_OK; +} + +OpusState::OpusState(ogg_page* aBosPage) + : OggCodecState(aBosPage, true) + , mParser(nullptr) + , mDecoder(nullptr) + , mSkip(0) + , mPrevPacketGranulepos(0) + , mPrevPageGranulepos(0) +{ + MOZ_COUNT_CTOR(OpusState); +} + +OpusState::~OpusState() +{ + MOZ_COUNT_DTOR(OpusState); + Reset(); + + if (mDecoder) { + opus_multistream_decoder_destroy(mDecoder); + mDecoder = nullptr; + } +} + +nsresult +OpusState::Reset() +{ + return Reset(false); +} + +nsresult +OpusState::Reset(bool aStart) +{ + nsresult res = NS_OK; + + if (mActive && mDecoder) { + // Reset the decoder. + opus_multistream_decoder_ctl(mDecoder, OPUS_RESET_STATE); + // Let the seek logic handle pre-roll if we're not seeking to the start. + mSkip = aStart ? mParser->mPreSkip : 0; + // This lets us distinguish the first page being the last page vs. just + // not having processed the previous page when we encounter the last page. + mPrevPageGranulepos = aStart ? 0 : -1; + mPrevPacketGranulepos = aStart ? 0 : -1; + } + + // Clear queued data. 
+ if (NS_FAILED(OggCodecState::Reset())) { + return NS_ERROR_FAILURE; + } + + LOG(LogLevel::Debug, ("Opus decoder reset, to skip %d", mSkip)); + + return res; +} + +bool +OpusState::Init(void) +{ + if (!mActive) { + return false; + } + + int error; + + NS_ASSERTION(mDecoder == nullptr, "leaking OpusDecoder"); + + mDecoder = opus_multistream_decoder_create(mParser->mRate, + mParser->mChannels, + mParser->mStreams, + mParser->mCoupledStreams, + mParser->mMappingTable, + &error); + + mSkip = mParser->mPreSkip; + + LOG(LogLevel::Debug, ("Opus decoder init, to skip %d", mSkip)); + + return error == OPUS_OK; +} + +bool +OpusState::DecodeHeader(ogg_packet* aPacket) +{ + nsAutoRef<ogg_packet> autoRelease(aPacket); + switch(mPacketCount++) { + // Parse the id header. + case 0: + mParser = new OpusParser; + if (!mParser->DecodeHeader(aPacket->packet, aPacket->bytes)) { + return false; + } + mRate = mParser->mRate; + mChannels = mParser->mChannels; + mPreSkip = mParser->mPreSkip; +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + mGain = mParser->mGain; +#else + mGain_Q16 = mParser->mGain_Q16; +#endif + break; + + // Parse the metadata header. + case 1: + if (!mParser->DecodeTags(aPacket->packet, aPacket->bytes)) { + return false; + } + break; + + // We made it to the first data packet (which includes reconstructing + // timestamps for it in PageIn). Success! + default: + mDoneReadingHeaders = true; + // Put it back on the queue so we can decode it. + mPackets.PushFront(autoRelease.disown()); + break; + } + return true; +} + +/* Construct and return a tags hashmap from our internal array */ +MetadataTags* +OpusState::GetTags() +{ + MetadataTags* tags; + + tags = new MetadataTags; + for (uint32_t i = 0; i < mParser->mTags.Length(); i++) { + AddVorbisComment(tags, mParser->mTags[i].Data(), mParser->mTags[i].Length()); + } + + return tags; +} + +/* Return the timestamp (in microseconds) equivalent to a granulepos. 
*/ +int64_t +OpusState::Time(int64_t aGranulepos) +{ + if (!mActive) { + return -1; + } + + return Time(mParser->mPreSkip, aGranulepos); +} + +int64_t +OpusState::Time(int aPreSkip, int64_t aGranulepos) +{ + if (aGranulepos < 0) { + return -1; + } + + // Ogg Opus always runs at a granule rate of 48 kHz. + CheckedInt64 t = SaferMultDiv(aGranulepos - aPreSkip, USECS_PER_S, 48000); + return t.isValid() ? t.value() : -1; +} + +bool +OpusState::IsHeader(ogg_packet* aPacket) +{ + return aPacket->bytes >= 16 && + (!memcmp(aPacket->packet, "OpusHead", 8) || + !memcmp(aPacket->packet, "OpusTags", 8)); +} + +nsresult +OpusState::PageIn(ogg_page* aPage) +{ + if (!mActive) { + return NS_OK; + } + NS_ASSERTION(static_cast<uint32_t>(ogg_page_serialno(aPage)) == mSerial, + "Page must be for this stream!"); + if (ogg_stream_pagein(&mState, aPage) == -1) + return NS_ERROR_FAILURE; + + bool haveGranulepos; + nsresult rv = PacketOutUntilGranulepos(haveGranulepos); + if (NS_FAILED(rv) || !haveGranulepos || mPacketCount < 2) { + return rv; + } + if (!ReconstructOpusGranulepos()) { + return NS_ERROR_FAILURE; + } + for (uint32_t i = 0; i < mUnstamped.Length(); i++) { + ogg_packet* packet = mUnstamped[i]; + NS_ASSERTION(!IsHeader(packet), "Don't try to play a header packet"); + NS_ASSERTION(packet->granulepos != -1, "Packet should have a granulepos"); + mPackets.Append(packet); + } + mUnstamped.Clear(); + return NS_OK; +} + +// Helper method to return the change in granule position due to an Opus packet +// (as distinct from the number of samples in the packet, which depends on the +// decoder rate). It should work with a multistream Opus file, and continue to +// work should we ever allow the decoder to decode at a rate other than 48 kHz. +// It even works before we've created the actual Opus decoder. 
+static int +GetOpusDeltaGP(ogg_packet* packet) +{ + int nframes; + nframes = opus_packet_get_nb_frames(packet->packet, packet->bytes); + if (nframes > 0) { + return nframes*opus_packet_get_samples_per_frame(packet->packet, 48000); + } + NS_WARNING("Invalid Opus packet."); + return nframes; +} + +int64_t +OpusState::PacketDuration(ogg_packet* aPacket) +{ + CheckedInt64 t = SaferMultDiv(GetOpusDeltaGP(aPacket), USECS_PER_S, 48000); + return t.isValid() ? t.value() : -1; +} + +bool +OpusState::ReconstructOpusGranulepos(void) +{ + NS_ASSERTION(mUnstamped.Length() > 0, "Must have unstamped packets"); + ogg_packet* last = mUnstamped.LastElement(); + NS_ASSERTION(last->e_o_s || last->granulepos > 0, + "Must know last granulepos!"); + int64_t gp; + // If this is the last page, and we've seen at least one previous page (or + // this is the first page)... + if (last->e_o_s) { + if (mPrevPageGranulepos != -1) { + // If this file only has one page and the final granule position is + // smaller than the pre-skip amount, we MUST reject the stream. + if (!mDoneReadingHeaders && last->granulepos < mPreSkip) + return false; + int64_t last_gp = last->granulepos; + gp = mPrevPageGranulepos; + // Loop through the packets forwards, adding the current packet's + // duration to the previous granulepos to get the value for the + // current packet. + for (uint32_t i = 0; i < mUnstamped.Length() - 1; ++i) { + ogg_packet* packet = mUnstamped[i]; + int offset = GetOpusDeltaGP(packet); + // Check for error (negative offset) and overflow. + if (offset >= 0 && gp <= INT64_MAX - offset) { + gp += offset; + if (gp >= last_gp) { + NS_WARNING("Opus end trimming removed more than a full packet."); + // We were asked to remove a full packet's worth of data or more. + // Encoders SHOULD NOT produce streams like this, but we'll handle + // it for them anyway. 
+ gp = last_gp; + for (uint32_t j = i+1; j < mUnstamped.Length(); ++j) { + OggCodecState::ReleasePacket(mUnstamped[j]); + } + mUnstamped.RemoveElementsAt(i+1, mUnstamped.Length() - (i+1)); + last = packet; + last->e_o_s = 1; + } + } + packet->granulepos = gp; + } + mPrevPageGranulepos = last_gp; + return true; + } else { + NS_WARNING("No previous granule position to use for Opus end trimming."); + // If we don't have a previous granule position, fall through. + // We simply won't trim any samples from the end. + // TODO: Are we guaranteed to have seen a previous page if there is one? + } + } + + gp = last->granulepos; + // Loop through the packets backwards, subtracting the next + // packet's duration from its granulepos to get the value + // for the current packet. + for (uint32_t i = mUnstamped.Length() - 1; i > 0; i--) { + int offset = GetOpusDeltaGP(mUnstamped[i]); + // Check for error (negative offset) and overflow. + if (offset >= 0) { + if (offset <= gp) { + gp -= offset; + } else { + // If the granule position of the first data page is smaller than the + // number of decodable audio samples on that page, then we MUST reject + // the stream. + if (!mDoneReadingHeaders) + return false; + // It's too late to reject the stream. + // If we get here, this almost certainly means the file has screwed-up + // timestamps somewhere after the first page. + NS_WARNING("Clamping negative Opus granulepos to zero."); + gp = 0; + } + } + mUnstamped[i - 1]->granulepos = gp; + } + + // Check to make sure the first granule position is at least as large as the + // total number of samples decodable from the first page with completed + // packets. This requires looking at the duration of the first packet, too. + // We MUST reject such streams. 
+ if (!mDoneReadingHeaders && GetOpusDeltaGP(mUnstamped[0]) > gp) { + return false; + } + mPrevPageGranulepos = last->granulepos; + return true; +} + +already_AddRefed<MediaRawData> +OpusState::PacketOutAsMediaRawData() +{ + ogg_packet* packet = PacketPeek(); + uint32_t frames = 0; + const int64_t endFrame = packet->granulepos; + + if (!packet) { + return nullptr; + } + if (packet->e_o_s) { + frames = GetOpusDeltaGP(packet); + } + + RefPtr<MediaRawData> data = OggCodecState::PacketOutAsMediaRawData(); + if (!data) { + return nullptr; + } + + if (data->mEOS && mPrevPacketGranulepos != -1) { + // If this is the last packet, perform end trimming. + int64_t startFrame = mPrevPacketGranulepos; + frames -= std::max<int64_t>( + 0, std::min(endFrame - startFrame, static_cast<int64_t>(frames))); + data->mDiscardPadding = frames; + } + + // Save this packet's granule position in case we need to perform end + // trimming on the next packet. + mPrevPacketGranulepos = endFrame; + + return data.forget(); +} + +FlacState::FlacState(ogg_page* aBosPage) + : OggCodecState(aBosPage, true) +{ +} + +bool +FlacState::DecodeHeader(ogg_packet* aPacket) +{ + nsAutoRef<ogg_packet> autoRelease(aPacket); + + if (!mParser.DecodeHeaderBlock(aPacket->packet, aPacket->bytes)) { + return false; + } + if (mParser.HasFullMetadata()) { + mDoneReadingHeaders = true; + } + return true; +} + +int64_t +FlacState::Time(int64_t granulepos) +{ + if (!mParser.mInfo.IsValid()) { + return -1; + } + CheckedInt64 t = + SaferMultDiv(granulepos, USECS_PER_S, mParser.mInfo.mRate); + if (!t.isValid()) { + return -1; + } + return t.value(); +} + +int64_t +FlacState::PacketDuration(ogg_packet* aPacket) +{ + return mParser.BlockDuration(aPacket->packet, aPacket->bytes); +} + +bool +FlacState::IsHeader(ogg_packet* aPacket) +{ + return mParser.IsHeaderBlock(aPacket->packet, aPacket->bytes); +} + +nsresult +FlacState::PageIn(ogg_page* aPage) +{ + if (!mActive) { + return NS_OK; + } + 
NS_ASSERTION(static_cast<uint32_t>(ogg_page_serialno(aPage)) == mSerial, + "Page must be for this stream!"); + if (ogg_stream_pagein(&mState, aPage) == -1) + return NS_ERROR_FAILURE; + bool foundGp; + nsresult res = PacketOutUntilGranulepos(foundGp); + if (NS_FAILED(res)) { + return res; + } + if (foundGp && mDoneReadingHeaders) { + // We've found a packet with a granulepos, and we've loaded our metadata + // and initialized our decoder. Determine granulepos of buffered packets. + ReconstructFlacGranulepos(); + for (uint32_t i = 0; i < mUnstamped.Length(); ++i) { + ogg_packet* packet = mUnstamped[i]; + NS_ASSERTION(!IsHeader(packet), "Don't try to recover header packet gp"); + NS_ASSERTION(packet->granulepos != -1, "Packet must have gp by now"); + mPackets.Append(packet); + } + mUnstamped.Clear(); + } + return NS_OK; +} + +// Return a hash table with tag metadata. +MetadataTags* +FlacState::GetTags() +{ + return mParser.GetTags(); +} + +const AudioInfo& +FlacState::Info() +{ + return mParser.mInfo; +} + +bool +FlacState::ReconstructFlacGranulepos(void) +{ + NS_ASSERTION(mUnstamped.Length() > 0, "Must have unstamped packets"); + ogg_packet* last = mUnstamped.LastElement(); + NS_ASSERTION(last->e_o_s || last->granulepos > 0, + "Must know last granulepos!"); + int64_t gp; + + gp = last->granulepos; + // Loop through the packets backwards, subtracting the next + // packet's duration from its granulepos to get the value + // for the current packet. + for (uint32_t i = mUnstamped.Length() - 1; i > 0; i--) { + int offset = + mParser.BlockDuration(mUnstamped[i]->packet, mUnstamped[i]->bytes); + // Check for error (negative offset) and overflow. + if (offset >= 0) { + if (offset <= gp) { + gp -= offset; + } else { + // If the granule position of the first data page is smaller than the + // number of decodable audio samples on that page, then we MUST reject + // the stream. + if (!mDoneReadingHeaders) { + return false; + } + // It's too late to reject the stream. 
+ // If we get here, this almost certainly means the file has screwed-up + // timestamps somewhere after the first page. + NS_WARNING("Clamping negative granulepos to zero."); + gp = 0; + } + } + mUnstamped[i - 1]->granulepos = gp; + } + + return true; +} + +SkeletonState::SkeletonState(ogg_page* aBosPage) + : OggCodecState(aBosPage, true) + , mVersion(0) + , mPresentationTime(0) + , mLength(0) +{ + MOZ_COUNT_CTOR(SkeletonState); +} + +SkeletonState::~SkeletonState() +{ + MOZ_COUNT_DTOR(SkeletonState); +} + +// Support for Ogg Skeleton 4.0, as per specification at: +// http://wiki.xiph.org/Ogg_Skeleton_4 + +// Minimum length in bytes of a Skeleton header packet. +static const long SKELETON_MIN_HEADER_LEN = 28; +static const long SKELETON_4_0_MIN_HEADER_LEN = 80; + +// Minimum length in bytes of a Skeleton 4.0 index packet. +static const long SKELETON_4_0_MIN_INDEX_LEN = 42; + +// Minimum length in bytes of a Skeleton 3.0/4.0 Fisbone packet. +static const long SKELETON_MIN_FISBONE_LEN = 52; + +// Minimum possible size of a compressed index keypoint. +static const size_t MIN_KEY_POINT_SIZE = 2; + +// Byte offset of the major and minor version numbers in the +// Ogg Skeleton 4.0 header packet. +static const size_t SKELETON_VERSION_MAJOR_OFFSET = 8; +static const size_t SKELETON_VERSION_MINOR_OFFSET = 10; + +// Byte-offsets of the presentation time numerator and denominator +static const size_t SKELETON_PRESENTATION_TIME_NUMERATOR_OFFSET = 12; +static const size_t SKELETON_PRESENTATION_TIME_DENOMINATOR_OFFSET = 20; + +// Byte-offsets of the length of file field in the Skeleton 4.0 header packet. +static const size_t SKELETON_FILE_LENGTH_OFFSET = 64; + +// Byte-offsets of the fields in the Skeleton index packet. 
+static const size_t INDEX_SERIALNO_OFFSET = 6; +static const size_t INDEX_NUM_KEYPOINTS_OFFSET = 10; +static const size_t INDEX_TIME_DENOM_OFFSET = 18; +static const size_t INDEX_FIRST_NUMER_OFFSET = 26; +static const size_t INDEX_LAST_NUMER_OFFSET = 34; +static const size_t INDEX_KEYPOINT_OFFSET = 42; + +// Byte-offsets of the fields in the Skeleton Fisbone packet. +static const size_t FISBONE_MSG_FIELDS_OFFSET = 8; +static const size_t FISBONE_SERIALNO_OFFSET = 12; + +static bool +IsSkeletonBOS(ogg_packet* aPacket) +{ + static_assert(SKELETON_MIN_HEADER_LEN >= 8, + "Minimum length of skeleton BOS header incorrect"); + return aPacket->bytes >= SKELETON_MIN_HEADER_LEN && + memcmp(reinterpret_cast<char*>(aPacket->packet), "fishead", 8) == 0; +} + +static bool +IsSkeletonIndex(ogg_packet* aPacket) +{ + static_assert(SKELETON_4_0_MIN_INDEX_LEN >= 5, + "Minimum length of skeleton index header incorrect"); + return aPacket->bytes >= SKELETON_4_0_MIN_INDEX_LEN && + memcmp(reinterpret_cast<char*>(aPacket->packet), "index", 5) == 0; +} + +static bool +IsSkeletonFisbone(ogg_packet* aPacket) +{ + static_assert(SKELETON_MIN_FISBONE_LEN >= 8, + "Minimum length of skeleton fisbone header incorrect"); + return aPacket->bytes >= SKELETON_MIN_FISBONE_LEN && + memcmp(reinterpret_cast<char*>(aPacket->packet), "fisbone", 8) == 0; +} + +// Reads a variable length encoded integer at p. Will not read +// past aLimit. Returns pointer to character after end of integer. 
+static const unsigned char* +ReadVariableLengthInt(const unsigned char* p, + const unsigned char* aLimit, + int64_t& n) +{ + int shift = 0; + int64_t byte = 0; + n = 0; + while (p < aLimit && + (byte & 0x80) != 0x80 && + shift < 57) + { + byte = static_cast<int64_t>(*p); + n |= ((byte & 0x7f) << shift); + shift += 7; + p++; + } + return p; +} + +bool +SkeletonState::DecodeIndex(ogg_packet* aPacket) +{ + NS_ASSERTION(aPacket->bytes >= SKELETON_4_0_MIN_INDEX_LEN, + "Index must be at least minimum size"); + if (!mActive) { + return false; + } + + uint32_t serialno = LittleEndian::readUint32(aPacket->packet + INDEX_SERIALNO_OFFSET); + int64_t numKeyPoints = LittleEndian::readInt64(aPacket->packet + INDEX_NUM_KEYPOINTS_OFFSET); + + int64_t endTime = 0, startTime = 0; + const unsigned char* p = aPacket->packet; + + int64_t timeDenom = LittleEndian::readInt64(aPacket->packet + INDEX_TIME_DENOM_OFFSET); + if (timeDenom == 0) { + LOG(LogLevel::Debug, ("Ogg Skeleton Index packet for stream %u has 0 " + "timestamp denominator.", serialno)); + return (mActive = false); + } + + // Extract the start time. + int64_t timeRawInt = LittleEndian::readInt64(p + INDEX_FIRST_NUMER_OFFSET); + CheckedInt64 t = SaferMultDiv(timeRawInt, USECS_PER_S, timeDenom); + if (!t.isValid()) { + return (mActive = false); + } else { + startTime = t.value(); + } + + // Extract the end time. + timeRawInt = LittleEndian::readInt64(p + INDEX_LAST_NUMER_OFFSET); + t = SaferMultDiv(timeRawInt, USECS_PER_S, timeDenom); + if (!t.isValid()) { + return (mActive = false); + } else { + endTime = t.value(); + } + + // Check the numKeyPoints value read, ensure we're not going to run out of + // memory while trying to decode the index packet. 
+ CheckedInt64 minPacketSize = + (CheckedInt64(numKeyPoints) * MIN_KEY_POINT_SIZE) + INDEX_KEYPOINT_OFFSET; + if (!minPacketSize.isValid()) + { + return (mActive = false); + } + + int64_t sizeofIndex = aPacket->bytes - INDEX_KEYPOINT_OFFSET; + int64_t maxNumKeyPoints = sizeofIndex / MIN_KEY_POINT_SIZE; + if (aPacket->bytes < minPacketSize.value() || + numKeyPoints > maxNumKeyPoints || + numKeyPoints < 0) { + // Packet size is less than the theoretical minimum size, or the packet is + // claiming to store more keypoints than it's capable of storing. This means + // that the numKeyPoints field is too large or small for the packet to + // possibly contain as many packets as it claims to, so the numKeyPoints + // field is possibly malicious. Don't try decoding this index, we may run + // out of memory. + LOG(LogLevel::Debug, ("Possibly malicious number of key points reported " + "(%lld) in index packet for stream %u.", + numKeyPoints, + serialno)); + return (mActive = false); + } + + nsAutoPtr<nsKeyFrameIndex> keyPoints(new nsKeyFrameIndex(startTime, endTime)); + + p = aPacket->packet + INDEX_KEYPOINT_OFFSET; + const unsigned char* limit = aPacket->packet + aPacket->bytes; + int64_t numKeyPointsRead = 0; + CheckedInt64 offset = 0; + CheckedInt64 time = 0; + while (p < limit && numKeyPointsRead < numKeyPoints) { + int64_t delta = 0; + p = ReadVariableLengthInt(p, limit, delta); + offset += delta; + if (p == limit || + !offset.isValid() || + offset.value() > mLength || + offset.value() < 0) { + return (mActive = false); + } + p = ReadVariableLengthInt(p, limit, delta); + time += delta; + if (!time.isValid() || + time.value() > endTime || + time.value() < startTime) { + return (mActive = false); + } + CheckedInt64 timeUsecs = SaferMultDiv(time.value(), USECS_PER_S, timeDenom); + if (!timeUsecs.isValid()) { + return (mActive = false); + } + keyPoints->Add(offset.value(), timeUsecs.value()); + numKeyPointsRead++; + } + + int32_t keyPointsRead = keyPoints->Length(); + if 
(keyPointsRead > 0) { + mIndex.Put(serialno, keyPoints.forget()); + } + + LOG(LogLevel::Debug, ("Loaded %d keypoints for Skeleton on stream %u", + keyPointsRead, serialno)); + return true; +} + +nsresult +SkeletonState::IndexedSeekTargetForTrack(uint32_t aSerialno, + int64_t aTarget, + nsKeyPoint& aResult) +{ + nsKeyFrameIndex* index = nullptr; + mIndex.Get(aSerialno, &index); + + if (!index || index->Length() == 0 || + aTarget < index->mStartTime || aTarget > index->mEndTime) { + return NS_ERROR_FAILURE; + } + + // Binary search to find the last key point with time less than target. + int start = 0; + int end = index->Length() - 1; + while (end > start) { + int mid = start + ((end - start + 1) >> 1); + if (index->Get(mid).mTime == aTarget) { + start = mid; + break; + } else if (index->Get(mid).mTime < aTarget) { + start = mid; + } else { + end = mid - 1; + } + } + + aResult = index->Get(start); + NS_ASSERTION(aResult.mTime <= aTarget, "Result should have time <= target"); + return NS_OK; +} + +nsresult +SkeletonState::IndexedSeekTarget(int64_t aTarget, + nsTArray<uint32_t>& aTracks, + nsSeekTarget& aResult) +{ + if (!mActive || mVersion < SKELETON_VERSION(4,0)) { + return NS_ERROR_FAILURE; + } + // Loop over all requested tracks' indexes, and get the keypoint for that + // seek target. Record the keypoint with the lowest offset, this will be + // our seek result. User must seek to the one with lowest offset to ensure we + // pass "keyframes" on all tracks when we decode forwards to the seek target. 
+ nsSeekTarget r; + for (uint32_t i=0; i<aTracks.Length(); i++) { + nsKeyPoint k; + if (NS_SUCCEEDED(IndexedSeekTargetForTrack(aTracks[i], aTarget, k)) && + k.mOffset < r.mKeyPoint.mOffset) { + r.mKeyPoint = k; + r.mSerial = aTracks[i]; + } + } + if (r.IsNull()) { + return NS_ERROR_FAILURE; + } + LOG(LogLevel::Debug, ("Indexed seek target for time %lld is offset %lld", + aTarget, r.mKeyPoint.mOffset)); + aResult = r; + return NS_OK; +} + +nsresult +SkeletonState::GetDuration(const nsTArray<uint32_t>& aTracks, + int64_t& aDuration) +{ + if (!mActive || + mVersion < SKELETON_VERSION(4,0) || + !HasIndex() || + aTracks.Length() == 0) { + return NS_ERROR_FAILURE; + } + int64_t endTime = INT64_MIN; + int64_t startTime = INT64_MAX; + for (uint32_t i=0; i<aTracks.Length(); i++) { + nsKeyFrameIndex* index = nullptr; + mIndex.Get(aTracks[i], &index); + if (!index) { + // Can't get the timestamps for one of the required tracks, fail. + return NS_ERROR_FAILURE; + } + if (index->mEndTime > endTime) { + endTime = index->mEndTime; + } + if (index->mStartTime < startTime) { + startTime = index->mStartTime; + } + } + NS_ASSERTION(endTime > startTime, "Duration must be positive"); + CheckedInt64 duration = CheckedInt64(endTime) - startTime; + aDuration = duration.isValid() ? duration.value() : 0; + return duration.isValid() ? 
NS_OK : NS_ERROR_FAILURE; +} + +bool +SkeletonState::DecodeFisbone(ogg_packet* aPacket) +{ + if (aPacket->bytes < static_cast<long>(FISBONE_MSG_FIELDS_OFFSET + 4)) { + return false; + } + uint32_t offsetMsgField = + LittleEndian::readUint32(aPacket->packet + FISBONE_MSG_FIELDS_OFFSET); + + if (aPacket->bytes < static_cast<long>(FISBONE_SERIALNO_OFFSET + 4)) { + return false; + } + uint32_t serialno = + LittleEndian::readUint32(aPacket->packet + FISBONE_SERIALNO_OFFSET); + + CheckedUint32 checked_fields_pos = + CheckedUint32(FISBONE_MSG_FIELDS_OFFSET) + offsetMsgField; + if (!checked_fields_pos.isValid() || + aPacket->bytes < static_cast<int64_t>(checked_fields_pos.value())) { + return false; + } + int64_t msgLength = aPacket->bytes - checked_fields_pos.value(); + char* msgProbe = (char*)aPacket->packet + checked_fields_pos.value(); + char* msgHead = msgProbe; + nsAutoPtr<MessageField> field(new MessageField()); + + const static FieldPatternType kFieldTypeMaps[] = { + {"Content-Type:", eContentType}, + {"Role:", eRole}, + {"Name:", eName}, + {"Language:", eLanguage}, + {"Title:", eTitle}, + {"Display-hint:", eDisplayHint}, + {"Altitude:", eAltitude}, + {"TrackOrder:", eTrackOrder}, + {"Track dependencies:", eTrackDependencies} + }; + + bool isContentTypeParsed = false; + while (msgLength > 1) { + if (*msgProbe == '\r' && *(msgProbe+1) == '\n') { + nsAutoCString strMsg(msgHead, msgProbe-msgHead); + for (size_t i = 0; i < ArrayLength(kFieldTypeMaps); i++) { + if (strMsg.Find(kFieldTypeMaps[i].mPatternToRecognize) != -1) { + // The content of message header fields follows [RFC2822], and the + // mandatory message field must be encoded in US-ASCII, others + // must be be encoded in UTF-8. "Content-Type" must come first + // for all of message header fields. + // See http://svn.annodex.net/standards/draft-pfeiffer-oggskeleton-current.txt. 
+ if (i != 0 && !isContentTypeParsed) { + return false; + } + + if ((i == 0 && IsASCII(strMsg)) || (i != 0 && IsUTF8(strMsg))) { + EMsgHeaderType eHeaderType = kFieldTypeMaps[i].mMsgHeaderType; + if (!field->mValuesStore.Contains(eHeaderType)) { + uint32_t nameLen = strlen(kFieldTypeMaps[i].mPatternToRecognize); + field->mValuesStore.Put(eHeaderType, new nsCString(msgHead+nameLen, + msgProbe-msgHead-nameLen)); + } + isContentTypeParsed = i==0 ? true : isContentTypeParsed; + } + break; + } + } + msgProbe += 2; + msgLength -= 2; + msgHead = msgProbe; + continue; + } + msgLength--; + msgProbe++; + } + + if (!mMsgFieldStore.Contains(serialno)) { + mMsgFieldStore.Put(serialno, field.forget()); + } else { + return false; + } + + return true; +} + +bool +SkeletonState::DecodeHeader(ogg_packet* aPacket) +{ + nsAutoRef<ogg_packet> autoRelease(aPacket); + if (IsSkeletonBOS(aPacket)) { + uint16_t verMajor = + LittleEndian::readUint16(aPacket->packet + SKELETON_VERSION_MAJOR_OFFSET); + uint16_t verMinor = + LittleEndian::readUint16(aPacket->packet + SKELETON_VERSION_MINOR_OFFSET); + + // Read the presentation time. We read this before the version check as the + // presentation time exists in all versions. + int64_t n = + LittleEndian::readInt64(aPacket->packet + SKELETON_PRESENTATION_TIME_NUMERATOR_OFFSET); + int64_t d = + LittleEndian::readInt64(aPacket->packet + SKELETON_PRESENTATION_TIME_DENOMINATOR_OFFSET); + mPresentationTime = + d == 0 ? 0 : (static_cast<float>(n) / static_cast<float>(d)) * USECS_PER_S; + + mVersion = SKELETON_VERSION(verMajor, verMinor); + // We can only care to parse Skeleton version 4.0+. + if (mVersion < SKELETON_VERSION(4,0) || + mVersion >= SKELETON_VERSION(5,0) || + aPacket->bytes < SKELETON_4_0_MIN_HEADER_LEN) { + return false; + } + + // Extract the segment length. 
+ mLength = + LittleEndian::readInt64(aPacket->packet + SKELETON_FILE_LENGTH_OFFSET); + + LOG(LogLevel::Debug, ("Skeleton segment length: %lld", mLength)); + + // Initialize the serialno-to-index map. + return true; + } else if (IsSkeletonIndex(aPacket) && mVersion >= SKELETON_VERSION(4,0)) { + return DecodeIndex(aPacket); + } else if (IsSkeletonFisbone(aPacket)) { + return DecodeFisbone(aPacket); + } else if (aPacket->e_o_s) { + mDoneReadingHeaders = true; + return true; + } + return true; +} + +} // namespace mozilla + diff --git a/dom/media/ogg/OggCodecState.h b/dom/media/ogg/OggCodecState.h new file mode 100644 index 0000000000..9c254aa40f --- /dev/null +++ b/dom/media/ogg/OggCodecState.h @@ -0,0 +1,644 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(OggCodecState_h_) +#define OggCodecState_h_ + +#include <ogg/ogg.h> +// For MOZ_SAMPLE_TYPE_* +#include <nsAutoPtr.h> +#include <nsAutoRef.h> +#include <nsDeque.h> +#include <nsTArray.h> +#include <nsClassHashtable.h> +#include "VideoUtils.h" +#include "FlacFrameParser.h" + +#include <theora/theoradec.h> +#ifdef MOZ_TREMOR +#include <tremor/ivorbiscodec.h> +#else +#include <vorbis/codec.h> +#endif + +// Uncomment the following to validate that we're predicting the number +// of Vorbis samples in each packet correctly. +#define VALIDATE_VORBIS_SAMPLE_CALCULATION +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION +#include <map> +#endif + +struct OpusMSDecoder; + +namespace mozilla { + +class OpusParser; + +// Deallocates a packet, used in OggPacketQueue below. 
+class OggPacketDeallocator : public nsDequeFunctor +{ + virtual void* operator() (void* aPacket) + { + ogg_packet* p = static_cast<ogg_packet*>(aPacket); + delete [] p->packet; + delete p; + return nullptr; + } +}; + +// A queue of ogg_packets. When we read a page, we extract the page's packets +// and buffer them in the owning stream's OggCodecState. This is because +// if we're skipping up to the next keyframe in very large frame sized videos, +// there may be several megabytes of data between keyframes, and the +// ogg_stream_state would end up resizing its buffer every time we added a +// new 4KB page to the bitstream, which kills performance on Windows. This +// also gives us the option to timestamp packets rather than decoded +// frames/samples, reducing the amount of frames/samples we must decode to +// determine start-time at a particular offset, and gives us finer control +// over memory usage. +class OggPacketQueue : private nsDeque +{ +public: + OggPacketQueue() : nsDeque(new OggPacketDeallocator()) {} + ~OggPacketQueue() { Erase(); } + bool IsEmpty() { return nsDeque::GetSize() == 0; } + void Append(ogg_packet* aPacket); + ogg_packet* PopFront() { return static_cast<ogg_packet*>(nsDeque::PopFront()); } + ogg_packet* PeekFront() { return static_cast<ogg_packet*>(nsDeque::PeekFront()); } + ogg_packet* Pop() { return static_cast<ogg_packet*>(nsDeque::Pop()); } + void PushFront(ogg_packet* aPacket) { nsDeque::PushFront(aPacket); } + void Erase() { nsDeque::Erase(); } +}; + +// Encapsulates the data required for decoding an ogg bitstream and for +// converting granulepos to timestamps. +class OggCodecState +{ +public: + typedef mozilla::MetadataTags MetadataTags; + // Ogg types we know about + enum CodecType + { + TYPE_VORBIS=0, + TYPE_THEORA, + TYPE_OPUS, + TYPE_SKELETON, + TYPE_FLAC, + TYPE_UNKNOWN + }; + + virtual ~OggCodecState(); + + // Factory for creating nsCodecStates. Use instead of constructor. + // aPage should be a beginning-of-stream page. 
+ static OggCodecState* Create(ogg_page* aPage); + + virtual CodecType GetType() { return TYPE_UNKNOWN; } + + // Reads a header packet. Returns false if an error was encountered + // while reading header packets. Callers should check DoneReadingHeaders() + // to determine if the last header has been read. + // This function takes ownership of the packet and is responsible for + // releasing it or queuing it for later processing. + virtual bool DecodeHeader(ogg_packet* aPacket) + { + return (mDoneReadingHeaders = true); + } + + // Build a hash table with tag metadata parsed from the stream. + virtual MetadataTags* GetTags() + { + return nullptr; + } + + // Returns the end time that a granulepos represents. + virtual int64_t Time(int64_t granulepos) { return -1; } + + // Returns the start time that a granulepos represents. + virtual int64_t StartTime(int64_t granulepos) { return -1; } + + // Returns the duration of the given packet, if it can be determined. + virtual int64_t PacketDuration(ogg_packet* aPacket) { return -1; } + + // Returns the start time of the given packet, if it can be determined. + virtual int64_t PacketStartTime(ogg_packet* aPacket) + { + if (aPacket->granulepos < 0) { + return -1; + } + int64_t endTime = Time(aPacket->granulepos); + int64_t duration = PacketDuration(aPacket); + if (duration > endTime) { + // Audio preskip may eat a whole packet or more. + return 0; + } else { + return endTime - duration; + } + } + + // Initializes the codec state. + virtual bool Init() { return true; } + + // Returns true when this bitstream has finished reading all its + // header packets. + bool DoneReadingHeaders() { return mDoneReadingHeaders; } + + // Deactivates the bitstream. Only the primary video and audio bitstreams + // should be active. + void Deactivate() + { + mActive = false; + mDoneReadingHeaders = true; + Reset(); + } + + // Resets decoding state. 
+ virtual nsresult Reset(); + + // Returns true if the OggCodecState thinks this packet is a header + // packet. Note this does not verify the validity of the header packet, + // it just guarantees that the packet is marked as a header packet (i.e. + // it is definintely not a data packet). Do not use this to identify + // streams, use it to filter header packets from data packets while + // decoding. + virtual bool IsHeader(ogg_packet* aPacket) { return false; } + + // Returns true if the OggCodecState thinks this packet represents a + // keyframe, from which decoding can restart safely. + virtual bool IsKeyframe(ogg_packet* aPacket) { return true; } + + // Returns true if there is a packet available for dequeueing in the stream. + bool IsPacketReady(); + + // Returns the next raw packet in the stream, or nullptr if there are no more + // packets buffered in the packet queue. More packets can be buffered by + // inserting one or more pages into the stream by calling PageIn(). The + // caller is responsible for deleting returned packet's using + // OggCodecState::ReleasePacket(). The packet will have a valid granulepos. + ogg_packet* PacketOut(); + + // Returns the next raw packet in the stream, or nullptr if there are no more + // packets buffered in the packet queue, without consuming it. + // The packet will have a valid granulepos. + ogg_packet* PacketPeek(); + + // Moves all raw packets from aOther to the front of the current packet queue. + void PushFront(OggPacketQueue&& aOther); + + // Releases the memory used by a cloned packet. Every packet returned by + // PacketOut() must be free'd using this function. + static void ReleasePacket(ogg_packet* aPacket); + + // Returns the next packet in the stream as a MediaRawData, or nullptr + // if there are no more packets buffered in the packet queue. More packets + // can be buffered by inserting one or more pages into the stream by calling + // PageIn(). The packet will have a valid granulepos. 
+ virtual already_AddRefed<MediaRawData> PacketOutAsMediaRawData(); + + // Extracts all packets from the page, and inserts them into the packet + // queue. They can be extracted by calling PacketOut(). Packets from an + // inactive stream are not buffered, i.e. this call has no effect for + // inactive streams. Multiple pages may need to be inserted before + // PacketOut() starts to return packets, as granulepos may need to be + // captured. + virtual nsresult PageIn(ogg_page* aPage); + + // Number of packets read. + uint64_t mPacketCount; + + // Serial number of the bitstream. + uint32_t mSerial; + + // Ogg specific state. + ogg_stream_state mState; + + // Queue of as yet undecoded packets. Packets are guaranteed to have + // a valid granulepos. + OggPacketQueue mPackets; + + // Is the bitstream active; whether we're decoding and playing this bitstream. + bool mActive; + + // True when all headers packets have been read. + bool mDoneReadingHeaders; + + // Validation utility for vorbis-style tag names. + static bool IsValidVorbisTagName(nsCString& aName); + + // Utility method to parse and add a vorbis-style comment + // to a metadata hash table. Most Ogg-encapsulated codecs + // use the vorbis comment format for metadata. + static bool AddVorbisComment(MetadataTags* aTags, + const char* aComment, + uint32_t aLength); + +protected: + // Constructs a new OggCodecState. aActive denotes whether the stream is + // active. For streams of unsupported or unknown types, aActive should be + // false. + OggCodecState(ogg_page* aBosPage, bool aActive); + + // Deallocates all packets stored in mUnstamped, and clears the array. + void ClearUnstamped(); + + // Extracts packets out of mState until a data packet with a non -1 + // granulepos is encountered, or no more packets are readable. Header + // packets are pushed into the packet queue immediately, and data packets + // are buffered in mUnstamped. 
Once a non -1 granulepos packet is read + // the granulepos of the packets in mUnstamped can be inferred, and they + // can be pushed over to mPackets. Used by PageIn() implementations in + // subclasses. + nsresult PacketOutUntilGranulepos(bool& aFoundGranulepos); + + // Temporary buffer in which to store packets while we're reading packets + // in order to capture granulepos. + nsTArray<ogg_packet*> mUnstamped; + +private: + bool InternalInit(); +}; + +class VorbisState : public OggCodecState +{ +public: + explicit VorbisState(ogg_page* aBosPage); + virtual ~VorbisState(); + + CodecType GetType() override { return TYPE_VORBIS; } + bool DecodeHeader(ogg_packet* aPacket) override; + int64_t Time(int64_t granulepos) override; + int64_t PacketDuration(ogg_packet* aPacket) override; + bool Init() override; + nsresult Reset() override; + bool IsHeader(ogg_packet* aPacket) override; + nsresult PageIn(ogg_page* aPage) override; + + // Return a hash table with tag metadata. + MetadataTags* GetTags() override; + + // Returns the end time that a granulepos represents. + static int64_t Time(vorbis_info* aInfo, int64_t aGranulePos); + + vorbis_info mInfo; + vorbis_comment mComment; + vorbis_dsp_state mDsp; + vorbis_block mBlock; + +private: + + // Reconstructs the granulepos of Vorbis packets stored in the mUnstamped + // array. + nsresult ReconstructVorbisGranulepos(); + + // The "block size" of the previously decoded Vorbis packet, or 0 if we've + // not yet decoded anything. This is used to calculate the number of samples + // in a Vorbis packet, since each Vorbis packet depends on the previous + // packet while being decoded. + long mPrevVorbisBlockSize; + + // Granulepos (end sample) of the last decoded Vorbis packet. This is used + // to calculate the Vorbis granulepos when we don't find a granulepos to + // back-propagate from. 
+ int64_t mGranulepos; + +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION + // When validating that we've correctly predicted Vorbis packets' number + // of samples, we store each packet's predicted number of samples in this + // map, and verify we decode the predicted number of samples. + std::map<ogg_packet*, long> mVorbisPacketSamples; +#endif + + // Records that aPacket is predicted to have aSamples samples. + // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION + // is not defined. + void RecordVorbisPacketSamples(ogg_packet* aPacket, long aSamples); + + // Verifies that aPacket has had its number of samples predicted. + // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION + // is not defined. + void AssertHasRecordedPacketSamples(ogg_packet* aPacket); + +public: + // Asserts that the number of samples predicted for aPacket is aSamples. + // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION + // is not defined. + void ValidateVorbisPacketSamples(ogg_packet* aPacket, long aSamples); + +}; + +// Returns 1 if the Theora info struct is decoding a media of Theora +// version (maj,min,sub) or later, otherwise returns 0. +int TheoraVersion(th_info* info, + unsigned char maj, + unsigned char min, + unsigned char sub); + +class TheoraState : public OggCodecState +{ +public: + explicit TheoraState(ogg_page* aBosPage); + virtual ~TheoraState(); + + CodecType GetType() override { return TYPE_THEORA; } + bool DecodeHeader(ogg_packet* aPacket) override; + int64_t Time(int64_t granulepos) override; + int64_t StartTime(int64_t granulepos) override; + int64_t PacketDuration(ogg_packet* aPacket) override; + bool Init() override; + bool IsHeader(ogg_packet* aPacket) override; + bool IsKeyframe(ogg_packet* aPacket) override; + nsresult PageIn(ogg_page* aPage) override; + + // Returns the maximum number of microseconds which a keyframe can be offset + // from any given interframe. 
+ int64_t MaxKeyframeOffset(); + + // Returns the end time that a granulepos represents. + static int64_t Time(th_info* aInfo, int64_t aGranulePos); + + th_info mInfo; + th_comment mComment; + th_setup_info* mSetup; + th_dec_ctx* mCtx; + + float mPixelAspectRatio; + +private: + + // Reconstructs the granulepos of Theora packets stored in the + // mUnstamped array. mUnstamped must be filled with consecutive packets from + // the stream, with the last packet having a known granulepos. Using this + // known granulepos, and the known frame numbers, we recover the granulepos + // of all frames in the array. This enables us to determine their timestamps. + void ReconstructTheoraGranulepos(); + +}; + +class OpusState : public OggCodecState +{ +public: + explicit OpusState(ogg_page* aBosPage); + virtual ~OpusState(); + + CodecType GetType() override { return TYPE_OPUS; } + bool DecodeHeader(ogg_packet* aPacket) override; + int64_t Time(int64_t aGranulepos) override; + int64_t PacketDuration(ogg_packet* aPacket) override; + bool Init() override; + nsresult Reset() override; + nsresult Reset(bool aStart); + bool IsHeader(ogg_packet* aPacket) override; + nsresult PageIn(ogg_page* aPage) override; + already_AddRefed<MediaRawData> PacketOutAsMediaRawData() override; + // Returns the end time that a granulepos represents. + static int64_t Time(int aPreSkip, int64_t aGranulepos); + + // Various fields from the Ogg Opus header. + int mRate; // Sample rate the decoder uses (always 48 kHz). + int mChannels; // Number of channels the stream encodes. + uint16_t mPreSkip; // Number of samples to strip after decoder reset. +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + float mGain; // Gain to apply to decoder output. +#else + int32_t mGain_Q16; // Gain to apply to the decoder output. +#endif + + nsAutoPtr<OpusParser> mParser; + OpusMSDecoder* mDecoder; + + int mSkip; // Number of samples left to trim before playback. + // Granule position (end sample) of the last decoded Opus packet. 
This is + // used to calculate the amount we should trim from the last packet. + int64_t mPrevPacketGranulepos; + + // Construct and return a table of tags from the metadata header. + MetadataTags* GetTags() override; + +private: + + // Reconstructs the granulepos of Opus packets stored in the + // mUnstamped array. mUnstamped must be filled with consecutive packets from + // the stream, with the last packet having a known granulepos. Using this + // known granulepos, and the known frame numbers, we recover the granulepos + // of all frames in the array. This enables us to determine their timestamps. + bool ReconstructOpusGranulepos(); + + // Granule position (end sample) of the last decoded Opus page. This is + // used to calculate the Opus per-packet granule positions on the last page, + // where we may need to trim some samples from the end. + int64_t mPrevPageGranulepos; + +}; + +// Constructs a 32bit version number out of two 16 bit major,minor +// version numbers. +#define SKELETON_VERSION(major, minor) (((major)<<16)|(minor)) + +enum EMsgHeaderType { + eContentType, + eRole, + eName, + eLanguage, + eTitle, + eDisplayHint, + eAltitude, + eTrackOrder, + eTrackDependencies +}; + +typedef struct +{ + const char* mPatternToRecognize; + EMsgHeaderType mMsgHeaderType; +} FieldPatternType; + +// Stores the message information for different logical bitstream. 
+typedef struct +{ + nsClassHashtable<nsUint32HashKey, nsCString> mValuesStore; +} MessageField; + +class SkeletonState : public OggCodecState +{ +public: + explicit SkeletonState(ogg_page* aBosPage); + ~SkeletonState(); + + nsClassHashtable<nsUint32HashKey, MessageField> mMsgFieldStore; + + CodecType GetType() override { return TYPE_SKELETON; } + bool DecodeHeader(ogg_packet* aPacket) override; + int64_t Time(int64_t granulepos) override { return -1; } + bool IsHeader(ogg_packet* aPacket) override { return true; } + + // Return true if the given time (in milliseconds) is within + // the presentation time defined in the skeleton track. + bool IsPresentable(int64_t aTime) { return aTime >= mPresentationTime; } + + // Stores the offset of the page on which a keyframe starts, + // and its presentation time. + class nsKeyPoint + { + public: + nsKeyPoint() + : mOffset(INT64_MAX) + , mTime(INT64_MAX) {} + + nsKeyPoint(int64_t aOffset, int64_t aTime) + : mOffset(aOffset) + ,mTime(aTime) {} + + // Offset from start of segment/link-in-the-chain in bytes. + int64_t mOffset; + + // Presentation time in usecs. + int64_t mTime; + + bool IsNull() + { + return mOffset == INT64_MAX && mTime == INT64_MAX; + } + }; + + // Stores a keyframe's byte-offset, presentation time and the serialno + // of the stream it belongs to. + class nsSeekTarget + { + public: + nsSeekTarget() : mSerial(0) {} + nsKeyPoint mKeyPoint; + uint32_t mSerial; + bool IsNull() + { + return mKeyPoint.IsNull() && mSerial == 0; + } + }; + + // Determines from the seek index the keyframe which you must seek back to + // in order to get all keyframes required to render all streams with + // serialnos in aTracks, at time aTarget. + nsresult IndexedSeekTarget(int64_t aTarget, + nsTArray<uint32_t>& aTracks, + nsSeekTarget& aResult); + + bool HasIndex() const + { + return mIndex.Count() > 0; + } + + // Returns the duration of the active tracks in the media, if we have + // an index. 
aTracks must be filled with the serialnos of the active tracks. + // The duration is calculated as the greatest end time of all active tracks, + // minus the smallest start time of all the active tracks. + nsresult GetDuration(const nsTArray<uint32_t>& aTracks, int64_t& aDuration); + +private: + + // Decodes an index packet. Returns false on failure. + bool DecodeIndex(ogg_packet* aPacket); + // Decodes a fisbone packet. Returns false on failure. + bool DecodeFisbone(ogg_packet* aPacket); + + // Gets the keypoint you must seek to in order to get the keyframe required + // to render the stream at time aTarget on stream with serial aSerialno. + nsresult IndexedSeekTargetForTrack(uint32_t aSerialno, + int64_t aTarget, + nsKeyPoint& aResult); + + // Version of the decoded skeleton track, as per the SKELETON_VERSION macro. + uint32_t mVersion; + + // Presentation time of the resource in milliseconds + int64_t mPresentationTime; + + // Length of the resource in bytes. + int64_t mLength; + + // Stores the keyframe index and duration information for a particular + // stream. + class nsKeyFrameIndex + { + public: + + nsKeyFrameIndex(int64_t aStartTime, int64_t aEndTime) + : mStartTime(aStartTime) + , mEndTime(aEndTime) + { + MOZ_COUNT_CTOR(nsKeyFrameIndex); + } + + ~nsKeyFrameIndex() + { + MOZ_COUNT_DTOR(nsKeyFrameIndex); + } + + void Add(int64_t aOffset, int64_t aTimeMs) + { + mKeyPoints.AppendElement(nsKeyPoint(aOffset, aTimeMs)); + } + + const nsKeyPoint& Get(uint32_t aIndex) const + { + return mKeyPoints[aIndex]; + } + + uint32_t Length() const + { + return mKeyPoints.Length(); + } + + // Presentation time of the first sample in this stream in usecs. + const int64_t mStartTime; + + // End time of the last sample in this stream in usecs. + const int64_t mEndTime; + + private: + nsTArray<nsKeyPoint> mKeyPoints; + }; + + // Maps Ogg serialnos to the index-keypoint list.
+ nsClassHashtable<nsUint32HashKey, nsKeyFrameIndex> mIndex; +}; + +class FlacState : public OggCodecState +{ +public: + explicit FlacState(ogg_page* aBosPage); + + CodecType GetType() override { return TYPE_FLAC; } + bool DecodeHeader(ogg_packet* aPacket) override; + int64_t Time(int64_t granulepos) override; + int64_t PacketDuration(ogg_packet* aPacket) override; + bool IsHeader(ogg_packet* aPacket) override; + nsresult PageIn(ogg_page* aPage) override; + + // Return a hash table with tag metadata. + MetadataTags* GetTags() override; + + const AudioInfo& Info(); + +private: + bool ReconstructFlacGranulepos(void); + + FlacFrameParser mParser; +}; + +} // namespace mozilla + +// This allows the use of nsAutoRefs for an ogg_packet that properly free the +// contents of the packet. +template <> +class nsAutoRefTraits<ogg_packet> : public nsPointerRefTraits<ogg_packet> +{ +public: + static void Release(ogg_packet* aPacket) + { + mozilla::OggCodecState::ReleasePacket(aPacket); + } +}; + + +#endif diff --git a/dom/media/ogg/OggCodecStore.cpp b/dom/media/ogg/OggCodecStore.cpp new file mode 100644 index 0000000000..528932dd02 --- /dev/null +++ b/dom/media/ogg/OggCodecStore.cpp @@ -0,0 +1,37 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mozilla/DebugOnly.h" + +#include "OggCodecStore.h" + +namespace mozilla { + +OggCodecStore::OggCodecStore() +: mMonitor("CodecStore") +{ +} + +void OggCodecStore::Add(uint32_t serial, OggCodecState* codecState) +{ + MonitorAutoLock mon(mMonitor); + mCodecStates.Put(serial, codecState); +} + +bool OggCodecStore::Contains(uint32_t serial) +{ + MonitorAutoLock mon(mMonitor); + return mCodecStates.Get(serial, nullptr); +} + +OggCodecState* OggCodecStore::Get(uint32_t serial) +{ + MonitorAutoLock mon(mMonitor); + return mCodecStates.Get(serial); +} + +} // namespace mozilla + diff --git a/dom/media/ogg/OggCodecStore.h b/dom/media/ogg/OggCodecStore.h new file mode 100644 index 0000000000..2d8fd58307 --- /dev/null +++ b/dom/media/ogg/OggCodecStore.h @@ -0,0 +1,38 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(OggCodecStore_h_) +#define OggCodecStore_h_ + +#include <ogg/ogg.h> + +#include "OggCodecState.h" +#include "VideoUtils.h" +#include "mozilla/Monitor.h" + +namespace mozilla { + +// Thread safe container to store the codec information and the serial for each +// stream. +class OggCodecStore +{ + public: + OggCodecStore(); + void Add(uint32_t serial, OggCodecState* codecState); + bool Contains(uint32_t serial); + OggCodecState* Get(uint32_t serial); + bool IsKnownStream(uint32_t aSerial); + + private: + // Maps Ogg serialnos to OggStreams. + nsClassHashtable<nsUint32HashKey, OggCodecState> mCodecStates; + + // Protects the |mCodecStates| member.
+ Monitor mMonitor; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/ogg/OggDecoder.cpp b/dom/media/ogg/OggDecoder.cpp new file mode 100644 index 0000000000..2631b1cde2 --- /dev/null +++ b/dom/media/ogg/OggDecoder.cpp @@ -0,0 +1,79 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "MediaPrefs.h" +#include "MediaDecoderStateMachine.h" +#include "MediaFormatReader.h" +#include "OggDemuxer.h" +#include "OggDecoder.h" +#include "nsContentTypeParser.h" + +namespace mozilla { + +MediaDecoderStateMachine* OggDecoder::CreateStateMachine() +{ + RefPtr<OggDemuxer> demuxer = new OggDemuxer(GetResource()); + RefPtr<MediaFormatReader> reader = + new MediaFormatReader(this, demuxer, GetVideoFrameContainer()); + demuxer->SetChainingEvents(&reader->TimedMetadataProducer(), + &reader->MediaNotSeekableProducer()); + return new MediaDecoderStateMachine(this, reader); +} + +/* static */ +bool +OggDecoder::IsEnabled() +{ + return MediaPrefs::OggEnabled(); +} + +/* static */ +bool +OggDecoder::CanHandleMediaType(const nsACString& aMIMETypeExcludingCodecs, + const nsAString& aCodecs) +{ + if (!IsEnabled()) { + return false; + } + + const bool isOggAudio = aMIMETypeExcludingCodecs.EqualsASCII("audio/ogg"); + const bool isOggVideo = + aMIMETypeExcludingCodecs.EqualsASCII("video/ogg") || + aMIMETypeExcludingCodecs.EqualsASCII("application/ogg"); + + if (!isOggAudio && !isOggVideo) { + return false; + } + + if (aCodecs.IsEmpty()) { + // No codecs parameter was supplied, so there is nothing further to + // verify: the Ogg MIME type on its own is accepted. + return true; + } + // Verify that all the codecs specified are ones that we expect that + // we can play.
+ nsTArray<nsString> codecs; + if (!ParseCodecsString(aCodecs, codecs)) { + return false; + } + for (const nsString& codec : codecs) { + if ((IsOpusEnabled() && codec.EqualsLiteral("opus")) || + codec.EqualsLiteral("vorbis") || + (MediaPrefs::FlacInOgg() && codec.EqualsLiteral("flac"))) { + continue; + } + // Note: Only accept Theora in a video content type, not in an audio + // content type. + if (isOggVideo && codec.EqualsLiteral("theora")) { + continue; + } + // Some unsupported codec. + return false; + } + return true; +} + +} // namespace mozilla diff --git a/dom/media/ogg/OggDecoder.h b/dom/media/ogg/OggDecoder.h new file mode 100644 index 0000000000..3a98f29181 --- /dev/null +++ b/dom/media/ogg/OggDecoder.h @@ -0,0 +1,61 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(OggDecoder_h_) +#define OggDecoder_h_ + +#include "MediaDecoder.h" + +namespace mozilla { + +class OggDecoder : public MediaDecoder +{ +public: + explicit OggDecoder(MediaDecoderOwner* aOwner) + : MediaDecoder(aOwner) + , mShutdownBitMonitor("mShutdownBitMonitor") + , mShutdownBit(false) + {} + + MediaDecoder* Clone(MediaDecoderOwner* aOwner) override { + if (!IsOggEnabled()) { + return nullptr; + } + return new OggDecoder(aOwner); + } + MediaDecoderStateMachine* CreateStateMachine() override; + + // For yucky legacy reasons, the ogg decoder needs to do a cross-thread read + // to check for shutdown while it hogs its own task queue. We don't want to + // protect the general state with a lock, so we make a special copy and a + // special-purpose lock. This method may be called on any thread. 
+ bool IsOggDecoderShutdown() override + { + MonitorAutoLock lock(mShutdownBitMonitor); + return mShutdownBit; + } + + // Returns true if aMIMETypeExcludingCodecs is a type that we think we can + // render with a platform decoder backend. If aCodecs is non-empty, it holds + // a comma-delimited list of codecs to check support for. + static bool CanHandleMediaType(const nsACString& aMIMETypeExcludingCodecs, + const nsAString& aCodecs); + + static bool IsEnabled(); + +protected: + void ShutdownBitChanged() override + { + MonitorAutoLock lock(mShutdownBitMonitor); + mShutdownBit = mStateMachineIsShutdown; + } + + Monitor mShutdownBitMonitor; + bool mShutdownBit; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/ogg/OggDemuxer.cpp b/dom/media/ogg/OggDemuxer.cpp new file mode 100644 index 0000000000..591a5248fc --- /dev/null +++ b/dom/media/ogg/OggDemuxer.cpp @@ -0,0 +1,2194 @@ + /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsError.h" +#include "MediaDecoderStateMachine.h" +#include "AbstractMediaDecoder.h" +#include "OggDemuxer.h" +#include "OggCodecState.h" +#include "mozilla/Atomics.h" +#include "mozilla/PodOperations.h" +#include "mozilla/SharedThreadPool.h" +#include "mozilla/Telemetry.h" +#include "mozilla/TimeStamp.h" +#include "MediaDataDemuxer.h" +#include "nsAutoRef.h" +#include "XiphExtradata.h" +#include "MediaPrefs.h" + +#include <algorithm> + +extern mozilla::LazyLogModule gMediaDemuxerLog; +#define OGG_DEBUG(arg, ...) MOZ_LOG(gMediaDemuxerLog, mozilla::LogLevel::Debug, ("OggDemuxer(%p)::%s: " arg, this, __func__, ##__VA_ARGS__)) + +// Un-comment to enable logging of seek bisections.
+//#define SEEK_LOGGING +#ifdef SEEK_LOGGING +#define SEEK_LOG(type, msg) MOZ_LOG(gMediaDemuxerLog, type, msg) +#else +#define SEEK_LOG(type, msg) +#endif + +namespace mozilla +{ + +using media::TimeUnit; +using media::TimeInterval; +using media::TimeIntervals; + +// The number of microseconds of "fuzz" we use in a bisection search over +// HTTP. When we're seeking with fuzz, we'll stop the search if a bisection +// lands between the seek target and OGG_SEEK_FUZZ_USECS microseconds before the +// seek target. This is because it's usually quicker to just keep downloading +// from an existing connection than to do another bisection inside that +// small range, which would open a new HTTP connection. +static const uint32_t OGG_SEEK_FUZZ_USECS = 500000; + +// The number of microseconds of "pre-roll" we use for Opus streams. +// The specification recommends 80 ms. +static const int64_t OGG_SEEK_OPUS_PREROLL = 80 * USECS_PER_MS; + +static Atomic<uint32_t> sStreamSourceID(0u); + +class OggHeaders +{ +public: + OggHeaders() {} + ~OggHeaders() + { + for (size_t i = 0; i < mHeaders.Length(); i++) { + delete[] mHeaders[i]; + } + } + + void AppendPacket(const ogg_packet* aPacket) + { + size_t packetSize = aPacket->bytes; + unsigned char* packetData = new unsigned char[packetSize]; + memcpy(packetData, aPacket->packet, packetSize); + mHeaders.AppendElement(packetData); + mHeaderLens.AppendElement(packetSize); + } + + nsTArray<const unsigned char*> mHeaders; + nsTArray<size_t> mHeaderLens; +}; + +// Return the corresponding category in aKind based on the following specs.
+// (https://www.whatwg.org/specs/web-apps/current- +// work/multipage/embedded-content.html#dom-audiotrack-kind) & +// (http://wiki.xiph.org/SkeletonHeaders) +const nsString +OggDemuxer::GetKind(const nsCString& aRole) +{ + if (aRole.Find("audio/main") != -1 || aRole.Find("video/main") != -1) { + return NS_LITERAL_STRING("main"); + } else if (aRole.Find("audio/alternate") != -1 || + aRole.Find("video/alternate") != -1) { + return NS_LITERAL_STRING("alternative"); + } else if (aRole.Find("audio/audiodesc") != -1) { + return NS_LITERAL_STRING("descriptions"); + } else if (aRole.Find("audio/described") != -1) { + return NS_LITERAL_STRING("main-desc"); + } else if (aRole.Find("audio/dub") != -1) { + return NS_LITERAL_STRING("translation"); + } else if (aRole.Find("audio/commentary") != -1) { + return NS_LITERAL_STRING("commentary"); + } else if (aRole.Find("video/sign") != -1) { + return NS_LITERAL_STRING("sign"); + } else if (aRole.Find("video/captioned") != -1) { + return NS_LITERAL_STRING("captions"); + } else if (aRole.Find("video/subtitled") != -1) { + return NS_LITERAL_STRING("subtitles"); + } + return EmptyString(); +} + +void +OggDemuxer::InitTrack(MessageField* aMsgInfo, + TrackInfo* aInfo, + bool aEnable) +{ + MOZ_ASSERT(aMsgInfo); + MOZ_ASSERT(aInfo); + + nsCString* sName = aMsgInfo->mValuesStore.Get(eName); + nsCString* sRole = aMsgInfo->mValuesStore.Get(eRole); + nsCString* sTitle = aMsgInfo->mValuesStore.Get(eTitle); + nsCString* sLanguage = aMsgInfo->mValuesStore.Get(eLanguage); + aInfo->Init(sName? NS_ConvertUTF8toUTF16(*sName):EmptyString(), + sRole? GetKind(*sRole):EmptyString(), + sTitle? NS_ConvertUTF8toUTF16(*sTitle):EmptyString(), + sLanguage? 
NS_ConvertUTF8toUTF16(*sLanguage):EmptyString(), + aEnable); +} + +OggDemuxer::OggDemuxer(MediaResource* aResource) + : mTheoraState(nullptr) + , mVorbisState(nullptr) + , mOpusState(nullptr) + , mFlacState(nullptr) + , mOpusEnabled(MediaDecoder::IsOpusEnabled()) + , mSkeletonState(nullptr) + , mAudioOggState(aResource) + , mVideoOggState(aResource) + , mVorbisSerial(0) + , mOpusSerial(0) + , mTheoraSerial(0) + , mFlacSerial(0) + , mOpusPreSkip(0) + , mIsChained(false) + , mTimedMetadataEvent(nullptr) + , mOnSeekableEvent(nullptr) +{ + MOZ_COUNT_CTOR(OggDemuxer); + PodZero(&mTheoraInfo); +} + +OggDemuxer::~OggDemuxer() +{ + MOZ_COUNT_DTOR(OggDemuxer); + Reset(TrackInfo::kAudioTrack); + Reset(TrackInfo::kVideoTrack); + if (HasAudio() || HasVideo()) { + // If we were able to initialize our decoders, report whether we encountered + // a chained stream or not. + bool isChained = mIsChained; + void* ptr = this; + nsCOMPtr<nsIRunnable> task = NS_NewRunnableFunction([ptr, isChained]() -> void { + // We can't use OGG_DEBUG here because it implicitly refers to `this`, + // which we can't capture in this runnable. 
+ MOZ_LOG(gMediaDemuxerLog, mozilla::LogLevel::Debug, + ("OggDemuxer(%p)::%s: Reporting telemetry MEDIA_OGG_LOADED_IS_CHAINED=%d", + ptr, __func__, isChained)); + Telemetry::Accumulate(Telemetry::ID::MEDIA_OGG_LOADED_IS_CHAINED, isChained); + }); + AbstractThread::MainThread()->Dispatch(task.forget()); + } +} + +void +OggDemuxer::SetChainingEvents(TimedMetadataEventProducer* aMetadataEvent, + MediaEventProducer<void>* aOnSeekableEvent) +{ + mTimedMetadataEvent = aMetadataEvent; + mOnSeekableEvent = aOnSeekableEvent; +} + + +bool +OggDemuxer::HasAudio() +const +{ + return mVorbisState || mOpusState || mFlacState; +} + +bool +OggDemuxer::HasVideo() +const +{ + return mTheoraState; +} + +bool +OggDemuxer::HaveStartTime() +const +{ + return mStartTime.isSome(); +} + +int64_t +OggDemuxer::StartTime() const +{ + return mStartTime.refOr(0); +} + +bool +OggDemuxer::HaveStartTime(TrackInfo::TrackType aType) +{ + return OggState(aType).mStartTime.isSome(); +} + +int64_t +OggDemuxer::StartTime(TrackInfo::TrackType aType) +{ + return OggState(aType).mStartTime.refOr(TimeUnit::FromMicroseconds(0)).ToMicroseconds(); +} + +RefPtr<OggDemuxer::InitPromise> +OggDemuxer::Init() +{ + int ret = ogg_sync_init(OggSyncState(TrackInfo::kAudioTrack)); + if (ret != 0) { + return InitPromise::CreateAndReject(NS_ERROR_OUT_OF_MEMORY, __func__); + } + ret = ogg_sync_init(OggSyncState(TrackInfo::kVideoTrack)); + if (ret != 0) { + return InitPromise::CreateAndReject(NS_ERROR_OUT_OF_MEMORY, __func__); + } + if (ReadMetadata() != NS_OK) { + return InitPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_METADATA_ERR, __func__); + } + + if (!GetNumberTracks(TrackInfo::kAudioTrack) && + !GetNumberTracks(TrackInfo::kVideoTrack)) { + return InitPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_METADATA_ERR, __func__); + } + + return InitPromise::CreateAndResolve(NS_OK, __func__); +} + +bool +OggDemuxer::HasTrackType(TrackInfo::TrackType aType) const +{ + return !!GetNumberTracks(aType); +} + +OggCodecState* 
+OggDemuxer::GetTrackCodecState(TrackInfo::TrackType aType) const +{ + switch(aType) { + case TrackInfo::kAudioTrack: + if (mVorbisState) { + return mVorbisState; + } else if (mOpusState) { + return mOpusState; + } else { + return mFlacState; + } + case TrackInfo::kVideoTrack: + return mTheoraState; + default: + return 0; + } +} + +TrackInfo::TrackType +OggDemuxer::GetCodecStateType(OggCodecState* aState) const +{ + switch (aState->GetType()) { + case OggCodecState::TYPE_THEORA: + return TrackInfo::kVideoTrack; + case OggCodecState::TYPE_OPUS: + case OggCodecState::TYPE_VORBIS: + case OggCodecState::TYPE_FLAC: + return TrackInfo::kAudioTrack; + default: + return TrackInfo::kUndefinedTrack; + } +} + +uint32_t +OggDemuxer::GetNumberTracks(TrackInfo::TrackType aType) const +{ + switch(aType) { + case TrackInfo::kAudioTrack: + return HasAudio() ? 1 : 0; + case TrackInfo::kVideoTrack: + return HasVideo() ? 1 : 0; + default: + return 0; + } +} + +UniquePtr<TrackInfo> +OggDemuxer::GetTrackInfo(TrackInfo::TrackType aType, size_t aTrackNumber) const +{ + switch(aType) { + case TrackInfo::kAudioTrack: + return mInfo.mAudio.Clone(); + case TrackInfo::kVideoTrack: + return mInfo.mVideo.Clone(); + default: + return nullptr; + } +} + +already_AddRefed<MediaTrackDemuxer> +OggDemuxer::GetTrackDemuxer(TrackInfo::TrackType aType, uint32_t aTrackNumber) +{ + if (GetNumberTracks(aType) <= aTrackNumber) { + return nullptr; + } + RefPtr<OggTrackDemuxer> e = new OggTrackDemuxer(this, aType, aTrackNumber); + mDemuxers.AppendElement(e); + + return e.forget(); +} + +nsresult +OggDemuxer::Reset(TrackInfo::TrackType aType) +{ + // Discard any previously buffered packets/pages. 
+ ogg_sync_reset(OggSyncState(aType)); + OggCodecState* trackState = GetTrackCodecState(aType); + if (trackState) { + return trackState->Reset(); + } + OggState(aType).mNeedKeyframe = true; + return NS_OK; +} + +bool +OggDemuxer::ReadHeaders(TrackInfo::TrackType aType, + OggCodecState* aState, + OggHeaders& aHeaders) +{ + while (!aState->DoneReadingHeaders()) { + DemuxUntilPacketAvailable(aType, aState); + ogg_packet* packet = aState->PacketOut(); + if (!packet) { + // mSerial is a uint32_t; cast so the argument matches the %lu specifier. + OGG_DEBUG("Ran out of header packets early; deactivating stream %lu", static_cast<unsigned long>(aState->mSerial)); + aState->Deactivate(); + return false; + } + + // Save a copy of the header packet for the decoder to use later; + // OggCodecState::DecodeHeader will free it when processing locally. + aHeaders.AppendPacket(packet); + + // Local OggCodecState needs to decode headers in order to process + // packet granulepos -> time mappings, etc. + if (!aState->DecodeHeader(packet)) { + OGG_DEBUG("Failed to decode ogg header packet; deactivating stream %lu", static_cast<unsigned long>(aState->mSerial)); + aState->Deactivate(); + return false; + } + } + + return aState->Init(); +} + +void +OggDemuxer::BuildSerialList(nsTArray<uint32_t>& aTracks) +{ + // Obtaining seek index information for currently active bitstreams. + if (HasVideo()) { + aTracks.AppendElement(mTheoraState->mSerial); + } + if (HasAudio()) { + if (mVorbisState) { + aTracks.AppendElement(mVorbisState->mSerial); + } else if (mOpusState) { + aTracks.AppendElement(mOpusState->mSerial); + } + } +} + +void +OggDemuxer::SetupTargetTheora(TheoraState* aTheoraState, OggHeaders& aHeaders) +{ + if (mTheoraState) { + mTheoraState->Reset(); + } + + nsIntRect picture = nsIntRect(aTheoraState->mInfo.pic_x, + aTheoraState->mInfo.pic_y, + aTheoraState->mInfo.pic_width, + aTheoraState->mInfo.pic_height); + + nsIntSize displaySize = nsIntSize(aTheoraState->mInfo.pic_width, + aTheoraState->mInfo.pic_height); + + // Apply the aspect ratio to produce the intrinsic display size we report + // to the element.
+ ScaleDisplayByAspectRatio(displaySize, aTheoraState->mPixelAspectRatio); + + nsIntSize frameSize(aTheoraState->mInfo.frame_width, + aTheoraState->mInfo.frame_height); + if (IsValidVideoRegion(frameSize, picture, displaySize)) { + // Video track's frame sizes will not overflow. Activate the video track. + mInfo.mVideo.mMimeType = "video/theora"; + mInfo.mVideo.mDisplay = displaySize; + mInfo.mVideo.mImage = frameSize; + mInfo.mVideo.SetImageRect(picture); + + // Copy Theora info data for time computations on other threads. + memcpy(&mTheoraInfo, &aTheoraState->mInfo, sizeof(mTheoraInfo)); + + // Save header packets for the decoder + if (!XiphHeadersToExtradata(mInfo.mVideo.mCodecSpecificConfig, + aHeaders.mHeaders, aHeaders.mHeaderLens)) { + return; + } + + mTheoraState = aTheoraState; + mTheoraSerial = aTheoraState->mSerial; + } +} + +void +OggDemuxer::SetupTargetVorbis(VorbisState* aVorbisState, OggHeaders& aHeaders) +{ + if (mVorbisState) { + mVorbisState->Reset(); + } + + // Copy Vorbis info data for time computations on other threads. 
+ memcpy(&mVorbisInfo, &aVorbisState->mInfo, sizeof(mVorbisInfo)); + mVorbisInfo.codec_setup = nullptr; + + mInfo.mAudio.mMimeType = "audio/vorbis"; + mInfo.mAudio.mRate = aVorbisState->mInfo.rate; + mInfo.mAudio.mChannels = aVorbisState->mInfo.channels; + + // Save header packets for the decoder + if (!XiphHeadersToExtradata(mInfo.mAudio.mCodecSpecificConfig, + aHeaders.mHeaders, aHeaders.mHeaderLens)) { + return; + } + + mVorbisState = aVorbisState; + mVorbisSerial = aVorbisState->mSerial; +} + +void +OggDemuxer::SetupTargetOpus(OpusState* aOpusState, OggHeaders& aHeaders) +{ + if (mOpusState) { + mOpusState->Reset(); + } + + mInfo.mAudio.mMimeType = "audio/opus"; + mInfo.mAudio.mRate = aOpusState->mRate; + mInfo.mAudio.mChannels = aOpusState->mChannels; + + // Save preskip & the first header packet for the Opus decoder + uint64_t preSkip = aOpusState->Time(0, aOpusState->mPreSkip); + uint8_t c[sizeof(preSkip)]; + BigEndian::writeUint64(&c[0], preSkip); + mInfo.mAudio.mCodecSpecificConfig->AppendElements(&c[0], sizeof(preSkip)); + mInfo.mAudio.mCodecSpecificConfig->AppendElements(aHeaders.mHeaders[0], + aHeaders.mHeaderLens[0]); + + mOpusState = aOpusState; + mOpusSerial = aOpusState->mSerial; + mOpusPreSkip = aOpusState->mPreSkip; +} + +void +OggDemuxer::SetupTargetFlac(FlacState* aFlacState, OggHeaders& aHeaders) +{ + if (mFlacState) { + mFlacState->Reset(); + } + + mInfo.mAudio = aFlacState->Info(); + mFlacState = aFlacState; + mFlacSerial = aFlacState->mSerial; +} + +void +OggDemuxer::SetupTargetSkeleton() +{ + // Setup skeleton related information after mVorbisState & mTheoraState + // have been set (if they exist). + if (mSkeletonState) { + OggHeaders headers; + if (!HasAudio() && !HasVideo()) { + // We have a skeleton track, but no audio or video, may as well disable + // the skeleton, we can't do anything useful with this media.
+ OGG_DEBUG("Deactivating skeleton stream %lu", static_cast<unsigned long>(mSkeletonState->mSerial)); + mSkeletonState->Deactivate(); + } else if (ReadHeaders(TrackInfo::kAudioTrack, mSkeletonState, headers) && + mSkeletonState->HasIndex()) { + // We don't particularly care about which track we are currently using + // as both MediaResources point to the same content. + // Extract the duration info out of the index, so we don't need to seek to + // the end of resource to get it. + nsTArray<uint32_t> tracks; + BuildSerialList(tracks); + int64_t duration = 0; + if (NS_SUCCEEDED(mSkeletonState->GetDuration(tracks, duration))) { + // int64_t is not long long on every platform; cast to match %lld. + OGG_DEBUG("Got duration from Skeleton index %lld", static_cast<long long>(duration)); + mInfo.mMetadataDuration.emplace(TimeUnit::FromMicroseconds(duration)); + } + } + } +} + +void +OggDemuxer::SetupMediaTracksInfo(const nsTArray<uint32_t>& aSerials) +{ + // For each serial number + // 1. Retrieve a codecState from mCodecStore by this serial number. + // 2. Retrieve a message field from mMsgFieldStore by this serial number. + // 3. For now, skip if the serial number refers to a non-primary bitstream. + // 4. Setup track and other audio/video related information per different types.
+ for (size_t i = 0; i < aSerials.Length(); i++) { + uint32_t serial = aSerials[i]; + OggCodecState* codecState = mCodecStore.Get(serial); + + MessageField* msgInfo = nullptr; + if (mSkeletonState && mSkeletonState->mMsgFieldStore.Contains(serial)) { + mSkeletonState->mMsgFieldStore.Get(serial, &msgInfo); + } + + if (codecState->GetType() == OggCodecState::TYPE_THEORA) { + TheoraState* theoraState = static_cast<TheoraState*>(codecState); + if (!(mTheoraState && mTheoraState->mSerial == theoraState->mSerial)) { + continue; + } + + if (msgInfo) { + InitTrack(msgInfo, &mInfo.mVideo, mTheoraState == theoraState); + } + + nsIntRect picture = nsIntRect(theoraState->mInfo.pic_x, + theoraState->mInfo.pic_y, + theoraState->mInfo.pic_width, + theoraState->mInfo.pic_height); + nsIntSize displaySize = nsIntSize(theoraState->mInfo.pic_width, + theoraState->mInfo.pic_height); + nsIntSize frameSize(theoraState->mInfo.frame_width, + theoraState->mInfo.frame_height); + ScaleDisplayByAspectRatio(displaySize, theoraState->mPixelAspectRatio); + if (IsValidVideoRegion(frameSize, picture, displaySize)) { + mInfo.mVideo.mDisplay = displaySize; + } + } else if (codecState->GetType() == OggCodecState::TYPE_VORBIS) { + VorbisState* vorbisState = static_cast<VorbisState*>(codecState); + if (!(mVorbisState && mVorbisState->mSerial == vorbisState->mSerial)) { + continue; + } + + if (msgInfo) { + InitTrack(msgInfo, + &mInfo.mAudio, + mVorbisState == vorbisState); + } + + mInfo.mAudio.mRate = vorbisState->mInfo.rate; + mInfo.mAudio.mChannels = vorbisState->mInfo.channels; + FillTags(&mInfo.mAudio, vorbisState->GetTags()); + } else if (codecState->GetType() == OggCodecState::TYPE_OPUS) { + OpusState* opusState = static_cast<OpusState*>(codecState); + if (!(mOpusState && mOpusState->mSerial == opusState->mSerial)) { + continue; + } + + if (msgInfo) { + InitTrack(msgInfo, &mInfo.mAudio, mOpusState == opusState); + } + + mInfo.mAudio.mRate = opusState->mRate; + mInfo.mAudio.mChannels = 
opusState->mChannels; + FillTags(&mInfo.mAudio, opusState->GetTags()); + } else if (codecState->GetType() == OggCodecState::TYPE_FLAC) { + FlacState* flacState = static_cast<FlacState*>(codecState); + if (!(mFlacState && mFlacState->mSerial == flacState->mSerial)) { + continue; + } + + if (msgInfo) { + InitTrack(msgInfo, &mInfo.mAudio, mFlacState == flacState); + } + + mInfo.mAudio = flacState->Info(); + FillTags(&mInfo.mAudio, flacState->GetTags()); + } + } +} + +void +OggDemuxer::FillTags(TrackInfo* aInfo, MetadataTags* aTags) +{ + if (!aTags) { + return; + } + nsAutoPtr<MetadataTags> tags(aTags); + for (auto iter = aTags->Iter(); !iter.Done(); iter.Next()) { + aInfo->mTags.AppendElement(MetadataTag(iter.Key(), iter.Data())); + } +} + +nsresult +OggDemuxer::ReadMetadata() +{ + OGG_DEBUG("OggDemuxer::ReadMetadata called!"); + + // We read packets until all bitstreams have read all their header packets. + // We record the offset of the first non-header page so that we know + // what page to seek to when seeking to the media start. + + // @FIXME we have to read all the header packets on all the streams + // and THEN we can run SetupTarget* + // @fixme fixme + + TrackInfo::TrackType tracks[2] = + { TrackInfo::kAudioTrack, TrackInfo::kVideoTrack }; + + nsTArray<OggCodecState*> bitstreams; + nsTArray<uint32_t> serials; + + for (uint32_t i = 0; i < ArrayLength(tracks); i++) { + ogg_page page; + bool readAllBOS = false; + while (!readAllBOS) { + if (!ReadOggPage(tracks[i], &page)) { + // Some kind of error... + OGG_DEBUG("OggDemuxer::ReadOggPage failed? leaving ReadMetadata..."); + return NS_ERROR_FAILURE; + } + + int serial = ogg_page_serialno(&page); + + if (!ogg_page_bos(&page)) { + // We've encountered a non Beginning Of Stream page. No more BOS pages + // can follow in this Ogg segment, so there will be no other bitstreams + // in the Ogg (unless it's invalid). 
+ readAllBOS = true; + } else if (!mCodecStore.Contains(serial)) { + // We've not encountered a stream with this serial number before. Create + // an OggCodecState to demux it, and map that to the OggCodecState + // in mCodecStates. + OggCodecState* codecState = OggCodecState::Create(&page); + mCodecStore.Add(serial, codecState); + bitstreams.AppendElement(codecState); + serials.AppendElement(serial); + } + if (NS_FAILED(DemuxOggPage(tracks[i], &page))) { + return NS_ERROR_FAILURE; + } + } + } + + // We've read all BOS pages, so we know the streams contained in the media. + // 1. Find the first encountered Theora/Vorbis/Opus bitstream, and configure + // it as the target A/V bitstream. + // 2. Deactivate the rest of bitstreams for now, until we have MediaInfo + // support multiple track infos. + for (uint32_t i = 0; i < bitstreams.Length(); ++i) { + OggCodecState* s = bitstreams[i]; + if (s) { + OggHeaders headers; + if (s->GetType() == OggCodecState::TYPE_THEORA && + ReadHeaders(TrackInfo::kVideoTrack, s, headers)) { + if (!mTheoraState) { + TheoraState* theoraState = static_cast<TheoraState*>(s); + SetupTargetTheora(theoraState, headers); + } else { + s->Deactivate(); + } + } else if (s->GetType() == OggCodecState::TYPE_VORBIS && + ReadHeaders(TrackInfo::kAudioTrack, s, headers)) { + if (!mVorbisState) { + VorbisState* vorbisState = static_cast<VorbisState*>(s); + SetupTargetVorbis(vorbisState, headers); + } else { + s->Deactivate(); + } + } else if (s->GetType() == OggCodecState::TYPE_OPUS && + ReadHeaders(TrackInfo::kAudioTrack, s, headers)) { + if (mOpusEnabled) { + if (!mOpusState) { + OpusState* opusState = static_cast<OpusState*>(s); + SetupTargetOpus(opusState, headers); + } else { + s->Deactivate(); + } + } else { + NS_WARNING("Opus decoding disabled." 
+ " See media.opus.enabled in about:config"); + } + } else if (MediaPrefs::FlacInOgg() && + s->GetType() == OggCodecState::TYPE_FLAC && + ReadHeaders(TrackInfo::kAudioTrack, s, headers)) { + if (!mFlacState) { + FlacState* flacState = static_cast<FlacState*>(s); + SetupTargetFlac(flacState, headers); + } else { + s->Deactivate(); + } + } else if (s->GetType() == OggCodecState::TYPE_SKELETON && !mSkeletonState) { + mSkeletonState = static_cast<SkeletonState*>(s); + } else { + // Deactivate any non-primary bitstreams. + s->Deactivate(); + } + } + } + + SetupTargetSkeleton(); + SetupMediaTracksInfo(serials); + + if (HasAudio() || HasVideo()) { + int64_t startTime = -1; + FindStartTime(startTime); + if (startTime >= 0) { + OGG_DEBUG("Detected stream start time %lld", startTime); + mStartTime.emplace(startTime); + } + + if (mInfo.mMetadataDuration.isNothing() && + Resource(TrackInfo::kAudioTrack)->GetLength() >= 0) { + // We didn't get a duration from the index or a Content-Duration header. + // Seek to the end of file to find the end time. 
+ int64_t length = Resource(TrackInfo::kAudioTrack)->GetLength(); + + NS_ASSERTION(length > 0, "Must have a content length to get end time"); + + int64_t endTime = RangeEndTime(TrackInfo::kAudioTrack, length); + + if (endTime != -1) { + mInfo.mUnadjustedMetadataEndTime.emplace(TimeUnit::FromMicroseconds(endTime)); + mInfo.mMetadataDuration.emplace(TimeUnit::FromMicroseconds(endTime - mStartTime.refOr(0))); + OGG_DEBUG("Got Ogg duration from seeking to end %lld", endTime); + } + } + if (mInfo.mMetadataDuration.isNothing()) { + mInfo.mMetadataDuration.emplace(TimeUnit::FromInfinity()); + } + if (HasAudio()) { + mInfo.mAudio.mDuration = mInfo.mMetadataDuration->ToMicroseconds(); + } + if (HasVideo()) { + mInfo.mVideo.mDuration = mInfo.mMetadataDuration->ToMicroseconds(); + } + } else { + OGG_DEBUG("no audio or video tracks"); + return NS_ERROR_FAILURE; + } + + OGG_DEBUG("success?!"); + return NS_OK; +} + +void +OggDemuxer::SetChained() { + { + if (mIsChained) { + return; + } + mIsChained = true; + } + if (mOnSeekableEvent) { + mOnSeekableEvent->Notify(); + } +} + +bool +OggDemuxer::ReadOggChain(const media::TimeUnit& aLastEndTime) +{ + bool chained = false; + OpusState* newOpusState = nullptr; + VorbisState* newVorbisState = nullptr; + FlacState* newFlacState = nullptr; + nsAutoPtr<MetadataTags> tags; + + if (HasVideo() || HasSkeleton() || !HasAudio()) { + return false; + } + + ogg_page page; + if (!ReadOggPage(TrackInfo::kAudioTrack, &page) || !ogg_page_bos(&page)) { + // Chaining is only supported for audio only ogg files. 
+ return false; + } + + int serial = ogg_page_serialno(&page); + if (mCodecStore.Contains(serial)) { + return false; + } + + nsAutoPtr<OggCodecState> codecState; + codecState = OggCodecState::Create(&page); + if (!codecState) { + return false; + } + + if (mVorbisState && (codecState->GetType() == OggCodecState::TYPE_VORBIS)) { + newVorbisState = static_cast<VorbisState*>(codecState.get()); + } else if (mOpusState && (codecState->GetType() == OggCodecState::TYPE_OPUS)) { + newOpusState = static_cast<OpusState*>(codecState.get()); + } else if (mFlacState && (codecState->GetType() == OggCodecState::TYPE_FLAC)) { + newFlacState = static_cast<FlacState*>(codecState.get()); + } else { + return false; + } + + OggCodecState* state; + + mCodecStore.Add(serial, codecState.forget()); + state = mCodecStore.Get(serial); + + NS_ENSURE_TRUE(state != nullptr, false); + + if (NS_FAILED(state->PageIn(&page))) { + return false; + } + + MessageField* msgInfo = nullptr; + if (mSkeletonState && mSkeletonState->mMsgFieldStore.Contains(serial)) { + mSkeletonState->mMsgFieldStore.Get(serial, &msgInfo); + } + + OggHeaders vorbisHeaders; + if ((newVorbisState && + ReadHeaders(TrackInfo::kAudioTrack, newVorbisState, vorbisHeaders)) && + (mVorbisState->mInfo.rate == newVorbisState->mInfo.rate) && + (mVorbisState->mInfo.channels == newVorbisState->mInfo.channels)) { + + SetupTargetVorbis(newVorbisState, vorbisHeaders); + LOG(LogLevel::Debug, ("New vorbis ogg link, serial=%d\n", mVorbisSerial)); + + if (msgInfo) { + InitTrack(msgInfo, &mInfo.mAudio, true); + } + mInfo.mAudio.mMimeType = NS_LITERAL_CSTRING("audio/vorbis"); + mInfo.mAudio.mRate = newVorbisState->mInfo.rate; + mInfo.mAudio.mChannels = newVorbisState->mInfo.channels; + + chained = true; + tags = newVorbisState->GetTags(); + } + + OggHeaders opusHeaders; + if ((newOpusState && + ReadHeaders(TrackInfo::kAudioTrack, newOpusState, opusHeaders)) && + (mOpusState->mRate == newOpusState->mRate) && + (mOpusState->mChannels == 
newOpusState->mChannels)) { + + SetupTargetOpus(newOpusState, opusHeaders); + + if (msgInfo) { + InitTrack(msgInfo, &mInfo.mAudio, true); + } + mInfo.mAudio.mMimeType = NS_LITERAL_CSTRING("audio/opus"); + mInfo.mAudio.mRate = newOpusState->mRate; + mInfo.mAudio.mChannels = newOpusState->mChannels; + + chained = true; + tags = newOpusState->GetTags(); + } + + OggHeaders flacHeaders; + if ((newFlacState && + ReadHeaders(TrackInfo::kAudioTrack, newFlacState, flacHeaders)) && + (mFlacState->Info().mRate == newFlacState->Info().mRate) && + (mFlacState->Info().mChannels == newFlacState->Info().mChannels)) { + + SetupTargetFlac(newFlacState, flacHeaders); + LOG(LogLevel::Debug, ("New flac ogg link, serial=%d\n", mFlacSerial)); + + if (msgInfo) { + InitTrack(msgInfo, &mInfo.mAudio, true); + } + + mInfo.mAudio = newFlacState->Info(); + chained = true; + tags = newFlacState->GetTags(); + } + + if (chained) { + SetChained(); + mInfo.mMediaSeekable = false; + mDecodedAudioDuration += aLastEndTime; + if (mTimedMetadataEvent) { + mTimedMetadataEvent->Notify( + TimedMetadata(mDecodedAudioDuration, + Move(tags), + nsAutoPtr<MediaInfo>(new MediaInfo(mInfo)))); + } + // Setup a new TrackInfo so that the MediaFormatReader will flush the + // current decoder. 
+ mSharedAudioTrackInfo = new SharedTrackInfo(mInfo.mAudio, ++sStreamSourceID); + return true; + } + + return false; +} + +OggDemuxer::OggStateContext& +OggDemuxer::OggState(TrackInfo::TrackType aType) +{ + if (aType == TrackInfo::kVideoTrack) { + return mVideoOggState; + } + return mAudioOggState; +} + +ogg_sync_state* +OggDemuxer::OggSyncState(TrackInfo::TrackType aType) +{ + return &OggState(aType).mOggState.mState; +} + +MediaResourceIndex* +OggDemuxer::Resource(TrackInfo::TrackType aType) +{ + return &OggState(aType).mResource; +} + +MediaResourceIndex* +OggDemuxer::CommonResource() +{ + return &mAudioOggState.mResource; +} + +bool +OggDemuxer::ReadOggPage(TrackInfo::TrackType aType, ogg_page* aPage) +{ + int ret = 0; + while((ret = ogg_sync_pageseek(OggSyncState(aType), aPage)) <= 0) { + if (ret < 0) { + // Lost page sync, have to skip up to next page. + continue; + } + // Returns a buffer that can be written too + // with the given size. This buffer is stored + // in the ogg synchronisation structure. + char* buffer = ogg_sync_buffer(OggSyncState(aType), 4096); + NS_ASSERTION(buffer, "ogg_sync_buffer failed"); + + // Read from the resource into the buffer + uint32_t bytesRead = 0; + + nsresult rv = Resource(aType)->Read(buffer, 4096, &bytesRead); + if (NS_FAILED(rv) || !bytesRead) { + // End of file or error. 
+ return false; + } + + // Update the synchronisation layer with the number + // of bytes written to the buffer + ret = ogg_sync_wrote(OggSyncState(aType), bytesRead); + NS_ENSURE_TRUE(ret == 0, false); + } + + return true; +} + +nsresult +OggDemuxer::DemuxOggPage(TrackInfo::TrackType aType, ogg_page* aPage) +{ + int serial = ogg_page_serialno(aPage); + OggCodecState* codecState = mCodecStore.Get(serial); + if (codecState == nullptr) { + OGG_DEBUG("encountered packet for unrecognized codecState"); + return NS_ERROR_FAILURE; + } + if (GetCodecStateType(codecState) != aType && + codecState->GetType() != OggCodecState::TYPE_SKELETON) { + // Not a page we're interested in. + return NS_OK; + } + if (NS_FAILED(codecState->PageIn(aPage))) { + OGG_DEBUG("codecState->PageIn failed"); + return NS_ERROR_FAILURE; + } + return NS_OK; +} + +bool +OggDemuxer::IsSeekable() const +{ + if (mIsChained) { + return false; + } + return true; +} + +UniquePtr<EncryptionInfo> +OggDemuxer::GetCrypto() +{ + return nullptr; +} + +ogg_packet* +OggDemuxer::GetNextPacket(TrackInfo::TrackType aType) +{ + OggCodecState* state = GetTrackCodecState(aType); + ogg_packet* packet = nullptr; + OggStateContext& context = OggState(aType); + + while (true) { + if (packet) { + OggCodecState::ReleasePacket(state->PacketOut()); + } + DemuxUntilPacketAvailable(aType, state); + + packet = state->PacketPeek(); + if (!packet) { + break; + } + if (state->IsHeader(packet)) { + continue; + } + if (context.mNeedKeyframe && !state->IsKeyframe(packet)) { + continue; + } + context.mNeedKeyframe = false; + break; + } + + return packet; +} + +void +OggDemuxer::DemuxUntilPacketAvailable(TrackInfo::TrackType aType, + OggCodecState* aState) +{ + while (!aState->IsPacketReady()) { + OGG_DEBUG("no packet yet, reading some more"); + ogg_page page; + if (!ReadOggPage(aType, &page)) { + OGG_DEBUG("no more pages to read in resource?"); + return; + } + DemuxOggPage(aType, &page); + } +} + +TimeIntervals 
+OggDemuxer::GetBuffered(TrackInfo::TrackType aType) +{ + if (!HaveStartTime(aType)) { + return TimeIntervals(); + } + if (mIsChained) { + return TimeIntervals::Invalid(); + } + TimeIntervals buffered; + // HasAudio and HasVideo are not used here as they take a lock and cause + // a deadlock. Accessing mInfo doesn't require a lock - it doesn't change + // after metadata is read. + if (!mInfo.HasValidMedia()) { + // No need to search through the file if there are no audio or video tracks + return buffered; + } + + AutoPinned<MediaResource> resource(Resource(aType)->GetResource()); + MediaByteRangeSet ranges; + nsresult res = resource->GetCachedRanges(ranges); + NS_ENSURE_SUCCESS(res, TimeIntervals::Invalid()); + + // Traverse across the buffered byte ranges, determining the time ranges + // they contain. MediaResource::GetNextCachedData(offset) returns -1 when + // offset is after the end of the media resource, or there's no more cached + // data after the offset. This loop will run until we've checked every + // buffered range in the media, in increasing order of offset. + nsAutoOggSyncState sync; + for (uint32_t index = 0; index < ranges.Length(); index++) { + // Ensure the offsets are after the header pages. + int64_t startOffset = ranges[index].mStart; + int64_t endOffset = ranges[index].mEnd; + + // Because the granulepos time is actually the end time of the page, + // we special-case (startOffset == 0) so that the first + // buffered range always appears to be buffered from the media start + // time, rather than from the end-time of the first page. + int64_t startTime = (startOffset == 0) ? StartTime() : -1; + + // Find the start time of the range. Read pages until we find one with a + // granulepos which we can convert into a timestamp to use as the time of + // the start of the buffered range. 
+ ogg_sync_reset(&sync.mState); + while (startTime == -1) { + ogg_page page; + int32_t discard; + PageSyncResult pageSyncResult = PageSync(Resource(aType), + &sync.mState, + true, + startOffset, + endOffset, + &page, + discard); + if (pageSyncResult == PAGE_SYNC_ERROR) { + return TimeIntervals::Invalid(); + } else if (pageSyncResult == PAGE_SYNC_END_OF_RANGE) { + // Hit the end of range without reading a page, give up trying to + // find a start time for this buffered range, skip onto the next one. + break; + } + + int64_t granulepos = ogg_page_granulepos(&page); + if (granulepos == -1) { + // Page doesn't have an end time, advance to the next page + // until we find one. + startOffset += page.header_len + page.body_len; + continue; + } + + uint32_t serial = ogg_page_serialno(&page); + if (aType == TrackInfo::kAudioTrack && mVorbisState && + serial == mVorbisSerial) { + startTime = VorbisState::Time(&mVorbisInfo, granulepos); + NS_ASSERTION(startTime > 0, "Must have positive start time"); + } else if (aType == TrackInfo::kAudioTrack && mOpusState && + serial == mOpusSerial) { + startTime = OpusState::Time(mOpusPreSkip, granulepos); + NS_ASSERTION(startTime > 0, "Must have positive start time"); + } else if (aType == TrackInfo::kAudioTrack && mFlacState && + serial == mFlacSerial) { + startTime = mFlacState->Time(granulepos); + NS_ASSERTION(startTime > 0, "Must have positive start time"); + } else if (aType == TrackInfo::kVideoTrack && mTheoraState && + serial == mTheoraSerial) { + startTime = TheoraState::Time(&mTheoraInfo, granulepos); + NS_ASSERTION(startTime > 0, "Must have positive start time"); + } else if (mCodecStore.Contains(serial)) { + // Stream is not the theora or vorbis stream we're playing, + // but is one that we have header data for. + startOffset += page.header_len + page.body_len; + continue; + } else { + // Page is for a stream we don't know about (possibly a chained + // ogg), return OK to abort the finding any further ranges. 
This + // prevents us searching through the rest of the media when we + // may not be able to extract timestamps from it. + SetChained(); + return buffered; + } + } + + if (startTime != -1) { + // We were able to find a start time for that range, see if we can + // find an end time. + int64_t endTime = RangeEndTime(aType, startOffset, endOffset, true); + if (endTime > startTime) { + buffered += TimeInterval( + TimeUnit::FromMicroseconds(startTime - StartTime()), + TimeUnit::FromMicroseconds(endTime - StartTime())); + } + } + } + + return buffered; +} + +void +OggDemuxer::FindStartTime(int64_t& aOutStartTime) +{ + // Extract the start times of the bitstreams in order to calculate + // the duration. + int64_t videoStartTime = INT64_MAX; + int64_t audioStartTime = INT64_MAX; + + if (HasVideo()) { + FindStartTime(TrackInfo::kVideoTrack, videoStartTime); + if (videoStartTime != INT64_MAX) { + OGG_DEBUG("OggDemuxer::FindStartTime() video=%lld", videoStartTime); + mVideoOggState.mStartTime = + Some(TimeUnit::FromMicroseconds(videoStartTime)); + } + } + if (HasAudio()) { + FindStartTime(TrackInfo::kAudioTrack, audioStartTime); + if (audioStartTime != INT64_MAX) { + OGG_DEBUG("OggDemuxer::FindStartTime() audio=%lld", audioStartTime); + mAudioOggState.mStartTime = + Some(TimeUnit::FromMicroseconds(audioStartTime)); + } + } + + int64_t startTime = std::min(videoStartTime, audioStartTime); + if (startTime != INT64_MAX) { + aOutStartTime = startTime; + } +} + +void +OggDemuxer::FindStartTime(TrackInfo::TrackType aType, int64_t& aOutStartTime) +{ + int64_t startTime = INT64_MAX; + + OggCodecState* state = GetTrackCodecState(aType); + ogg_packet* pkt = GetNextPacket(aType); + if (pkt) { + startTime = state->PacketStartTime(pkt); + } + + if (startTime != INT64_MAX) { + aOutStartTime = startTime; + } +} + +nsresult +OggDemuxer::SeekInternal(TrackInfo::TrackType aType, const TimeUnit& aTarget) +{ + int64_t target = aTarget.ToMicroseconds(); + OGG_DEBUG("About to seek to %lld", 
target); + nsresult res; + int64_t adjustedTarget = target; + int64_t startTime = StartTime(aType); + int64_t endTime = mInfo.mMetadataDuration->ToMicroseconds(); + if (aType == TrackInfo::kAudioTrack && mOpusState){ + adjustedTarget = std::max(startTime, target - OGG_SEEK_OPUS_PREROLL); + } + + if (!HaveStartTime(aType) || adjustedTarget == startTime) { + // We've seeked to the media start or we can't seek. + // Just seek to the offset of the first content page. + res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, 0); + NS_ENSURE_SUCCESS(res,res); + + res = Reset(aType); + NS_ENSURE_SUCCESS(res,res); + } else { + // TODO: This may seek back unnecessarily far in the video, but we don't + // have a way of asking Skeleton to seek to a different target for each + // stream yet. Using adjustedTarget here is at least correct, if slow. + IndexedSeekResult sres = SeekToKeyframeUsingIndex(aType, adjustedTarget); + NS_ENSURE_TRUE(sres != SEEK_FATAL_ERROR, NS_ERROR_FAILURE); + if (sres == SEEK_INDEX_FAIL) { + // No index or other non-fatal index-related failure. Try to seek + // using a bisection search. Determine the already downloaded data + // in the media cache, so we can try to seek in the cached data first. + AutoTArray<SeekRange, 16> ranges; + res = GetSeekRanges(aType, ranges); + NS_ENSURE_SUCCESS(res,res); + + // Figure out if the seek target lies in a buffered range. + SeekRange r = SelectSeekRange(aType, ranges, target, startTime, endTime, true); + + if (!r.IsNull()) { + // We know the buffered range in which the seek target lies, do a + // bisection search in that buffered range. + res = SeekInBufferedRange(aType, target, adjustedTarget, startTime, endTime, ranges, r); + NS_ENSURE_SUCCESS(res,res); + } else { + // The target doesn't lie in a buffered range. Perform a bisection + // search over the whole media, using the known buffered ranges to + // reduce the search space. 
+ res = SeekInUnbuffered(aType, target, startTime, endTime, ranges); + NS_ENSURE_SUCCESS(res,res); + } + } + } + + // Demux forwards until we find the first keyframe prior the target. + // there may be non-keyframes in the page before the keyframe. + // Additionally, we may have seeked to the first page referenced by the + // page index which may be quite far off the target. + // When doing fastSeek we display the first frame after the seek, so + // we need to advance the decode to the keyframe otherwise we'll get + // visual artifacts in the first frame output after the seek. + OggCodecState* state = GetTrackCodecState(aType); + OggPacketQueue tempPackets; + bool foundKeyframe = false; + while (true) { + DemuxUntilPacketAvailable(aType, state); + ogg_packet* packet = state->PacketPeek(); + if (packet == nullptr) { + OGG_DEBUG("End of stream reached before keyframe found in indexed seek"); + break; + } + int64_t startTstamp = state->PacketStartTime(packet); + if (foundKeyframe && startTstamp > adjustedTarget) { + break; + } + if (state->IsKeyframe(packet)) { + OGG_DEBUG("keyframe found after seeking at %lld", startTstamp); + tempPackets.Erase(); + foundKeyframe = true; + } + if (foundKeyframe && startTstamp == adjustedTarget) { + break; + } + ogg_packet* releaseMe = state->PacketOut(); + if (foundKeyframe) { + tempPackets.Append(releaseMe); + } else { + // Discard video packets before the first keyframe. + OggCodecState::ReleasePacket(releaseMe); + } + } + // Re-add all packet into the codec state in order. 
+ state->PushFront(Move(tempPackets)); + + return NS_OK; +} + +OggDemuxer::IndexedSeekResult +OggDemuxer::RollbackIndexedSeek(TrackInfo::TrackType aType, int64_t aOffset) +{ + if (mSkeletonState) { + mSkeletonState->Deactivate(); + } + nsresult res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, aOffset); + NS_ENSURE_SUCCESS(res, SEEK_FATAL_ERROR); + return SEEK_INDEX_FAIL; +} + +OggDemuxer::IndexedSeekResult +OggDemuxer::SeekToKeyframeUsingIndex(TrackInfo::TrackType aType, int64_t aTarget) +{ + if (!HasSkeleton() || !mSkeletonState->HasIndex()) { + return SEEK_INDEX_FAIL; + } + // We have an index from the Skeleton track, try to use it to seek. + AutoTArray<uint32_t, 2> tracks; + BuildSerialList(tracks); + SkeletonState::nsSeekTarget keyframe; + if (NS_FAILED(mSkeletonState->IndexedSeekTarget(aTarget, + tracks, + keyframe))) { + // Could not locate a keypoint for the target in the index. + return SEEK_INDEX_FAIL; + } + + // Remember original resource read cursor position so we can rollback on failure. + int64_t tell = Resource(aType)->Tell(); + + // Seek to the keypoint returned by the index. + if (keyframe.mKeyPoint.mOffset > Resource(aType)->GetLength() || + keyframe.mKeyPoint.mOffset < 0) { + // Index must be invalid. + return RollbackIndexedSeek(aType, tell); + } + LOG(LogLevel::Debug, ("Seeking using index to keyframe at offset %lld\n", + keyframe.mKeyPoint.mOffset)); + nsresult res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, + keyframe.mKeyPoint.mOffset); + NS_ENSURE_SUCCESS(res, SEEK_FATAL_ERROR); + + // We've moved the read set, so reset decode. + res = Reset(aType); + NS_ENSURE_SUCCESS(res, SEEK_FATAL_ERROR); + + // Check that the page the index thinks is exactly here is actually exactly + // here. If not, the index is invalid. 
+ ogg_page page; + int skippedBytes = 0; + PageSyncResult syncres = PageSync(Resource(aType), + OggSyncState(aType), + false, + keyframe.mKeyPoint.mOffset, + Resource(aType)->GetLength(), + &page, + skippedBytes); + NS_ENSURE_TRUE(syncres != PAGE_SYNC_ERROR, SEEK_FATAL_ERROR); + if (syncres != PAGE_SYNC_OK || skippedBytes != 0) { + LOG(LogLevel::Debug, ("Indexed-seek failure: Ogg Skeleton Index is invalid " + "or sync error after seek")); + return RollbackIndexedSeek(aType, tell); + } + uint32_t serial = ogg_page_serialno(&page); + if (serial != keyframe.mSerial) { + // Serialno of page at offset isn't what the index told us to expect. + // Assume the index is invalid. + return RollbackIndexedSeek(aType, tell); + } + OggCodecState* codecState = mCodecStore.Get(serial); + if (codecState && codecState->mActive && + ogg_stream_pagein(&codecState->mState, &page) != 0) { + // Couldn't insert page into the ogg resource, or somehow the resource + // is no longer active. + return RollbackIndexedSeek(aType, tell); + } + return SEEK_OK; +} + +// Reads a page from the media resource. +OggDemuxer::PageSyncResult +OggDemuxer::PageSync(MediaResourceIndex* aResource, + ogg_sync_state* aState, + bool aCachedDataOnly, + int64_t aOffset, + int64_t aEndOffset, + ogg_page* aPage, + int& aSkippedBytes) +{ + aSkippedBytes = 0; + // Sync to the next page. 
+ int ret = 0; + uint32_t bytesRead = 0; + int64_t readHead = aOffset; + while (ret <= 0) { + ret = ogg_sync_pageseek(aState, aPage); + if (ret == 0) { + char* buffer = ogg_sync_buffer(aState, PAGE_STEP); + NS_ASSERTION(buffer, "Must have a buffer"); + + // Read from the file into the buffer + int64_t bytesToRead = std::min(static_cast<int64_t>(PAGE_STEP), + aEndOffset - readHead); + NS_ASSERTION(bytesToRead <= UINT32_MAX, "bytesToRead range check"); + if (bytesToRead <= 0) { + return PAGE_SYNC_END_OF_RANGE; + } + nsresult rv = NS_OK; + if (aCachedDataOnly) { + rv = aResource->GetResource()->ReadFromCache(buffer, readHead, + static_cast<uint32_t>(bytesToRead)); + NS_ENSURE_SUCCESS(rv,PAGE_SYNC_ERROR); + bytesRead = static_cast<uint32_t>(bytesToRead); + } else { + rv = aResource->Seek(nsISeekableStream::NS_SEEK_SET, readHead); + NS_ENSURE_SUCCESS(rv,PAGE_SYNC_ERROR); + rv = aResource->Read(buffer, + static_cast<uint32_t>(bytesToRead), + &bytesRead); + NS_ENSURE_SUCCESS(rv,PAGE_SYNC_ERROR); + } + if (bytesRead == 0 && NS_SUCCEEDED(rv)) { + // End of file. 
+ return PAGE_SYNC_END_OF_RANGE; + } + readHead += bytesRead; + + // Update the synchronisation layer with the number + // of bytes written to the buffer + ret = ogg_sync_wrote(aState, bytesRead); + NS_ENSURE_TRUE(ret == 0, PAGE_SYNC_ERROR); + continue; + } + + if (ret < 0) { + NS_ASSERTION(aSkippedBytes >= 0, "Offset >= 0"); + aSkippedBytes += -ret; + NS_ASSERTION(aSkippedBytes >= 0, "Offset >= 0"); + continue; + } + } + + return PAGE_SYNC_OK; +} + +//OggTrackDemuxer +OggTrackDemuxer::OggTrackDemuxer(OggDemuxer* aParent, + TrackInfo::TrackType aType, + uint32_t aTrackNumber) + : mParent(aParent) + , mType(aType) +{ + mInfo = mParent->GetTrackInfo(aType, aTrackNumber); + MOZ_ASSERT(mInfo); +} + +OggTrackDemuxer::~OggTrackDemuxer() +{ +} + +UniquePtr<TrackInfo> +OggTrackDemuxer::GetInfo() const +{ + return mInfo->Clone(); +} + +RefPtr<OggTrackDemuxer::SeekPromise> +OggTrackDemuxer::Seek(TimeUnit aTime) +{ + // Seeks to aTime. Upon success, SeekPromise will be resolved with the + // actual time seeked to. Typically the random access point time + mQueuedSample = nullptr; + TimeUnit seekTime = aTime; + if (mParent->SeekInternal(mType, aTime) == NS_OK) { + RefPtr<MediaRawData> sample(NextSample()); + + // Check what time we actually seeked to. 
+ if (sample != nullptr) { + seekTime = TimeUnit::FromMicroseconds(sample->mTime); + OGG_DEBUG("%p seeked to time %lld", this, seekTime.ToMicroseconds()); + } + mQueuedSample = sample; + + return SeekPromise::CreateAndResolve(seekTime, __func__); + } else { + return SeekPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, __func__); + } +} + +RefPtr<MediaRawData> +OggTrackDemuxer::NextSample() +{ + if (mQueuedSample) { + RefPtr<MediaRawData> nextSample = mQueuedSample; + mQueuedSample = nullptr; + if (mType == TrackInfo::kAudioTrack) { + nextSample->mTrackInfo = mParent->mSharedAudioTrackInfo; + } + return nextSample; + } + ogg_packet* packet = mParent->GetNextPacket(mType); + if (!packet) { + return nullptr; + } + // Check the eos state in case we need to look for chained streams. + bool eos = packet->e_o_s; + OggCodecState* state = mParent->GetTrackCodecState(mType); + RefPtr<MediaRawData> data = state->PacketOutAsMediaRawData(); + if (!data) { + return nullptr; + } + if (mType == TrackInfo::kAudioTrack) { + data->mTrackInfo = mParent->mSharedAudioTrackInfo; + } + if (eos) { + // We've encountered an end of bitstream packet; check for a chained + // bitstream following this one. + // This will also update mSharedAudioTrackInfo. 
+ mParent->ReadOggChain(TimeUnit::FromMicroseconds(data->GetEndTime())); + } + return data; +} + +RefPtr<OggTrackDemuxer::SamplesPromise> +OggTrackDemuxer::GetSamples(int32_t aNumSamples) +{ + RefPtr<SamplesHolder> samples = new SamplesHolder; + if (!aNumSamples) { + return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, __func__); + } + + while (aNumSamples) { + RefPtr<MediaRawData> sample(NextSample()); + if (!sample) { + break; + } + samples->mSamples.AppendElement(sample); + aNumSamples--; + } + + if (samples->mSamples.IsEmpty()) { + return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_END_OF_STREAM, __func__); + } else { + return SamplesPromise::CreateAndResolve(samples, __func__); + } +} + +void +OggTrackDemuxer::Reset() +{ + mParent->Reset(mType); + mQueuedSample = nullptr; +} + +RefPtr<OggTrackDemuxer::SkipAccessPointPromise> +OggTrackDemuxer::SkipToNextRandomAccessPoint(TimeUnit aTimeThreshold) +{ + uint32_t parsed = 0; + bool found = false; + RefPtr<MediaRawData> sample; + + OGG_DEBUG("TimeThreshold: %f", aTimeThreshold.ToSeconds()); + while (!found && (sample = NextSample())) { + parsed++; + if (sample->mKeyframe && sample->mTime >= aTimeThreshold.ToMicroseconds()) { + found = true; + mQueuedSample = sample; + } + } + if (found) { + OGG_DEBUG("next sample: %f (parsed: %d)", + TimeUnit::FromMicroseconds(sample->mTime).ToSeconds(), + parsed); + return SkipAccessPointPromise::CreateAndResolve(parsed, __func__); + } else { + SkipFailureHolder failure(NS_ERROR_DOM_MEDIA_END_OF_STREAM, parsed); + return SkipAccessPointPromise::CreateAndReject(Move(failure), __func__); + } +} + +TimeIntervals +OggTrackDemuxer::GetBuffered() +{ + return mParent->GetBuffered(mType); +} + +void +OggTrackDemuxer::BreakCycles() +{ + mParent = nullptr; +} + + +// Returns an ogg page's checksum. 
+ogg_uint32_t +OggDemuxer::GetPageChecksum(ogg_page* page) +{ + if (page == 0 || page->header == 0 || page->header_len < 25) { + return 0; + } + const unsigned char* p = page->header + 22; + uint32_t c = p[0] + (p[1] << 8) + (p[2] << 16) + (p[3] << 24); + return c; +} + +int64_t +OggDemuxer::RangeStartTime(TrackInfo::TrackType aType, int64_t aOffset) +{ + int64_t position = Resource(aType)->Tell(); + nsresult res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, aOffset); + NS_ENSURE_SUCCESS(res, 0); + int64_t startTime = 0; + FindStartTime(aType, startTime); + res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, position); + NS_ENSURE_SUCCESS(res, -1); + return startTime; +} + +struct nsDemuxerAutoOggSyncState +{ + nsDemuxerAutoOggSyncState() + { + ogg_sync_init(&mState); + } + ~nsDemuxerAutoOggSyncState() + { + ogg_sync_clear(&mState); + } + ogg_sync_state mState; +}; + +int64_t +OggDemuxer::RangeEndTime(TrackInfo::TrackType aType, int64_t aEndOffset) +{ + int64_t position = Resource(aType)->Tell(); + int64_t endTime = RangeEndTime(aType, 0, aEndOffset, false); + nsresult res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, position); + NS_ENSURE_SUCCESS(res, -1); + return endTime; +} + +int64_t +OggDemuxer::RangeEndTime(TrackInfo::TrackType aType, + int64_t aStartOffset, + int64_t aEndOffset, + bool aCachedDataOnly) +{ + nsDemuxerAutoOggSyncState sync; + + // We need to find the last page which ends before aEndOffset that + // has a granulepos that we can convert to a timestamp. We do this by + // backing off from aEndOffset until we encounter a page on which we can + // interpret the granulepos. If while backing off we encounter a page which + // we've previously encountered before, we'll either backoff again if we + // haven't found an end time yet, or return the last end time found. 
+ const int step = 5000; + const int maxOggPageSize = 65306; + int64_t readStartOffset = aEndOffset; + int64_t readLimitOffset = aEndOffset; + int64_t readHead = aEndOffset; + int64_t endTime = -1; + uint32_t checksumAfterSeek = 0; + uint32_t prevChecksumAfterSeek = 0; + bool mustBackOff = false; + while (true) { + ogg_page page; + int ret = ogg_sync_pageseek(&sync.mState, &page); + if (ret == 0) { + // We need more data if we've not encountered a page we've seen before, + // or we've read to the end of file. + if (mustBackOff || readHead == aEndOffset || readHead == aStartOffset) { + if (endTime != -1 || readStartOffset == 0) { + // We have encountered a page before, or we're at the end of file. + break; + } + mustBackOff = false; + prevChecksumAfterSeek = checksumAfterSeek; + checksumAfterSeek = 0; + ogg_sync_reset(&sync.mState); + readStartOffset = std::max(static_cast<int64_t>(0), readStartOffset - step); + // There's no point reading more than the maximum size of + // an Ogg page into data we've previously scanned. Any data + // between readLimitOffset and aEndOffset must be garbage + // and we can ignore it thereafter. 
+ readLimitOffset = std::min(readLimitOffset, + readStartOffset + maxOggPageSize); + readHead = std::max(aStartOffset, readStartOffset); + } + + int64_t limit = std::min(static_cast<int64_t>(UINT32_MAX), + aEndOffset - readHead); + limit = std::max(static_cast<int64_t>(0), limit); + limit = std::min(limit, static_cast<int64_t>(step)); + uint32_t bytesToRead = static_cast<uint32_t>(limit); + uint32_t bytesRead = 0; + char* buffer = ogg_sync_buffer(&sync.mState, bytesToRead); + NS_ASSERTION(buffer, "Must have buffer"); + nsresult res; + if (aCachedDataOnly) { + res = Resource(aType)->GetResource()->ReadFromCache(buffer, readHead, bytesToRead); + NS_ENSURE_SUCCESS(res, -1); + bytesRead = bytesToRead; + } else { + NS_ASSERTION(readHead < aEndOffset, + "resource pos must be before range end"); + res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, readHead); + NS_ENSURE_SUCCESS(res, -1); + res = Resource(aType)->Read(buffer, bytesToRead, &bytesRead); + NS_ENSURE_SUCCESS(res, -1); + } + readHead += bytesRead; + if (readHead > readLimitOffset) { + mustBackOff = true; + } + + // Update the synchronisation layer with the number + // of bytes written to the buffer + ret = ogg_sync_wrote(&sync.mState, bytesRead); + if (ret != 0) { + endTime = -1; + break; + } + continue; + } + + if (ret < 0 || ogg_page_granulepos(&page) < 0) { + continue; + } + + uint32_t checksum = GetPageChecksum(&page); + if (checksumAfterSeek == 0) { + // This is the first page we've decoded after a backoff/seek. Remember + // the page checksum. If we backoff further and encounter this page + // again, we'll know that we won't find a page with an end time after + // this one, so we'll know to back off again. + checksumAfterSeek = checksum; + } + if (checksum == prevChecksumAfterSeek) { + // This page has the same checksum as the first page we encountered + // after the last backoff/seek. 
Since we've already scanned after this + // page and failed to find an end time, we may as well backoff again and + // try to find an end time from an earlier page. + mustBackOff = true; + continue; + } + + int64_t granulepos = ogg_page_granulepos(&page); + int serial = ogg_page_serialno(&page); + + OggCodecState* codecState = nullptr; + codecState = mCodecStore.Get(serial); + if (!codecState) { + // This page is from a bitstream which we haven't encountered yet. + // It's probably from a new "link" in a "chained" ogg. Don't + // bother even trying to find a duration... + SetChained(); + endTime = -1; + break; + } + + int64_t t = codecState->Time(granulepos); + if (t != -1) { + endTime = t; + } + } + + return endTime; +} + +nsresult +OggDemuxer::GetSeekRanges(TrackInfo::TrackType aType, + nsTArray<SeekRange>& aRanges) +{ + AutoPinned<MediaResource> resource(Resource(aType)->GetResource()); + MediaByteRangeSet cached; + nsresult res = resource->GetCachedRanges(cached); + NS_ENSURE_SUCCESS(res, res); + + for (uint32_t index = 0; index < cached.Length(); index++) { + auto& range = cached[index]; + int64_t startTime = -1; + int64_t endTime = -1; + if (NS_FAILED(Reset(aType))) { + return NS_ERROR_FAILURE; + } + int64_t startOffset = range.mStart; + int64_t endOffset = range.mEnd; + startTime = RangeStartTime(aType, startOffset); + if (startTime != -1 && + ((endTime = RangeEndTime(aType, endOffset)) != -1)) { + NS_WARNING_ASSERTION(startTime < endTime, + "Start time must be before end time"); + aRanges.AppendElement(SeekRange(startOffset, + endOffset, + startTime, + endTime)); + } + } + if (NS_FAILED(Reset(aType))) { + return NS_ERROR_FAILURE; + } + return NS_OK; +} + +OggDemuxer::SeekRange +OggDemuxer::SelectSeekRange(TrackInfo::TrackType aType, + const nsTArray<SeekRange>& ranges, + int64_t aTarget, + int64_t aStartTime, + int64_t aEndTime, + bool aExact) +{ + int64_t so = 0; + int64_t eo = Resource(aType)->GetLength(); + int64_t st = aStartTime; + int64_t et = 
aEndTime; + for (uint32_t i = 0; i < ranges.Length(); i++) { + const SeekRange& r = ranges[i]; + if (r.mTimeStart < aTarget) { + so = r.mOffsetStart; + st = r.mTimeStart; + } + if (r.mTimeEnd >= aTarget && r.mTimeEnd < et) { + eo = r.mOffsetEnd; + et = r.mTimeEnd; + } + + if (r.mTimeStart < aTarget && aTarget <= r.mTimeEnd) { + // Target lies exactly in this range. + return ranges[i]; + } + } + if (aExact || eo == -1) { + return SeekRange(); + } + return SeekRange(so, eo, st, et); +} + + +nsresult +OggDemuxer::SeekInBufferedRange(TrackInfo::TrackType aType, + int64_t aTarget, + int64_t aAdjustedTarget, + int64_t aStartTime, + int64_t aEndTime, + const nsTArray<SeekRange>& aRanges, + const SeekRange& aRange) +{ + OGG_DEBUG("Seeking in buffered data to %lld using bisection search", aTarget); + if (aType == TrackInfo::kVideoTrack || aAdjustedTarget >= aTarget) { + // We know the exact byte range in which the target must lie. It must + // be buffered in the media cache. Seek there. + nsresult res = SeekBisection(aType, aTarget, aRange, 0); + if (NS_FAILED(res) || aType != TrackInfo::kVideoTrack) { + return res; + } + + // We have an active Theora bitstream. Peek the next Theora frame, and + // extract its keyframe's time. + DemuxUntilPacketAvailable(aType, mTheoraState); + ogg_packet* packet = mTheoraState->PacketPeek(); + if (packet && !mTheoraState->IsKeyframe(packet)) { + // First post-seek frame isn't a keyframe, seek back to previous keyframe, + // otherwise we'll get visual artifacts. 
+ NS_ASSERTION(packet->granulepos != -1, "Must have a granulepos"); + int shift = mTheoraState->mInfo.keyframe_granule_shift; + int64_t keyframeGranulepos = (packet->granulepos >> shift) << shift; + int64_t keyframeTime = mTheoraState->StartTime(keyframeGranulepos); + SEEK_LOG(LogLevel::Debug, ("Keyframe for %lld is at %lld, seeking back to it", + frameTime, keyframeTime)); + aAdjustedTarget = std::min(aAdjustedTarget, keyframeTime); + } + } + + nsresult res = NS_OK; + if (aAdjustedTarget < aTarget) { + SeekRange k = SelectSeekRange(aType, + aRanges, + aAdjustedTarget, + aStartTime, + aEndTime, + false); + res = SeekBisection(aType, aAdjustedTarget, k, OGG_SEEK_FUZZ_USECS); + } + return res; +} + +nsresult +OggDemuxer::SeekInUnbuffered(TrackInfo::TrackType aType, + int64_t aTarget, + int64_t aStartTime, + int64_t aEndTime, + const nsTArray<SeekRange>& aRanges) +{ + OGG_DEBUG("Seeking in unbuffered data to %lld using bisection search", aTarget); + + // If we've got an active Theora bitstream, determine the maximum possible + // time in usecs which a keyframe could be before a given interframe. We + // subtract this from our seek target, seek to the new target, and then + // will decode forward to the original seek target. We should encounter a + // keyframe in that interval. This prevents us from needing to run two + // bisections; one for the seek target frame, and another to find its + // keyframe. It's usually faster to just download this extra data, rather + // tham perform two bisections to find the seek target's keyframe. We + // don't do this offsetting when seeking in a buffered range, + // as the extra decoding causes a noticeable speed hit when all the data + // is buffered (compared to just doing a bisection to exactly find the + // keyframe). + int64_t keyframeOffsetMs = 0; + if (aType == TrackInfo::kVideoTrack && mTheoraState) { + keyframeOffsetMs = mTheoraState->MaxKeyframeOffset(); + } + // Add in the Opus pre-roll if necessary, as well. 
+ if (aType == TrackInfo::kAudioTrack && mOpusState) { + keyframeOffsetMs = std::max(keyframeOffsetMs, OGG_SEEK_OPUS_PREROLL); + } + int64_t seekTarget = std::max(aStartTime, aTarget - keyframeOffsetMs); + // Minimize the bisection search space using the known timestamps from the + // buffered ranges. + SeekRange k = + SelectSeekRange(aType, aRanges, seekTarget, aStartTime, aEndTime, false); + return SeekBisection(aType, seekTarget, k, OGG_SEEK_FUZZ_USECS); +} + +nsresult +OggDemuxer::SeekBisection(TrackInfo::TrackType aType, + int64_t aTarget, + const SeekRange& aRange, + uint32_t aFuzz) +{ + nsresult res; + + if (aTarget <= aRange.mTimeStart) { + if (NS_FAILED(Reset(aType))) { + return NS_ERROR_FAILURE; + } + res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, 0); + NS_ENSURE_SUCCESS(res,res); + return NS_OK; + } + + // Bisection search, find start offset of last page with end time less than + // the seek target. + ogg_int64_t startOffset = aRange.mOffsetStart; + ogg_int64_t startTime = aRange.mTimeStart; + ogg_int64_t startLength = 0; // Length of the page at startOffset. + ogg_int64_t endOffset = aRange.mOffsetEnd; + ogg_int64_t endTime = aRange.mTimeEnd; + + ogg_int64_t seekTarget = aTarget; + int64_t seekLowerBound = std::max(static_cast<int64_t>(0), aTarget - aFuzz); + int hops = 0; + DebugOnly<ogg_int64_t> previousGuess = -1; + int backsteps = 0; + const int maxBackStep = 10; + NS_ASSERTION(static_cast<uint64_t>(PAGE_STEP) * pow(2.0, maxBackStep) < INT32_MAX, + "Backstep calculation must not overflow"); + + // Seek via bisection search. Loop until we find the offset where the page + // before the offset is before the seek target, and the page after the offset + // is after the seek target. 
+ while (true) { + ogg_int64_t duration = 0; + double target = 0; + ogg_int64_t interval = 0; + ogg_int64_t guess = 0; + ogg_page page; + int skippedBytes = 0; + ogg_int64_t pageOffset = 0; + ogg_int64_t pageLength = 0; + ogg_int64_t granuleTime = -1; + bool mustBackoff = false; + + // Guess where we should bisect to, based on the bit rate and the time + // remaining in the interval. Loop until we can determine the time at + // the guess offset. + while (true) { + + // Discard any previously buffered packets/pages. + if (NS_FAILED(Reset(aType))) { + return NS_ERROR_FAILURE; + } + + interval = endOffset - startOffset - startLength; + if (interval == 0) { + // Our interval is empty, we've found the optimal seek point, as the + // page at the start offset is before the seek target, and the page + // at the end offset is after the seek target. + SEEK_LOG(LogLevel::Debug, ("Interval narrowed, terminating bisection.")); + break; + } + + // Guess bisection point. + duration = endTime - startTime; + target = (double)(seekTarget - startTime) / (double)duration; + guess = startOffset + startLength + + static_cast<ogg_int64_t>((double)interval * target); + guess = std::min(guess, endOffset - PAGE_STEP); + if (mustBackoff) { + // We previously failed to determine the time at the guess offset, + // probably because we ran out of data to decode. This usually happens + // when we guess very close to the end offset. So reduce the guess + // offset using an exponential backoff until we determine the time. + SEEK_LOG(LogLevel::Debug, ("Backing off %d bytes, backsteps=%d", + static_cast<int32_t>(PAGE_STEP * pow(2.0, backsteps)), backsteps)); + guess -= PAGE_STEP * static_cast<ogg_int64_t>(pow(2.0, backsteps)); + + if (guess <= startOffset) { + // We've tried to backoff to before the start offset of our seek + // range. 
This means we couldn't find a seek termination position + // near the end of the seek range, so just set the seek termination + // condition, and break out of the bisection loop. We'll begin + // decoding from the start of the seek range. + interval = 0; + break; + } + + backsteps = std::min(backsteps + 1, maxBackStep); + // We reset mustBackoff. If we still need to backoff further, it will + // be set to true again. + mustBackoff = false; + } else { + backsteps = 0; + } + guess = std::max(guess, startOffset + startLength); + + SEEK_LOG(LogLevel::Debug, ("Seek loop start[o=%lld..%lld t=%lld] " + "end[o=%lld t=%lld] " + "interval=%lld target=%lf guess=%lld", + startOffset, (startOffset+startLength), startTime, + endOffset, endTime, interval, target, guess)); + + NS_ASSERTION(guess >= startOffset + startLength, "Guess must be after range start"); + NS_ASSERTION(guess < endOffset, "Guess must be before range end"); + NS_ASSERTION(guess != previousGuess, "Guess should be different to previous"); + previousGuess = guess; + + hops++; + + // Locate the next page after our seek guess, and then figure out the + // granule time of the audio and video bitstreams there. We can then + // make a bisection decision based on our location in the media. + PageSyncResult pageSyncResult = PageSync(Resource(aType), + OggSyncState(aType), + false, + guess, + endOffset, + &page, + skippedBytes); + NS_ENSURE_TRUE(pageSyncResult != PAGE_SYNC_ERROR, NS_ERROR_FAILURE); + + if (pageSyncResult == PAGE_SYNC_END_OF_RANGE) { + // Our guess was too close to the end, we've ended up reading the end + // page. Backoff exponentially from the end point, in case the last + // page/frame/sample is huge. + mustBackoff = true; + SEEK_LOG(LogLevel::Debug, ("Hit the end of range, backing off")); + continue; + } + + // We've located a page of length |ret| at |guess + skippedBytes|. + // Remember where the page is located. 
+ pageOffset = guess + skippedBytes; + pageLength = page.header_len + page.body_len; + + // Read pages until we can determine the granule time of the audio and + // video bitstream. + ogg_int64_t audioTime = -1; + ogg_int64_t videoTime = -1; + do { + // Add the page to its codec state, determine its granule time. + uint32_t serial = ogg_page_serialno(&page); + OggCodecState* codecState = mCodecStore.Get(serial); + if (codecState && GetCodecStateType(codecState) == aType) { + if (codecState->mActive) { + int ret = ogg_stream_pagein(&codecState->mState, &page); + NS_ENSURE_TRUE(ret == 0, NS_ERROR_FAILURE); + } + + ogg_int64_t granulepos = ogg_page_granulepos(&page); + + if (aType == TrackInfo::kAudioTrack && + granulepos > 0 && audioTime == -1) { + if (mVorbisState && serial == mVorbisState->mSerial) { + audioTime = mVorbisState->Time(granulepos); + } else if (mOpusState && serial == mOpusState->mSerial) { + audioTime = mOpusState->Time(granulepos); + } else if (mFlacState && serial == mFlacState->mSerial) { + audioTime = mFlacState->Time(granulepos); + } + } + + if (aType == TrackInfo::kVideoTrack && + granulepos > 0 && serial == mTheoraState->mSerial && + videoTime == -1) { + videoTime = mTheoraState->Time(granulepos); + } + + if (pageOffset + pageLength >= endOffset) { + // Hit end of readable data. + break; + } + } + if (!ReadOggPage(aType, &page)) { + break; + } + + } while ((aType == TrackInfo::kAudioTrack && audioTime == -1) || + (aType == TrackInfo::kVideoTrack && videoTime == -1)); + + + if ((aType == TrackInfo::kAudioTrack && audioTime == -1) || + (aType == TrackInfo::kVideoTrack && videoTime == -1)) { + // We don't have timestamps for all active tracks... + if (pageOffset == startOffset + startLength && + pageOffset + pageLength >= endOffset) { + // We read the entire interval without finding timestamps for all + // active tracks. 
We know the interval start offset is before the seek + // target, and the interval end is after the seek target, and we can't + // terminate inside the interval, so we terminate the seek at the + // start of the interval. + interval = 0; + break; + } + + // We should backoff; cause the guess to back off from the end, so + // that we've got more room to capture. + mustBackoff = true; + continue; + } + + // We've found appropriate time stamps here. Proceed to bisect + // the search space. + granuleTime = aType == TrackInfo::kAudioTrack ? audioTime : videoTime; + NS_ASSERTION(granuleTime > 0, "Must get a granuletime"); + break; + } // End of "until we determine time at guess offset" loop. + + if (interval == 0) { + // Seek termination condition; we've found the page boundary of the + // last page before the target, and the first page after the target. + SEEK_LOG(LogLevel::Debug, ("Terminating seek at offset=%lld", startOffset)); + NS_ASSERTION(startTime < aTarget, "Start time must always be less than target"); + res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, startOffset); + NS_ENSURE_SUCCESS(res,res); + if (NS_FAILED(Reset(aType))) { + return NS_ERROR_FAILURE; + } + break; + } + + SEEK_LOG(LogLevel::Debug, ("Time at offset %lld is %lld", guess, granuleTime)); + if (granuleTime < seekTarget && granuleTime > seekLowerBound) { + // We're within the fuzzy region in which we want to terminate the search. + res = Resource(aType)->Seek(nsISeekableStream::NS_SEEK_SET, pageOffset); + NS_ENSURE_SUCCESS(res,res); + if (NS_FAILED(Reset(aType))) { + return NS_ERROR_FAILURE; + } + SEEK_LOG(LogLevel::Debug, ("Terminating seek at offset=%lld", pageOffset)); + break; + } + + if (granuleTime >= seekTarget) { + // We've landed after the seek target. + NS_ASSERTION(pageOffset < endOffset, "offset_end must decrease"); + endOffset = pageOffset; + endTime = granuleTime; + } else if (granuleTime < seekTarget) { + // Landed before seek target. 
+ NS_ASSERTION(pageOffset >= startOffset + startLength, + "Bisection point should be at or after end of first page in interval"); + startOffset = pageOffset; + startLength = pageLength; + startTime = granuleTime; + } + NS_ASSERTION(startTime <= seekTarget, "Must be before seek target"); + NS_ASSERTION(endTime >= seekTarget, "End must be after seek target"); + } + + SEEK_LOG(LogLevel::Debug, ("Seek complete in %d bisections.", hops)); + + return NS_OK; +} + +#undef OGG_DEBUG +#undef SEEK_DEBUG +} // namespace mozilla diff --git a/dom/media/ogg/OggDemuxer.h b/dom/media/ogg/OggDemuxer.h new file mode 100644 index 0000000000..90c075c98f --- /dev/null +++ b/dom/media/ogg/OggDemuxer.h @@ -0,0 +1,387 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(OggDemuxer_h_) +#define OggDemuxer_h_ + +#include "nsTArray.h" +#include "MediaDataDemuxer.h" +#include "OggCodecState.h" +#include "OggCodecStore.h" +#include "MediaMetadataManager.h" + +namespace mozilla { + +class OggTrackDemuxer; +class OggHeaders; + +class OggDemuxer : public MediaDataDemuxer +{ +public: + explicit OggDemuxer(MediaResource* aResource); + + RefPtr<InitPromise> Init() override; + + bool HasTrackType(TrackInfo::TrackType aType) const override; + + uint32_t GetNumberTracks(TrackInfo::TrackType aType) const override; + + already_AddRefed<MediaTrackDemuxer> GetTrackDemuxer(TrackInfo::TrackType aType, + uint32_t aTrackNumber) override; + + bool IsSeekable() const override; + + UniquePtr<EncryptionInfo> GetCrypto() override; + + // Set the events to notify when chaining is encountered. 
+ void SetChainingEvents(TimedMetadataEventProducer* aMetadataEvent, + MediaEventProducer<void>* aOnSeekableEvent); + +private: + + // helpers for friend OggTrackDemuxer + UniquePtr<TrackInfo> GetTrackInfo(TrackInfo::TrackType aType, size_t aTrackNumber) const; + + struct nsAutoOggSyncState { + nsAutoOggSyncState() { + ogg_sync_init(&mState); + } + ~nsAutoOggSyncState() { + ogg_sync_clear(&mState); + } + ogg_sync_state mState; + }; + media::TimeIntervals GetBuffered(TrackInfo::TrackType aType); + void FindStartTime(int64_t& aOutStartTime); + void FindStartTime(TrackInfo::TrackType, int64_t& aOutStartTime); + + nsresult SeekInternal(TrackInfo::TrackType aType, const media::TimeUnit& aTarget); + + // Seeks to the keyframe preceding the target time using available + // keyframe indexes. + enum IndexedSeekResult + { + SEEK_OK, // Success. + SEEK_INDEX_FAIL, // Failure due to no index, or invalid index. + SEEK_FATAL_ERROR // Error returned by a stream operation. + }; + IndexedSeekResult SeekToKeyframeUsingIndex(TrackInfo::TrackType aType, int64_t aTarget); + + // Rolls back a seek-using-index attempt, returning a failure error code. + IndexedSeekResult RollbackIndexedSeek(TrackInfo::TrackType aType, int64_t aOffset); + + // Represents a section of contiguous media, with a start and end offset, + // and the timestamps of the start and end of that range, that is cached. + // Used to denote the extremities of a range in which we can seek quickly + // (because it's cached). + class SeekRange + { + public: + SeekRange() + : mOffsetStart(0) + , mOffsetEnd(0) + , mTimeStart(0) + , mTimeEnd(0) + {} + + SeekRange(int64_t aOffsetStart, + int64_t aOffsetEnd, + int64_t aTimeStart, + int64_t aTimeEnd) + : mOffsetStart(aOffsetStart) + , mOffsetEnd(aOffsetEnd) + , mTimeStart(aTimeStart) + , mTimeEnd(aTimeEnd) + {} + + bool IsNull() const { + return mOffsetStart == 0 && + mOffsetEnd == 0 && + mTimeStart == 0 && + mTimeEnd == 0; + } + + int64_t mOffsetStart, mOffsetEnd; // in bytes. 
+ int64_t mTimeStart, mTimeEnd; // in usecs. + }; + + nsresult GetSeekRanges(TrackInfo::TrackType aType, nsTArray<SeekRange>& aRanges); + SeekRange SelectSeekRange(TrackInfo::TrackType aType, + const nsTArray<SeekRange>& ranges, + int64_t aTarget, + int64_t aStartTime, + int64_t aEndTime, + bool aExact); + + // Seeks to aTarget usecs in the buffered range aRange using bisection search, + // or to the keyframe prior to aTarget if we have video. aAdjustedTarget is + // an adjusted version of the target used to account for Opus pre-roll, if + // necessary. aStartTime must be the presentation time at the start of media, + // and aEndTime the time at end of media. aRanges must be the time/byte ranges + // buffered in the media cache as per GetSeekRanges(). + nsresult SeekInBufferedRange(TrackInfo::TrackType aType, + int64_t aTarget, + int64_t aAdjustedTarget, + int64_t aStartTime, + int64_t aEndTime, + const nsTArray<SeekRange>& aRanges, + const SeekRange& aRange); + + // Seeks to before aTarget usecs in media using bisection search. If the media + // has video, this will seek to before the keyframe required to render the + // media at aTarget. Will use aRanges in order to narrow the bisection + // search space. aStartTime must be the presentation time at the start of + // media, and aEndTime the time at end of media. aRanges must be the time/byte + // ranges buffered in the media cache as per GetSeekRanges(). + nsresult SeekInUnbuffered(TrackInfo::TrackType aType, + int64_t aTarget, + int64_t aStartTime, + int64_t aEndTime, + const nsTArray<SeekRange>& aRanges); + + // Performs a seek bisection to move the media stream's read cursor to the + // last ogg page boundary which has end time before aTarget usecs on both the + // Theora and Vorbis bitstreams. Limits its search to data inside aRange; + // i.e. it will only read inside of the aRange's start and end offsets. 
+ // aFuzz is the number of usecs of leniency we'll allow; we'll terminate the + // seek when we land in the range (aTime - aFuzz, aTime) usecs. + nsresult SeekBisection(TrackInfo::TrackType aType, + int64_t aTarget, + const SeekRange& aRange, + uint32_t aFuzz); + + // Chunk size to read when reading Ogg files. Average Ogg page length + // is about 4300 bytes, so we read the file in chunks larger than that. + static const int PAGE_STEP = 8192; + + enum PageSyncResult + { + PAGE_SYNC_ERROR = 1, + PAGE_SYNC_END_OF_RANGE= 2, + PAGE_SYNC_OK = 3 + }; + static PageSyncResult PageSync(MediaResourceIndex* aResource, + ogg_sync_state* aState, + bool aCachedDataOnly, + int64_t aOffset, + int64_t aEndOffset, + ogg_page* aPage, + int& aSkippedBytes); + + // Demux next Ogg packet + ogg_packet* GetNextPacket(TrackInfo::TrackType aType); + + nsresult Reset(TrackInfo::TrackType aType); + + static const nsString GetKind(const nsCString& aRole); + static void InitTrack(MessageField* aMsgInfo, + TrackInfo* aInfo, + bool aEnable); + + // Really private! + ~OggDemuxer(); + + // Read enough of the file to identify track information and header + // packets necessary for decoding to begin. + nsresult ReadMetadata(); + + // Read a page of data from the Ogg file. Returns true if a page has been + // read, false if the page read failed or end of file reached. + bool ReadOggPage(TrackInfo::TrackType aType, ogg_page* aPage); + + // Send a page off to the individual streams it belongs to. + // Reconstructed packets, if any are ready, will be available + // on the individual OggCodecStates. + nsresult DemuxOggPage(TrackInfo::TrackType aType, ogg_page* aPage); + + // Read data and demux until a packet is available on the given stream state + void DemuxUntilPacketAvailable(TrackInfo::TrackType aType, OggCodecState* aState); + + // Reads and decodes header packets for aState, until either header decode + // fails, or is complete. Initializes the codec state before returning. 
+ // Returns true if reading headers and initializtion of the stream + // succeeds. + bool ReadHeaders(TrackInfo::TrackType aType, OggCodecState* aState, OggHeaders& aHeaders); + + // Reads the next link in the chain. + bool ReadOggChain(const media::TimeUnit& aLastEndTime); + + // Set this media as being a chain and notifies the state machine that the + // media is no longer seekable. + void SetChained(); + + // Fills aTracks with the serial numbers of each active stream, for use by + // various SkeletonState functions. + void BuildSerialList(nsTArray<uint32_t>& aTracks); + + // Setup target bitstreams for decoding. + void SetupTargetTheora(TheoraState* aTheoraState, OggHeaders& aHeaders); + void SetupTargetVorbis(VorbisState* aVorbisState, OggHeaders& aHeaders); + void SetupTargetOpus(OpusState* aOpusState, OggHeaders& aHeaders); + void SetupTargetFlac(FlacState* aFlacState, OggHeaders& aHeaders); + void SetupTargetSkeleton(); + void SetupMediaTracksInfo(const nsTArray<uint32_t>& aSerials); + void FillTags(TrackInfo* aInfo, MetadataTags* aTags); + + // Compute an ogg page's checksum + ogg_uint32_t GetPageChecksum(ogg_page* aPage); + + // Get the end time of aEndOffset. This is the playback position we'd reach + // after playback finished at aEndOffset. + int64_t RangeEndTime(TrackInfo::TrackType aType, int64_t aEndOffset); + + // Get the end time of aEndOffset, without reading before aStartOffset. + // This is the playback position we'd reach after playback finished at + // aEndOffset. If bool aCachedDataOnly is true, then we'll only read + // from data which is cached in the media cached, otherwise we'll do + // regular blocking reads from the media stream. If bool aCachedDataOnly + // is true, this can safely be called on the main thread, otherwise it + // must be called on the state machine thread. 
+ int64_t RangeEndTime(TrackInfo::TrackType aType, + int64_t aStartOffset, + int64_t aEndOffset, + bool aCachedDataOnly); + + // Get the start time of the range beginning at aOffset. This is the start + // time of the first aType sample we'd be able to play if we + // started playback at aOffset. + int64_t RangeStartTime(TrackInfo::TrackType aType, int64_t aOffset); + + MediaInfo mInfo; + nsTArray<RefPtr<OggTrackDemuxer>> mDemuxers; + + // Map of codec-specific bitstream states. + OggCodecStore mCodecStore; + + // Decode state of the Theora bitstream we're decoding, if we have video. + TheoraState* mTheoraState; + + // Decode state of the Vorbis bitstream we're decoding, if we have audio. + VorbisState* mVorbisState; + + // Decode state of the Opus bitstream we're decoding, if we have one. + OpusState* mOpusState; + + // Get the bitstream decode state for the given track type + // Decode state of the Flac bitstream we're decoding, if we have one. + FlacState* mFlacState; + + OggCodecState* GetTrackCodecState(TrackInfo::TrackType aType) const; + TrackInfo::TrackType GetCodecStateType(OggCodecState* aState) const; + + // Represents the user pref media.opus.enabled at the time our + // contructor was called. We can't check it dynamically because + // we're not on the main thread; + bool mOpusEnabled; + + // Decode state of the Skeleton bitstream. + SkeletonState* mSkeletonState; + + // Ogg decoding state. 
+ struct OggStateContext + { + explicit OggStateContext(MediaResource* aResource) + : mResource(aResource), mNeedKeyframe(true) {} + nsAutoOggSyncState mOggState; + MediaResourceIndex mResource; + Maybe<media::TimeUnit> mStartTime; + bool mNeedKeyframe; + }; + + OggStateContext& OggState(TrackInfo::TrackType aType); + ogg_sync_state* OggSyncState(TrackInfo::TrackType aType); + MediaResourceIndex* Resource(TrackInfo::TrackType aType); + MediaResourceIndex* CommonResource(); + OggStateContext mAudioOggState; + OggStateContext mVideoOggState; + + // Vorbis/Opus/Theora data used to compute timestamps. This is written on the + // decoder thread and read on the main thread. All reading on the main + // thread must be done after metadataloaded. We can't use the existing + // data in the codec states due to threading issues. You must check the + // associated mTheoraState or mVorbisState pointer is non-null before + // using this codec data. + uint32_t mVorbisSerial; + uint32_t mOpusSerial; + uint32_t mTheoraSerial; + uint32_t mFlacSerial; + + vorbis_info mVorbisInfo; + int mOpusPreSkip; + th_info mTheoraInfo; + + Maybe<int64_t> mStartTime; + + // Booleans to indicate if we have audio and/or video data + bool HasVideo() const; + bool HasAudio() const; + bool HasSkeleton() const + { + return mSkeletonState != 0 && mSkeletonState->mActive; + } + bool HaveStartTime () const; + bool HaveStartTime (TrackInfo::TrackType aType); + int64_t StartTime() const; + int64_t StartTime(TrackInfo::TrackType aType); + + // The picture region inside Theora frame to be displayed, if we have + // a Theora video track. + nsIntRect mPicture; + + // True if we are decoding a chained ogg. + bool mIsChained; + + // Total audio duration played so far. 
+ media::TimeUnit mDecodedAudioDuration; + + // Events manager + TimedMetadataEventProducer* mTimedMetadataEvent; + MediaEventProducer<void>* mOnSeekableEvent; + + // This will be populated only if a content change occurs, otherwise it + // will be left as null so the original metadata is used. + // It is updated once a chained ogg is encountered. + // As Ogg chaining is only supported for audio, we only need an audio track + // info. + RefPtr<SharedTrackInfo> mSharedAudioTrackInfo; + + friend class OggTrackDemuxer; +}; + +class OggTrackDemuxer : public MediaTrackDemuxer +{ +public: + OggTrackDemuxer(OggDemuxer* aParent, + TrackInfo::TrackType aType, + uint32_t aTrackNumber); + + UniquePtr<TrackInfo> GetInfo() const override; + + RefPtr<SeekPromise> Seek(media::TimeUnit aTime) override; + + RefPtr<SamplesPromise> GetSamples(int32_t aNumSamples = 1) override; + + void Reset() override; + + RefPtr<SkipAccessPointPromise> SkipToNextRandomAccessPoint(media::TimeUnit aTimeThreshold) override; + + media::TimeIntervals GetBuffered() override; + + void BreakCycles() override; + +private: + ~OggTrackDemuxer(); + void SetNextKeyFrameTime(); + RefPtr<MediaRawData> NextSample(); + RefPtr<OggDemuxer> mParent; + TrackInfo::TrackType mType; + UniquePtr<TrackInfo> mInfo; + + // Queued sample extracted by the demuxer, but not yet returned. + RefPtr<MediaRawData> mQueuedSample; +}; +} // namespace mozilla + +#endif diff --git a/dom/media/ogg/OggWriter.cpp b/dom/media/ogg/OggWriter.cpp new file mode 100644 index 0000000000..bb0dca67b1 --- /dev/null +++ b/dom/media/ogg/OggWriter.cpp @@ -0,0 +1,214 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#include "OggWriter.h" +#include "prtime.h" +#include "GeckoProfiler.h" + +#undef LOG +#ifdef MOZ_WIDGET_GONK +#include <android/log.h> +#define LOG(args...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", ## args); +#else +#define LOG(args, ...) +#endif + +namespace mozilla { + +OggWriter::OggWriter() : ContainerWriter() +{ + if (NS_FAILED(Init())) { + LOG("ERROR! Fail to initialize the OggWriter."); + } +} + +OggWriter::~OggWriter() +{ + if (mInitialized) { + ogg_stream_clear(&mOggStreamState); + } + // mPacket's data was always owned by us, no need to ogg_packet_clear. +} + +nsresult +OggWriter::Init() +{ + MOZ_ASSERT(!mInitialized); + + // The serial number (serialno) should be a random number, for the current + // implementation where the output file contains only a single stream, this + // serialno is used to differentiate between files. + srand(static_cast<unsigned>(PR_Now())); + int rc = ogg_stream_init(&mOggStreamState, rand()); + + mPacket.b_o_s = 1; + mPacket.e_o_s = 0; + mPacket.granulepos = 0; + mPacket.packet = nullptr; + mPacket.packetno = 0; + mPacket.bytes = 0; + + mInitialized = (rc == 0); + + return (rc == 0) ? NS_OK : NS_ERROR_NOT_INITIALIZED; +} + +nsresult +OggWriter::WriteEncodedTrack(const EncodedFrameContainer& aData, + uint32_t aFlags) +{ + PROFILER_LABEL("OggWriter", "WriteEncodedTrack", + js::ProfileEntry::Category::OTHER); + + uint32_t len = aData.GetEncodedFrames().Length(); + for (uint32_t i = 0; i < len; i++) { + if (aData.GetEncodedFrames()[i]->GetFrameType() != EncodedFrame::OPUS_AUDIO_FRAME) { + LOG("[OggWriter] wrong encoded data type!"); + return NS_ERROR_FAILURE; + } + + // only pass END_OF_STREAM on the last frame! + nsresult rv = WriteEncodedData(aData.GetEncodedFrames()[i]->GetFrameData(), + aData.GetEncodedFrames()[i]->GetDuration(), + i < len-1 ? 
(aFlags & ~ContainerWriter::END_OF_STREAM) : + aFlags); + if (NS_FAILED(rv)) { + LOG("%p Failed to WriteEncodedTrack!", this); + return rv; + } + } + return NS_OK; +} + +nsresult +OggWriter::WriteEncodedData(const nsTArray<uint8_t>& aBuffer, int aDuration, + uint32_t aFlags) +{ + if (!mInitialized) { + LOG("[OggWriter] OggWriter has not initialized!"); + return NS_ERROR_FAILURE; + } + + MOZ_ASSERT(!ogg_stream_eos(&mOggStreamState), + "No data can be written after eos has marked."); + + // Set eos flag to true, and once the eos is written to a packet, there must + // not be anymore pages after a page has marked as eos. + if (aFlags & ContainerWriter::END_OF_STREAM) { + LOG("[OggWriter] Set e_o_s flag to true."); + mPacket.e_o_s = 1; + } + + mPacket.packet = const_cast<uint8_t*>(aBuffer.Elements()); + mPacket.bytes = aBuffer.Length(); + mPacket.granulepos += aDuration; + + // 0 returned on success. -1 returned in the event of internal error. + // The data in the packet is copied into the internal storage managed by the + // mOggStreamState, so we are free to alter the contents of mPacket after + // this call has returned. + int rc = ogg_stream_packetin(&mOggStreamState, &mPacket); + if (rc < 0) { + LOG("[OggWriter] Failed in ogg_stream_packetin! 
(%d).", rc); + return NS_ERROR_FAILURE; + } + + if (mPacket.b_o_s) { + mPacket.b_o_s = 0; + } + mPacket.packetno++; + mPacket.packet = nullptr; + + return NS_OK; +} + +void +OggWriter::ProduceOggPage(nsTArray<nsTArray<uint8_t> >* aOutputBufs) +{ + aOutputBufs->AppendElement(); + aOutputBufs->LastElement().SetLength(mOggPage.header_len + + mOggPage.body_len); + memcpy(aOutputBufs->LastElement().Elements(), mOggPage.header, + mOggPage.header_len); + memcpy(aOutputBufs->LastElement().Elements() + mOggPage.header_len, + mOggPage.body, mOggPage.body_len); +} + +nsresult +OggWriter::GetContainerData(nsTArray<nsTArray<uint8_t> >* aOutputBufs, + uint32_t aFlags) +{ + int rc = -1; + PROFILER_LABEL("OggWriter", "GetContainerData", + js::ProfileEntry::Category::OTHER); + // Generate the oggOpus Header + if (aFlags & ContainerWriter::GET_HEADER) { + OpusMetadata* meta = static_cast<OpusMetadata*>(mMetadata.get()); + NS_ASSERTION(meta, "should have meta data"); + NS_ASSERTION(meta->GetKind() == TrackMetadataBase::METADATA_OPUS, + "should have Opus meta data"); + + nsresult rv = WriteEncodedData(meta->mIdHeader, 0); + NS_ENSURE_SUCCESS(rv, rv); + + rc = ogg_stream_flush(&mOggStreamState, &mOggPage); + NS_ENSURE_TRUE(rc > 0, NS_ERROR_FAILURE); + ProduceOggPage(aOutputBufs); + + rv = WriteEncodedData(meta->mCommentHeader, 0); + NS_ENSURE_SUCCESS(rv, rv); + + rc = ogg_stream_flush(&mOggStreamState, &mOggPage); + NS_ENSURE_TRUE(rc > 0, NS_ERROR_FAILURE); + + ProduceOggPage(aOutputBufs); + return NS_OK; + + // Force generate a page even if the amount of packet data is not enough. + // Usually do so after a header packet. + } else if (aFlags & ContainerWriter::FLUSH_NEEDED) { + // rc = 0 means no packet to put into a page, or an internal error. + rc = ogg_stream_flush(&mOggStreamState, &mOggPage); + } else { + // rc = 0 means insufficient data has accumulated to fill a page, or an + // internal error has occurred. 
+ rc = ogg_stream_pageout(&mOggStreamState, &mOggPage); + } + + if (rc) { + ProduceOggPage(aOutputBufs); + } + if (aFlags & ContainerWriter::FLUSH_NEEDED) { + mIsWritingComplete = true; + } + return (rc > 0) ? NS_OK : NS_ERROR_FAILURE; +} + +nsresult +OggWriter::SetMetadata(TrackMetadataBase* aMetadata) +{ + MOZ_ASSERT(aMetadata); + + PROFILER_LABEL("OggWriter", "SetMetadata", + js::ProfileEntry::Category::OTHER); + + if (aMetadata->GetKind() != TrackMetadataBase::METADATA_OPUS) { + LOG("wrong meta data type!"); + return NS_ERROR_FAILURE; + } + // Validate each field of METADATA + mMetadata = static_cast<OpusMetadata*>(aMetadata); + if (mMetadata->mIdHeader.Length() == 0) { + LOG("miss mIdHeader!"); + return NS_ERROR_FAILURE; + } + if (mMetadata->mCommentHeader.Length() == 0) { + LOG("miss mCommentHeader!"); + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +} // namespace mozilla diff --git a/dom/media/ogg/OggWriter.h b/dom/media/ogg/OggWriter.h new file mode 100644 index 0000000000..b58bbd6df5 --- /dev/null +++ b/dom/media/ogg/OggWriter.h @@ -0,0 +1,52 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef OggWriter_h_ +#define OggWriter_h_ + +#include "ContainerWriter.h" +#include "OpusTrackEncoder.h" +#include <ogg/ogg.h> + +namespace mozilla { +/** + * WriteEncodedTrack inserts raw packets into Ogg stream (ogg_stream_state), and + * GetContainerData outputs an ogg_page when enough packets have been written + * to the Ogg stream. 
+ * For more details, please reference: + * http://www.xiph.org/ogg/doc/libogg/encoding.html + */ +class OggWriter : public ContainerWriter +{ +public: + OggWriter(); + ~OggWriter(); + + nsresult WriteEncodedTrack(const EncodedFrameContainer &aData, + uint32_t aFlags = 0) override; + + nsresult GetContainerData(nsTArray<nsTArray<uint8_t> >* aOutputBufs, + uint32_t aFlags = 0) override; + + // Check metadata type integrity and reject unacceptable track encoder. + nsresult SetMetadata(TrackMetadataBase* aMetadata) override; +private: + nsresult Init(); + + nsresult WriteEncodedData(const nsTArray<uint8_t>& aBuffer, int aDuration, + uint32_t aFlags = 0); + + void ProduceOggPage(nsTArray<nsTArray<uint8_t> >* aOutputBufs); + // Store the Medatata from track encoder + RefPtr<OpusMetadata> mMetadata; + + ogg_stream_state mOggStreamState; + ogg_page mOggPage; + ogg_packet mPacket; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/ogg/OpusParser.cpp b/dom/media/ogg/OpusParser.cpp new file mode 100644 index 0000000000..7e62f6d5fa --- /dev/null +++ b/dom/media/ogg/OpusParser.cpp @@ -0,0 +1,185 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include <algorithm> +#include "mozilla/EndianUtils.h" + +#include "OpusParser.h" +#include "VideoUtils.h" + +#include "opus/opus.h" +extern "C" { +#include "opus/opus_multistream.h" +} + +namespace mozilla { + +extern LazyLogModule gMediaDecoderLog; +#define OPUS_LOG(type, msg) MOZ_LOG(gMediaDecoderLog, type, msg) + +OpusParser::OpusParser(): + mRate(0), + mNominalRate(0), + mChannels(0), + mPreSkip(0), +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + mGain(1.0f), +#else + mGain_Q16(65536), +#endif + mChannelMapping(0), + mStreams(0), + mCoupledStreams(0) +{ } + +bool OpusParser::DecodeHeader(unsigned char* aData, size_t aLength) +{ + if (aLength < 19 || memcmp(aData, "OpusHead", 8)) { + OPUS_LOG(LogLevel::Debug, ("Invalid Opus file: unrecognized header")); + return false; + } + + mRate = 48000; // The Opus decoder runs at 48 kHz regardless. + + int version = aData[8]; + // Accept file format versions 0.x. + if ((version & 0xf0) != 0) { + OPUS_LOG(LogLevel::Debug, ("Rejecting unknown Opus file version %d", version)); + return false; + } + + mChannels = aData[9]; + if (mChannels<1) { + OPUS_LOG(LogLevel::Debug, ("Invalid Opus file: Number of channels %d", mChannels)); + return false; + } + + mPreSkip = LittleEndian::readUint16(aData + 10); + mNominalRate = LittleEndian::readUint32(aData + 12); + double gain_dB = LittleEndian::readInt16(aData + 16) / 256.0; +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + mGain = static_cast<float>(pow(10,0.05*gain_dB)); +#else + mGain_Q16 = static_cast<int32_t>(std::min(65536*pow(10,0.05*gain_dB)+0.5, + static_cast<double>(INT32_MAX))); +#endif + mChannelMapping = aData[18]; + + if (mChannelMapping == 0) { + // Mapping family 0 only allows two channels + if (mChannels>2) { + OPUS_LOG(LogLevel::Debug, ("Invalid Opus file: too many channels (%d) for" + " mapping family 0.", mChannels)); + return false; + } + mStreams = 1; + mCoupledStreams = mChannels - 1; + mMappingTable[0] = 0; + mMappingTable[1] = 1; + } else if (mChannelMapping == 1 || mChannelMapping 
== 255) { + // Currently only up to 8 channels are defined for mapping family 1 and we + // only supports only up to 8 channels for mapping family 255. + if (mChannels>8) { + OPUS_LOG(LogLevel::Debug, ("Invalid Opus file: too many channels (%d) for" + " mapping family 1.", mChannels)); + return false; + } + if (aLength>static_cast<unsigned>(20+mChannels)) { + mStreams = aData[19]; + mCoupledStreams = aData[20]; + int i; + for (i=0; i<mChannels; i++) + mMappingTable[i] = aData[21+i]; + } else { + OPUS_LOG(LogLevel::Debug, ("Invalid Opus file: channel mapping %d," + " but no channel mapping table", mChannelMapping)); + return false; + } + } else { + OPUS_LOG(LogLevel::Debug, ("Invalid Opus file: unsupported channel mapping " + "family %d", mChannelMapping)); + return false; + } + if (mStreams < 1) { + OPUS_LOG(LogLevel::Debug, ("Invalid Opus file: no streams")); + return false; + } + if (mCoupledStreams > mStreams) { + OPUS_LOG(LogLevel::Debug, ("Invalid Opus file: more coupled streams (%d) than " + "total streams (%d)", mCoupledStreams, mStreams)); + return false; + } + +#ifdef DEBUG + OPUS_LOG(LogLevel::Debug, ("Opus stream header:")); + OPUS_LOG(LogLevel::Debug, (" channels: %d", mChannels)); + OPUS_LOG(LogLevel::Debug, (" preskip: %d", mPreSkip)); + OPUS_LOG(LogLevel::Debug, (" original: %d Hz", mNominalRate)); + OPUS_LOG(LogLevel::Debug, (" gain: %.2f dB", gain_dB)); + OPUS_LOG(LogLevel::Debug, ("Channel Mapping:")); + OPUS_LOG(LogLevel::Debug, (" family: %d", mChannelMapping)); + OPUS_LOG(LogLevel::Debug, (" streams: %d", mStreams)); +#endif + return true; +} + +bool OpusParser::DecodeTags(unsigned char* aData, size_t aLength) +{ + if (aLength < 16 || memcmp(aData, "OpusTags", 8)) + return false; + + // Copy out the raw comment lines, but only do basic validation + // checks against the string packing: too little data, too many + // comments, or comments that are too long. Rejecting these cases + // helps reduce the propagation of broken files. 
+ // We do not ensure they are valid UTF-8 here, nor do we validate + // the required ASCII_TAG=value format of the user comments. + const unsigned char* buf = aData + 8; + uint32_t bytes = aLength - 8; + uint32_t len; + // Read the vendor string. + len = LittleEndian::readUint32(buf); + buf += 4; + bytes -= 4; + if (len > bytes) + return false; + mVendorString = nsCString(reinterpret_cast<const char*>(buf), len); + buf += len; + bytes -= len; + // Read the user comments. + if (bytes < 4) + return false; + uint32_t ncomments = LittleEndian::readUint32(buf); + buf += 4; + bytes -= 4; + // If there are so many comments even their length fields + // won't fit in the packet, stop reading now. + if (ncomments > (bytes>>2)) + return false; + for (uint32_t i = 0; i < ncomments; i++) { + if (bytes < 4) + return false; + len = LittleEndian::readUint32(buf); + buf += 4; + bytes -= 4; + if (len > bytes) + return false; + mTags.AppendElement(nsCString(reinterpret_cast<const char*>(buf), len)); + buf += len; + bytes -= len; + } + +#ifdef DEBUG + OPUS_LOG(LogLevel::Debug, ("Opus metadata header:")); + OPUS_LOG(LogLevel::Debug, (" vendor: %s", mVendorString.get())); + for (uint32_t i = 0; i < mTags.Length(); i++) { + OPUS_LOG(LogLevel::Debug, (" %s", mTags[i].get())); + } +#endif + return true; +} + +} // namespace mozilla diff --git a/dom/media/ogg/OpusParser.h b/dom/media/ogg/OpusParser.h new file mode 100644 index 0000000000..0c6d9c940a --- /dev/null +++ b/dom/media/ogg/OpusParser.h @@ -0,0 +1,48 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#if !defined(OpusParser_h_) +#define OpusParser_h_ + +#include "nsTArray.h" +#include "nsString.h" + +namespace mozilla { + +class OpusParser +{ +public: + OpusParser(); + + bool DecodeHeader(unsigned char* aData, size_t aLength); + bool DecodeTags(unsigned char* aData, size_t aLength); + + // Various fields from the Ogg Opus header. + int mRate; // Sample rate the decoder uses (always 48 kHz). + uint32_t mNominalRate; // Original sample rate of the data (informational). + int mChannels; // Number of channels the stream encodes. + uint16_t mPreSkip; // Number of samples to strip after decoder reset. +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + float mGain; // Gain to apply to decoder output. +#else + int32_t mGain_Q16; // Gain to apply to the decoder output. +#endif + int mChannelMapping; // Channel mapping family. + int mStreams; // Number of packed streams in each packet. + int mCoupledStreams; // Number of packed coupled streams in each packet. + unsigned char mMappingTable[255]; // Channel mapping table. + + // Granule position (end sample) of the last decoded Opus packet. This is + // used to calculate the amount we should trim from the last packet. + int64_t mPrevPacketGranulepos; + + nsTArray<nsCString> mTags; // Unparsed comment strings from the header. + + nsCString mVendorString; // Encoder vendor string from the header. +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/ogg/moz.build b/dom/media/ogg/moz.build new file mode 100644 index 0000000000..07e195b6f7 --- /dev/null +++ b/dom/media/ogg/moz.build @@ -0,0 +1,25 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +EXPORTS += [ + 'OggCodecState.h', + 'OggCodecStore.h', + 'OggDecoder.h', + 'OggDemuxer.h', + 'OggWriter.h', + 'OpusParser.h', +] + +UNIFIED_SOURCES += [ + 'OggCodecState.cpp', + 'OggCodecStore.cpp', + 'OggDecoder.cpp', + 'OggDemuxer.cpp', + 'OggWriter.cpp', + 'OpusParser.cpp', +] + +FINAL_LIBRARY = 'xul' |