diff options
author | Jeremy Andrews <athenian200@outlook.com> | 2021-09-26 12:50:43 -0500 |
---|---|---|
committer | Jeremy Andrews <athenian200@outlook.com> | 2021-09-26 12:50:43 -0500 |
commit | cc621e1829f61df96473cd04fb8ba27a65c99e13 (patch) | |
tree | 3387ebd88906fd0392190455d089314eddc6ec81 /libs/libaom/src/examples | |
parent | 78f8ab35c2264cad5725ef6f8f21b1a614d90775 (diff) | |
download | aura-central-cc621e1829f61df96473cd04fb8ba27a65c99e13.tar.gz |
Issue %3003 - Move libaom to libs/
Diffstat (limited to 'libs/libaom/src/examples')
23 files changed, 7103 insertions, 0 deletions
diff --git a/libs/libaom/src/examples/analyzer.cc b/libs/libaom/src/examples/analyzer.cc new file mode 100644 index 000000000..35988211e --- /dev/null +++ b/libs/libaom/src/examples/analyzer.cc @@ -0,0 +1,723 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +#include <wx/wx.h> +#include <wx/aboutdlg.h> +#include <wx/cmdline.h> +#include <wx/dcbuffer.h> + +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "av1/common/av1_common_int.h" +#include "av1/decoder/accounting.h" +#include "av1/decoder/inspection.h" +#include "common/tools_common.h" +#include "common/video_reader.h" + +#define OD_SIGNMASK(a) (-((a) < 0)) +#define OD_FLIPSIGNI(a, b) (((a) + OD_SIGNMASK(b)) ^ OD_SIGNMASK(b)) +#define OD_DIV_ROUND(x, y) (((x) + OD_FLIPSIGNI((y) >> 1, x)) / (y)) + +enum { + OD_LUMA_MASK = 1 << 0, + OD_CB_MASK = 1 << 1, + OD_CR_MASK = 1 << 2, + OD_ALL_MASK = OD_LUMA_MASK | OD_CB_MASK | OD_CR_MASK +}; + +class AV1Decoder { + private: + FILE *input; + wxString path; + + AvxVideoReader *reader; + const AvxVideoInfo *info; + const AvxInterface *decoder; + + insp_frame_data frame_data; + + aom_codec_ctx_t codec; + bool show_padding; + + public: + aom_image_t *image; + int frame; + + int plane_mask; + + AV1Decoder(); + ~AV1Decoder(); + + bool open(const wxString &path); + void close(); + bool step(); + + int getWidthPadding() const; + int getHeightPadding() const; + void togglePadding(); + int getWidth() const; + int getHeight() const; + + bool getAccountingStruct(Accounting **acct); + 
bool setInspectionCallback(); + + static void inspect(void *decoder, void *data); +}; + +AV1Decoder::AV1Decoder() + : reader(NULL), info(NULL), decoder(NULL), show_padding(false), image(NULL), + frame(0) {} + +AV1Decoder::~AV1Decoder() {} + +void AV1Decoder::togglePadding() { show_padding = !show_padding; } + +bool AV1Decoder::open(const wxString &path) { + reader = aom_video_reader_open(path.mb_str()); + if (!reader) { + fprintf(stderr, "Failed to open %s for reading.", path.mb_str().data()); + return false; + } + this->path = path; + info = aom_video_reader_get_info(reader); + decoder = get_aom_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) { + fprintf(stderr, "Unknown input codec."); + return false; + } + printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface())); + if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) { + fprintf(stderr, "Failed to initialize decoder."); + return false; + } + ifd_init(&frame_data, info->frame_width, info->frame_height); + setInspectionCallback(); + return true; +} + +void AV1Decoder::close() {} + +bool AV1Decoder::step() { + if (aom_video_reader_read_frame(reader)) { + size_t frame_size; + const unsigned char *frame_data; + frame_data = aom_video_reader_get_frame(reader, &frame_size); + if (aom_codec_decode(&codec, frame_data, frame_size, NULL)) { + fprintf(stderr, "Failed to decode frame."); + return false; + } else { + aom_codec_iter_t iter = NULL; + image = aom_codec_get_frame(&codec, &iter); + if (image != NULL) { + frame++; + return true; + } + return false; + } + } + return false; +} + +int AV1Decoder::getWidth() const { + return info->frame_width + 2 * getWidthPadding(); +} + +int AV1Decoder::getWidthPadding() const { + return show_padding ? 
AOMMAX(info->frame_width + 16, + ALIGN_POWER_OF_TWO(info->frame_width, 6)) - + info->frame_width + : 0; +} + +int AV1Decoder::getHeight() const { + return info->frame_height + 2 * getHeightPadding(); +} + +int AV1Decoder::getHeightPadding() const { + return show_padding ? AOMMAX(info->frame_height + 16, + ALIGN_POWER_OF_TWO(info->frame_height, 6)) - + info->frame_height + : 0; +} + +bool AV1Decoder::getAccountingStruct(Accounting **accounting) { + return aom_codec_control(&codec, AV1_GET_ACCOUNTING, accounting) == + AOM_CODEC_OK; +} + +bool AV1Decoder::setInspectionCallback() { + aom_inspect_init ii; + ii.inspect_cb = AV1Decoder::inspect; + ii.inspect_ctx = (void *)this; + return aom_codec_control(&codec, AV1_SET_INSPECTION_CALLBACK, &ii) == + AOM_CODEC_OK; +} + +void AV1Decoder::inspect(void *pbi, void *data) { + AV1Decoder *decoder = (AV1Decoder *)data; + ifd_inspect(&decoder->frame_data, pbi, 0); +} + +#define MIN_ZOOM (1) +#define MAX_ZOOM (4) + +class AnalyzerPanel : public wxPanel { + DECLARE_EVENT_TABLE() + + private: + AV1Decoder decoder; + const wxString path; + + int zoom; + unsigned char *pixels; + + const bool bit_accounting; + double *bpp_q3; + + int plane_mask; + + // The display size is the decode size, scaled by the zoom. 
+ int getDisplayWidth() const; + int getDisplayHeight() const; + + bool updateDisplaySize(); + + void computeBitsPerPixel(); + + public: + AnalyzerPanel(wxWindow *parent, const wxString &path, + const bool bit_accounting); + ~AnalyzerPanel(); + + bool open(const wxString &path); + void close(); + void render(); + void togglePadding(); + bool nextFrame(); + void refresh(); + + int getZoom() const; + bool setZoom(int zoom); + + void setShowPlane(bool show_plane, int mask); + + void onPaint(wxPaintEvent &event); // NOLINT +}; + +BEGIN_EVENT_TABLE(AnalyzerPanel, wxPanel) +EVT_PAINT(AnalyzerPanel::onPaint) +END_EVENT_TABLE() + +AnalyzerPanel::AnalyzerPanel(wxWindow *parent, const wxString &path, + const bool bit_accounting) + : wxPanel(parent), path(path), zoom(0), pixels(NULL), + bit_accounting(bit_accounting), bpp_q3(NULL), plane_mask(OD_ALL_MASK) {} + +AnalyzerPanel::~AnalyzerPanel() { close(); } + +void AnalyzerPanel::setShowPlane(bool show_plane, int mask) { + if (show_plane) { + plane_mask |= mask; + } else { + plane_mask &= ~mask; + } +} + +void AnalyzerPanel::render() { + aom_image_t *img = decoder.image; + const int hbd = !!(img->fmt & AOM_IMG_FMT_HIGHBITDEPTH); + int y_stride = img->stride[0] >> hbd; + int cb_stride = img->stride[1] >> hbd; + int cr_stride = img->stride[2] >> hbd; + int p_stride = 3 * getDisplayWidth(); + unsigned char *y_row = img->planes[0]; + unsigned char *cb_row = img->planes[1]; + unsigned char *cr_row = img->planes[2]; + uint16_t *y_row16 = reinterpret_cast<uint16_t *>(y_row); + uint16_t *cb_row16 = reinterpret_cast<uint16_t *>(cb_row); + uint16_t *cr_row16 = reinterpret_cast<uint16_t *>(cr_row); + unsigned char *p_row = pixels; + int y_width_padding = decoder.getWidthPadding(); + int cb_width_padding = y_width_padding >> 1; + int cr_width_padding = y_width_padding >> 1; + int y_height_padding = decoder.getHeightPadding(); + int cb_height_padding = y_height_padding >> 1; + int cr_height_padding = y_height_padding >> 1; + for (int j = 0; 
j < decoder.getHeight(); j++) { + unsigned char *y = y_row - y_stride * y_height_padding; + unsigned char *cb = cb_row - cb_stride * cb_height_padding; + unsigned char *cr = cr_row - cr_stride * cr_height_padding; + uint16_t *y16 = y_row16 - y_stride * y_height_padding; + uint16_t *cb16 = cb_row16 - cb_stride * cb_height_padding; + uint16_t *cr16 = cr_row16 - cr_stride * cr_height_padding; + unsigned char *p = p_row; + for (int i = 0; i < decoder.getWidth(); i++) { + int64_t yval; + int64_t cbval; + int64_t crval; + int pmask; + unsigned rval; + unsigned gval; + unsigned bval; + if (hbd) { + yval = *(y16 - y_width_padding); + cbval = *(cb16 - cb_width_padding); + crval = *(cr16 - cr_width_padding); + } else { + yval = *(y - y_width_padding); + cbval = *(cb - cb_width_padding); + crval = *(cr - cr_width_padding); + } + pmask = plane_mask; + if (pmask & OD_LUMA_MASK) { + yval -= 16; + } else { + yval = 128; + } + cbval = ((pmask & OD_CB_MASK) >> 1) * (cbval - 128); + crval = ((pmask & OD_CR_MASK) >> 2) * (crval - 128); + /*This is intentionally slow and very accurate.*/ + rval = OD_CLAMPI( + 0, + (int32_t)OD_DIV_ROUND( + 2916394880000LL * yval + 4490222169144LL * crval, 9745792000LL), + 65535); + gval = OD_CLAMPI(0, + (int32_t)OD_DIV_ROUND(2916394880000LL * yval - + 534117096223LL * cbval - + 1334761232047LL * crval, + 9745792000LL), + 65535); + bval = OD_CLAMPI( + 0, + (int32_t)OD_DIV_ROUND( + 2916394880000LL * yval + 5290866304968LL * cbval, 9745792000LL), + 65535); + unsigned char *px_row = p; + for (int v = 0; v < zoom; v++) { + unsigned char *px = px_row; + for (int u = 0; u < zoom; u++) { + *(px + 0) = (unsigned char)(rval >> 8); + *(px + 1) = (unsigned char)(gval >> 8); + *(px + 2) = (unsigned char)(bval >> 8); + px += 3; + } + px_row += p_stride; + } + if (hbd) { + int dc = ((y16 - y_row16) & 1) | (1 - img->x_chroma_shift); + y16++; + cb16 += dc; + cr16 += dc; + } else { + int dc = ((y - y_row) & 1) | (1 - img->x_chroma_shift); + y++; + cb += dc; + cr += dc; 
+ } + p += zoom * 3; + } + int dc = -((j & 1) | (1 - img->y_chroma_shift)); + if (hbd) { + y_row16 += y_stride; + cb_row16 += dc & cb_stride; + cr_row16 += dc & cr_stride; + } else { + y_row += y_stride; + cb_row += dc & cb_stride; + cr_row += dc & cr_stride; + } + p_row += zoom * p_stride; + } +} + +void AnalyzerPanel::computeBitsPerPixel() { + Accounting *acct; + double bpp_total; + int totals_q3[MAX_SYMBOL_TYPES] = { 0 }; + int sym_count[MAX_SYMBOL_TYPES] = { 0 }; + decoder.getAccountingStruct(&acct); + for (int j = 0; j < decoder.getHeight(); j++) { + for (int i = 0; i < decoder.getWidth(); i++) { + bpp_q3[j * decoder.getWidth() + i] = 0.0; + } + } + bpp_total = 0; + for (int i = 0; i < acct->syms.num_syms; i++) { + AccountingSymbol *s; + s = &acct->syms.syms[i]; + totals_q3[s->id] += s->bits; + sym_count[s->id] += s->samples; + } + printf("=== Frame: %-3i ===\n", decoder.frame - 1); + for (int i = 0; i < acct->syms.dictionary.num_strs; i++) { + if (totals_q3[i]) { + printf("%30s = %10.3f (%f bit/symbol)\n", acct->syms.dictionary.strs[i], + (float)totals_q3[i] / 8, (float)totals_q3[i] / 8 / sym_count[i]); + } + } + printf("\n"); +} + +void AnalyzerPanel::togglePadding() { + decoder.togglePadding(); + updateDisplaySize(); +} + +bool AnalyzerPanel::nextFrame() { + if (decoder.step()) { + refresh(); + return true; + } + return false; +} + +void AnalyzerPanel::refresh() { + if (bit_accounting) { + computeBitsPerPixel(); + } + render(); +} + +int AnalyzerPanel::getDisplayWidth() const { return zoom * decoder.getWidth(); } + +int AnalyzerPanel::getDisplayHeight() const { + return zoom * decoder.getHeight(); +} + +bool AnalyzerPanel::updateDisplaySize() { + unsigned char *p = (unsigned char *)malloc( + sizeof(*p) * 3 * getDisplayWidth() * getDisplayHeight()); + if (p == NULL) { + return false; + } + free(pixels); + pixels = p; + SetSize(getDisplayWidth(), getDisplayHeight()); + return true; +} + +bool AnalyzerPanel::open(const wxString &path) { + if 
(!decoder.open(path)) { + return false; + } + if (!setZoom(MIN_ZOOM)) { + return false; + } + if (bit_accounting) { + bpp_q3 = (double *)malloc(sizeof(*bpp_q3) * decoder.getWidth() * + decoder.getHeight()); + if (bpp_q3 == NULL) { + fprintf(stderr, "Could not allocate memory for bit accounting\n"); + close(); + return false; + } + } + if (!nextFrame()) { + close(); + return false; + } + SetFocus(); + return true; +} + +void AnalyzerPanel::close() { + decoder.close(); + free(pixels); + pixels = NULL; + free(bpp_q3); + bpp_q3 = NULL; +} + +int AnalyzerPanel::getZoom() const { return zoom; } + +bool AnalyzerPanel::setZoom(int z) { + if (z <= MAX_ZOOM && z >= MIN_ZOOM && zoom != z) { + int old_zoom = zoom; + zoom = z; + if (!updateDisplaySize()) { + zoom = old_zoom; + return false; + } + return true; + } + return false; +} + +void AnalyzerPanel::onPaint(wxPaintEvent &) { + wxBitmap bmp(wxImage(getDisplayWidth(), getDisplayHeight(), pixels, true)); + wxBufferedPaintDC dc(this, bmp); +} + +class AnalyzerFrame : public wxFrame { + DECLARE_EVENT_TABLE() + + private: + AnalyzerPanel *panel; + const bool bit_accounting; + + wxMenu *fileMenu; + wxMenu *viewMenu; + wxMenu *playbackMenu; + + public: + AnalyzerFrame(const bool bit_accounting); // NOLINT + + void onOpen(wxCommandEvent &event); // NOLINT + void onClose(wxCommandEvent &event); // NOLINT + void onQuit(wxCommandEvent &event); // NOLINT + + void onTogglePadding(wxCommandEvent &event); // NOLINT + void onZoomIn(wxCommandEvent &event); // NOLINT + void onZoomOut(wxCommandEvent &event); // NOLINT + void onActualSize(wxCommandEvent &event); // NOLINT + + void onToggleViewMenuCheckBox(wxCommandEvent &event); // NOLINT + void onResetAndToggleViewMenuCheckBox(wxCommandEvent &event); // NOLINT + + void onNextFrame(wxCommandEvent &event); // NOLINT + void onGotoFrame(wxCommandEvent &event); // NOLINT + void onRestart(wxCommandEvent &event); // NOLINT + + void onAbout(wxCommandEvent &event); // NOLINT + + bool open(const 
wxString &path); + bool setZoom(int zoom); + void updateViewMenu(); +}; + +enum { + wxID_NEXT_FRAME = 6000, + wxID_SHOW_Y, + wxID_SHOW_U, + wxID_SHOW_V, + wxID_GOTO_FRAME, + wxID_RESTART, + wxID_ACTUAL_SIZE, + wxID_PADDING +}; + +BEGIN_EVENT_TABLE(AnalyzerFrame, wxFrame) +EVT_MENU(wxID_OPEN, AnalyzerFrame::onOpen) +EVT_MENU(wxID_CLOSE, AnalyzerFrame::onClose) +EVT_MENU(wxID_EXIT, AnalyzerFrame::onQuit) +EVT_MENU(wxID_PADDING, AnalyzerFrame::onTogglePadding) +EVT_MENU(wxID_ZOOM_IN, AnalyzerFrame::onZoomIn) +EVT_MENU(wxID_ZOOM_OUT, AnalyzerFrame::onZoomOut) +EVT_MENU(wxID_ACTUAL_SIZE, AnalyzerFrame::onActualSize) +EVT_MENU(wxID_SHOW_Y, AnalyzerFrame::onResetAndToggleViewMenuCheckBox) +EVT_MENU(wxID_SHOW_U, AnalyzerFrame::onResetAndToggleViewMenuCheckBox) +EVT_MENU(wxID_SHOW_V, AnalyzerFrame::onResetAndToggleViewMenuCheckBox) +EVT_MENU(wxID_NEXT_FRAME, AnalyzerFrame::onNextFrame) +EVT_MENU(wxID_GOTO_FRAME, AnalyzerFrame::onGotoFrame) +EVT_MENU(wxID_RESTART, AnalyzerFrame::onRestart) +EVT_MENU(wxID_ABOUT, AnalyzerFrame::onAbout) +END_EVENT_TABLE() + +AnalyzerFrame::AnalyzerFrame(const bool bit_accounting) + : wxFrame(NULL, wxID_ANY, _("AV1 Stream Analyzer"), wxDefaultPosition, + wxDefaultSize, wxDEFAULT_FRAME_STYLE), + panel(NULL), bit_accounting(bit_accounting) { + wxMenuBar *mb = new wxMenuBar(); + + fileMenu = new wxMenu(); + fileMenu->Append(wxID_OPEN, _("&Open...\tCtrl-O"), _("Open AV1 file")); + fileMenu->Append(wxID_CLOSE, _("&Close\tCtrl-W"), _("Close AV1 file")); + fileMenu->Enable(wxID_CLOSE, false); + fileMenu->Append(wxID_EXIT, _("E&xit\tCtrl-Q"), _("Quit this program")); + mb->Append(fileMenu, _("&File")); + + wxAcceleratorEntry entries[2]; + entries[0].Set(wxACCEL_CTRL, (int)'=', wxID_ZOOM_IN); + entries[1].Set(wxACCEL_CTRL | wxACCEL_SHIFT, (int)'-', wxID_ZOOM_OUT); + wxAcceleratorTable accel(2, entries); + this->SetAcceleratorTable(accel); + + viewMenu = new wxMenu(); + +viewMenu->Append(wxID_PADDING, _("Toggle padding\tCtrl-p"), + _("Show padding")); + 
viewMenu->Append(wxID_ZOOM_IN, _("Zoom-In\tCtrl-+"), _("Double image size")); + viewMenu->Append(wxID_ZOOM_OUT, _("Zoom-Out\tCtrl--"), _("Half image size")); + viewMenu->Append(wxID_ACTUAL_SIZE, _("Actual size\tCtrl-0"), + _("Actual size of the frame")); + viewMenu->AppendSeparator(); + viewMenu->AppendCheckItem(wxID_SHOW_Y, _("&Y plane\tCtrl-Y"), + _("Show Y plane")); + viewMenu->AppendCheckItem(wxID_SHOW_U, _("&U plane\tCtrl-U"), + _("Show U plane")); + viewMenu->AppendCheckItem(wxID_SHOW_V, _("&V plane\tCtrl-V"), + _("Show V plane")); + mb->Append(viewMenu, _("&View")); + + playbackMenu = new wxMenu(); + playbackMenu->Append(wxID_NEXT_FRAME, _("Next frame\tCtrl-."), + _("Go to next frame")); + /*playbackMenu->Append(wxID_RESTART, _("&Restart\tCtrl-R"), + _("Set video to frame 0")); + playbackMenu->Append(wxID_GOTO_FRAME, _("Jump to Frame\tCtrl-J"), + _("Go to frame number"));*/ + mb->Append(playbackMenu, _("&Playback")); + + wxMenu *helpMenu = new wxMenu(); + helpMenu->Append(wxID_ABOUT, _("&About...\tF1"), _("Show about dialog")); + mb->Append(helpMenu, _("&Help")); + + SetMenuBar(mb); + + CreateStatusBar(1); +} + +void AnalyzerFrame::onOpen(wxCommandEvent &WXUNUSED(event)) { + wxFileDialog openFileDialog(this, _("Open file"), wxEmptyString, + wxEmptyString, _("AV1 files (*.ivf)|*.ivf"), + wxFD_OPEN | wxFD_FILE_MUST_EXIST); + if (openFileDialog.ShowModal() != wxID_CANCEL) { + open(openFileDialog.GetPath()); + } +} + +void AnalyzerFrame::onClose(wxCommandEvent &WXUNUSED(event)) {} + +void AnalyzerFrame::onQuit(wxCommandEvent &WXUNUSED(event)) { Close(true); } + +void AnalyzerFrame::onTogglePadding(wxCommandEvent &WXUNUSED(event)) { + panel->togglePadding(); + SetClientSize(panel->GetSize()); + panel->render(); + panel->Refresh(); +} + +void AnalyzerFrame::onZoomIn(wxCommandEvent &WXUNUSED(event)) { + setZoom(panel->getZoom() + 1); +} + +void AnalyzerFrame::onZoomOut(wxCommandEvent &WXUNUSED(event)) { + setZoom(panel->getZoom() - 1); +} + +void 
AnalyzerFrame::onActualSize(wxCommandEvent &WXUNUSED(event)) { + setZoom(MIN_ZOOM); +} + +void AnalyzerFrame::onToggleViewMenuCheckBox(wxCommandEvent &event) { // NOLINT + GetMenuBar()->Check(event.GetId(), event.IsChecked()); + updateViewMenu(); +} + +void AnalyzerFrame::onResetAndToggleViewMenuCheckBox( + wxCommandEvent &event) { // NOLINT + int id = event.GetId(); + if (id != wxID_SHOW_Y && id != wxID_SHOW_U && id != wxID_SHOW_V) { + GetMenuBar()->Check(wxID_SHOW_Y, true); + GetMenuBar()->Check(wxID_SHOW_U, true); + GetMenuBar()->Check(wxID_SHOW_V, true); + } + onToggleViewMenuCheckBox(event); +} + +void AnalyzerFrame::onNextFrame(wxCommandEvent &WXUNUSED(event)) { + panel->nextFrame(); + panel->Refresh(false); +} + +void AnalyzerFrame::onGotoFrame(wxCommandEvent &WXUNUSED(event)) {} + +void AnalyzerFrame::onRestart(wxCommandEvent &WXUNUSED(event)) {} + +void AnalyzerFrame::onAbout(wxCommandEvent &WXUNUSED(event)) { + wxAboutDialogInfo info; + info.SetName(_("AV1 Bitstream Analyzer")); + info.SetVersion(_("0.1-beta")); + info.SetDescription( + _("This program implements a bitstream analyzer for AV1")); + info.SetCopyright( + wxT("(C) 2017 Alliance for Open Media <negge@mozilla.com>")); + wxAboutBox(info); +} + +bool AnalyzerFrame::open(const wxString &path) { + panel = new AnalyzerPanel(this, path, bit_accounting); + if (panel->open(path)) { + SetClientSize(panel->GetSize()); + return true; + } else { + delete panel; + return false; + } +} + +bool AnalyzerFrame::setZoom(int zoom) { + if (panel->setZoom(zoom)) { + GetMenuBar()->Enable(wxID_ACTUAL_SIZE, zoom != MIN_ZOOM); + GetMenuBar()->Enable(wxID_ZOOM_IN, zoom != MAX_ZOOM); + GetMenuBar()->Enable(wxID_ZOOM_OUT, zoom != MIN_ZOOM); + SetClientSize(panel->GetSize()); + panel->render(); + panel->Refresh(); + return true; + } + return false; +} + +void AnalyzerFrame::updateViewMenu() { + panel->setShowPlane(GetMenuBar()->IsChecked(wxID_SHOW_Y), OD_LUMA_MASK); + 
panel->setShowPlane(GetMenuBar()->IsChecked(wxID_SHOW_U), OD_CB_MASK); + panel->setShowPlane(GetMenuBar()->IsChecked(wxID_SHOW_V), OD_CR_MASK); + SetClientSize(panel->GetSize()); + panel->render(); + panel->Refresh(false); +} + +class Analyzer : public wxApp { + private: + AnalyzerFrame *frame; + + public: + void OnInitCmdLine(wxCmdLineParser &parser); // NOLINT + bool OnCmdLineParsed(wxCmdLineParser &parser); // NOLINT +}; + +static const wxCmdLineEntryDesc CMD_LINE_DESC[] = { + { wxCMD_LINE_SWITCH, _("h"), _("help"), _("Display this help and exit."), + wxCMD_LINE_VAL_NONE, wxCMD_LINE_OPTION_HELP }, + { wxCMD_LINE_SWITCH, _("a"), _("bit-accounting"), _("Enable bit accounting"), + wxCMD_LINE_VAL_NONE, wxCMD_LINE_PARAM_OPTIONAL }, + { wxCMD_LINE_PARAM, NULL, NULL, _("input.ivf"), wxCMD_LINE_VAL_STRING, + wxCMD_LINE_PARAM_OPTIONAL }, + { wxCMD_LINE_NONE } +}; + +void Analyzer::OnInitCmdLine(wxCmdLineParser &parser) { // NOLINT + parser.SetDesc(CMD_LINE_DESC); + parser.SetSwitchChars(_("-")); +} + +bool Analyzer::OnCmdLineParsed(wxCmdLineParser &parser) { // NOLINT + bool bit_accounting = parser.Found(_("a")); + if (bit_accounting && !CONFIG_ACCOUNTING) { + fprintf(stderr, + "Bit accounting support not found. " + "Recompile with:\n./cmake -DCONFIG_ACCOUNTING=1\n"); + return false; + } + frame = new AnalyzerFrame(parser.Found(_("a"))); + frame->Show(); + if (parser.GetParamCount() > 0) { + return frame->open(parser.GetParam(0)); + } + return true; +} + +void usage_exit(void) { + fprintf(stderr, "uhh\n"); + exit(EXIT_FAILURE); +} + +IMPLEMENT_APP(Analyzer) diff --git a/libs/libaom/src/examples/aom_cx_set_ref.c b/libs/libaom/src/examples/aom_cx_set_ref.c new file mode 100644 index 000000000..2f4f6586f --- /dev/null +++ b/libs/libaom/src/examples/aom_cx_set_ref.c @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// AV1 Set Reference Frame +// ============================ +// +// This is an example demonstrating how to overwrite the AV1 encoder's +// internal reference frame. In the sample we set the last frame to the +// current frame. This technique could be used to bounce between two cameras. +// +// The decoder would also have to set the reference frame to the same value +// on the same frame, or the video will become corrupt. The 'test_decode' +// variable is set to 1 in this example that tests if the encoder and decoder +// results are matching. +// +// Usage +// ----- +// This example encodes a raw video. And the last argument passed in specifies +// the frame number to update the reference frame on. For example, run +// examples/aom_cx_set_ref av1 352 288 in.yuv out.ivf 4 30 +// The parameter is parsed as follows: +// +// +// Extra Variables +// --------------- +// This example maintains the frame number passed on the command line +// in the `update_frame_num` variable. +// +// +// Configuration +// ------------- +// +// The reference frame is updated on the frame specified on the command +// line. +// +// Observing The Effects +// --------------------- +// The encoder and decoder results should be matching when the same reference +// frame setting operation is done in both encoder and decoder. Otherwise, +// the encoder/decoder mismatch would be seen. 
+ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_decoder.h" +#include "aom/aom_encoder.h" +#include "aom/aomcx.h" +#include "aom_scale/yv12config.h" +#include "common/tools_common.h" +#include "common/video_writer.h" +#include "examples/encoder_util.h" + +static const char *exec_name; + +void usage_exit() { + fprintf(stderr, + "Usage: %s <codec> <width> <height> <infile> <outfile> " + "<frame> <limit(optional)>\n", + exec_name); + exit(EXIT_FAILURE); +} + +static void testing_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder, + unsigned int frame_out, int *mismatch_seen) { + aom_image_t enc_img, dec_img; + + if (*mismatch_seen) return; + + /* Get the internal reference frame */ + if (aom_codec_control(encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img)) + die_codec(encoder, "Failed to get encoder reference frame"); + if (aom_codec_control(decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img)) + die_codec(decoder, "Failed to get decoder reference frame"); + + if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) != + (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) { + if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + aom_image_t enc_hbd_img; + aom_img_alloc(&enc_hbd_img, enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH, + enc_img.d_w, enc_img.d_h, 16); + aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img); + enc_img = enc_hbd_img; + } + if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + aom_image_t dec_hbd_img; + aom_img_alloc(&dec_hbd_img, dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH, + dec_img.d_w, dec_img.d_h, 16); + aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img); + dec_img = dec_hbd_img; + } + } + + if (!aom_compare_img(&enc_img, &dec_img)) { + int y[4], u[4], v[4]; + if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + aom_find_mismatch_high(&enc_img, &dec_img, y, u, v); + } else { + aom_find_mismatch(&enc_img, &dec_img, y, u, v); + } + + printf( + "Encode/decode mismatch on frame %d at" + " Y[%d, %d] {%d/%d}," + " U[%d, %d] {%d/%d}," + " V[%d, %d] {%d/%d}", + frame_out, y[0], 
y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], v[1], + v[2], v[3]); + *mismatch_seen = 1; + } + + aom_img_free(&enc_img); + aom_img_free(&dec_img); +} + +static int encode_frame(aom_codec_ctx_t *ecodec, aom_image_t *img, + unsigned int frame_in, AvxVideoWriter *writer, + int test_decode, aom_codec_ctx_t *dcodec, + unsigned int *frame_out, int *mismatch_seen, + aom_image_t *ext_ref) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + int got_data; + const aom_codec_err_t res = aom_codec_encode(ecodec, img, frame_in, 1, 0); + if (res != AOM_CODEC_OK) die_codec(ecodec, "Failed to encode frame"); + + got_data = 0; + + while ((pkt = aom_codec_get_cx_data(ecodec, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0; + + ++*frame_out; + + if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(ecodec, "Failed to write compressed frame"); + } + printf(keyframe ? "K" : "."); + fflush(stdout); + got_data = 1; + + // Decode 1 frame. + if (test_decode) { + if (aom_codec_decode(dcodec, pkt->data.frame.buf, + (unsigned int)pkt->data.frame.sz, NULL)) + die_codec(dcodec, "Failed to decode frame."); + + // Copy out first decoded frame, and use it as reference later. 
+ if (*frame_out == 1 && ext_ref != NULL) + if (aom_codec_control(dcodec, AV1_COPY_NEW_FRAME_IMAGE, ext_ref)) + die_codec(dcodec, "Failed to get decoder new frame"); + } + } + } + + // Mismatch checking + if (got_data && test_decode) { + testing_decode(ecodec, dcodec, *frame_out, mismatch_seen); + } + + return got_pkts; +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + // Encoder + aom_codec_ctx_t ecodec; + aom_codec_enc_cfg_t cfg; + unsigned int frame_in = 0; + aom_image_t raw; + aom_image_t raw_shift; + aom_image_t ext_ref; + aom_codec_err_t res; + AvxVideoInfo info; + AvxVideoWriter *writer = NULL; + const AvxInterface *encoder = NULL; + int flags = 0; + int allocated_raw_shift = 0; + aom_img_fmt_t raw_fmt = AOM_IMG_FMT_I420; + aom_img_fmt_t ref_fmt = AOM_IMG_FMT_I420; + + // Test encoder/decoder mismatch. + int test_decode = 1; + // Decoder + aom_codec_ctx_t dcodec; + unsigned int frame_out = 0; + + // The frame number to set reference frame on + unsigned int update_frame_num = 0; + int mismatch_seen = 0; + + const int fps = 30; + const int bitrate = 500; + + const char *codec_arg = NULL; + const char *width_arg = NULL; + const char *height_arg = NULL; + const char *infile_arg = NULL; + const char *outfile_arg = NULL; + const char *update_frame_num_arg = NULL; + unsigned int limit = 0; + exec_name = argv[0]; + + // Clear explicitly, as simply assigning "{ 0 }" generates + // "missing-field-initializers" warning in some compilers. 
+ memset(&ecodec, 0, sizeof(ecodec)); + memset(&cfg, 0, sizeof(cfg)); + memset(&info, 0, sizeof(info)); + + if (argc < 7) die("Invalid number of arguments"); + + codec_arg = argv[1]; + width_arg = argv[2]; + height_arg = argv[3]; + infile_arg = argv[4]; + outfile_arg = argv[5]; + update_frame_num_arg = argv[6]; + + encoder = get_aom_encoder_by_name(codec_arg); + if (!encoder) die("Unsupported codec."); + + update_frame_num = (unsigned int)strtoul(update_frame_num_arg, NULL, 0); + // In AV1, the reference buffers (cm->buffer_pool->frame_bufs[i].buf) are + // allocated while calling aom_codec_encode(), thus, setting reference for + // 1st frame isn't supported. + if (update_frame_num <= 1) { + die("Couldn't parse frame number '%s'\n", update_frame_num_arg); + } + + if (argc > 7) { + limit = (unsigned int)strtoul(argv[7], NULL, 0); + if (update_frame_num > limit) + die("Update frame number couldn't larger than limit\n"); + } + + info.codec_fourcc = encoder->fourcc; + info.frame_width = (int)strtol(width_arg, NULL, 0); + info.frame_height = (int)strtol(height_arg, NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || info.frame_height <= 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + // In this test, the bit depth of input video is 8-bit, and the input format + // is AOM_IMG_FMT_I420. + if (!aom_img_alloc(&raw, raw_fmt, info.frame_width, info.frame_height, 32)) { + die("Failed to allocate image."); + } + + if (FORCE_HIGHBITDEPTH_DECODING) ref_fmt |= AOM_IMG_FMT_HIGHBITDEPTH; + // Allocate memory with the border so that it can be used as a reference. 
+ if (!aom_img_alloc_with_border(&ext_ref, ref_fmt, info.frame_width, + info.frame_height, 32, 8, + AOM_BORDER_IN_PIXELS)) { + die("Failed to allocate image."); + } + + printf("Using %s\n", aom_codec_iface_name(encoder->codec_interface())); + + res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); + if (res) die_codec(&ecodec, "Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + cfg.rc_target_bitrate = bitrate; + cfg.g_lag_in_frames = 3; + cfg.g_bit_depth = AOM_BITS_8; + + flags |= (cfg.g_bit_depth > AOM_BITS_8 || FORCE_HIGHBITDEPTH_DECODING) + ? AOM_CODEC_USE_HIGHBITDEPTH + : 0; + + writer = aom_video_writer_open(outfile_arg, kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing.", outfile_arg); + + if (!(infile = fopen(infile_arg, "rb"))) + die("Failed to open %s for reading.", infile_arg); + + if (aom_codec_enc_init(&ecodec, encoder->codec_interface(), &cfg, flags)) + die_codec(&ecodec, "Failed to initialize encoder"); + + // Disable alt_ref. + if (aom_codec_control(&ecodec, AOME_SET_ENABLEAUTOALTREF, 0)) + die_codec(&ecodec, "Failed to set enable auto alt ref"); + + if (test_decode) { + const AvxInterface *decoder = get_aom_decoder_by_name(codec_arg); + if (aom_codec_dec_init(&dcodec, decoder->codec_interface(), NULL, 0)) + die_codec(&dcodec, "Failed to initialize decoder."); + } + + // Encode frames. + while (aom_img_read(&raw, infile)) { + if (limit && frame_in >= limit) break; + aom_image_t *frame_to_encode; + + if (FORCE_HIGHBITDEPTH_DECODING) { + // Need to allocate larger buffer to use hbd internal. 
+ int input_shift = 0; + if (!allocated_raw_shift) { + aom_img_alloc(&raw_shift, raw_fmt | AOM_IMG_FMT_HIGHBITDEPTH, + info.frame_width, info.frame_height, 32); + allocated_raw_shift = 1; + } + aom_img_upshift(&raw_shift, &raw, input_shift); + frame_to_encode = &raw_shift; + } else { + frame_to_encode = &raw; + } + + if (update_frame_num > 1 && frame_out + 1 == update_frame_num) { + av1_ref_frame_t ref; + ref.idx = 0; + ref.use_external_ref = 0; + ref.img = ext_ref; + // Set reference frame in encoder. + if (aom_codec_control(&ecodec, AV1_SET_REFERENCE, &ref)) + die_codec(&ecodec, "Failed to set encoder reference frame"); + printf(" <SET_REF>"); + + // If set_reference in decoder is commented out, the enc/dec mismatch + // would be seen. + if (test_decode) { + ref.use_external_ref = 1; + if (aom_codec_control(&dcodec, AV1_SET_REFERENCE, &ref)) + die_codec(&dcodec, "Failed to set decoder reference frame"); + } + } + + encode_frame(&ecodec, frame_to_encode, frame_in, writer, test_decode, + &dcodec, &frame_out, &mismatch_seen, &ext_ref); + frame_in++; + if (mismatch_seen) break; + } + + // Flush encoder. 
+ if (!mismatch_seen) + while (encode_frame(&ecodec, NULL, frame_in, writer, test_decode, &dcodec, + &frame_out, &mismatch_seen, NULL)) { + } + + printf("\n"); + fclose(infile); + printf("Processed %d frames.\n", frame_out); + + if (test_decode) { + if (!mismatch_seen) + printf("Encoder/decoder results are matching.\n"); + else + printf("Encoder/decoder results are NOT matching.\n"); + } + + if (test_decode) + if (aom_codec_destroy(&dcodec)) + die_codec(&dcodec, "Failed to destroy decoder"); + + if (allocated_raw_shift) aom_img_free(&raw_shift); + aom_img_free(&ext_ref); + aom_img_free(&raw); + if (aom_codec_destroy(&ecodec)) + die_codec(&ecodec, "Failed to destroy encoder."); + + aom_video_writer_close(writer); + + return EXIT_SUCCESS; +} diff --git a/libs/libaom/src/examples/av1_dec_fuzzer.cc b/libs/libaom/src/examples/av1_dec_fuzzer.cc new file mode 100644 index 000000000..1cddc8cc1 --- /dev/null +++ b/libs/libaom/src/examples/av1_dec_fuzzer.cc @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +/* + * See build_av1_dec_fuzzer.sh for building instructions. 
+ */ + +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <algorithm> +#include <memory> +#include "config/aom_config.h" +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "aom_ports/mem_ops.h" + +#define IVF_FRAME_HDR_SZ (4 + 8) /* 4 byte size + 8 byte timestamp */ +#define IVF_FILE_HDR_SZ 32 + +extern "C" void usage_exit(void) { exit(EXIT_FAILURE); } + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (size <= IVF_FILE_HDR_SZ) { + return 0; + } + + const aom_codec_iface_t *codec_interface = aom_codec_av1_dx(); + aom_codec_ctx_t codec; + // Set thread count in the range [1, 64]. + const unsigned int threads = (data[IVF_FILE_HDR_SZ] & 0x3f) + 1; + aom_codec_dec_cfg_t cfg = { threads, 0, 0, !FORCE_HIGHBITDEPTH_DECODING }; + if (aom_codec_dec_init(&codec, codec_interface, &cfg, 0)) { + return 0; + } + + data += IVF_FILE_HDR_SZ; + size -= IVF_FILE_HDR_SZ; + + while (size > IVF_FRAME_HDR_SZ) { + size_t frame_size = mem_get_le32(data); + size -= IVF_FRAME_HDR_SZ; + data += IVF_FRAME_HDR_SZ; + frame_size = std::min(size, frame_size); + + const aom_codec_err_t err = + aom_codec_decode(&codec, data, frame_size, nullptr); + static_cast<void>(err); + aom_codec_iter_t iter = nullptr; + aom_image_t *img = nullptr; + while ((img = aom_codec_get_frame(&codec, &iter)) != nullptr) { + } + data += frame_size; + size -= frame_size; + } + aom_codec_destroy(&codec); + return 0; +} diff --git a/libs/libaom/src/examples/build_av1_dec_fuzzer.sh b/libs/libaom/src/examples/build_av1_dec_fuzzer.sh new file mode 100644 index 000000000..0dcb254da --- /dev/null +++ b/libs/libaom/src/examples/build_av1_dec_fuzzer.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# +# Copyright (c) 2019, Alliance for Open Media. All rights reserved +# +# This source code is subject to the terms of the BSD 2 Clause License and +# the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License +# was not distributed with this source code in the LICENSE file, you can +# obtain it at www.aomedia.org/license/software. If the Alliance for Open +# Media Patent License 1.0 was not distributed with this source code in the +# PATENTS file, you can obtain it at www.aomedia.org/license/patent. +# +############################################################################### +# Fuzzer for libaom decoder. +# ========================== +# Requirements +# --------------------- +# Clang6.0 or above (must support -fsanitize=fuzzer -fsanitize=fuzzer-no-link) +# +# References: +# --------------------- +# http://llvm.org/docs/LibFuzzer.html +# https://github.com/google/oss-fuzz +# +# Steps to build / run +# --------------------- + +set -eu + +# Have a copy of AOM and a build directory ready. +if [[ $# -ne 2 ]]; then + echo "Pass in the AOM source tree as first argument, and a build directory " + echo "as the second argument. The AOM source tree can be obtained via: " + echo " git clone https://aomedia.googlesource.com/aom" + exit 2 +fi +if [[ -z "$CC" ]]; then + echo "Set the CC environment variable to point to your C compiler." + exit 2 +fi +if [[ -z "$CXX" ]]; then + echo "Set the CXX environment variable to point to your C++ compiler." + exit 2 +fi + +AOM_DIR=$1 +BUILD_DIR=$2 +# Run CMake with address sanitizer enabled and build the codec. +# Enable DO_RANGE_CHECK_CLAMP to suppress the noise of integer overflows +# in the transform functions. Also set memory limits. 
+EXTRA_C_FLAGS='-DDO_RANGE_CHECK_CLAMP=1 -DAOM_MAX_ALLOCABLE_MEMORY=1073741824' +cd "${BUILD_DIR}" +cmake "${AOM_DIR}" -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCONFIG_PIC=1 \ + -DCONFIG_SCALABILITY=0 -DFORCE_HIGHBITDEPTH_DECODING=0 \ + -DCONFIG_AV1_ENCODER=0 -DENABLE_EXAMPLES=0 -DENABLE_DOCS=0 -DENABLE_TESTS=0 \ + -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=12288 -DDECODE_WIDTH_LIMIT=12288 \ + -DAOM_EXTRA_C_FLAGS="${EXTRA_C_FLAGS}" \ + -DAOM_EXTRA_CXX_FLAGS="${EXTRA_C_FLAGS}" -DSANITIZE=fuzzer-no-link,address + +# Build the codec. +make -j$(nproc) + +# Build the av1 fuzzer +$CXX -std=c++11 -DDECODER=av1 -I${AOM_DIR} -I${BUILD_DIR} \ + -fsanitize=fuzzer,address -Wl,--start-group \ + ${AOM_DIR}/examples/av1_dec_fuzzer.cc -o ${BUILD_DIR}/av1_dec_fuzzer \ + ${BUILD_DIR}/libaom.a -Wl,--end-group + +echo "Fuzzer built at ${BUILD_DIR}/av1_dec_fuzzer." +echo "Create a corpus directory, copy IVF files in there, and run:" +echo " av1_dec_fuzzer CORPUS_DIR" diff --git a/libs/libaom/src/examples/decode_to_md5.c b/libs/libaom/src/examples/decode_to_md5.c new file mode 100644 index 000000000..bc127b78d --- /dev/null +++ b/libs/libaom/src/examples/decode_to_md5.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Frame-by-frame MD5 Checksum +// =========================== +// +// This example builds upon the simple decoder loop to show how checksums +// of the decoded output can be generated. 
These are used for validating +// decoder implementations against the reference implementation, for example. +// +// MD5 algorithm +// ------------- +// The Message-Digest 5 (MD5) is a well known hash function. We have provided +// an implementation derived from the RSA Data Security, Inc. MD5 Message-Digest +// Algorithm for your use. Our implementation only changes the interface of this +// reference code. You must include the `md5_utils.h` header for access to these +// functions. +// +// Processing The Decoded Data +// --------------------------- +// Each row of the image is passed to the MD5 accumulator. First the Y plane +// is processed, then U, then V. It is important to honor the image's `stride` +// values. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "common/md5_utils.h" +#include "common/tools_common.h" +#include "common/video_reader.h" + +static void get_image_md5(const aom_image_t *img, unsigned char digest[16]) { + int plane, y; + MD5Context md5; + + MD5Init(&md5); + + for (plane = 0; plane < 3; ++plane) { + const unsigned char *buf = img->planes[plane]; + const int stride = img->stride[plane]; + const int w = plane ? (img->d_w + 1) >> 1 : img->d_w; + const int h = plane ? 
(img->d_h + 1) >> 1 : img->d_h; + + for (y = 0; y < h; ++y) { + MD5Update(&md5, buf, w); + buf += stride; + } + } + + MD5Final(digest, &md5); +} + +static void print_md5(FILE *stream, unsigned char digest[16]) { + int i; + + for (i = 0; i < 16; ++i) fprintf(stream, "%02x", digest[i]); +} + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) { + int frame_cnt = 0; + FILE *outfile = NULL; + aom_codec_ctx_t codec; + AvxVideoReader *reader = NULL; + const AvxVideoInfo *info = NULL; + const AvxInterface *decoder = NULL; + + exec_name = argv[0]; + + if (argc != 3) die("Invalid number of arguments."); + + reader = aom_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + if (!(outfile = fopen(argv[2], "wb"))) + die("Failed to open %s for writing.", argv[2]); + + info = aom_video_reader_get_info(reader); + + decoder = get_aom_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + + printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface())); + + if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) + die_codec(&codec, "Failed to initialize decoder"); + + while (aom_video_reader_read_frame(reader)) { + aom_codec_iter_t iter = NULL; + aom_image_t *img = NULL; + size_t frame_size = 0; + const unsigned char *frame = + aom_video_reader_get_frame(reader, &frame_size); + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode frame"); + + while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) { + unsigned char digest[16]; + + get_image_md5(img, digest); + print_md5(outfile, digest); + fprintf(outfile, " img-%dx%d-%04d.i420\n", img->d_w, img->d_h, + ++frame_cnt); + } + } + + printf("Processed %d frames.\n", frame_cnt); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + 
aom_video_reader_close(reader); + + fclose(outfile); + return EXIT_SUCCESS; +} diff --git a/libs/libaom/src/examples/decode_with_drops.c b/libs/libaom/src/examples/decode_with_drops.c new file mode 100644 index 000000000..214401958 --- /dev/null +++ b/libs/libaom/src/examples/decode_with_drops.c @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Decode With Drops Example +// ========================= +// +// This is an example utility which drops a series of frames, as specified +// on the command line. This is useful for observing the error recovery +// features of the codec. +// +// Usage +// ----- +// This example adds a single argument to the `simple_decoder` example, +// which specifies the range or pattern of frames to drop. The parameter is +// parsed as follows: +// +// Dropping A Range Of Frames +// -------------------------- +// To drop a range of frames, specify the starting frame and the ending +// frame to drop, separated by a dash. The following command will drop +// frames 5 through 10 (base 1). +// +// $ ./decode_with_drops in.ivf out.i420 5-10 +// +// +// Dropping A Pattern Of Frames +// ---------------------------- +// To drop a pattern of frames, specify the number of frames to drop and +// the number of frames after which to repeat the pattern, separated by +// a forward-slash. The following command will drop 3 of 7 frames. +// Specifically, it will decode 4 frames, then drop 3 frames, and then +// repeat. 
+// +// $ ./decode_with_drops in.ivf out.i420 3/7 +// +// +// Extra Variables +// --------------- +// This example maintains the pattern passed on the command line in the +// `n`, `m`, and `is_range` variables: +// +// +// Making The Drop Decision +// ------------------------ +// The example decides whether to drop the frame based on the current +// frame number, immediately before decoding the frame. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "common/tools_common.h" +#include "common/video_reader.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <infile> <outfile> <N-M|N/M>\n", exec_name); + exit(EXIT_FAILURE); +} + +// Decodes <infile> and writes the decoded images to <outfile>, skipping the +// frames selected by the N-M range or N/M pattern given as third argument. +int main(int argc, char **argv) { + int frame_cnt = 0; + FILE *outfile = NULL; + aom_codec_ctx_t codec; + const AvxInterface *decoder = NULL; + AvxVideoReader *reader = NULL; + const AvxVideoInfo *info = NULL; + int n = 0; + int m = 0; + int is_range = 0; + char *nptr = NULL; + + exec_name = argv[0]; + + if (argc != 4) die("Invalid number of arguments."); + + reader = aom_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + if (!(outfile = fopen(argv[2], "wb"))) + die("Failed to open %s for writing.", argv[2]); + + n = (int)strtol(argv[3], &nptr, 0); + // Parse M only when a separator follows N: if strtol() consumed the whole + // argument, nptr + 1 would point past the terminating NUL. m then stays 0 + // and the check below reports the pattern as unparseable. + if (*nptr == '-' || *nptr == '/') m = (int)strtol(nptr + 1, NULL, 0); + is_range = (*nptr == '-'); + if (!n || !m || (*nptr != '-' && *nptr != '/')) + die("Couldn't parse pattern %s.\n", argv[3]); + + info = aom_video_reader_get_info(reader); + + decoder = get_aom_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + + printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface())); + + if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) + die_codec(&codec, "Failed to initialize decoder."); + + while (aom_video_reader_read_frame(reader)) { + aom_codec_iter_t iter = NULL; + aom_image_t *img = NULL; + size_t 
frame_size = 0; + int skip; + const unsigned char *frame = + aom_video_reader_get_frame(reader, &frame_size); + ++frame_cnt; + + skip = (is_range && frame_cnt >= n && frame_cnt <= m) || + (!is_range && m - (frame_cnt - 1) % m <= n); + + if (!skip) { + putc('.', stdout); + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode frame."); + + while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) + aom_img_write(img, outfile); + } else { + putc('X', stdout); + } + + fflush(stdout); + } + + printf("Processed %d frames.\n", frame_cnt); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n", + info->frame_width, info->frame_height, argv[2]); + + aom_video_reader_close(reader); + fclose(outfile); + + return EXIT_SUCCESS; +} diff --git a/libs/libaom/src/examples/encoder_util.c b/libs/libaom/src/examples/encoder_util.c new file mode 100644 index 000000000..e43b37250 --- /dev/null +++ b/libs/libaom/src/examples/encoder_util.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Utility functions used by encoder binaries. + +#include "examples/encoder_util.h" + +#include <assert.h> +#include <string.h> + +#include "aom/aom_integer.h" + +#define mmin(a, b) ((a) < (b) ? 
(a) : (b)) + +static void find_mismatch_plane(const aom_image_t *const img1, + const aom_image_t *const img2, int plane, + int use_highbitdepth, int loc[4]) { + const unsigned char *const p1 = img1->planes[plane]; + const int p1_stride = img1->stride[plane] >> use_highbitdepth; + const unsigned char *const p2 = img2->planes[plane]; + const int p2_stride = img2->stride[plane] >> use_highbitdepth; + const uint32_t bsize = 64; + const int is_y_plane = (plane == AOM_PLANE_Y); + const uint32_t bsizex = is_y_plane ? bsize : bsize >> img1->x_chroma_shift; + const uint32_t bsizey = is_y_plane ? bsize : bsize >> img1->y_chroma_shift; + const uint32_t c_w = + is_y_plane ? img1->d_w + : (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; + const uint32_t c_h = + is_y_plane ? img1->d_h + : (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; + assert(img1->d_w == img2->d_w && img1->d_h == img2->d_h); + assert(img1->x_chroma_shift == img2->x_chroma_shift && + img1->y_chroma_shift == img2->y_chroma_shift); + loc[0] = loc[1] = loc[2] = loc[3] = -1; + if (img1->monochrome && img2->monochrome && plane) return; + int match = 1; + uint32_t i, j; + for (i = 0; match && i < c_h; i += bsizey) { + for (j = 0; match && j < c_w; j += bsizex) { + const int si = + is_y_plane ? mmin(i + bsizey, c_h) - i : mmin(i + bsizey, c_h - i); + const int sj = + is_y_plane ? mmin(j + bsizex, c_w) - j : mmin(j + bsizex, c_w - j); + int k, l; + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + const int row = i + k; + const int col = j + l; + const int offset1 = row * p1_stride + col; + const int offset2 = row * p2_stride + col; + const int val1 = use_highbitdepth + ? p1[2 * offset1] | (p1[2 * offset1 + 1] << 8) + : p1[offset1]; + const int val2 = use_highbitdepth + ? 
p2[2 * offset2] | (p2[2 * offset2 + 1] << 8) + : p2[offset2]; + if (val1 != val2) { + loc[0] = row; + loc[1] = col; + loc[2] = val1; + loc[3] = val2; + match = 0; + break; + } + } + } + } + } +} + +static void find_mismatch_helper(const aom_image_t *const img1, + const aom_image_t *const img2, + int use_highbitdepth, int yloc[4], int uloc[4], + int vloc[4]) { + find_mismatch_plane(img1, img2, AOM_PLANE_Y, use_highbitdepth, yloc); + find_mismatch_plane(img1, img2, AOM_PLANE_U, use_highbitdepth, uloc); + find_mismatch_plane(img1, img2, AOM_PLANE_V, use_highbitdepth, vloc); +} + +void aom_find_mismatch_high(const aom_image_t *const img1, + const aom_image_t *const img2, int yloc[4], + int uloc[4], int vloc[4]) { + find_mismatch_helper(img1, img2, 1, yloc, uloc, vloc); +} + +void aom_find_mismatch(const aom_image_t *const img1, + const aom_image_t *const img2, int yloc[4], int uloc[4], + int vloc[4]) { + find_mismatch_helper(img1, img2, 0, yloc, uloc, vloc); +} + +// Returns 1 when img1 and img2 have the same format, the same display size and +// identical pixel rows in every compared plane (only the first plane for +// monochrome images), and 0 otherwise. +int aom_compare_img(const aom_image_t *const img1, + const aom_image_t *const img2) { + assert(img1->cp == img2->cp); + assert(img1->tc == img2->tc); + assert(img1->mc == img2->mc); + assert(img1->monochrome == img2->monochrome); + + int num_planes = img1->monochrome ? 1 : 3; + + uint32_t l_w = img1->d_w; + uint32_t c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; + const uint32_t c_h = + (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; + int match = 1; + + match &= (img1->fmt == img2->fmt); + match &= (img1->d_w == img2->d_w); + match &= (img1->d_h == img2->d_h); + // The row comparison below sizes every read from img1's geometry, so stop + // here when img2 has a different format or size instead of reading rows + // that img2 may not have. The result is 0 either way. + if (!match) return 0; + if (img1->fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + l_w *= 2; + c_w *= 2; + } + + for (int plane = 0; plane < num_planes; ++plane) { + uint32_t height = plane ? c_h : img1->d_h; + uint32_t width = plane ? 
c_w : l_w; + + for (uint32_t i = 0; i < height; ++i) { + match &= + (memcmp(img1->planes[plane] + i * img1->stride[plane], + img2->planes[plane] + i * img2->stride[plane], width) == 0); + } + } + + return match; +} diff --git a/libs/libaom/src/examples/encoder_util.h b/libs/libaom/src/examples/encoder_util.h new file mode 100644 index 000000000..a6bb3fb48 --- /dev/null +++ b/libs/libaom/src/examples/encoder_util.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Utility functions used by encoder binaries. + +#ifndef AOM_EXAMPLES_ENCODER_UTIL_H_ +#define AOM_EXAMPLES_ENCODER_UTIL_H_ + +#include "aom/aom_image.h" + +// Returns mismatch location (?loc[0],?loc[1]) and the values at that location +// in img1 (?loc[2]) and img2 (?loc[3]). +void aom_find_mismatch_high(const aom_image_t *const img1, + const aom_image_t *const img2, int yloc[4], + int uloc[4], int vloc[4]); + +void aom_find_mismatch(const aom_image_t *const img1, + const aom_image_t *const img2, int yloc[4], int uloc[4], + int vloc[4]); + +// Returns 1 if the two images match. +int aom_compare_img(const aom_image_t *const img1, + const aom_image_t *const img2); + +#endif // AOM_EXAMPLES_ENCODER_UTIL_H_ diff --git a/libs/libaom/src/examples/inspect.c b/libs/libaom/src/examples/inspect.c new file mode 100644 index 000000000..526bdc16c --- /dev/null +++ b/libs/libaom/src/examples/inspect.c @@ -0,0 +1,958 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Inspect Decoder +// ================ +// +// This is a simple decoder loop that writes JSON stats to stdout. This tool +// can also be compiled with Emscripten and used as a library. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef __EMSCRIPTEN__ +#include <emscripten.h> +#else +#define EMSCRIPTEN_KEEPALIVE +#endif + +#include "config/aom_config.h" + +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "av1/common/av1_common_int.h" + +#if CONFIG_ACCOUNTING +#include "av1/decoder/accounting.h" +#endif + +#include "av1/decoder/inspection.h" +#include "common/args.h" +#include "common/tools_common.h" +#include "common/video_common.h" +#include "common/video_reader.h" + +// Max JSON buffer size. 
+const int MAX_BUFFER = 1024 * 1024 * 256; + +typedef enum { + ACCOUNTING_LAYER = 1, + BLOCK_SIZE_LAYER = 1 << 1, + TRANSFORM_SIZE_LAYER = 1 << 2, + TRANSFORM_TYPE_LAYER = 1 << 3, + MODE_LAYER = 1 << 4, + SKIP_LAYER = 1 << 5, + FILTER_LAYER = 1 << 6, + CDEF_LAYER = 1 << 7, + REFERENCE_FRAME_LAYER = 1 << 8, + MOTION_VECTORS_LAYER = 1 << 9, + UV_MODE_LAYER = 1 << 10, + CFL_LAYER = 1 << 11, + DUAL_FILTER_LAYER = 1 << 12, + Q_INDEX_LAYER = 1 << 13, + SEGMENT_ID_LAYER = 1 << 14, + MOTION_MODE_LAYER = 1 << 15, + COMPOUND_TYPE_LAYER = 1 << 16, + INTRABC_LAYER = 1 << 17, + PALETTE_LAYER = 1 << 18, + UV_PALETTE_LAYER = 1 << 19, + ALL_LAYERS = (1 << 20) - 1 +} LayerType; + +static LayerType layers = 0; + +static int stop_after = 0; +static int compress = 0; + +static const arg_def_t limit_arg = + ARG_DEF(NULL, "limit", 1, "Stop decoding after n frames"); +static const arg_def_t dump_all_arg = ARG_DEF("A", "all", 0, "Dump All"); +static const arg_def_t compress_arg = + ARG_DEF("x", "compress", 0, "Compress JSON using RLE"); +static const arg_def_t dump_accounting_arg = + ARG_DEF("a", "accounting", 0, "Dump Accounting"); +static const arg_def_t dump_block_size_arg = + ARG_DEF("bs", "blockSize", 0, "Dump Block Size"); +static const arg_def_t dump_motion_vectors_arg = + ARG_DEF("mv", "motionVectors", 0, "Dump Motion Vectors"); +static const arg_def_t dump_transform_size_arg = + ARG_DEF("ts", "transformSize", 0, "Dump Transform Size"); +static const arg_def_t dump_transform_type_arg = + ARG_DEF("tt", "transformType", 0, "Dump Transform Type"); +static const arg_def_t dump_mode_arg = ARG_DEF("m", "mode", 0, "Dump Mode"); +static const arg_def_t dump_motion_mode_arg = + ARG_DEF("mm", "motion_mode", 0, "Dump Motion Modes"); +static const arg_def_t dump_compound_type_arg = + ARG_DEF("ct", "compound_type", 0, "Dump Compound Types"); +static const arg_def_t dump_uv_mode_arg = + ARG_DEF("uvm", "uv_mode", 0, "Dump UV Intra Prediction Modes"); +static const arg_def_t dump_skip_arg = 
ARG_DEF("s", "skip", 0, "Dump Skip"); +static const arg_def_t dump_filter_arg = + ARG_DEF("f", "filter", 0, "Dump Filter"); +static const arg_def_t dump_cdef_arg = ARG_DEF("c", "cdef", 0, "Dump CDEF"); +static const arg_def_t dump_cfl_arg = + ARG_DEF("cfl", "chroma_from_luma", 0, "Dump Chroma from Luma Alphas"); +static const arg_def_t dump_dual_filter_type_arg = + ARG_DEF("df", "dualFilterType", 0, "Dump Dual Filter Type"); +static const arg_def_t dump_reference_frame_arg = + ARG_DEF("r", "referenceFrame", 0, "Dump Reference Frame"); +static const arg_def_t dump_delta_q_arg = + ARG_DEF("dq", "delta_q", 0, "Dump QIndex"); +static const arg_def_t dump_seg_id_arg = + ARG_DEF("si", "seg_id", 0, "Dump Segment ID"); +static const arg_def_t dump_intrabc_arg = + ARG_DEF("ibc", "intrabc", 0, "Dump If IntraBC Is Used"); +static const arg_def_t dump_palette_arg = + ARG_DEF("plt", "palette", 0, "Dump Palette Size"); +static const arg_def_t dump_uv_palette_arg = + ARG_DEF("uvp", "uv_palette", 0, "Dump UV Palette Size"); +static const arg_def_t usage_arg = ARG_DEF("h", "help", 0, "Help"); +static const arg_def_t skip_non_transform_arg = ARG_DEF( + "snt", "skip_non_transform", 1, "Skip is counted as a non transform."); +static const arg_def_t combined_arg = + ARG_DEF("comb", "combined", 1, "combinining parameters into one output."); + +int combined_parm_list[15]; +int combined_parm_count = 0; + +static const arg_def_t *main_args[] = { &limit_arg, + &dump_all_arg, + &compress_arg, +#if CONFIG_ACCOUNTING + &dump_accounting_arg, +#endif + &dump_block_size_arg, + &dump_transform_size_arg, + &dump_transform_type_arg, + &dump_mode_arg, + &dump_uv_mode_arg, + &dump_motion_mode_arg, + &dump_compound_type_arg, + &dump_skip_arg, + &dump_filter_arg, + &dump_cdef_arg, + &dump_dual_filter_type_arg, + &dump_cfl_arg, + &dump_reference_frame_arg, + &dump_motion_vectors_arg, + &dump_delta_q_arg, + &dump_seg_id_arg, + &dump_intrabc_arg, + &dump_palette_arg, + &dump_uv_palette_arg, + &usage_arg, + 
&skip_non_transform_arg, + &combined_arg, + NULL }; +#define ENUM(name) \ + { #name, name } +#define LAST_ENUM \ + { NULL, 0 } +typedef struct map_entry { + const char *name; + int value; +} map_entry; + +const map_entry refs_map[] = { + ENUM(INTRA_FRAME), ENUM(LAST_FRAME), ENUM(LAST2_FRAME), + ENUM(LAST3_FRAME), ENUM(GOLDEN_FRAME), ENUM(BWDREF_FRAME), + ENUM(ALTREF2_FRAME), ENUM(ALTREF_FRAME), LAST_ENUM +}; + +const map_entry block_size_map[] = { + ENUM(BLOCK_4X4), ENUM(BLOCK_4X8), ENUM(BLOCK_8X4), + ENUM(BLOCK_8X8), ENUM(BLOCK_8X16), ENUM(BLOCK_16X8), + ENUM(BLOCK_16X16), ENUM(BLOCK_16X32), ENUM(BLOCK_32X16), + ENUM(BLOCK_32X32), ENUM(BLOCK_32X64), ENUM(BLOCK_64X32), + ENUM(BLOCK_64X64), ENUM(BLOCK_64X128), ENUM(BLOCK_128X64), + ENUM(BLOCK_128X128), ENUM(BLOCK_4X16), ENUM(BLOCK_16X4), + ENUM(BLOCK_8X32), ENUM(BLOCK_32X8), ENUM(BLOCK_16X64), + ENUM(BLOCK_64X16), LAST_ENUM +}; + +#define TX_SKIP -1 + +const map_entry tx_size_map[] = { + ENUM(TX_4X4), ENUM(TX_8X8), ENUM(TX_16X16), ENUM(TX_32X32), + ENUM(TX_64X64), ENUM(TX_4X8), ENUM(TX_8X4), ENUM(TX_8X16), + ENUM(TX_16X8), ENUM(TX_16X32), ENUM(TX_32X16), ENUM(TX_32X64), + ENUM(TX_64X32), ENUM(TX_4X16), ENUM(TX_16X4), ENUM(TX_8X32), + ENUM(TX_32X8), ENUM(TX_16X64), ENUM(TX_64X16), LAST_ENUM +}; + +const map_entry tx_type_map[] = { ENUM(DCT_DCT), + ENUM(ADST_DCT), + ENUM(DCT_ADST), + ENUM(ADST_ADST), + ENUM(FLIPADST_DCT), + ENUM(DCT_FLIPADST), + ENUM(FLIPADST_FLIPADST), + ENUM(ADST_FLIPADST), + ENUM(FLIPADST_ADST), + ENUM(IDTX), + ENUM(V_DCT), + ENUM(H_DCT), + ENUM(V_ADST), + ENUM(H_ADST), + ENUM(V_FLIPADST), + ENUM(H_FLIPADST), + LAST_ENUM }; +const map_entry dual_filter_map[] = { ENUM(REG_REG), ENUM(REG_SMOOTH), + ENUM(REG_SHARP), ENUM(SMOOTH_REG), + ENUM(SMOOTH_SMOOTH), ENUM(SMOOTH_SHARP), + ENUM(SHARP_REG), ENUM(SHARP_SMOOTH), + ENUM(SHARP_SHARP), LAST_ENUM }; + +const map_entry prediction_mode_map[] = { + ENUM(DC_PRED), ENUM(V_PRED), ENUM(H_PRED), + ENUM(D45_PRED), ENUM(D135_PRED), ENUM(D113_PRED), + 
ENUM(D157_PRED), ENUM(D203_PRED), ENUM(D67_PRED), + ENUM(SMOOTH_PRED), ENUM(SMOOTH_V_PRED), ENUM(SMOOTH_H_PRED), + ENUM(PAETH_PRED), ENUM(NEARESTMV), ENUM(NEARMV), + ENUM(GLOBALMV), ENUM(NEWMV), ENUM(NEAREST_NEARESTMV), + ENUM(NEAR_NEARMV), ENUM(NEAREST_NEWMV), ENUM(NEW_NEARESTMV), + ENUM(NEAR_NEWMV), ENUM(NEW_NEARMV), ENUM(GLOBAL_GLOBALMV), + ENUM(NEW_NEWMV), ENUM(INTRA_INVALID), LAST_ENUM +}; + +const map_entry motion_mode_map[] = { ENUM(SIMPLE_TRANSLATION), + ENUM(OBMC_CAUSAL), // 2-sided OBMC + ENUM(WARPED_CAUSAL), // 2-sided WARPED + LAST_ENUM }; + +const map_entry compound_type_map[] = { ENUM(COMPOUND_AVERAGE), + ENUM(COMPOUND_WEDGE), + ENUM(COMPOUND_DIFFWTD), LAST_ENUM }; + +const map_entry uv_prediction_mode_map[] = { + ENUM(UV_DC_PRED), ENUM(UV_V_PRED), + ENUM(UV_H_PRED), ENUM(UV_D45_PRED), + ENUM(UV_D135_PRED), ENUM(UV_D113_PRED), + ENUM(UV_D157_PRED), ENUM(UV_D203_PRED), + ENUM(UV_D67_PRED), ENUM(UV_SMOOTH_PRED), + ENUM(UV_SMOOTH_V_PRED), ENUM(UV_SMOOTH_H_PRED), + ENUM(UV_PAETH_PRED), ENUM(UV_CFL_PRED), + ENUM(UV_MODE_INVALID), LAST_ENUM +}; +#define NO_SKIP 0 +#define SKIP 1 + +const map_entry skip_map[] = { ENUM(SKIP), ENUM(NO_SKIP), LAST_ENUM }; + +const map_entry intrabc_map[] = { { "INTRABC", 1 }, + { "NO_INTRABC", 0 }, + LAST_ENUM }; + +const map_entry palette_map[] = { + { "ZERO_COLORS", 0 }, { "TWO_COLORS", 2 }, { "THREE_COLORS", 3 }, + { "FOUR_COLORS", 4 }, { "FIVE_COLORS", 5 }, { "SIX_COLORS", 6 }, + { "SEVEN_COLORS", 7 }, { "EIGHT_COLORS", 8 }, LAST_ENUM +}; + +const map_entry config_map[] = { ENUM(MI_SIZE), LAST_ENUM }; + +static const char *exec_name; + +struct parm_offset { + char parm[60]; + char offset; +}; +struct parm_offset parm_offsets[] = { + { "blockSize", offsetof(insp_mi_data, sb_type) }, + { "transformSize", offsetof(insp_mi_data, tx_size) }, + { "transformType", offsetof(insp_mi_data, tx_type) }, + { "dualFilterType", offsetof(insp_mi_data, dual_filter_type) }, + { "mode", offsetof(insp_mi_data, mode) }, + { "uv_mode", 
offsetof(insp_mi_data, uv_mode) }, + { "motion_mode", offsetof(insp_mi_data, motion_mode) }, + { "compound_type", offsetof(insp_mi_data, compound_type) }, + { "referenceFrame", offsetof(insp_mi_data, ref_frame) }, + { "skip", offsetof(insp_mi_data, skip) }, +}; +int parm_count = sizeof(parm_offsets) / sizeof(parm_offsets[0]); + +// Converts the comma separated list of parameter names in str into indexes +// into parm_offsets, stored in indices[0 .. *count - 1]. A name selects the +// first entry it is a prefix of. Returns 1 on success, and 0 when a name is +// empty or unknown or when more than maxCount names are given. A trailing +// comma after the last name is tolerated. +int convert_to_indices(char *str, int *indices, int maxCount, int *count) { + *count = 0; + do { + char *comma = strchr(str, ','); + int length = (comma ? (int)(comma - str) : (int)strlen(str)); + int i; + // strncmp() with a zero length matches every entry, so reject empty names. + if (length == 0) return 0; + for (i = 0; i < parm_count; ++i) { + if (!strncmp(str, parm_offsets[i].parm, length)) { + break; + } + } + if (i == parm_count) return 0; + // Check the capacity before storing so indices[maxCount] is never written. + if (*count >= maxCount) return 0; + indices[(*count)++] = i; + // Without a comma this was the last name; advancing by length + 1 would + // move str past the terminating NUL. + if (!comma) break; + str = comma + 1; + } while (*str != '\0'); + return 1; +} + +insp_frame_data frame_data; +int frame_count = 0; +int decoded_frame_count = 0; +aom_codec_ctx_t codec; +AvxVideoReader *reader = NULL; +const AvxVideoInfo *info = NULL; +aom_image_t *img = NULL; + +void on_frame_decoded_dump(char *json) { +#ifdef __EMSCRIPTEN__ + EM_ASM_({ Module.on_frame_decoded_json($0); }, json); +#else + printf("%s", json); +#endif +} + +// Writing out the JSON buffer using snprintf is very slow, especially when +// compiled with emscripten, these functions speed things up quite a bit. 
+int put_str(char *buffer, const char *str) { + int i; + for (i = 0; str[i] != '\0'; i++) { + buffer[i] = str[i]; + } + return i; +} + +int put_str_with_escape(char *buffer, const char *str) { + int i; + int j = 0; + for (i = 0; str[i] != '\0'; i++) { + if (str[i] < ' ') { + continue; + } else if (str[i] == '"' || str[i] == '\\') { + buffer[j++] = '\\'; + } + buffer[j++] = str[i]; + } + return j; +} + +int put_num(char *buffer, char prefix, int num, char suffix) { + int i = 0; + char *buf = buffer; + int is_neg = 0; + if (prefix) { + buf[i++] = prefix; + } + if (num == 0) { + buf[i++] = '0'; + } else { + if (num < 0) { + num = -num; + is_neg = 1; + } + int s = i; + while (num != 0) { + buf[i++] = '0' + (num % 10); + num = num / 10; + } + if (is_neg) { + buf[i++] = '-'; + } + int e = i - 1; + while (s < e) { + int t = buf[s]; + buf[s] = buf[e]; + buf[e] = t; + s++; + e--; + } + } + if (suffix) { + buf[i++] = suffix; + } + return i; +} + +int put_map(char *buffer, const map_entry *map) { + char *buf = buffer; + const map_entry *entry = map; + while (entry->name != NULL) { + *(buf++) = '"'; + buf += put_str(buf, entry->name); + *(buf++) = '"'; + buf += put_num(buf, ':', entry->value, 0); + entry++; + if (entry->name != NULL) { + *(buf++) = ','; + } + } + return (int)(buf - buffer); +} + +int put_reference_frame(char *buffer) { + const int mi_rows = frame_data.mi_rows; + const int mi_cols = frame_data.mi_cols; + char *buf = buffer; + int r, c, t; + buf += put_str(buf, " \"referenceFrameMap\": {"); + buf += put_map(buf, refs_map); + buf += put_str(buf, "},\n"); + buf += put_str(buf, " \"referenceFrame\": ["); + for (r = 0; r < mi_rows; ++r) { + *(buf++) = '['; + for (c = 0; c < mi_cols; ++c) { + insp_mi_data *mi = &frame_data.mi_grid[r * mi_cols + c]; + buf += put_num(buf, '[', mi->ref_frame[0], 0); + buf += put_num(buf, ',', mi->ref_frame[1], ']'); + if (compress) { // RLE + for (t = c + 1; t < mi_cols; ++t) { + insp_mi_data *next_mi = &frame_data.mi_grid[r * mi_cols + 
t]; + if (mi->ref_frame[0] != next_mi->ref_frame[0] || + mi->ref_frame[1] != next_mi->ref_frame[1]) { + break; + } + } + if (t - c > 1) { + *(buf++) = ','; + buf += put_num(buf, '[', t - c - 1, ']'); + c = t - 1; + } + } + if (c < mi_cols - 1) *(buf++) = ','; + } + *(buf++) = ']'; + if (r < mi_rows - 1) *(buf++) = ','; + } + buf += put_str(buf, "],\n"); + return (int)(buf - buffer); +} + +int put_motion_vectors(char *buffer) { + const int mi_rows = frame_data.mi_rows; + const int mi_cols = frame_data.mi_cols; + char *buf = buffer; + int r, c, t; + buf += put_str(buf, " \"motionVectors\": ["); + for (r = 0; r < mi_rows; ++r) { + *(buf++) = '['; + for (c = 0; c < mi_cols; ++c) { + insp_mi_data *mi = &frame_data.mi_grid[r * mi_cols + c]; + buf += put_num(buf, '[', mi->mv[0].col, 0); + buf += put_num(buf, ',', mi->mv[0].row, 0); + buf += put_num(buf, ',', mi->mv[1].col, 0); + buf += put_num(buf, ',', mi->mv[1].row, ']'); + if (compress) { // RLE + for (t = c + 1; t < mi_cols; ++t) { + insp_mi_data *next_mi = &frame_data.mi_grid[r * mi_cols + t]; + if (mi->mv[0].col != next_mi->mv[0].col || + mi->mv[0].row != next_mi->mv[0].row || + mi->mv[1].col != next_mi->mv[1].col || + mi->mv[1].row != next_mi->mv[1].row) { + break; + } + } + if (t - c > 1) { + *(buf++) = ','; + buf += put_num(buf, '[', t - c - 1, ']'); + c = t - 1; + } + } + if (c < mi_cols - 1) *(buf++) = ','; + } + *(buf++) = ']'; + if (r < mi_rows - 1) *(buf++) = ','; + } + buf += put_str(buf, "],\n"); + return (int)(buf - buffer); +} + +int put_combined(char *buffer) { + const int mi_rows = frame_data.mi_rows; + const int mi_cols = frame_data.mi_cols; + char *buf = buffer; + int r, c, p; + buf += put_str(buf, " \""); + for (p = 0; p < combined_parm_count; ++p) { + if (p) buf += put_str(buf, "&"); + buf += put_str(buf, parm_offsets[combined_parm_list[p]].parm); + } + buf += put_str(buf, "\": ["); + for (r = 0; r < mi_rows; ++r) { + *(buf++) = '['; + for (c = 0; c < mi_cols; ++c) { + insp_mi_data *mi = 
&frame_data.mi_grid[r * mi_cols + c]; + *(buf++) = '['; + for (p = 0; p < combined_parm_count; ++p) { + if (p) *(buf++) = ','; + int16_t *v = (int16_t *)(((int8_t *)mi) + + parm_offsets[combined_parm_list[p]].offset); + buf += put_num(buf, 0, v[0], 0); + } + *(buf++) = ']'; + if (c < mi_cols - 1) *(buf++) = ','; + } + *(buf++) = ']'; + if (r < mi_rows - 1) *(buf++) = ','; + } + buf += put_str(buf, "],\n"); + return (int)(buf - buffer); +} + +int put_block_info(char *buffer, const map_entry *map, const char *name, + size_t offset, int len) { + const int mi_rows = frame_data.mi_rows; + const int mi_cols = frame_data.mi_cols; + char *buf = buffer; + int r, c, t, i; + if (compress && len == 1) { + die("Can't encode scalars as arrays when RLE compression is enabled."); + return -1; + } + if (map) { + buf += snprintf(buf, MAX_BUFFER, " \"%sMap\": {", name); + buf += put_map(buf, map); + buf += put_str(buf, "},\n"); + } + buf += snprintf(buf, MAX_BUFFER, " \"%s\": [", name); + for (r = 0; r < mi_rows; ++r) { + *(buf++) = '['; + for (c = 0; c < mi_cols; ++c) { + insp_mi_data *mi = &frame_data.mi_grid[r * mi_cols + c]; + int16_t *v = (int16_t *)(((int8_t *)mi) + offset); + if (len == 0) { + buf += put_num(buf, 0, v[0], 0); + } else { + buf += put_str(buf, "["); + for (i = 0; i < len; i++) { + buf += put_num(buf, 0, v[i], 0); + if (i < len - 1) { + buf += put_str(buf, ","); + } + } + buf += put_str(buf, "]"); + } + if (compress) { // RLE + for (t = c + 1; t < mi_cols; ++t) { + insp_mi_data *next_mi = &frame_data.mi_grid[r * mi_cols + t]; + int16_t *nv = (int16_t *)(((int8_t *)next_mi) + offset); + int same = 0; + if (len == 0) { + same = v[0] == nv[0]; + } else { + for (i = 0; i < len; i++) { + same = v[i] == nv[i]; + if (!same) { + break; + } + } + } + if (!same) { + break; + } + } + if (t - c > 1) { + *(buf++) = ','; + buf += put_num(buf, '[', t - c - 1, ']'); + c = t - 1; + } + } + if (c < mi_cols - 1) *(buf++) = ','; + } + *(buf++) = ']'; + if (r < mi_rows - 1) *(buf++) 
= ','; + } + buf += put_str(buf, "],\n"); + return (int)(buf - buffer); +} + +#if CONFIG_ACCOUNTING +int put_accounting(char *buffer) { + char *buf = buffer; + int i; + const Accounting *accounting = frame_data.accounting; + if (accounting == NULL) { + printf("XXX\n"); + return 0; + } + const int num_syms = accounting->syms.num_syms; + const int num_strs = accounting->syms.dictionary.num_strs; + buf += put_str(buf, " \"symbolsMap\": ["); + for (i = 0; i < num_strs; i++) { + buf += snprintf(buf, MAX_BUFFER, "\"%s\"", + accounting->syms.dictionary.strs[i]); + if (i < num_strs - 1) *(buf++) = ','; + } + buf += put_str(buf, "],\n"); + buf += put_str(buf, " \"symbols\": [\n "); + AccountingSymbolContext context; + context.x = -2; + context.y = -2; + AccountingSymbol *sym; + for (i = 0; i < num_syms; i++) { + sym = &accounting->syms.syms[i]; + if (memcmp(&context, &sym->context, sizeof(AccountingSymbolContext)) != 0) { + buf += put_num(buf, '[', sym->context.x, 0); + buf += put_num(buf, ',', sym->context.y, ']'); + } else { + buf += put_num(buf, '[', sym->id, 0); + buf += put_num(buf, ',', sym->bits, 0); + buf += put_num(buf, ',', sym->samples, ']'); + } + context = sym->context; + if (i < num_syms - 1) *(buf++) = ','; + } + buf += put_str(buf, "],\n"); + return (int)(buf - buffer); +} +#endif + +int skip_non_transform = 0; + +void inspect(void *pbi, void *data) { + /* Fetch frame data. */ + ifd_inspect(&frame_data, pbi, skip_non_transform); + + // Show existing frames just show a reference buffer we've already decoded. + // There's no information to show. + if (frame_data.show_existing_frame) return; + + (void)data; + // We allocate enough space and hope we don't write out of bounds. Totally + // unsafe but this speeds things up, especially when compiled to Javascript. 
+ char *buffer = aom_malloc(MAX_BUFFER); + char *buf = buffer; + buf += put_str(buf, "{\n"); + if (layers & BLOCK_SIZE_LAYER) { + buf += put_block_info(buf, block_size_map, "blockSize", + offsetof(insp_mi_data, sb_type), 0); + } + if (layers & TRANSFORM_SIZE_LAYER) { + buf += put_block_info(buf, tx_size_map, "transformSize", + offsetof(insp_mi_data, tx_size), 0); + } + if (layers & TRANSFORM_TYPE_LAYER) { + buf += put_block_info(buf, tx_type_map, "transformType", + offsetof(insp_mi_data, tx_type), 0); + } + if (layers & DUAL_FILTER_LAYER) { + buf += put_block_info(buf, dual_filter_map, "dualFilterType", + offsetof(insp_mi_data, dual_filter_type), 0); + } + if (layers & MODE_LAYER) { + buf += put_block_info(buf, prediction_mode_map, "mode", + offsetof(insp_mi_data, mode), 0); + } + if (layers & UV_MODE_LAYER) { + buf += put_block_info(buf, uv_prediction_mode_map, "uv_mode", + offsetof(insp_mi_data, uv_mode), 0); + } + if (layers & MOTION_MODE_LAYER) { + buf += put_block_info(buf, motion_mode_map, "motion_mode", + offsetof(insp_mi_data, motion_mode), 0); + } + if (layers & COMPOUND_TYPE_LAYER) { + buf += put_block_info(buf, compound_type_map, "compound_type", + offsetof(insp_mi_data, compound_type), 0); + } + if (layers & SKIP_LAYER) { + buf += + put_block_info(buf, skip_map, "skip", offsetof(insp_mi_data, skip), 0); + } + if (layers & FILTER_LAYER) { + buf += + put_block_info(buf, NULL, "filter", offsetof(insp_mi_data, filter), 2); + } + if (layers & CDEF_LAYER) { + buf += put_block_info(buf, NULL, "cdef_level", + offsetof(insp_mi_data, cdef_level), 0); + buf += put_block_info(buf, NULL, "cdef_strength", + offsetof(insp_mi_data, cdef_strength), 0); + } + if (layers & CFL_LAYER) { + buf += put_block_info(buf, NULL, "cfl_alpha_idx", + offsetof(insp_mi_data, cfl_alpha_idx), 0); + buf += put_block_info(buf, NULL, "cfl_alpha_sign", + offsetof(insp_mi_data, cfl_alpha_sign), 0); + } + if (layers & Q_INDEX_LAYER) { + buf += put_block_info(buf, NULL, "delta_q", + 
offsetof(insp_mi_data, current_qindex), 0); + } + if (layers & SEGMENT_ID_LAYER) { + buf += put_block_info(buf, NULL, "seg_id", + offsetof(insp_mi_data, segment_id), 0); + } + if (layers & MOTION_VECTORS_LAYER) { + buf += put_motion_vectors(buf); + } + if (layers & INTRABC_LAYER) { + buf += put_block_info(buf, intrabc_map, "intrabc", + offsetof(insp_mi_data, intrabc), 0); + } + if (layers & PALETTE_LAYER) { + buf += put_block_info(buf, palette_map, "palette", + offsetof(insp_mi_data, palette), 0); + } + if (layers & UV_PALETTE_LAYER) { + buf += put_block_info(buf, palette_map, "uv_palette", + offsetof(insp_mi_data, uv_palette), 0); + } + if (combined_parm_count > 0) buf += put_combined(buf); + if (layers & REFERENCE_FRAME_LAYER) { + buf += put_block_info(buf, refs_map, "referenceFrame", + offsetof(insp_mi_data, ref_frame), 2); + } +#if CONFIG_ACCOUNTING + if (layers & ACCOUNTING_LAYER) { + buf += put_accounting(buf); + } +#endif + buf += + snprintf(buf, MAX_BUFFER, " \"frame\": %d,\n", frame_data.frame_number); + buf += snprintf(buf, MAX_BUFFER, " \"showFrame\": %d,\n", + frame_data.show_frame); + buf += snprintf(buf, MAX_BUFFER, " \"frameType\": %d,\n", + frame_data.frame_type); + buf += snprintf(buf, MAX_BUFFER, " \"baseQIndex\": %d,\n", + frame_data.base_qindex); + buf += snprintf(buf, MAX_BUFFER, " \"tileCols\": %d,\n", + frame_data.tile_mi_cols); + buf += snprintf(buf, MAX_BUFFER, " \"tileRows\": %d,\n", + frame_data.tile_mi_rows); + buf += snprintf(buf, MAX_BUFFER, " \"deltaQPresentFlag\": %d,\n", + frame_data.delta_q_present_flag); + buf += snprintf(buf, MAX_BUFFER, " \"deltaQRes\": %d,\n", + frame_data.delta_q_res); + buf += put_str(buf, " \"config\": {"); + buf += put_map(buf, config_map); + buf += put_str(buf, "},\n"); + buf += put_str(buf, " \"configString\": \""); + buf += put_str_with_escape(buf, aom_codec_build_config()); + buf += put_str(buf, "\"\n"); + decoded_frame_count++; + buf += put_str(buf, "},\n"); + *(buf++) = 0; + 
on_frame_decoded_dump(buffer); + aom_free(buffer); +} + +void ifd_init_cb() { + aom_inspect_init ii; + ii.inspect_cb = inspect; + ii.inspect_ctx = NULL; + aom_codec_control(&codec, AV1_SET_INSPECTION_CALLBACK, &ii); +} + +EMSCRIPTEN_KEEPALIVE +int open_file(char *file) { + if (file == NULL) { + // The JS analyzer puts the .ivf file at this location. + file = "/tmp/input.ivf"; + } + reader = aom_video_reader_open(file); + if (!reader) die("Failed to open %s for reading.", file); + info = aom_video_reader_get_info(reader); + const AvxInterface *decoder = get_aom_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + fprintf(stderr, "Using %s\n", + aom_codec_iface_name(decoder->codec_interface())); + if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) + die_codec(&codec, "Failed to initialize decoder."); + ifd_init(&frame_data, info->frame_width, info->frame_height); + ifd_init_cb(); + return EXIT_SUCCESS; +} + +Av1DecodeReturn adr; +int have_frame = 0; +const unsigned char *frame; +const unsigned char *end_frame; +size_t frame_size = 0; + +EMSCRIPTEN_KEEPALIVE +int read_frame() { + img = NULL; + + // This loop skips over any frames that are show_existing_frames, as + // there is nothing to analyze. 
+ do { + if (!have_frame) { + if (!aom_video_reader_read_frame(reader)) return EXIT_FAILURE; + frame = aom_video_reader_get_frame(reader, &frame_size); + + have_frame = 1; + end_frame = frame + frame_size; + } + + if (aom_codec_decode(&codec, frame, (unsigned int)frame_size, &adr) != + AOM_CODEC_OK) { + die_codec(&codec, "Failed to decode frame."); + } + + frame = adr.buf; + if (frame == end_frame) have_frame = 0; + } while (adr.show_existing); + + int got_any_frames = 0; + aom_image_t *frame_img; + struct av1_ref_frame ref_dec; + ref_dec.idx = adr.idx; + + // ref_dec.idx is the index to the reference buffer idx to AV1_GET_REFERENCE + // if its -1 the decoder didn't update any reference buffer and the only + // way to see the frame is aom_codec_get_frame. + if (ref_dec.idx == -1) { + aom_codec_iter_t iter = NULL; + img = frame_img = aom_codec_get_frame(&codec, &iter); + ++frame_count; + got_any_frames = 1; + } else if (!aom_codec_control(&codec, AV1_GET_REFERENCE, &ref_dec)) { + img = frame_img = &ref_dec.img; + ++frame_count; + got_any_frames = 1; + } + if (!got_any_frames) { + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + +EMSCRIPTEN_KEEPALIVE +const char *get_aom_codec_build_config() { return aom_codec_build_config(); } + +EMSCRIPTEN_KEEPALIVE +int get_bit_depth() { return img->bit_depth; } + +EMSCRIPTEN_KEEPALIVE +int get_bits_per_sample() { return img->bps; } + +EMSCRIPTEN_KEEPALIVE +int get_image_format() { return img->fmt; } + +EMSCRIPTEN_KEEPALIVE +unsigned char *get_plane(int plane) { return img->planes[plane]; } + +EMSCRIPTEN_KEEPALIVE +int get_plane_stride(int plane) { return img->stride[plane]; } + +EMSCRIPTEN_KEEPALIVE +int get_plane_width(int plane) { return aom_img_plane_width(img, plane); } + +EMSCRIPTEN_KEEPALIVE +int get_plane_height(int plane) { return aom_img_plane_height(img, plane); } + +EMSCRIPTEN_KEEPALIVE +int get_frame_width() { return info->frame_width; } + +EMSCRIPTEN_KEEPALIVE +int get_frame_height() { return info->frame_height; 
} + +static void parse_args(char **argv) { + char **argi, **argj; + struct arg arg; + (void)dump_accounting_arg; + (void)dump_cdef_arg; + for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { + arg.argv_step = 1; + if (arg_match(&arg, &dump_block_size_arg, argi)) layers |= BLOCK_SIZE_LAYER; +#if CONFIG_ACCOUNTING + else if (arg_match(&arg, &dump_accounting_arg, argi)) + layers |= ACCOUNTING_LAYER; +#endif + else if (arg_match(&arg, &dump_transform_size_arg, argi)) + layers |= TRANSFORM_SIZE_LAYER; + else if (arg_match(&arg, &dump_transform_type_arg, argi)) + layers |= TRANSFORM_TYPE_LAYER; + else if (arg_match(&arg, &dump_mode_arg, argi)) + layers |= MODE_LAYER; + else if (arg_match(&arg, &dump_uv_mode_arg, argi)) + layers |= UV_MODE_LAYER; + else if (arg_match(&arg, &dump_motion_mode_arg, argi)) + layers |= MOTION_MODE_LAYER; + else if (arg_match(&arg, &dump_compound_type_arg, argi)) + layers |= COMPOUND_TYPE_LAYER; + else if (arg_match(&arg, &dump_skip_arg, argi)) + layers |= SKIP_LAYER; + else if (arg_match(&arg, &dump_filter_arg, argi)) + layers |= FILTER_LAYER; + else if (arg_match(&arg, &dump_cdef_arg, argi)) + layers |= CDEF_LAYER; + else if (arg_match(&arg, &dump_cfl_arg, argi)) + layers |= CFL_LAYER; + else if (arg_match(&arg, &dump_reference_frame_arg, argi)) + layers |= REFERENCE_FRAME_LAYER; + else if (arg_match(&arg, &dump_motion_vectors_arg, argi)) + layers |= MOTION_VECTORS_LAYER; + else if (arg_match(&arg, &dump_dual_filter_type_arg, argi)) + layers |= DUAL_FILTER_LAYER; + else if (arg_match(&arg, &dump_delta_q_arg, argi)) + layers |= Q_INDEX_LAYER; + else if (arg_match(&arg, &dump_seg_id_arg, argi)) + layers |= SEGMENT_ID_LAYER; + else if (arg_match(&arg, &dump_intrabc_arg, argi)) + layers |= INTRABC_LAYER; + else if (arg_match(&arg, &dump_palette_arg, argi)) + layers |= PALETTE_LAYER; + else if (arg_match(&arg, &dump_uv_palette_arg, argi)) + layers |= UV_PALETTE_LAYER; + else if (arg_match(&arg, &dump_all_arg, argi)) + layers |= 
ALL_LAYERS; + else if (arg_match(&arg, &compress_arg, argi)) + compress = 1; + else if (arg_match(&arg, &usage_arg, argi)) + usage_exit(); + else if (arg_match(&arg, &limit_arg, argi)) + stop_after = arg_parse_uint(&arg); + else if (arg_match(&arg, &skip_non_transform_arg, argi)) + skip_non_transform = arg_parse_uint(&arg); + else if (arg_match(&arg, &combined_arg, argi)) + convert_to_indices( + (char *)arg.val, combined_parm_list, + sizeof(combined_parm_list) / sizeof(combined_parm_list[0]), + &combined_parm_count); + else + argj++; + } +} + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s src_filename <options>\n", exec_name); + fprintf(stderr, "\nOptions:\n"); + arg_show_usage(stderr, main_args); + exit(EXIT_FAILURE); +} + +EMSCRIPTEN_KEEPALIVE +int main(int argc, char **argv) { + exec_name = argv[0]; + parse_args(argv); + if (argc >= 2) { + open_file(argv[1]); + printf("[\n"); + while (1) { + if (stop_after && (decoded_frame_count >= stop_after)) break; + if (read_frame()) break; + } + printf("null\n"); + printf("]"); + } else { + usage_exit(); + } +} + +EMSCRIPTEN_KEEPALIVE +void quit() { + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + aom_video_reader_close(reader); +} + +EMSCRIPTEN_KEEPALIVE +void set_layers(LayerType v) { layers = v; } + +EMSCRIPTEN_KEEPALIVE +void set_compress(int v) { compress = v; } diff --git a/libs/libaom/src/examples/lightfield_bitstream_parsing.c b/libs/libaom/src/examples/lightfield_bitstream_parsing.c new file mode 100644 index 000000000..ffcbcb9cb --- /dev/null +++ b/libs/libaom/src/examples/lightfield_bitstream_parsing.c @@ -0,0 +1,414 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Lightfield Bitstream Parsing +// ============================ +// +// This is a lightfield bitstream parsing example. It takes an input file +// containing the whole compressed lightfield bitstream (ivf file) and a text +// file containing a stream of tiles to decode and then constructs and outputs +// a new bitstream that can be decoded by an AV1 decoder. The output bitstream +// contains reference frames (i.e. anchor frames), camera frame header, and +// tile list OBUs. num_references is the number of anchor frames coded at the +// beginning of the light field file. After running the lightfield encoder, +// run lightfield bitstream parsing: +// examples/lightfield_bitstream_parsing vase10x10.ivf vase_tile_list.ivf 4 +// tile_list.txt +// +// The tile_list.txt is expected to be of the form: +// Frame <frame_index0> +// <image_index0> <anchor_index0> <tile_col0> <tile_row0> +// <image_index1> <anchor_index1> <tile_col1> <tile_row1> +// ... +// Frame <frame_index1> +// ... +// +// The "Frame" markers indicate a new render frame and thus a new tile list +// will be started and the old one flushed. The image_indexN, anchor_indexN, +// tile_colN, and tile_rowN identify an individual tile to be decoded and +// to use anchor_indexN anchor image for MCP. 
+ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_decoder.h" +#include "aom/aom_encoder.h" +#include "aom/aom_integer.h" +#include "aom/aomdx.h" +#include "aom_dsp/bitwriter_buffer.h" +#include "common/tools_common.h" +#include "common/video_reader.h" +#include "common/video_writer.h" + +#define MAX_TILES 512 + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <infile> <outfile> <num_references> <tile_list>\n", + exec_name); + exit(EXIT_FAILURE); +} + +#define ALIGN_POWER_OF_TWO(value, n) \ + (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) + +const int output_frame_width = 512; +const int output_frame_height = 512; + +// Spec: +// typedef struct { +// uint8_t anchor_frame_idx; +// uint8_t tile_row; +// uint8_t tile_col; +// uint16_t coded_tile_data_size_minus_1; +// uint8_t *coded_tile_data; +// } TILE_LIST_ENTRY; + +// Tile list entry provided by the application +typedef struct { + int image_idx; + int reference_idx; + int tile_col; + int tile_row; +} TILE_LIST_INFO; + +static int get_image_bps(aom_img_fmt_t fmt) { + switch (fmt) { + case AOM_IMG_FMT_I420: return 12; + case AOM_IMG_FMT_I422: return 16; + case AOM_IMG_FMT_I444: return 24; + case AOM_IMG_FMT_I42016: return 24; + case AOM_IMG_FMT_I42216: return 32; + case AOM_IMG_FMT_I44416: return 48; + default: die("Invalid image format"); + } + return 0; +} + +void process_tile_list(const TILE_LIST_INFO *tiles, int num_tiles, + aom_codec_pts_t tl_pts, unsigned char **frames, + const size_t *frame_sizes, aom_codec_ctx_t *codec, + unsigned char *tl_buf, AvxVideoWriter *writer, + uint8_t output_frame_width_in_tiles_minus_1, + uint8_t output_frame_height_in_tiles_minus_1) { + unsigned char *tl = tl_buf; + struct aom_write_bit_buffer wb = { tl, 0 }; + unsigned char *saved_obu_size_loc = NULL; + uint32_t tile_list_obu_header_size = 0; + uint32_t tile_list_obu_size = 0; + int num_tiles_minus_1 = num_tiles - 1; + int i; + + // Write the tile list 
OBU header that is 1 byte long. + aom_wb_write_literal(&wb, 0, 1); // forbidden bit. + aom_wb_write_literal(&wb, 8, 4); // tile list OBU: "1000" + aom_wb_write_literal(&wb, 0, 1); // obu_extension = 0 + aom_wb_write_literal(&wb, 1, 1); // obu_has_size_field + aom_wb_write_literal(&wb, 0, 1); // reserved + tl++; + tile_list_obu_header_size++; + + // Write the OBU size using a fixed length_field_size of 4 bytes. + saved_obu_size_loc = tl; + // aom_wb_write_unsigned_literal(&wb, data, bits) requires that bits <= 32. + aom_wb_write_unsigned_literal(&wb, 0, 32); + tl += 4; + tile_list_obu_header_size += 4; + + // write_tile_list_obu() + aom_wb_write_literal(&wb, output_frame_width_in_tiles_minus_1, 8); + aom_wb_write_literal(&wb, output_frame_height_in_tiles_minus_1, 8); + aom_wb_write_literal(&wb, num_tiles_minus_1, 16); + tl += 4; + tile_list_obu_size += 4; + + // Write each tile's data + for (i = 0; i <= num_tiles_minus_1; i++) { + aom_tile_data tile_data = { 0, NULL, 0 }; + + int image_idx = tiles[i].image_idx; + int ref_idx = tiles[i].reference_idx; + int tc = tiles[i].tile_col; + int tr = tiles[i].tile_row; + + // Reset bit writer to the right location. + wb.bit_buffer = tl; + wb.bit_offset = 0; + + size_t frame_size = frame_sizes[image_idx]; + const unsigned char *frame = frames[image_idx]; + + AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1_SET_DECODE_TILE_ROW, tr); + AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1_SET_DECODE_TILE_COL, tc); + + aom_codec_err_t aom_status = + aom_codec_decode(codec, frame, frame_size, NULL); + if (aom_status) die_codec(codec, "Failed to decode tile."); + + AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1D_GET_TILE_DATA, &tile_data); + + // Copy over tile info. 
+ // uint8_t anchor_frame_idx; + // uint8_t tile_row; + // uint8_t tile_col; + // uint16_t coded_tile_data_size_minus_1; + // uint8_t *coded_tile_data; + uint32_t tile_info_bytes = 5; + aom_wb_write_literal(&wb, ref_idx, 8); + aom_wb_write_literal(&wb, tr, 8); + aom_wb_write_literal(&wb, tc, 8); + aom_wb_write_literal(&wb, (int)tile_data.coded_tile_data_size - 1, 16); + tl += tile_info_bytes; + + memcpy(tl, (uint8_t *)tile_data.coded_tile_data, + tile_data.coded_tile_data_size); + tl += tile_data.coded_tile_data_size; + + tile_list_obu_size += + tile_info_bytes + (uint32_t)tile_data.coded_tile_data_size; + } + + // Write tile list OBU size. + size_t bytes_written = 0; + if (aom_uleb_encode_fixed_size(tile_list_obu_size, 4, 4, saved_obu_size_loc, + &bytes_written)) + die_codec(codec, "Failed to encode the tile list obu size."); + + // Copy the tile list. + if (!aom_video_writer_write_frame( + writer, tl_buf, tile_list_obu_header_size + tile_list_obu_size, + tl_pts)) + die_codec(codec, "Failed to copy compressed tile list."); +} + +int main(int argc, char **argv) { + aom_codec_ctx_t codec; + AvxVideoReader *reader = NULL; + AvxVideoWriter *writer = NULL; + const AvxInterface *decoder = NULL; + const AvxVideoInfo *info = NULL; + int num_references; + int i; + aom_codec_pts_t pts; + const char *tile_list_file = NULL; + + exec_name = argv[0]; + if (argc != 5) die("Invalid number of arguments."); + + reader = aom_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + num_references = (int)strtol(argv[3], NULL, 0); + info = aom_video_reader_get_info(reader); + + aom_video_reader_set_fourcc(reader, AV1_FOURCC); + + // The writer to write out ivf file in tile list OBU, which can be decoded by + // AV1 decoder. 
+ writer = aom_video_writer_open(argv[2], kContainerIVF, info); + if (!writer) die("Failed to open %s for writing", argv[2]); + + tile_list_file = argv[4]; + + decoder = get_aom_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface())); + + if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) + die_codec(&codec, "Failed to initialize decoder."); + + // Decode anchor frames. + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_SET_TILE_MODE, 0); + + printf("Reading %d reference images.\n", num_references); + for (i = 0; i < num_references; ++i) { + aom_video_reader_read_frame(reader); + + size_t frame_size = 0; + const unsigned char *frame = + aom_video_reader_get_frame(reader, &frame_size); + pts = (aom_codec_pts_t)aom_video_reader_get_frame_pts(reader); + + // Copy references bitstream directly. + if (!aom_video_writer_write_frame(writer, frame, frame_size, pts)) + die_codec(&codec, "Failed to copy compressed anchor frame."); + + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode frame."); + } + + // Decode camera frames. + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_SET_TILE_MODE, 1); + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_EXT_TILE_DEBUG, 1); + + FILE *infile = aom_video_reader_get_file(reader); + // Record the offset of the first camera image. + const FileOffset camera_frame_pos = ftello(infile); + + printf("Loading compressed frames into memory.\n"); + + // Count the frames in the lightfield. + int num_frames = 0; + while (aom_video_reader_read_frame(reader)) { + ++num_frames; + } + if (num_frames < 1) die("Input light field has no frames."); + + // Read all of the lightfield frames into memory. + unsigned char **frames = + (unsigned char **)malloc(num_frames * sizeof(unsigned char *)); + size_t *frame_sizes = (size_t *)malloc(num_frames * sizeof(size_t)); + // Seek to the first camera image. 
+ fseeko(infile, camera_frame_pos, SEEK_SET); + for (int f = 0; f < num_frames; ++f) { + aom_video_reader_read_frame(reader); + size_t frame_size = 0; + const unsigned char *frame = + aom_video_reader_get_frame(reader, &frame_size); + frames[f] = (unsigned char *)malloc(frame_size * sizeof(unsigned char)); + memcpy(frames[f], frame, frame_size); + frame_sizes[f] = frame_size; + } + printf("Read %d frames.\n", num_frames); + + // Copy first camera frame for getting camera frame header. This is done + // only once. + { + size_t frame_size = frame_sizes[0]; + const unsigned char *frame = frames[0]; + pts = num_references; + aom_tile_data frame_header_info = { 0, NULL, 0 }; + + // Need to decode frame header to get camera frame header info. So, here + // decoding 1 tile is enough. + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_SET_DECODE_TILE_ROW, 0); + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_SET_DECODE_TILE_COL, 0); + + aom_codec_err_t aom_status = + aom_codec_decode(&codec, frame, frame_size, NULL); + if (aom_status) die_codec(&codec, "Failed to decode tile."); + + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_GET_FRAME_HEADER_INFO, + &frame_header_info); + + size_t obu_size_offset = + (uint8_t *)frame_header_info.coded_tile_data - frame; + size_t length_field_size = frame_header_info.coded_tile_data_size; + // Remove ext-tile tile info. + uint32_t frame_header_size = (uint32_t)frame_header_info.extra_size - 1; + size_t bytes_to_copy = + obu_size_offset + length_field_size + frame_header_size; + + unsigned char *frame_hdr_buf = (unsigned char *)malloc(bytes_to_copy); + if (frame_hdr_buf == NULL) + die_codec(&codec, "Failed to allocate frame header buffer."); + + memcpy(frame_hdr_buf, frame, bytes_to_copy); + + // Update frame header OBU size. 
+ size_t bytes_written = 0; + if (aom_uleb_encode_fixed_size( + frame_header_size, length_field_size, length_field_size, + frame_hdr_buf + obu_size_offset, &bytes_written)) + die_codec(&codec, "Failed to encode the tile list obu size."); + + // Copy camera frame header bitstream. + if (!aom_video_writer_write_frame(writer, frame_hdr_buf, bytes_to_copy, + pts)) + die_codec(&codec, "Failed to copy compressed camera frame header."); + free(frame_hdr_buf); + } + + // Read out the image format. + aom_img_fmt_t ref_fmt = 0; + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_GET_IMG_FORMAT, &ref_fmt)) + die_codec(&codec, "Failed to get the image format"); + const int bps = get_image_bps(ref_fmt); + if (!bps) die_codec(&codec, "Invalid image format."); + // read out the tile size. + unsigned int tile_size = 0; + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_GET_TILE_SIZE, &tile_size)) + die_codec(&codec, "Failed to get the tile size"); + const unsigned int tile_width = tile_size >> 16; + const unsigned int tile_height = tile_size & 65535; + // Allocate a buffer to store tile list bitstream. 
+ const size_t data_sz = MAX_TILES * ALIGN_POWER_OF_TWO(tile_width, 5) * + ALIGN_POWER_OF_TWO(tile_height, 5) * bps / 8; + + unsigned char *tl_buf = (unsigned char *)malloc(data_sz); + if (tl_buf == NULL) die_codec(&codec, "Failed to allocate tile list buffer."); + + aom_codec_pts_t tl_pts = num_references; + const uint8_t output_frame_width_in_tiles_minus_1 = + output_frame_width / tile_width - 1; + const uint8_t output_frame_height_in_tiles_minus_1 = + output_frame_height / tile_height - 1; + + printf("Reading tile list from file.\n"); + char line[1024]; + FILE *tile_list_fptr = fopen(tile_list_file, "r"); + if (!tile_list_fptr) die_codec(&codec, "Failed to open tile list file."); + int num_tiles = 0; + TILE_LIST_INFO tiles[MAX_TILES]; + while ((fgets(line, 1024, tile_list_fptr)) != NULL) { + if (line[0] == 'F' || num_tiles >= MAX_TILES) { + // Flush existing tile list and start another, either because we hit a + // new render frame or because we've hit our max number of tiles per list. + if (num_tiles > 0) { + process_tile_list(tiles, num_tiles, tl_pts, frames, frame_sizes, &codec, + tl_buf, writer, output_frame_width_in_tiles_minus_1, + output_frame_height_in_tiles_minus_1); + ++tl_pts; + } + num_tiles = 0; + } + if (line[0] == 'F') { + continue; + } + if (sscanf(line, "%d %d %d %d", &tiles[num_tiles].image_idx, + &tiles[num_tiles].reference_idx, &tiles[num_tiles].tile_col, + &tiles[num_tiles].tile_row) == 4) { + if (tiles[num_tiles].image_idx >= num_frames) { + die("Tile list image_idx out of bounds: %d >= %d.", + tiles[num_tiles].image_idx, num_frames); + } + if (tiles[num_tiles].reference_idx >= num_references) { + die("Tile list reference_idx out of bounds: %d >= %d.", + tiles[num_tiles].reference_idx, num_references); + } + ++num_tiles; + } + } + if (num_tiles > 0) { + // Flush out the last tile list. 
+ process_tile_list(tiles, num_tiles, tl_pts, frames, frame_sizes, &codec, + tl_buf, writer, output_frame_width_in_tiles_minus_1, + output_frame_height_in_tiles_minus_1); + ++tl_pts; + } + + const int num_tile_lists = (int)(tl_pts - pts); + printf("Finished processing tile lists. Num tile lists: %d.\n", + num_tile_lists); + free(tl_buf); + for (int f = 0; f < num_frames; ++f) { + free(frames[f]); + } + free(frame_sizes); + free(frames); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + aom_video_writer_close(writer); + aom_video_reader_close(reader); + + return EXIT_SUCCESS; +} diff --git a/libs/libaom/src/examples/lightfield_decoder.c b/libs/libaom/src/examples/lightfield_decoder.c new file mode 100644 index 000000000..a292e9c75 --- /dev/null +++ b/libs/libaom/src/examples/lightfield_decoder.c @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Lightfield Decoder +// ================== +// +// This is an example of a simple lightfield decoder. It builds upon the +// simple_decoder.c example. It takes an input file containing the compressed +// data (in ivf format), treating it as a lightfield instead of a video; and a +// text file with a list of tiles to decode. There is an optional parameter +// allowing to choose the output format, and the supported formats are +// YUV1D(default), YUV, and NV12. 
+// After running the lightfield encoder, run lightfield decoder to decode a +// batch of tiles: +// examples/lightfield_decoder vase10x10.ivf vase_reference.yuv 4 tile_list.txt +// 0(optional) +// The tile_list.txt is expected to be of the form: +// Frame <frame_index0> +// <image_index0> <anchor_index0> <tile_col0> <tile_row0> +// <image_index1> <anchor_index1> <tile_col1> <tile_row1> +// ... +// Frame <frame_index1> +// ... +// +// The "Frame" markers indicate a new render frame and thus a new tile list +// will be started and the old one flushed. The image_indexN, anchor_indexN, +// tile_colN, and tile_rowN identify an individual tile to be decoded and +// to use anchor_indexN anchor image for MCP. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "aom_scale/yv12config.h" +#include "av1/common/enums.h" +#include "common/tools_common.h" +#include "common/video_reader.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s <infile> <outfile> <num_references> <tile_list> <output " + "format(optional)>\n", + exec_name); + exit(EXIT_FAILURE); +} + +// Output frame size +const int output_frame_width = 512; +const int output_frame_height = 512; + +static void aom_img_copy_tile(const aom_image_t *src, const aom_image_t *dst, + int dst_row_offset, int dst_col_offset) { + const int shift = (src->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 1 : 0; + int plane; + + for (plane = 0; plane < 3; ++plane) { + const unsigned char *src_buf = src->planes[plane]; + const int src_stride = src->stride[plane]; + unsigned char *dst_buf = dst->planes[plane]; + const int dst_stride = dst->stride[plane]; + const int roffset = + (plane > 0) ? dst_row_offset >> dst->y_chroma_shift : dst_row_offset; + const int coffset = + (plane > 0) ? dst_col_offset >> dst->x_chroma_shift : dst_col_offset; + + // col offset needs to be adjusted for HBD. 
+ dst_buf += roffset * dst_stride + (coffset << shift); + + const int w = (aom_img_plane_width(src, plane) << shift); + const int h = aom_img_plane_height(src, plane); + int y; + + for (y = 0; y < h; ++y) { + memcpy(dst_buf, src_buf, w); + src_buf += src_stride; + dst_buf += dst_stride; + } + } +} + +void decode_tile(aom_codec_ctx_t *codec, const unsigned char *frame, + size_t frame_size, int tr, int tc, int ref_idx, + aom_image_t *reference_images, aom_image_t *output, + int *tile_idx, unsigned int *output_bit_depth, + aom_image_t **img_ptr, int output_format) { + AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1_SET_TILE_MODE, 1); + AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1D_EXT_TILE_DEBUG, 1); + AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1_SET_DECODE_TILE_ROW, tr); + AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1_SET_DECODE_TILE_COL, tc); + + av1_ref_frame_t ref; + ref.idx = 0; + ref.use_external_ref = 1; + ref.img = reference_images[ref_idx]; + if (AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1_SET_REFERENCE, &ref)) { + die_codec(codec, "Failed to set reference frame."); + } + + aom_codec_err_t aom_status = aom_codec_decode(codec, frame, frame_size, NULL); + if (aom_status) die_codec(codec, "Failed to decode tile."); + + aom_codec_iter_t iter = NULL; + aom_image_t *img = aom_codec_get_frame(codec, &iter); + if (!img) die_codec(codec, "Failed to get frame."); + *img_ptr = img; + + // aom_img_alloc() sets bit_depth as follows: + // output->bit_depth = (fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 16 : 8; + // Use img->bit_depth(read from bitstream), so that aom_shift_img() + // works as expected. + output->bit_depth = img->bit_depth; + *output_bit_depth = img->bit_depth; + + if (output_format != YUV1D) { + // read out the tile size. 
+ unsigned int tile_size = 0; + if (AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1D_GET_TILE_SIZE, &tile_size)) + die_codec(codec, "Failed to get the tile size"); + const unsigned int tile_width = tile_size >> 16; + const unsigned int tile_height = tile_size & 65535; + const uint32_t output_frame_width_in_tiles = + output_frame_width / tile_width; + + // Copy the tile to the output frame. + const int row_offset = + (*tile_idx / output_frame_width_in_tiles) * tile_height; + const int col_offset = + (*tile_idx % output_frame_width_in_tiles) * tile_width; + + aom_img_copy_tile(img, output, row_offset, col_offset); + (*tile_idx)++; + } +} + +static void img_write_to_file(const aom_image_t *img, FILE *file, + int output_format) { + if (output_format == YUV) + aom_img_write(img, file); + else if (output_format == NV12) + aom_img_write_nv12(img, file); + else + die("Invalid output format"); +} + +int main(int argc, char **argv) { + FILE *outfile = NULL; + aom_codec_ctx_t codec; + AvxVideoReader *reader = NULL; + const AvxInterface *decoder = NULL; + const AvxVideoInfo *info = NULL; + int num_references; + aom_img_fmt_t ref_fmt = 0; + aom_image_t reference_images[MAX_EXTERNAL_REFERENCES]; + aom_image_t output; + aom_image_t *output_shifted = NULL; + size_t frame_size = 0; + const unsigned char *frame = NULL; + int i, j; + const char *tile_list_file = NULL; + int output_format = YUV1D; + exec_name = argv[0]; + + if (argc < 5) die("Invalid number of arguments."); + + reader = aom_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + if (!(outfile = fopen(argv[2], "wb"))) + die("Failed to open %s for writing.", argv[2]); + + num_references = (int)strtol(argv[3], NULL, 0); + tile_list_file = argv[4]; + + if (argc > 5) output_format = (int)strtol(argv[5], NULL, 0); + if (output_format < YUV1D || output_format > NV12) + die("Output format out of range [0, 2]"); + + info = aom_video_reader_get_info(reader); + + if (info->codec_fourcc == 
LST_FOURCC) + decoder = get_aom_decoder_by_fourcc(AV1_FOURCC); + else + die("Unknown input codec."); + printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface())); + + if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) + die_codec(&codec, "Failed to initialize decoder."); + + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_SET_IS_ANNEXB, + info->is_annexb)) { + die("Failed to set annex b status"); + } + + // Decode anchor frames. + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_SET_TILE_MODE, 0); + for (i = 0; i < num_references; ++i) { + aom_video_reader_read_frame(reader); + frame = aom_video_reader_get_frame(reader, &frame_size); + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode frame."); + + if (i == 0) { + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_GET_IMG_FORMAT, &ref_fmt)) + die_codec(&codec, "Failed to get the image format"); + + int frame_res[2]; + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_GET_FRAME_SIZE, frame_res)) + die_codec(&codec, "Failed to get the image frame size"); + + // Allocate memory to store decoded references. Allocate memory with the + // border so that it can be used as a reference. 
+ for (j = 0; j < num_references; j++) { + unsigned int border = AOM_DEC_BORDER_IN_PIXELS; + if (!aom_img_alloc_with_border(&reference_images[j], ref_fmt, + frame_res[0], frame_res[1], 32, 8, + border)) { + die("Failed to allocate references."); + } + } + } + + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_COPY_NEW_FRAME_IMAGE, + &reference_images[i])) + die_codec(&codec, "Failed to copy decoded reference frame"); + + aom_codec_iter_t iter = NULL; + aom_image_t *img = NULL; + while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) { + char name[1024]; + snprintf(name, sizeof(name), "ref_%d.yuv", i); + printf("writing ref image to %s, %d, %d\n", name, img->d_w, img->d_h); + FILE *ref_file = fopen(name, "wb"); + aom_img_write(img, ref_file); + fclose(ref_file); + } + } + + FILE *infile = aom_video_reader_get_file(reader); + // Record the offset of the first camera image. + const FileOffset camera_frame_pos = ftello(infile); + + printf("Loading compressed frames into memory.\n"); + + // Count the frames in the lightfield. + int num_frames = 0; + while (aom_video_reader_read_frame(reader)) { + ++num_frames; + } + if (num_frames < 1) die("Input light field has no frames."); + + // Read all of the lightfield frames into memory. + unsigned char **frames = + (unsigned char **)malloc(num_frames * sizeof(unsigned char *)); + size_t *frame_sizes = (size_t *)malloc(num_frames * sizeof(size_t)); + // Seek to the first camera image. + fseeko(infile, camera_frame_pos, SEEK_SET); + for (int f = 0; f < num_frames; ++f) { + aom_video_reader_read_frame(reader); + frame = aom_video_reader_get_frame(reader, &frame_size); + frames[f] = (unsigned char *)malloc(frame_size * sizeof(unsigned char)); + memcpy(frames[f], frame, frame_size); + frame_sizes[f] = frame_size; + } + printf("Read %d frames.\n", num_frames); + + if (output_format != YUV1D) { + // Allocate the output frame. 
+ aom_img_fmt_t out_fmt = ref_fmt; + if (FORCE_HIGHBITDEPTH_DECODING) out_fmt |= AOM_IMG_FMT_HIGHBITDEPTH; + if (!aom_img_alloc(&output, out_fmt, output_frame_width, + output_frame_height, 32)) + die("Failed to allocate output image."); + } + + printf("Decoding tile list from file.\n"); + char line[1024]; + FILE *tile_list_fptr = fopen(tile_list_file, "r"); + if (!tile_list_fptr) die_codec(&codec, "Failed to open tile list file."); + int tile_list_cnt = 0; + int tile_list_writes = 0; + int tile_idx = 0; + aom_image_t *out = NULL; + unsigned int output_bit_depth = 0; + + while ((fgets(line, 1024, tile_list_fptr)) != NULL) { + if (line[0] == 'F') { + if (output_format != YUV1D) { + // Write out the tile list. + if (tile_list_cnt) { + out = &output; + if (output_bit_depth != 0) + aom_shift_img(output_bit_depth, &out, &output_shifted); + img_write_to_file(out, outfile, output_format); + tile_list_writes++; + } + + tile_list_cnt++; + tile_idx = 0; + // Then memset the frame. + memset(output.img_data, 0, output.sz); + } + continue; + } + + int image_idx, ref_idx, tc, tr; + sscanf(line, "%d %d %d %d", &image_idx, &ref_idx, &tc, &tr); + if (image_idx >= num_frames) { + die("Tile list image_idx out of bounds: %d >= %d.", image_idx, + num_frames); + } + if (ref_idx >= num_references) { + die("Tile list ref_idx out of bounds: %d >= %d.", ref_idx, + num_references); + } + frame = frames[image_idx]; + frame_size = frame_sizes[image_idx]; + + aom_image_t *img = NULL; + decode_tile(&codec, frame, frame_size, tr, tc, ref_idx, reference_images, + &output, &tile_idx, &output_bit_depth, &img, output_format); + if (output_format == YUV1D) { + out = img; + if (output_bit_depth != 0) + aom_shift_img(output_bit_depth, &out, &output_shifted); + aom_img_write(out, outfile); + } + } + + if (output_format != YUV1D) { + // Write out the last tile list. 
+ if (tile_list_writes < tile_list_cnt) { + out = &output; + if (output_bit_depth != 0) + aom_shift_img(output_bit_depth, &out, &output_shifted); + img_write_to_file(out, outfile, output_format); + } + } + + if (output_shifted) aom_img_free(output_shifted); + if (output_format != YUV1D) aom_img_free(&output); + for (i = 0; i < num_references; i++) aom_img_free(&reference_images[i]); + for (int f = 0; f < num_frames; ++f) { + free(frames[f]); + } + free(frame_sizes); + free(frames); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + aom_video_reader_close(reader); + fclose(outfile); + + return EXIT_SUCCESS; +} diff --git a/libs/libaom/src/examples/lightfield_encoder.c b/libs/libaom/src/examples/lightfield_encoder.c new file mode 100644 index 000000000..e80fe24f6 --- /dev/null +++ b/libs/libaom/src/examples/lightfield_encoder.c @@ -0,0 +1,522 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Lightfield Encoder +// ================== +// +// This is an example of a simple lightfield encoder. It builds upon the +// twopass_encoder.c example. It takes an input file in YV12 format, +// treating it as a planar lightfield instead of a video. The img_width +// and img_height arguments are the dimensions of the lightfield images, +// while the lf_width and lf_height arguments are the number of +// lightfield images in each dimension. The lf_blocksize determines the +// number of reference images used for MCP. 
For example, 5 means that there +// is a reference image for every 5x5 lightfield image block. All images +// within a block will use the center image in that block as the reference +// image for MCP. +// Run "make test" to download lightfield test data: vase10x10.yuv. +// Run lightfield encoder to encode whole lightfield: +// examples/lightfield_encoder 1024 1024 vase10x10.yuv vase10x10.ivf 10 10 5 + +// Note: In bitstream.c and encoder.c, defining EXT_TILE_DEBUG as 1 will print +// out the uncompressed header and the frame contexts, which can be used to +// test the bit exactness of the headers and the frame contexts for large scale +// tile coded frames. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_encoder.h" +#include "aom/aomcx.h" +#include "aom_scale/yv12config.h" +#include "av1/common/enums.h" +#include "common/tools_common.h" +#include "common/video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s <img_width> <img_height> <infile> <outfile> " + "<lf_width> <lf_height> <lf_blocksize>\n", + exec_name); + exit(EXIT_FAILURE); +} + +static int img_size_bytes(aom_image_t *img) { + int image_size_bytes = 0; + int plane; + for (plane = 0; plane < 3; ++plane) { + const int w = aom_img_plane_width(img, plane) * + ((img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 
2 : 1); + const int h = aom_img_plane_height(img, plane); + image_size_bytes += w * h; + } + return image_size_bytes; +} + +static int get_frame_stats(aom_codec_ctx_t *ctx, const aom_image_t *img, + aom_codec_pts_t pts, unsigned int duration, + aom_enc_frame_flags_t flags, + aom_fixed_buf_t *stats) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = aom_codec_encode(ctx, img, pts, duration, flags); + if (res != AOM_CODEC_OK) die_codec(ctx, "Failed to get frame stats."); + + while ((pkt = aom_codec_get_cx_data(ctx, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == AOM_CODEC_STATS_PKT) { + const uint8_t *const pkt_buf = pkt->data.twopass_stats.buf; + const size_t pkt_size = pkt->data.twopass_stats.sz; + stats->buf = realloc(stats->buf, stats->sz + pkt_size); + memcpy((uint8_t *)stats->buf + stats->sz, pkt_buf, pkt_size); + stats->sz += pkt_size; + } + } + + return got_pkts; +} + +static int encode_frame(aom_codec_ctx_t *ctx, const aom_image_t *img, + aom_codec_pts_t pts, unsigned int duration, + aom_enc_frame_flags_t flags, AvxVideoWriter *writer) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = aom_codec_encode(ctx, img, pts, duration, flags); + if (res != AOM_CODEC_OK) die_codec(ctx, "Failed to encode frame."); + + while ((pkt = aom_codec_get_cx_data(ctx, &iter)) != NULL) { + got_pkts = 1; + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0; + + if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) + die_codec(ctx, "Failed to write compressed frame."); + printf(keyframe ? 
"K" : "."); + fflush(stdout); + } + } + + return got_pkts; +} + +static void get_raw_image(aom_image_t **frame_to_encode, aom_image_t *raw, + aom_image_t *raw_shift) { + if (FORCE_HIGHBITDEPTH_DECODING) { + // Need to allocate larger buffer to use hbd internal. + int input_shift = 0; + aom_img_upshift(raw_shift, raw, input_shift); + *frame_to_encode = raw_shift; + } else { + *frame_to_encode = raw; + } +} + +static aom_fixed_buf_t pass0(aom_image_t *raw, FILE *infile, + const AvxInterface *encoder, + const aom_codec_enc_cfg_t *cfg, int lf_width, + int lf_height, int lf_blocksize, int flags, + aom_image_t *raw_shift) { + aom_codec_ctx_t codec; + int frame_count = 0; + int image_size_bytes = img_size_bytes(raw); + int u_blocks, v_blocks; + int bu, bv; + aom_fixed_buf_t stats = { NULL, 0 }; + aom_image_t *frame_to_encode; + + if (aom_codec_enc_init(&codec, encoder->codec_interface(), cfg, flags)) + die_codec(&codec, "Failed to initialize encoder"); + if (aom_codec_control(&codec, AOME_SET_ENABLEAUTOALTREF, 0)) + die_codec(&codec, "Failed to turn off auto altref"); + if (aom_codec_control(&codec, AV1E_SET_FRAME_PARALLEL_DECODING, 0)) + die_codec(&codec, "Failed to set frame parallel decoding"); + + // How many reference images we need to encode. + u_blocks = (lf_width + lf_blocksize - 1) / lf_blocksize; + v_blocks = (lf_height + lf_blocksize - 1) / lf_blocksize; + + printf("\n First pass: "); + + for (bv = 0; bv < v_blocks; ++bv) { + for (bu = 0; bu < u_blocks; ++bu) { + const int block_u_min = bu * lf_blocksize; + const int block_v_min = bv * lf_blocksize; + int block_u_end = (bu + 1) * lf_blocksize; + int block_v_end = (bv + 1) * lf_blocksize; + int u_block_size, v_block_size; + int block_ref_u, block_ref_v; + + block_u_end = block_u_end < lf_width ? block_u_end : lf_width; + block_v_end = block_v_end < lf_height ? 
block_v_end : lf_height; + u_block_size = block_u_end - block_u_min; + v_block_size = block_v_end - block_v_min; + block_ref_u = block_u_min + u_block_size / 2; + block_ref_v = block_v_min + v_block_size / 2; + + printf("A%d, ", (block_ref_u + block_ref_v * lf_width)); + fseek(infile, (block_ref_u + block_ref_v * lf_width) * image_size_bytes, + SEEK_SET); + aom_img_read(raw, infile); + get_raw_image(&frame_to_encode, raw, raw_shift); + + // Reference frames can be encoded without tiles. + ++frame_count; + get_frame_stats(&codec, frame_to_encode, frame_count, 1, + AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | + AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 | + AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | + AOM_EFLAG_NO_UPD_ARF, + &stats); + } + } + + if (aom_codec_control(&codec, AV1E_SET_FRAME_PARALLEL_DECODING, 1)) + die_codec(&codec, "Failed to set frame parallel decoding"); + + for (bv = 0; bv < v_blocks; ++bv) { + for (bu = 0; bu < u_blocks; ++bu) { + const int block_u_min = bu * lf_blocksize; + const int block_v_min = bv * lf_blocksize; + int block_u_end = (bu + 1) * lf_blocksize; + int block_v_end = (bv + 1) * lf_blocksize; + int u, v; + block_u_end = block_u_end < lf_width ? block_u_end : lf_width; + block_v_end = block_v_end < lf_height ? 
block_v_end : lf_height; + for (v = block_v_min; v < block_v_end; ++v) { + for (u = block_u_min; u < block_u_end; ++u) { + printf("C%d, ", (u + v * lf_width)); + fseek(infile, (u + v * lf_width) * image_size_bytes, SEEK_SET); + aom_img_read(raw, infile); + get_raw_image(&frame_to_encode, raw, raw_shift); + + ++frame_count; + get_frame_stats(&codec, frame_to_encode, frame_count, 1, + AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | + AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 | + AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | + AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_UPD_ENTROPY, + &stats); + } + } + } + } + // Flush encoder. + // No ARF, this should not be needed. + while (get_frame_stats(&codec, NULL, frame_count, 1, 0, &stats)) { + } + + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + printf("\nFirst pass complete. Processed %d frames.\n", frame_count); + + return stats; +} + +static void pass1(aom_image_t *raw, FILE *infile, const char *outfile_name, + const AvxInterface *encoder, aom_codec_enc_cfg_t *cfg, + int lf_width, int lf_height, int lf_blocksize, int flags, + aom_image_t *raw_shift) { + AvxVideoInfo info = { encoder->fourcc, + cfg->g_w, + cfg->g_h, + { cfg->g_timebase.num, cfg->g_timebase.den }, + 0 }; + AvxVideoWriter *writer = NULL; + aom_codec_ctx_t codec; + int frame_count = 0; + int image_size_bytes = img_size_bytes(raw); + int bu, bv; + int u_blocks, v_blocks; + aom_image_t *frame_to_encode; + aom_image_t reference_images[MAX_EXTERNAL_REFERENCES]; + int reference_image_num = 0; + int i; + + writer = aom_video_writer_open(outfile_name, kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing", outfile_name); + + if (aom_codec_enc_init(&codec, encoder->codec_interface(), cfg, flags)) + die_codec(&codec, "Failed to initialize encoder"); + if (aom_codec_control(&codec, AOME_SET_ENABLEAUTOALTREF, 0)) + die_codec(&codec, "Failed to turn off auto altref"); + if 
(aom_codec_control(&codec, AV1E_SET_FRAME_PARALLEL_DECODING, 0)) + die_codec(&codec, "Failed to set frame parallel decoding"); + if (aom_codec_control(&codec, AV1E_ENABLE_EXT_TILE_DEBUG, 1)) + die_codec(&codec, "Failed to enable encoder ext_tile debug"); + if (aom_codec_control(&codec, AOME_SET_CPUUSED, 1)) + die_codec(&codec, "Failed to set cpu-used"); + + // Note: The superblock is a sequence parameter and has to be the same for 1 + // sequence. In lightfield application, must choose the superblock size(either + // 64x64 or 128x128) before the encoding starts. Otherwise, the default is + // AOM_SUPERBLOCK_SIZE_DYNAMIC, and the superblock size will be set to 64x64 + // internally. + if (aom_codec_control(&codec, AV1E_SET_SUPERBLOCK_SIZE, + AOM_SUPERBLOCK_SIZE_64X64)) + die_codec(&codec, "Failed to set SB size"); + + u_blocks = (lf_width + lf_blocksize - 1) / lf_blocksize; + v_blocks = (lf_height + lf_blocksize - 1) / lf_blocksize; + + reference_image_num = u_blocks * v_blocks; + // Set the max gf group length so the references are guaranteed to be in + // a different gf group than any of the regular frames. This avoids using + // both vbr and constant quality mode in a single group. The number of + // references now cannot surpass 17 because of the enforced MAX_GF_INTERVAL of + // 16. If it is necessary to exceed this reference frame limit, one will have + // to do some additional handling to ensure references are in separate gf + // groups from the regular frames. + if (aom_codec_control(&codec, AV1E_SET_MAX_GF_INTERVAL, + reference_image_num - 1)) + die_codec(&codec, "Failed to set max gf interval"); + aom_img_fmt_t ref_fmt = AOM_IMG_FMT_I420; + if (FORCE_HIGHBITDEPTH_DECODING) ref_fmt |= AOM_IMG_FMT_HIGHBITDEPTH; + // Allocate memory with the border so that it can be used as a reference. + int border_in_pixels = + (codec.config.enc->rc_resize_mode || codec.config.enc->rc_superres_mode) + ? 
AOM_BORDER_IN_PIXELS + : AOM_ENC_NO_SCALE_BORDER; + for (i = 0; i < reference_image_num; i++) { + if (!aom_img_alloc_with_border(&reference_images[i], ref_fmt, cfg->g_w, + cfg->g_h, 32, 8, border_in_pixels)) { + die("Failed to allocate image."); + } + } + + printf("\n Second pass: "); + + // Encode reference images first. + printf("Encoding Reference Images\n"); + for (bv = 0; bv < v_blocks; ++bv) { + for (bu = 0; bu < u_blocks; ++bu) { + const int block_u_min = bu * lf_blocksize; + const int block_v_min = bv * lf_blocksize; + int block_u_end = (bu + 1) * lf_blocksize; + int block_v_end = (bv + 1) * lf_blocksize; + int u_block_size, v_block_size; + int block_ref_u, block_ref_v; + + block_u_end = block_u_end < lf_width ? block_u_end : lf_width; + block_v_end = block_v_end < lf_height ? block_v_end : lf_height; + u_block_size = block_u_end - block_u_min; + v_block_size = block_v_end - block_v_min; + block_ref_u = block_u_min + u_block_size / 2; + block_ref_v = block_v_min + v_block_size / 2; + + printf("A%d, ", (block_ref_u + block_ref_v * lf_width)); + fseek(infile, (block_ref_u + block_ref_v * lf_width) * image_size_bytes, + SEEK_SET); + aom_img_read(raw, infile); + + get_raw_image(&frame_to_encode, raw, raw_shift); + + // Reference frames may be encoded without tiles. + ++frame_count; + printf("Encoding reference image %d of %d\n", bv * u_blocks + bu, + u_blocks * v_blocks); + encode_frame(&codec, frame_to_encode, frame_count, 1, + AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | + AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 | + AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | + AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_UPD_ENTROPY, + writer); + + if (aom_codec_control(&codec, AV1_COPY_NEW_FRAME_IMAGE, + &reference_images[frame_count - 1])) + die_codec(&codec, "Failed to copy decoder reference frame"); + } + } + + cfg->large_scale_tile = 1; + // Fixed q encoding for camera frames. 
+ cfg->rc_end_usage = AOM_Q; + if (aom_codec_enc_config_set(&codec, cfg)) + die_codec(&codec, "Failed to configure encoder"); + + // The fixed q value used in encoding. + if (aom_codec_control(&codec, AOME_SET_CQ_LEVEL, 36)) + die_codec(&codec, "Failed to set cq level"); + if (aom_codec_control(&codec, AV1E_SET_FRAME_PARALLEL_DECODING, 1)) + die_codec(&codec, "Failed to set frame parallel decoding"); + if (aom_codec_control(&codec, AV1E_SET_SINGLE_TILE_DECODING, 1)) + die_codec(&codec, "Failed to turn on single tile decoding"); + // Set tile_columns and tile_rows to MAX values, which guarantees the tile + // size of 64 x 64 pixels(i.e. 1 SB) for <= 4k resolution. + if (aom_codec_control(&codec, AV1E_SET_TILE_COLUMNS, 6)) + die_codec(&codec, "Failed to set tile width"); + if (aom_codec_control(&codec, AV1E_SET_TILE_ROWS, 6)) + die_codec(&codec, "Failed to set tile height"); + + for (bv = 0; bv < v_blocks; ++bv) { + for (bu = 0; bu < u_blocks; ++bu) { + const int block_u_min = bu * lf_blocksize; + const int block_v_min = bv * lf_blocksize; + int block_u_end = (bu + 1) * lf_blocksize; + int block_v_end = (bv + 1) * lf_blocksize; + int u, v; + block_u_end = block_u_end < lf_width ? block_u_end : lf_width; + block_v_end = block_v_end < lf_height ? 
block_v_end : lf_height; + for (v = block_v_min; v < block_v_end; ++v) { + for (u = block_u_min; u < block_u_end; ++u) { + av1_ref_frame_t ref; + ref.idx = 0; + ref.use_external_ref = 1; + ref.img = reference_images[bv * u_blocks + bu]; + if (aom_codec_control(&codec, AV1_SET_REFERENCE, &ref)) + die_codec(&codec, "Failed to set reference frame"); + + printf("C%d, ", (u + v * lf_width)); + fseek(infile, (u + v * lf_width) * image_size_bytes, SEEK_SET); + aom_img_read(raw, infile); + get_raw_image(&frame_to_encode, raw, raw_shift); + + ++frame_count; + printf("Encoding image %d of %d\n", + frame_count - (u_blocks * v_blocks), lf_width * lf_height); + encode_frame(&codec, frame_to_encode, frame_count, 1, + AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | + AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 | + AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | + AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_UPD_ENTROPY, + writer); + } + } + } + } + + // Flush encoder. + // No ARF, this should not be needed. + while (encode_frame(&codec, NULL, -1, 1, 0, writer)) { + } + + for (i = 0; i < reference_image_num; i++) aom_img_free(&reference_images[i]); + + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + // Modify large_scale_file fourcc. + if (cfg->large_scale_tile == 1) + aom_video_writer_set_fourcc(writer, LST_FOURCC); + aom_video_writer_close(writer); + + printf("\nSecond pass complete. Processed %d frames.\n", frame_count); +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + int w, h; + // The number of lightfield images in the u and v dimensions. + int lf_width, lf_height; + // Defines how many images refer to the same reference image for MCP. + // lf_blocksize X lf_blocksize images will all use the reference image + // in the middle of the block of images. 
+ int lf_blocksize; + aom_codec_ctx_t codec; + aom_codec_enc_cfg_t cfg; + aom_image_t raw; + aom_image_t raw_shift; + aom_codec_err_t res; + aom_fixed_buf_t stats; + int flags = 0; + + const AvxInterface *encoder = NULL; + const int fps = 30; + const int bitrate = 200; // kbit/s + const char *const width_arg = argv[1]; + const char *const height_arg = argv[2]; + const char *const infile_arg = argv[3]; + const char *const outfile_arg = argv[4]; + const char *const lf_width_arg = argv[5]; + const char *const lf_height_arg = argv[6]; + const char *lf_blocksize_arg = argv[7]; + exec_name = argv[0]; + + if (argc < 8) die("Invalid number of arguments"); + + encoder = get_aom_encoder_by_name("av1"); + if (!encoder) die("Unsupported codec."); + + w = (int)strtol(width_arg, NULL, 0); + h = (int)strtol(height_arg, NULL, 0); + lf_width = (int)strtol(lf_width_arg, NULL, 0); + lf_height = (int)strtol(lf_height_arg, NULL, 0); + lf_blocksize = (int)strtol(lf_blocksize_arg, NULL, 0); + lf_blocksize = lf_blocksize < lf_width ? lf_blocksize : lf_width; + lf_blocksize = lf_blocksize < lf_height ? lf_blocksize : lf_height; + + if (w <= 0 || h <= 0 || (w % 2) != 0 || (h % 2) != 0) + die("Invalid frame size: %dx%d", w, h); + if (lf_width <= 0 || lf_height <= 0) + die("Invalid lf_width and/or lf_height: %dx%d", lf_width, lf_height); + if (lf_blocksize <= 0) die("Invalid lf_blocksize: %d", lf_blocksize); + + if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, w, h, 32)) { + die("Failed to allocate image."); + } + if (FORCE_HIGHBITDEPTH_DECODING) { + // Need to allocate larger buffer to use hbd internal. 
+ aom_img_alloc(&raw_shift, AOM_IMG_FMT_I420 | AOM_IMG_FMT_HIGHBITDEPTH, w, h, + 32); + } + + printf("Using %s\n", aom_codec_iface_name(encoder->codec_interface())); + + // Configuration + res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); + if (res) die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = w; + cfg.g_h = h; + cfg.g_timebase.num = 1; + cfg.g_timebase.den = fps; + cfg.rc_target_bitrate = bitrate; + cfg.g_error_resilient = 0; // This is required. + cfg.g_lag_in_frames = 0; // need to set this since default is 19. + cfg.kf_mode = AOM_KF_DISABLED; + cfg.large_scale_tile = 0; // Only set it to 1 for camera frame encoding. + cfg.g_bit_depth = AOM_BITS_8; + flags |= (cfg.g_bit_depth > AOM_BITS_8 || FORCE_HIGHBITDEPTH_DECODING) + ? AOM_CODEC_USE_HIGHBITDEPTH + : 0; + + if (!(infile = fopen(infile_arg, "rb"))) + die("Failed to open %s for reading", infile_arg); + + // Pass 0 + cfg.g_pass = AOM_RC_FIRST_PASS; + stats = pass0(&raw, infile, encoder, &cfg, lf_width, lf_height, lf_blocksize, + flags, &raw_shift); + + // Pass 1 + rewind(infile); + cfg.g_pass = AOM_RC_LAST_PASS; + cfg.rc_twopass_stats_in = stats; + pass1(&raw, infile, outfile_arg, encoder, &cfg, lf_width, lf_height, + lf_blocksize, flags, &raw_shift); + free(stats.buf); + + if (FORCE_HIGHBITDEPTH_DECODING) aom_img_free(&raw_shift); + aom_img_free(&raw); + fclose(infile); + + return EXIT_SUCCESS; +} diff --git a/libs/libaom/src/examples/lightfield_tile_list_decoder.c b/libs/libaom/src/examples/lightfield_tile_list_decoder.c new file mode 100644 index 000000000..3b928df2c --- /dev/null +++ b/libs/libaom/src/examples/lightfield_tile_list_decoder.c @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Lightfield Tile List Decoder +// ============================ +// +// This is a lightfield tile list decoder example. It takes an input file that +// contains the anchor frames that are references of the coded tiles, the camera +// frame header, and tile list OBUs that include the tile information and the +// compressed tile data. This input file is reconstructed from the encoded +// lightfield ivf file, and is decodable by AV1 decoder. num_references is +// the number of anchor frames coded at the beginning of the light field file. +// num_tile_lists is the number of tile lists need to be decoded. There is an +// optional parameter allowing to choose the output format, and the supported +// formats are YUV1D(default), YUV, and NV12. +// Run lightfield tile list decoder to decode an AV1 tile list file: +// examples/lightfield_tile_list_decoder vase_tile_list.ivf vase_tile_list.yuv +// 4 2 0(optional) + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "aom_scale/yv12config.h" +#include "av1/common/enums.h" +#include "common/tools_common.h" +#include "common/video_reader.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s <infile> <outfile> <num_references> <num_tile_lists> " + "<output format(optional)>\n", + exec_name); + exit(EXIT_FAILURE); +} + +static void write_tile_yuv1d(aom_codec_ctx_t *codec, const aom_image_t *img, + FILE *file) { + // read out the tile size. 
+ unsigned int tile_size = 0; + if (AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1D_GET_TILE_SIZE, &tile_size)) + die_codec(codec, "Failed to get the tile size"); + const unsigned int tile_width = tile_size >> 16; + const unsigned int tile_height = tile_size & 65535; + const uint32_t output_frame_width_in_tiles = img->d_w / tile_width; + + unsigned int tile_count = 0; + if (AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1D_GET_TILE_COUNT, &tile_count)) + die_codec(codec, "Failed to get the tile size"); + + // Write tile to file. + const int shift = (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 1 : 0; + unsigned int tile_idx; + + for (tile_idx = 0; tile_idx < tile_count; ++tile_idx) { + const int row_offset = + (tile_idx / output_frame_width_in_tiles) * tile_height; + const int col_offset = + (tile_idx % output_frame_width_in_tiles) * tile_width; + int plane; + + for (plane = 0; plane < 3; ++plane) { + const unsigned char *buf = img->planes[plane]; + const int stride = img->stride[plane]; + const int roffset = + (plane > 0) ? row_offset >> img->y_chroma_shift : row_offset; + const int coffset = + (plane > 0) ? col_offset >> img->x_chroma_shift : col_offset; + const int w = (plane > 0) ? ((tile_width >> img->x_chroma_shift) << shift) + : (tile_width << shift); + const int h = + (plane > 0) ? (tile_height >> img->y_chroma_shift) : tile_height; + int y; + + // col offset needs to be adjusted for HBD. 
+ buf += roffset * stride + (coffset << shift); + + for (y = 0; y < h; ++y) { + fwrite(buf, 1, w, file); + buf += stride; + } + } + } +} + +int main(int argc, char **argv) { + FILE *outfile = NULL; + aom_codec_ctx_t codec; + AvxVideoReader *reader = NULL; + const AvxInterface *decoder = NULL; + const AvxVideoInfo *info = NULL; + int num_references; + int num_tile_lists; + aom_image_t reference_images[MAX_EXTERNAL_REFERENCES]; + size_t frame_size = 0; + const unsigned char *frame = NULL; + int output_format = YUV1D; + int i, j, n; + + exec_name = argv[0]; + + if (argc < 5) die("Invalid number of arguments."); + + reader = aom_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + if (!(outfile = fopen(argv[2], "wb"))) + die("Failed to open %s for writing.", argv[2]); + + num_references = (int)strtol(argv[3], NULL, 0); + num_tile_lists = (int)strtol(argv[4], NULL, 0); + + if (argc > 5) output_format = (int)strtol(argv[5], NULL, 0); + if (output_format < YUV1D || output_format > NV12) + die("Output format out of range [0, 2]"); + + info = aom_video_reader_get_info(reader); + + decoder = get_aom_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface())); + + if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) + die_codec(&codec, "Failed to initialize decoder."); + + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_SET_IS_ANNEXB, + info->is_annexb)) { + die("Failed to set annex b status"); + } + + // Decode anchor frames. 
+ AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_SET_TILE_MODE, 0); + for (i = 0; i < num_references; ++i) { + aom_video_reader_read_frame(reader); + frame = aom_video_reader_get_frame(reader, &frame_size); + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode frame."); + + if (i == 0) { + aom_img_fmt_t ref_fmt = 0; + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_GET_IMG_FORMAT, &ref_fmt)) + die_codec(&codec, "Failed to get the image format"); + + int frame_res[2]; + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_GET_FRAME_SIZE, frame_res)) + die_codec(&codec, "Failed to get the image frame size"); + + // Allocate memory to store decoded references. Allocate memory with the + // border so that it can be used as a reference. + for (j = 0; j < num_references; j++) { + unsigned int border = AOM_DEC_BORDER_IN_PIXELS; + if (!aom_img_alloc_with_border(&reference_images[j], ref_fmt, + frame_res[0], frame_res[1], 32, 8, + border)) { + die("Failed to allocate references."); + } + } + } + + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_COPY_NEW_FRAME_IMAGE, + &reference_images[i])) + die_codec(&codec, "Failed to copy decoded reference frame"); + + aom_codec_iter_t iter = NULL; + aom_image_t *img = NULL; + while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) { + char name[1024]; + snprintf(name, sizeof(name), "ref_%d.yuv", i); + printf("writing ref image to %s, %d, %d\n", name, img->d_w, img->d_h); + FILE *ref_file = fopen(name, "wb"); + aom_img_write(img, ref_file); + fclose(ref_file); + } + } + + // Decode the lightfield. + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_SET_TILE_MODE, 1); + + // Set external references. + av1_ext_ref_frame_t set_ext_ref = { &reference_images[0], num_references }; + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_SET_EXT_REF_PTR, &set_ext_ref); + // Must decode the camera frame header first. 
+ aom_video_reader_read_frame(reader); + frame = aom_video_reader_get_frame(reader, &frame_size); + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode the frame."); + // Decode tile lists one by one. + for (n = 0; n < num_tile_lists; n++) { + aom_video_reader_read_frame(reader); + frame = aom_video_reader_get_frame(reader, &frame_size); + + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode the tile list."); + aom_codec_iter_t iter = NULL; + aom_image_t *img = aom_codec_get_frame(&codec, &iter); + if (!img) die_codec(&codec, "Failed to get frame."); + + if (output_format == YUV1D) + // write the tile to the output file in 1D format. + write_tile_yuv1d(&codec, img, outfile); + else if (output_format == YUV) + aom_img_write(img, outfile); + else + // NV12 output format + aom_img_write_nv12(img, outfile); + } + + for (i = 0; i < num_references; i++) aom_img_free(&reference_images[i]); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + aom_video_reader_close(reader); + fclose(outfile); + + return EXIT_SUCCESS; +} diff --git a/libs/libaom/src/examples/lossless_encoder.c b/libs/libaom/src/examples/lossless_encoder.c new file mode 100644 index 000000000..e0253d2b3 --- /dev/null +++ b/libs/libaom/src/examples/lossless_encoder.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_encoder.h" +#include "aom/aomcx.h" +#include "common/tools_common.h" +#include "common/video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "lossless_encoder: Example demonstrating lossless " + "encoding feature. Supports raw input only.\n"); + fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name); + exit(EXIT_FAILURE); +} + +static int encode_frame(aom_codec_ctx_t *codec, aom_image_t *img, + int frame_index, int flags, AvxVideoWriter *writer) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = + aom_codec_encode(codec, img, frame_index, 1, flags); + if (res != AOM_CODEC_OK) die_codec(codec, "Failed to encode frame"); + + while ((pkt = aom_codec_get_cx_data(codec, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0; + if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(codec, "Failed to write compressed frame"); + } + printf(keyframe ? "K" : "."); + fflush(stdout); + } + } + + return got_pkts; +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + aom_codec_ctx_t codec; + aom_codec_enc_cfg_t cfg; + int frame_count = 0; + aom_image_t raw; + aom_codec_err_t res; + AvxVideoInfo info; + AvxVideoWriter *writer = NULL; + const AvxInterface *encoder = NULL; + const int fps = 30; + + exec_name = argv[0]; + + // Clear explicitly, as simply assigning "{ 0 }" generates + // "missing-field-initializers" warning in some compilers. 
+ memset(&info, 0, sizeof(info)); + + if (argc < 5) die("Invalid number of arguments"); + + encoder = get_aom_encoder_by_name("av1"); + if (!encoder) die("Unsupported codec."); + + info.codec_fourcc = encoder->fourcc; + info.frame_width = (int)strtol(argv[1], NULL, 0); + info.frame_height = (int)strtol(argv[2], NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || info.frame_height <= 0 || + (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image."); + } + + printf("Using %s\n", aom_codec_iface_name(encoder->codec_interface())); + + res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); + if (res) die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + + writer = aom_video_writer_open(argv[4], kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing.", argv[4]); + + if (!(infile = fopen(argv[3], "rb"))) + die("Failed to open %s for reading.", argv[3]); + + if (aom_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) + die_codec(&codec, "Failed to initialize encoder"); + + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1E_SET_LOSSLESS, 1)) + die_codec(&codec, "Failed to use lossless mode"); + + // Encode frames. + while (aom_img_read(&raw, infile)) { + encode_frame(&codec, &raw, frame_count++, 0, writer); + } + + // Flush encoder. 
+ while (encode_frame(&codec, NULL, -1, 0, writer)) { + } + + printf("\n"); + fclose(infile); + printf("Processed %d frames.\n", frame_count); + + aom_img_free(&raw); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + aom_video_writer_close(writer); + + return EXIT_SUCCESS; +} diff --git a/libs/libaom/src/examples/noise_model.c b/libs/libaom/src/examples/noise_model.c new file mode 100644 index 000000000..d07443f9d --- /dev/null +++ b/libs/libaom/src/examples/noise_model.c @@ -0,0 +1,432 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +/*!\file + * \brief This is an sample binary to create noise params from input video. + * + * To allow for external denoising applications, this sample binary illustrates + * how to create a film grain table (film grain params as a function of time) + * from an input video and its corresponding denoised source. + * + * The --output-grain-table file can be passed as input to the encoder (in + * aomenc this is done through the "--film-grain-table" parameter). 
+ * + * As an example, where the input source is an 854x480 yuv420p 8-bit video + * named "input.854_480.yuv" you would use steps similar to the following: + * + * # Run your denoiser (e.g, using hqdn3d filter): + * ffmpeg -vcodec rawvideo -video_size 854x480 -i input.854_480.yuv \ + * -vf hqdn3d=5:5:5:5 -vcodec rawvideo -an -f rawvideo \ + * denoised.854_480.yuv + * + * # Model the noise between the denoised version and original source: + * ./examples/noise_model --fps=25/1 --width=854 --height=480 --i420 \ + * --input-denoised=denoised.854_480.yuv --input=original.854_480.yuv \ + * --output-grain-table=film_grain.tbl + * + * # Encode with your favorite settings (including the grain table): + * aomenc --limit=100 --cpu-used=4 --input-bit-depth=8 \ + * --i420 -w 854 -h 480 --end-usage=q --cq-level=25 --lag-in-frames=25 \ + * --auto-alt-ref=2 --bit-depth=8 --film-grain-table=film_grain.tbl \ + * -o denoised_with_grain_params.ivf denoised.854_480.yuv + */ +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_encoder.h" +#include "aom_dsp/aom_dsp_common.h" + +#if CONFIG_AV1_DECODER +#include "aom_dsp/grain_synthesis.h" +#endif + +#include "aom_dsp/grain_table.h" +#include "aom_dsp/noise_model.h" +#include "aom_dsp/noise_util.h" +#include "aom_mem/aom_mem.h" +#include "common/args.h" +#include "common/tools_common.h" +#include "common/video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s --input=<input> --input-denoised=<denoised> " + "--output-grain-table=<outfile> " + "See comments in noise_model.c for more information.\n", + exec_name); + exit(EXIT_FAILURE); +} + +static const arg_def_t help = + ARG_DEF(NULL, "help", 0, "Show usage options and exit"); +static const arg_def_t width_arg = + ARG_DEF("w", "width", 1, "Input width (if rawvideo)"); +static const arg_def_t height_arg = + ARG_DEF("h", "height", 1, "Input height (if rawvideo)"); +static const arg_def_t 
skip_frames_arg = + ARG_DEF("s", "skip-frames", 1, "Number of frames to skip (default = 1)"); +static const arg_def_t fps_arg = ARG_DEF(NULL, "fps", 1, "Frame rate"); +static const arg_def_t input_arg = ARG_DEF("-i", "input", 1, "Input filename"); +static const arg_def_t output_grain_table_arg = + ARG_DEF("n", "output-grain-table", 1, "Output noise file"); +static const arg_def_t input_denoised_arg = + ARG_DEF("d", "input-denoised", 1, "Input denoised filename (YUV) only"); +static const arg_def_t flat_block_finder_arg = + ARG_DEF("b", "flat-block-finder", 1, "Run the flat block finder"); +static const arg_def_t block_size_arg = + ARG_DEF("b", "block-size", 1, "Block size"); +static const arg_def_t bit_depth_arg = + ARG_DEF(NULL, "bit-depth", 1, "Bit depth of input"); +static const arg_def_t use_i420 = + ARG_DEF(NULL, "i420", 0, "Input file (and denoised) is I420 (default)"); +static const arg_def_t use_i422 = + ARG_DEF(NULL, "i422", 0, "Input file (and denoised) is I422"); +static const arg_def_t use_i444 = + ARG_DEF(NULL, "i444", 0, "Input file (and denoised) is I444"); +static const arg_def_t debug_file_arg = + ARG_DEF(NULL, "debug-file", 1, "File to output debug info"); + +typedef struct { + int width; + int height; + struct aom_rational fps; + const char *input; + const char *input_denoised; + const char *output_grain_table; + int img_fmt; + int block_size; + int bit_depth; + int run_flat_block_finder; + int force_flat_psd; + int skip_frames; + const char *debug_file; +} noise_model_args_t; + +static void parse_args(noise_model_args_t *noise_args, int *argc, char **argv) { + struct arg arg; + static const arg_def_t *main_args[] = { &help, + &input_arg, + &fps_arg, + &width_arg, + &height_arg, + &block_size_arg, + &output_grain_table_arg, + &input_denoised_arg, + &use_i420, + &use_i422, + &use_i444, + &debug_file_arg, + NULL }; + for (int argi = *argc + 1; *argv; argi++, argv++) { + if (arg_match(&arg, &help, argv)) { + fprintf(stdout, "\nOptions:\n"); + 
arg_show_usage(stdout, main_args); + exit(0); + } else if (arg_match(&arg, &width_arg, argv)) { + noise_args->width = atoi(arg.val); + } else if (arg_match(&arg, &height_arg, argv)) { + noise_args->height = atoi(arg.val); + } else if (arg_match(&arg, &input_arg, argv)) { + noise_args->input = arg.val; + } else if (arg_match(&arg, &input_denoised_arg, argv)) { + noise_args->input_denoised = arg.val; + } else if (arg_match(&arg, &output_grain_table_arg, argv)) { + noise_args->output_grain_table = arg.val; + } else if (arg_match(&arg, &block_size_arg, argv)) { + noise_args->block_size = atoi(arg.val); + } else if (arg_match(&arg, &bit_depth_arg, argv)) { + noise_args->bit_depth = atoi(arg.val); + } else if (arg_match(&arg, &flat_block_finder_arg, argv)) { + noise_args->run_flat_block_finder = atoi(arg.val); + } else if (arg_match(&arg, &fps_arg, argv)) { + noise_args->fps = arg_parse_rational(&arg); + } else if (arg_match(&arg, &use_i420, argv)) { + noise_args->img_fmt = AOM_IMG_FMT_I420; + } else if (arg_match(&arg, &use_i422, argv)) { + noise_args->img_fmt = AOM_IMG_FMT_I422; + } else if (arg_match(&arg, &use_i444, argv)) { + noise_args->img_fmt = AOM_IMG_FMT_I444; + } else if (arg_match(&arg, &skip_frames_arg, argv)) { + noise_args->skip_frames = atoi(arg.val); + } else if (arg_match(&arg, &debug_file_arg, argv)) { + noise_args->debug_file = arg.val; + } else { + fprintf(stdout, "Unknown arg: %s\n\nUsage:\n", *argv); + arg_show_usage(stdout, main_args); + exit(0); + } + } + if (noise_args->bit_depth > 8) { + noise_args->img_fmt |= AOM_IMG_FMT_HIGHBITDEPTH; + } +} + +#if CONFIG_AV1_DECODER +static void print_variance_y(FILE *debug_file, aom_image_t *raw, + aom_image_t *denoised, const uint8_t *flat_blocks, + int block_size, aom_film_grain_t *grain) { + aom_image_t renoised; + grain->apply_grain = 1; + grain->random_seed = 7391; + grain->bit_depth = raw->bit_depth; + aom_img_alloc(&renoised, raw->fmt, raw->w, raw->h, 1); + + if (av1_add_film_grain(grain, denoised, 
&renoised)) { + fprintf(stderr, "Internal failure in av1_add_film_grain().\n"); + aom_img_free(&renoised); + return; + } + + const int num_blocks_w = (raw->w + block_size - 1) / block_size; + const int num_blocks_h = (raw->h + block_size - 1) / block_size; + fprintf(debug_file, "x = ["); + for (int by = 0; by < num_blocks_h; by++) { + for (int bx = 0; bx < num_blocks_w; bx++) { + double block_mean = 0; + double noise_std = 0, noise_mean = 0; + double renoise_std = 0, renoise_mean = 0; + for (int yi = 0; yi < block_size; ++yi) { + const int y = by * block_size + yi; + for (int xi = 0; xi < block_size; ++xi) { + const int x = bx * block_size + xi; + const double noise_v = (raw->planes[0][y * raw->stride[0] + x] - + denoised->planes[0][y * raw->stride[0] + x]); + noise_mean += noise_v; + noise_std += noise_v * noise_v; + + block_mean += raw->planes[0][y * raw->stride[0] + x]; + + const double renoise_v = + (renoised.planes[0][y * raw->stride[0] + x] - + denoised->planes[0][y * raw->stride[0] + x]); + renoise_mean += renoise_v; + renoise_std += renoise_v * renoise_v; + } + } + int n = (block_size * block_size); + block_mean /= n; + noise_mean /= n; + renoise_mean /= n; + noise_std = sqrt(noise_std / n - noise_mean * noise_mean); + renoise_std = sqrt(renoise_std / n - renoise_mean * renoise_mean); + fprintf(debug_file, "%d %3.2lf %3.2lf %3.2lf ", + flat_blocks[by * num_blocks_w + bx], block_mean, noise_std, + renoise_std); + } + fprintf(debug_file, "\n"); + } + fprintf(debug_file, "];\n"); + + if (raw->fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + fprintf(stderr, + "Detailed debug info not supported for high bit" + "depth formats\n"); + } else { + fprintf(debug_file, "figure(2); clf;\n"); + fprintf(debug_file, + "scatter(x(:, 2:4:end), x(:, 3:4:end), 'r'); hold on;\n"); + fprintf(debug_file, "scatter(x(:, 2:4:end), x(:, 4:4:end), 'b');\n"); + fprintf(debug_file, + "plot(linspace(0, 255, length(noise_strength_0)), " + "noise_strength_0, 'b');\n"); + fprintf(debug_file, + 
"title('Scatter plot of intensity vs noise strength');\n"); + fprintf(debug_file, + "legend('Actual', 'Estimated', 'Estimated strength');\n"); + fprintf(debug_file, "figure(3); clf;\n"); + fprintf(debug_file, "scatter(x(:, 3:4:end), x(:, 4:4:end), 'k');\n"); + fprintf(debug_file, "title('Actual vs Estimated');\n"); + fprintf(debug_file, "pause(3);\n"); + } + aom_img_free(&renoised); +} +#endif + +static void print_debug_info(FILE *debug_file, aom_image_t *raw, + aom_image_t *denoised, uint8_t *flat_blocks, + int block_size, aom_noise_model_t *noise_model) { + (void)raw; + (void)denoised; + (void)flat_blocks; + (void)block_size; + fprintf(debug_file, "figure(3); clf;\n"); + fprintf(debug_file, "figure(2); clf;\n"); + fprintf(debug_file, "figure(1); clf;\n"); + for (int c = 0; c < 3; ++c) { + fprintf(debug_file, "noise_strength_%d = [\n", c); + const aom_equation_system_t *eqns = + &noise_model->combined_state[c].strength_solver.eqns; + for (int k = 0; k < eqns->n; ++k) { + fprintf(debug_file, "%lf ", eqns->x[k]); + } + fprintf(debug_file, "];\n"); + fprintf(debug_file, "plot(noise_strength_%d); hold on;\n", c); + } + fprintf(debug_file, "legend('Y', 'cb', 'cr');\n"); + fprintf(debug_file, "title('Noise strength function');\n"); + +#if CONFIG_AV1_DECODER + aom_film_grain_t grain; + aom_noise_model_get_grain_parameters(noise_model, &grain); + print_variance_y(debug_file, raw, denoised, flat_blocks, block_size, &grain); +#endif + fflush(debug_file); +} + +int main(int argc, char *argv[]) { + noise_model_args_t args = { 0, 0, { 25, 1 }, 0, 0, 0, AOM_IMG_FMT_I420, + 32, 8, 1, 0, 1, NULL }; + aom_image_t raw, denoised; + FILE *infile = NULL; + AvxVideoInfo info; + + memset(&info, 0, sizeof(info)); + + exec_name = argv[0]; + parse_args(&args, &argc, argv + 1); + + info.frame_width = args.width; + info.frame_height = args.height; + info.time_base.numerator = args.fps.den; + info.time_base.denominator = args.fps.num; + + if (info.frame_width <= 0 || info.frame_height <= 0 || 
+ (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + if (!aom_img_alloc(&raw, args.img_fmt, info.frame_width, info.frame_height, + 1)) { + die("Failed to allocate image."); + } + if (!aom_img_alloc(&denoised, args.img_fmt, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image."); + } + infile = fopen(args.input, "rb"); + if (!infile) { + die("Failed to open input file:", args.input); + } + fprintf(stderr, "Bit depth: %d stride:%d\n", args.bit_depth, raw.stride[0]); + + const int high_bd = args.bit_depth > 8; + const int block_size = args.block_size; + aom_flat_block_finder_t block_finder; + aom_flat_block_finder_init(&block_finder, block_size, args.bit_depth, + high_bd); + + const int num_blocks_w = (info.frame_width + block_size - 1) / block_size; + const int num_blocks_h = (info.frame_height + block_size - 1) / block_size; + uint8_t *flat_blocks = (uint8_t *)aom_malloc(num_blocks_w * num_blocks_h); + // Sets the random seed on the first entry in the output table + int16_t random_seed = 7391; + aom_noise_model_t noise_model; + aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3, args.bit_depth, + high_bd }; + aom_noise_model_init(&noise_model, params); + + FILE *denoised_file = 0; + if (args.input_denoised) { + denoised_file = fopen(args.input_denoised, "rb"); + if (!denoised_file) + die("Unable to open input_denoised: %s", args.input_denoised); + } else { + die("--input-denoised file must be specified"); + } + FILE *debug_file = 0; + if (args.debug_file) { + debug_file = fopen(args.debug_file, "w"); + } + aom_film_grain_table_t grain_table = { 0, 0 }; + + int64_t prev_timestamp = 0; + int frame_count = 0; + while (aom_img_read(&raw, infile)) { + if (args.input_denoised) { + if (!aom_img_read(&denoised, denoised_file)) { + die("Unable to read input denoised file"); + } + } + if (frame_count % args.skip_frames == 0) { + int num_flat_blocks = 
num_blocks_w * num_blocks_h; + memset(flat_blocks, 1, num_flat_blocks); + if (args.run_flat_block_finder) { + memset(flat_blocks, 0, num_flat_blocks); + num_flat_blocks = aom_flat_block_finder_run( + &block_finder, raw.planes[0], info.frame_width, info.frame_height, + info.frame_width, flat_blocks); + fprintf(stdout, "Num flat blocks %d\n", num_flat_blocks); + } + + const uint8_t *planes[3] = { raw.planes[0], raw.planes[1], + raw.planes[2] }; + uint8_t *denoised_planes[3] = { denoised.planes[0], denoised.planes[1], + denoised.planes[2] }; + int strides[3] = { raw.stride[0] >> high_bd, raw.stride[1] >> high_bd, + raw.stride[2] >> high_bd }; + int chroma_sub[3] = { raw.x_chroma_shift, raw.y_chroma_shift, 0 }; + + fprintf(stdout, "Updating noise model...\n"); + aom_noise_status_t status = aom_noise_model_update( + &noise_model, (const uint8_t *const *)planes, + (const uint8_t *const *)denoised_planes, info.frame_width, + info.frame_height, strides, chroma_sub, flat_blocks, block_size); + + int64_t cur_timestamp = + frame_count * 10000000ULL * args.fps.den / args.fps.num; + if (status == AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE) { + fprintf(stdout, + "Noise type is different, updating parameters for time " + "[ %" PRId64 ", %" PRId64 ")\n", + prev_timestamp, cur_timestamp); + aom_film_grain_t grain; + aom_noise_model_get_grain_parameters(&noise_model, &grain); + grain.random_seed = random_seed; + random_seed = 0; + aom_film_grain_table_append(&grain_table, prev_timestamp, cur_timestamp, + &grain); + aom_noise_model_save_latest(&noise_model); + prev_timestamp = cur_timestamp; + } + if (debug_file) { + print_debug_info(debug_file, &raw, &denoised, flat_blocks, block_size, + &noise_model); + } + fprintf(stdout, "Done noise model update, status = %d\n", status); + } + frame_count++; + } + + aom_film_grain_t grain; + aom_noise_model_get_grain_parameters(&noise_model, &grain); + grain.random_seed = random_seed; + aom_film_grain_table_append(&grain_table, prev_timestamp, 
INT64_MAX, &grain); + if (args.output_grain_table) { + struct aom_internal_error_info error_info; + if (AOM_CODEC_OK != aom_film_grain_table_write(&grain_table, + args.output_grain_table, + &error_info)) { + die("Unable to write output film grain table"); + } + } + aom_film_grain_table_free(&grain_table); + + if (infile) fclose(infile); + if (denoised_file) fclose(denoised_file); + if (debug_file) fclose(debug_file); + aom_img_free(&raw); + aom_img_free(&denoised); + + return EXIT_SUCCESS; +} diff --git a/libs/libaom/src/examples/resize_util.c b/libs/libaom/src/examples/resize_util.c new file mode 100644 index 000000000..5692c2062 --- /dev/null +++ b/libs/libaom/src/examples/resize_util.c @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <assert.h> +#include <limits.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "av1/common/resize.h" +#include "common/tools_common.h" + +static const char *exec_name = NULL; + +static void usage() { + printf("Usage:\n"); + printf("%s <input_yuv> <width>x<height> <target_width>x<target_height> ", + exec_name); + printf("<output_yuv> [<frames>]\n"); +} + +void usage_exit(void) { + usage(); + exit(EXIT_FAILURE); +} + +static int parse_dim(char *v, int *width, int *height) { + char *x = strchr(v, 'x'); + if (x == NULL) x = strchr(v, 'X'); + if (x == NULL) return 0; + *width = atoi(v); + *height = atoi(&x[1]); + if (*width <= 0 || *height <= 0) + return 0; + else + return 1; +} + +int main(int argc, char *argv[]) { + char *fin, *fout; + FILE *fpin, *fpout; + uint8_t *inbuf, *outbuf; + uint8_t *inbuf_u, *outbuf_u; + uint8_t *inbuf_v, *outbuf_v; + int f, frames; + int width, height, target_width, target_height; + + exec_name = argv[0]; + + if (argc < 5) { + printf("Incorrect parameters:\n"); + usage(); + return 1; + } + + fin = argv[1]; + fout = argv[4]; + if (!parse_dim(argv[2], &width, &height)) { + printf("Incorrect parameters: %s\n", argv[2]); + usage(); + return 1; + } + if (!parse_dim(argv[3], &target_width, &target_height)) { + printf("Incorrect parameters: %s\n", argv[3]); + usage(); + return 1; + } + + fpin = fopen(fin, "rb"); + if (fpin == NULL) { + printf("Can't open file %s to read\n", fin); + usage(); + return 1; + } + fpout = fopen(fout, "wb"); + if (fpout == NULL) { + fclose(fpin); + printf("Can't open file %s to write\n", fout); + usage(); + return 1; + } + if (argc >= 6) + frames = atoi(argv[5]); + else + frames = INT_MAX; + + printf("Input size: %dx%d\n", width, height); + printf("Target size: %dx%d, Frames: ", target_width, target_height); + if (frames == INT_MAX) + printf("All\n"); + else + printf("%d\n", frames); + + inbuf = (uint8_t *)malloc(width * height * 3 / 2); + outbuf = (uint8_t 
*)malloc(target_width * target_height * 3 / 2); + inbuf_u = inbuf + width * height; + inbuf_v = inbuf_u + width * height / 4; + outbuf_u = outbuf + target_width * target_height; + outbuf_v = outbuf_u + target_width * target_height / 4; + f = 0; + while (f < frames) { + if (fread(inbuf, width * height * 3 / 2, 1, fpin) != 1) break; + av1_resize_frame420(inbuf, width, inbuf_u, inbuf_v, width / 2, height, + width, outbuf, target_width, outbuf_u, outbuf_v, + target_width / 2, target_height, target_width); + fwrite(outbuf, target_width * target_height * 3 / 2, 1, fpout); + f++; + } + printf("%d frames processed\n", f); + fclose(fpin); + fclose(fpout); + + free(inbuf); + free(outbuf); + return 0; +} diff --git a/libs/libaom/src/examples/scalable_decoder.c b/libs/libaom/src/examples/scalable_decoder.c new file mode 100644 index 000000000..c22924223 --- /dev/null +++ b/libs/libaom/src/examples/scalable_decoder.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Scalable Decoder +// ============== +// +// This is an example of a scalable decoder loop. It takes a 2-spatial-layer +// input file +// containing the compressed data (in OBU format), passes it through the +// decoder, and writes the decompressed frames to disk. The base layer and +// enhancement layers are stored as separate files, out_lyr0.yuv and +// out_lyr1.yuv, respectively. 
+// +// Standard Includes +// ----------------- +// For decoders, you only have to include `aom_decoder.h` and then any +// header files for the specific codecs you use. In this case, we're using +// av1. +// +// Initializing The Codec +// ---------------------- +// The libaom decoder is initialized by the call to aom_codec_dec_init(). +// Determining the codec interface to use is handled by AvxVideoReader and the +// functions prefixed with aom_video_reader_. Discussion of those functions is +// beyond the scope of this example, but the main gist is to open the input file +// and parse just enough of it to determine if it's an AVx file and which AVx +// codec is contained within the file. +// Note the NULL pointer passed to aom_codec_dec_init(). We do that in this +// example because we want the algorithm to determine the stream configuration +// (width/height) and allocate memory automatically. +// +// Decoding A Frame +// ---------------- +// Once the frame has been read into memory, it is decoded using the +// `aom_codec_decode` function. The call takes a pointer to the data +// (`frame`) and the length of the data (`frame_size`). No application data +// is associated with the frame in this example, so the `user_priv` +// parameter is NULL. The `deadline` parameter is left at zero for this +// example. This parameter is generally only used when doing adaptive post +// processing. +// +// Codecs may produce a variable number of output frames for every call to +// `aom_codec_decode`. These frames are retrieved by the +// `aom_codec_get_frame` iterator function. The iterator variable `iter` is +// initialized to NULL each time `aom_codec_decode` is called. +// `aom_codec_get_frame` is called in a loop, returning a pointer to a +// decoded image or NULL to indicate the end of the list. +// +// Processing The Decoded Data +// --------------------------- +// In this example, we simply write the decoded data to disk.
It is +// important to honor the image's `stride` values. +// +// Cleanup +// ------- +// The `aom_codec_destroy` call frees any memory allocated by the codec. +// +// Error Handling +// -------------- +// This example does not special case any error return codes. If there was +// an error, a descriptive message is printed and the program exits. With +// few exceptions, aom_codec functions return an enumerated error status, +// with the value `0` indicating success. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "common/obudec.h" +#include "common/tools_common.h" +#include "common/video_reader.h" + +static const char *exec_name; + +#define MAX_LAYERS 5 + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <infile>\n", exec_name); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) { + int frame_cnt = 0; + FILE *outfile[MAX_LAYERS]; + char filename[80]; + aom_codec_ctx_t codec; + const AvxInterface *decoder = NULL; + FILE *inputfile = NULL; + uint8_t *buf = NULL; + size_t bytes_in_buffer = 0; + size_t buffer_size = 0; + struct AvxInputContext aom_input_ctx; + struct ObuDecInputContext obu_ctx = { &aom_input_ctx, NULL, 0, 0, 0 }; + aom_codec_stream_info_t si; + uint8_t tmpbuf[32]; + unsigned int i; + + exec_name = argv[0]; + + if (argc != 2) die("Invalid number of arguments."); + + if (!(inputfile = fopen(argv[1], "rb"))) + die("Failed to open %s for read.", argv[1]); + obu_ctx.avx_ctx->file = inputfile; + obu_ctx.avx_ctx->filename = argv[1]; + + decoder = get_aom_decoder_by_index(0); + printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface())); + + if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) + die_codec(&codec, "Failed to initialize decoder."); + + if (aom_codec_control(&codec, AV1D_SET_OUTPUT_ALL_LAYERS, 1)) { + die_codec(&codec, "Failed to set output_all_layers control."); + } + + // peak sequence header OBU to get number of spatial 
layers + const size_t ret = fread(tmpbuf, 1, 32, inputfile); + if (ret != 32) die_codec(&codec, "Input is not a valid obu file"); + si.is_annexb = 0; + if (aom_codec_peek_stream_info(decoder->codec_interface(), tmpbuf, 32, &si)) { + die_codec(&codec, "Input is not a valid obu file"); + } + fseek(inputfile, -32, SEEK_CUR); + + if (!file_is_obu(&obu_ctx)) + die_codec(&codec, "Input is not a valid obu file"); + + // open base layer output yuv file + snprintf(filename, sizeof(filename), "out_lyr%d.yuv", 0); + if (!(outfile[0] = fopen(filename, "wb"))) + die("Failed top open output for writing."); + + // open any enhancement layer output yuv files + for (i = 1; i < si.number_spatial_layers; i++) { + snprintf(filename, sizeof(filename), "out_lyr%d.yuv", i); + if (!(outfile[i] = fopen(filename, "wb"))) + die("Failed to open output for writing."); + } + + while (!obudec_read_temporal_unit(&obu_ctx, &buf, &bytes_in_buffer, + &buffer_size)) { + aom_codec_iter_t iter = NULL; + aom_image_t *img = NULL; + if (aom_codec_decode(&codec, buf, bytes_in_buffer, NULL)) + die_codec(&codec, "Failed to decode frame."); + + while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) { + aom_image_t *img_shifted = + aom_img_alloc(NULL, AOM_IMG_FMT_I420, img->d_w, img->d_h, 16); + img_shifted->bit_depth = 8; + aom_img_downshift(img_shifted, img, + img->bit_depth - img_shifted->bit_depth); + if (img->spatial_id == 0) { + printf("Writing base layer 0 %d\n", frame_cnt); + aom_img_write(img_shifted, outfile[0]); + } else if (img->spatial_id <= (int)(si.number_spatial_layers - 1)) { + printf("Writing enhancement layer %d %d\n", img->spatial_id, frame_cnt); + aom_img_write(img_shifted, outfile[img->spatial_id]); + } else { + die_codec(&codec, "Invalid bitstream. 
Layer id exceeds layer count"); + } + if (img->spatial_id == (int)(si.number_spatial_layers - 1)) ++frame_cnt; + } + } + + printf("Processed %d frames.\n", frame_cnt); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + + for (i = 0; i < si.number_spatial_layers; i++) fclose(outfile[i]); + + fclose(inputfile); + + return EXIT_SUCCESS; +} diff --git a/libs/libaom/src/examples/scalable_encoder.c b/libs/libaom/src/examples/scalable_encoder.c new file mode 100644 index 000000000..7af03e29f --- /dev/null +++ b/libs/libaom/src/examples/scalable_encoder.c @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Scalable Encoder +// ============== +// +// This is an example of a scalable encoder loop. It takes two input files in +// YV12 format, passes it through the encoder, and writes the compressed +// frames to disk in OBU format. +// +// Getting The Default Configuration +// --------------------------------- +// Encoders have the notion of "usage profiles." For example, an encoder +// may want to publish default configurations for both a video +// conferencing application and a best quality offline encoder. These +// obviously have very different default settings. Consult the +// documentation for your codec to see if it provides any default +// configurations. All codecs provide a default configuration, number 0, +// which is valid for material in the vacinity of QCIF/QVGA. 
+// +// Updating The Configuration +// --------------------------------- +// Almost all applications will want to update the default configuration +// with settings specific to their usage. Here we set the width and height +// of the video file to that specified on the command line. We also scale +// the default bitrate based on the ratio between the default resolution +// and the resolution specified on the command line. +// +// Encoding A Frame +// ---------------- +// The frame is read as a continuous block (size = width * height * 3 / 2) +// from the input file. If a frame was read (the input file has not hit +// EOF) then the frame is passed to the encoder. Otherwise, a NULL +// is passed, indicating the End-Of-Stream condition to the encoder. The +// `frame_cnt` is reused as the presentation time stamp (PTS) and each +// frame is shown for one frame-time in duration. The flags parameter is +// unused in this example. + +// Forced Keyframes +// ---------------- +// Keyframes can be forced by setting the AOM_EFLAG_FORCE_KF bit of the +// flags passed to `aom_codec_encode()`. In this example, we force a +// keyframe every <keyframe-interval> frames. Note, the output stream can +// contain additional keyframes beyond those that have been forced using the +// AOM_EFLAG_FORCE_KF flag because of automatic keyframe placement by the +// encoder. +// +// Processing The Encoded Data +// --------------------------- +// Each packet of type `AOM_CODEC_CX_FRAME_PKT` contains the encoded data +// for this frame. We write the raw data directly to the output file; no IVF +// frame header is added. +// +// Cleanup +// ------- +// The `aom_codec_destroy` call frees any memory allocated by the codec. +// +// Error Handling +// -------------- +// This example does not special case any error return codes. If there was +// an error, a descriptive message is printed and the program exits. With +// few exceptions, aom_codec functions return an enumerated error status, +// with the value `0` indicating success.
+ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_encoder.h" +#include "aom/aomcx.h" +#include "av1/common/enums.h" +#include "common/tools_common.h" +#include "common/video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s <codec> <width> <height> <infile0> <infile1> " + "<outfile> <frames to encode>\n" + "See comments in scalable_encoder.c for more information.\n", + exec_name); + exit(EXIT_FAILURE); +} + +static int encode_frame(aom_codec_ctx_t *codec, aom_image_t *img, + int frame_index, int flags, FILE *outfile) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = + aom_codec_encode(codec, img, frame_index, 1, flags); + if (res != AOM_CODEC_OK) die_codec(codec, "Failed to encode frame"); + + while ((pkt = aom_codec_get_cx_data(codec, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0; + if (fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile) != + pkt->data.frame.sz) { + die_codec(codec, "Failed to write compressed frame"); + } + printf(keyframe ? 
"K" : "."); + printf(" %6d\n", (int)pkt->data.frame.sz); + fflush(stdout); + } + } + + return got_pkts; +} + +int main(int argc, char **argv) { + FILE *infile0 = NULL; + FILE *infile1 = NULL; + aom_codec_ctx_t codec; + aom_codec_enc_cfg_t cfg; + int frame_count = 0; + aom_image_t raw0, raw1; + aom_codec_err_t res; + AvxVideoInfo info; + const AvxInterface *encoder = NULL; + const int fps = 30; + const int bitrate = 200; + int keyframe_interval = 0; + int max_frames = 0; + int frames_encoded = 0; + const char *codec_arg = NULL; + const char *width_arg = NULL; + const char *height_arg = NULL; + const char *infile0_arg = NULL; + const char *infile1_arg = NULL; + const char *outfile_arg = NULL; + // const char *keyframe_interval_arg = NULL; + FILE *outfile = NULL; + + exec_name = argv[0]; + + // Clear explicitly, as simply assigning "{ 0 }" generates + // "missing-field-initializers" warning in some compilers. + memset(&info, 0, sizeof(info)); + + if (argc != 8) die("Invalid number of arguments"); + + codec_arg = argv[1]; + width_arg = argv[2]; + height_arg = argv[3]; + infile0_arg = argv[4]; + infile1_arg = argv[5]; + outfile_arg = argv[6]; + max_frames = (int)strtol(argv[7], NULL, 0); + + encoder = get_aom_encoder_by_name(codec_arg); + if (!encoder) die("Unsupported codec."); + + info.codec_fourcc = encoder->fourcc; + info.frame_width = (int)strtol(width_arg, NULL, 0); + info.frame_height = (int)strtol(height_arg, NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || info.frame_height <= 0 || + (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + if (!aom_img_alloc(&raw0, AOM_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image for layer 0."); + } + if (!aom_img_alloc(&raw1, AOM_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image for layer 1."); 
+ } + + // keyframe_interval = (int)strtol(keyframe_interval_arg, NULL, 0); + keyframe_interval = 100; + if (keyframe_interval < 0) die("Invalid keyframe interval value."); + + printf("Using %s\n", aom_codec_iface_name(encoder->codec_interface())); + + res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); + if (res) die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + cfg.rc_target_bitrate = bitrate; + cfg.g_error_resilient = 0; + cfg.g_lag_in_frames = 0; + cfg.rc_end_usage = AOM_Q; + cfg.save_as_annexb = 0; + + outfile = fopen(outfile_arg, "wb"); + if (!outfile) die("Failed to open %s for writing.", outfile_arg); + + if (!(infile0 = fopen(infile0_arg, "rb"))) + die("Failed to open %s for reading.", infile0_arg); + if (!(infile1 = fopen(infile1_arg, "rb"))) + die("Failed to open %s for reading.", infile0_arg); + + if (aom_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) + die_codec(&codec, "Failed to initialize encoder"); + if (aom_codec_control(&codec, AOME_SET_CPUUSED, 8)) + die_codec(&codec, "Failed to set cpu to 8"); + + if (aom_codec_control(&codec, AV1E_SET_TILE_COLUMNS, 2)) + die_codec(&codec, "Failed to set tile columns to 2"); + if (aom_codec_control(&codec, AV1E_SET_NUM_TG, 3)) + die_codec(&codec, "Failed to set num of tile groups to 3"); + + if (aom_codec_control(&codec, AOME_SET_NUMBER_SPATIAL_LAYERS, 2)) + die_codec(&codec, "Failed to set number of spatial layers to 2"); + + // Encode frames. 
+ while (aom_img_read(&raw0, infile0)) { + int flags = 0; + + // configure and encode base layer + + if (keyframe_interval > 0 && frames_encoded % keyframe_interval == 0) + flags |= AOM_EFLAG_FORCE_KF; + else + // use previous base layer (LAST) as sole reference + // save this frame as LAST to be used as reference by enhanmcent layer + // and next base layer + flags |= AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | + AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 | + AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF | + AOM_EFLAG_NO_UPD_ENTROPY; + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + if (aom_codec_enc_config_set(&codec, &cfg)) + die_codec(&codec, "Failed to set enc cfg for layer 0"); + if (aom_codec_control(&codec, AOME_SET_SPATIAL_LAYER_ID, 0)) + die_codec(&codec, "Failed to set layer id to 0"); + if (aom_codec_control(&codec, AOME_SET_CQ_LEVEL, 62)) + die_codec(&codec, "Failed to set cq level"); + encode_frame(&codec, &raw0, frame_count++, flags, outfile); + + // configure and encode enhancement layer + + // use LAST (base layer) as sole reference + flags = AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | AOM_EFLAG_NO_REF_BWD | + AOM_EFLAG_NO_REF_ARF2 | AOM_EFLAG_NO_UPD_LAST | + AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF | + AOM_EFLAG_NO_UPD_ENTROPY; + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + aom_img_read(&raw1, infile1); + if (aom_codec_enc_config_set(&codec, &cfg)) + die_codec(&codec, "Failed to set enc cfg for layer 1"); + if (aom_codec_control(&codec, AOME_SET_SPATIAL_LAYER_ID, 1)) + die_codec(&codec, "Failed to set layer id to 1"); + if (aom_codec_control(&codec, AOME_SET_CQ_LEVEL, 10)) + die_codec(&codec, "Failed to set cq level"); + encode_frame(&codec, &raw1, frame_count++, flags, outfile); + + frames_encoded++; + + if (max_frames > 0 && frames_encoded >= max_frames) break; + } + + // Flush encoder. 
+ while (encode_frame(&codec, NULL, -1, 0, outfile)) continue; + + printf("\n"); + fclose(infile0); + fclose(infile1); + printf("Processed %d frames.\n", frame_count / 2); + + aom_img_free(&raw0); + aom_img_free(&raw1); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + fclose(outfile); + + return EXIT_SUCCESS; +} diff --git a/libs/libaom/src/examples/set_maps.c b/libs/libaom/src/examples/set_maps.c new file mode 100644 index 000000000..9aeb96e43 --- /dev/null +++ b/libs/libaom/src/examples/set_maps.c @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// AOM Set Active and ROI Maps +// =========================== +// +// This is an example demonstrating how to control the AOM encoder's +// ROI and Active maps. +// +// ROI (Reigon of Interest) maps are a way for the application to assign +// each macroblock in the image to a region, and then set quantizer and +// filtering parameters on that image. +// +// Active maps are a way for the application to specify on a +// macroblock-by-macroblock basis whether there is any activity in that +// macroblock. +// +// +// Configuration +// ------------- +// An ROI map is set on frame 22. If the width of the image in macroblocks +// is evenly divisble by 4, then the output will appear to have distinct +// columns, where the quantizer, loopfilter, and static threshold differ +// from column to column. +// +// An active map is set on frame 33. 
If the width of the image in macroblocks +// is evenly divisble by 4, then the output will appear to have distinct +// columns, where one column will have motion and the next will not. +// +// The active map is cleared on frame 44. +// +// Observing The Effects +// --------------------- +// Use the `simple_decoder` example to decode this sample, and observe +// the change in the image at frames 22, 33, and 44. + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_encoder.h" +#include "aom/aomcx.h" +#include "common/tools_common.h" +#include "common/video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n", + exec_name); + exit(EXIT_FAILURE); +} + +static void set_active_map(const aom_codec_enc_cfg_t *cfg, + aom_codec_ctx_t *codec) { + unsigned int i; + aom_active_map_t map = { 0, 0, 0 }; + + map.rows = (cfg->g_h + 15) / 16; + map.cols = (cfg->g_w + 15) / 16; + + map.active_map = (uint8_t *)malloc(map.rows * map.cols); + for (i = 0; i < map.rows * map.cols; ++i) map.active_map[i] = i % 2; + + if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map)) + die_codec(codec, "Failed to set active map"); + + free(map.active_map); +} + +static void unset_active_map(const aom_codec_enc_cfg_t *cfg, + aom_codec_ctx_t *codec) { + aom_active_map_t map = { 0, 0, 0 }; + + map.rows = (cfg->g_h + 15) / 16; + map.cols = (cfg->g_w + 15) / 16; + map.active_map = NULL; + + if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map)) + die_codec(codec, "Failed to set active map"); +} + +static int encode_frame(aom_codec_ctx_t *codec, aom_image_t *img, + int frame_index, AvxVideoWriter *writer) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = aom_codec_encode(codec, img, frame_index, 1, 0); + if (res != AOM_CODEC_OK) die_codec(codec, "Failed to encode frame"); + + while ((pkt = 
aom_codec_get_cx_data(codec, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0; + if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(codec, "Failed to write compressed frame"); + } + + printf(keyframe ? "K" : "."); + fflush(stdout); + } + } + + return got_pkts; +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + aom_codec_ctx_t codec; + aom_codec_enc_cfg_t cfg; + int frame_count = 0; + const int limit = 15; + aom_image_t raw; + aom_codec_err_t res; + AvxVideoInfo info; + AvxVideoWriter *writer = NULL; + const AvxInterface *encoder = NULL; + const int fps = 2; // TODO(dkovalev) add command line argument + const double bits_per_pixel_per_frame = 0.067; + + exec_name = argv[0]; + if (argc != 6) die("Invalid number of arguments"); + + memset(&info, 0, sizeof(info)); + + encoder = get_aom_encoder_by_name(argv[1]); + if (encoder == NULL) { + die("Unsupported codec."); + } + assert(encoder != NULL); + info.codec_fourcc = encoder->fourcc; + info.frame_width = (int)strtol(argv[2], NULL, 0); + info.frame_height = (int)strtol(argv[3], NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || info.frame_height <= 0 || + (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image."); + } + + printf("Using %s\n", aom_codec_iface_name(encoder->codec_interface())); + + res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); + if (res) die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = 
info.time_base.denominator; + cfg.rc_target_bitrate = + (unsigned int)(bits_per_pixel_per_frame * cfg.g_w * cfg.g_h * fps / 1000); + cfg.g_lag_in_frames = 0; + + writer = aom_video_writer_open(argv[5], kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing.", argv[5]); + + if (!(infile = fopen(argv[4], "rb"))) + die("Failed to open %s for reading.", argv[4]); + + if (aom_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) + die_codec(&codec, "Failed to initialize encoder"); + + // Encode frames. + while (aom_img_read(&raw, infile) && frame_count < limit) { + ++frame_count; + + if (frame_count == 5) { + set_active_map(&cfg, &codec); + } else if (frame_count == 11) { + unset_active_map(&cfg, &codec); + } + + encode_frame(&codec, &raw, frame_count, writer); + } + + // Flush encoder. + while (encode_frame(&codec, NULL, -1, writer)) { + } + + printf("\n"); + fclose(infile); + printf("Processed %d frames.\n", frame_count); + + aom_img_free(&raw); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + aom_video_writer_close(writer); + + return EXIT_SUCCESS; +} diff --git a/libs/libaom/src/examples/simple_decoder.c b/libs/libaom/src/examples/simple_decoder.c new file mode 100644 index 000000000..d098d1e0b --- /dev/null +++ b/libs/libaom/src/examples/simple_decoder.c @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Simple Decoder +// ============== +// +// This is an example of a simple decoder loop. 
It takes an input file +// containing the compressed data (in IVF format), passes it through the +// decoder, and writes the decompressed frames to disk. Other decoder +// examples build upon this one. +// +// The details of the IVF format have been elided from this example for +// simplicity of presentation, as IVF files will not generally be used by +// your application. In general, an IVF file consists of a file header, +// followed by a variable number of frames. Each frame consists of a frame +// header followed by a variable-length payload. The length of the payload +// is specified in the first four bytes of the frame header. The payload is +// the raw compressed data. +// +// Standard Includes +// ----------------- +// For decoders, you only have to include `aom_decoder.h` and then any +// header files for the specific codecs you use. In this case, we're using +// aom. +// +// Initializing The Codec +// ---------------------- +// The libaom decoder is initialized by the call to aom_codec_dec_init(). +// Determining the codec interface to use is handled by AvxVideoReader and the +// functions prefixed with aom_video_reader_. Discussion of those functions is +// beyond the scope of this example, but the main gist is to open the input file +// and parse just enough of it to determine if it's an AVx file and which AVx +// codec is contained within the file. +// Note the NULL pointer passed to aom_codec_dec_init(). We do that in this +// example because we want the algorithm to determine the stream configuration +// (width/height) and allocate memory automatically. +// +// Decoding A Frame +// ---------------- +// Once the frame has been read into memory, it is decoded using the +// `aom_codec_decode` function. The call takes a pointer to the data +// (`frame`) and the length of the data (`frame_size`). No application data +// is associated with the frame in this example, so the `user_priv` +// parameter is NULL.
+// +// Codecs may produce a variable number of output frames for every call to +// `aom_codec_decode`. These frames are retrieved by the +// `aom_codec_get_frame` iterator function. The iterator variable `iter` is +// initialized to NULL each time `aom_codec_decode` is called. +// `aom_codec_get_frame` is called in a loop, returning a pointer to a +// decoded image or NULL to indicate the end of the list. +// +// Processing The Decoded Data +// --------------------------- +// In this example, we simply write the decoded data to disk. It is +// important to honor the image's `stride` values. +// +// Cleanup +// ------- +// The `aom_codec_destroy` call frees any memory allocated by the codec. +// +// Error Handling +// -------------- +// This example does not special case any error return codes. If there was +// an error, a descriptive message is printed and the program exits. With +// few exceptions, aom_codec functions return an enumerated error status, +// with the value `0` indicating success.
+ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_decoder.h" +#include "common/tools_common.h" +#include "common/video_reader.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) { + int frame_cnt = 0; + FILE *outfile = NULL; + aom_codec_ctx_t codec; + AvxVideoReader *reader = NULL; + const AvxInterface *decoder = NULL; + const AvxVideoInfo *info = NULL; + + exec_name = argv[0]; + + if (argc != 3) die("Invalid number of arguments."); + + reader = aom_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + if (!(outfile = fopen(argv[2], "wb"))) + die("Failed to open %s for writing.", argv[2]); + + info = aom_video_reader_get_info(reader); + + decoder = get_aom_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + + printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface())); + + if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) + die_codec(&codec, "Failed to initialize decoder."); + + while (aom_video_reader_read_frame(reader)) { + aom_codec_iter_t iter = NULL; + aom_image_t *img = NULL; + size_t frame_size = 0; + const unsigned char *frame = + aom_video_reader_get_frame(reader, &frame_size); + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode frame."); + + while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) { + aom_img_write(img, outfile); + ++frame_cnt; + } + } + + printf("Processed %d frames.\n", frame_cnt); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + + printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n", + info->frame_width, info->frame_height, argv[2]); + + aom_video_reader_close(reader); + + fclose(outfile); + + return EXIT_SUCCESS; +} diff --git a/libs/libaom/src/examples/simple_encoder.c 
b/libs/libaom/src/examples/simple_encoder.c new file mode 100644 index 000000000..01a37cf0c --- /dev/null +++ b/libs/libaom/src/examples/simple_encoder.c @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Simple Encoder +// ============== +// +// This is an example of a simple encoder loop. It takes an input file in +// YV12 format, passes it through the encoder, and writes the compressed +// frames to disk in IVF format. Other encoder examples build upon this +// one. +// +// The details of the IVF format have been elided from this example for +// simplicity of presentation, as IVF files will not generally be used by +// your application. In general, an IVF file consists of a file header, +// followed by a variable number of frames. Each frame consists of a frame +// header followed by a variable-length payload. The length of the payload +// is specified in the first four bytes of the frame header. The payload is +// the raw compressed data. +// +// Standard Includes +// ----------------- +// For encoders, you only have to include `aom_encoder.h` and then any +// header files for the specific codecs you use. In this case, we're using +// aom. +// +// Getting The Default Configuration +// --------------------------------- +// Encoders have the notion of "usage profiles." For example, an encoder +// may want to publish default configurations for both a video +// conferencing application and a best quality offline encoder.
These +// obviously have very different default settings. Consult the +// documentation for your codec to see if it provides any default +// configurations. All codecs provide a default configuration, number 0, +// which is valid for material in the vicinity of QCIF/QVGA. +// +// Updating The Configuration +// --------------------------------- +// Almost all applications will want to update the default configuration +// with settings specific to their usage. Here we set the width and height +// of the video file to that specified on the command line. We also scale +// the default bitrate based on the ratio between the default resolution +// and the resolution specified on the command line. +// +// Initializing The Codec +// ---------------------- +// The encoder is initialized by the following code. +// +// Encoding A Frame +// ---------------- +// The frame is read as a continuous block (size = width * height * 3 / 2) +// from the input file. If a frame was read (the input file has not hit +// EOF) then the frame is passed to the encoder. Otherwise, a NULL +// is passed, indicating the End-Of-Stream condition to the encoder. The +// `frame_cnt` is reused as the presentation time stamp (PTS) and each +// frame is shown for one frame-time in duration. The flags parameter is +// unused in this example. + +// Forced Keyframes +// ---------------- +// Keyframes can be forced by setting the AOM_EFLAG_FORCE_KF bit of the +// flags passed to `aom_codec_encode()`. In this example, we force a +// keyframe every <keyframe-interval> frames. Note, the output stream can +// contain additional keyframes beyond those that have been forced using the +// AOM_EFLAG_FORCE_KF flag because of automatic keyframe placement by the +// encoder. +// +// Processing The Encoded Data +// --------------------------- +// Each packet of type `AOM_CODEC_CX_FRAME_PKT` contains the encoded data +// for this frame. We write an IVF frame header, followed by the raw data.
+// +// Cleanup +// ------- +// The `aom_codec_destroy` call frees any memory allocated by the codec. +// +// Error Handling +// -------------- +// This example does not special case any error return codes. If there was +// an error, a descriptive message is printed and the program exits. With +// few exeptions, aom_codec functions return an enumerated error status, +// with the value `0` indicating success. +// +// Error Resiliency Features +// ------------------------- +// Error resiliency is controlled by the g_error_resilient member of the +// configuration structure. Use the `decode_with_drops` example to decode with +// frames 5-10 dropped. Compare the output for a file encoded with this example +// versus one encoded with the `simple_encoder` example. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_encoder.h" +#include "common/tools_common.h" +#include "common/video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s <codec> <width> <height> <infile> <outfile> " + "<keyframe-interval> <error-resilient> <frames to encode>\n" + "See comments in simple_encoder.c for more information.\n", + exec_name); + exit(EXIT_FAILURE); +} + +static int encode_frame(aom_codec_ctx_t *codec, aom_image_t *img, + int frame_index, int flags, AvxVideoWriter *writer) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = + aom_codec_encode(codec, img, frame_index, 1, flags); + if (res != AOM_CODEC_OK) die_codec(codec, "Failed to encode frame"); + + while ((pkt = aom_codec_get_cx_data(codec, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0; + if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(codec, "Failed to write compressed frame"); + } + printf(keyframe 
? "K" : "."); + fflush(stdout); + } + } + + return got_pkts; +} + +// TODO(tomfinegan): Improve command line parsing and add args for bitrate/fps. +int main(int argc, char **argv) { + FILE *infile = NULL; + aom_codec_ctx_t codec; + aom_codec_enc_cfg_t cfg; + int frame_count = 0; + aom_image_t raw; + aom_codec_err_t res; + AvxVideoInfo info; + AvxVideoWriter *writer = NULL; + const AvxInterface *encoder = NULL; + const int fps = 30; + const int bitrate = 200; + int keyframe_interval = 0; + int max_frames = 0; + int frames_encoded = 0; + const char *codec_arg = NULL; + const char *width_arg = NULL; + const char *height_arg = NULL; + const char *infile_arg = NULL; + const char *outfile_arg = NULL; + const char *keyframe_interval_arg = NULL; + + exec_name = argv[0]; + + // Clear explicitly, as simply assigning "{ 0 }" generates + // "missing-field-initializers" warning in some compilers. + memset(&info, 0, sizeof(info)); + + if (argc != 9) die("Invalid number of arguments"); + + codec_arg = argv[1]; + width_arg = argv[2]; + height_arg = argv[3]; + infile_arg = argv[4]; + outfile_arg = argv[5]; + keyframe_interval_arg = argv[6]; + max_frames = (int)strtol(argv[8], NULL, 0); + + encoder = get_aom_encoder_by_name(codec_arg); + if (!encoder) die("Unsupported codec."); + + info.codec_fourcc = encoder->fourcc; + info.frame_width = (int)strtol(width_arg, NULL, 0); + info.frame_height = (int)strtol(height_arg, NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || info.frame_height <= 0 || + (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image."); + } + + keyframe_interval = (int)strtol(keyframe_interval_arg, NULL, 0); + if (keyframe_interval < 0) die("Invalid keyframe interval value."); + + printf("Using %s\n", 
aom_codec_iface_name(encoder->codec_interface())); + + res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); + if (res) die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + cfg.rc_target_bitrate = bitrate; + cfg.g_error_resilient = (aom_codec_er_flags_t)strtoul(argv[7], NULL, 0); + + writer = aom_video_writer_open(outfile_arg, kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing.", outfile_arg); + + if (!(infile = fopen(infile_arg, "rb"))) + die("Failed to open %s for reading.", infile_arg); + + if (aom_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) + die_codec(&codec, "Failed to initialize encoder"); + + // Encode frames. + while (aom_img_read(&raw, infile)) { + int flags = 0; + if (keyframe_interval > 0 && frame_count % keyframe_interval == 0) + flags |= AOM_EFLAG_FORCE_KF; + encode_frame(&codec, &raw, frame_count++, flags, writer); + frames_encoded++; + if (max_frames > 0 && frames_encoded >= max_frames) break; + } + + // Flush encoder. + while (encode_frame(&codec, NULL, -1, 0, writer)) continue; + + printf("\n"); + fclose(infile); + printf("Processed %d frames.\n", frame_count); + + aom_img_free(&raw); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + aom_video_writer_close(writer); + + return EXIT_SUCCESS; +} diff --git a/libs/libaom/src/examples/svc_encoder_rtc.c b/libs/libaom/src/examples/svc_encoder_rtc.c new file mode 100644 index 000000000..1316c6c1e --- /dev/null +++ b/libs/libaom/src/examples/svc_encoder_rtc.c @@ -0,0 +1,907 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This is an example demonstrating how to implement a multi-layer AOM +// encoding scheme for RTC video applications. + +#include <assert.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_encoder.h" +#include "aom/aomcx.h" +#include "av1/common/enums.h" +#include "common/tools_common.h" +#include "common/video_writer.h" +#include "aom_ports/aom_timer.h" + +#define zero(Dest) memset(&(Dest), 0, sizeof(Dest)); + +static const char *exec_name; + +void usage_exit(void) { exit(EXIT_FAILURE); } + +static int mode_to_num_temporal_layers[10] = { 1, 2, 3, 3, 2, 1, 1, 3, 3, 3 }; +static int mode_to_num_spatial_layers[10] = { 1, 1, 1, 1, 1, 2, 3, 3, 3, 3 }; +static int mode_to_num_layers[10] = { 1, 2, 3, 3, 2, 2, 3, 9, 9, 9 }; + +// For rate control encoding stats. +struct RateControlMetrics { + // Number of input frames per layer. + int layer_input_frames[AOM_MAX_TS_LAYERS]; + // Number of encoded non-key frames per layer. + int layer_enc_frames[AOM_MAX_TS_LAYERS]; + // Framerate per layer layer (cumulative). + double layer_framerate[AOM_MAX_TS_LAYERS]; + // Target average frame size per layer (per-frame-bandwidth per layer). + double layer_pfb[AOM_MAX_LAYERS]; + // Actual average frame size per layer. + double layer_avg_frame_size[AOM_MAX_LAYERS]; + // Average rate mismatch per layer (|target - actual| / target). + double layer_avg_rate_mismatch[AOM_MAX_LAYERS]; + // Actual encoding bitrate per layer (cumulative across temporal layers). + double layer_encoding_bitrate[AOM_MAX_LAYERS]; + // Average of the short-time encoder actual bitrate. + // TODO(marpan): Should we add these short-time stats for each layer? + double avg_st_encoding_bitrate; + // Variance of the short-time encoder actual bitrate. 
+ double variance_st_encoding_bitrate; + // Window (number of frames) for computing short-timee encoding bitrate. + int window_size; + // Number of window measurements. + int window_count; + int layer_target_bitrate[AOM_MAX_LAYERS]; +}; + +static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) { + FILE *f = input_ctx->file; + y4m_input *y4m = &input_ctx->y4m; + int shortread = 0; + + if (input_ctx->file_type == FILE_TYPE_Y4M) { + if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0; + } else { + shortread = read_yuv_frame(input_ctx, img); + } + + return !shortread; +} + +static int file_is_y4m(const char detect[4]) { + if (memcmp(detect, "YUV4", 4) == 0) { + return 1; + } + return 0; +} + +static int fourcc_is_ivf(const char detect[4]) { + if (memcmp(detect, "DKIF", 4) == 0) { + return 1; + } + return 0; +} + +static void close_input_file(struct AvxInputContext *input) { + fclose(input->file); + if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m); +} + +static void open_input_file(struct AvxInputContext *input, + aom_chroma_sample_position_t csp) { + /* Parse certain options from the input file, if possible */ + input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb") + : set_binary_mode(stdin); + + if (!input->file) fatal("Failed to open input file"); + + if (!fseeko(input->file, 0, SEEK_END)) { + /* Input file is seekable. Figure out how long it is, so we can get + * progress info. + */ + input->length = ftello(input->file); + rewind(input->file); + } + + /* Default to 1:1 pixel aspect ratio. */ + input->pixel_aspect_ratio.numerator = 1; + input->pixel_aspect_ratio.denominator = 1; + + /* For RAW input sources, these bytes will applied on the first frame + * in read_frame(). 
+ */ + input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file); + input->detect.position = 0; + + if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) { + if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp, + input->only_i420) >= 0) { + input->file_type = FILE_TYPE_Y4M; + input->width = input->y4m.pic_w; + input->height = input->y4m.pic_h; + input->pixel_aspect_ratio.numerator = input->y4m.par_n; + input->pixel_aspect_ratio.denominator = input->y4m.par_d; + input->framerate.numerator = input->y4m.fps_n; + input->framerate.denominator = input->y4m.fps_d; + input->fmt = input->y4m.aom_fmt; + input->bit_depth = input->y4m.bit_depth; + } else { + fatal("Unsupported Y4M stream."); + } + } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) { + fatal("IVF is not supported as input."); + } else { + input->file_type = FILE_TYPE_RAW; + } +} + +// Note: these rate control metrics assume only 1 key frame in the +// sequence (i.e., first frame only). So for temporal pattern# 7 +// (which has key frame for every frame on base layer), the metrics +// computation will be off/wrong. +// TODO(marpan): Update these metrics to account for multiple key frames +// in the stream. +static void set_rate_control_metrics(struct RateControlMetrics *rc, + double framerate, + unsigned int ss_number_layers, + unsigned int ts_number_layers) { + int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 }; + ts_rate_decimator[0] = 1; + if (ts_number_layers == 2) { + ts_rate_decimator[0] = 2; + ts_rate_decimator[1] = 1; + } + if (ts_number_layers == 3) { + ts_rate_decimator[0] = 4; + ts_rate_decimator[1] = 2; + ts_rate_decimator[2] = 1; + } + // Set the layer (cumulative) framerate and the target layer (non-cumulative) + // per-frame-bandwidth, for the rate control encoding stats below. 
+ for (unsigned int sl = 0; sl < ss_number_layers; ++sl) { + unsigned int i = sl * ts_number_layers; + rc->layer_framerate[0] = framerate / ts_rate_decimator[0]; + rc->layer_pfb[i] = + 1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0]; + for (unsigned int tl = 0; tl < ts_number_layers; ++tl) { + i = sl * ts_number_layers + tl; + if (tl > 0) { + rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl]; + rc->layer_pfb[i] = + 1000.0 * + (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) / + (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]); + } + rc->layer_input_frames[tl] = 0; + rc->layer_enc_frames[tl] = 0; + rc->layer_encoding_bitrate[i] = 0.0; + rc->layer_avg_frame_size[i] = 0.0; + rc->layer_avg_rate_mismatch[i] = 0.0; + } + } + rc->window_count = 0; + rc->window_size = 15; + rc->avg_st_encoding_bitrate = 0.0; + rc->variance_st_encoding_bitrate = 0.0; +} + +static void printout_rate_control_summary(struct RateControlMetrics *rc, + int frame_cnt, + unsigned int ss_number_layers, + unsigned int ts_number_layers) { + int tot_num_frames = 0; + double perc_fluctuation = 0.0; + printf("Total number of processed frames: %d\n\n", frame_cnt - 1); + printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers); + for (unsigned int sl = 0; sl < ss_number_layers; ++sl) { + tot_num_frames = 0; + for (unsigned int tl = 0; tl < ts_number_layers; ++tl) { + unsigned int i = sl * ts_number_layers + tl; + const int num_dropped = + tl > 0 ? 
rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] + : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1; + tot_num_frames += rc->layer_input_frames[tl]; + rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] * + rc->layer_encoding_bitrate[i] / + tot_num_frames; + rc->layer_avg_frame_size[i] = + rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl]; + rc->layer_avg_rate_mismatch[i] = + 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl]; + printf("For layer#: %d %d \n", sl, tl); + printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i], + rc->layer_encoding_bitrate[i]); + printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i], + rc->layer_avg_frame_size[i]); + printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]); + printf( + "Number of input frames, encoded (non-key) frames, " + "and perc dropped frames: %d %d %f\n", + rc->layer_input_frames[tl], rc->layer_enc_frames[tl], + 100.0 * num_dropped / rc->layer_input_frames[tl]); + printf("\n"); + } + } + rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count; + rc->variance_st_encoding_bitrate = + rc->variance_st_encoding_bitrate / rc->window_count - + (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate); + perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) / + rc->avg_st_encoding_bitrate; + printf("Short-time stats, for window of %d frames:\n", rc->window_size); + printf("Average, rms-variance, and percent-fluct: %f %f %f\n", + rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate), + perc_fluctuation); + if (frame_cnt - 1 != tot_num_frames) + die("Error: Number of input frames not equal to output!\n"); +} + +// Layer pattern configuration. 
+static int set_layer_pattern(int layering_mode, int superframe_cnt, + aom_svc_layer_id_t *layer_id, + aom_svc_ref_frame_config_t *ref_frame_config, + int *use_svc_control, int spatial_layer_id, + int is_key_frame, int ksvc_mode) { + int i; + int shift = (layering_mode == 7) ? 2 : 0; + *use_svc_control = 1; + layer_id->spatial_layer_id = spatial_layer_id; + // Set the referende map buffer idx for the 7 references: + // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3), + // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6). + for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0; + for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0; + // Note for this layered patterns only use LAST and GF for prediction in + // non-rd mode (speed >= 7). + int layer_flags = AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | + AOM_EFLAG_NO_REF_ARF | AOM_EFLAG_NO_REF_BWD | + AOM_EFLAG_NO_REF_ARF2; + if (ksvc_mode) { + // Same pattern as case 8. + layering_mode = 8; + if (!is_key_frame) + // No inter-layer prediction on inter-frames. + layer_flags |= AOM_EFLAG_NO_REF_GF; + } + switch (layering_mode) { + case 0: + // 1-layer: update LAST on every frame, reference LAST and GF. + layer_id->temporal_layer_id = 0; + ref_frame_config->refresh[0] = 1; + break; + case 1: + // 2-temporal layer. + // 1 3 5 + // 0 2 4 + if (superframe_cnt % 2 == 0) { + layer_id->temporal_layer_id = 0; + // Update LAST on layer 0, reference LAST and GF. + ref_frame_config->refresh[0] = 1; + } else { + layer_id->temporal_layer_id = 1; + // No updates on layer 1, only reference LAST (TL0). + layer_flags |= AOM_EFLAG_NO_REF_GF; + } + break; + case 2: + // 3-temporal layer: + // 1 3 5 7 + // 2 6 + // 0 4 8 + if (superframe_cnt % 4 == 0) { + // Base layer. + layer_id->temporal_layer_id = 0; + // Update LAST on layer 0, reference LAST and GF. 
+ ref_frame_config->refresh[0] = 1; + } else if ((superframe_cnt - 1) % 4 == 0) { + layer_id->temporal_layer_id = 2; + // First top layer: no updates, only reference LAST (TL0). + layer_flags |= AOM_EFLAG_NO_REF_GF; + } else if ((superframe_cnt - 2) % 4 == 0) { + layer_id->temporal_layer_id = 1; + // Middle layer (TL1): update LAST2, only reference LAST (TL0). + ref_frame_config->refresh[1] = 1; + layer_flags |= AOM_EFLAG_NO_REF_GF; + } else if ((superframe_cnt - 3) % 4 == 0) { + layer_id->temporal_layer_id = 2; + // Second top layer: no updates, only reference LAST. + // Set buffer idx for LAST to slot 1, since that was the slot + // updated in previous frame. So LAST is TL1 frame. + ref_frame_config->ref_idx[0] = 1; + ref_frame_config->ref_idx[1] = 0; + layer_flags |= AOM_EFLAG_NO_REF_GF; + } + break; + case 3: + // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will + // only reference GF (not LAST). Other frames only reference LAST. + // 1 3 5 7 + // 2 6 + // 0 4 8 + if (superframe_cnt % 4 == 0) { + // Base layer. + layer_id->temporal_layer_id = 0; + // Update LAST on layer 0, only reference LAST. + ref_frame_config->refresh[0] = 1; + layer_flags |= AOM_EFLAG_NO_REF_GF; + } else if ((superframe_cnt - 1) % 4 == 0) { + layer_id->temporal_layer_id = 2; + // First top layer: no updates, only reference LAST (TL0). + layer_flags |= AOM_EFLAG_NO_REF_GF; + } else if ((superframe_cnt - 2) % 4 == 0) { + layer_id->temporal_layer_id = 1; + // Middle layer (TL1): update GF, only reference LAST (TL0). + ref_frame_config->refresh[3] = 1; + layer_flags |= AOM_EFLAG_NO_REF_GF; + } else if ((superframe_cnt - 3) % 4 == 0) { + layer_id->temporal_layer_id = 2; + // Second top layer: no updates, only reference GF. + layer_flags |= AOM_EFLAG_NO_REF_LAST; + } + break; + case 4: + // 2-temporal layer with the old update flags, not with the new + // SVC control. 
+ *use_svc_control = 0; + // 1 3 5 + // 0 2 4 + if (superframe_cnt % 2 == 0) { + layer_id->temporal_layer_id = 0; + // Update LAST on layer 0, reference LAST and GF. + layer_flags |= AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF; + } else { + layer_id->temporal_layer_id = 1; + // No updates on layer 1, only reference LAST (TL0). + layer_flags |= AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | + AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_REF_GF; + } + break; + case 5: + // 2 spatial layers, 1 temporal. + layer_id->temporal_layer_id = 0; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST, update LAST. + ref_frame_config->refresh[0] = 1; + layer_flags |= AOM_EFLAG_NO_REF_GF; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1 + // and GOLDEN to slot 0. Update slot 1 (LAST). + ref_frame_config->ref_idx[0] = 1; + ref_frame_config->ref_idx[3] = 0; + ref_frame_config->refresh[1] = 1; + } + break; + case 6: + // 3 spatial layers, 1 temporal. + // Note for this case, we set the buffer idx for all references to be + // either LAST or GOLDEN, which are always valid references, since decoder + // will check if any of the 7 references is valid scale in + // valid_ref_frame_size(). + layer_id->temporal_layer_id = 0; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST, update LAST. Set all buffer_idx to 0. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->refresh[0] = 1; + layer_flags |= AOM_EFLAG_NO_REF_GF; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1 + // and GOLDEN (and all other refs) to slot 0. + // Update slot 1 (LAST). + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[0] = 1; + ref_frame_config->refresh[1] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Reference LAST and GOLDEN. 
Set buffer_idx for LAST to slot 2 + // and GOLDEN (and all other refs) to slot 1. + // Update slot 2 (LAST). + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 1; + ref_frame_config->ref_idx[0] = 2; + ref_frame_config->refresh[2] = 1; + } + break; + case 7: + // 3 spatial and 3 temporal layer. + // Same as case 8 but overalap in the buffer slot updates. + // (shift = 2). The slots 3 and 4 updated by first TL2 are + // reused for update in TL1 superframe. + // Note for this case, frame order hint must be disabled for + // lower resolutios (operating points > 0) to be decoedable. + case 8: + // 3 spatial and 3 temporal layer. + // No overlap in buffer updates between TL2 and TL1. + // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7. + // Set the references via the svc_ref_frame_config control. + layer_flags = 0; + // Always reference LAST. + ref_frame_config->reference[0] = 1; + if (superframe_cnt % 4 == 0) { + // Base temporal layer. + layer_id->temporal_layer_id = 0; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST, update LAST. + // Set all buffer_idx to 0. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->refresh[0] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1, + // GOLDEN (and all other refs) to slot 0. + // Update slot 1 (LAST). + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[0] = 1; + ref_frame_config->refresh[1] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2, + // GOLDEN (and all other refs) to slot 1. + // Update slot 2 (LAST). + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 1; + ref_frame_config->ref_idx[0] = 2; + ref_frame_config->refresh[2] = 1; + } + } else if ((superframe_cnt - 1) % 4 == 0) { + // First top temporal enhancement layer. 
+ layer_id->temporal_layer_id = 2; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST (slot 0). + // Set GOLDEN to slot 3 and update slot 3. + // Set all other buffer_idx to slot 0. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[3] = 3; + ref_frame_config->refresh[3] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1, + // GOLDEN (and all other refs) to slot 3. + // Set LAST2 to slot 4 and Update slot 4. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 3; + ref_frame_config->ref_idx[0] = 1; + ref_frame_config->ref_idx[1] = 4; + ref_frame_config->refresh[4] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2, + // GOLDEN (and all other refs) to slot 4. + // No update. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 4; + ref_frame_config->ref_idx[0] = 2; + } + } else if ((superframe_cnt - 2) % 4 == 0) { + // Middle temporal enhancement layer. + layer_id->temporal_layer_id = 1; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST. + // Set all buffer_idx to 0. + // Set GOLDEN to slot 5 and update slot 5. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[3] = 5 - shift; + ref_frame_config->refresh[5 - shift] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1, + // GOLDEN (and all other refs) to slot 5. + // Set LAST2 to slot 6 and update slot 6. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 5 - shift; + ref_frame_config->ref_idx[0] = 1; + ref_frame_config->ref_idx[2] = 6 - shift; + ref_frame_config->refresh[6 - shift] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Reference LAST and GOLDEN. 
Set buffer_idx for LAST to slot 2, + // GOLDEN (and all other refs) to slot 6. + // Set LAST2 to slot 6 and update slot 7. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 6 - shift; + ref_frame_config->ref_idx[0] = 2; + ref_frame_config->ref_idx[2] = 7 - shift; + ref_frame_config->refresh[7 - shift] = 1; + } + } else if ((superframe_cnt - 3) % 4 == 0) { + // Second top temporal enhancement layer. + layer_id->temporal_layer_id = 2; + if (layer_id->spatial_layer_id == 0) { + // Set LAST to slot 5 and reference LAST. + // Set GOLDEN to slot 3 and update slot 3. + // Set all other buffer_idx to 0. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[0] = 5 - shift; + ref_frame_config->ref_idx[3] = 3; + ref_frame_config->refresh[3] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6, + // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[0] = 6 - shift; + ref_frame_config->ref_idx[3] = 3; + ref_frame_config->ref_idx[1] = 4; + ref_frame_config->refresh[4] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7, + // GOLDEN to slot 4. No update. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[0] = 7 - shift; + ref_frame_config->ref_idx[3] = 4; + } + } + if (layer_id->spatial_layer_id > 0) + ref_frame_config->reference[3] = 1; // Reference GOLDEN. 
+ break; + default: assert(0); die("Error: Unsupported temporal layering mode!\n"); + } + return layer_flags; +} + +int main(int argc, char **argv) { + AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL }; + aom_codec_ctx_t codec; + aom_codec_enc_cfg_t cfg; + int frame_cnt = 0; + aom_image_t raw; + aom_codec_err_t res; + unsigned int width; + unsigned int height; + uint32_t error_resilient = 0; + int speed; + int frame_avail; + int got_data = 0; + int flags = 0; + unsigned i; + int pts = 0; // PTS starts at 0. + int frame_duration = 1; // 1 timebase tick per frame. + int layering_mode = 0; + aom_svc_layer_id_t layer_id; + aom_svc_params_t svc_params; + aom_svc_ref_frame_config_t ref_frame_config; + const AvxInterface *encoder = NULL; + struct AvxInputContext input_ctx; + struct RateControlMetrics rc; + int64_t cx_time = 0; + const int min_args_base = 13; + const int min_args = min_args_base; + double sum_bitrate = 0.0; + double sum_bitrate2 = 0.0; + double framerate = 30.0; + int use_svc_control = 1; + zero(rc.layer_target_bitrate); + memset(&layer_id, 0, sizeof(aom_svc_layer_id_t)); + memset(&input_ctx, 0, sizeof(input_ctx)); + memset(&svc_params, 0, sizeof(svc_params)); + + // Flag to test dynamic scaling of source frames for single + // spatial stream, using the scaling_mode control. + const int test_dynamic_scaling_single_layer = 0; + + /* Setup default input stream settings */ + input_ctx.framerate.numerator = 30; + input_ctx.framerate.denominator = 1; + input_ctx.only_i420 = 1; + input_ctx.bit_depth = 0; + unsigned int ts_number_layers = 1; + unsigned int ss_number_layers = 1; + exec_name = argv[0]; + // Check usage and arguments. + if (argc < min_args) { + die("Usage: %s <infile> <outfile> <codec_type(av1)> <width> <height> " + "<rate_num> <rate_den> <speed> <frame_drop_threshold> " + "<error_resilient> <threads> <mode> " + "<Rate_0> ... 
<Rate_nlayers-1>\n", + argv[0]); + } + + encoder = get_aom_encoder_by_name(argv[3]); + + width = (unsigned int)strtoul(argv[4], NULL, 0); + height = (unsigned int)strtoul(argv[5], NULL, 0); + if (width < 16 || width % 2 || height < 16 || height % 2) { + die("Invalid resolution: %d x %d", width, height); + } + + layering_mode = (int)strtol(argv[12], NULL, 0); + if (layering_mode < 0 || layering_mode > 13) { + die("Invalid layering mode (0..12) %s", argv[12]); + } + + if (argc != min_args + mode_to_num_layers[layering_mode]) { + die("Invalid number of arguments"); + } + + ts_number_layers = mode_to_num_temporal_layers[layering_mode]; + ss_number_layers = mode_to_num_spatial_layers[layering_mode]; + + input_ctx.filename = argv[1]; + open_input_file(&input_ctx, 0); + + // Y4M reader has its own allocation. + if (input_ctx.file_type != FILE_TYPE_Y4M) { + if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) { + die("Failed to allocate image", width, height); + } + } + + // Populate encoder configuration. + res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); + if (res) { + printf("Failed to get config: %s\n", aom_codec_err_to_string(res)); + return EXIT_FAILURE; + } + + // Update the default configuration with our settings. + cfg.g_w = width; + cfg.g_h = height; + + // Timebase format e.g. 30fps: numerator=1, demoninator = 30. 
+ cfg.g_timebase.num = (int)strtol(argv[6], NULL, 0); + cfg.g_timebase.den = (int)strtol(argv[7], NULL, 0); + + speed = (int)strtol(argv[8], NULL, 0); + if (speed < 0 || speed > 8) { + die("Invalid speed setting: must be positive"); + } + + for (i = min_args_base; + (int)i < min_args_base + mode_to_num_layers[layering_mode]; ++i) { + rc.layer_target_bitrate[i - 13] = (int)strtol(argv[i], NULL, 0); + svc_params.layer_target_bitrate[i - 13] = rc.layer_target_bitrate[i - 13]; + } + + cfg.rc_target_bitrate = + svc_params.layer_target_bitrate[ss_number_layers * ts_number_layers - 1]; + + svc_params.framerate_factor[0] = 1; + if (ts_number_layers == 2) { + svc_params.framerate_factor[0] = 2; + svc_params.framerate_factor[1] = 1; + } else if (ts_number_layers == 3) { + svc_params.framerate_factor[0] = 4; + svc_params.framerate_factor[1] = 2; + svc_params.framerate_factor[2] = 1; + } + + // Real time parameters. + cfg.g_usage = AOM_USAGE_REALTIME; + + cfg.rc_dropframe_thresh = (unsigned int)strtoul(argv[9], NULL, 0); + cfg.rc_end_usage = AOM_CBR; + cfg.rc_min_quantizer = 2; + cfg.rc_max_quantizer = 52; + cfg.rc_undershoot_pct = 50; + cfg.rc_overshoot_pct = 50; + cfg.rc_buf_initial_sz = 600; + cfg.rc_buf_optimal_sz = 600; + cfg.rc_buf_sz = 1000; + + // Use 1 thread as default. + cfg.g_threads = (unsigned int)strtoul(argv[11], NULL, 0); + + error_resilient = (uint32_t)strtoul(argv[10], NULL, 0); + if (error_resilient != 0 && error_resilient != 1) { + die("Invalid value for error resilient (0, 1): %d.", error_resilient); + } + // Enable error resilient mode. + cfg.g_error_resilient = error_resilient; + cfg.g_lag_in_frames = 0; + cfg.kf_mode = AOM_KF_AUTO; + + // Disable automatic keyframe placement. 
+ cfg.kf_min_dist = cfg.kf_max_dist = 3000; + + framerate = cfg.g_timebase.den / cfg.g_timebase.num; + set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers); + + if (input_ctx.file_type == FILE_TYPE_Y4M) { + if (input_ctx.width != cfg.g_w || input_ctx.height != cfg.g_h) { + die("Incorrect width or height: %d x %d", cfg.g_w, cfg.g_h); + } + if (input_ctx.framerate.numerator != cfg.g_timebase.den || + input_ctx.framerate.denominator != cfg.g_timebase.num) { + die("Incorrect framerate: numerator %d denominator %d", + cfg.g_timebase.num, cfg.g_timebase.den); + } + } + + // Open an output file for each stream. + for (unsigned int sl = 0; sl < ss_number_layers; ++sl) { + for (unsigned tl = 0; tl < ts_number_layers; ++tl) { + i = sl * ts_number_layers + tl; + char file_name[PATH_MAX]; + AvxVideoInfo info; + info.codec_fourcc = encoder->fourcc; + info.frame_width = cfg.g_w; + info.frame_height = cfg.g_h; + info.time_base.numerator = cfg.g_timebase.num; + info.time_base.denominator = cfg.g_timebase.den; + + snprintf(file_name, sizeof(file_name), "%s_%d.av1", argv[2], i); + outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info); + if (!outfile[i]) die("Failed to open %s for writing", file_name); + assert(outfile[i] != NULL); + } + } + + // Initialize codec. 
+ if (aom_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) + die_codec(&codec, "Failed to initialize encoder"); + + aom_codec_control(&codec, AOME_SET_CPUUSED, speed); + aom_codec_control(&codec, AV1E_SET_AQ_MODE, 3); + aom_codec_control(&codec, AV1E_SET_GF_CBR_BOOST_PCT, 0); + aom_codec_control(&codec, AV1E_SET_ENABLE_CDEF, 1); + aom_codec_control(&codec, AV1E_SET_ENABLE_ORDER_HINT, 0); + aom_codec_control(&codec, AV1E_SET_ENABLE_TPL_MODEL, 0); + aom_codec_control(&codec, AV1E_SET_DELTAQ_MODE, 0); + + svc_params.number_spatial_layers = ss_number_layers; + svc_params.number_temporal_layers = ts_number_layers; + for (i = 0; i < ss_number_layers * ts_number_layers; ++i) { + svc_params.max_quantizers[i] = cfg.rc_max_quantizer; + svc_params.min_quantizers[i] = cfg.rc_min_quantizer; + } + for (i = 0; i < ss_number_layers; ++i) { + svc_params.scaling_factor_num[i] = 1; + svc_params.scaling_factor_den[i] = 1; + } + if (ss_number_layers == 2) { + svc_params.scaling_factor_num[0] = 1; + svc_params.scaling_factor_den[0] = 2; + } else if (ss_number_layers == 3) { + svc_params.scaling_factor_num[0] = 1; + svc_params.scaling_factor_den[0] = 4; + svc_params.scaling_factor_num[1] = 1; + svc_params.scaling_factor_den[1] = 2; + } + + aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params); + + // This controls the maximum target size of the key frame. + // For generating smaller key frames, use a smaller max_intra_size_pct + // value, like 100 or 200. + { + const int max_intra_size_pct = 300; + aom_codec_control(&codec, AOME_SET_MAX_INTRA_BITRATE_PCT, + max_intra_size_pct); + } + + frame_avail = 1; + while (frame_avail || got_data) { + struct aom_usec_timer timer; + frame_avail = read_frame(&input_ctx, &raw); + int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0; + // Loop over spatial layers. 
+ for (unsigned int slx = 0; slx < ss_number_layers; slx++) { + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt; + int layer = 0; + + // Set the reference/update flags, layer_id, and reference_map + // buffer index. + flags = set_layer_pattern(layering_mode, frame_cnt, &layer_id, + &ref_frame_config, &use_svc_control, slx, + is_key_frame, (layering_mode == 9)); + aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id); + if (use_svc_control) + aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG, + &ref_frame_config); + + layer = slx * ts_number_layers + layer_id.temporal_layer_id; + if (frame_avail && slx == 0) ++rc.layer_input_frames[layer]; + + if (test_dynamic_scaling_single_layer) { + if (frame_cnt >= 200 && frame_cnt <= 400) { + // Scale source down by 2x2. + struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO }; + aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode); + } else { + // Source back up to original resolution (no scaling). + struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL }; + aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode); + } + } + + // Do the layer encode. + aom_usec_timer_start(&timer); + if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags)) + die_codec(&codec, "Failed to encode frame"); + aom_usec_timer_mark(&timer); + cx_time += aom_usec_timer_elapsed(&timer); + + got_data = 0; + while ((pkt = aom_codec_get_cx_data(&codec, &iter))) { + got_data = 1; + switch (pkt->kind) { + case AOM_CODEC_CX_FRAME_PKT: + for (unsigned int sl = layer_id.spatial_layer_id; + sl < ss_number_layers; ++sl) { + for (unsigned tl = layer_id.temporal_layer_id; + tl < ts_number_layers; ++tl) { + unsigned int j = sl * ts_number_layers + tl; + aom_video_writer_write_frame(outfile[j], pkt->data.frame.buf, + pkt->data.frame.sz, pts); + if (sl == (unsigned int)layer_id.spatial_layer_id) + rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz; + // Keep count of rate control stats per layer (for non-key). 
+ if (tl == (unsigned int)layer_id.temporal_layer_id && + sl == (unsigned int)layer_id.spatial_layer_id && + !(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) { + rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz; + rc.layer_avg_rate_mismatch[j] += + fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) / + rc.layer_pfb[j]; + if (slx == 0) ++rc.layer_enc_frames[tl]; + } + } + } + + // Update for short-time encoding bitrate states, for moving window + // of size rc->window, shifted by rc->window / 2. + // Ignore first window segment, due to key frame. + // For spatial layers: only do this for top/highest SL. + if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) { + sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate; + rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size; + if (frame_cnt % rc.window_size == 0) { + rc.window_count += 1; + rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size; + rc.variance_st_encoding_bitrate += + (sum_bitrate / rc.window_size) * + (sum_bitrate / rc.window_size); + sum_bitrate = 0.0; + } + } + // Second shifted window. 
+ if (frame_cnt > rc.window_size + rc.window_size / 2 && + slx == ss_number_layers - 1) { + sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate; + if (frame_cnt > 2 * rc.window_size && + frame_cnt % rc.window_size == 0) { + rc.window_count += 1; + rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size; + rc.variance_st_encoding_bitrate += + (sum_bitrate2 / rc.window_size) * + (sum_bitrate2 / rc.window_size); + sum_bitrate2 = 0.0; + } + } + break; + default: break; + } + } + } // loop over spatial layers + ++frame_cnt; + pts += frame_duration; + } + close_input_file(&input_ctx); + printout_rate_control_summary(&rc, frame_cnt, ss_number_layers, + ts_number_layers); + printf("\n"); + printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n", + frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000), + 1000000 * (double)frame_cnt / (double)cx_time); + + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + + // Try to rewrite the output file headers with the actual frame count. + for (i = 0; i < ss_number_layers * ts_number_layers; ++i) + aom_video_writer_close(outfile[i]); + + if (input_ctx.file_type != FILE_TYPE_Y4M) { + aom_img_free(&raw); + } + return EXIT_SUCCESS; +} diff --git a/libs/libaom/src/examples/twopass_encoder.c b/libs/libaom/src/examples/twopass_encoder.c new file mode 100644 index 000000000..a03bc6cc2 --- /dev/null +++ b/libs/libaom/src/examples/twopass_encoder.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Two Pass Encoder +// ================ +// +// This is an example of a two pass encoder loop. It takes an input file in +// YV12 format, passes it through the encoder twice, and writes the compressed +// frames to disk in IVF format. It builds upon the simple_encoder example. +// +// Twopass Variables +// ----------------- +// Twopass mode needs to track the current pass number and the buffer of +// statistics packets. +// +// Updating The Configuration +// --------------------------------- +// In two pass mode, the configuration has to be updated on each pass. The +// statistics buffer is passed on the last pass. +// +// Encoding A Frame +// ---------------- +// Encoding a frame in two pass mode is identical to the simple encoder +// example. +// +// Processing Statistics Packets +// ----------------------------- +// Each packet of type `AOM_CODEC_CX_FRAME_PKT` contains the encoded data +// for this frame. We write an IVF frame header, followed by the raw data. +// +// +// Pass Progress Reporting +// ----------------------------- +// It's sometimes helpful to see when each pass completes. +// +// +// Clean-up +// ----------------------------- +// Destruction of the encoder instance must be done on each pass. The +// raw image should be destroyed at the end as usual.
+ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_encoder.h" +#include "common/tools_common.h" +#include "common/video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s <codec> <width> <height> <infile> <outfile> " + "<limit(optional)>\n", + exec_name); + exit(EXIT_FAILURE); +} + +static int get_frame_stats(aom_codec_ctx_t *ctx, const aom_image_t *img, + aom_codec_pts_t pts, unsigned int duration, + aom_enc_frame_flags_t flags, + aom_fixed_buf_t *stats) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = aom_codec_encode(ctx, img, pts, duration, flags); + if (res != AOM_CODEC_OK) die_codec(ctx, "Failed to get frame stats."); + + while ((pkt = aom_codec_get_cx_data(ctx, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == AOM_CODEC_STATS_PKT) { + const uint8_t *const pkt_buf = pkt->data.twopass_stats.buf; + const size_t pkt_size = pkt->data.twopass_stats.sz; + stats->buf = realloc(stats->buf, stats->sz + pkt_size); + memcpy((uint8_t *)stats->buf + stats->sz, pkt_buf, pkt_size); + stats->sz += pkt_size; + } + } + + return got_pkts; +} + +static int encode_frame(aom_codec_ctx_t *ctx, const aom_image_t *img, + aom_codec_pts_t pts, unsigned int duration, + aom_enc_frame_flags_t flags, AvxVideoWriter *writer) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = aom_codec_encode(ctx, img, pts, duration, flags); + if (res != AOM_CODEC_OK) die_codec(ctx, "Failed to encode frame."); + + while ((pkt = aom_codec_get_cx_data(ctx, &iter)) != NULL) { + got_pkts = 1; + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0; + + if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) + die_codec(ctx, "Failed to write compressed frame."); + 
printf(keyframe ? "K" : "."); + fflush(stdout); + } + } + + return got_pkts; +} + +static aom_fixed_buf_t pass0(aom_image_t *raw, FILE *infile, + const AvxInterface *encoder, + const aom_codec_enc_cfg_t *cfg, int limit) { + aom_codec_ctx_t codec; + int frame_count = 0; + aom_fixed_buf_t stats = { NULL, 0 }; + + if (aom_codec_enc_init(&codec, encoder->codec_interface(), cfg, 0)) + die_codec(&codec, "Failed to initialize encoder"); + + // Calculate frame statistics. + while (aom_img_read(raw, infile) && frame_count < limit) { + ++frame_count; + get_frame_stats(&codec, raw, frame_count, 1, 0, &stats); + } + + // Flush encoder. + while (get_frame_stats(&codec, NULL, frame_count, 1, 0, &stats)) { + } + + printf("Pass 0 complete. Processed %d frames.\n", frame_count); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + return stats; +} + +static void pass1(aom_image_t *raw, FILE *infile, const char *outfile_name, + const AvxInterface *encoder, const aom_codec_enc_cfg_t *cfg, + int limit) { + AvxVideoInfo info = { encoder->fourcc, + cfg->g_w, + cfg->g_h, + { cfg->g_timebase.num, cfg->g_timebase.den }, + 0 }; + AvxVideoWriter *writer = NULL; + aom_codec_ctx_t codec; + int frame_count = 0; + + writer = aom_video_writer_open(outfile_name, kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing", outfile_name); + + if (aom_codec_enc_init(&codec, encoder->codec_interface(), cfg, 0)) + die_codec(&codec, "Failed to initialize encoder"); + + // Encode frames. + while (aom_img_read(raw, infile) && frame_count < limit) { + ++frame_count; + encode_frame(&codec, raw, frame_count, 1, 0, writer); + } + + // Flush encoder. + while (encode_frame(&codec, NULL, -1, 1, 0, writer)) { + } + + printf("\n"); + + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + aom_video_writer_close(writer); + + printf("Pass 1 complete. 
Processed %d frames.\n", frame_count); +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + int w, h; + aom_codec_ctx_t codec; + aom_codec_enc_cfg_t cfg; + aom_image_t raw; + aom_codec_err_t res; + aom_fixed_buf_t stats; + + const AvxInterface *encoder = NULL; + const int fps = 30; // TODO(dkovalev) add command line argument + const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument + const char *const codec_arg = argv[1]; + const char *const width_arg = argv[2]; + const char *const height_arg = argv[3]; + const char *const infile_arg = argv[4]; + const char *const outfile_arg = argv[5]; + int limit = 0; + exec_name = argv[0]; + + if (argc < 6) die("Invalid number of arguments"); + + if (argc > 6) limit = (int)strtol(argv[6], NULL, 0); + + if (limit == 0) limit = 100; + + encoder = get_aom_encoder_by_name(codec_arg); + if (!encoder) die("Unsupported codec."); + + w = (int)strtol(width_arg, NULL, 0); + h = (int)strtol(height_arg, NULL, 0); + + if (w <= 0 || h <= 0 || (w % 2) != 0 || (h % 2) != 0) + die("Invalid frame size: %dx%d", w, h); + + if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, w, h, 1)) + die("Failed to allocate image", w, h); + + printf("Using %s\n", aom_codec_iface_name(encoder->codec_interface())); + + // Configuration + res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); + if (res) die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = w; + cfg.g_h = h; + cfg.g_timebase.num = 1; + cfg.g_timebase.den = fps; + cfg.rc_target_bitrate = bitrate; + + if (!(infile = fopen(infile_arg, "rb"))) + die("Failed to open %s for reading", infile_arg); + + // Pass 0 + cfg.g_pass = AOM_RC_FIRST_PASS; + stats = pass0(&raw, infile, encoder, &cfg, limit); + + // Pass 1 + rewind(infile); + cfg.g_pass = AOM_RC_LAST_PASS; + cfg.rc_twopass_stats_in = stats; + pass1(&raw, infile, outfile_arg, encoder, &cfg, limit); + free(stats.buf); + + aom_img_free(&raw); + fclose(infile); + + return EXIT_SUCCESS; +} |