diff options
Diffstat (limited to 'libs/ffvpx/libavcodec')
156 files changed, 72707 insertions, 0 deletions
diff --git a/libs/ffvpx/libavcodec/allcodecs.c b/libs/ffvpx/libavcodec/allcodecs.c new file mode 100644 index 000000000..4d4ef530e --- /dev/null +++ b/libs/ffvpx/libavcodec/allcodecs.c @@ -0,0 +1,893 @@ +/* + * Provide registration of all codecs, parsers and bitstream filters for libavcodec. + * Copyright (c) 2002 Fabrice Bellard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Provide registration of all codecs, parsers and bitstream filters for libavcodec. + */ + +#include "config.h" +#include "libavutil/thread.h" +#include "avcodec.h" +#include "version.h" + +extern AVCodec ff_a64multi_encoder; +extern AVCodec ff_a64multi5_encoder; +extern AVCodec ff_aasc_decoder; +extern AVCodec ff_aic_decoder; +extern AVCodec ff_alias_pix_encoder; +extern AVCodec ff_alias_pix_decoder; +extern AVCodec ff_amv_encoder; +extern AVCodec ff_amv_decoder; +extern AVCodec ff_anm_decoder; +extern AVCodec ff_ansi_decoder; +extern AVCodec ff_apng_encoder; +extern AVCodec ff_apng_decoder; +extern AVCodec ff_asv1_encoder; +extern AVCodec ff_asv1_decoder; +extern AVCodec ff_asv2_encoder; +extern AVCodec ff_asv2_decoder; +extern AVCodec ff_aura_decoder; +extern AVCodec ff_aura2_decoder; +extern AVCodec ff_avrp_encoder; +extern AVCodec ff_avrp_decoder; +extern AVCodec ff_avrn_decoder; +extern AVCodec ff_avs_decoder; +extern AVCodec ff_avui_encoder; +extern AVCodec ff_avui_decoder; +extern AVCodec ff_ayuv_encoder; +extern AVCodec ff_ayuv_decoder; +extern AVCodec ff_bethsoftvid_decoder; +extern AVCodec ff_bfi_decoder; +extern AVCodec ff_bink_decoder; +extern AVCodec ff_bmp_encoder; +extern AVCodec ff_bmp_decoder; +extern AVCodec ff_bmv_video_decoder; +extern AVCodec ff_brender_pix_decoder; +extern AVCodec ff_c93_decoder; +extern AVCodec ff_cavs_decoder; +extern AVCodec ff_cdgraphics_decoder; +extern AVCodec ff_cdxl_decoder; +extern AVCodec ff_cfhd_decoder; +extern AVCodec ff_cinepak_encoder; +extern AVCodec ff_cinepak_decoder; +extern AVCodec ff_clearvideo_decoder; +extern AVCodec ff_cljr_encoder; +extern AVCodec ff_cljr_decoder; +extern AVCodec ff_cllc_decoder; +extern AVCodec ff_comfortnoise_encoder; +extern AVCodec ff_comfortnoise_decoder; +extern AVCodec ff_cpia_decoder; +extern AVCodec ff_cscd_decoder; +extern AVCodec ff_cyuv_decoder; +extern AVCodec ff_dds_decoder; +extern AVCodec ff_dfa_decoder; +extern AVCodec ff_dirac_decoder; +extern AVCodec ff_dnxhd_encoder; +extern AVCodec ff_dnxhd_decoder; +extern AVCodec ff_dpx_encoder; +extern AVCodec ff_dpx_decoder; +extern AVCodec ff_dsicinvideo_decoder; +extern AVCodec ff_dvaudio_decoder; +extern AVCodec ff_dvvideo_encoder; +extern AVCodec ff_dvvideo_decoder; +extern AVCodec ff_dxa_decoder; +extern AVCodec ff_dxtory_decoder; +extern AVCodec ff_dxv_decoder; +extern AVCodec ff_eacmv_decoder; +extern AVCodec ff_eamad_decoder; +extern AVCodec ff_eatgq_decoder; +extern AVCodec ff_eatgv_decoder; +extern AVCodec ff_eatqi_decoder; +extern AVCodec ff_eightbps_decoder; +extern AVCodec ff_eightsvx_exp_decoder; +extern AVCodec ff_eightsvx_fib_decoder; +extern AVCodec ff_escape124_decoder; +extern AVCodec ff_escape130_decoder; +extern AVCodec ff_exr_decoder; +extern AVCodec ff_ffv1_encoder; +extern AVCodec ff_ffv1_decoder; +extern AVCodec ff_ffvhuff_encoder; +extern AVCodec ff_ffvhuff_decoder; +extern AVCodec ff_fic_decoder; +extern AVCodec ff_fits_encoder; +extern AVCodec ff_fits_decoder; +extern AVCodec ff_flashsv_encoder; +extern AVCodec ff_flashsv_decoder; +extern AVCodec ff_flashsv2_encoder; +extern AVCodec ff_flashsv2_decoder; +extern AVCodec ff_flic_decoder; +extern AVCodec ff_flv_encoder; +extern AVCodec ff_flv_decoder; +extern AVCodec ff_fmvc_decoder; +extern AVCodec ff_fourxm_decoder; +extern AVCodec ff_fraps_decoder; +extern AVCodec ff_frwu_decoder; +extern AVCodec ff_g2m_decoder; +extern AVCodec ff_gdv_decoder; +extern AVCodec ff_gif_encoder; +extern AVCodec ff_gif_decoder; +extern AVCodec ff_h261_encoder; +extern AVCodec ff_h261_decoder; +extern AVCodec ff_h263_encoder; +extern AVCodec ff_h263_decoder; +extern AVCodec ff_h263i_decoder; +extern AVCodec ff_h263p_encoder; +extern AVCodec ff_h263p_decoder; +extern AVCodec ff_h263_v4l2m2m_decoder; +extern AVCodec ff_h264_decoder; +extern AVCodec ff_h264_crystalhd_decoder; +extern AVCodec ff_h264_v4l2m2m_decoder; +extern AVCodec ff_h264_mediacodec_decoder; +extern AVCodec ff_h264_mmal_decoder; +extern AVCodec ff_h264_qsv_decoder; +extern AVCodec ff_h264_rkmpp_decoder; +extern AVCodec ff_hap_encoder; +extern AVCodec ff_hap_decoder; +extern AVCodec ff_hevc_decoder; +extern AVCodec ff_hevc_qsv_decoder; +extern AVCodec ff_hevc_rkmpp_decoder; +extern AVCodec ff_hevc_v4l2m2m_decoder; +extern AVCodec ff_hnm4_video_decoder; +extern AVCodec ff_hq_hqa_decoder; +extern AVCodec ff_hqx_decoder; +extern AVCodec ff_huffyuv_encoder; +extern AVCodec ff_huffyuv_decoder; +extern AVCodec ff_idcin_decoder; +extern AVCodec ff_iff_ilbm_decoder; +extern AVCodec ff_indeo2_decoder; +extern AVCodec ff_indeo3_decoder; +extern AVCodec ff_indeo4_decoder; +extern AVCodec ff_indeo5_decoder; +extern AVCodec ff_interplay_video_decoder; +extern AVCodec ff_jpeg2000_encoder; +extern AVCodec ff_jpeg2000_decoder; +extern AVCodec ff_jpegls_encoder; +extern AVCodec ff_jpegls_decoder; +extern AVCodec ff_jv_decoder; +extern AVCodec ff_kgv1_decoder; +extern AVCodec ff_kmvc_decoder; +extern AVCodec ff_lagarith_decoder; +extern AVCodec ff_ljpeg_encoder; +extern AVCodec ff_loco_decoder; +extern AVCodec ff_m101_decoder; +extern AVCodec ff_magicyuv_encoder; +extern AVCodec ff_magicyuv_decoder; +extern AVCodec ff_mdec_decoder; +extern AVCodec ff_mimic_decoder; +extern AVCodec ff_mjpeg_encoder; +extern AVCodec ff_mjpeg_decoder; +extern AVCodec ff_mjpegb_decoder; +extern AVCodec ff_mmvideo_decoder; +extern AVCodec ff_motionpixels_decoder; +extern AVCodec ff_mpeg1video_encoder; +extern AVCodec ff_mpeg1video_decoder; +extern AVCodec ff_mpeg2video_encoder; +extern AVCodec ff_mpeg2video_decoder; +extern AVCodec ff_mpeg4_encoder; +extern AVCodec ff_mpeg4_decoder; +extern AVCodec ff_mpeg4_crystalhd_decoder; +extern AVCodec ff_mpeg4_v4l2m2m_decoder; +extern AVCodec ff_mpeg4_mmal_decoder; +extern AVCodec ff_mpegvideo_decoder; +extern AVCodec ff_mpeg1_v4l2m2m_decoder; +extern AVCodec ff_mpeg2_mmal_decoder; +extern AVCodec ff_mpeg2_crystalhd_decoder; +extern AVCodec ff_mpeg2_v4l2m2m_decoder; +extern AVCodec ff_mpeg2_qsv_decoder; +extern AVCodec ff_mpeg2_mediacodec_decoder; +extern AVCodec ff_msa1_decoder; +extern AVCodec ff_mscc_decoder; +extern AVCodec ff_msmpeg4v1_decoder; +extern AVCodec ff_msmpeg4v2_encoder; +extern AVCodec ff_msmpeg4v2_decoder; +extern AVCodec ff_msmpeg4v3_encoder; +extern AVCodec ff_msmpeg4v3_decoder; +extern AVCodec ff_msmpeg4_crystalhd_decoder; +extern AVCodec ff_msrle_decoder; +extern AVCodec ff_mss1_decoder; +extern AVCodec ff_mss2_decoder; +extern AVCodec ff_msvideo1_encoder; +extern AVCodec ff_msvideo1_decoder; +extern AVCodec ff_mszh_decoder; +extern AVCodec ff_mts2_decoder; +extern AVCodec ff_mvc1_decoder; +extern AVCodec ff_mvc2_decoder; +extern AVCodec ff_mxpeg_decoder; +extern AVCodec ff_nuv_decoder; +extern AVCodec ff_paf_video_decoder; +extern AVCodec ff_pam_encoder; +extern AVCodec ff_pam_decoder; +extern AVCodec ff_pbm_encoder; +extern AVCodec ff_pbm_decoder; +extern AVCodec ff_pcx_encoder; +extern AVCodec ff_pcx_decoder; +extern AVCodec ff_pgm_encoder; +extern AVCodec ff_pgm_decoder; +extern AVCodec ff_pgmyuv_encoder; +extern AVCodec ff_pgmyuv_decoder; +extern AVCodec ff_pictor_decoder; +extern AVCodec ff_pixlet_decoder; +extern AVCodec ff_png_encoder; +extern AVCodec ff_png_decoder; +extern AVCodec ff_ppm_encoder; +extern AVCodec ff_ppm_decoder; +extern AVCodec ff_prores_encoder; +extern AVCodec ff_prores_decoder; +extern AVCodec ff_prores_aw_encoder; +extern AVCodec ff_prores_ks_encoder; +extern AVCodec ff_prores_lgpl_decoder; +extern AVCodec ff_psd_decoder; +extern AVCodec ff_ptx_decoder; +extern AVCodec ff_qdraw_decoder; +extern AVCodec ff_qpeg_decoder; +extern AVCodec ff_qtrle_encoder; +extern AVCodec ff_qtrle_decoder; +extern AVCodec ff_r10k_encoder; +extern AVCodec ff_r10k_decoder; +extern AVCodec ff_r210_encoder; +extern AVCodec ff_r210_decoder; +extern AVCodec ff_rawvideo_encoder; +extern AVCodec ff_rawvideo_decoder; +extern AVCodec ff_rl2_decoder; +extern AVCodec ff_roq_encoder; +extern AVCodec ff_roq_decoder; +extern AVCodec ff_rpza_decoder; +extern AVCodec ff_rscc_decoder; +extern AVCodec ff_rv10_encoder; +extern AVCodec ff_rv10_decoder; +extern AVCodec ff_rv20_encoder; +extern AVCodec ff_rv20_decoder; +extern AVCodec ff_rv30_decoder; +extern AVCodec ff_rv40_decoder; +extern AVCodec ff_s302m_encoder; +extern AVCodec ff_s302m_decoder; +extern AVCodec ff_sanm_decoder; +extern AVCodec ff_scpr_decoder; +extern AVCodec ff_screenpresso_decoder; +extern AVCodec ff_sdx2_dpcm_decoder; +extern AVCodec ff_sgi_encoder; +extern AVCodec ff_sgi_decoder; +extern AVCodec ff_sgirle_decoder; +extern AVCodec ff_sheervideo_decoder; +extern AVCodec ff_smacker_decoder; +extern AVCodec ff_smc_decoder; +extern AVCodec ff_smvjpeg_decoder; +extern AVCodec ff_snow_encoder; +extern AVCodec ff_snow_decoder; +extern AVCodec ff_sp5x_decoder; +extern AVCodec ff_speedhq_decoder; +extern AVCodec ff_srgc_decoder; +extern AVCodec ff_sunrast_encoder; +extern AVCodec ff_sunrast_decoder; +extern AVCodec ff_svq1_encoder; +extern AVCodec ff_svq1_decoder; +extern AVCodec ff_svq3_decoder; +extern AVCodec ff_targa_encoder; +extern AVCodec ff_targa_decoder; +extern AVCodec ff_targa_y216_decoder; +extern AVCodec ff_tdsc_decoder; +extern AVCodec ff_theora_decoder; +extern AVCodec ff_thp_decoder; +extern AVCodec ff_tiertexseqvideo_decoder; +extern AVCodec ff_tiff_encoder; +extern AVCodec ff_tiff_decoder; +extern AVCodec ff_tmv_decoder; +extern AVCodec ff_truemotion1_decoder; +extern AVCodec ff_truemotion2_decoder; +extern AVCodec ff_truemotion2rt_decoder; +extern AVCodec ff_tscc_decoder; +extern AVCodec ff_tscc2_decoder; +extern AVCodec ff_txd_decoder; +extern AVCodec ff_ulti_decoder; +extern AVCodec ff_utvideo_encoder; +extern AVCodec ff_utvideo_decoder; +extern AVCodec ff_v210_encoder; +extern AVCodec ff_v210_decoder; +extern AVCodec ff_v210x_decoder; +extern AVCodec ff_v308_encoder; +extern AVCodec ff_v308_decoder; +extern AVCodec ff_v408_encoder; +extern AVCodec ff_v408_decoder; +extern AVCodec ff_v410_encoder; +extern AVCodec ff_v410_decoder; +extern AVCodec ff_vb_decoder; +extern AVCodec ff_vble_decoder; +extern AVCodec ff_vc1_decoder; +extern AVCodec ff_vc1_crystalhd_decoder; +extern AVCodec ff_vc1image_decoder; +extern AVCodec ff_vc1_mmal_decoder; +extern AVCodec ff_vc1_qsv_decoder; +extern AVCodec ff_vc1_v4l2m2m_decoder; +extern AVCodec ff_vc2_encoder; +extern AVCodec ff_vcr1_decoder; +extern AVCodec ff_vmdvideo_decoder; +extern AVCodec ff_vmnc_decoder; +extern AVCodec ff_vp3_decoder; +extern AVCodec ff_vp5_decoder; +extern AVCodec ff_vp6_decoder; +extern AVCodec ff_vp6a_decoder; +extern AVCodec ff_vp6f_decoder; +extern AVCodec ff_vp7_decoder; +extern AVCodec ff_vp8_decoder; +extern AVCodec ff_vp8_rkmpp_decoder; +extern AVCodec ff_vp8_v4l2m2m_decoder; +extern AVCodec ff_vp9_decoder; +extern AVCodec ff_vp9_rkmpp_decoder; +extern AVCodec ff_vp9_v4l2m2m_decoder; +extern AVCodec ff_vqa_decoder; +extern AVCodec ff_bitpacked_decoder; +extern AVCodec ff_webp_decoder; +extern AVCodec ff_wrapped_avframe_encoder; +extern AVCodec ff_wrapped_avframe_decoder; +extern AVCodec ff_wmv1_encoder; +extern AVCodec ff_wmv1_decoder; +extern AVCodec ff_wmv2_encoder; +extern AVCodec ff_wmv2_decoder; +extern AVCodec ff_wmv3_decoder; +extern AVCodec ff_wmv3_crystalhd_decoder; +extern AVCodec ff_wmv3image_decoder; +extern AVCodec ff_wnv1_decoder; +extern AVCodec ff_xan_wc3_decoder; +extern AVCodec ff_xan_wc4_decoder; +extern AVCodec ff_xbm_encoder; +extern AVCodec ff_xbm_decoder; +extern AVCodec ff_xface_encoder; +extern AVCodec ff_xface_decoder; +extern AVCodec ff_xl_decoder; +extern AVCodec ff_xpm_decoder; +extern AVCodec ff_xwd_encoder; +extern AVCodec ff_xwd_decoder; +extern AVCodec ff_y41p_encoder; +extern AVCodec ff_y41p_decoder; +extern AVCodec ff_ylc_decoder; +extern AVCodec ff_yop_decoder; +extern AVCodec ff_yuv4_encoder; +extern AVCodec ff_yuv4_decoder; +extern AVCodec ff_zero12v_decoder; +extern AVCodec ff_zerocodec_decoder; +extern AVCodec ff_zlib_encoder; +extern AVCodec ff_zlib_decoder; +extern AVCodec ff_zmbv_encoder; +extern AVCodec ff_zmbv_decoder; + +/* audio codecs */ +extern AVCodec ff_aac_encoder; +extern AVCodec ff_aac_decoder; +extern AVCodec ff_aac_fixed_decoder; +extern AVCodec ff_aac_latm_decoder; +extern AVCodec ff_ac3_encoder; +extern AVCodec ff_ac3_decoder; +extern AVCodec ff_ac3_fixed_encoder; +extern AVCodec ff_ac3_fixed_decoder; +extern AVCodec ff_alac_encoder; +extern AVCodec ff_alac_decoder; +extern AVCodec ff_als_decoder; +extern AVCodec ff_amrnb_decoder; +extern AVCodec ff_amrwb_decoder; +extern AVCodec ff_ape_decoder; +extern AVCodec ff_aptx_encoder; +extern AVCodec ff_aptx_decoder; +extern AVCodec ff_aptx_hd_encoder; +extern AVCodec ff_aptx_hd_decoder; +extern AVCodec ff_atrac1_decoder; +extern AVCodec ff_atrac3_decoder; +extern AVCodec ff_atrac3al_decoder; +extern AVCodec ff_atrac3p_decoder; +extern AVCodec ff_atrac3pal_decoder; +extern AVCodec ff_binkaudio_dct_decoder; +extern AVCodec ff_binkaudio_rdft_decoder; +extern AVCodec ff_bmv_audio_decoder; +extern AVCodec ff_cook_decoder; +extern AVCodec ff_dca_encoder; +extern AVCodec ff_dca_decoder; +extern AVCodec ff_dolby_e_decoder; +extern AVCodec ff_dsd_lsbf_decoder; +extern AVCodec ff_dsd_msbf_decoder; +extern AVCodec ff_dsd_lsbf_planar_decoder; +extern AVCodec ff_dsd_msbf_planar_decoder; +extern AVCodec ff_dsicinaudio_decoder; +extern AVCodec ff_dss_sp_decoder; +extern AVCodec ff_dst_decoder; +extern AVCodec ff_eac3_encoder; +extern AVCodec ff_eac3_decoder; +extern AVCodec ff_evrc_decoder; +extern AVCodec ff_ffwavesynth_decoder; +extern AVCodec ff_flac_encoder; +extern AVCodec ff_flac_decoder; +extern AVCodec ff_g723_1_encoder; +extern AVCodec ff_g723_1_decoder; +extern AVCodec ff_g729_decoder; +extern AVCodec ff_gsm_decoder; +extern AVCodec ff_gsm_ms_decoder; +extern AVCodec ff_iac_decoder; +extern AVCodec ff_imc_decoder; +extern AVCodec ff_interplay_acm_decoder; +extern AVCodec ff_mace3_decoder; +extern AVCodec ff_mace6_decoder; +extern AVCodec ff_metasound_decoder; +extern AVCodec ff_mlp_encoder; +extern AVCodec ff_mlp_decoder; +extern AVCodec ff_mp1_decoder; +extern AVCodec ff_mp1float_decoder; +extern AVCodec ff_mp2_encoder; +extern AVCodec ff_mp2_decoder; +extern AVCodec ff_mp2float_decoder; +extern AVCodec ff_mp2fixed_encoder; +extern AVCodec ff_mp3float_decoder; +extern AVCodec ff_mp3_decoder; +extern AVCodec ff_mp3adufloat_decoder; +extern AVCodec ff_mp3adu_decoder; +extern AVCodec ff_mp3on4float_decoder; +extern AVCodec ff_mp3on4_decoder; +extern AVCodec ff_mpc7_decoder; +extern AVCodec ff_mpc8_decoder; +extern AVCodec ff_nellymoser_encoder; +extern AVCodec ff_nellymoser_decoder; +extern AVCodec ff_on2avc_decoder; +extern AVCodec ff_opus_encoder; +extern AVCodec ff_opus_decoder; +extern AVCodec ff_paf_audio_decoder; +extern AVCodec ff_qcelp_decoder; +extern AVCodec ff_qdm2_decoder; +extern AVCodec ff_qdmc_decoder; +extern AVCodec ff_ra_144_encoder; +extern AVCodec ff_ra_144_decoder; +extern AVCodec ff_ra_288_decoder; +extern AVCodec ff_ralf_decoder; +extern AVCodec ff_sbc_encoder; +extern AVCodec ff_sbc_decoder; +extern AVCodec ff_shorten_decoder; +extern AVCodec ff_sipr_decoder; +extern AVCodec ff_smackaud_decoder; +extern AVCodec ff_sonic_encoder; +extern AVCodec ff_sonic_decoder; +extern AVCodec ff_sonic_ls_encoder; +extern AVCodec ff_tak_decoder; +extern AVCodec ff_truehd_encoder; +extern AVCodec ff_truehd_decoder; +extern AVCodec ff_truespeech_decoder; +extern AVCodec ff_tta_encoder; +extern AVCodec ff_tta_decoder; +extern AVCodec ff_twinvq_decoder; +extern AVCodec ff_vmdaudio_decoder; +extern AVCodec ff_vorbis_encoder; +extern AVCodec ff_vorbis_decoder; +extern AVCodec ff_wavpack_encoder; +extern AVCodec ff_wavpack_decoder; +extern AVCodec ff_wmalossless_decoder; +extern AVCodec ff_wmapro_decoder; +extern AVCodec ff_wmav1_encoder; +extern AVCodec ff_wmav1_decoder; +extern AVCodec ff_wmav2_encoder; +extern AVCodec ff_wmav2_decoder; +extern AVCodec ff_wmavoice_decoder; +extern AVCodec ff_ws_snd1_decoder; +extern AVCodec ff_xma1_decoder; +extern AVCodec ff_xma2_decoder; + +/* PCM codecs */ +extern AVCodec ff_pcm_alaw_encoder; +extern AVCodec ff_pcm_alaw_decoder; +extern AVCodec ff_pcm_bluray_decoder; +extern AVCodec ff_pcm_dvd_decoder; +extern AVCodec ff_pcm_f16le_decoder; +extern AVCodec ff_pcm_f24le_decoder; +extern AVCodec ff_pcm_f32be_encoder; +extern AVCodec ff_pcm_f32be_decoder; +extern AVCodec ff_pcm_f32le_encoder; +extern AVCodec ff_pcm_f32le_decoder; +extern AVCodec ff_pcm_f64be_encoder; +extern AVCodec ff_pcm_f64be_decoder; +extern AVCodec ff_pcm_f64le_encoder; +extern AVCodec ff_pcm_f64le_decoder; +extern AVCodec ff_pcm_lxf_decoder; +extern AVCodec ff_pcm_mulaw_encoder; +extern AVCodec ff_pcm_mulaw_decoder; +extern AVCodec ff_pcm_s8_encoder; +extern AVCodec ff_pcm_s8_decoder; +extern AVCodec ff_pcm_s8_planar_encoder; +extern AVCodec ff_pcm_s8_planar_decoder; +extern AVCodec ff_pcm_s16be_encoder; +extern AVCodec ff_pcm_s16be_decoder; +extern AVCodec ff_pcm_s16be_planar_encoder; +extern AVCodec ff_pcm_s16be_planar_decoder; +extern AVCodec ff_pcm_s16le_encoder; +extern AVCodec ff_pcm_s16le_decoder; +extern AVCodec ff_pcm_s16le_planar_encoder; +extern AVCodec ff_pcm_s16le_planar_decoder; +extern AVCodec ff_pcm_s24be_encoder; +extern AVCodec ff_pcm_s24be_decoder; +extern AVCodec ff_pcm_s24daud_encoder; +extern AVCodec ff_pcm_s24daud_decoder; +extern AVCodec ff_pcm_s24le_encoder; +extern AVCodec ff_pcm_s24le_decoder; +extern AVCodec ff_pcm_s24le_planar_encoder; +extern AVCodec ff_pcm_s24le_planar_decoder; +extern AVCodec ff_pcm_s32be_encoder; +extern AVCodec ff_pcm_s32be_decoder; +extern AVCodec ff_pcm_s32le_encoder; +extern AVCodec ff_pcm_s32le_decoder; +extern AVCodec ff_pcm_s32le_planar_encoder; +extern AVCodec ff_pcm_s32le_planar_decoder; +extern AVCodec ff_pcm_s64be_encoder; +extern AVCodec ff_pcm_s64be_decoder; +extern AVCodec ff_pcm_s64le_encoder; +extern AVCodec ff_pcm_s64le_decoder; +extern AVCodec ff_pcm_u8_encoder; +extern AVCodec ff_pcm_u8_decoder; +extern AVCodec ff_pcm_u16be_encoder; +extern AVCodec ff_pcm_u16be_decoder; +extern AVCodec ff_pcm_u16le_encoder; +extern AVCodec ff_pcm_u16le_decoder; +extern AVCodec ff_pcm_u24be_encoder; +extern AVCodec ff_pcm_u24be_decoder; +extern AVCodec ff_pcm_u24le_encoder; +extern AVCodec ff_pcm_u24le_decoder; +extern AVCodec ff_pcm_u32be_encoder; +extern AVCodec ff_pcm_u32be_decoder; +extern AVCodec ff_pcm_u32le_encoder; +extern AVCodec ff_pcm_u32le_decoder; +extern AVCodec ff_pcm_zork_decoder; + +/* DPCM codecs */ +extern AVCodec ff_gremlin_dpcm_decoder; +extern AVCodec ff_interplay_dpcm_decoder; +extern AVCodec ff_roq_dpcm_encoder; +extern AVCodec ff_roq_dpcm_decoder; +extern AVCodec ff_sol_dpcm_decoder; +extern AVCodec ff_xan_dpcm_decoder; + +/* ADPCM codecs */ +extern AVCodec ff_adpcm_4xm_decoder; +extern AVCodec ff_adpcm_adx_encoder; +extern AVCodec ff_adpcm_adx_decoder; +extern AVCodec ff_adpcm_afc_decoder; +extern AVCodec ff_adpcm_aica_decoder; +extern AVCodec ff_adpcm_ct_decoder; +extern AVCodec ff_adpcm_dtk_decoder; +extern AVCodec ff_adpcm_ea_decoder; +extern AVCodec ff_adpcm_ea_maxis_xa_decoder; +extern AVCodec ff_adpcm_ea_r1_decoder; +extern AVCodec ff_adpcm_ea_r2_decoder; +extern AVCodec ff_adpcm_ea_r3_decoder; +extern AVCodec ff_adpcm_ea_xas_decoder; +extern AVCodec ff_adpcm_g722_encoder; +extern AVCodec ff_adpcm_g722_decoder; +extern AVCodec ff_adpcm_g726_encoder; +extern AVCodec ff_adpcm_g726_decoder; +extern AVCodec ff_adpcm_g726le_encoder; +extern AVCodec ff_adpcm_g726le_decoder; +extern AVCodec ff_adpcm_ima_amv_decoder; +extern AVCodec ff_adpcm_ima_apc_decoder; +extern AVCodec ff_adpcm_ima_dat4_decoder; +extern AVCodec ff_adpcm_ima_dk3_decoder; +extern AVCodec ff_adpcm_ima_dk4_decoder; +extern AVCodec ff_adpcm_ima_ea_eacs_decoder; +extern AVCodec ff_adpcm_ima_ea_sead_decoder; +extern AVCodec ff_adpcm_ima_iss_decoder; +extern AVCodec ff_adpcm_ima_oki_decoder; +extern AVCodec ff_adpcm_ima_qt_encoder; +extern AVCodec ff_adpcm_ima_qt_decoder; +extern AVCodec ff_adpcm_ima_rad_decoder; +extern AVCodec ff_adpcm_ima_smjpeg_decoder; +extern AVCodec ff_adpcm_ima_wav_encoder; +extern AVCodec ff_adpcm_ima_wav_decoder; +extern AVCodec ff_adpcm_ima_ws_decoder; +extern AVCodec ff_adpcm_ms_encoder; +extern AVCodec ff_adpcm_ms_decoder; +extern AVCodec ff_adpcm_mtaf_decoder; +extern AVCodec ff_adpcm_psx_decoder; +extern AVCodec ff_adpcm_sbpro_2_decoder; +extern AVCodec ff_adpcm_sbpro_3_decoder; +extern AVCodec ff_adpcm_sbpro_4_decoder; +extern AVCodec ff_adpcm_swf_encoder; +extern AVCodec ff_adpcm_swf_decoder; +extern AVCodec ff_adpcm_thp_decoder; +extern AVCodec ff_adpcm_thp_le_decoder; +extern AVCodec ff_adpcm_vima_decoder; +extern AVCodec ff_adpcm_xa_decoder; +extern AVCodec ff_adpcm_yamaha_encoder; +extern AVCodec ff_adpcm_yamaha_decoder; + +/* subtitles */ +extern AVCodec ff_ssa_encoder; +extern AVCodec ff_ssa_decoder; +extern AVCodec ff_ass_encoder; +extern AVCodec ff_ass_decoder; +extern AVCodec ff_ccaption_decoder; +extern AVCodec ff_dvbsub_encoder; +extern AVCodec ff_dvbsub_decoder; +extern AVCodec ff_dvdsub_encoder; +extern AVCodec ff_dvdsub_decoder; +extern AVCodec ff_jacosub_decoder; +extern AVCodec ff_microdvd_decoder; +extern AVCodec ff_movtext_encoder; +extern AVCodec ff_movtext_decoder; +extern AVCodec ff_mpl2_decoder; +extern AVCodec ff_pgssub_decoder; +extern AVCodec ff_pjs_decoder; +extern AVCodec ff_realtext_decoder; +extern AVCodec ff_sami_decoder; +extern AVCodec ff_srt_encoder; +extern AVCodec ff_srt_decoder; +extern AVCodec ff_stl_decoder; +extern AVCodec ff_subrip_encoder; +extern AVCodec ff_subrip_decoder; +extern AVCodec ff_subviewer_decoder; +extern AVCodec ff_subviewer1_decoder; +extern AVCodec ff_text_encoder; +extern AVCodec ff_text_decoder; +extern AVCodec ff_vplayer_decoder; +extern AVCodec ff_webvtt_encoder; +extern AVCodec ff_webvtt_decoder; +extern AVCodec ff_xsub_encoder; +extern AVCodec ff_xsub_decoder; + +/* external libraries */ +extern AVCodec ff_aac_at_encoder; +extern AVCodec ff_aac_at_decoder; +extern AVCodec ff_ac3_at_decoder; +extern AVCodec ff_adpcm_ima_qt_at_decoder; +extern AVCodec ff_alac_at_encoder; +extern AVCodec ff_alac_at_decoder; +extern AVCodec ff_amr_nb_at_decoder; +extern AVCodec ff_eac3_at_decoder; +extern AVCodec ff_gsm_ms_at_decoder; +extern AVCodec ff_ilbc_at_encoder; +extern AVCodec ff_ilbc_at_decoder; +extern AVCodec ff_mp1_at_decoder; +extern AVCodec ff_mp2_at_decoder; +extern AVCodec ff_mp3_at_decoder; +extern AVCodec ff_pcm_alaw_at_encoder; +extern AVCodec ff_pcm_alaw_at_decoder; +extern AVCodec ff_pcm_mulaw_at_encoder; +extern AVCodec ff_pcm_mulaw_at_decoder; +extern AVCodec ff_qdmc_at_decoder; +extern AVCodec ff_qdm2_at_decoder; +extern AVCodec ff_libaom_av1_decoder; +extern AVCodec ff_libaom_av1_encoder; +extern AVCodec ff_libcelt_decoder; +extern AVCodec ff_libcodec2_encoder; +extern AVCodec ff_libcodec2_decoder; +extern AVCodec ff_libfdk_aac_encoder; +extern AVCodec ff_libfdk_aac_decoder; +extern AVCodec ff_libgsm_encoder; +extern AVCodec ff_libgsm_decoder; +extern AVCodec ff_libgsm_ms_encoder; +extern AVCodec ff_libgsm_ms_decoder; +extern AVCodec ff_libilbc_encoder; +extern AVCodec ff_libilbc_decoder; +extern AVCodec ff_libmp3lame_encoder; +extern AVCodec ff_libopencore_amrnb_encoder; +extern AVCodec ff_libopencore_amrnb_decoder; +extern AVCodec ff_libopencore_amrwb_decoder; +extern AVCodec ff_libopenjpeg_encoder; +extern AVCodec ff_libopenjpeg_decoder; +extern AVCodec ff_libopus_encoder; +extern AVCodec ff_libopus_decoder; +extern AVCodec ff_librsvg_decoder; +extern AVCodec ff_libshine_encoder; +extern AVCodec ff_libspeex_encoder; +extern AVCodec ff_libspeex_decoder; +extern AVCodec ff_libtheora_encoder; +extern AVCodec ff_libtwolame_encoder; +extern AVCodec ff_libvo_amrwbenc_encoder; +extern AVCodec ff_libvorbis_encoder; +extern AVCodec ff_libvorbis_decoder; +extern AVCodec ff_libvpx_vp8_encoder; +extern AVCodec ff_libvpx_vp8_decoder; +extern AVCodec ff_libvpx_vp9_encoder; +extern AVCodec ff_libvpx_vp9_decoder; +extern AVCodec ff_libwavpack_encoder; +/* preferred over libwebp */ +extern AVCodec ff_libwebp_anim_encoder; +extern AVCodec ff_libwebp_encoder; +extern AVCodec ff_libx262_encoder; +extern AVCodec ff_libx264_encoder; +extern AVCodec ff_libx264rgb_encoder; +extern AVCodec ff_libx265_encoder; +extern AVCodec ff_libxavs_encoder; +extern AVCodec ff_libxvid_encoder; +extern AVCodec ff_libzvbi_teletext_decoder; + +/* text */ +extern AVCodec ff_bintext_decoder; +extern AVCodec ff_xbin_decoder; +extern AVCodec ff_idf_decoder; + +/* external libraries, that shouldn't be used by default if one of the + * above is available */ +extern AVCodec ff_h263_v4l2m2m_encoder; +extern AVCodec ff_libopenh264_encoder; +extern AVCodec ff_libopenh264_decoder; +extern AVCodec ff_h264_amf_encoder; +extern AVCodec ff_h264_cuvid_decoder; +extern AVCodec ff_h264_nvenc_encoder; +extern AVCodec ff_h264_omx_encoder; +extern AVCodec ff_h264_qsv_encoder; +extern AVCodec ff_h264_v4l2m2m_encoder; +extern AVCodec ff_h264_vaapi_encoder; +extern AVCodec ff_h264_videotoolbox_encoder; +#if FF_API_NVENC_OLD_NAME +extern AVCodec ff_nvenc_encoder; +extern AVCodec ff_nvenc_h264_encoder; +extern AVCodec ff_nvenc_hevc_encoder; +#endif +extern AVCodec ff_hevc_amf_encoder; +extern AVCodec ff_hevc_cuvid_decoder; +extern AVCodec ff_hevc_mediacodec_decoder; +extern AVCodec ff_hevc_nvenc_encoder; +extern AVCodec ff_hevc_qsv_encoder; +extern AVCodec ff_hevc_v4l2m2m_encoder; +extern AVCodec ff_hevc_vaapi_encoder; +extern AVCodec ff_hevc_videotoolbox_encoder; +extern AVCodec ff_libkvazaar_encoder; +extern AVCodec ff_mjpeg_cuvid_decoder; +extern AVCodec ff_mjpeg_qsv_encoder; +extern AVCodec ff_mjpeg_vaapi_encoder; +extern AVCodec ff_mpeg1_cuvid_decoder; +extern AVCodec ff_mpeg2_cuvid_decoder; +extern AVCodec ff_mpeg2_qsv_encoder; +extern AVCodec ff_mpeg2_vaapi_encoder; +extern AVCodec ff_mpeg4_cuvid_decoder; +extern AVCodec ff_mpeg4_mediacodec_decoder; +extern AVCodec ff_mpeg4_v4l2m2m_encoder; +extern AVCodec ff_vc1_cuvid_decoder; +extern AVCodec ff_vp8_cuvid_decoder; +extern AVCodec ff_vp8_mediacodec_decoder; +extern AVCodec ff_vp8_qsv_decoder; +extern AVCodec ff_vp8_v4l2m2m_encoder; +extern AVCodec ff_vp8_vaapi_encoder; +extern AVCodec ff_vp9_cuvid_decoder; +extern AVCodec ff_vp9_mediacodec_decoder; +extern AVCodec ff_vp9_vaapi_encoder; + +#include "libavcodec/codec_list.c" + +static AVOnce av_codec_static_init = AV_ONCE_INIT; +static void av_codec_init_static(void) +{ + for (int i = 0; codec_list[i]; i++) { + if (codec_list[i]->init_static_data) + codec_list[i]->init_static_data((AVCodec*)codec_list[i]); + } +} + +const AVCodec *av_codec_iterate(void **opaque) +{ + uintptr_t i = (uintptr_t)*opaque; + const AVCodec *c = codec_list[i]; + + ff_thread_once(&av_codec_static_init, av_codec_init_static); + + if (c) + *opaque = (void*)(i + 1); + + return c; +} + +#if FF_API_NEXT +FF_DISABLE_DEPRECATION_WARNINGS +static AVOnce av_codec_next_init = AV_ONCE_INIT; + +static void av_codec_init_next(void) +{ + AVCodec *prev = NULL, *p; + void *i = 0; + while ((p = (AVCodec*)av_codec_iterate(&i))) { + if (prev) + prev->next = p; + prev = p; + } +} + + + +av_cold void avcodec_register(AVCodec *codec) +{ + ff_thread_once(&av_codec_next_init, av_codec_init_next); +} + +AVCodec *av_codec_next(const AVCodec *c) +{ + ff_thread_once(&av_codec_next_init, av_codec_init_next); + + if (c) + return c->next; + else + return (AVCodec*)codec_list[0]; +} + +void avcodec_register_all(void) +{ + ff_thread_once(&av_codec_next_init, av_codec_init_next); +} +FF_ENABLE_DEPRECATION_WARNINGS +#endif + +static enum AVCodecID remap_deprecated_codec_id(enum AVCodecID id) +{ + switch(id){ + //This is for future deprecatec codec ids, its empty since + //last major bump but will fill up again over time, please don't remove it + default : return id; + } +} + +static AVCodec *find_codec(enum AVCodecID id, int (*x)(const AVCodec *)) +{ + const AVCodec *p, *experimental = NULL; + void *i = 0; + + id = remap_deprecated_codec_id(id); + + while ((p = av_codec_iterate(&i))) { + if (!x(p)) + continue; + if (p->id == id) { + if (p->capabilities & AV_CODEC_CAP_EXPERIMENTAL && !experimental) { + experimental = p; + } else + return (AVCodec*)p; + } + } + + return (AVCodec*)experimental; +} + +AVCodec *avcodec_find_encoder(enum AVCodecID id) +{ + return find_codec(id, av_codec_is_encoder); +} + +AVCodec *avcodec_find_decoder(enum AVCodecID id) +{ + return find_codec(id, av_codec_is_decoder); +} + +static AVCodec *find_codec_by_name(const char *name, int (*x)(const AVCodec *)) +{ + void *i = 0; + const AVCodec *p; + + if (!name) + return NULL; + + while ((p = av_codec_iterate(&i))) { + if (!x(p)) + continue; + if (strcmp(name, p->name) == 0) + return (AVCodec*)p; + } + + return NULL; +} + +AVCodec *avcodec_find_encoder_by_name(const char *name) +{ + return find_codec_by_name(name, av_codec_is_encoder); +} + +AVCodec *avcodec_find_decoder_by_name(const char *name) +{ + return find_codec_by_name(name, av_codec_is_decoder); +} diff --git a/libs/ffvpx/libavcodec/avcodec.h b/libs/ffvpx/libavcodec/avcodec.h new file mode 100644 index 000000000..fb0c6fae7 --- /dev/null +++ b/libs/ffvpx/libavcodec/avcodec.h @@ -0,0 +1,6146 @@ +/* + * copyright (c) 2001 Fabrice Bellard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_AVCODEC_H +#define AVCODEC_AVCODEC_H + +/** + * @file + * @ingroup libavc + * Libavcodec external API header + */ + +#include <errno.h> +#include "libavutil/samplefmt.h" +#include "libavutil/attributes.h" +#include "libavutil/avutil.h" +#include "libavutil/buffer.h" +#include "libavutil/cpu.h" +#include "libavutil/channel_layout.h" +#include "libavutil/dict.h" +#include "libavutil/frame.h" +#include "libavutil/hwcontext.h" +#include "libavutil/log.h" +#include "libavutil/pixfmt.h" +#include "libavutil/rational.h" + +#include "version.h" + +/** + * @defgroup libavc libavcodec + * Encoding/Decoding Library + * + * @{ + * + * @defgroup lavc_decoding Decoding + * @{ + * @} + * + * @defgroup lavc_encoding Encoding + * @{ + * @} + * + * @defgroup lavc_codec Codecs + * @{ + * @defgroup lavc_codec_native Native Codecs + * @{ + * @} + * @defgroup lavc_codec_wrappers External library wrappers + * @{ + * @} + * @defgroup lavc_codec_hwaccel Hardware Accelerators bridge + * @{ + * @} + * @} + * @defgroup lavc_internal Internal + * @{ + * @} + * @} + */ + +/** + * @ingroup libavc + * @defgroup lavc_encdec send/receive encoding and decoding API overview + * @{ + * + * The avcodec_send_packet()/avcodec_receive_frame()/avcodec_send_frame()/ + * avcodec_receive_packet() functions provide an encode/decode API, which + * decouples input and output. + * + * The API is very similar for encoding/decoding and audio/video, and works as + * follows: + * - Set up and open the AVCodecContext as usual. + * - Send valid input: + * - For decoding, call avcodec_send_packet() to give the decoder raw + * compressed data in an AVPacket. + * - For encoding, call avcodec_send_frame() to give the encoder an AVFrame + * containing uncompressed audio or video. + * In both cases, it is recommended that AVPackets and AVFrames are + * refcounted, or libavcodec might have to copy the input data. (libavformat + * always returns refcounted AVPackets, and av_frame_get_buffer() allocates + * refcounted AVFrames.) + * - Receive output in a loop. Periodically call one of the avcodec_receive_*() + * functions and process their output: + * - For decoding, call avcodec_receive_frame(). On success, it will return + * an AVFrame containing uncompressed audio or video data. + * - For encoding, call avcodec_receive_packet(). On success, it will return + * an AVPacket with a compressed frame. + * Repeat this call until it returns AVERROR(EAGAIN) or an error. The + * AVERROR(EAGAIN) return value means that new input data is required to + * return new output. In this case, continue with sending input. For each + * input frame/packet, the codec will typically return 1 output frame/packet, + * but it can also be 0 or more than 1. + * + * At the beginning of decoding or encoding, the codec might accept multiple + * input frames/packets without returning a frame, until its internal buffers + * are filled. This situation is handled transparently if you follow the steps + * outlined above. + * + * In theory, sending input can result in EAGAIN - this should happen only if + * not all output was received. You can use this to structure alternative decode + * or encode loops other than the one suggested above. For example, you could + * try sending new input on each iteration, and try to receive output if that + * returns EAGAIN. + * + * End of stream situations. These require "flushing" (aka draining) the codec, + * as the codec might buffer multiple frames or packets internally for + * performance or out of necessity (consider B-frames). + * This is handled as follows: + * - Instead of valid input, send NULL to the avcodec_send_packet() (decoding) + * or avcodec_send_frame() (encoding) functions. This will enter draining + * mode. + * - Call avcodec_receive_frame() (decoding) or avcodec_receive_packet() + * (encoding) in a loop until AVERROR_EOF is returned. The functions will + * not return AVERROR(EAGAIN), unless you forgot to enter draining mode. + * - Before decoding can be resumed again, the codec has to be reset with + * avcodec_flush_buffers(). + * + * Using the API as outlined above is highly recommended. But it is also + * possible to call functions outside of this rigid schema. For example, you can + * call avcodec_send_packet() repeatedly without calling + * avcodec_receive_frame(). In this case, avcodec_send_packet() will succeed + * until the codec's internal buffer has been filled up (which is typically of + * size 1 per output frame, after initial input), and then reject input with + * AVERROR(EAGAIN). Once it starts rejecting input, you have no choice but to + * read at least some output. + * + * Not all codecs will follow a rigid and predictable dataflow; the only + * guarantee is that an AVERROR(EAGAIN) return value on a send/receive call on + * one end implies that a receive/send call on the other end will succeed, or + * at least will not fail with AVERROR(EAGAIN). In general, no codec will + * permit unlimited buffering of input or output. + * + * This API replaces the following legacy functions: + * - avcodec_decode_video2() and avcodec_decode_audio4(): + * Use avcodec_send_packet() to feed input to the decoder, then use + * avcodec_receive_frame() to receive decoded frames after each packet. + * Unlike with the old video decoding API, multiple frames might result from + * a packet. For audio, splitting the input packet into frames by partially + * decoding packets becomes transparent to the API user. You never need to + * feed an AVPacket to the API twice (unless it is rejected with AVERROR(EAGAIN) - then + * no data was read from the packet). + * Additionally, sending a flush/draining packet is required only once. + * - avcodec_encode_video2()/avcodec_encode_audio2(): + * Use avcodec_send_frame() to feed input to the encoder, then use + * avcodec_receive_packet() to receive encoded packets. + * Providing user-allocated buffers for avcodec_receive_packet() is not + * possible. + * - The new API does not handle subtitles yet. + * + * Mixing new and old function calls on the same AVCodecContext is not allowed, + * and will result in undefined behavior. + * + * Some codecs might require using the new API; using the old API will return + * an error when calling it. All codecs support the new API. + * + * A codec is not allowed to return AVERROR(EAGAIN) for both sending and receiving. This + * would be an invalid state, which could put the codec user into an endless + * loop. The API has no concept of time either: it cannot happen that trying to + * do avcodec_send_packet() results in AVERROR(EAGAIN), but a repeated call 1 second + * later accepts the packet (with no other receive/flush API calls involved). + * The API is a strict state machine, and the passage of time is not supposed + * to influence it. Some timing-dependent behavior might still be deemed + * acceptable in certain cases. But it must never result in both send/receive + * returning EAGAIN at the same time at any point. It must also absolutely be + * avoided that the current state is "unstable" and can "flip-flop" between + * the send/receive APIs allowing progress. For example, it's not allowed that + * the codec randomly decides that it actually wants to consume a packet now + * instead of returning a frame, after it just returned AVERROR(EAGAIN) on an + * avcodec_send_packet() call. + * @} + */ + +/** + * @defgroup lavc_core Core functions/structures. + * @ingroup libavc + * + * Basic definitions, functions for querying libavcodec capabilities, + * allocating core structures, etc. + * @{ + */ + + +/** + * Identify the syntax and semantics of the bitstream. + * The principle is roughly: + * Two decoders with the same ID can decode the same streams. + * Two encoders with the same ID can encode compatible streams. + * There may be slight deviations from the principle due to implementation + * details. + * + * If you add a codec ID to this list, add it so that + * 1. no value of an existing codec ID changes (that would break ABI), + * 2. it is as close as possible to similar codecs + * + * After adding new codec IDs, do not forget to add an entry to the codec + * descriptor list and bump libavcodec minor version. + */ +enum AVCodecID { + AV_CODEC_ID_NONE, + + /* video codecs */ + AV_CODEC_ID_MPEG1VIDEO, + AV_CODEC_ID_MPEG2VIDEO, ///< preferred ID for MPEG-1/2 video decoding + AV_CODEC_ID_H261, + AV_CODEC_ID_H263, + AV_CODEC_ID_RV10, + AV_CODEC_ID_RV20, + AV_CODEC_ID_MJPEG, + AV_CODEC_ID_MJPEGB, + AV_CODEC_ID_LJPEG, + AV_CODEC_ID_SP5X, + AV_CODEC_ID_JPEGLS, + AV_CODEC_ID_MPEG4, + AV_CODEC_ID_RAWVIDEO, + AV_CODEC_ID_MSMPEG4V1, + AV_CODEC_ID_MSMPEG4V2, + AV_CODEC_ID_MSMPEG4V3, + AV_CODEC_ID_WMV1, + AV_CODEC_ID_WMV2, + AV_CODEC_ID_H263P, + AV_CODEC_ID_H263I, + AV_CODEC_ID_FLV1, + AV_CODEC_ID_SVQ1, + AV_CODEC_ID_SVQ3, + AV_CODEC_ID_DVVIDEO, + AV_CODEC_ID_HUFFYUV, + AV_CODEC_ID_CYUV, + AV_CODEC_ID_H264, + AV_CODEC_ID_INDEO3, + AV_CODEC_ID_VP3, + AV_CODEC_ID_THEORA, + AV_CODEC_ID_ASV1, + AV_CODEC_ID_ASV2, + AV_CODEC_ID_FFV1, + AV_CODEC_ID_4XM, + AV_CODEC_ID_VCR1, + AV_CODEC_ID_CLJR, + AV_CODEC_ID_MDEC, + AV_CODEC_ID_ROQ, + AV_CODEC_ID_INTERPLAY_VIDEO, + AV_CODEC_ID_XAN_WC3, + AV_CODEC_ID_XAN_WC4, + AV_CODEC_ID_RPZA, + AV_CODEC_ID_CINEPAK, + AV_CODEC_ID_WS_VQA, + AV_CODEC_ID_MSRLE, + AV_CODEC_ID_MSVIDEO1, + AV_CODEC_ID_IDCIN, + AV_CODEC_ID_8BPS, + AV_CODEC_ID_SMC, + AV_CODEC_ID_FLIC, + AV_CODEC_ID_TRUEMOTION1, + AV_CODEC_ID_VMDVIDEO, + AV_CODEC_ID_MSZH, + AV_CODEC_ID_ZLIB, + AV_CODEC_ID_QTRLE, + AV_CODEC_ID_TSCC, + AV_CODEC_ID_ULTI, + AV_CODEC_ID_QDRAW, + AV_CODEC_ID_VIXL, + AV_CODEC_ID_QPEG, + AV_CODEC_ID_PNG, + AV_CODEC_ID_PPM, + AV_CODEC_ID_PBM, + AV_CODEC_ID_PGM, + AV_CODEC_ID_PGMYUV, + AV_CODEC_ID_PAM, + AV_CODEC_ID_FFVHUFF, + AV_CODEC_ID_RV30, + AV_CODEC_ID_RV40, + AV_CODEC_ID_VC1, + AV_CODEC_ID_WMV3, + AV_CODEC_ID_LOCO, + AV_CODEC_ID_WNV1, + AV_CODEC_ID_AASC, + AV_CODEC_ID_INDEO2, + AV_CODEC_ID_FRAPS, + AV_CODEC_ID_TRUEMOTION2, + AV_CODEC_ID_BMP, + AV_CODEC_ID_CSCD, + AV_CODEC_ID_MMVIDEO, + AV_CODEC_ID_ZMBV, + AV_CODEC_ID_AVS, + AV_CODEC_ID_SMACKVIDEO, + AV_CODEC_ID_NUV, + AV_CODEC_ID_KMVC, + AV_CODEC_ID_FLASHSV, + AV_CODEC_ID_CAVS, + AV_CODEC_ID_JPEG2000, + AV_CODEC_ID_VMNC, + AV_CODEC_ID_VP5, + AV_CODEC_ID_VP6, + AV_CODEC_ID_VP6F, + AV_CODEC_ID_TARGA, + AV_CODEC_ID_DSICINVIDEO, + AV_CODEC_ID_TIERTEXSEQVIDEO, + AV_CODEC_ID_TIFF, + AV_CODEC_ID_GIF, + AV_CODEC_ID_DXA, + AV_CODEC_ID_DNXHD, + AV_CODEC_ID_THP, + AV_CODEC_ID_SGI, + AV_CODEC_ID_C93, + AV_CODEC_ID_BETHSOFTVID, + AV_CODEC_ID_PTX, + AV_CODEC_ID_TXD, + AV_CODEC_ID_VP6A, + AV_CODEC_ID_AMV, + AV_CODEC_ID_VB, + AV_CODEC_ID_PCX, + AV_CODEC_ID_SUNRAST, + AV_CODEC_ID_INDEO4, + AV_CODEC_ID_INDEO5, + AV_CODEC_ID_MIMIC, + AV_CODEC_ID_RL2, + AV_CODEC_ID_ESCAPE124, + AV_CODEC_ID_DIRAC, + AV_CODEC_ID_BFI, + AV_CODEC_ID_CMV, + AV_CODEC_ID_MOTIONPIXELS, + AV_CODEC_ID_TGV, + AV_CODEC_ID_TGQ, + AV_CODEC_ID_TQI, + AV_CODEC_ID_AURA, + AV_CODEC_ID_AURA2, + AV_CODEC_ID_V210X, + AV_CODEC_ID_TMV, + AV_CODEC_ID_V210, + AV_CODEC_ID_DPX, + AV_CODEC_ID_MAD, + AV_CODEC_ID_FRWU, + AV_CODEC_ID_FLASHSV2, + AV_CODEC_ID_CDGRAPHICS, + AV_CODEC_ID_R210, + AV_CODEC_ID_ANM, + AV_CODEC_ID_BINKVIDEO, + AV_CODEC_ID_IFF_ILBM, +#define AV_CODEC_ID_IFF_BYTERUN1 AV_CODEC_ID_IFF_ILBM + AV_CODEC_ID_KGV1, + AV_CODEC_ID_YOP, + AV_CODEC_ID_VP8, + AV_CODEC_ID_PICTOR, + AV_CODEC_ID_ANSI, + AV_CODEC_ID_A64_MULTI, + AV_CODEC_ID_A64_MULTI5, + AV_CODEC_ID_R10K, + AV_CODEC_ID_MXPEG, + AV_CODEC_ID_LAGARITH, + AV_CODEC_ID_PRORES, + AV_CODEC_ID_JV, + AV_CODEC_ID_DFA, + AV_CODEC_ID_WMV3IMAGE, + AV_CODEC_ID_VC1IMAGE, + AV_CODEC_ID_UTVIDEO, + AV_CODEC_ID_BMV_VIDEO, + AV_CODEC_ID_VBLE, + AV_CODEC_ID_DXTORY, + AV_CODEC_ID_V410, + AV_CODEC_ID_XWD, + AV_CODEC_ID_CDXL, + AV_CODEC_ID_XBM, + AV_CODEC_ID_ZEROCODEC, + AV_CODEC_ID_MSS1, + AV_CODEC_ID_MSA1, + AV_CODEC_ID_TSCC2, + AV_CODEC_ID_MTS2, + AV_CODEC_ID_CLLC, + AV_CODEC_ID_MSS2, + AV_CODEC_ID_VP9, + AV_CODEC_ID_AIC, + AV_CODEC_ID_ESCAPE130, + AV_CODEC_ID_G2M, + AV_CODEC_ID_WEBP, + AV_CODEC_ID_HNM4_VIDEO, + AV_CODEC_ID_HEVC, +#define AV_CODEC_ID_H265 AV_CODEC_ID_HEVC + AV_CODEC_ID_FIC, + AV_CODEC_ID_ALIAS_PIX, + AV_CODEC_ID_BRENDER_PIX, + AV_CODEC_ID_PAF_VIDEO, + AV_CODEC_ID_EXR, + AV_CODEC_ID_VP7, + AV_CODEC_ID_SANM, + AV_CODEC_ID_SGIRLE, + AV_CODEC_ID_MVC1, + AV_CODEC_ID_MVC2, + AV_CODEC_ID_HQX, + AV_CODEC_ID_TDSC, + AV_CODEC_ID_HQ_HQA, + AV_CODEC_ID_HAP, + AV_CODEC_ID_DDS, + AV_CODEC_ID_DXV, + AV_CODEC_ID_SCREENPRESSO, + AV_CODEC_ID_RSCC, + + AV_CODEC_ID_Y41P = 0x8000, + AV_CODEC_ID_AVRP, + AV_CODEC_ID_012V, + AV_CODEC_ID_AVUI, + AV_CODEC_ID_AYUV, + AV_CODEC_ID_TARGA_Y216, + AV_CODEC_ID_V308, + AV_CODEC_ID_V408, + AV_CODEC_ID_YUV4, + AV_CODEC_ID_AVRN, + AV_CODEC_ID_CPIA, + AV_CODEC_ID_XFACE, + AV_CODEC_ID_SNOW, + AV_CODEC_ID_SMVJPEG, + AV_CODEC_ID_APNG, + AV_CODEC_ID_DAALA, + AV_CODEC_ID_CFHD, + AV_CODEC_ID_TRUEMOTION2RT, + AV_CODEC_ID_M101, + AV_CODEC_ID_MAGICYUV, + AV_CODEC_ID_SHEERVIDEO, + AV_CODEC_ID_YLC, + AV_CODEC_ID_PSD, + AV_CODEC_ID_PIXLET, + AV_CODEC_ID_SPEEDHQ, + AV_CODEC_ID_FMVC, + AV_CODEC_ID_SCPR, + AV_CODEC_ID_CLEARVIDEO, + AV_CODEC_ID_XPM, + AV_CODEC_ID_AV1, + AV_CODEC_ID_BITPACKED, + AV_CODEC_ID_MSCC, + AV_CODEC_ID_SRGC, + AV_CODEC_ID_SVG, + AV_CODEC_ID_GDV, + AV_CODEC_ID_FITS, + + /* various PCM "codecs" */ + AV_CODEC_ID_FIRST_AUDIO = 0x10000, ///< A dummy id pointing at the start of audio codecs + AV_CODEC_ID_PCM_S16LE = 0x10000, + AV_CODEC_ID_PCM_S16BE, + AV_CODEC_ID_PCM_U16LE, + AV_CODEC_ID_PCM_U16BE, + AV_CODEC_ID_PCM_S8, + AV_CODEC_ID_PCM_U8, + AV_CODEC_ID_PCM_MULAW, + AV_CODEC_ID_PCM_ALAW, + AV_CODEC_ID_PCM_S32LE, + AV_CODEC_ID_PCM_S32BE, + AV_CODEC_ID_PCM_U32LE, + AV_CODEC_ID_PCM_U32BE, + AV_CODEC_ID_PCM_S24LE, + AV_CODEC_ID_PCM_S24BE, + AV_CODEC_ID_PCM_U24LE, + AV_CODEC_ID_PCM_U24BE, + AV_CODEC_ID_PCM_S24DAUD, + AV_CODEC_ID_PCM_ZORK, + AV_CODEC_ID_PCM_S16LE_PLANAR, + AV_CODEC_ID_PCM_DVD, + AV_CODEC_ID_PCM_F32BE, + AV_CODEC_ID_PCM_F32LE, + AV_CODEC_ID_PCM_F64BE, + AV_CODEC_ID_PCM_F64LE, + AV_CODEC_ID_PCM_BLURAY, + AV_CODEC_ID_PCM_LXF, + AV_CODEC_ID_S302M, + AV_CODEC_ID_PCM_S8_PLANAR, + AV_CODEC_ID_PCM_S24LE_PLANAR, + AV_CODEC_ID_PCM_S32LE_PLANAR, + AV_CODEC_ID_PCM_S16BE_PLANAR, + + AV_CODEC_ID_PCM_S64LE = 0x10800, + AV_CODEC_ID_PCM_S64BE, + AV_CODEC_ID_PCM_F16LE, + AV_CODEC_ID_PCM_F24LE, + + /* various ADPCM codecs */ + AV_CODEC_ID_ADPCM_IMA_QT = 0x11000, + AV_CODEC_ID_ADPCM_IMA_WAV, + AV_CODEC_ID_ADPCM_IMA_DK3, + AV_CODEC_ID_ADPCM_IMA_DK4, + AV_CODEC_ID_ADPCM_IMA_WS, + AV_CODEC_ID_ADPCM_IMA_SMJPEG, + AV_CODEC_ID_ADPCM_MS, + AV_CODEC_ID_ADPCM_4XM, + AV_CODEC_ID_ADPCM_XA, + AV_CODEC_ID_ADPCM_ADX, + AV_CODEC_ID_ADPCM_EA, + AV_CODEC_ID_ADPCM_G726, + AV_CODEC_ID_ADPCM_CT, + AV_CODEC_ID_ADPCM_SWF, + AV_CODEC_ID_ADPCM_YAMAHA, + AV_CODEC_ID_ADPCM_SBPRO_4, + AV_CODEC_ID_ADPCM_SBPRO_3, + AV_CODEC_ID_ADPCM_SBPRO_2, + AV_CODEC_ID_ADPCM_THP, + AV_CODEC_ID_ADPCM_IMA_AMV, + AV_CODEC_ID_ADPCM_EA_R1, + AV_CODEC_ID_ADPCM_EA_R3, + AV_CODEC_ID_ADPCM_EA_R2, + AV_CODEC_ID_ADPCM_IMA_EA_SEAD, + AV_CODEC_ID_ADPCM_IMA_EA_EACS, + AV_CODEC_ID_ADPCM_EA_XAS, + AV_CODEC_ID_ADPCM_EA_MAXIS_XA, + AV_CODEC_ID_ADPCM_IMA_ISS, + AV_CODEC_ID_ADPCM_G722, + AV_CODEC_ID_ADPCM_IMA_APC, + AV_CODEC_ID_ADPCM_VIMA, + + AV_CODEC_ID_ADPCM_AFC = 0x11800, + AV_CODEC_ID_ADPCM_IMA_OKI, + AV_CODEC_ID_ADPCM_DTK, + AV_CODEC_ID_ADPCM_IMA_RAD, + AV_CODEC_ID_ADPCM_G726LE, + AV_CODEC_ID_ADPCM_THP_LE, + AV_CODEC_ID_ADPCM_PSX, + AV_CODEC_ID_ADPCM_AICA, + AV_CODEC_ID_ADPCM_IMA_DAT4, + AV_CODEC_ID_ADPCM_MTAF, + + /* AMR */ + AV_CODEC_ID_AMR_NB = 0x12000, + AV_CODEC_ID_AMR_WB, + + /* RealAudio codecs*/ + AV_CODEC_ID_RA_144 = 0x13000, + AV_CODEC_ID_RA_288, + + /* various DPCM codecs */ + AV_CODEC_ID_ROQ_DPCM = 0x14000, + AV_CODEC_ID_INTERPLAY_DPCM, + AV_CODEC_ID_XAN_DPCM, + AV_CODEC_ID_SOL_DPCM, + + AV_CODEC_ID_SDX2_DPCM = 0x14800, + AV_CODEC_ID_GREMLIN_DPCM, + + /* audio codecs */ + AV_CODEC_ID_MP2 = 0x15000, + AV_CODEC_ID_MP3, ///< preferred ID for decoding MPEG audio layer 1, 2 or 3 + AV_CODEC_ID_AAC, + AV_CODEC_ID_AC3, + AV_CODEC_ID_DTS, + AV_CODEC_ID_VORBIS, + AV_CODEC_ID_DVAUDIO, + AV_CODEC_ID_WMAV1, + AV_CODEC_ID_WMAV2, + AV_CODEC_ID_MACE3, + AV_CODEC_ID_MACE6, + AV_CODEC_ID_VMDAUDIO, + AV_CODEC_ID_FLAC, + AV_CODEC_ID_MP3ADU, + AV_CODEC_ID_MP3ON4, + AV_CODEC_ID_SHORTEN, + AV_CODEC_ID_ALAC, + AV_CODEC_ID_WESTWOOD_SND1, + AV_CODEC_ID_GSM, ///< as in Berlin toast format + AV_CODEC_ID_QDM2, + AV_CODEC_ID_COOK, + AV_CODEC_ID_TRUESPEECH, + AV_CODEC_ID_TTA, + AV_CODEC_ID_SMACKAUDIO, + AV_CODEC_ID_QCELP, + AV_CODEC_ID_WAVPACK, + AV_CODEC_ID_DSICINAUDIO, + AV_CODEC_ID_IMC, + AV_CODEC_ID_MUSEPACK7, + AV_CODEC_ID_MLP, + AV_CODEC_ID_GSM_MS, /* as found in WAV */ + AV_CODEC_ID_ATRAC3, + AV_CODEC_ID_APE, + AV_CODEC_ID_NELLYMOSER, + AV_CODEC_ID_MUSEPACK8, + AV_CODEC_ID_SPEEX, + AV_CODEC_ID_WMAVOICE, + AV_CODEC_ID_WMAPRO, + AV_CODEC_ID_WMALOSSLESS, + AV_CODEC_ID_ATRAC3P, + AV_CODEC_ID_EAC3, + AV_CODEC_ID_SIPR, + AV_CODEC_ID_MP1, + AV_CODEC_ID_TWINVQ, + AV_CODEC_ID_TRUEHD, + AV_CODEC_ID_MP4ALS, + AV_CODEC_ID_ATRAC1, + AV_CODEC_ID_BINKAUDIO_RDFT, + AV_CODEC_ID_BINKAUDIO_DCT, + AV_CODEC_ID_AAC_LATM, + AV_CODEC_ID_QDMC, + AV_CODEC_ID_CELT, + AV_CODEC_ID_G723_1, + AV_CODEC_ID_G729, + AV_CODEC_ID_8SVX_EXP, + AV_CODEC_ID_8SVX_FIB, + AV_CODEC_ID_BMV_AUDIO, + AV_CODEC_ID_RALF, + AV_CODEC_ID_IAC, + AV_CODEC_ID_ILBC, + AV_CODEC_ID_OPUS, + AV_CODEC_ID_COMFORT_NOISE, + AV_CODEC_ID_TAK, + AV_CODEC_ID_METASOUND, + AV_CODEC_ID_PAF_AUDIO, + AV_CODEC_ID_ON2AVC, + AV_CODEC_ID_DSS_SP, + AV_CODEC_ID_CODEC2, + + AV_CODEC_ID_FFWAVESYNTH = 0x15800, + AV_CODEC_ID_SONIC, + AV_CODEC_ID_SONIC_LS, + AV_CODEC_ID_EVRC, + AV_CODEC_ID_SMV, + AV_CODEC_ID_DSD_LSBF, + AV_CODEC_ID_DSD_MSBF, + AV_CODEC_ID_DSD_LSBF_PLANAR, + AV_CODEC_ID_DSD_MSBF_PLANAR, + AV_CODEC_ID_4GV, + AV_CODEC_ID_INTERPLAY_ACM, + AV_CODEC_ID_XMA1, + AV_CODEC_ID_XMA2, + AV_CODEC_ID_DST, + AV_CODEC_ID_ATRAC3AL, + AV_CODEC_ID_ATRAC3PAL, + AV_CODEC_ID_DOLBY_E, + AV_CODEC_ID_APTX, + AV_CODEC_ID_APTX_HD, + AV_CODEC_ID_SBC, + + /* subtitle codecs */ + AV_CODEC_ID_FIRST_SUBTITLE = 0x17000, ///< A dummy ID pointing at the start of subtitle codecs. + AV_CODEC_ID_DVD_SUBTITLE = 0x17000, + AV_CODEC_ID_DVB_SUBTITLE, + AV_CODEC_ID_TEXT, ///< raw UTF-8 text + AV_CODEC_ID_XSUB, + AV_CODEC_ID_SSA, + AV_CODEC_ID_MOV_TEXT, + AV_CODEC_ID_HDMV_PGS_SUBTITLE, + AV_CODEC_ID_DVB_TELETEXT, + AV_CODEC_ID_SRT, + + AV_CODEC_ID_MICRODVD = 0x17800, + AV_CODEC_ID_EIA_608, + AV_CODEC_ID_JACOSUB, + AV_CODEC_ID_SAMI, + AV_CODEC_ID_REALTEXT, + AV_CODEC_ID_STL, + AV_CODEC_ID_SUBVIEWER1, + AV_CODEC_ID_SUBVIEWER, + AV_CODEC_ID_SUBRIP, + AV_CODEC_ID_WEBVTT, + AV_CODEC_ID_MPL2, + AV_CODEC_ID_VPLAYER, + AV_CODEC_ID_PJS, + AV_CODEC_ID_ASS, + AV_CODEC_ID_HDMV_TEXT_SUBTITLE, + + /* other specific kind of codecs (generally used for attachments) */ + AV_CODEC_ID_FIRST_UNKNOWN = 0x18000, ///< A dummy ID pointing at the start of various fake codecs. + AV_CODEC_ID_TTF = 0x18000, + + AV_CODEC_ID_SCTE_35, ///< Contain timestamp estimated through PCR of program stream. + AV_CODEC_ID_BINTEXT = 0x18800, + AV_CODEC_ID_XBIN, + AV_CODEC_ID_IDF, + AV_CODEC_ID_OTF, + AV_CODEC_ID_SMPTE_KLV, + AV_CODEC_ID_DVD_NAV, + AV_CODEC_ID_TIMED_ID3, + AV_CODEC_ID_BIN_DATA, + + + AV_CODEC_ID_PROBE = 0x19000, ///< codec_id is not known (like AV_CODEC_ID_NONE) but lavf should attempt to identify it + + AV_CODEC_ID_MPEG2TS = 0x20000, /**< _FAKE_ codec to indicate a raw MPEG-2 TS + * stream (only used by libavformat) */ + AV_CODEC_ID_MPEG4SYSTEMS = 0x20001, /**< _FAKE_ codec to indicate a MPEG-4 Systems + * stream (only used by libavformat) */ + AV_CODEC_ID_FFMETADATA = 0x21000, ///< Dummy codec for streams containing only metadata information. + AV_CODEC_ID_WRAPPED_AVFRAME = 0x21001, ///< Passthrough codec, AVFrames wrapped in AVPacket +}; + +/** + * This struct describes the properties of a single codec described by an + * AVCodecID. + * @see avcodec_descriptor_get() + */ +typedef struct AVCodecDescriptor { + enum AVCodecID id; + enum AVMediaType type; + /** + * Name of the codec described by this descriptor. It is non-empty and + * unique for each codec descriptor. It should contain alphanumeric + * characters and '_' only. + */ + const char *name; + /** + * A more descriptive name for this codec. May be NULL. + */ + const char *long_name; + /** + * Codec properties, a combination of AV_CODEC_PROP_* flags. + */ + int props; + /** + * MIME type(s) associated with the codec. + * May be NULL; if not, a NULL-terminated array of MIME types. + * The first item is always non-NULL and is the preferred MIME type. + */ + const char *const *mime_types; + /** + * If non-NULL, an array of profiles recognized for this codec. + * Terminated with FF_PROFILE_UNKNOWN. + */ + const struct AVProfile *profiles; +} AVCodecDescriptor; + +/** + * Codec uses only intra compression. + * Video and audio codecs only. + */ +#define AV_CODEC_PROP_INTRA_ONLY (1 << 0) +/** + * Codec supports lossy compression. Audio and video codecs only. + * @note a codec may support both lossy and lossless + * compression modes + */ +#define AV_CODEC_PROP_LOSSY (1 << 1) +/** + * Codec supports lossless compression. Audio and video codecs only. + */ +#define AV_CODEC_PROP_LOSSLESS (1 << 2) +/** + * Codec supports frame reordering. That is, the coded order (the order in which + * the encoded packets are output by the encoders / stored / input to the + * decoders) may be different from the presentation order of the corresponding + * frames. + * + * For codecs that do not have this property set, PTS and DTS should always be + * equal. + */ +#define AV_CODEC_PROP_REORDER (1 << 3) +/** + * Subtitle codec is bitmap based + * Decoded AVSubtitle data can be read from the AVSubtitleRect->pict field. + */ +#define AV_CODEC_PROP_BITMAP_SUB (1 << 16) +/** + * Subtitle codec is text based. + * Decoded AVSubtitle data can be read from the AVSubtitleRect->ass field. + */ +#define AV_CODEC_PROP_TEXT_SUB (1 << 17) + +/** + * @ingroup lavc_decoding + * Required number of additionally allocated bytes at the end of the input bitstream for decoding. + * This is mainly needed because some optimized bitstream readers read + * 32 or 64 bit at once and could read over the end.<br> + * Note: If the first 23 bits of the additional bytes are not 0, then damaged + * MPEG bitstreams could cause overread and segfault. + */ +#define AV_INPUT_BUFFER_PADDING_SIZE 64 + +/** + * @ingroup lavc_encoding + * minimum encoding buffer size + * Used to avoid some checks during header writing. + */ +#define AV_INPUT_BUFFER_MIN_SIZE 16384 + +/** + * @ingroup lavc_decoding + */ +enum AVDiscard{ + /* We leave some space between them for extensions (drop some + * keyframes for intra-only or drop just some bidir frames). */ + AVDISCARD_NONE =-16, ///< discard nothing + AVDISCARD_DEFAULT = 0, ///< discard useless packets like 0 size packets in avi + AVDISCARD_NONREF = 8, ///< discard all non reference + AVDISCARD_BIDIR = 16, ///< discard all bidirectional frames + AVDISCARD_NONINTRA= 24, ///< discard all non intra frames + AVDISCARD_NONKEY = 32, ///< discard all frames except keyframes + AVDISCARD_ALL = 48, ///< discard all +}; + +enum AVAudioServiceType { + AV_AUDIO_SERVICE_TYPE_MAIN = 0, + AV_AUDIO_SERVICE_TYPE_EFFECTS = 1, + AV_AUDIO_SERVICE_TYPE_VISUALLY_IMPAIRED = 2, + AV_AUDIO_SERVICE_TYPE_HEARING_IMPAIRED = 3, + AV_AUDIO_SERVICE_TYPE_DIALOGUE = 4, + AV_AUDIO_SERVICE_TYPE_COMMENTARY = 5, + AV_AUDIO_SERVICE_TYPE_EMERGENCY = 6, + AV_AUDIO_SERVICE_TYPE_VOICE_OVER = 7, + AV_AUDIO_SERVICE_TYPE_KARAOKE = 8, + AV_AUDIO_SERVICE_TYPE_NB , ///< Not part of ABI +}; + +/** + * @ingroup lavc_encoding + */ +typedef struct RcOverride{ + int start_frame; + int end_frame; + int qscale; // If this is 0 then quality_factor will be used instead. + float quality_factor; +} RcOverride; + +/* encoding support + These flags can be passed in AVCodecContext.flags before initialization. + Note: Not everything is supported yet. +*/ + +/** + * Allow decoders to produce frames with data planes that are not aligned + * to CPU requirements (e.g. due to cropping). + */ +#define AV_CODEC_FLAG_UNALIGNED (1 << 0) +/** + * Use fixed qscale. + */ +#define AV_CODEC_FLAG_QSCALE (1 << 1) +/** + * 4 MV per MB allowed / advanced prediction for H.263. + */ +#define AV_CODEC_FLAG_4MV (1 << 2) +/** + * Output even those frames that might be corrupted. + */ +#define AV_CODEC_FLAG_OUTPUT_CORRUPT (1 << 3) +/** + * Use qpel MC. + */ +#define AV_CODEC_FLAG_QPEL (1 << 4) +/** + * Use internal 2pass ratecontrol in first pass mode. + */ +#define AV_CODEC_FLAG_PASS1 (1 << 9) +/** + * Use internal 2pass ratecontrol in second pass mode. + */ +#define AV_CODEC_FLAG_PASS2 (1 << 10) +/** + * loop filter. + */ +#define AV_CODEC_FLAG_LOOP_FILTER (1 << 11) +/** + * Only decode/encode grayscale. + */ +#define AV_CODEC_FLAG_GRAY (1 << 13) +/** + * error[?] variables will be set during encoding. + */ +#define AV_CODEC_FLAG_PSNR (1 << 15) +/** + * Input bitstream might be truncated at a random location + * instead of only at frame boundaries. + */ +#define AV_CODEC_FLAG_TRUNCATED (1 << 16) +/** + * Use interlaced DCT. + */ +#define AV_CODEC_FLAG_INTERLACED_DCT (1 << 18) +/** + * Force low delay. + */ +#define AV_CODEC_FLAG_LOW_DELAY (1 << 19) +/** + * Place global headers in extradata instead of every keyframe. + */ +#define AV_CODEC_FLAG_GLOBAL_HEADER (1 << 22) +/** + * Use only bitexact stuff (except (I)DCT). + */ +#define AV_CODEC_FLAG_BITEXACT (1 << 23) +/* Fx : Flag for H.263+ extra options */ +/** + * H.263 advanced intra coding / MPEG-4 AC prediction + */ +#define AV_CODEC_FLAG_AC_PRED (1 << 24) +/** + * interlaced motion estimation + */ +#define AV_CODEC_FLAG_INTERLACED_ME (1 << 29) +#define AV_CODEC_FLAG_CLOSED_GOP (1U << 31) + +/** + * Allow non spec compliant speedup tricks. + */ +#define AV_CODEC_FLAG2_FAST (1 << 0) +/** + * Skip bitstream encoding. + */ +#define AV_CODEC_FLAG2_NO_OUTPUT (1 << 2) +/** + * Place global headers at every keyframe instead of in extradata. + */ +#define AV_CODEC_FLAG2_LOCAL_HEADER (1 << 3) + +/** + * timecode is in drop frame format. DEPRECATED!!!! + */ +#define AV_CODEC_FLAG2_DROP_FRAME_TIMECODE (1 << 13) + +/** + * Input bitstream might be truncated at a packet boundaries + * instead of only at frame boundaries. + */ +#define AV_CODEC_FLAG2_CHUNKS (1 << 15) +/** + * Discard cropping information from SPS. + */ +#define AV_CODEC_FLAG2_IGNORE_CROP (1 << 16) + +/** + * Show all frames before the first keyframe + */ +#define AV_CODEC_FLAG2_SHOW_ALL (1 << 22) +/** + * Export motion vectors through frame side data + */ +#define AV_CODEC_FLAG2_EXPORT_MVS (1 << 28) +/** + * Do not skip samples and export skip information as frame side data + */ +#define AV_CODEC_FLAG2_SKIP_MANUAL (1 << 29) +/** + * Do not reset ASS ReadOrder field on flush (subtitles decoding) + */ +#define AV_CODEC_FLAG2_RO_FLUSH_NOOP (1 << 30) + +/* Unsupported options : + * Syntax Arithmetic coding (SAC) + * Reference Picture Selection + * Independent Segment Decoding */ +/* /Fx */ +/* codec capabilities */ + +/** + * Decoder can use draw_horiz_band callback. + */ +#define AV_CODEC_CAP_DRAW_HORIZ_BAND (1 << 0) +/** + * Codec uses get_buffer() for allocating buffers and supports custom allocators. + * If not set, it might not use get_buffer() at all or use operations that + * assume the buffer was allocated by avcodec_default_get_buffer. + */ +#define AV_CODEC_CAP_DR1 (1 << 1) +#define AV_CODEC_CAP_TRUNCATED (1 << 3) +/** + * Encoder or decoder requires flushing with NULL input at the end in order to + * give the complete and correct output. + * + * NOTE: If this flag is not set, the codec is guaranteed to never be fed with + * with NULL data. The user can still send NULL data to the public encode + * or decode function, but libavcodec will not pass it along to the codec + * unless this flag is set. + * + * Decoders: + * The decoder has a non-zero delay and needs to be fed with avpkt->data=NULL, + * avpkt->size=0 at the end to get the delayed data until the decoder no longer + * returns frames. + * + * Encoders: + * The encoder needs to be fed with NULL data at the end of encoding until the + * encoder no longer returns data. + * + * NOTE: For encoders implementing the AVCodec.encode2() function, setting this + * flag also means that the encoder must set the pts and duration for + * each output packet. If this flag is not set, the pts and duration will + * be determined by libavcodec from the input frame. + */ +#define AV_CODEC_CAP_DELAY (1 << 5) +/** + * Codec can be fed a final frame with a smaller size. + * This can be used to prevent truncation of the last audio samples. + */ +#define AV_CODEC_CAP_SMALL_LAST_FRAME (1 << 6) + +/** + * Codec can output multiple frames per AVPacket + * Normally demuxers return one frame at a time, demuxers which do not do + * are connected to a parser to split what they return into proper frames. + * This flag is reserved to the very rare category of codecs which have a + * bitstream that cannot be split into frames without timeconsuming + * operations like full decoding. Demuxers carrying such bitstreams thus + * may return multiple frames in a packet. This has many disadvantages like + * prohibiting stream copy in many cases thus it should only be considered + * as a last resort. + */ +#define AV_CODEC_CAP_SUBFRAMES (1 << 8) +/** + * Codec is experimental and is thus avoided in favor of non experimental + * encoders + */ +#define AV_CODEC_CAP_EXPERIMENTAL (1 << 9) +/** + * Codec should fill in channel configuration and samplerate instead of container + */ +#define AV_CODEC_CAP_CHANNEL_CONF (1 << 10) +/** + * Codec supports frame-level multithreading. + */ +#define AV_CODEC_CAP_FRAME_THREADS (1 << 12) +/** + * Codec supports slice-based (or partition-based) multithreading. + */ +#define AV_CODEC_CAP_SLICE_THREADS (1 << 13) +/** + * Codec supports changed parameters at any point. + */ +#define AV_CODEC_CAP_PARAM_CHANGE (1 << 14) +/** + * Codec supports avctx->thread_count == 0 (auto). + */ +#define AV_CODEC_CAP_AUTO_THREADS (1 << 15) +/** + * Audio encoder supports receiving a different number of samples in each call. + */ +#define AV_CODEC_CAP_VARIABLE_FRAME_SIZE (1 << 16) +/** + * Decoder is not a preferred choice for probing. + * This indicates that the decoder is not a good choice for probing. + * It could for example be an expensive to spin up hardware decoder, + * or it could simply not provide a lot of useful information about + * the stream. + * A decoder marked with this flag should only be used as last resort + * choice for probing. + */ +#define AV_CODEC_CAP_AVOID_PROBING (1 << 17) +/** + * Codec is intra only. + */ +#define AV_CODEC_CAP_INTRA_ONLY 0x40000000 +/** + * Codec is lossless. + */ +#define AV_CODEC_CAP_LOSSLESS 0x80000000 + +/** + * Codec is backed by a hardware implementation. Typically used to + * identify a non-hwaccel hardware decoder. For information about hwaccels, use + * avcodec_get_hw_config() instead. + */ +#define AV_CODEC_CAP_HARDWARE (1 << 18) + +/** + * Codec is potentially backed by a hardware implementation, but not + * necessarily. This is used instead of AV_CODEC_CAP_HARDWARE, if the + * implementation provides some sort of internal fallback. + */ +#define AV_CODEC_CAP_HYBRID (1 << 19) + +/** + * Pan Scan area. + * This specifies the area which should be displayed. + * Note there may be multiple such areas for one frame. + */ +typedef struct AVPanScan { + /** + * id + * - encoding: Set by user. + * - decoding: Set by libavcodec. + */ + int id; + + /** + * width and height in 1/16 pel + * - encoding: Set by user. + * - decoding: Set by libavcodec. + */ + int width; + int height; + + /** + * position of the top left corner in 1/16 pel for up to 3 fields/frames + * - encoding: Set by user. + * - decoding: Set by libavcodec. + */ + int16_t position[3][2]; +} AVPanScan; + +/** + * This structure describes the bitrate properties of an encoded bitstream. It + * roughly corresponds to a subset the VBV parameters for MPEG-2 or HRD + * parameters for H.264/HEVC. + */ +typedef struct AVCPBProperties { + /** + * Maximum bitrate of the stream, in bits per second. + * Zero if unknown or unspecified. + */ + int max_bitrate; + /** + * Minimum bitrate of the stream, in bits per second. + * Zero if unknown or unspecified. + */ + int min_bitrate; + /** + * Average bitrate of the stream, in bits per second. + * Zero if unknown or unspecified. + */ + int avg_bitrate; + + /** + * The size of the buffer to which the ratecontrol is applied, in bits. + * Zero if unknown or unspecified. + */ + int buffer_size; + + /** + * The delay between the time the packet this structure is associated with + * is received and the time when it should be decoded, in periods of a 27MHz + * clock. + * + * UINT64_MAX when unknown or unspecified. + */ + uint64_t vbv_delay; +} AVCPBProperties; + +/** + * The decoder will keep a reference to the frame and may reuse it later. + */ +#define AV_GET_BUFFER_FLAG_REF (1 << 0) + +/** + * @defgroup lavc_packet AVPacket + * + * Types and functions for working with AVPacket. + * @{ + */ +enum AVPacketSideDataType { + /** + * An AV_PKT_DATA_PALETTE side data packet contains exactly AVPALETTE_SIZE + * bytes worth of palette. This side data signals that a new palette is + * present. + */ + AV_PKT_DATA_PALETTE, + + /** + * The AV_PKT_DATA_NEW_EXTRADATA is used to notify the codec or the format + * that the extradata buffer was changed and the receiving side should + * act upon it appropriately. The new extradata is embedded in the side + * data buffer and should be immediately used for processing the current + * frame or packet. + */ + AV_PKT_DATA_NEW_EXTRADATA, + + /** + * An AV_PKT_DATA_PARAM_CHANGE side data packet is laid out as follows: + * @code + * u32le param_flags + * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT) + * s32le channel_count + * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT) + * u64le channel_layout + * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_SAMPLE_RATE) + * s32le sample_rate + * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_DIMENSIONS) + * s32le width + * s32le height + * @endcode + */ + AV_PKT_DATA_PARAM_CHANGE, + + /** + * An AV_PKT_DATA_H263_MB_INFO side data packet contains a number of + * structures with info about macroblocks relevant to splitting the + * packet into smaller packets on macroblock edges (e.g. as for RFC 2190). + * That is, it does not necessarily contain info about all macroblocks, + * as long as the distance between macroblocks in the info is smaller + * than the target payload size. + * Each MB info structure is 12 bytes, and is laid out as follows: + * @code + * u32le bit offset from the start of the packet + * u8 current quantizer at the start of the macroblock + * u8 GOB number + * u16le macroblock address within the GOB + * u8 horizontal MV predictor + * u8 vertical MV predictor + * u8 horizontal MV predictor for block number 3 + * u8 vertical MV predictor for block number 3 + * @endcode + */ + AV_PKT_DATA_H263_MB_INFO, + + /** + * This side data should be associated with an audio stream and contains + * ReplayGain information in form of the AVReplayGain struct. + */ + AV_PKT_DATA_REPLAYGAIN, + + /** + * This side data contains a 3x3 transformation matrix describing an affine + * transformation that needs to be applied to the decoded video frames for + * correct presentation. + * + * See libavutil/display.h for a detailed description of the data. + */ + AV_PKT_DATA_DISPLAYMATRIX, + + /** + * This side data should be associated with a video stream and contains + * Stereoscopic 3D information in form of the AVStereo3D struct. + */ + AV_PKT_DATA_STEREO3D, + + /** + * This side data should be associated with an audio stream and corresponds + * to enum AVAudioServiceType. + */ + AV_PKT_DATA_AUDIO_SERVICE_TYPE, + + /** + * This side data contains quality related information from the encoder. + * @code + * u32le quality factor of the compressed frame. Allowed range is between 1 (good) and FF_LAMBDA_MAX (bad). + * u8 picture type + * u8 error count + * u16 reserved + * u64le[error count] sum of squared differences between encoder in and output + * @endcode + */ + AV_PKT_DATA_QUALITY_STATS, + + /** + * This side data contains an integer value representing the stream index + * of a "fallback" track. A fallback track indicates an alternate + * track to use when the current track can not be decoded for some reason. + * e.g. no decoder available for codec. + */ + AV_PKT_DATA_FALLBACK_TRACK, + + /** + * This side data corresponds to the AVCPBProperties struct. + */ + AV_PKT_DATA_CPB_PROPERTIES, + + /** + * Recommmends skipping the specified number of samples + * @code + * u32le number of samples to skip from start of this packet + * u32le number of samples to skip from end of this packet + * u8 reason for start skip + * u8 reason for end skip (0=padding silence, 1=convergence) + * @endcode + */ + AV_PKT_DATA_SKIP_SAMPLES, + + /** + * An AV_PKT_DATA_JP_DUALMONO side data packet indicates that + * the packet may contain "dual mono" audio specific to Japanese DTV + * and if it is true, recommends only the selected channel to be used. + * @code + * u8 selected channels (0=mail/left, 1=sub/right, 2=both) + * @endcode + */ + AV_PKT_DATA_JP_DUALMONO, + + /** + * A list of zero terminated key/value strings. There is no end marker for + * the list, so it is required to rely on the side data size to stop. + */ + AV_PKT_DATA_STRINGS_METADATA, + + /** + * Subtitle event position + * @code + * u32le x1 + * u32le y1 + * u32le x2 + * u32le y2 + * @endcode + */ + AV_PKT_DATA_SUBTITLE_POSITION, + + /** + * Data found in BlockAdditional element of matroska container. There is + * no end marker for the data, so it is required to rely on the side data + * size to recognize the end. 8 byte id (as found in BlockAddId) followed + * by data. + */ + AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL, + + /** + * The optional first identifier line of a WebVTT cue. + */ + AV_PKT_DATA_WEBVTT_IDENTIFIER, + + /** + * The optional settings (rendering instructions) that immediately + * follow the timestamp specifier of a WebVTT cue. + */ + AV_PKT_DATA_WEBVTT_SETTINGS, + + /** + * A list of zero terminated key/value strings. There is no end marker for + * the list, so it is required to rely on the side data size to stop. This + * side data includes updated metadata which appeared in the stream. + */ + AV_PKT_DATA_METADATA_UPDATE, + + /** + * MPEGTS stream ID, this is required to pass the stream ID + * information from the demuxer to the corresponding muxer. + */ + AV_PKT_DATA_MPEGTS_STREAM_ID, + + /** + * Mastering display metadata (based on SMPTE-2086:2014). This metadata + * should be associated with a video stream and contains data in the form + * of the AVMasteringDisplayMetadata struct. + */ + AV_PKT_DATA_MASTERING_DISPLAY_METADATA, + + /** + * This side data should be associated with a video stream and corresponds + * to the AVSphericalMapping structure. + */ + AV_PKT_DATA_SPHERICAL, + + /** + * Content light level (based on CTA-861.3). This metadata should be + * associated with a video stream and contains data in the form of the + * AVContentLightMetadata struct. + */ + AV_PKT_DATA_CONTENT_LIGHT_LEVEL, + + /** + * ATSC A53 Part 4 Closed Captions. This metadata should be associated with + * a video stream. A53 CC bitstream is stored as uint8_t in AVPacketSideData.data. + * The number of bytes of CC data is AVPacketSideData.size. + */ + AV_PKT_DATA_A53_CC, + + /** + * This side data is encryption initialization data. + * The format is not part of ABI, use av_encryption_init_info_* methods to + * access. + */ + AV_PKT_DATA_ENCRYPTION_INIT_INFO, + + /** + * This side data contains encryption info for how to decrypt the packet. + * The format is not part of ABI, use av_encryption_info_* methods to access. + */ + AV_PKT_DATA_ENCRYPTION_INFO, + + /** + * The number of side data types. + * This is not part of the public API/ABI in the sense that it may + * change when new side data types are added. + * This must stay the last enum value. + * If its value becomes huge, some code using it + * needs to be updated as it assumes it to be smaller than other limits. + */ + AV_PKT_DATA_NB +}; + +#define AV_PKT_DATA_QUALITY_FACTOR AV_PKT_DATA_QUALITY_STATS //DEPRECATED + +typedef struct AVPacketSideData { + uint8_t *data; + int size; + enum AVPacketSideDataType type; +} AVPacketSideData; + +/** + * This structure stores compressed data. It is typically exported by demuxers + * and then passed as input to decoders, or received as output from encoders and + * then passed to muxers. + * + * For video, it should typically contain one compressed frame. For audio it may + * contain several compressed frames. Encoders are allowed to output empty + * packets, with no compressed data, containing only side data + * (e.g. to update some stream parameters at the end of encoding). + * + * AVPacket is one of the few structs in FFmpeg, whose size is a part of public + * ABI. Thus it may be allocated on stack and no new fields can be added to it + * without libavcodec and libavformat major bump. + * + * The semantics of data ownership depends on the buf field. + * If it is set, the packet data is dynamically allocated and is + * valid indefinitely until a call to av_packet_unref() reduces the + * reference count to 0. + * + * If the buf field is not set av_packet_ref() would make a copy instead + * of increasing the reference count. + * + * The side data is always allocated with av_malloc(), copied by + * av_packet_ref() and freed by av_packet_unref(). + * + * @see av_packet_ref + * @see av_packet_unref + */ +typedef struct AVPacket { + /** + * A reference to the reference-counted buffer where the packet data is + * stored. + * May be NULL, then the packet data is not reference-counted. + */ + AVBufferRef *buf; + /** + * Presentation timestamp in AVStream->time_base units; the time at which + * the decompressed packet will be presented to the user. + * Can be AV_NOPTS_VALUE if it is not stored in the file. + * pts MUST be larger or equal to dts as presentation cannot happen before + * decompression, unless one wants to view hex dumps. Some formats misuse + * the terms dts and pts/cts to mean something different. Such timestamps + * must be converted to true pts/dts before they are stored in AVPacket. + */ + int64_t pts; + /** + * Decompression timestamp in AVStream->time_base units; the time at which + * the packet is decompressed. + * Can be AV_NOPTS_VALUE if it is not stored in the file. + */ + int64_t dts; + uint8_t *data; + int size; + int stream_index; + /** + * A combination of AV_PKT_FLAG values + */ + int flags; + /** + * Additional packet data that can be provided by the container. + * Packet can contain several types of side information. + */ + AVPacketSideData *side_data; + int side_data_elems; + + /** + * Duration of this packet in AVStream->time_base units, 0 if unknown. + * Equals next_pts - this_pts in presentation order. + */ + int64_t duration; + + int64_t pos; ///< byte position in stream, -1 if unknown + +#if FF_API_CONVERGENCE_DURATION + /** + * @deprecated Same as the duration field, but as int64_t. This was required + * for Matroska subtitles, whose duration values could overflow when the + * duration field was still an int. + */ + attribute_deprecated + int64_t convergence_duration; +#endif +} AVPacket; +#define AV_PKT_FLAG_KEY 0x0001 ///< The packet contains a keyframe +#define AV_PKT_FLAG_CORRUPT 0x0002 ///< The packet content is corrupted +/** + * Flag is used to discard packets which are required to maintain valid + * decoder state but are not required for output and should be dropped + * after decoding. + **/ +#define AV_PKT_FLAG_DISCARD 0x0004 +/** + * The packet comes from a trusted source. + * + * Otherwise-unsafe constructs such as arbitrary pointers to data + * outside the packet may be followed. + */ +#define AV_PKT_FLAG_TRUSTED 0x0008 +/** + * Flag is used to indicate packets that contain frames that can + * be discarded by the decoder. I.e. Non-reference frames. + */ +#define AV_PKT_FLAG_DISPOSABLE 0x0010 + + +enum AVSideDataParamChangeFlags { + AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT = 0x0001, + AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT = 0x0002, + AV_SIDE_DATA_PARAM_CHANGE_SAMPLE_RATE = 0x0004, + AV_SIDE_DATA_PARAM_CHANGE_DIMENSIONS = 0x0008, +}; +/** + * @} + */ + +struct AVCodecInternal; + +enum AVFieldOrder { + AV_FIELD_UNKNOWN, + AV_FIELD_PROGRESSIVE, + AV_FIELD_TT, //< Top coded_first, top displayed first + AV_FIELD_BB, //< Bottom coded first, bottom displayed first + AV_FIELD_TB, //< Top coded first, bottom displayed first + AV_FIELD_BT, //< Bottom coded first, top displayed first +}; + +/** + * main external API structure. + * New fields can be added to the end with minor version bumps. + * Removal, reordering and changes to existing fields require a major + * version bump. + * You can use AVOptions (av_opt* / av_set/get*()) to access these fields from user + * applications. + * The name string for AVOptions options matches the associated command line + * parameter name and can be found in libavcodec/options_table.h + * The AVOption/command line parameter names differ in some cases from the C + * structure field names for historic reasons or brevity. + * sizeof(AVCodecContext) must not be used outside libav*. + */ +typedef struct AVCodecContext { + /** + * information on struct for av_log + * - set by avcodec_alloc_context3 + */ + const AVClass *av_class; + int log_level_offset; + + enum AVMediaType codec_type; /* see AVMEDIA_TYPE_xxx */ + const struct AVCodec *codec; + enum AVCodecID codec_id; /* see AV_CODEC_ID_xxx */ + + /** + * fourcc (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A'). + * This is used to work around some encoder bugs. + * A demuxer should set this to what is stored in the field used to identify the codec. + * If there are multiple such fields in a container then the demuxer should choose the one + * which maximizes the information about the used codec. + * If the codec tag field in a container is larger than 32 bits then the demuxer should + * remap the longer ID to 32 bits with a table or other structure. Alternatively a new + * extra_codec_tag + size could be added but for this a clear advantage must be demonstrated + * first. + * - encoding: Set by user, if not then the default based on codec_id will be used. + * - decoding: Set by user, will be converted to uppercase by libavcodec during init. + */ + unsigned int codec_tag; + + void *priv_data; + + /** + * Private context used for internal data. + * + * Unlike priv_data, this is not codec-specific. It is used in general + * libavcodec functions. + */ + struct AVCodecInternal *internal; + + /** + * Private data of the user, can be used to carry app specific stuff. + * - encoding: Set by user. + * - decoding: Set by user. + */ + void *opaque; + + /** + * the average bitrate + * - encoding: Set by user; unused for constant quantizer encoding. + * - decoding: Set by user, may be overwritten by libavcodec + * if this info is available in the stream + */ + int64_t bit_rate; + + /** + * number of bits the bitstream is allowed to diverge from the reference. + * the reference can be CBR (for CBR pass1) or VBR (for pass2) + * - encoding: Set by user; unused for constant quantizer encoding. + * - decoding: unused + */ + int bit_rate_tolerance; + + /** + * Global quality for codecs which cannot change it per frame. + * This should be proportional to MPEG-1/2/4 qscale. + * - encoding: Set by user. + * - decoding: unused + */ + int global_quality; + + /** + * - encoding: Set by user. + * - decoding: unused + */ + int compression_level; +#define FF_COMPRESSION_DEFAULT -1 + + /** + * AV_CODEC_FLAG_*. + * - encoding: Set by user. + * - decoding: Set by user. + */ + int flags; + + /** + * AV_CODEC_FLAG2_* + * - encoding: Set by user. + * - decoding: Set by user. + */ + int flags2; + + /** + * some codecs need / can use extradata like Huffman tables. + * MJPEG: Huffman tables + * rv10: additional flags + * MPEG-4: global headers (they can be in the bitstream or here) + * The allocated memory should be AV_INPUT_BUFFER_PADDING_SIZE bytes larger + * than extradata_size to avoid problems if it is read with the bitstream reader. + * The bytewise contents of extradata must not depend on the architecture or CPU endianness. + * - encoding: Set/allocated/freed by libavcodec. + * - decoding: Set/allocated/freed by user. + */ + uint8_t *extradata; + int extradata_size; + + /** + * This is the fundamental unit of time (in seconds) in terms + * of which frame timestamps are represented. For fixed-fps content, + * timebase should be 1/framerate and timestamp increments should be + * identically 1. + * This often, but not always is the inverse of the frame rate or field rate + * for video. 1/time_base is not the average frame rate if the frame rate is not + * constant. + * + * Like containers, elementary streams also can store timestamps, 1/time_base + * is the unit in which these timestamps are specified. + * As example of such codec time base see ISO/IEC 14496-2:2001(E) + * vop_time_increment_resolution and fixed_vop_rate + * (fixed_vop_rate == 0 implies that it is different from the framerate) + * + * - encoding: MUST be set by user. + * - decoding: the use of this field for decoding is deprecated. + * Use framerate instead. + */ + AVRational time_base; + + /** + * For some codecs, the time base is closer to the field rate than the frame rate. + * Most notably, H.264 and MPEG-2 specify time_base as half of frame duration + * if no telecine is used ... + * + * Set to time_base ticks per frame. Default 1, e.g., H.264/MPEG-2 set it to 2. + */ + int ticks_per_frame; + + /** + * Codec delay. + * + * Encoding: Number of frames delay there will be from the encoder input to + * the decoder output. (we assume the decoder matches the spec) + * Decoding: Number of frames delay in addition to what a standard decoder + * as specified in the spec would produce. + * + * Video: + * Number of frames the decoded output will be delayed relative to the + * encoded input. + * + * Audio: + * For encoding, this field is unused (see initial_padding). + * + * For decoding, this is the number of samples the decoder needs to + * output before the decoder's output is valid. When seeking, you should + * start decoding this many samples prior to your desired seek point. + * + * - encoding: Set by libavcodec. + * - decoding: Set by libavcodec. + */ + int delay; + + + /* video only */ + /** + * picture width / height. + * + * @note Those fields may not match the values of the last + * AVFrame output by avcodec_decode_video2 due frame + * reordering. + * + * - encoding: MUST be set by user. + * - decoding: May be set by the user before opening the decoder if known e.g. + * from the container. Some decoders will require the dimensions + * to be set by the caller. During decoding, the decoder may + * overwrite those values as required while parsing the data. + */ + int width, height; + + /** + * Bitstream width / height, may be different from width/height e.g. when + * the decoded frame is cropped before being output or lowres is enabled. + * + * @note Those field may not match the value of the last + * AVFrame output by avcodec_receive_frame() due frame + * reordering. + * + * - encoding: unused + * - decoding: May be set by the user before opening the decoder if known + * e.g. from the container. During decoding, the decoder may + * overwrite those values as required while parsing the data. + */ + int coded_width, coded_height; + + /** + * the number of pictures in a group of pictures, or 0 for intra_only + * - encoding: Set by user. + * - decoding: unused + */ + int gop_size; + + /** + * Pixel format, see AV_PIX_FMT_xxx. + * May be set by the demuxer if known from headers. + * May be overridden by the decoder if it knows better. + * + * @note This field may not match the value of the last + * AVFrame output by avcodec_receive_frame() due frame + * reordering. + * + * - encoding: Set by user. + * - decoding: Set by user if known, overridden by libavcodec while + * parsing the data. + */ + enum AVPixelFormat pix_fmt; + + /** + * If non NULL, 'draw_horiz_band' is called by the libavcodec + * decoder to draw a horizontal band. It improves cache usage. Not + * all codecs can do that. You must check the codec capabilities + * beforehand. + * When multithreading is used, it may be called from multiple threads + * at the same time; threads might draw different parts of the same AVFrame, + * or multiple AVFrames, and there is no guarantee that slices will be drawn + * in order. + * The function is also used by hardware acceleration APIs. + * It is called at least once during frame decoding to pass + * the data needed for hardware render. + * In that mode instead of pixel data, AVFrame points to + * a structure specific to the acceleration API. The application + * reads the structure and can change some fields to indicate progress + * or mark state. + * - encoding: unused + * - decoding: Set by user. + * @param height the height of the slice + * @param y the y position of the slice + * @param type 1->top field, 2->bottom field, 3->frame + * @param offset offset into the AVFrame.data from which the slice should be read + */ + void (*draw_horiz_band)(struct AVCodecContext *s, + const AVFrame *src, int offset[AV_NUM_DATA_POINTERS], + int y, int type, int height); + + /** + * callback to negotiate the pixelFormat + * @param fmt is the list of formats which are supported by the codec, + * it is terminated by -1 as 0 is a valid format, the formats are ordered by quality. + * The first is always the native one. + * @note The callback may be called again immediately if initialization for + * the selected (hardware-accelerated) pixel format failed. + * @warning Behavior is undefined if the callback returns a value not + * in the fmt list of formats. + * @return the chosen format + * - encoding: unused + * - decoding: Set by user, if not set the native format will be chosen. + */ + enum AVPixelFormat (*get_format)(struct AVCodecContext *s, const enum AVPixelFormat * fmt); + + /** + * maximum number of B-frames between non-B-frames + * Note: The output will be delayed by max_b_frames+1 relative to the input. + * - encoding: Set by user. + * - decoding: unused + */ + int max_b_frames; + + /** + * qscale factor between IP and B-frames + * If > 0 then the last P-frame quantizer will be used (q= lastp_q*factor+offset). + * If < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset). + * - encoding: Set by user. + * - decoding: unused + */ + float b_quant_factor; + +#if FF_API_PRIVATE_OPT + /** @deprecated use encoder private options instead */ + attribute_deprecated + int b_frame_strategy; +#endif + + /** + * qscale offset between IP and B-frames + * - encoding: Set by user. + * - decoding: unused + */ + float b_quant_offset; + + /** + * Size of the frame reordering buffer in the decoder. + * For MPEG-2 it is 1 IPB or 0 low delay IP. + * - encoding: Set by libavcodec. + * - decoding: Set by libavcodec. + */ + int has_b_frames; + +#if FF_API_PRIVATE_OPT + /** @deprecated use encoder private options instead */ + attribute_deprecated + int mpeg_quant; +#endif + + /** + * qscale factor between P- and I-frames + * If > 0 then the last P-frame quantizer will be used (q = lastp_q * factor + offset). + * If < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset). + * - encoding: Set by user. + * - decoding: unused + */ + float i_quant_factor; + + /** + * qscale offset between P and I-frames + * - encoding: Set by user. + * - decoding: unused + */ + float i_quant_offset; + + /** + * luminance masking (0-> disabled) + * - encoding: Set by user. + * - decoding: unused + */ + float lumi_masking; + + /** + * temporary complexity masking (0-> disabled) + * - encoding: Set by user. + * - decoding: unused + */ + float temporal_cplx_masking; + + /** + * spatial complexity masking (0-> disabled) + * - encoding: Set by user. + * - decoding: unused + */ + float spatial_cplx_masking; + + /** + * p block masking (0-> disabled) + * - encoding: Set by user. + * - decoding: unused + */ + float p_masking; + + /** + * darkness masking (0-> disabled) + * - encoding: Set by user. + * - decoding: unused + */ + float dark_masking; + + /** + * slice count + * - encoding: Set by libavcodec. + * - decoding: Set by user (or 0). + */ + int slice_count; + +#if FF_API_PRIVATE_OPT + /** @deprecated use encoder private options instead */ + attribute_deprecated + int prediction_method; +#define FF_PRED_LEFT 0 +#define FF_PRED_PLANE 1 +#define FF_PRED_MEDIAN 2 +#endif + + /** + * slice offsets in the frame in bytes + * - encoding: Set/allocated by libavcodec. + * - decoding: Set/allocated by user (or NULL). + */ + int *slice_offset; + + /** + * sample aspect ratio (0 if unknown) + * That is the width of a pixel divided by the height of the pixel. + * Numerator and denominator must be relatively prime and smaller than 256 for some video standards. + * - encoding: Set by user. + * - decoding: Set by libavcodec. + */ + AVRational sample_aspect_ratio; + + /** + * motion estimation comparison function + * - encoding: Set by user. + * - decoding: unused + */ + int me_cmp; + /** + * subpixel motion estimation comparison function + * - encoding: Set by user. + * - decoding: unused + */ + int me_sub_cmp; + /** + * macroblock comparison function (not supported yet) + * - encoding: Set by user. + * - decoding: unused + */ + int mb_cmp; + /** + * interlaced DCT comparison function + * - encoding: Set by user. + * - decoding: unused + */ + int ildct_cmp; +#define FF_CMP_SAD 0 +#define FF_CMP_SSE 1 +#define FF_CMP_SATD 2 +#define FF_CMP_DCT 3 +#define FF_CMP_PSNR 4 +#define FF_CMP_BIT 5 +#define FF_CMP_RD 6 +#define FF_CMP_ZERO 7 +#define FF_CMP_VSAD 8 +#define FF_CMP_VSSE 9 +#define FF_CMP_NSSE 10 +#define FF_CMP_W53 11 +#define FF_CMP_W97 12 +#define FF_CMP_DCTMAX 13 +#define FF_CMP_DCT264 14 +#define FF_CMP_MEDIAN_SAD 15 +#define FF_CMP_CHROMA 256 + + /** + * ME diamond size & shape + * - encoding: Set by user. + * - decoding: unused + */ + int dia_size; + + /** + * amount of previous MV predictors (2a+1 x 2a+1 square) + * - encoding: Set by user. + * - decoding: unused + */ + int last_predictor_count; + +#if FF_API_PRIVATE_OPT + /** @deprecated use encoder private options instead */ + attribute_deprecated + int pre_me; +#endif + + /** + * motion estimation prepass comparison function + * - encoding: Set by user. + * - decoding: unused + */ + int me_pre_cmp; + + /** + * ME prepass diamond size & shape + * - encoding: Set by user. + * - decoding: unused + */ + int pre_dia_size; + + /** + * subpel ME quality + * - encoding: Set by user. + * - decoding: unused + */ + int me_subpel_quality; + + /** + * maximum motion estimation search range in subpel units + * If 0 then no limit. + * + * - encoding: Set by user. + * - decoding: unused + */ + int me_range; + + /** + * slice flags + * - encoding: unused + * - decoding: Set by user. + */ + int slice_flags; +#define SLICE_FLAG_CODED_ORDER 0x0001 ///< draw_horiz_band() is called in coded order instead of display +#define SLICE_FLAG_ALLOW_FIELD 0x0002 ///< allow draw_horiz_band() with field slices (MPEG-2 field pics) +#define SLICE_FLAG_ALLOW_PLANE 0x0004 ///< allow draw_horiz_band() with 1 component at a time (SVQ1) + + /** + * macroblock decision mode + * - encoding: Set by user. + * - decoding: unused + */ + int mb_decision; +#define FF_MB_DECISION_SIMPLE 0 ///< uses mb_cmp +#define FF_MB_DECISION_BITS 1 ///< chooses the one which needs the fewest bits +#define FF_MB_DECISION_RD 2 ///< rate distortion + + /** + * custom intra quantization matrix + * - encoding: Set by user, can be NULL. + * - decoding: Set by libavcodec. + */ + uint16_t *intra_matrix; + + /** + * custom inter quantization matrix + * - encoding: Set by user, can be NULL. + * - decoding: Set by libavcodec. + */ + uint16_t *inter_matrix; + +#if FF_API_PRIVATE_OPT + /** @deprecated use encoder private options instead */ + attribute_deprecated + int scenechange_threshold; + + /** @deprecated use encoder private options instead */ + attribute_deprecated + int noise_reduction; +#endif + + /** + * precision of the intra DC coefficient - 8 + * - encoding: Set by user. + * - decoding: Set by libavcodec + */ + int intra_dc_precision; + + /** + * Number of macroblock rows at the top which are skipped. + * - encoding: unused + * - decoding: Set by user. + */ + int skip_top; + + /** + * Number of macroblock rows at the bottom which are skipped. + * - encoding: unused + * - decoding: Set by user. + */ + int skip_bottom; + + /** + * minimum MB Lagrange multiplier + * - encoding: Set by user. + * - decoding: unused + */ + int mb_lmin; + + /** + * maximum MB Lagrange multiplier + * - encoding: Set by user. + * - decoding: unused + */ + int mb_lmax; + +#if FF_API_PRIVATE_OPT + /** + * @deprecated use encoder private options instead + */ + attribute_deprecated + int me_penalty_compensation; +#endif + + /** + * - encoding: Set by user. + * - decoding: unused + */ + int bidir_refine; + +#if FF_API_PRIVATE_OPT + /** @deprecated use encoder private options instead */ + attribute_deprecated + int brd_scale; +#endif + + /** + * minimum GOP size + * - encoding: Set by user. + * - decoding: unused + */ + int keyint_min; + + /** + * number of reference frames + * - encoding: Set by user. + * - decoding: Set by lavc. + */ + int refs; + +#if FF_API_PRIVATE_OPT + /** @deprecated use encoder private options instead */ + attribute_deprecated + int chromaoffset; +#endif + + /** + * Note: Value depends upon the compare function used for fullpel ME. + * - encoding: Set by user. + * - decoding: unused + */ + int mv0_threshold; + +#if FF_API_PRIVATE_OPT + /** @deprecated use encoder private options instead */ + attribute_deprecated + int b_sensitivity; +#endif + + /** + * Chromaticity coordinates of the source primaries. + * - encoding: Set by user + * - decoding: Set by libavcodec + */ + enum AVColorPrimaries color_primaries; + + /** + * Color Transfer Characteristic. + * - encoding: Set by user + * - decoding: Set by libavcodec + */ + enum AVColorTransferCharacteristic color_trc; + + /** + * YUV colorspace type. + * - encoding: Set by user + * - decoding: Set by libavcodec + */ + enum AVColorSpace colorspace; + + /** + * MPEG vs JPEG YUV range. + * - encoding: Set by user + * - decoding: Set by libavcodec + */ + enum AVColorRange color_range; + + /** + * This defines the location of chroma samples. + * - encoding: Set by user + * - decoding: Set by libavcodec + */ + enum AVChromaLocation chroma_sample_location; + + /** + * Number of slices. + * Indicates number of picture subdivisions. Used for parallelized + * decoding. + * - encoding: Set by user + * - decoding: unused + */ + int slices; + + /** Field order + * - encoding: set by libavcodec + * - decoding: Set by user. + */ + enum AVFieldOrder field_order; + + /* audio only */ + int sample_rate; ///< samples per second + int channels; ///< number of audio channels + + /** + * audio sample format + * - encoding: Set by user. + * - decoding: Set by libavcodec. + */ + enum AVSampleFormat sample_fmt; ///< sample format + + /* The following data should not be initialized. */ + /** + * Number of samples per channel in an audio frame. + * + * - encoding: set by libavcodec in avcodec_open2(). Each submitted frame + * except the last must contain exactly frame_size samples per channel. + * May be 0 when the codec has AV_CODEC_CAP_VARIABLE_FRAME_SIZE set, then the + * frame size is not restricted. + * - decoding: may be set by some decoders to indicate constant frame size + */ + int frame_size; + + /** + * Frame counter, set by libavcodec. + * + * - decoding: total number of frames returned from the decoder so far. + * - encoding: total number of frames passed to the encoder so far. + * + * @note the counter is not incremented if encoding/decoding resulted in + * an error. + */ + int frame_number; + + /** + * number of bytes per packet if constant and known or 0 + * Used by some WAV based audio codecs. + */ + int block_align; + + /** + * Audio cutoff bandwidth (0 means "automatic") + * - encoding: Set by user. + * - decoding: unused + */ + int cutoff; + + /** + * Audio channel layout. + * - encoding: set by user. + * - decoding: set by user, may be overwritten by libavcodec. + */ + uint64_t channel_layout; + + /** + * Request decoder to use this channel layout if it can (0 for default) + * - encoding: unused + * - decoding: Set by user. + */ + uint64_t request_channel_layout; + + /** + * Type of service that the audio stream conveys. + * - encoding: Set by user. + * - decoding: Set by libavcodec. + */ + enum AVAudioServiceType audio_service_type; + + /** + * desired sample format + * - encoding: Not used. + * - decoding: Set by user. + * Decoder will decode to this format if it can. + */ + enum AVSampleFormat request_sample_fmt; + + /** + * This callback is called at the beginning of each frame to get data + * buffer(s) for it. There may be one contiguous buffer for all the data or + * there may be a buffer per each data plane or anything in between. What + * this means is, you may set however many entries in buf[] you feel necessary. + * Each buffer must be reference-counted using the AVBuffer API (see description + * of buf[] below). + * + * The following fields will be set in the frame before this callback is + * called: + * - format + * - width, height (video only) + * - sample_rate, channel_layout, nb_samples (audio only) + * Their values may differ from the corresponding values in + * AVCodecContext. This callback must use the frame values, not the codec + * context values, to calculate the required buffer size. + * + * This callback must fill the following fields in the frame: + * - data[] + * - linesize[] + * - extended_data: + * * if the data is planar audio with more than 8 channels, then this + * callback must allocate and fill extended_data to contain all pointers + * to all data planes. data[] must hold as many pointers as it can. + * extended_data must be allocated with av_malloc() and will be freed in + * av_frame_unref(). + * * otherwise extended_data must point to data + * - buf[] must contain one or more pointers to AVBufferRef structures. Each of + * the frame's data and extended_data pointers must be contained in these. That + * is, one AVBufferRef for each allocated chunk of memory, not necessarily one + * AVBufferRef per data[] entry. See: av_buffer_create(), av_buffer_alloc(), + * and av_buffer_ref(). + * - extended_buf and nb_extended_buf must be allocated with av_malloc() by + * this callback and filled with the extra buffers if there are more + * buffers than buf[] can hold. extended_buf will be freed in + * av_frame_unref(). + * + * If AV_CODEC_CAP_DR1 is not set then get_buffer2() must call + * avcodec_default_get_buffer2() instead of providing buffers allocated by + * some other means. + * + * Each data plane must be aligned to the maximum required by the target + * CPU. + * + * @see avcodec_default_get_buffer2() + * + * Video: + * + * If AV_GET_BUFFER_FLAG_REF is set in flags then the frame may be reused + * (read and/or written to if it is writable) later by libavcodec. + * + * avcodec_align_dimensions2() should be used to find the required width and + * height, as they normally need to be rounded up to the next multiple of 16. + * + * Some decoders do not support linesizes changing between frames. + * + * If frame multithreading is used and thread_safe_callbacks is set, + * this callback may be called from a different thread, but not from more + * than one at once. Does not need to be reentrant. + * + * @see avcodec_align_dimensions2() + * + * Audio: + * + * Decoders request a buffer of a particular size by setting + * AVFrame.nb_samples prior to calling get_buffer2(). The decoder may, + * however, utilize only part of the buffer by setting AVFrame.nb_samples + * to a smaller value in the output frame. + * + * As a convenience, av_samples_get_buffer_size() and + * av_samples_fill_arrays() in libavutil may be used by custom get_buffer2() + * functions to find the required data size and to fill data pointers and + * linesize. In AVFrame.linesize, only linesize[0] may be set for audio + * since all planes must be the same size. + * + * @see av_samples_get_buffer_size(), av_samples_fill_arrays() + * + * - encoding: unused + * - decoding: Set by libavcodec, user can override. + */ + int (*get_buffer2)(struct AVCodecContext *s, AVFrame *frame, int flags); + + /** + * If non-zero, the decoded audio and video frames returned from + * avcodec_decode_video2() and avcodec_decode_audio4() are reference-counted + * and are valid indefinitely. The caller must free them with + * av_frame_unref() when they are not needed anymore. + * Otherwise, the decoded frames must not be freed by the caller and are + * only valid until the next decode call. + * + * This is always automatically enabled if avcodec_receive_frame() is used. + * + * - encoding: unused + * - decoding: set by the caller before avcodec_open2(). + */ + attribute_deprecated + int refcounted_frames; + + /* - encoding parameters */ + float qcompress; ///< amount of qscale change between easy & hard scenes (0.0-1.0) + float qblur; ///< amount of qscale smoothing over time (0.0-1.0) + + /** + * minimum quantizer + * - encoding: Set by user. + * - decoding: unused + */ + int qmin; + + /** + * maximum quantizer + * - encoding: Set by user. + * - decoding: unused + */ + int qmax; + + /** + * maximum quantizer difference between frames + * - encoding: Set by user. + * - decoding: unused + */ + int max_qdiff; + + /** + * decoder bitstream buffer size + * - encoding: Set by user. + * - decoding: unused + */ + int rc_buffer_size; + + /** + * ratecontrol override, see RcOverride + * - encoding: Allocated/set/freed by user. + * - decoding: unused + */ + int rc_override_count; + RcOverride *rc_override; + + /** + * maximum bitrate + * - encoding: Set by user. + * - decoding: Set by user, may be overwritten by libavcodec. + */ + int64_t rc_max_rate; + + /** + * minimum bitrate + * - encoding: Set by user. + * - decoding: unused + */ + int64_t rc_min_rate; + + /** + * Ratecontrol attempt to use, at maximum, <value> of what can be used without an underflow. + * - encoding: Set by user. + * - decoding: unused. + */ + float rc_max_available_vbv_use; + + /** + * Ratecontrol attempt to use, at least, <value> times the amount needed to prevent a vbv overflow. + * - encoding: Set by user. + * - decoding: unused. + */ + float rc_min_vbv_overflow_use; + + /** + * Number of bits which should be loaded into the rc buffer before decoding starts. + * - encoding: Set by user. + * - decoding: unused + */ + int rc_initial_buffer_occupancy; + +#if FF_API_CODER_TYPE +#define FF_CODER_TYPE_VLC 0 +#define FF_CODER_TYPE_AC 1 +#define FF_CODER_TYPE_RAW 2 +#define FF_CODER_TYPE_RLE 3 + /** + * @deprecated use encoder private options instead + */ + attribute_deprecated + int coder_type; +#endif /* FF_API_CODER_TYPE */ + +#if FF_API_PRIVATE_OPT + /** @deprecated use encoder private options instead */ + attribute_deprecated + int context_model; +#endif + +#if FF_API_PRIVATE_OPT + /** @deprecated use encoder private options instead */ + attribute_deprecated + int frame_skip_threshold; + + /** @deprecated use encoder private options instead */ + attribute_deprecated + int frame_skip_factor; + + /** @deprecated use encoder private options instead */ + attribute_deprecated + int frame_skip_exp; + + /** @deprecated use encoder private options instead */ + attribute_deprecated + int frame_skip_cmp; +#endif /* FF_API_PRIVATE_OPT */ + + /** + * trellis RD quantization + * - encoding: Set by user. + * - decoding: unused + */ + int trellis; + +#if FF_API_PRIVATE_OPT + /** @deprecated use encoder private options instead */ + attribute_deprecated + int min_prediction_order; + + /** @deprecated use encoder private options instead */ + attribute_deprecated + int max_prediction_order; + + /** @deprecated use encoder private options instead */ + attribute_deprecated + int64_t timecode_frame_start; +#endif + +#if FF_API_RTP_CALLBACK + /** + * @deprecated unused + */ + /* The RTP callback: This function is called */ + /* every time the encoder has a packet to send. */ + /* It depends on the encoder if the data starts */ + /* with a Start Code (it should). H.263 does. */ + /* mb_nb contains the number of macroblocks */ + /* encoded in the RTP payload. */ + attribute_deprecated + void (*rtp_callback)(struct AVCodecContext *avctx, void *data, int size, int mb_nb); +#endif + +#if FF_API_PRIVATE_OPT + /** @deprecated use encoder private options instead */ + attribute_deprecated + int rtp_payload_size; /* The size of the RTP payload: the coder will */ + /* do its best to deliver a chunk with size */ + /* below rtp_payload_size, the chunk will start */ + /* with a start code on some codecs like H.263. */ + /* This doesn't take account of any particular */ + /* headers inside the transmitted RTP payload. */ +#endif + +#if FF_API_STAT_BITS + /* statistics, used for 2-pass encoding */ + attribute_deprecated + int mv_bits; + attribute_deprecated + int header_bits; + attribute_deprecated + int i_tex_bits; + attribute_deprecated + int p_tex_bits; + attribute_deprecated + int i_count; + attribute_deprecated + int p_count; + attribute_deprecated + int skip_count; + attribute_deprecated + int misc_bits; + + /** @deprecated this field is unused */ + attribute_deprecated + int frame_bits; +#endif + + /** + * pass1 encoding statistics output buffer + * - encoding: Set by libavcodec. + * - decoding: unused + */ + char *stats_out; + + /** + * pass2 encoding statistics input buffer + * Concatenated stuff from stats_out of pass1 should be placed here. + * - encoding: Allocated/set/freed by user. + * - decoding: unused + */ + char *stats_in; + + /** + * Work around bugs in encoders which sometimes cannot be detected automatically. + * - encoding: Set by user + * - decoding: Set by user + */ + int workaround_bugs; +#define FF_BUG_AUTODETECT 1 ///< autodetection +#define FF_BUG_XVID_ILACE 4 +#define FF_BUG_UMP4 8 +#define FF_BUG_NO_PADDING 16 +#define FF_BUG_AMV 32 +#define FF_BUG_QPEL_CHROMA 64 +#define FF_BUG_STD_QPEL 128 +#define FF_BUG_QPEL_CHROMA2 256 +#define FF_BUG_DIRECT_BLOCKSIZE 512 +#define FF_BUG_EDGE 1024 +#define FF_BUG_HPEL_CHROMA 2048 +#define FF_BUG_DC_CLIP 4096 +#define FF_BUG_MS 8192 ///< Work around various bugs in Microsoft's broken decoders. +#define FF_BUG_TRUNCATED 16384 +#define FF_BUG_IEDGE 32768 + + /** + * strictly follow the standard (MPEG-4, ...). + * - encoding: Set by user. + * - decoding: Set by user. + * Setting this to STRICT or higher means the encoder and decoder will + * generally do stupid things, whereas setting it to unofficial or lower + * will mean the encoder might produce output that is not supported by all + * spec-compliant decoders. Decoders don't differentiate between normal, + * unofficial and experimental (that is, they always try to decode things + * when they can) unless they are explicitly asked to behave stupidly + * (=strictly conform to the specs) + */ + int strict_std_compliance; +#define FF_COMPLIANCE_VERY_STRICT 2 ///< Strictly conform to an older more strict version of the spec or reference software. +#define FF_COMPLIANCE_STRICT 1 ///< Strictly conform to all the things in the spec no matter what consequences. +#define FF_COMPLIANCE_NORMAL 0 +#define FF_COMPLIANCE_UNOFFICIAL -1 ///< Allow unofficial extensions +#define FF_COMPLIANCE_EXPERIMENTAL -2 ///< Allow nonstandardized experimental things. + + /** + * error concealment flags + * - encoding: unused + * - decoding: Set by user. + */ + int error_concealment; +#define FF_EC_GUESS_MVS 1 +#define FF_EC_DEBLOCK 2 +#define FF_EC_FAVOR_INTER 256 + + /** + * debug + * - encoding: Set by user. + * - decoding: Set by user. + */ + int debug; +#define FF_DEBUG_PICT_INFO 1 +#define FF_DEBUG_RC 2 +#define FF_DEBUG_BITSTREAM 4 +#define FF_DEBUG_MB_TYPE 8 +#define FF_DEBUG_QP 16 +#if FF_API_DEBUG_MV +/** + * @deprecated this option does nothing + */ +#define FF_DEBUG_MV 32 +#endif +#define FF_DEBUG_DCT_COEFF 0x00000040 +#define FF_DEBUG_SKIP 0x00000080 +#define FF_DEBUG_STARTCODE 0x00000100 +#define FF_DEBUG_ER 0x00000400 +#define FF_DEBUG_MMCO 0x00000800 +#define FF_DEBUG_BUGS 0x00001000 +#if FF_API_DEBUG_MV +#define FF_DEBUG_VIS_QP 0x00002000 +#define FF_DEBUG_VIS_MB_TYPE 0x00004000 +#endif +#define FF_DEBUG_BUFFERS 0x00008000 +#define FF_DEBUG_THREADS 0x00010000 +#define FF_DEBUG_GREEN_MD 0x00800000 +#define FF_DEBUG_NOMC 0x01000000 + +#if FF_API_DEBUG_MV + /** + * debug + * - encoding: Set by user. + * - decoding: Set by user. + */ + int debug_mv; +#define FF_DEBUG_VIS_MV_P_FOR 0x00000001 // visualize forward predicted MVs of P-frames +#define FF_DEBUG_VIS_MV_B_FOR 0x00000002 // visualize forward predicted MVs of B-frames +#define FF_DEBUG_VIS_MV_B_BACK 0x00000004 // visualize backward predicted MVs of B-frames +#endif + + /** + * Error recognition; may misdetect some more or less valid parts as errors. + * - encoding: unused + * - decoding: Set by user. + */ + int err_recognition; + +/** + * Verify checksums embedded in the bitstream (could be of either encoded or + * decoded data, depending on the codec) and print an error message on mismatch. + * If AV_EF_EXPLODE is also set, a mismatching checksum will result in the + * decoder returning an error. + */ +#define AV_EF_CRCCHECK (1<<0) +#define AV_EF_BITSTREAM (1<<1) ///< detect bitstream specification deviations +#define AV_EF_BUFFER (1<<2) ///< detect improper bitstream length +#define AV_EF_EXPLODE (1<<3) ///< abort decoding on minor error detection + +#define AV_EF_IGNORE_ERR (1<<15) ///< ignore errors and continue +#define AV_EF_CAREFUL (1<<16) ///< consider things that violate the spec, are fast to calculate and have not been seen in the wild as errors +#define AV_EF_COMPLIANT (1<<17) ///< consider all spec non compliances as errors +#define AV_EF_AGGRESSIVE (1<<18) ///< consider things that a sane encoder should not do as an error + + + /** + * opaque 64-bit number (generally a PTS) that will be reordered and + * output in AVFrame.reordered_opaque + * - encoding: unused + * - decoding: Set by user. + */ + int64_t reordered_opaque; + + /** + * Hardware accelerator in use + * - encoding: unused. + * - decoding: Set by libavcodec + */ + const struct AVHWAccel *hwaccel; + + /** + * Hardware accelerator context. + * For some hardware accelerators, a global context needs to be + * provided by the user. In that case, this holds display-dependent + * data FFmpeg cannot instantiate itself. Please refer to the + * FFmpeg HW accelerator documentation to know how to fill this + * is. e.g. for VA API, this is a struct vaapi_context. + * - encoding: unused + * - decoding: Set by user + */ + void *hwaccel_context; + + /** + * error + * - encoding: Set by libavcodec if flags & AV_CODEC_FLAG_PSNR. + * - decoding: unused + */ + uint64_t error[AV_NUM_DATA_POINTERS]; + + /** + * DCT algorithm, see FF_DCT_* below + * - encoding: Set by user. + * - decoding: unused + */ + int dct_algo; +#define FF_DCT_AUTO 0 +#define FF_DCT_FASTINT 1 +#define FF_DCT_INT 2 +#define FF_DCT_MMX 3 +#define FF_DCT_ALTIVEC 5 +#define FF_DCT_FAAN 6 + + /** + * IDCT algorithm, see FF_IDCT_* below. + * - encoding: Set by user. + * - decoding: Set by user. + */ + int idct_algo; +#define FF_IDCT_AUTO 0 +#define FF_IDCT_INT 1 +#define FF_IDCT_SIMPLE 2 +#define FF_IDCT_SIMPLEMMX 3 +#define FF_IDCT_ARM 7 +#define FF_IDCT_ALTIVEC 8 +#define FF_IDCT_SIMPLEARM 10 +#define FF_IDCT_XVID 14 +#define FF_IDCT_SIMPLEARMV5TE 16 +#define FF_IDCT_SIMPLEARMV6 17 +#define FF_IDCT_FAAN 20 +#define FF_IDCT_SIMPLENEON 22 +#define FF_IDCT_NONE 24 /* Used by XvMC to extract IDCT coefficients with FF_IDCT_PERM_NONE */ +#define FF_IDCT_SIMPLEAUTO 128 + + /** + * bits per sample/pixel from the demuxer (needed for huffyuv). + * - encoding: Set by libavcodec. + * - decoding: Set by user. + */ + int bits_per_coded_sample; + + /** + * Bits per sample/pixel of internal libavcodec pixel/sample format. + * - encoding: set by user. + * - decoding: set by libavcodec. + */ + int bits_per_raw_sample; + +#if FF_API_LOWRES + /** + * low resolution decoding, 1-> 1/2 size, 2->1/4 size + * - encoding: unused + * - decoding: Set by user. + */ + int lowres; +#endif + +#if FF_API_CODED_FRAME + /** + * the picture in the bitstream + * - encoding: Set by libavcodec. + * - decoding: unused + * + * @deprecated use the quality factor packet side data instead + */ + attribute_deprecated AVFrame *coded_frame; +#endif + + /** + * thread count + * is used to decide how many independent tasks should be passed to execute() + * - encoding: Set by user. + * - decoding: Set by user. + */ + int thread_count; + + /** + * Which multithreading methods to use. + * Use of FF_THREAD_FRAME will increase decoding delay by one frame per thread, + * so clients which cannot provide future frames should not use it. + * + * - encoding: Set by user, otherwise the default is used. + * - decoding: Set by user, otherwise the default is used. + */ + int thread_type; +#define FF_THREAD_FRAME 1 ///< Decode more than one frame at once +#define FF_THREAD_SLICE 2 ///< Decode more than one part of a single frame at once + + /** + * Which multithreading methods are in use by the codec. + * - encoding: Set by libavcodec. + * - decoding: Set by libavcodec. + */ + int active_thread_type; + + /** + * Set by the client if its custom get_buffer() callback can be called + * synchronously from another thread, which allows faster multithreaded decoding. + * draw_horiz_band() will be called from other threads regardless of this setting. + * Ignored if the default get_buffer() is used. + * - encoding: Set by user. + * - decoding: Set by user. + */ + int thread_safe_callbacks; + + /** + * The codec may call this to execute several independent things. + * It will return only after finishing all tasks. + * The user may replace this with some multithreaded implementation, + * the default implementation will execute the parts serially. + * @param count the number of things to execute + * - encoding: Set by libavcodec, user can override. + * - decoding: Set by libavcodec, user can override. + */ + int (*execute)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg), void *arg2, int *ret, int count, int size); + + /** + * The codec may call this to execute several independent things. + * It will return only after finishing all tasks. + * The user may replace this with some multithreaded implementation, + * the default implementation will execute the parts serially. + * Also see avcodec_thread_init and e.g. the --enable-pthread configure option. + * @param c context passed also to func + * @param count the number of things to execute + * @param arg2 argument passed unchanged to func + * @param ret return values of executed functions, must have space for "count" values. May be NULL. + * @param func function that will be called count times, with jobnr from 0 to count-1. + * threadnr will be in the range 0 to c->thread_count-1 < MAX_THREADS and so that no + * two instances of func executing at the same time will have the same threadnr. + * @return always 0 currently, but code should handle a future improvement where when any call to func + * returns < 0 no further calls to func may be done and < 0 is returned. + * - encoding: Set by libavcodec, user can override. + * - decoding: Set by libavcodec, user can override. + */ + int (*execute2)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg, int jobnr, int threadnr), void *arg2, int *ret, int count); + + /** + * noise vs. sse weight for the nsse comparison function + * - encoding: Set by user. + * - decoding: unused + */ + int nsse_weight; + + /** + * profile + * - encoding: Set by user. + * - decoding: Set by libavcodec. + */ + int profile; +#define FF_PROFILE_UNKNOWN -99 +#define FF_PROFILE_RESERVED -100 + +#define FF_PROFILE_AAC_MAIN 0 +#define FF_PROFILE_AAC_LOW 1 +#define FF_PROFILE_AAC_SSR 2 +#define FF_PROFILE_AAC_LTP 3 +#define FF_PROFILE_AAC_HE 4 +#define FF_PROFILE_AAC_HE_V2 28 +#define FF_PROFILE_AAC_LD 22 +#define FF_PROFILE_AAC_ELD 38 +#define FF_PROFILE_MPEG2_AAC_LOW 128 +#define FF_PROFILE_MPEG2_AAC_HE 131 + +#define FF_PROFILE_DNXHD 0 +#define FF_PROFILE_DNXHR_LB 1 +#define FF_PROFILE_DNXHR_SQ 2 +#define FF_PROFILE_DNXHR_HQ 3 +#define FF_PROFILE_DNXHR_HQX 4 +#define FF_PROFILE_DNXHR_444 5 + +#define FF_PROFILE_DTS 20 +#define FF_PROFILE_DTS_ES 30 +#define FF_PROFILE_DTS_96_24 40 +#define FF_PROFILE_DTS_HD_HRA 50 +#define FF_PROFILE_DTS_HD_MA 60 +#define FF_PROFILE_DTS_EXPRESS 70 + +#define FF_PROFILE_MPEG2_422 0 +#define FF_PROFILE_MPEG2_HIGH 1 +#define FF_PROFILE_MPEG2_SS 2 +#define FF_PROFILE_MPEG2_SNR_SCALABLE 3 +#define FF_PROFILE_MPEG2_MAIN 4 +#define FF_PROFILE_MPEG2_SIMPLE 5 + +#define FF_PROFILE_H264_CONSTRAINED (1<<9) // 8+1; constraint_set1_flag +#define FF_PROFILE_H264_INTRA (1<<11) // 8+3; constraint_set3_flag + +#define FF_PROFILE_H264_BASELINE 66 +#define FF_PROFILE_H264_CONSTRAINED_BASELINE (66|FF_PROFILE_H264_CONSTRAINED) +#define FF_PROFILE_H264_MAIN 77 +#define FF_PROFILE_H264_EXTENDED 88 +#define FF_PROFILE_H264_HIGH 100 +#define FF_PROFILE_H264_HIGH_10 110 +#define FF_PROFILE_H264_HIGH_10_INTRA (110|FF_PROFILE_H264_INTRA) +#define FF_PROFILE_H264_MULTIVIEW_HIGH 118 +#define FF_PROFILE_H264_HIGH_422 122 +#define FF_PROFILE_H264_HIGH_422_INTRA (122|FF_PROFILE_H264_INTRA) +#define FF_PROFILE_H264_STEREO_HIGH 128 +#define FF_PROFILE_H264_HIGH_444 144 +#define FF_PROFILE_H264_HIGH_444_PREDICTIVE 244 +#define FF_PROFILE_H264_HIGH_444_INTRA (244|FF_PROFILE_H264_INTRA) +#define FF_PROFILE_H264_CAVLC_444 44 + +#define FF_PROFILE_VC1_SIMPLE 0 +#define FF_PROFILE_VC1_MAIN 1 +#define FF_PROFILE_VC1_COMPLEX 2 +#define FF_PROFILE_VC1_ADVANCED 3 + +#define FF_PROFILE_MPEG4_SIMPLE 0 +#define FF_PROFILE_MPEG4_SIMPLE_SCALABLE 1 +#define FF_PROFILE_MPEG4_CORE 2 +#define FF_PROFILE_MPEG4_MAIN 3 +#define FF_PROFILE_MPEG4_N_BIT 4 +#define FF_PROFILE_MPEG4_SCALABLE_TEXTURE 5 +#define FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION 6 +#define FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE 7 +#define FF_PROFILE_MPEG4_HYBRID 8 +#define FF_PROFILE_MPEG4_ADVANCED_REAL_TIME 9 +#define FF_PROFILE_MPEG4_CORE_SCALABLE 10 +#define FF_PROFILE_MPEG4_ADVANCED_CODING 11 +#define FF_PROFILE_MPEG4_ADVANCED_CORE 12 +#define FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE 13 +#define FF_PROFILE_MPEG4_SIMPLE_STUDIO 14 +#define FF_PROFILE_MPEG4_ADVANCED_SIMPLE 15 + +#define FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_0 1 +#define FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_1 2 +#define FF_PROFILE_JPEG2000_CSTREAM_NO_RESTRICTION 32768 +#define FF_PROFILE_JPEG2000_DCINEMA_2K 3 +#define FF_PROFILE_JPEG2000_DCINEMA_4K 4 + +#define FF_PROFILE_VP9_0 0 +#define FF_PROFILE_VP9_1 1 +#define FF_PROFILE_VP9_2 2 +#define FF_PROFILE_VP9_3 3 + +#define FF_PROFILE_HEVC_MAIN 1 +#define FF_PROFILE_HEVC_MAIN_10 2 +#define FF_PROFILE_HEVC_MAIN_STILL_PICTURE 3 +#define FF_PROFILE_HEVC_REXT 4 + +#define FF_PROFILE_AV1_MAIN 0 +#define FF_PROFILE_AV1_HIGH 1 +#define FF_PROFILE_AV1_PROFESSIONAL 2 + +#define FF_PROFILE_MJPEG_HUFFMAN_BASELINE_DCT 0xc0 +#define FF_PROFILE_MJPEG_HUFFMAN_EXTENDED_SEQUENTIAL_DCT 0xc1 +#define FF_PROFILE_MJPEG_HUFFMAN_PROGRESSIVE_DCT 0xc2 +#define FF_PROFILE_MJPEG_HUFFMAN_LOSSLESS 0xc3 +#define FF_PROFILE_MJPEG_JPEG_LS 0xf7 + +#define FF_PROFILE_SBC_MSBC 1 + + /** + * level + * - encoding: Set by user. + * - decoding: Set by libavcodec. + */ + int level; +#define FF_LEVEL_UNKNOWN -99 + + /** + * Skip loop filtering for selected frames. + * - encoding: unused + * - decoding: Set by user. + */ + enum AVDiscard skip_loop_filter; + + /** + * Skip IDCT/dequantization for selected frames. + * - encoding: unused + * - decoding: Set by user. + */ + enum AVDiscard skip_idct; + + /** + * Skip decoding for selected frames. + * - encoding: unused + * - decoding: Set by user. + */ + enum AVDiscard skip_frame; + + /** + * Header containing style information for text subtitles. + * For SUBTITLE_ASS subtitle type, it should contain the whole ASS + * [Script Info] and [V4+ Styles] section, plus the [Events] line and + * the Format line following. It shouldn't include any Dialogue line. + * - encoding: Set/allocated/freed by user (before avcodec_open2()) + * - decoding: Set/allocated/freed by libavcodec (by avcodec_open2()) + */ + uint8_t *subtitle_header; + int subtitle_header_size; + +#if FF_API_VBV_DELAY + /** + * VBV delay coded in the last frame (in periods of a 27 MHz clock). + * Used for compliant TS muxing. + * - encoding: Set by libavcodec. + * - decoding: unused. + * @deprecated this value is now exported as a part of + * AV_PKT_DATA_CPB_PROPERTIES packet side data + */ + attribute_deprecated + uint64_t vbv_delay; +#endif + +#if FF_API_SIDEDATA_ONLY_PKT + /** + * Encoding only and set by default. Allow encoders to output packets + * that do not contain any encoded data, only side data. + * + * Some encoders need to output such packets, e.g. to update some stream + * parameters at the end of encoding. + * + * @deprecated this field disables the default behaviour and + * it is kept only for compatibility. + */ + attribute_deprecated + int side_data_only_packets; +#endif + + /** + * Audio only. The number of "priming" samples (padding) inserted by the + * encoder at the beginning of the audio. I.e. this number of leading + * decoded samples must be discarded by the caller to get the original audio + * without leading padding. + * + * - decoding: unused + * - encoding: Set by libavcodec. The timestamps on the output packets are + * adjusted by the encoder so that they always refer to the + * first sample of the data actually contained in the packet, + * including any added padding. E.g. if the timebase is + * 1/samplerate and the timestamp of the first input sample is + * 0, the timestamp of the first output packet will be + * -initial_padding. + */ + int initial_padding; + + /** + * - decoding: For codecs that store a framerate value in the compressed + * bitstream, the decoder may export it here. { 0, 1} when + * unknown. + * - encoding: May be used to signal the framerate of CFR content to an + * encoder. + */ + AVRational framerate; + + /** + * Nominal unaccelerated pixel format, see AV_PIX_FMT_xxx. + * - encoding: unused. + * - decoding: Set by libavcodec before calling get_format() + */ + enum AVPixelFormat sw_pix_fmt; + + /** + * Timebase in which pkt_dts/pts and AVPacket.dts/pts are. + * - encoding unused. + * - decoding set by user. + */ + AVRational pkt_timebase; + + /** + * AVCodecDescriptor + * - encoding: unused. + * - decoding: set by libavcodec. + */ + const AVCodecDescriptor *codec_descriptor; + +#if !FF_API_LOWRES + /** + * low resolution decoding, 1-> 1/2 size, 2->1/4 size + * - encoding: unused + * - decoding: Set by user. + */ + int lowres; +#endif + + /** + * Current statistics for PTS correction. + * - decoding: maintained and used by libavcodec, not intended to be used by user apps + * - encoding: unused + */ + int64_t pts_correction_num_faulty_pts; /// Number of incorrect PTS values so far + int64_t pts_correction_num_faulty_dts; /// Number of incorrect DTS values so far + int64_t pts_correction_last_pts; /// PTS of the last frame + int64_t pts_correction_last_dts; /// DTS of the last frame + + /** + * Character encoding of the input subtitles file. + * - decoding: set by user + * - encoding: unused + */ + char *sub_charenc; + + /** + * Subtitles character encoding mode. Formats or codecs might be adjusting + * this setting (if they are doing the conversion themselves for instance). + * - decoding: set by libavcodec + * - encoding: unused + */ + int sub_charenc_mode; +#define FF_SUB_CHARENC_MODE_DO_NOTHING -1 ///< do nothing (demuxer outputs a stream supposed to be already in UTF-8, or the codec is bitmap for instance) +#define FF_SUB_CHARENC_MODE_AUTOMATIC 0 ///< libavcodec will select the mode itself +#define FF_SUB_CHARENC_MODE_PRE_DECODER 1 ///< the AVPacket data needs to be recoded to UTF-8 before being fed to the decoder, requires iconv +#define FF_SUB_CHARENC_MODE_IGNORE 2 ///< neither convert the subtitles, nor check them for valid UTF-8 + + /** + * Skip processing alpha if supported by codec. + * Note that if the format uses pre-multiplied alpha (common with VP6, + * and recommended due to better video quality/compression) + * the image will look as if alpha-blended onto a black background. + * However for formats that do not use pre-multiplied alpha + * there might be serious artefacts (though e.g. libswscale currently + * assumes pre-multiplied alpha anyway). + * + * - decoding: set by user + * - encoding: unused + */ + int skip_alpha; + + /** + * Number of samples to skip after a discontinuity + * - decoding: unused + * - encoding: set by libavcodec + */ + int seek_preroll; + +#if !FF_API_DEBUG_MV + /** + * debug motion vectors + * - encoding: Set by user. + * - decoding: Set by user. + */ + int debug_mv; +#define FF_DEBUG_VIS_MV_P_FOR 0x00000001 //visualize forward predicted MVs of P frames +#define FF_DEBUG_VIS_MV_B_FOR 0x00000002 //visualize forward predicted MVs of B frames +#define FF_DEBUG_VIS_MV_B_BACK 0x00000004 //visualize backward predicted MVs of B frames +#endif + + /** + * custom intra quantization matrix + * - encoding: Set by user, can be NULL. + * - decoding: unused. + */ + uint16_t *chroma_intra_matrix; + + /** + * dump format separator. + * can be ", " or "\n " or anything else + * - encoding: Set by user. + * - decoding: Set by user. + */ + uint8_t *dump_separator; + + /** + * ',' separated list of allowed decoders. + * If NULL then all are allowed + * - encoding: unused + * - decoding: set by user + */ + char *codec_whitelist; + + /** + * Properties of the stream that gets decoded + * - encoding: unused + * - decoding: set by libavcodec + */ + unsigned properties; +#define FF_CODEC_PROPERTY_LOSSLESS 0x00000001 +#define FF_CODEC_PROPERTY_CLOSED_CAPTIONS 0x00000002 + + /** + * Additional data associated with the entire coded stream. + * + * - decoding: unused + * - encoding: may be set by libavcodec after avcodec_open2(). + */ + AVPacketSideData *coded_side_data; + int nb_coded_side_data; + + /** + * A reference to the AVHWFramesContext describing the input (for encoding) + * or output (decoding) frames. The reference is set by the caller and + * afterwards owned (and freed) by libavcodec - it should never be read by + * the caller after being set. + * + * - decoding: This field should be set by the caller from the get_format() + * callback. The previous reference (if any) will always be + * unreffed by libavcodec before the get_format() call. + * + * If the default get_buffer2() is used with a hwaccel pixel + * format, then this AVHWFramesContext will be used for + * allocating the frame buffers. + * + * - encoding: For hardware encoders configured to use a hwaccel pixel + * format, this field should be set by the caller to a reference + * to the AVHWFramesContext describing input frames. + * AVHWFramesContext.format must be equal to + * AVCodecContext.pix_fmt. + * + * This field should be set before avcodec_open2() is called. + */ + AVBufferRef *hw_frames_ctx; + + /** + * Control the form of AVSubtitle.rects[N]->ass + * - decoding: set by user + * - encoding: unused + */ + int sub_text_format; +#define FF_SUB_TEXT_FMT_ASS 0 +#if FF_API_ASS_TIMING +#define FF_SUB_TEXT_FMT_ASS_WITH_TIMINGS 1 +#endif + + /** + * Audio only. The amount of padding (in samples) appended by the encoder to + * the end of the audio. I.e. this number of decoded samples must be + * discarded by the caller from the end of the stream to get the original + * audio without any trailing padding. + * + * - decoding: unused + * - encoding: unused + */ + int trailing_padding; + + /** + * The number of pixels per image to maximally accept. + * + * - decoding: set by user + * - encoding: set by user + */ + int64_t max_pixels; + + /** + * A reference to the AVHWDeviceContext describing the device which will + * be used by a hardware encoder/decoder. The reference is set by the + * caller and afterwards owned (and freed) by libavcodec. + * + * This should be used if either the codec device does not require + * hardware frames or any that are used are to be allocated internally by + * libavcodec. If the user wishes to supply any of the frames used as + * encoder input or decoder output then hw_frames_ctx should be used + * instead. When hw_frames_ctx is set in get_format() for a decoder, this + * field will be ignored while decoding the associated stream segment, but + * may again be used on a following one after another get_format() call. + * + * For both encoders and decoders this field should be set before + * avcodec_open2() is called and must not be written to thereafter. + * + * Note that some decoders may require this field to be set initially in + * order to support hw_frames_ctx at all - in that case, all frames + * contexts used must be created on the same device. + */ + AVBufferRef *hw_device_ctx; + + /** + * Bit set of AV_HWACCEL_FLAG_* flags, which affect hardware accelerated + * decoding (if active). + * - encoding: unused + * - decoding: Set by user (either before avcodec_open2(), or in the + * AVCodecContext.get_format callback) + */ + int hwaccel_flags; + + /** + * Video decoding only. Certain video codecs support cropping, meaning that + * only a sub-rectangle of the decoded frame is intended for display. This + * option controls how cropping is handled by libavcodec. + * + * When set to 1 (the default), libavcodec will apply cropping internally. + * I.e. it will modify the output frame width/height fields and offset the + * data pointers (only by as much as possible while preserving alignment, or + * by the full amount if the AV_CODEC_FLAG_UNALIGNED flag is set) so that + * the frames output by the decoder refer only to the cropped area. The + * crop_* fields of the output frames will be zero. + * + * When set to 0, the width/height fields of the output frames will be set + * to the coded dimensions and the crop_* fields will describe the cropping + * rectangle. Applying the cropping is left to the caller. + * + * @warning When hardware acceleration with opaque output frames is used, + * libavcodec is unable to apply cropping from the top/left border. + * + * @note when this option is set to zero, the width/height fields of the + * AVCodecContext and output AVFrames have different meanings. The codec + * context fields store display dimensions (with the coded dimensions in + * coded_width/height), while the frame fields store the coded dimensions + * (with the display dimensions being determined by the crop_* fields). + */ + int apply_cropping; + + /* + * Video decoding only. Sets the number of extra hardware frames which + * the decoder will allocate for use by the caller. This must be set + * before avcodec_open2() is called. + * + * Some hardware decoders require all frames that they will use for + * output to be defined in advance before decoding starts. For such + * decoders, the hardware frame pool must therefore be of a fixed size. + * The extra frames set here are on top of any number that the decoder + * needs internally in order to operate normally (for example, frames + * used as reference pictures). + */ + int extra_hw_frames; +} AVCodecContext; + +#if FF_API_CODEC_GET_SET +/** + * Accessors for some AVCodecContext fields. These used to be provided for ABI + * compatibility, and do not need to be used anymore. + */ +attribute_deprecated +AVRational av_codec_get_pkt_timebase (const AVCodecContext *avctx); +attribute_deprecated +void av_codec_set_pkt_timebase (AVCodecContext *avctx, AVRational val); + +attribute_deprecated +const AVCodecDescriptor *av_codec_get_codec_descriptor(const AVCodecContext *avctx); +attribute_deprecated +void av_codec_set_codec_descriptor(AVCodecContext *avctx, const AVCodecDescriptor *desc); + +attribute_deprecated +unsigned av_codec_get_codec_properties(const AVCodecContext *avctx); + +#if FF_API_LOWRES +attribute_deprecated +int av_codec_get_lowres(const AVCodecContext *avctx); +attribute_deprecated +void av_codec_set_lowres(AVCodecContext *avctx, int val); +#endif + +attribute_deprecated +int av_codec_get_seek_preroll(const AVCodecContext *avctx); +attribute_deprecated +void av_codec_set_seek_preroll(AVCodecContext *avctx, int val); + +attribute_deprecated +uint16_t *av_codec_get_chroma_intra_matrix(const AVCodecContext *avctx); +attribute_deprecated +void av_codec_set_chroma_intra_matrix(AVCodecContext *avctx, uint16_t *val); +#endif + +/** + * AVProfile. + */ +typedef struct AVProfile { + int profile; + const char *name; ///< short name for the profile +} AVProfile; + +enum { + /** + * The codec supports this format via the hw_device_ctx interface. + * + * When selecting this format, AVCodecContext.hw_device_ctx should + * have been set to a device of the specified type before calling + * avcodec_open2(). + */ + AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX = 0x01, + /** + * The codec supports this format via the hw_frames_ctx interface. + * + * When selecting this format for a decoder, + * AVCodecContext.hw_frames_ctx should be set to a suitable frames + * context inside the get_format() callback. The frames context + * must have been created on a device of the specified type. + */ + AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX = 0x02, + /** + * The codec supports this format by some internal method. + * + * This format can be selected without any additional configuration - + * no device or frames context is required. + */ + AV_CODEC_HW_CONFIG_METHOD_INTERNAL = 0x04, + /** + * The codec supports this format by some ad-hoc method. + * + * Additional settings and/or function calls are required. See the + * codec-specific documentation for details. (Methods requiring + * this sort of configuration are deprecated and others should be + * used in preference.) + */ + AV_CODEC_HW_CONFIG_METHOD_AD_HOC = 0x08, +}; + +typedef struct AVCodecHWConfig { + /** + * A hardware pixel format which the codec can use. + */ + enum AVPixelFormat pix_fmt; + /** + * Bit set of AV_CODEC_HW_CONFIG_METHOD_* flags, describing the possible + * setup methods which can be used with this configuration. + */ + int methods; + /** + * The device type associated with the configuration. + * + * Must be set for AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX and + * AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX, otherwise unused. + */ + enum AVHWDeviceType device_type; +} AVCodecHWConfig; + +typedef struct AVCodecDefault AVCodecDefault; + +struct AVSubtitle; + +/** + * AVCodec. + */ +typedef struct AVCodec { + /** + * Name of the codec implementation. + * The name is globally unique among encoders and among decoders (but an + * encoder and a decoder can share the same name). + * This is the primary way to find a codec from the user perspective. + */ + const char *name; + /** + * Descriptive name for the codec, meant to be more human readable than name. + * You should use the NULL_IF_CONFIG_SMALL() macro to define it. + */ + const char *long_name; + enum AVMediaType type; + enum AVCodecID id; + /** + * Codec capabilities. + * see AV_CODEC_CAP_* + */ + int capabilities; + const AVRational *supported_framerates; ///< array of supported framerates, or NULL if any, array is terminated by {0,0} + const enum AVPixelFormat *pix_fmts; ///< array of supported pixel formats, or NULL if unknown, array is terminated by -1 + const int *supported_samplerates; ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0 + const enum AVSampleFormat *sample_fmts; ///< array of supported sample formats, or NULL if unknown, array is terminated by -1 + const uint64_t *channel_layouts; ///< array of support channel layouts, or NULL if unknown. array is terminated by 0 + uint8_t max_lowres; ///< maximum value for lowres supported by the decoder + const AVClass *priv_class; ///< AVClass for the private context + const AVProfile *profiles; ///< array of recognized profiles, or NULL if unknown, array is terminated by {FF_PROFILE_UNKNOWN} + + /** + * Group name of the codec implementation. + * This is a short symbolic name of the wrapper backing this codec. A + * wrapper uses some kind of external implementation for the codec, such + * as an external library, or a codec implementation provided by the OS or + * the hardware. + * If this field is NULL, this is a builtin, libavcodec native codec. + * If non-NULL, this will be the suffix in AVCodec.name in most cases + * (usually AVCodec.name will be of the form "<codec_name>_<wrapper_name>"). + */ + const char *wrapper_name; + + /***************************************************************** + * No fields below this line are part of the public API. They + * may not be used outside of libavcodec and can be changed and + * removed at will. + * New public fields should be added right above. + ***************************************************************** + */ + int priv_data_size; + struct AVCodec *next; + /** + * @name Frame-level threading support functions + * @{ + */ + /** + * If defined, called on thread contexts when they are created. + * If the codec allocates writable tables in init(), re-allocate them here. + * priv_data will be set to a copy of the original. + */ + int (*init_thread_copy)(AVCodecContext *); + /** + * Copy necessary context variables from a previous thread context to the current one. + * If not defined, the next thread will start automatically; otherwise, the codec + * must call ff_thread_finish_setup(). + * + * dst and src will (rarely) point to the same context, in which case memcpy should be skipped. + */ + int (*update_thread_context)(AVCodecContext *dst, const AVCodecContext *src); + /** @} */ + + /** + * Private codec-specific defaults. + */ + const AVCodecDefault *defaults; + + /** + * Initialize codec static data, called from avcodec_register(). + * + * This is not intended for time consuming operations as it is + * run for every codec regardless of that codec being used. + */ + void (*init_static_data)(struct AVCodec *codec); + + int (*init)(AVCodecContext *); + int (*encode_sub)(AVCodecContext *, uint8_t *buf, int buf_size, + const struct AVSubtitle *sub); + /** + * Encode data to an AVPacket. + * + * @param avctx codec context + * @param avpkt output AVPacket (may contain a user-provided buffer) + * @param[in] frame AVFrame containing the raw data to be encoded + * @param[out] got_packet_ptr encoder sets to 0 or 1 to indicate that a + * non-empty packet was returned in avpkt. + * @return 0 on success, negative error code on failure + */ + int (*encode2)(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, + int *got_packet_ptr); + int (*decode)(AVCodecContext *, void *outdata, int *outdata_size, AVPacket *avpkt); + int (*close)(AVCodecContext *); + /** + * Encode API with decoupled packet/frame dataflow. The API is the + * same as the avcodec_ prefixed APIs (avcodec_send_frame() etc.), except + * that: + * - never called if the codec is closed or the wrong type, + * - if AV_CODEC_CAP_DELAY is not set, drain frames are never sent, + * - only one drain frame is ever passed down, + */ + int (*send_frame)(AVCodecContext *avctx, const AVFrame *frame); + int (*receive_packet)(AVCodecContext *avctx, AVPacket *avpkt); + + /** + * Decode API with decoupled packet/frame dataflow. This function is called + * to get one output frame. It should call ff_decode_get_packet() to obtain + * input data. + */ + int (*receive_frame)(AVCodecContext *avctx, AVFrame *frame); + /** + * Flush buffers. + * Will be called when seeking + */ + void (*flush)(AVCodecContext *); + /** + * Internal codec capabilities. + * See FF_CODEC_CAP_* in internal.h + */ + int caps_internal; + + /** + * Decoding only, a comma-separated list of bitstream filters to apply to + * packets before decoding. + */ + const char *bsfs; + + /** + * Array of pointers to hardware configurations supported by the codec, + * or NULL if no hardware supported. The array is terminated by a NULL + * pointer. + * + * The user can only access this field via avcodec_get_hw_config(). + */ + const struct AVCodecHWConfigInternal **hw_configs; +} AVCodec; + +#if FF_API_CODEC_GET_SET +attribute_deprecated +int av_codec_get_max_lowres(const AVCodec *codec); +#endif + +struct MpegEncContext; + +/** + * Retrieve supported hardware configurations for a codec. + * + * Values of index from zero to some maximum return the indexed configuration + * descriptor; all other values return NULL. If the codec does not support + * any hardware configurations then it will always return NULL. + */ +const AVCodecHWConfig *avcodec_get_hw_config(const AVCodec *codec, int index); + +/** + * @defgroup lavc_hwaccel AVHWAccel + * + * @note Nothing in this structure should be accessed by the user. At some + * point in future it will not be externally visible at all. + * + * @{ + */ +typedef struct AVHWAccel { + /** + * Name of the hardware accelerated codec. + * The name is globally unique among encoders and among decoders (but an + * encoder and a decoder can share the same name). + */ + const char *name; + + /** + * Type of codec implemented by the hardware accelerator. + * + * See AVMEDIA_TYPE_xxx + */ + enum AVMediaType type; + + /** + * Codec implemented by the hardware accelerator. + * + * See AV_CODEC_ID_xxx + */ + enum AVCodecID id; + + /** + * Supported pixel format. + * + * Only hardware accelerated formats are supported here. + */ + enum AVPixelFormat pix_fmt; + + /** + * Hardware accelerated codec capabilities. + * see AV_HWACCEL_CODEC_CAP_* + */ + int capabilities; + + /***************************************************************** + * No fields below this line are part of the public API. They + * may not be used outside of libavcodec and can be changed and + * removed at will. + * New public fields should be added right above. + ***************************************************************** + */ + + /** + * Allocate a custom buffer + */ + int (*alloc_frame)(AVCodecContext *avctx, AVFrame *frame); + + /** + * Called at the beginning of each frame or field picture. + * + * Meaningful frame information (codec specific) is guaranteed to + * be parsed at this point. This function is mandatory. + * + * Note that buf can be NULL along with buf_size set to 0. + * Otherwise, this means the whole frame is available at this point. + * + * @param avctx the codec context + * @param buf the frame data buffer base + * @param buf_size the size of the frame in bytes + * @return zero if successful, a negative value otherwise + */ + int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size); + + /** + * Callback for parameter data (SPS/PPS/VPS etc). + * + * Useful for hardware decoders which keep persistent state about the + * video parameters, and need to receive any changes to update that state. + * + * @param avctx the codec context + * @param type the nal unit type + * @param buf the nal unit data buffer + * @param buf_size the size of the nal unit in bytes + * @return zero if successful, a negative value otherwise + */ + int (*decode_params)(AVCodecContext *avctx, int type, const uint8_t *buf, uint32_t buf_size); + + /** + * Callback for each slice. + * + * Meaningful slice information (codec specific) is guaranteed to + * be parsed at this point. This function is mandatory. + * The only exception is XvMC, that works on MB level. + * + * @param avctx the codec context + * @param buf the slice data buffer base + * @param buf_size the size of the slice in bytes + * @return zero if successful, a negative value otherwise + */ + int (*decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size); + + /** + * Called at the end of each frame or field picture. + * + * The whole picture is parsed at this point and can now be sent + * to the hardware accelerator. This function is mandatory. + * + * @param avctx the codec context + * @return zero if successful, a negative value otherwise + */ + int (*end_frame)(AVCodecContext *avctx); + + /** + * Size of per-frame hardware accelerator private data. + * + * Private data is allocated with av_mallocz() before + * AVCodecContext.get_buffer() and deallocated after + * AVCodecContext.release_buffer(). + */ + int frame_priv_data_size; + + /** + * Called for every Macroblock in a slice. + * + * XvMC uses it to replace the ff_mpv_reconstruct_mb(). + * Instead of decoding to raw picture, MB parameters are + * stored in an array provided by the video driver. + * + * @param s the mpeg context + */ + void (*decode_mb)(struct MpegEncContext *s); + + /** + * Initialize the hwaccel private data. + * + * This will be called from ff_get_format(), after hwaccel and + * hwaccel_context are set and the hwaccel private data in AVCodecInternal + * is allocated. + */ + int (*init)(AVCodecContext *avctx); + + /** + * Uninitialize the hwaccel private data. + * + * This will be called from get_format() or avcodec_close(), after hwaccel + * and hwaccel_context are already uninitialized. + */ + int (*uninit)(AVCodecContext *avctx); + + /** + * Size of the private data to allocate in + * AVCodecInternal.hwaccel_priv_data. + */ + int priv_data_size; + + /** + * Internal hwaccel capabilities. + */ + int caps_internal; + + /** + * Fill the given hw_frames context with current codec parameters. Called + * from get_format. Refer to avcodec_get_hw_frames_parameters() for + * details. + * + * This CAN be called before AVHWAccel.init is called, and you must assume + * that avctx->hwaccel_priv_data is invalid. + */ + int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); +} AVHWAccel; + +/** + * HWAccel is experimental and is thus avoided in favor of non experimental + * codecs + */ +#define AV_HWACCEL_CODEC_CAP_EXPERIMENTAL 0x0200 + +/** + * Hardware acceleration should be used for decoding even if the codec level + * used is unknown or higher than the maximum supported level reported by the + * hardware driver. + * + * It's generally a good idea to pass this flag unless you have a specific + * reason not to, as hardware tends to under-report supported levels. + */ +#define AV_HWACCEL_FLAG_IGNORE_LEVEL (1 << 0) + +/** + * Hardware acceleration can output YUV pixel formats with a different chroma + * sampling than 4:2:0 and/or other than 8 bits per component. + */ +#define AV_HWACCEL_FLAG_ALLOW_HIGH_DEPTH (1 << 1) + +/** + * Hardware acceleration should still be attempted for decoding when the + * codec profile does not match the reported capabilities of the hardware. + * + * For example, this can be used to try to decode baseline profile H.264 + * streams in hardware - it will often succeed, because many streams marked + * as baseline profile actually conform to constrained baseline profile. + * + * @warning If the stream is actually not supported then the behaviour is + * undefined, and may include returning entirely incorrect output + * while indicating success. + */ +#define AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH (1 << 2) + +/** + * @} + */ + +#if FF_API_AVPICTURE +/** + * @defgroup lavc_picture AVPicture + * + * Functions for working with AVPicture + * @{ + */ + +/** + * Picture data structure. + * + * Up to four components can be stored into it, the last component is + * alpha. + * @deprecated use AVFrame or imgutils functions instead + */ +typedef struct AVPicture { + attribute_deprecated + uint8_t *data[AV_NUM_DATA_POINTERS]; ///< pointers to the image data planes + attribute_deprecated + int linesize[AV_NUM_DATA_POINTERS]; ///< number of bytes per line +} AVPicture; + +/** + * @} + */ +#endif + +enum AVSubtitleType { + SUBTITLE_NONE, + + SUBTITLE_BITMAP, ///< A bitmap, pict will be set + + /** + * Plain text, the text field must be set by the decoder and is + * authoritative. ass and pict fields may contain approximations. + */ + SUBTITLE_TEXT, + + /** + * Formatted text, the ass field must be set by the decoder and is + * authoritative. pict and text fields may contain approximations. + */ + SUBTITLE_ASS, +}; + +#define AV_SUBTITLE_FLAG_FORCED 0x00000001 + +typedef struct AVSubtitleRect { + int x; ///< top left corner of pict, undefined when pict is not set + int y; ///< top left corner of pict, undefined when pict is not set + int w; ///< width of pict, undefined when pict is not set + int h; ///< height of pict, undefined when pict is not set + int nb_colors; ///< number of colors in pict, undefined when pict is not set + +#if FF_API_AVPICTURE + /** + * @deprecated unused + */ + attribute_deprecated + AVPicture pict; +#endif + /** + * data+linesize for the bitmap of this subtitle. + * Can be set for text/ass as well once they are rendered. + */ + uint8_t *data[4]; + int linesize[4]; + + enum AVSubtitleType type; + + char *text; ///< 0 terminated plain UTF-8 text + + /** + * 0 terminated ASS/SSA compatible event line. + * The presentation of this is unaffected by the other values in this + * struct. + */ + char *ass; + + int flags; +} AVSubtitleRect; + +typedef struct AVSubtitle { + uint16_t format; /* 0 = graphics */ + uint32_t start_display_time; /* relative to packet pts, in ms */ + uint32_t end_display_time; /* relative to packet pts, in ms */ + unsigned num_rects; + AVSubtitleRect **rects; + int64_t pts; ///< Same as packet pts, in AV_TIME_BASE +} AVSubtitle; + +/** + * This struct describes the properties of an encoded stream. + * + * sizeof(AVCodecParameters) is not a part of the public ABI, this struct must + * be allocated with avcodec_parameters_alloc() and freed with + * avcodec_parameters_free(). + */ +typedef struct AVCodecParameters { + /** + * General type of the encoded data. + */ + enum AVMediaType codec_type; + /** + * Specific type of the encoded data (the codec used). + */ + enum AVCodecID codec_id; + /** + * Additional information about the codec (corresponds to the AVI FOURCC). + */ + uint32_t codec_tag; + + /** + * Extra binary data needed for initializing the decoder, codec-dependent. + * + * Must be allocated with av_malloc() and will be freed by + * avcodec_parameters_free(). The allocated size of extradata must be at + * least extradata_size + AV_INPUT_BUFFER_PADDING_SIZE, with the padding + * bytes zeroed. + */ + uint8_t *extradata; + /** + * Size of the extradata content in bytes. + */ + int extradata_size; + + /** + * - video: the pixel format, the value corresponds to enum AVPixelFormat. + * - audio: the sample format, the value corresponds to enum AVSampleFormat. + */ + int format; + + /** + * The average bitrate of the encoded data (in bits per second). + */ + int64_t bit_rate; + + /** + * The number of bits per sample in the codedwords. + * + * This is basically the bitrate per sample. It is mandatory for a bunch of + * formats to actually decode them. It's the number of bits for one sample in + * the actual coded bitstream. + * + * This could be for example 4 for ADPCM + * For PCM formats this matches bits_per_raw_sample + * Can be 0 + */ + int bits_per_coded_sample; + + /** + * This is the number of valid bits in each output sample. If the + * sample format has more bits, the least significant bits are additional + * padding bits, which are always 0. Use right shifts to reduce the sample + * to its actual size. For example, audio formats with 24 bit samples will + * have bits_per_raw_sample set to 24, and format set to AV_SAMPLE_FMT_S32. + * To get the original sample use "(int32_t)sample >> 8"." + * + * For ADPCM this might be 12 or 16 or similar + * Can be 0 + */ + int bits_per_raw_sample; + + /** + * Codec-specific bitstream restrictions that the stream conforms to. + */ + int profile; + int level; + + /** + * Video only. The dimensions of the video frame in pixels. + */ + int width; + int height; + + /** + * Video only. The aspect ratio (width / height) which a single pixel + * should have when displayed. + * + * When the aspect ratio is unknown / undefined, the numerator should be + * set to 0 (the denominator may have any value). + */ + AVRational sample_aspect_ratio; + + /** + * Video only. The order of the fields in interlaced video. + */ + enum AVFieldOrder field_order; + + /** + * Video only. Additional colorspace characteristics. + */ + enum AVColorRange color_range; + enum AVColorPrimaries color_primaries; + enum AVColorTransferCharacteristic color_trc; + enum AVColorSpace color_space; + enum AVChromaLocation chroma_location; + + /** + * Video only. Number of delayed frames. + */ + int video_delay; + + /** + * Audio only. The channel layout bitmask. May be 0 if the channel layout is + * unknown or unspecified, otherwise the number of bits set must be equal to + * the channels field. + */ + uint64_t channel_layout; + /** + * Audio only. The number of audio channels. + */ + int channels; + /** + * Audio only. The number of audio samples per second. + */ + int sample_rate; + /** + * Audio only. The number of bytes per coded audio frame, required by some + * formats. + * + * Corresponds to nBlockAlign in WAVEFORMATEX. + */ + int block_align; + /** + * Audio only. Audio frame size, if known. Required by some formats to be static. + */ + int frame_size; + + /** + * Audio only. The amount of padding (in samples) inserted by the encoder at + * the beginning of the audio. I.e. this number of leading decoded samples + * must be discarded by the caller to get the original audio without leading + * padding. + */ + int initial_padding; + /** + * Audio only. The amount of padding (in samples) appended by the encoder to + * the end of the audio. I.e. this number of decoded samples must be + * discarded by the caller from the end of the stream to get the original + * audio without any trailing padding. + */ + int trailing_padding; + /** + * Audio only. Number of samples to skip after a discontinuity. + */ + int seek_preroll; +} AVCodecParameters; + +/** + * Iterate over all registered codecs. + * + * @param opaque a pointer where libavcodec will store the iteration state. Must + * point to NULL to start the iteration. + * + * @return the next registered codec or NULL when the iteration is + * finished + */ +const AVCodec *av_codec_iterate(void **opaque); + +#if FF_API_NEXT +/** + * If c is NULL, returns the first registered codec, + * if c is non-NULL, returns the next registered codec after c, + * or NULL if c is the last one. + */ +attribute_deprecated +AVCodec *av_codec_next(const AVCodec *c); +#endif + +/** + * Return the LIBAVCODEC_VERSION_INT constant. + */ +unsigned avcodec_version(void); + +/** + * Return the libavcodec build-time configuration. + */ +const char *avcodec_configuration(void); + +/** + * Return the libavcodec license. + */ +const char *avcodec_license(void); + +#if FF_API_NEXT +/** + * Register the codec codec and initialize libavcodec. + * + * @warning either this function or avcodec_register_all() must be called + * before any other libavcodec functions. + * + * @see avcodec_register_all() + */ +attribute_deprecated +void avcodec_register(AVCodec *codec); + +/** + * Register all the codecs, parsers and bitstream filters which were enabled at + * configuration time. If you do not call this function you can select exactly + * which formats you want to support, by using the individual registration + * functions. + * + * @see avcodec_register + * @see av_register_codec_parser + * @see av_register_bitstream_filter + */ +attribute_deprecated +void avcodec_register_all(void); +#endif + +/** + * Allocate an AVCodecContext and set its fields to default values. The + * resulting struct should be freed with avcodec_free_context(). + * + * @param codec if non-NULL, allocate private data and initialize defaults + * for the given codec. It is illegal to then call avcodec_open2() + * with a different codec. + * If NULL, then the codec-specific defaults won't be initialized, + * which may result in suboptimal default settings (this is + * important mainly for encoders, e.g. libx264). + * + * @return An AVCodecContext filled with default values or NULL on failure. + */ +AVCodecContext *avcodec_alloc_context3(const AVCodec *codec); + +/** + * Free the codec context and everything associated with it and write NULL to + * the provided pointer. + */ +void avcodec_free_context(AVCodecContext **avctx); + +#if FF_API_GET_CONTEXT_DEFAULTS +/** + * @deprecated This function should not be used, as closing and opening a codec + * context multiple time is not supported. A new codec context should be + * allocated for each new use. + */ +int avcodec_get_context_defaults3(AVCodecContext *s, const AVCodec *codec); +#endif + +/** + * Get the AVClass for AVCodecContext. It can be used in combination with + * AV_OPT_SEARCH_FAKE_OBJ for examining options. + * + * @see av_opt_find(). + */ +const AVClass *avcodec_get_class(void); + +#if FF_API_COPY_CONTEXT +/** + * Get the AVClass for AVFrame. It can be used in combination with + * AV_OPT_SEARCH_FAKE_OBJ for examining options. + * + * @see av_opt_find(). + */ +const AVClass *avcodec_get_frame_class(void); + +/** + * Get the AVClass for AVSubtitleRect. It can be used in combination with + * AV_OPT_SEARCH_FAKE_OBJ for examining options. + * + * @see av_opt_find(). + */ +const AVClass *avcodec_get_subtitle_rect_class(void); + +/** + * Copy the settings of the source AVCodecContext into the destination + * AVCodecContext. The resulting destination codec context will be + * unopened, i.e. you are required to call avcodec_open2() before you + * can use this AVCodecContext to decode/encode video/audio data. + * + * @param dest target codec context, should be initialized with + * avcodec_alloc_context3(NULL), but otherwise uninitialized + * @param src source codec context + * @return AVERROR() on error (e.g. memory allocation error), 0 on success + * + * @deprecated The semantics of this function are ill-defined and it should not + * be used. If you need to transfer the stream parameters from one codec context + * to another, use an intermediate AVCodecParameters instance and the + * avcodec_parameters_from_context() / avcodec_parameters_to_context() + * functions. + */ +attribute_deprecated +int avcodec_copy_context(AVCodecContext *dest, const AVCodecContext *src); +#endif + +/** + * Allocate a new AVCodecParameters and set its fields to default values + * (unknown/invalid/0). The returned struct must be freed with + * avcodec_parameters_free(). + */ +AVCodecParameters *avcodec_parameters_alloc(void); + +/** + * Free an AVCodecParameters instance and everything associated with it and + * write NULL to the supplied pointer. + */ +void avcodec_parameters_free(AVCodecParameters **par); + +/** + * Copy the contents of src to dst. Any allocated fields in dst are freed and + * replaced with newly allocated duplicates of the corresponding fields in src. + * + * @return >= 0 on success, a negative AVERROR code on failure. + */ +int avcodec_parameters_copy(AVCodecParameters *dst, const AVCodecParameters *src); + +/** + * Fill the parameters struct based on the values from the supplied codec + * context. Any allocated fields in par are freed and replaced with duplicates + * of the corresponding fields in codec. + * + * @return >= 0 on success, a negative AVERROR code on failure + */ +int avcodec_parameters_from_context(AVCodecParameters *par, + const AVCodecContext *codec); + +/** + * Fill the codec context based on the values from the supplied codec + * parameters. Any allocated fields in codec that have a corresponding field in + * par are freed and replaced with duplicates of the corresponding field in par. + * Fields in codec that do not have a counterpart in par are not touched. + * + * @return >= 0 on success, a negative AVERROR code on failure. + */ +int avcodec_parameters_to_context(AVCodecContext *codec, + const AVCodecParameters *par); + +/** + * Initialize the AVCodecContext to use the given AVCodec. Prior to using this + * function the context has to be allocated with avcodec_alloc_context3(). + * + * The functions avcodec_find_decoder_by_name(), avcodec_find_encoder_by_name(), + * avcodec_find_decoder() and avcodec_find_encoder() provide an easy way for + * retrieving a codec. + * + * @warning This function is not thread safe! + * + * @note Always call this function before using decoding routines (such as + * @ref avcodec_receive_frame()). + * + * @code + * avcodec_register_all(); + * av_dict_set(&opts, "b", "2.5M", 0); + * codec = avcodec_find_decoder(AV_CODEC_ID_H264); + * if (!codec) + * exit(1); + * + * context = avcodec_alloc_context3(codec); + * + * if (avcodec_open2(context, codec, opts) < 0) + * exit(1); + * @endcode + * + * @param avctx The context to initialize. + * @param codec The codec to open this context for. If a non-NULL codec has been + * previously passed to avcodec_alloc_context3() or + * for this context, then this parameter MUST be either NULL or + * equal to the previously passed codec. + * @param options A dictionary filled with AVCodecContext and codec-private options. + * On return this object will be filled with options that were not found. + * + * @return zero on success, a negative value on error + * @see avcodec_alloc_context3(), avcodec_find_decoder(), avcodec_find_encoder(), + * av_dict_set(), av_opt_find(). + */ +int avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options); + +/** + * Close a given AVCodecContext and free all the data associated with it + * (but not the AVCodecContext itself). + * + * Calling this function on an AVCodecContext that hasn't been opened will free + * the codec-specific data allocated in avcodec_alloc_context3() with a non-NULL + * codec. Subsequent calls will do nothing. + * + * @note Do not use this function. Use avcodec_free_context() to destroy a + * codec context (either open or closed). Opening and closing a codec context + * multiple times is not supported anymore -- use multiple codec contexts + * instead. + */ +int avcodec_close(AVCodecContext *avctx); + +/** + * Free all allocated data in the given subtitle struct. + * + * @param sub AVSubtitle to free. + */ +void avsubtitle_free(AVSubtitle *sub); + +/** + * @} + */ + +/** + * @addtogroup lavc_packet + * @{ + */ + +/** + * Allocate an AVPacket and set its fields to default values. The resulting + * struct must be freed using av_packet_free(). + * + * @return An AVPacket filled with default values or NULL on failure. + * + * @note this only allocates the AVPacket itself, not the data buffers. Those + * must be allocated through other means such as av_new_packet. + * + * @see av_new_packet + */ +AVPacket *av_packet_alloc(void); + +/** + * Create a new packet that references the same data as src. + * + * This is a shortcut for av_packet_alloc()+av_packet_ref(). + * + * @return newly created AVPacket on success, NULL on error. + * + * @see av_packet_alloc + * @see av_packet_ref + */ +AVPacket *av_packet_clone(const AVPacket *src); + +/** + * Free the packet, if the packet is reference counted, it will be + * unreferenced first. + * + * @param pkt packet to be freed. The pointer will be set to NULL. + * @note passing NULL is a no-op. + */ +void av_packet_free(AVPacket **pkt); + +/** + * Initialize optional fields of a packet with default values. + * + * Note, this does not touch the data and size members, which have to be + * initialized separately. + * + * @param pkt packet + */ +void av_init_packet(AVPacket *pkt); + +/** + * Allocate the payload of a packet and initialize its fields with + * default values. + * + * @param pkt packet + * @param size wanted payload size + * @return 0 if OK, AVERROR_xxx otherwise + */ +int av_new_packet(AVPacket *pkt, int size); + +/** + * Reduce packet size, correctly zeroing padding + * + * @param pkt packet + * @param size new size + */ +void av_shrink_packet(AVPacket *pkt, int size); + +/** + * Increase packet size, correctly zeroing padding + * + * @param pkt packet + * @param grow_by number of bytes by which to increase the size of the packet + */ +int av_grow_packet(AVPacket *pkt, int grow_by); + +/** + * Initialize a reference-counted packet from av_malloc()ed data. + * + * @param pkt packet to be initialized. This function will set the data, size, + * buf and destruct fields, all others are left untouched. + * @param data Data allocated by av_malloc() to be used as packet data. If this + * function returns successfully, the data is owned by the underlying AVBuffer. + * The caller may not access the data through other means. + * @param size size of data in bytes, without the padding. I.e. the full buffer + * size is assumed to be size + AV_INPUT_BUFFER_PADDING_SIZE. + * + * @return 0 on success, a negative AVERROR on error + */ +int av_packet_from_data(AVPacket *pkt, uint8_t *data, int size); + +#if FF_API_AVPACKET_OLD_API +/** + * @warning This is a hack - the packet memory allocation stuff is broken. The + * packet is allocated if it was not really allocated. + * + * @deprecated Use av_packet_ref or av_packet_make_refcounted + */ +attribute_deprecated +int av_dup_packet(AVPacket *pkt); +/** + * Copy packet, including contents + * + * @return 0 on success, negative AVERROR on fail + * + * @deprecated Use av_packet_ref + */ +attribute_deprecated +int av_copy_packet(AVPacket *dst, const AVPacket *src); + +/** + * Copy packet side data + * + * @return 0 on success, negative AVERROR on fail + * + * @deprecated Use av_packet_copy_props + */ +attribute_deprecated +int av_copy_packet_side_data(AVPacket *dst, const AVPacket *src); + +/** + * Free a packet. + * + * @deprecated Use av_packet_unref + * + * @param pkt packet to free + */ +attribute_deprecated +void av_free_packet(AVPacket *pkt); +#endif +/** + * Allocate new information of a packet. + * + * @param pkt packet + * @param type side information type + * @param size side information size + * @return pointer to fresh allocated data or NULL otherwise + */ +uint8_t* av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type, + int size); + +/** + * Wrap an existing array as a packet side data. + * + * @param pkt packet + * @param type side information type + * @param data the side data array. It must be allocated with the av_malloc() + * family of functions. The ownership of the data is transferred to + * pkt. + * @param size side information size + * @return a non-negative number on success, a negative AVERROR code on + * failure. On failure, the packet is unchanged and the data remains + * owned by the caller. + */ +int av_packet_add_side_data(AVPacket *pkt, enum AVPacketSideDataType type, + uint8_t *data, size_t size); + +/** + * Shrink the already allocated side data buffer + * + * @param pkt packet + * @param type side information type + * @param size new side information size + * @return 0 on success, < 0 on failure + */ +int av_packet_shrink_side_data(AVPacket *pkt, enum AVPacketSideDataType type, + int size); + +/** + * Get side information from packet. + * + * @param pkt packet + * @param type desired side information type + * @param size pointer for side information size to store (optional) + * @return pointer to data if present or NULL otherwise + */ +uint8_t* av_packet_get_side_data(const AVPacket *pkt, enum AVPacketSideDataType type, + int *size); + +#if FF_API_MERGE_SD_API +attribute_deprecated +int av_packet_merge_side_data(AVPacket *pkt); + +attribute_deprecated +int av_packet_split_side_data(AVPacket *pkt); +#endif + +const char *av_packet_side_data_name(enum AVPacketSideDataType type); + +/** + * Pack a dictionary for use in side_data. + * + * @param dict The dictionary to pack. + * @param size pointer to store the size of the returned data + * @return pointer to data if successful, NULL otherwise + */ +uint8_t *av_packet_pack_dictionary(AVDictionary *dict, int *size); +/** + * Unpack a dictionary from side_data. + * + * @param data data from side_data + * @param size size of the data + * @param dict the metadata storage dictionary + * @return 0 on success, < 0 on failure + */ +int av_packet_unpack_dictionary(const uint8_t *data, int size, AVDictionary **dict); + + +/** + * Convenience function to free all the side data stored. + * All the other fields stay untouched. + * + * @param pkt packet + */ +void av_packet_free_side_data(AVPacket *pkt); + +/** + * Setup a new reference to the data described by a given packet + * + * If src is reference-counted, setup dst as a new reference to the + * buffer in src. Otherwise allocate a new buffer in dst and copy the + * data from src into it. + * + * All the other fields are copied from src. + * + * @see av_packet_unref + * + * @param dst Destination packet + * @param src Source packet + * + * @return 0 on success, a negative AVERROR on error. + */ +int av_packet_ref(AVPacket *dst, const AVPacket *src); + +/** + * Wipe the packet. + * + * Unreference the buffer referenced by the packet and reset the + * remaining packet fields to their default values. + * + * @param pkt The packet to be unreferenced. + */ +void av_packet_unref(AVPacket *pkt); + +/** + * Move every field in src to dst and reset src. + * + * @see av_packet_unref + * + * @param src Source packet, will be reset + * @param dst Destination packet + */ +void av_packet_move_ref(AVPacket *dst, AVPacket *src); + +/** + * Copy only "properties" fields from src to dst. + * + * Properties for the purpose of this function are all the fields + * beside those related to the packet data (buf, data, size) + * + * @param dst Destination packet + * @param src Source packet + * + * @return 0 on success AVERROR on failure. + */ +int av_packet_copy_props(AVPacket *dst, const AVPacket *src); + +/** + * Ensure the data described by a given packet is reference counted. + * + * @note This function does not ensure that the reference will be writable. + * Use av_packet_make_writable instead for that purpose. + * + * @see av_packet_ref + * @see av_packet_make_writable + * + * @param pkt packet whose data should be made reference counted. + * + * @return 0 on success, a negative AVERROR on error. On failure, the + * packet is unchanged. + */ +int av_packet_make_refcounted(AVPacket *pkt); + +/** + * Create a writable reference for the data described by a given packet, + * avoiding data copy if possible. + * + * @param pkt Packet whose data should be made writable. + * + * @return 0 on success, a negative AVERROR on failure. On failure, the + * packet is unchanged. + */ +int av_packet_make_writable(AVPacket *pkt); + +/** + * Convert valid timing fields (timestamps / durations) in a packet from one + * timebase to another. Timestamps with unknown values (AV_NOPTS_VALUE) will be + * ignored. + * + * @param pkt packet on which the conversion will be performed + * @param tb_src source timebase, in which the timing fields in pkt are + * expressed + * @param tb_dst destination timebase, to which the timing fields will be + * converted + */ +void av_packet_rescale_ts(AVPacket *pkt, AVRational tb_src, AVRational tb_dst); + +/** + * @} + */ + +/** + * @addtogroup lavc_decoding + * @{ + */ + +/** + * Find a registered decoder with a matching codec ID. + * + * @param id AVCodecID of the requested decoder + * @return A decoder if one was found, NULL otherwise. + */ +AVCodec *avcodec_find_decoder(enum AVCodecID id); + +/** + * Find a registered decoder with the specified name. + * + * @param name name of the requested decoder + * @return A decoder if one was found, NULL otherwise. + */ +AVCodec *avcodec_find_decoder_by_name(const char *name); + +/** + * The default callback for AVCodecContext.get_buffer2(). It is made public so + * it can be called by custom get_buffer2() implementations for decoders without + * AV_CODEC_CAP_DR1 set. + */ +int avcodec_default_get_buffer2(AVCodecContext *s, AVFrame *frame, int flags); + +/** + * Modify width and height values so that they will result in a memory + * buffer that is acceptable for the codec if you do not use any horizontal + * padding. + * + * May only be used if a codec with AV_CODEC_CAP_DR1 has been opened. + */ +void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height); + +/** + * Modify width and height values so that they will result in a memory + * buffer that is acceptable for the codec if you also ensure that all + * line sizes are a multiple of the respective linesize_align[i]. + * + * May only be used if a codec with AV_CODEC_CAP_DR1 has been opened. + */ +void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, + int linesize_align[AV_NUM_DATA_POINTERS]); + +/** + * Converts AVChromaLocation to swscale x/y chroma position. + * + * The positions represent the chroma (0,0) position in a coordinates system + * with luma (0,0) representing the origin and luma(1,1) representing 256,256 + * + * @param xpos horizontal chroma sample position + * @param ypos vertical chroma sample position + */ +int avcodec_enum_to_chroma_pos(int *xpos, int *ypos, enum AVChromaLocation pos); + +/** + * Converts swscale x/y chroma position to AVChromaLocation. + * + * The positions represent the chroma (0,0) position in a coordinates system + * with luma (0,0) representing the origin and luma(1,1) representing 256,256 + * + * @param xpos horizontal chroma sample position + * @param ypos vertical chroma sample position + */ +enum AVChromaLocation avcodec_chroma_pos_to_enum(int xpos, int ypos); + +/** + * Decode the audio frame of size avpkt->size from avpkt->data into frame. + * + * Some decoders may support multiple frames in a single AVPacket. Such + * decoders would then just decode the first frame and the return value would be + * less than the packet size. In this case, avcodec_decode_audio4 has to be + * called again with an AVPacket containing the remaining data in order to + * decode the second frame, etc... Even if no frames are returned, the packet + * needs to be fed to the decoder with remaining data until it is completely + * consumed or an error occurs. + * + * Some decoders (those marked with AV_CODEC_CAP_DELAY) have a delay between input + * and output. This means that for some packets they will not immediately + * produce decoded output and need to be flushed at the end of decoding to get + * all the decoded data. Flushing is done by calling this function with packets + * with avpkt->data set to NULL and avpkt->size set to 0 until it stops + * returning samples. It is safe to flush even those decoders that are not + * marked with AV_CODEC_CAP_DELAY, then no samples will be returned. + * + * @warning The input buffer, avpkt->data must be AV_INPUT_BUFFER_PADDING_SIZE + * larger than the actual read bytes because some optimized bitstream + * readers read 32 or 64 bits at once and could read over the end. + * + * @note The AVCodecContext MUST have been opened with @ref avcodec_open2() + * before packets may be fed to the decoder. + * + * @param avctx the codec context + * @param[out] frame The AVFrame in which to store decoded audio samples. + * The decoder will allocate a buffer for the decoded frame by + * calling the AVCodecContext.get_buffer2() callback. + * When AVCodecContext.refcounted_frames is set to 1, the frame is + * reference counted and the returned reference belongs to the + * caller. The caller must release the frame using av_frame_unref() + * when the frame is no longer needed. The caller may safely write + * to the frame if av_frame_is_writable() returns 1. + * When AVCodecContext.refcounted_frames is set to 0, the returned + * reference belongs to the decoder and is valid only until the + * next call to this function or until closing or flushing the + * decoder. The caller may not write to it. + * @param[out] got_frame_ptr Zero if no frame could be decoded, otherwise it is + * non-zero. Note that this field being set to zero + * does not mean that an error has occurred. For + * decoders with AV_CODEC_CAP_DELAY set, no given decode + * call is guaranteed to produce a frame. + * @param[in] avpkt The input AVPacket containing the input buffer. + * At least avpkt->data and avpkt->size should be set. Some + * decoders might also require additional fields to be set. + * @return A negative error code is returned if an error occurred during + * decoding, otherwise the number of bytes consumed from the input + * AVPacket is returned. + * +* @deprecated Use avcodec_send_packet() and avcodec_receive_frame(). + */ +attribute_deprecated +int avcodec_decode_audio4(AVCodecContext *avctx, AVFrame *frame, + int *got_frame_ptr, const AVPacket *avpkt); + +/** + * Decode the video frame of size avpkt->size from avpkt->data into picture. + * Some decoders may support multiple frames in a single AVPacket, such + * decoders would then just decode the first frame. + * + * @warning The input buffer must be AV_INPUT_BUFFER_PADDING_SIZE larger than + * the actual read bytes because some optimized bitstream readers read 32 or 64 + * bits at once and could read over the end. + * + * @warning The end of the input buffer buf should be set to 0 to ensure that + * no overreading happens for damaged MPEG streams. + * + * @note Codecs which have the AV_CODEC_CAP_DELAY capability set have a delay + * between input and output, these need to be fed with avpkt->data=NULL, + * avpkt->size=0 at the end to return the remaining frames. + * + * @note The AVCodecContext MUST have been opened with @ref avcodec_open2() + * before packets may be fed to the decoder. + * + * @param avctx the codec context + * @param[out] picture The AVFrame in which the decoded video frame will be stored. + * Use av_frame_alloc() to get an AVFrame. The codec will + * allocate memory for the actual bitmap by calling the + * AVCodecContext.get_buffer2() callback. + * When AVCodecContext.refcounted_frames is set to 1, the frame is + * reference counted and the returned reference belongs to the + * caller. The caller must release the frame using av_frame_unref() + * when the frame is no longer needed. The caller may safely write + * to the frame if av_frame_is_writable() returns 1. + * When AVCodecContext.refcounted_frames is set to 0, the returned + * reference belongs to the decoder and is valid only until the + * next call to this function or until closing or flushing the + * decoder. The caller may not write to it. + * + * @param[in] avpkt The input AVPacket containing the input buffer. + * You can create such packet with av_init_packet() and by then setting + * data and size, some decoders might in addition need other fields like + * flags&AV_PKT_FLAG_KEY. All decoders are designed to use the least + * fields possible. + * @param[in,out] got_picture_ptr Zero if no frame could be decompressed, otherwise, it is nonzero. + * @return On error a negative value is returned, otherwise the number of bytes + * used or zero if no frame could be decompressed. + * + * @deprecated Use avcodec_send_packet() and avcodec_receive_frame(). + */ +attribute_deprecated +int avcodec_decode_video2(AVCodecContext *avctx, AVFrame *picture, + int *got_picture_ptr, + const AVPacket *avpkt); + +/** + * Decode a subtitle message. + * Return a negative value on error, otherwise return the number of bytes used. + * If no subtitle could be decompressed, got_sub_ptr is zero. + * Otherwise, the subtitle is stored in *sub. + * Note that AV_CODEC_CAP_DR1 is not available for subtitle codecs. This is for + * simplicity, because the performance difference is expect to be negligible + * and reusing a get_buffer written for video codecs would probably perform badly + * due to a potentially very different allocation pattern. + * + * Some decoders (those marked with AV_CODEC_CAP_DELAY) have a delay between input + * and output. This means that for some packets they will not immediately + * produce decoded output and need to be flushed at the end of decoding to get + * all the decoded data. Flushing is done by calling this function with packets + * with avpkt->data set to NULL and avpkt->size set to 0 until it stops + * returning subtitles. It is safe to flush even those decoders that are not + * marked with AV_CODEC_CAP_DELAY, then no subtitles will be returned. + * + * @note The AVCodecContext MUST have been opened with @ref avcodec_open2() + * before packets may be fed to the decoder. + * + * @param avctx the codec context + * @param[out] sub The Preallocated AVSubtitle in which the decoded subtitle will be stored, + * must be freed with avsubtitle_free if *got_sub_ptr is set. + * @param[in,out] got_sub_ptr Zero if no subtitle could be decompressed, otherwise, it is nonzero. + * @param[in] avpkt The input AVPacket containing the input buffer. + */ +int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub, + int *got_sub_ptr, + AVPacket *avpkt); + +/** + * Supply raw packet data as input to a decoder. + * + * Internally, this call will copy relevant AVCodecContext fields, which can + * influence decoding per-packet, and apply them when the packet is actually + * decoded. (For example AVCodecContext.skip_frame, which might direct the + * decoder to drop the frame contained by the packet sent with this function.) + * + * @warning The input buffer, avpkt->data must be AV_INPUT_BUFFER_PADDING_SIZE + * larger than the actual read bytes because some optimized bitstream + * readers read 32 or 64 bits at once and could read over the end. + * + * @warning Do not mix this API with the legacy API (like avcodec_decode_video2()) + * on the same AVCodecContext. It will return unexpected results now + * or in future libavcodec versions. + * + * @note The AVCodecContext MUST have been opened with @ref avcodec_open2() + * before packets may be fed to the decoder. + * + * @param avctx codec context + * @param[in] avpkt The input AVPacket. Usually, this will be a single video + * frame, or several complete audio frames. + * Ownership of the packet remains with the caller, and the + * decoder will not write to the packet. The decoder may create + * a reference to the packet data (or copy it if the packet is + * not reference-counted). + * Unlike with older APIs, the packet is always fully consumed, + * and if it contains multiple frames (e.g. some audio codecs), + * will require you to call avcodec_receive_frame() multiple + * times afterwards before you can send a new packet. + * It can be NULL (or an AVPacket with data set to NULL and + * size set to 0); in this case, it is considered a flush + * packet, which signals the end of the stream. Sending the + * first flush packet will return success. Subsequent ones are + * unnecessary and will return AVERROR_EOF. If the decoder + * still has frames buffered, it will return them after sending + * a flush packet. + * + * @return 0 on success, otherwise negative error code: + * AVERROR(EAGAIN): input is not accepted in the current state - user + * must read output with avcodec_receive_frame() (once + * all output is read, the packet should be resent, and + * the call will not fail with EAGAIN). + * AVERROR_EOF: the decoder has been flushed, and no new packets can + * be sent to it (also returned if more than 1 flush + * packet is sent) + * AVERROR(EINVAL): codec not opened, it is an encoder, or requires flush + * AVERROR(ENOMEM): failed to add packet to internal queue, or similar + * other errors: legitimate decoding errors + */ +int avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt); + +/** + * Return decoded output data from a decoder. + * + * @param avctx codec context + * @param frame This will be set to a reference-counted video or audio + * frame (depending on the decoder type) allocated by the + * decoder. Note that the function will always call + * av_frame_unref(frame) before doing anything else. + * + * @return + * 0: success, a frame was returned + * AVERROR(EAGAIN): output is not available in this state - user must try + * to send new input + * AVERROR_EOF: the decoder has been fully flushed, and there will be + * no more output frames + * AVERROR(EINVAL): codec not opened, or it is an encoder + * other negative values: legitimate decoding errors + */ +int avcodec_receive_frame(AVCodecContext *avctx, AVFrame *frame); + +/** + * Supply a raw video or audio frame to the encoder. Use avcodec_receive_packet() + * to retrieve buffered output packets. + * + * @param avctx codec context + * @param[in] frame AVFrame containing the raw audio or video frame to be encoded. + * Ownership of the frame remains with the caller, and the + * encoder will not write to the frame. The encoder may create + * a reference to the frame data (or copy it if the frame is + * not reference-counted). + * It can be NULL, in which case it is considered a flush + * packet. This signals the end of the stream. If the encoder + * still has packets buffered, it will return them after this + * call. Once flushing mode has been entered, additional flush + * packets are ignored, and sending frames will return + * AVERROR_EOF. + * + * For audio: + * If AV_CODEC_CAP_VARIABLE_FRAME_SIZE is set, then each frame + * can have any number of samples. + * If it is not set, frame->nb_samples must be equal to + * avctx->frame_size for all frames except the last. + * The final frame may be smaller than avctx->frame_size. + * @return 0 on success, otherwise negative error code: + * AVERROR(EAGAIN): input is not accepted in the current state - user + * must read output with avcodec_receive_packet() (once + * all output is read, the packet should be resent, and + * the call will not fail with EAGAIN). + * AVERROR_EOF: the encoder has been flushed, and no new frames can + * be sent to it + * AVERROR(EINVAL): codec not opened, refcounted_frames not set, it is a + * decoder, or requires flush + * AVERROR(ENOMEM): failed to add packet to internal queue, or similar + * other errors: legitimate decoding errors + */ +int avcodec_send_frame(AVCodecContext *avctx, const AVFrame *frame); + +/** + * Read encoded data from the encoder. + * + * @param avctx codec context + * @param avpkt This will be set to a reference-counted packet allocated by the + * encoder. Note that the function will always call + * av_frame_unref(frame) before doing anything else. + * @return 0 on success, otherwise negative error code: + * AVERROR(EAGAIN): output is not available in the current state - user + * must try to send input + * AVERROR_EOF: the encoder has been fully flushed, and there will be + * no more output packets + * AVERROR(EINVAL): codec not opened, or it is an encoder + * other errors: legitimate decoding errors + */ +int avcodec_receive_packet(AVCodecContext *avctx, AVPacket *avpkt); + +/** + * Create and return a AVHWFramesContext with values adequate for hardware + * decoding. This is meant to get called from the get_format callback, and is + * a helper for preparing a AVHWFramesContext for AVCodecContext.hw_frames_ctx. + * This API is for decoding with certain hardware acceleration modes/APIs only. + * + * The returned AVHWFramesContext is not initialized. The caller must do this + * with av_hwframe_ctx_init(). + * + * Calling this function is not a requirement, but makes it simpler to avoid + * codec or hardware API specific details when manually allocating frames. + * + * Alternatively to this, an API user can set AVCodecContext.hw_device_ctx, + * which sets up AVCodecContext.hw_frames_ctx fully automatically, and makes + * it unnecessary to call this function or having to care about + * AVHWFramesContext initialization at all. + * + * There are a number of requirements for calling this function: + * + * - It must be called from get_format with the same avctx parameter that was + * passed to get_format. Calling it outside of get_format is not allowed, and + * can trigger undefined behavior. + * - The function is not always supported (see description of return values). + * Even if this function returns successfully, hwaccel initialization could + * fail later. (The degree to which implementations check whether the stream + * is actually supported varies. Some do this check only after the user's + * get_format callback returns.) + * - The hw_pix_fmt must be one of the choices suggested by get_format. If the + * user decides to use a AVHWFramesContext prepared with this API function, + * the user must return the same hw_pix_fmt from get_format. + * - The device_ref passed to this function must support the given hw_pix_fmt. + * - After calling this API function, it is the user's responsibility to + * initialize the AVHWFramesContext (returned by the out_frames_ref parameter), + * and to set AVCodecContext.hw_frames_ctx to it. If done, this must be done + * before returning from get_format (this is implied by the normal + * AVCodecContext.hw_frames_ctx API rules). + * - The AVHWFramesContext parameters may change every time time get_format is + * called. Also, AVCodecContext.hw_frames_ctx is reset before get_format. So + * you are inherently required to go through this process again on every + * get_format call. + * - It is perfectly possible to call this function without actually using + * the resulting AVHWFramesContext. One use-case might be trying to reuse a + * previously initialized AVHWFramesContext, and calling this API function + * only to test whether the required frame parameters have changed. + * - Fields that use dynamically allocated values of any kind must not be set + * by the user unless setting them is explicitly allowed by the documentation. + * If the user sets AVHWFramesContext.free and AVHWFramesContext.user_opaque, + * the new free callback must call the potentially set previous free callback. + * This API call may set any dynamically allocated fields, including the free + * callback. + * + * The function will set at least the following fields on AVHWFramesContext + * (potentially more, depending on hwaccel API): + * + * - All fields set by av_hwframe_ctx_alloc(). + * - Set the format field to hw_pix_fmt. + * - Set the sw_format field to the most suited and most versatile format. (An + * implication is that this will prefer generic formats over opaque formats + * with arbitrary restrictions, if possible.) + * - Set the width/height fields to the coded frame size, rounded up to the + * API-specific minimum alignment. + * - Only _if_ the hwaccel requires a pre-allocated pool: set the initial_pool_size + * field to the number of maximum reference surfaces possible with the codec, + * plus 1 surface for the user to work (meaning the user can safely reference + * at most 1 decoded surface at a time), plus additional buffering introduced + * by frame threading. If the hwaccel does not require pre-allocation, the + * field is left to 0, and the decoder will allocate new surfaces on demand + * during decoding. + * - Possibly AVHWFramesContext.hwctx fields, depending on the underlying + * hardware API. + * + * Essentially, out_frames_ref returns the same as av_hwframe_ctx_alloc(), but + * with basic frame parameters set. + * + * The function is stateless, and does not change the AVCodecContext or the + * device_ref AVHWDeviceContext. + * + * @param avctx The context which is currently calling get_format, and which + * implicitly contains all state needed for filling the returned + * AVHWFramesContext properly. + * @param device_ref A reference to the AVHWDeviceContext describing the device + * which will be used by the hardware decoder. + * @param hw_pix_fmt The hwaccel format you are going to return from get_format. + * @param out_frames_ref On success, set to a reference to an _uninitialized_ + * AVHWFramesContext, created from the given device_ref. + * Fields will be set to values required for decoding. + * Not changed if an error is returned. + * @return zero on success, a negative value on error. The following error codes + * have special semantics: + * AVERROR(ENOENT): the decoder does not support this functionality. Setup + * is always manual, or it is a decoder which does not + * support setting AVCodecContext.hw_frames_ctx at all, + * or it is a software format. + * AVERROR(EINVAL): it is known that hardware decoding is not supported for + * this configuration, or the device_ref is not supported + * for the hwaccel referenced by hw_pix_fmt. + */ +int avcodec_get_hw_frames_parameters(AVCodecContext *avctx, + AVBufferRef *device_ref, + enum AVPixelFormat hw_pix_fmt, + AVBufferRef **out_frames_ref); + + + +/** + * @defgroup lavc_parsing Frame parsing + * @{ + */ + +enum AVPictureStructure { + AV_PICTURE_STRUCTURE_UNKNOWN, //< unknown + AV_PICTURE_STRUCTURE_TOP_FIELD, //< coded as top field + AV_PICTURE_STRUCTURE_BOTTOM_FIELD, //< coded as bottom field + AV_PICTURE_STRUCTURE_FRAME, //< coded as frame +}; + +typedef struct AVCodecParserContext { + void *priv_data; + struct AVCodecParser *parser; + int64_t frame_offset; /* offset of the current frame */ + int64_t cur_offset; /* current offset + (incremented by each av_parser_parse()) */ + int64_t next_frame_offset; /* offset of the next frame */ + /* video info */ + int pict_type; /* XXX: Put it back in AVCodecContext. */ + /** + * This field is used for proper frame duration computation in lavf. + * It signals, how much longer the frame duration of the current frame + * is compared to normal frame duration. + * + * frame_duration = (1 + repeat_pict) * time_base + * + * It is used by codecs like H.264 to display telecined material. + */ + int repeat_pict; /* XXX: Put it back in AVCodecContext. */ + int64_t pts; /* pts of the current frame */ + int64_t dts; /* dts of the current frame */ + + /* private data */ + int64_t last_pts; + int64_t last_dts; + int fetch_timestamp; + +#define AV_PARSER_PTS_NB 4 + int cur_frame_start_index; + int64_t cur_frame_offset[AV_PARSER_PTS_NB]; + int64_t cur_frame_pts[AV_PARSER_PTS_NB]; + int64_t cur_frame_dts[AV_PARSER_PTS_NB]; + + int flags; +#define PARSER_FLAG_COMPLETE_FRAMES 0x0001 +#define PARSER_FLAG_ONCE 0x0002 +/// Set if the parser has a valid file offset +#define PARSER_FLAG_FETCHED_OFFSET 0x0004 +#define PARSER_FLAG_USE_CODEC_TS 0x1000 + + int64_t offset; ///< byte offset from starting packet start + int64_t cur_frame_end[AV_PARSER_PTS_NB]; + + /** + * Set by parser to 1 for key frames and 0 for non-key frames. + * It is initialized to -1, so if the parser doesn't set this flag, + * old-style fallback using AV_PICTURE_TYPE_I picture type as key frames + * will be used. + */ + int key_frame; + +#if FF_API_CONVERGENCE_DURATION + /** + * @deprecated unused + */ + attribute_deprecated + int64_t convergence_duration; +#endif + + // Timestamp generation support: + /** + * Synchronization point for start of timestamp generation. + * + * Set to >0 for sync point, 0 for no sync point and <0 for undefined + * (default). + * + * For example, this corresponds to presence of H.264 buffering period + * SEI message. + */ + int dts_sync_point; + + /** + * Offset of the current timestamp against last timestamp sync point in + * units of AVCodecContext.time_base. + * + * Set to INT_MIN when dts_sync_point unused. Otherwise, it must + * contain a valid timestamp offset. + * + * Note that the timestamp of sync point has usually a nonzero + * dts_ref_dts_delta, which refers to the previous sync point. Offset of + * the next frame after timestamp sync point will be usually 1. + * + * For example, this corresponds to H.264 cpb_removal_delay. + */ + int dts_ref_dts_delta; + + /** + * Presentation delay of current frame in units of AVCodecContext.time_base. + * + * Set to INT_MIN when dts_sync_point unused. Otherwise, it must + * contain valid non-negative timestamp delta (presentation time of a frame + * must not lie in the past). + * + * This delay represents the difference between decoding and presentation + * time of the frame. + * + * For example, this corresponds to H.264 dpb_output_delay. + */ + int pts_dts_delta; + + /** + * Position of the packet in file. + * + * Analogous to cur_frame_pts/dts + */ + int64_t cur_frame_pos[AV_PARSER_PTS_NB]; + + /** + * Byte position of currently parsed frame in stream. + */ + int64_t pos; + + /** + * Previous frame byte position. + */ + int64_t last_pos; + + /** + * Duration of the current frame. + * For audio, this is in units of 1 / AVCodecContext.sample_rate. + * For all other types, this is in units of AVCodecContext.time_base. + */ + int duration; + + enum AVFieldOrder field_order; + + /** + * Indicate whether a picture is coded as a frame, top field or bottom field. + * + * For example, H.264 field_pic_flag equal to 0 corresponds to + * AV_PICTURE_STRUCTURE_FRAME. An H.264 picture with field_pic_flag + * equal to 1 and bottom_field_flag equal to 0 corresponds to + * AV_PICTURE_STRUCTURE_TOP_FIELD. + */ + enum AVPictureStructure picture_structure; + + /** + * Picture number incremented in presentation or output order. + * This field may be reinitialized at the first picture of a new sequence. + * + * For example, this corresponds to H.264 PicOrderCnt. + */ + int output_picture_number; + + /** + * Dimensions of the decoded video intended for presentation. + */ + int width; + int height; + + /** + * Dimensions of the coded video. + */ + int coded_width; + int coded_height; + + /** + * The format of the coded data, corresponds to enum AVPixelFormat for video + * and for enum AVSampleFormat for audio. + * + * Note that a decoder can have considerable freedom in how exactly it + * decodes the data, so the format reported here might be different from the + * one returned by a decoder. + */ + int format; +} AVCodecParserContext; + +typedef struct AVCodecParser { + int codec_ids[5]; /* several codec IDs are permitted */ + int priv_data_size; + int (*parser_init)(AVCodecParserContext *s); + /* This callback never returns an error, a negative value means that + * the frame start was in a previous packet. */ + int (*parser_parse)(AVCodecParserContext *s, + AVCodecContext *avctx, + const uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size); + void (*parser_close)(AVCodecParserContext *s); + int (*split)(AVCodecContext *avctx, const uint8_t *buf, int buf_size); + struct AVCodecParser *next; +} AVCodecParser; + +/** + * Iterate over all registered codec parsers. + * + * @param opaque a pointer where libavcodec will store the iteration state. Must + * point to NULL to start the iteration. + * + * @return the next registered codec parser or NULL when the iteration is + * finished + */ +const AVCodecParser *av_parser_iterate(void **opaque); + +attribute_deprecated +AVCodecParser *av_parser_next(const AVCodecParser *c); + +attribute_deprecated +void av_register_codec_parser(AVCodecParser *parser); +AVCodecParserContext *av_parser_init(int codec_id); + +/** + * Parse a packet. + * + * @param s parser context. + * @param avctx codec context. + * @param poutbuf set to pointer to parsed buffer or NULL if not yet finished. + * @param poutbuf_size set to size of parsed buffer or zero if not yet finished. + * @param buf input buffer. + * @param buf_size buffer size in bytes without the padding. I.e. the full buffer + size is assumed to be buf_size + AV_INPUT_BUFFER_PADDING_SIZE. + To signal EOF, this should be 0 (so that the last frame + can be output). + * @param pts input presentation timestamp. + * @param dts input decoding timestamp. + * @param pos input byte position in stream. + * @return the number of bytes of the input bitstream used. + * + * Example: + * @code + * while(in_len){ + * len = av_parser_parse2(myparser, AVCodecContext, &data, &size, + * in_data, in_len, + * pts, dts, pos); + * in_data += len; + * in_len -= len; + * + * if(size) + * decode_frame(data, size); + * } + * @endcode + */ +int av_parser_parse2(AVCodecParserContext *s, + AVCodecContext *avctx, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size, + int64_t pts, int64_t dts, + int64_t pos); + +/** + * @return 0 if the output buffer is a subset of the input, 1 if it is allocated and must be freed + * @deprecated use AVBitStreamFilter + */ +int av_parser_change(AVCodecParserContext *s, + AVCodecContext *avctx, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size, int keyframe); +void av_parser_close(AVCodecParserContext *s); + +/** + * @} + * @} + */ + +/** + * @addtogroup lavc_encoding + * @{ + */ + +/** + * Find a registered encoder with a matching codec ID. + * + * @param id AVCodecID of the requested encoder + * @return An encoder if one was found, NULL otherwise. + */ +AVCodec *avcodec_find_encoder(enum AVCodecID id); + +/** + * Find a registered encoder with the specified name. + * + * @param name name of the requested encoder + * @return An encoder if one was found, NULL otherwise. + */ +AVCodec *avcodec_find_encoder_by_name(const char *name); + +/** + * Encode a frame of audio. + * + * Takes input samples from frame and writes the next output packet, if + * available, to avpkt. The output packet does not necessarily contain data for + * the most recent frame, as encoders can delay, split, and combine input frames + * internally as needed. + * + * @param avctx codec context + * @param avpkt output AVPacket. + * The user can supply an output buffer by setting + * avpkt->data and avpkt->size prior to calling the + * function, but if the size of the user-provided data is not + * large enough, encoding will fail. If avpkt->data and + * avpkt->size are set, avpkt->destruct must also be set. All + * other AVPacket fields will be reset by the encoder using + * av_init_packet(). If avpkt->data is NULL, the encoder will + * allocate it. The encoder will set avpkt->size to the size + * of the output packet. + * + * If this function fails or produces no output, avpkt will be + * freed using av_packet_unref(). + * @param[in] frame AVFrame containing the raw audio data to be encoded. + * May be NULL when flushing an encoder that has the + * AV_CODEC_CAP_DELAY capability set. + * If AV_CODEC_CAP_VARIABLE_FRAME_SIZE is set, then each frame + * can have any number of samples. + * If it is not set, frame->nb_samples must be equal to + * avctx->frame_size for all frames except the last. + * The final frame may be smaller than avctx->frame_size. + * @param[out] got_packet_ptr This field is set to 1 by libavcodec if the + * output packet is non-empty, and to 0 if it is + * empty. If the function returns an error, the + * packet can be assumed to be invalid, and the + * value of got_packet_ptr is undefined and should + * not be used. + * @return 0 on success, negative error code on failure + * + * @deprecated use avcodec_send_frame()/avcodec_receive_packet() instead + */ +attribute_deprecated +int avcodec_encode_audio2(AVCodecContext *avctx, AVPacket *avpkt, + const AVFrame *frame, int *got_packet_ptr); + +/** + * Encode a frame of video. + * + * Takes input raw video data from frame and writes the next output packet, if + * available, to avpkt. The output packet does not necessarily contain data for + * the most recent frame, as encoders can delay and reorder input frames + * internally as needed. + * + * @param avctx codec context + * @param avpkt output AVPacket. + * The user can supply an output buffer by setting + * avpkt->data and avpkt->size prior to calling the + * function, but if the size of the user-provided data is not + * large enough, encoding will fail. All other AVPacket fields + * will be reset by the encoder using av_init_packet(). If + * avpkt->data is NULL, the encoder will allocate it. + * The encoder will set avpkt->size to the size of the + * output packet. The returned data (if any) belongs to the + * caller, he is responsible for freeing it. + * + * If this function fails or produces no output, avpkt will be + * freed using av_packet_unref(). + * @param[in] frame AVFrame containing the raw video data to be encoded. + * May be NULL when flushing an encoder that has the + * AV_CODEC_CAP_DELAY capability set. + * @param[out] got_packet_ptr This field is set to 1 by libavcodec if the + * output packet is non-empty, and to 0 if it is + * empty. If the function returns an error, the + * packet can be assumed to be invalid, and the + * value of got_packet_ptr is undefined and should + * not be used. + * @return 0 on success, negative error code on failure + * + * @deprecated use avcodec_send_frame()/avcodec_receive_packet() instead + */ +attribute_deprecated +int avcodec_encode_video2(AVCodecContext *avctx, AVPacket *avpkt, + const AVFrame *frame, int *got_packet_ptr); + +int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size, + const AVSubtitle *sub); + + +/** + * @} + */ + +#if FF_API_AVPICTURE +/** + * @addtogroup lavc_picture + * @{ + */ + +/** + * @deprecated unused + */ +attribute_deprecated +int avpicture_alloc(AVPicture *picture, enum AVPixelFormat pix_fmt, int width, int height); + +/** + * @deprecated unused + */ +attribute_deprecated +void avpicture_free(AVPicture *picture); + +/** + * @deprecated use av_image_fill_arrays() instead. + */ +attribute_deprecated +int avpicture_fill(AVPicture *picture, const uint8_t *ptr, + enum AVPixelFormat pix_fmt, int width, int height); + +/** + * @deprecated use av_image_copy_to_buffer() instead. + */ +attribute_deprecated +int avpicture_layout(const AVPicture *src, enum AVPixelFormat pix_fmt, + int width, int height, + unsigned char *dest, int dest_size); + +/** + * @deprecated use av_image_get_buffer_size() instead. + */ +attribute_deprecated +int avpicture_get_size(enum AVPixelFormat pix_fmt, int width, int height); + +/** + * @deprecated av_image_copy() instead. + */ +attribute_deprecated +void av_picture_copy(AVPicture *dst, const AVPicture *src, + enum AVPixelFormat pix_fmt, int width, int height); + +/** + * @deprecated unused + */ +attribute_deprecated +int av_picture_crop(AVPicture *dst, const AVPicture *src, + enum AVPixelFormat pix_fmt, int top_band, int left_band); + +/** + * @deprecated unused + */ +attribute_deprecated +int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width, enum AVPixelFormat pix_fmt, + int padtop, int padbottom, int padleft, int padright, int *color); + +/** + * @} + */ +#endif + +/** + * @defgroup lavc_misc Utility functions + * @ingroup libavc + * + * Miscellaneous utility functions related to both encoding and decoding + * (or neither). + * @{ + */ + +/** + * @defgroup lavc_misc_pixfmt Pixel formats + * + * Functions for working with pixel formats. + * @{ + */ + +#if FF_API_GETCHROMA +/** + * @deprecated Use av_pix_fmt_get_chroma_sub_sample + */ + +attribute_deprecated +void avcodec_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift); +#endif + +/** + * Return a value representing the fourCC code associated to the + * pixel format pix_fmt, or 0 if no associated fourCC code can be + * found. + */ +unsigned int avcodec_pix_fmt_to_codec_tag(enum AVPixelFormat pix_fmt); + +/** + * @deprecated see av_get_pix_fmt_loss() + */ +int avcodec_get_pix_fmt_loss(enum AVPixelFormat dst_pix_fmt, enum AVPixelFormat src_pix_fmt, + int has_alpha); + +/** + * Find the best pixel format to convert to given a certain source pixel + * format. When converting from one pixel format to another, information loss + * may occur. For example, when converting from RGB24 to GRAY, the color + * information will be lost. Similarly, other losses occur when converting from + * some formats to other formats. avcodec_find_best_pix_fmt_of_2() searches which of + * the given pixel formats should be used to suffer the least amount of loss. + * The pixel formats from which it chooses one, are determined by the + * pix_fmt_list parameter. + * + * + * @param[in] pix_fmt_list AV_PIX_FMT_NONE terminated array of pixel formats to choose from + * @param[in] src_pix_fmt source pixel format + * @param[in] has_alpha Whether the source pixel format alpha channel is used. + * @param[out] loss_ptr Combination of flags informing you what kind of losses will occur. + * @return The best pixel format to convert to or -1 if none was found. + */ +enum AVPixelFormat avcodec_find_best_pix_fmt_of_list(const enum AVPixelFormat *pix_fmt_list, + enum AVPixelFormat src_pix_fmt, + int has_alpha, int *loss_ptr); + +/** + * @deprecated see av_find_best_pix_fmt_of_2() + */ +enum AVPixelFormat avcodec_find_best_pix_fmt_of_2(enum AVPixelFormat dst_pix_fmt1, enum AVPixelFormat dst_pix_fmt2, + enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr); + +attribute_deprecated +enum AVPixelFormat avcodec_find_best_pix_fmt2(enum AVPixelFormat dst_pix_fmt1, enum AVPixelFormat dst_pix_fmt2, + enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr); + +enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum AVPixelFormat * fmt); + +/** + * @} + */ + +#if FF_API_TAG_STRING +/** + * Put a string representing the codec tag codec_tag in buf. + * + * @param buf buffer to place codec tag in + * @param buf_size size in bytes of buf + * @param codec_tag codec tag to assign + * @return the length of the string that would have been generated if + * enough space had been available, excluding the trailing null + * + * @deprecated see av_fourcc_make_string() and av_fourcc2str(). + */ +attribute_deprecated +size_t av_get_codec_tag_string(char *buf, size_t buf_size, unsigned int codec_tag); +#endif + +void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode); + +/** + * Return a name for the specified profile, if available. + * + * @param codec the codec that is searched for the given profile + * @param profile the profile value for which a name is requested + * @return A name for the profile if found, NULL otherwise. + */ +const char *av_get_profile_name(const AVCodec *codec, int profile); + +/** + * Return a name for the specified profile, if available. + * + * @param codec_id the ID of the codec to which the requested profile belongs + * @param profile the profile value for which a name is requested + * @return A name for the profile if found, NULL otherwise. + * + * @note unlike av_get_profile_name(), which searches a list of profiles + * supported by a specific decoder or encoder implementation, this + * function searches the list of profiles from the AVCodecDescriptor + */ +const char *avcodec_profile_name(enum AVCodecID codec_id, int profile); + +int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2),void *arg, int *ret, int count, int size); +int avcodec_default_execute2(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2, int, int),void *arg, int *ret, int count); +//FIXME func typedef + +/** + * Fill AVFrame audio data and linesize pointers. + * + * The buffer buf must be a preallocated buffer with a size big enough + * to contain the specified samples amount. The filled AVFrame data + * pointers will point to this buffer. + * + * AVFrame extended_data channel pointers are allocated if necessary for + * planar audio. + * + * @param frame the AVFrame + * frame->nb_samples must be set prior to calling the + * function. This function fills in frame->data, + * frame->extended_data, frame->linesize[0]. + * @param nb_channels channel count + * @param sample_fmt sample format + * @param buf buffer to use for frame data + * @param buf_size size of buffer + * @param align plane size sample alignment (0 = default) + * @return >=0 on success, negative error code on failure + * @todo return the size in bytes required to store the samples in + * case of success, at the next libavutil bump + */ +int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels, + enum AVSampleFormat sample_fmt, const uint8_t *buf, + int buf_size, int align); + +/** + * Reset the internal decoder state / flush internal buffers. Should be called + * e.g. when seeking or when switching to a different stream. + * + * @note when refcounted frames are not used (i.e. avctx->refcounted_frames is 0), + * this invalidates the frames previously returned from the decoder. When + * refcounted frames are used, the decoder just releases any references it might + * keep internally, but the caller's reference remains valid. + */ +void avcodec_flush_buffers(AVCodecContext *avctx); + +/** + * Return codec bits per sample. + * + * @param[in] codec_id the codec + * @return Number of bits per sample or zero if unknown for the given codec. + */ +int av_get_bits_per_sample(enum AVCodecID codec_id); + +/** + * Return the PCM codec associated with a sample format. + * @param be endianness, 0 for little, 1 for big, + * -1 (or anything else) for native + * @return AV_CODEC_ID_PCM_* or AV_CODEC_ID_NONE + */ +enum AVCodecID av_get_pcm_codec(enum AVSampleFormat fmt, int be); + +/** + * Return codec bits per sample. + * Only return non-zero if the bits per sample is exactly correct, not an + * approximation. + * + * @param[in] codec_id the codec + * @return Number of bits per sample or zero if unknown for the given codec. + */ +int av_get_exact_bits_per_sample(enum AVCodecID codec_id); + +/** + * Return audio frame duration. + * + * @param avctx codec context + * @param frame_bytes size of the frame, or 0 if unknown + * @return frame duration, in samples, if known. 0 if not able to + * determine. + */ +int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes); + +/** + * This function is the same as av_get_audio_frame_duration(), except it works + * with AVCodecParameters instead of an AVCodecContext. + */ +int av_get_audio_frame_duration2(AVCodecParameters *par, int frame_bytes); + +#if FF_API_OLD_BSF +typedef struct AVBitStreamFilterContext { + void *priv_data; + const struct AVBitStreamFilter *filter; + AVCodecParserContext *parser; + struct AVBitStreamFilterContext *next; + /** + * Internal default arguments, used if NULL is passed to av_bitstream_filter_filter(). + * Not for access by library users. + */ + char *args; +} AVBitStreamFilterContext; +#endif + +typedef struct AVBSFInternal AVBSFInternal; + +/** + * The bitstream filter state. + * + * This struct must be allocated with av_bsf_alloc() and freed with + * av_bsf_free(). + * + * The fields in the struct will only be changed (by the caller or by the + * filter) as described in their documentation, and are to be considered + * immutable otherwise. + */ +typedef struct AVBSFContext { + /** + * A class for logging and AVOptions + */ + const AVClass *av_class; + + /** + * The bitstream filter this context is an instance of. + */ + const struct AVBitStreamFilter *filter; + + /** + * Opaque libavcodec internal data. Must not be touched by the caller in any + * way. + */ + AVBSFInternal *internal; + + /** + * Opaque filter-specific private data. If filter->priv_class is non-NULL, + * this is an AVOptions-enabled struct. + */ + void *priv_data; + + /** + * Parameters of the input stream. This field is allocated in + * av_bsf_alloc(), it needs to be filled by the caller before + * av_bsf_init(). + */ + AVCodecParameters *par_in; + + /** + * Parameters of the output stream. This field is allocated in + * av_bsf_alloc(), it is set by the filter in av_bsf_init(). + */ + AVCodecParameters *par_out; + + /** + * The timebase used for the timestamps of the input packets. Set by the + * caller before av_bsf_init(). + */ + AVRational time_base_in; + + /** + * The timebase used for the timestamps of the output packets. Set by the + * filter in av_bsf_init(). + */ + AVRational time_base_out; +} AVBSFContext; + +typedef struct AVBitStreamFilter { + const char *name; + + /** + * A list of codec ids supported by the filter, terminated by + * AV_CODEC_ID_NONE. + * May be NULL, in that case the bitstream filter works with any codec id. + */ + const enum AVCodecID *codec_ids; + + /** + * A class for the private data, used to declare bitstream filter private + * AVOptions. This field is NULL for bitstream filters that do not declare + * any options. + * + * If this field is non-NULL, the first member of the filter private data + * must be a pointer to AVClass, which will be set by libavcodec generic + * code to this class. + */ + const AVClass *priv_class; + + /***************************************************************** + * No fields below this line are part of the public API. They + * may not be used outside of libavcodec and can be changed and + * removed at will. + * New public fields should be added right above. + ***************************************************************** + */ + + int priv_data_size; + int (*init)(AVBSFContext *ctx); + int (*filter)(AVBSFContext *ctx, AVPacket *pkt); + void (*close)(AVBSFContext *ctx); +} AVBitStreamFilter; + +#if FF_API_OLD_BSF +/** + * @deprecated the old bitstream filtering API (using AVBitStreamFilterContext) + * is deprecated. Use the new bitstream filtering API (using AVBSFContext). + */ +attribute_deprecated +void av_register_bitstream_filter(AVBitStreamFilter *bsf); +/** + * @deprecated the old bitstream filtering API (using AVBitStreamFilterContext) + * is deprecated. Use av_bsf_get_by_name(), av_bsf_alloc(), and av_bsf_init() + * from the new bitstream filtering API (using AVBSFContext). + */ +attribute_deprecated +AVBitStreamFilterContext *av_bitstream_filter_init(const char *name); +/** + * @deprecated the old bitstream filtering API (using AVBitStreamFilterContext) + * is deprecated. Use av_bsf_send_packet() and av_bsf_receive_packet() from the + * new bitstream filtering API (using AVBSFContext). + */ +attribute_deprecated +int av_bitstream_filter_filter(AVBitStreamFilterContext *bsfc, + AVCodecContext *avctx, const char *args, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size, int keyframe); +/** + * @deprecated the old bitstream filtering API (using AVBitStreamFilterContext) + * is deprecated. Use av_bsf_free() from the new bitstream filtering API (using + * AVBSFContext). + */ +attribute_deprecated +void av_bitstream_filter_close(AVBitStreamFilterContext *bsf); +/** + * @deprecated the old bitstream filtering API (using AVBitStreamFilterContext) + * is deprecated. Use av_bsf_iterate() from the new bitstream filtering API (using + * AVBSFContext). + */ +attribute_deprecated +const AVBitStreamFilter *av_bitstream_filter_next(const AVBitStreamFilter *f); +#endif + +/** + * @return a bitstream filter with the specified name or NULL if no such + * bitstream filter exists. + */ +const AVBitStreamFilter *av_bsf_get_by_name(const char *name); + +/** + * Iterate over all registered bitstream filters. + * + * @param opaque a pointer where libavcodec will store the iteration state. Must + * point to NULL to start the iteration. + * + * @return the next registered bitstream filter or NULL when the iteration is + * finished + */ +const AVBitStreamFilter *av_bsf_iterate(void **opaque); +#if FF_API_NEXT +attribute_deprecated +const AVBitStreamFilter *av_bsf_next(void **opaque); +#endif + +/** + * Allocate a context for a given bitstream filter. The caller must fill in the + * context parameters as described in the documentation and then call + * av_bsf_init() before sending any data to the filter. + * + * @param filter the filter for which to allocate an instance. + * @param ctx a pointer into which the pointer to the newly-allocated context + * will be written. It must be freed with av_bsf_free() after the + * filtering is done. + * + * @return 0 on success, a negative AVERROR code on failure + */ +int av_bsf_alloc(const AVBitStreamFilter *filter, AVBSFContext **ctx); + +/** + * Prepare the filter for use, after all the parameters and options have been + * set. + */ +int av_bsf_init(AVBSFContext *ctx); + +/** + * Submit a packet for filtering. + * + * After sending each packet, the filter must be completely drained by calling + * av_bsf_receive_packet() repeatedly until it returns AVERROR(EAGAIN) or + * AVERROR_EOF. + * + * @param pkt the packet to filter. The bitstream filter will take ownership of + * the packet and reset the contents of pkt. pkt is not touched if an error occurs. + * This parameter may be NULL, which signals the end of the stream (i.e. no more + * packets will be sent). That will cause the filter to output any packets it + * may have buffered internally. + * + * @return 0 on success, a negative AVERROR on error. + */ +int av_bsf_send_packet(AVBSFContext *ctx, AVPacket *pkt); + +/** + * Retrieve a filtered packet. + * + * @param[out] pkt this struct will be filled with the contents of the filtered + * packet. It is owned by the caller and must be freed using + * av_packet_unref() when it is no longer needed. + * This parameter should be "clean" (i.e. freshly allocated + * with av_packet_alloc() or unreffed with av_packet_unref()) + * when this function is called. If this function returns + * successfully, the contents of pkt will be completely + * overwritten by the returned data. On failure, pkt is not + * touched. + * + * @return 0 on success. AVERROR(EAGAIN) if more packets need to be sent to the + * filter (using av_bsf_send_packet()) to get more output. AVERROR_EOF if there + * will be no further output from the filter. Another negative AVERROR value if + * an error occurs. + * + * @note one input packet may result in several output packets, so after sending + * a packet with av_bsf_send_packet(), this function needs to be called + * repeatedly until it stops returning 0. It is also possible for a filter to + * output fewer packets than were sent to it, so this function may return + * AVERROR(EAGAIN) immediately after a successful av_bsf_send_packet() call. + */ +int av_bsf_receive_packet(AVBSFContext *ctx, AVPacket *pkt); + +/** + * Free a bitstream filter context and everything associated with it; write NULL + * into the supplied pointer. + */ +void av_bsf_free(AVBSFContext **ctx); + +/** + * Get the AVClass for AVBSFContext. It can be used in combination with + * AV_OPT_SEARCH_FAKE_OBJ for examining options. + * + * @see av_opt_find(). + */ +const AVClass *av_bsf_get_class(void); + +/** + * Structure for chain/list of bitstream filters. + * Empty list can be allocated by av_bsf_list_alloc(). + */ +typedef struct AVBSFList AVBSFList; + +/** + * Allocate empty list of bitstream filters. + * The list must be later freed by av_bsf_list_free() + * or finalized by av_bsf_list_finalize(). + * + * @return Pointer to @ref AVBSFList on success, NULL in case of failure + */ +AVBSFList *av_bsf_list_alloc(void); + +/** + * Free list of bitstream filters. + * + * @param lst Pointer to pointer returned by av_bsf_list_alloc() + */ +void av_bsf_list_free(AVBSFList **lst); + +/** + * Append bitstream filter to the list of bitstream filters. + * + * @param lst List to append to + * @param bsf Filter context to be appended + * + * @return >=0 on success, negative AVERROR in case of failure + */ +int av_bsf_list_append(AVBSFList *lst, AVBSFContext *bsf); + +/** + * Construct new bitstream filter context given it's name and options + * and append it to the list of bitstream filters. + * + * @param lst List to append to + * @param bsf_name Name of the bitstream filter + * @param options Options for the bitstream filter, can be set to NULL + * + * @return >=0 on success, negative AVERROR in case of failure + */ +int av_bsf_list_append2(AVBSFList *lst, const char * bsf_name, AVDictionary **options); +/** + * Finalize list of bitstream filters. + * + * This function will transform @ref AVBSFList to single @ref AVBSFContext, + * so the whole chain of bitstream filters can be treated as single filter + * freshly allocated by av_bsf_alloc(). + * If the call is successful, @ref AVBSFList structure is freed and lst + * will be set to NULL. In case of failure, caller is responsible for + * freeing the structure by av_bsf_list_free() + * + * @param lst Filter list structure to be transformed + * @param[out] bsf Pointer to be set to newly created @ref AVBSFContext structure + * representing the chain of bitstream filters + * + * @return >=0 on success, negative AVERROR in case of failure + */ +int av_bsf_list_finalize(AVBSFList **lst, AVBSFContext **bsf); + +/** + * Parse string describing list of bitstream filters and create single + * @ref AVBSFContext describing the whole chain of bitstream filters. + * Resulting @ref AVBSFContext can be treated as any other @ref AVBSFContext freshly + * allocated by av_bsf_alloc(). + * + * @param str String describing chain of bitstream filters in format + * `bsf1[=opt1=val1:opt2=val2][,bsf2]` + * @param[out] bsf Pointer to be set to newly created @ref AVBSFContext structure + * representing the chain of bitstream filters + * + * @return >=0 on success, negative AVERROR in case of failure + */ +int av_bsf_list_parse_str(const char *str, AVBSFContext **bsf); + +/** + * Get null/pass-through bitstream filter. + * + * @param[out] bsf Pointer to be set to new instance of pass-through bitstream filter + * + * @return + */ +int av_bsf_get_null_filter(AVBSFContext **bsf); + +/* memory */ + +/** + * Same behaviour av_fast_malloc but the buffer has additional + * AV_INPUT_BUFFER_PADDING_SIZE at the end which will always be 0. + * + * In addition the whole buffer will initially and after resizes + * be 0-initialized so that no uninitialized data will ever appear. + */ +void av_fast_padded_malloc(void *ptr, unsigned int *size, size_t min_size); + +/** + * Same behaviour av_fast_padded_malloc except that buffer will always + * be 0-initialized after call. + */ +void av_fast_padded_mallocz(void *ptr, unsigned int *size, size_t min_size); + +/** + * Encode extradata length to a buffer. Used by xiph codecs. + * + * @param s buffer to write to; must be at least (v/255+1) bytes long + * @param v size of extradata in bytes + * @return number of bytes written to the buffer. + */ +unsigned int av_xiphlacing(unsigned char *s, unsigned int v); + +#if FF_API_USER_VISIBLE_AVHWACCEL +/** + * Register the hardware accelerator hwaccel. + * + * @deprecated This function doesn't do anything. + */ +attribute_deprecated +void av_register_hwaccel(AVHWAccel *hwaccel); + +/** + * If hwaccel is NULL, returns the first registered hardware accelerator, + * if hwaccel is non-NULL, returns the next registered hardware accelerator + * after hwaccel, or NULL if hwaccel is the last one. + * + * @deprecated AVHWaccel structures contain no user-serviceable parts, so + * this function should not be used. + */ +attribute_deprecated +AVHWAccel *av_hwaccel_next(const AVHWAccel *hwaccel); +#endif + +#if FF_API_LOCKMGR +/** + * Lock operation used by lockmgr + * + * @deprecated Deprecated together with av_lockmgr_register(). + */ +enum AVLockOp { + AV_LOCK_CREATE, ///< Create a mutex + AV_LOCK_OBTAIN, ///< Lock the mutex + AV_LOCK_RELEASE, ///< Unlock the mutex + AV_LOCK_DESTROY, ///< Free mutex resources +}; + +/** + * Register a user provided lock manager supporting the operations + * specified by AVLockOp. The "mutex" argument to the function points + * to a (void *) where the lockmgr should store/get a pointer to a user + * allocated mutex. It is NULL upon AV_LOCK_CREATE and equal to the + * value left by the last call for all other ops. If the lock manager is + * unable to perform the op then it should leave the mutex in the same + * state as when it was called and return a non-zero value. However, + * when called with AV_LOCK_DESTROY the mutex will always be assumed to + * have been successfully destroyed. If av_lockmgr_register succeeds + * it will return a non-negative value, if it fails it will return a + * negative value and destroy all mutex and unregister all callbacks. + * av_lockmgr_register is not thread-safe, it must be called from a + * single thread before any calls which make use of locking are used. + * + * @param cb User defined callback. av_lockmgr_register invokes calls + * to this callback and the previously registered callback. + * The callback will be used to create more than one mutex + * each of which must be backed by its own underlying locking + * mechanism (i.e. do not use a single static object to + * implement your lock manager). If cb is set to NULL the + * lockmgr will be unregistered. + * + * @deprecated This function does nothing, and always returns 0. Be sure to + * build with thread support to get basic thread safety. + */ +attribute_deprecated +int av_lockmgr_register(int (*cb)(void **mutex, enum AVLockOp op)); +#endif + +/** + * Get the type of the given codec. + */ +enum AVMediaType avcodec_get_type(enum AVCodecID codec_id); + +/** + * Get the name of a codec. + * @return a static string identifying the codec; never NULL + */ +const char *avcodec_get_name(enum AVCodecID id); + +/** + * @return a positive value if s is open (i.e. avcodec_open2() was called on it + * with no corresponding avcodec_close()), 0 otherwise. + */ +int avcodec_is_open(AVCodecContext *s); + +/** + * @return a non-zero number if codec is an encoder, zero otherwise + */ +int av_codec_is_encoder(const AVCodec *codec); + +/** + * @return a non-zero number if codec is a decoder, zero otherwise + */ +int av_codec_is_decoder(const AVCodec *codec); + +/** + * @return descriptor for given codec ID or NULL if no descriptor exists. + */ +const AVCodecDescriptor *avcodec_descriptor_get(enum AVCodecID id); + +/** + * Iterate over all codec descriptors known to libavcodec. + * + * @param prev previous descriptor. NULL to get the first descriptor. + * + * @return next descriptor or NULL after the last descriptor + */ +const AVCodecDescriptor *avcodec_descriptor_next(const AVCodecDescriptor *prev); + +/** + * @return codec descriptor with the given name or NULL if no such descriptor + * exists. + */ +const AVCodecDescriptor *avcodec_descriptor_get_by_name(const char *name); + +/** + * Allocate a CPB properties structure and initialize its fields to default + * values. + * + * @param size if non-NULL, the size of the allocated struct will be written + * here. This is useful for embedding it in side data. + * + * @return the newly allocated struct or NULL on failure + */ +AVCPBProperties *av_cpb_properties_alloc(size_t *size); + +/** + * @} + */ + +#endif /* AVCODEC_AVCODEC_H */ diff --git a/libs/ffvpx/libavcodec/avcodec.symbols b/libs/ffvpx/libavcodec/avcodec.symbols new file mode 100644 index 000000000..8f135e523 --- /dev/null +++ b/libs/ffvpx/libavcodec/avcodec.symbols @@ -0,0 +1,103 @@ +av_codec_ffversion +av_codec_get_chroma_intra_matrix +av_codec_get_codec_descriptor +av_codec_get_codec_properties +av_codec_get_lowres +av_codec_get_max_lowres +av_codec_get_pkt_timebase +av_codec_get_seek_preroll +av_codec_is_decoder +av_codec_is_encoder +av_codec_next +av_codec_set_chroma_intra_matrix +av_codec_set_codec_descriptor +av_codec_set_lowres +av_codec_set_pkt_timebase +av_codec_set_seek_preroll +av_copy_packet +av_copy_packet_side_data +av_dup_packet +av_fast_padded_malloc +av_fast_padded_mallocz +av_free_packet +av_get_audio_frame_duration +av_get_bits_per_sample +av_get_codec_tag_string +av_get_exact_bits_per_sample +av_get_pcm_codec +av_get_profile_name +av_grow_packet +av_hwaccel_next +av_init_packet +av_lockmgr_register +av_new_packet +av_packet_copy_props +av_packet_free_side_data +av_packet_from_data +av_packet_get_side_data +av_packet_merge_side_data +av_packet_move_ref +av_packet_new_side_data +av_packet_pack_dictionary +av_packet_ref +av_packet_rescale_ts +av_packet_shrink_side_data +av_packet_side_data_name +av_packet_split_side_data +av_packet_unpack_dictionary +av_packet_unref +av_parser_change +av_parser_close +av_parser_init +av_parser_next +av_parser_parse2 +av_register_codec_parser +av_register_hwaccel +av_shrink_packet +av_vorbis_parse_frame +av_vorbis_parse_frame_flags +av_vorbis_parse_free +av_vorbis_parse_init +av_vorbis_parse_reset +av_xiphlacing +avcodec_align_dimensions +avcodec_align_dimensions2 +avcodec_alloc_context3 +avcodec_chroma_pos_to_enum +avcodec_close +avcodec_configuration +avcodec_copy_context +avcodec_decode_audio4 +avcodec_decode_subtitle2 +avcodec_decode_video2 +avcodec_default_execute +avcodec_default_execute2 +avcodec_default_get_buffer2 +avcodec_default_get_format +avcodec_descriptor_get +avcodec_descriptor_get_by_name +avcodec_descriptor_next +avcodec_enum_to_chroma_pos +avcodec_fill_audio_frame +avcodec_find_decoder +avcodec_find_decoder_by_name +avcodec_find_encoder +avcodec_find_encoder_by_name +avcodec_flush_buffers +avcodec_free_context +avcodec_get_class +avcodec_get_context_defaults3 +avcodec_get_frame_class +avcodec_get_name +avcodec_get_subtitle_rect_class +avcodec_get_type +avcodec_is_open +avcodec_license +avcodec_open2 +avcodec_register +avcodec_register_all +avcodec_string +avcodec_version +avsubtitle_free +avcodec_send_packet +avcodec_receive_frame diff --git a/libs/ffvpx/libavcodec/avpacket.c b/libs/ffvpx/libavcodec/avpacket.c new file mode 100644 index 000000000..99a0c1383 --- /dev/null +++ b/libs/ffvpx/libavcodec/avpacket.c @@ -0,0 +1,733 @@ +/* + * AVPacket functions for libavcodec + * Copyright (c) 2000, 2001, 2002 Fabrice Bellard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <string.h> + +#include "libavutil/avassert.h" +#include "libavutil/common.h" +#include "libavutil/internal.h" +#include "libavutil/mathematics.h" +#include "libavutil/mem.h" +#include "avcodec.h" +#include "bytestream.h" +#include "internal.h" + +void av_init_packet(AVPacket *pkt) +{ + pkt->pts = AV_NOPTS_VALUE; + pkt->dts = AV_NOPTS_VALUE; + pkt->pos = -1; + pkt->duration = 0; +#if FF_API_CONVERGENCE_DURATION +FF_DISABLE_DEPRECATION_WARNINGS + pkt->convergence_duration = 0; +FF_ENABLE_DEPRECATION_WARNINGS +#endif + pkt->flags = 0; + pkt->stream_index = 0; + pkt->buf = NULL; + pkt->side_data = NULL; + pkt->side_data_elems = 0; +} + +AVPacket *av_packet_alloc(void) +{ + AVPacket *pkt = av_mallocz(sizeof(AVPacket)); + if (!pkt) + return pkt; + + av_packet_unref(pkt); + + return pkt; +} + +void av_packet_free(AVPacket **pkt) +{ + if (!pkt || !*pkt) + return; + + av_packet_unref(*pkt); + av_freep(pkt); +} + +static int packet_alloc(AVBufferRef **buf, int size) +{ + int ret; + if (size < 0 || size >= INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE) + return AVERROR(EINVAL); + + ret = av_buffer_realloc(buf, size + AV_INPUT_BUFFER_PADDING_SIZE); + if (ret < 0) + return ret; + + memset((*buf)->data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE); + + return 0; +} + +int av_new_packet(AVPacket *pkt, int size) +{ + AVBufferRef *buf = NULL; + int ret = packet_alloc(&buf, size); + if (ret < 0) + return ret; + + av_init_packet(pkt); + pkt->buf = buf; + pkt->data = buf->data; + pkt->size = size; + + return 0; +} + +void av_shrink_packet(AVPacket *pkt, int size) +{ + if (pkt->size <= size) + return; + pkt->size = size; + memset(pkt->data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE); +} + +int av_grow_packet(AVPacket *pkt, int grow_by) +{ + int new_size; + av_assert0((unsigned)pkt->size <= INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE); + if ((unsigned)grow_by > + INT_MAX - (pkt->size + AV_INPUT_BUFFER_PADDING_SIZE)) + return -1; + + new_size = pkt->size + grow_by + AV_INPUT_BUFFER_PADDING_SIZE; + if (pkt->buf) { + size_t data_offset; + uint8_t *old_data = pkt->data; + if (pkt->data == NULL) { + data_offset = 0; + pkt->data = pkt->buf->data; + } else { + data_offset = pkt->data - pkt->buf->data; + if (data_offset > INT_MAX - new_size) + return -1; + } + + if (new_size + data_offset > pkt->buf->size) { + int ret = av_buffer_realloc(&pkt->buf, new_size + data_offset); + if (ret < 0) { + pkt->data = old_data; + return ret; + } + pkt->data = pkt->buf->data + data_offset; + } + } else { + pkt->buf = av_buffer_alloc(new_size); + if (!pkt->buf) + return AVERROR(ENOMEM); + if (pkt->size > 0) + memcpy(pkt->buf->data, pkt->data, pkt->size); + pkt->data = pkt->buf->data; + } + pkt->size += grow_by; + memset(pkt->data + pkt->size, 0, AV_INPUT_BUFFER_PADDING_SIZE); + + return 0; +} + +int av_packet_from_data(AVPacket *pkt, uint8_t *data, int size) +{ + if (size >= INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE) + return AVERROR(EINVAL); + + pkt->buf = av_buffer_create(data, size + AV_INPUT_BUFFER_PADDING_SIZE, + av_buffer_default_free, NULL, 0); + if (!pkt->buf) + return AVERROR(ENOMEM); + + pkt->data = data; + pkt->size = size; + + return 0; +} + +#if FF_API_AVPACKET_OLD_API +FF_DISABLE_DEPRECATION_WARNINGS +#define ALLOC_MALLOC(data, size) data = av_malloc(size) +#define ALLOC_BUF(data, size) \ +do { \ + av_buffer_realloc(&pkt->buf, size); \ + data = pkt->buf ? pkt->buf->data : NULL; \ +} while (0) + +#define DUP_DATA(dst, src, size, padding, ALLOC) \ + do { \ + void *data; \ + if (padding) { \ + if ((unsigned)(size) > \ + (unsigned)(size) + AV_INPUT_BUFFER_PADDING_SIZE) \ + goto failed_alloc; \ + ALLOC(data, size + AV_INPUT_BUFFER_PADDING_SIZE); \ + } else { \ + ALLOC(data, size); \ + } \ + if (!data) \ + goto failed_alloc; \ + memcpy(data, src, size); \ + if (padding) \ + memset((uint8_t *)data + size, 0, \ + AV_INPUT_BUFFER_PADDING_SIZE); \ + dst = data; \ + } while (0) + +/* Makes duplicates of data, side_data, but does not copy any other fields */ +static int copy_packet_data(AVPacket *pkt, const AVPacket *src, int dup) +{ + pkt->data = NULL; + pkt->side_data = NULL; + pkt->side_data_elems = 0; + if (pkt->buf) { + AVBufferRef *ref = av_buffer_ref(src->buf); + if (!ref) + return AVERROR(ENOMEM); + pkt->buf = ref; + pkt->data = ref->data; + } else { + DUP_DATA(pkt->data, src->data, pkt->size, 1, ALLOC_BUF); + } + if (src->side_data_elems && dup) { + pkt->side_data = src->side_data; + pkt->side_data_elems = src->side_data_elems; + } + if (src->side_data_elems && !dup) { + return av_copy_packet_side_data(pkt, src); + } + return 0; + +failed_alloc: + av_packet_unref(pkt); + return AVERROR(ENOMEM); +} + +int av_copy_packet_side_data(AVPacket *pkt, const AVPacket *src) +{ + if (src->side_data_elems) { + int i; + DUP_DATA(pkt->side_data, src->side_data, + src->side_data_elems * sizeof(*src->side_data), 0, ALLOC_MALLOC); + if (src != pkt) { + memset(pkt->side_data, 0, + src->side_data_elems * sizeof(*src->side_data)); + } + for (i = 0; i < src->side_data_elems; i++) { + DUP_DATA(pkt->side_data[i].data, src->side_data[i].data, + src->side_data[i].size, 1, ALLOC_MALLOC); + pkt->side_data[i].size = src->side_data[i].size; + pkt->side_data[i].type = src->side_data[i].type; + } + } + pkt->side_data_elems = src->side_data_elems; + return 0; + +failed_alloc: + av_packet_unref(pkt); + return AVERROR(ENOMEM); +} + +int av_dup_packet(AVPacket *pkt) +{ + AVPacket tmp_pkt; + + if (!pkt->buf && pkt->data) { + tmp_pkt = *pkt; + return copy_packet_data(pkt, &tmp_pkt, 1); + } + return 0; +} + +int av_copy_packet(AVPacket *dst, const AVPacket *src) +{ + *dst = *src; + return copy_packet_data(dst, src, 0); +} +FF_ENABLE_DEPRECATION_WARNINGS +#endif + +void av_packet_free_side_data(AVPacket *pkt) +{ + int i; + for (i = 0; i < pkt->side_data_elems; i++) + av_freep(&pkt->side_data[i].data); + av_freep(&pkt->side_data); + pkt->side_data_elems = 0; +} + +#if FF_API_AVPACKET_OLD_API +FF_DISABLE_DEPRECATION_WARNINGS +void av_free_packet(AVPacket *pkt) +{ + if (pkt) { + if (pkt->buf) + av_buffer_unref(&pkt->buf); + pkt->data = NULL; + pkt->size = 0; + + av_packet_free_side_data(pkt); + } +} +FF_ENABLE_DEPRECATION_WARNINGS +#endif + +int av_packet_add_side_data(AVPacket *pkt, enum AVPacketSideDataType type, + uint8_t *data, size_t size) +{ + AVPacketSideData *tmp; + int i, elems = pkt->side_data_elems; + + for (i = 0; i < elems; i++) { + AVPacketSideData *sd = &pkt->side_data[i]; + + if (sd->type == type) { + av_free(sd->data); + sd->data = data; + sd->size = size; + return 0; + } + } + + if ((unsigned)elems + 1 > AV_PKT_DATA_NB) + return AVERROR(ERANGE); + + tmp = av_realloc(pkt->side_data, (elems + 1) * sizeof(*tmp)); + if (!tmp) + return AVERROR(ENOMEM); + + pkt->side_data = tmp; + pkt->side_data[elems].data = data; + pkt->side_data[elems].size = size; + pkt->side_data[elems].type = type; + pkt->side_data_elems++; + + return 0; +} + + +uint8_t *av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type, + int size) +{ + int ret; + uint8_t *data; + + if ((unsigned)size > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE) + return NULL; + data = av_mallocz(size + AV_INPUT_BUFFER_PADDING_SIZE); + if (!data) + return NULL; + + ret = av_packet_add_side_data(pkt, type, data, size); + if (ret < 0) { + av_freep(&data); + return NULL; + } + + return data; +} + +uint8_t *av_packet_get_side_data(const AVPacket *pkt, enum AVPacketSideDataType type, + int *size) +{ + int i; + + for (i = 0; i < pkt->side_data_elems; i++) { + if (pkt->side_data[i].type == type) { + if (size) + *size = pkt->side_data[i].size; + return pkt->side_data[i].data; + } + } + if (size) + *size = 0; + return NULL; +} + +const char *av_packet_side_data_name(enum AVPacketSideDataType type) +{ + switch(type) { + case AV_PKT_DATA_PALETTE: return "Palette"; + case AV_PKT_DATA_NEW_EXTRADATA: return "New Extradata"; + case AV_PKT_DATA_PARAM_CHANGE: return "Param Change"; + case AV_PKT_DATA_H263_MB_INFO: return "H263 MB Info"; + case AV_PKT_DATA_REPLAYGAIN: return "Replay Gain"; + case AV_PKT_DATA_DISPLAYMATRIX: return "Display Matrix"; + case AV_PKT_DATA_STEREO3D: return "Stereo 3D"; + case AV_PKT_DATA_AUDIO_SERVICE_TYPE: return "Audio Service Type"; + case AV_PKT_DATA_SKIP_SAMPLES: return "Skip Samples"; + case AV_PKT_DATA_JP_DUALMONO: return "JP Dual Mono"; + case AV_PKT_DATA_STRINGS_METADATA: return "Strings Metadata"; + case AV_PKT_DATA_SUBTITLE_POSITION: return "Subtitle Position"; + case AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL: return "Matroska BlockAdditional"; + case AV_PKT_DATA_WEBVTT_IDENTIFIER: return "WebVTT ID"; + case AV_PKT_DATA_WEBVTT_SETTINGS: return "WebVTT Settings"; + case AV_PKT_DATA_METADATA_UPDATE: return "Metadata Update"; + case AV_PKT_DATA_MPEGTS_STREAM_ID: return "MPEGTS Stream ID"; + case AV_PKT_DATA_MASTERING_DISPLAY_METADATA: return "Mastering display metadata"; + case AV_PKT_DATA_CONTENT_LIGHT_LEVEL: return "Content light level metadata"; + case AV_PKT_DATA_SPHERICAL: return "Spherical Mapping"; + case AV_PKT_DATA_A53_CC: return "A53 Closed Captions"; + } + return NULL; +} + +#if FF_API_MERGE_SD_API + +#define FF_MERGE_MARKER 0x8c4d9d108e25e9feULL + +int av_packet_merge_side_data(AVPacket *pkt){ + if(pkt->side_data_elems){ + AVBufferRef *buf; + int i; + uint8_t *p; + uint64_t size= pkt->size + 8LL + AV_INPUT_BUFFER_PADDING_SIZE; + AVPacket old= *pkt; + for (i=0; i<old.side_data_elems; i++) { + size += old.side_data[i].size + 5LL; + } + if (size > INT_MAX) + return AVERROR(EINVAL); + buf = av_buffer_alloc(size); + if (!buf) + return AVERROR(ENOMEM); + pkt->buf = buf; + pkt->data = p = buf->data; + pkt->size = size - AV_INPUT_BUFFER_PADDING_SIZE; + bytestream_put_buffer(&p, old.data, old.size); + for (i=old.side_data_elems-1; i>=0; i--) { + bytestream_put_buffer(&p, old.side_data[i].data, old.side_data[i].size); + bytestream_put_be32(&p, old.side_data[i].size); + *p++ = old.side_data[i].type | ((i==old.side_data_elems-1)*128); + } + bytestream_put_be64(&p, FF_MERGE_MARKER); + av_assert0(p-pkt->data == pkt->size); + memset(p, 0, AV_INPUT_BUFFER_PADDING_SIZE); + av_packet_unref(&old); + pkt->side_data_elems = 0; + pkt->side_data = NULL; + return 1; + } + return 0; +} + +int av_packet_split_side_data(AVPacket *pkt){ + if (!pkt->side_data_elems && pkt->size >12 && AV_RB64(pkt->data + pkt->size - 8) == FF_MERGE_MARKER){ + int i; + unsigned int size; + uint8_t *p; + + p = pkt->data + pkt->size - 8 - 5; + for (i=1; ; i++){ + size = AV_RB32(p); + if (size>INT_MAX - 5 || p - pkt->data < size) + return 0; + if (p[4]&128) + break; + if (p - pkt->data < size + 5) + return 0; + p-= size+5; + } + + if (i > AV_PKT_DATA_NB) + return AVERROR(ERANGE); + + pkt->side_data = av_malloc_array(i, sizeof(*pkt->side_data)); + if (!pkt->side_data) + return AVERROR(ENOMEM); + + p= pkt->data + pkt->size - 8 - 5; + for (i=0; ; i++){ + size= AV_RB32(p); + av_assert0(size<=INT_MAX - 5 && p - pkt->data >= size); + pkt->side_data[i].data = av_mallocz(size + AV_INPUT_BUFFER_PADDING_SIZE); + pkt->side_data[i].size = size; + pkt->side_data[i].type = p[4]&127; + if (!pkt->side_data[i].data) + return AVERROR(ENOMEM); + memcpy(pkt->side_data[i].data, p-size, size); + pkt->size -= size + 5; + if(p[4]&128) + break; + p-= size+5; + } + pkt->size -= 8; + pkt->side_data_elems = i+1; + return 1; + } + return 0; +} +#endif + +uint8_t *av_packet_pack_dictionary(AVDictionary *dict, int *size) +{ + AVDictionaryEntry *t = NULL; + uint8_t *data = NULL; + *size = 0; + + if (!dict) + return NULL; + + while ((t = av_dict_get(dict, "", t, AV_DICT_IGNORE_SUFFIX))) { + const size_t keylen = strlen(t->key); + const size_t valuelen = strlen(t->value); + const size_t new_size = *size + keylen + 1 + valuelen + 1; + uint8_t *const new_data = av_realloc(data, new_size); + + if (!new_data) + goto fail; + data = new_data; + if (new_size > INT_MAX) + goto fail; + + memcpy(data + *size, t->key, keylen + 1); + memcpy(data + *size + keylen + 1, t->value, valuelen + 1); + + *size = new_size; + } + + return data; + +fail: + av_freep(&data); + *size = 0; + return NULL; +} + +int av_packet_unpack_dictionary(const uint8_t *data, int size, AVDictionary **dict) +{ + const uint8_t *end = data + size; + int ret = 0; + + if (!dict || !data || !size) + return ret; + if (size && end[-1]) + return AVERROR_INVALIDDATA; + while (data < end) { + const uint8_t *key = data; + const uint8_t *val = data + strlen(key) + 1; + + if (val >= end || !*key) + return AVERROR_INVALIDDATA; + + ret = av_dict_set(dict, key, val, 0); + if (ret < 0) + break; + data = val + strlen(val) + 1; + } + + return ret; +} + +int av_packet_shrink_side_data(AVPacket *pkt, enum AVPacketSideDataType type, + int size) +{ + int i; + + for (i = 0; i < pkt->side_data_elems; i++) { + if (pkt->side_data[i].type == type) { + if (size > pkt->side_data[i].size) + return AVERROR(ENOMEM); + pkt->side_data[i].size = size; + return 0; + } + } + return AVERROR(ENOENT); +} + +int av_packet_copy_props(AVPacket *dst, const AVPacket *src) +{ + int i; + + dst->pts = src->pts; + dst->dts = src->dts; + dst->pos = src->pos; + dst->duration = src->duration; +#if FF_API_CONVERGENCE_DURATION +FF_DISABLE_DEPRECATION_WARNINGS + dst->convergence_duration = src->convergence_duration; +FF_ENABLE_DEPRECATION_WARNINGS +#endif + dst->flags = src->flags; + dst->stream_index = src->stream_index; + + dst->side_data = NULL; + dst->side_data_elems = 0; + for (i = 0; i < src->side_data_elems; i++) { + enum AVPacketSideDataType type = src->side_data[i].type; + int size = src->side_data[i].size; + uint8_t *src_data = src->side_data[i].data; + uint8_t *dst_data = av_packet_new_side_data(dst, type, size); + + if (!dst_data) { + av_packet_free_side_data(dst); + return AVERROR(ENOMEM); + } + memcpy(dst_data, src_data, size); + } + + return 0; +} + +void av_packet_unref(AVPacket *pkt) +{ + av_packet_free_side_data(pkt); + av_buffer_unref(&pkt->buf); + av_init_packet(pkt); + pkt->data = NULL; + pkt->size = 0; +} + +int av_packet_ref(AVPacket *dst, const AVPacket *src) +{ + int ret; + + ret = av_packet_copy_props(dst, src); + if (ret < 0) + return ret; + + if (!src->buf) { + ret = packet_alloc(&dst->buf, src->size); + if (ret < 0) + goto fail; + if (src->size) + memcpy(dst->buf->data, src->data, src->size); + + dst->data = dst->buf->data; + } else { + dst->buf = av_buffer_ref(src->buf); + if (!dst->buf) { + ret = AVERROR(ENOMEM); + goto fail; + } + dst->data = src->data; + } + + dst->size = src->size; + + return 0; +fail: + av_packet_free_side_data(dst); + return ret; +} + +AVPacket *av_packet_clone(const AVPacket *src) +{ + AVPacket *ret = av_packet_alloc(); + + if (!ret) + return ret; + + if (av_packet_ref(ret, src)) + av_packet_free(&ret); + + return ret; +} + +void av_packet_move_ref(AVPacket *dst, AVPacket *src) +{ + *dst = *src; + av_init_packet(src); + src->data = NULL; + src->size = 0; +} + +int av_packet_make_refcounted(AVPacket *pkt) +{ + int ret; + + if (pkt->buf) + return 0; + + ret = packet_alloc(&pkt->buf, pkt->size); + if (ret < 0) + return ret; + if (pkt->size) + memcpy(pkt->buf->data, pkt->data, pkt->size); + + pkt->data = pkt->buf->data; + + return 0; +} + +int av_packet_make_writable(AVPacket *pkt) +{ + AVBufferRef *buf = NULL; + int ret; + + if (pkt->buf && av_buffer_is_writable(pkt->buf)) + return 0; + + ret = packet_alloc(&buf, pkt->size); + if (ret < 0) + return ret; + if (pkt->size) + memcpy(buf->data, pkt->data, pkt->size); + + av_buffer_unref(&pkt->buf); + pkt->buf = buf; + pkt->data = buf->data; + + return 0; +} + +void av_packet_rescale_ts(AVPacket *pkt, AVRational src_tb, AVRational dst_tb) +{ + if (pkt->pts != AV_NOPTS_VALUE) + pkt->pts = av_rescale_q(pkt->pts, src_tb, dst_tb); + if (pkt->dts != AV_NOPTS_VALUE) + pkt->dts = av_rescale_q(pkt->dts, src_tb, dst_tb); + if (pkt->duration > 0) + pkt->duration = av_rescale_q(pkt->duration, src_tb, dst_tb); +#if FF_API_CONVERGENCE_DURATION +FF_DISABLE_DEPRECATION_WARNINGS + if (pkt->convergence_duration > 0) + pkt->convergence_duration = av_rescale_q(pkt->convergence_duration, src_tb, dst_tb); +FF_ENABLE_DEPRECATION_WARNINGS +#endif +} + +int ff_side_data_set_encoder_stats(AVPacket *pkt, int quality, int64_t *error, int error_count, int pict_type) +{ + uint8_t *side_data; + int side_data_size; + int i; + + side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_QUALITY_STATS, &side_data_size); + if (!side_data) { + side_data_size = 4+4+8*error_count; + side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_QUALITY_STATS, + side_data_size); + } + + if (!side_data || side_data_size < 4+4+8*error_count) + return AVERROR(ENOMEM); + + AV_WL32(side_data , quality ); + side_data[4] = pict_type; + side_data[5] = error_count; + for (i = 0; i<error_count; i++) + AV_WL64(side_data+8 + 8*i , error[i]); + + return 0; +} diff --git a/libs/ffvpx/libavcodec/avpicture.c b/libs/ffvpx/libavcodec/avpicture.c new file mode 100644 index 000000000..56435f4fc --- /dev/null +++ b/libs/ffvpx/libavcodec/avpicture.c @@ -0,0 +1,82 @@ +/* + * AVPicture management routines + * Copyright (c) 2001, 2002, 2003 Fabrice Bellard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * AVPicture management routines + */ + +#include "avcodec.h" +#include "internal.h" +#include "libavutil/common.h" +#include "libavutil/pixdesc.h" +#include "libavutil/imgutils.h" +#include "libavutil/internal.h" +#include "libavutil/colorspace.h" + +#if FF_API_AVPICTURE +FF_DISABLE_DEPRECATION_WARNINGS +int avpicture_fill(AVPicture *picture, const uint8_t *ptr, + enum AVPixelFormat pix_fmt, int width, int height) +{ + return av_image_fill_arrays(picture->data, picture->linesize, + ptr, pix_fmt, width, height, 1); +} + +int avpicture_layout(const AVPicture* src, enum AVPixelFormat pix_fmt, int width, int height, + unsigned char *dest, int dest_size) +{ + return av_image_copy_to_buffer(dest, dest_size, + (const uint8_t * const*)src->data, src->linesize, + pix_fmt, width, height, 1); +} + +int avpicture_get_size(enum AVPixelFormat pix_fmt, int width, int height) +{ + return av_image_get_buffer_size(pix_fmt, width, height, 1); +} + +int avpicture_alloc(AVPicture *picture, + enum AVPixelFormat pix_fmt, int width, int height) +{ + int ret = av_image_alloc(picture->data, picture->linesize, + width, height, pix_fmt, 1); + if (ret < 0) { + memset(picture, 0, sizeof(AVPicture)); + return ret; + } + + return 0; +} + +void avpicture_free(AVPicture *picture) +{ + av_freep(&picture->data[0]); +} + +void av_picture_copy(AVPicture *dst, const AVPicture *src, + enum AVPixelFormat pix_fmt, int width, int height) +{ + av_image_copy(dst->data, dst->linesize, (const uint8_t **)src->data, + src->linesize, pix_fmt, width, height); +} +FF_ENABLE_DEPRECATION_WARNINGS +#endif /* FF_API_AVPICTURE */ diff --git a/libs/ffvpx/libavcodec/bit_depth_template.c b/libs/ffvpx/libavcodec/bit_depth_template.c new file mode 100644 index 000000000..d44d47ea4 --- /dev/null +++ b/libs/ffvpx/libavcodec/bit_depth_template.c @@ -0,0 +1,108 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "mathops.h" +#include "rnd_avg.h" +#include "libavutil/intreadwrite.h" + +#ifndef BIT_DEPTH +#define BIT_DEPTH 8 +#endif + +#ifdef AVCODEC_BIT_DEPTH_TEMPLATE_C +# undef pixel +# undef pixel2 +# undef pixel4 +# undef dctcoef +# undef idctin +# undef INIT_CLIP +# undef no_rnd_avg_pixel4 +# undef rnd_avg_pixel4 +# undef AV_RN2P +# undef AV_RN4P +# undef AV_RN4PA +# undef AV_WN2P +# undef AV_WN4P +# undef AV_WN4PA +# undef CLIP +# undef FUNC +# undef FUNCC +# undef av_clip_pixel +# undef PIXEL_SPLAT_X4 +#else +# define AVCODEC_BIT_DEPTH_TEMPLATE_C +#endif + +#if BIT_DEPTH > 8 +# define pixel uint16_t +# define pixel2 uint32_t +# define pixel4 uint64_t +# define dctcoef int32_t + +#ifdef IN_IDCT_DEPTH +#if IN_IDCT_DEPTH == 32 +# define idctin int32_t +#else +# define idctin int16_t +#endif +#else +# define idctin int16_t +#endif + +# define INIT_CLIP +# define no_rnd_avg_pixel4 no_rnd_avg64 +# define rnd_avg_pixel4 rnd_avg64 +# define AV_RN2P AV_RN32 +# define AV_RN4P AV_RN64 +# define AV_RN4PA AV_RN64A +# define AV_WN2P AV_WN32 +# define AV_WN4P AV_WN64 +# define AV_WN4PA AV_WN64A +# define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL) + +# define av_clip_pixel(a) av_clip_uintp2(a, BIT_DEPTH) +# define CLIP(a) av_clip_uintp2(a, BIT_DEPTH) +#else +# define pixel uint8_t +# define pixel2 uint16_t +# define pixel4 uint32_t +# define dctcoef int16_t +# define idctin int16_t + +# define INIT_CLIP +# define no_rnd_avg_pixel4 no_rnd_avg32 +# define rnd_avg_pixel4 rnd_avg32 +# define AV_RN2P AV_RN16 +# define AV_RN4P AV_RN32 +# define AV_RN4PA AV_RN32A +# define AV_WN2P AV_WN16 +# define AV_WN4P AV_WN32 +# define AV_WN4PA AV_WN32A +# define PIXEL_SPLAT_X4(x) ((x)*0x01010101U) + +# define av_clip_pixel(a) av_clip_uint8(a) +# define CLIP(a) av_clip_uint8(a) +#endif + +#define FUNC3(a, b, c) a ## _ ## b ## c +#define FUNC2(a, b, c) FUNC3(a, b, c) +#define FUNC(a) FUNC2(a, BIT_DEPTH,) +#define FUNCC(a) FUNC2(a, BIT_DEPTH, _c) +#define FUNC4(a, b, c) a ## _int ## b ## _ ## c ## bit +#define FUNC5(a, b, c) FUNC4(a, b, c) +#define FUNC6(a) FUNC5(a, IN_IDCT_DEPTH, BIT_DEPTH) diff --git a/libs/ffvpx/libavcodec/bitstream.c b/libs/ffvpx/libavcodec/bitstream.c new file mode 100644 index 000000000..ed528fe4a --- /dev/null +++ b/libs/ffvpx/libavcodec/bitstream.c @@ -0,0 +1,357 @@ +/* + * Common bit i/o utils + * Copyright (c) 2000, 2001 Fabrice Bellard + * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> + * Copyright (c) 2010 Loren Merritt + * + * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * bitstream api. + */ + +#include "libavutil/avassert.h" +#include "libavutil/qsort.h" +#include "avcodec.h" +#include "internal.h" +#include "mathops.h" +#include "put_bits.h" +#include "vlc.h" + +const uint8_t ff_log2_run[41]={ + 0, 0, 0, 0, 1, 1, 1, 1, + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, + 8, 9,10,11,12,13,14,15, +16,17,18,19,20,21,22,23, +24, +}; + +void avpriv_align_put_bits(PutBitContext *s) +{ + put_bits(s, s->bit_left & 7, 0); +} + +void avpriv_put_string(PutBitContext *pb, const char *string, + int terminate_string) +{ + while (*string) { + put_bits(pb, 8, *string); + string++; + } + if (terminate_string) + put_bits(pb, 8, 0); +} + +void avpriv_copy_bits(PutBitContext *pb, const uint8_t *src, int length) +{ + int words = length >> 4; + int bits = length & 15; + int i; + + if (length == 0) + return; + + av_assert0(length <= put_bits_left(pb)); + + if (CONFIG_SMALL || words < 16 || put_bits_count(pb) & 7) { + for (i = 0; i < words; i++) + put_bits(pb, 16, AV_RB16(src + 2 * i)); + } else { + for (i = 0; put_bits_count(pb) & 31; i++) + put_bits(pb, 8, src[i]); + flush_put_bits(pb); + memcpy(put_bits_ptr(pb), src + i, 2 * words - i); + skip_put_bytes(pb, 2 * words - i); + } + + put_bits(pb, bits, AV_RB16(src + 2 * words) >> (16 - bits)); +} + +/* VLC decoding */ + +#define GET_DATA(v, table, i, wrap, size) \ +{ \ + const uint8_t *ptr = (const uint8_t *)table + i * wrap; \ + switch(size) { \ + case 1: \ + v = *(const uint8_t *)ptr; \ + break; \ + case 2: \ + v = *(const uint16_t *)ptr; \ + break; \ + case 4: \ + v = *(const uint32_t *)ptr; \ + break; \ + default: \ + av_assert1(0); \ + } \ +} + + +static int alloc_table(VLC *vlc, int size, int use_static) +{ + int index = vlc->table_size; + + vlc->table_size += size; + if (vlc->table_size > vlc->table_allocated) { + if (use_static) + abort(); // cannot do anything, init_vlc() is used with too little memory + vlc->table_allocated += (1 << vlc->bits); + vlc->table = av_realloc_f(vlc->table, vlc->table_allocated, sizeof(VLC_TYPE) * 2); + if (!vlc->table) { + vlc->table_allocated = 0; + vlc->table_size = 0; + return AVERROR(ENOMEM); + } + memset(vlc->table + vlc->table_allocated - (1 << vlc->bits), 0, sizeof(VLC_TYPE) * 2 << vlc->bits); + } + return index; +} + +typedef struct VLCcode { + uint8_t bits; + uint16_t symbol; + /** codeword, with the first bit-to-be-read in the msb + * (even if intended for a little-endian bitstream reader) */ + uint32_t code; +} VLCcode; + +static int compare_vlcspec(const void *a, const void *b) +{ + const VLCcode *sa = a, *sb = b; + return (sa->code >> 1) - (sb->code >> 1); +} +/** + * Build VLC decoding tables suitable for use with get_vlc(). + * + * @param vlc the context to be initialized + * + * @param table_nb_bits max length of vlc codes to store directly in this table + * (Longer codes are delegated to subtables.) + * + * @param nb_codes number of elements in codes[] + * + * @param codes descriptions of the vlc codes + * These must be ordered such that codes going into the same subtable are contiguous. + * Sorting by VLCcode.code is sufficient, though not necessary. + */ +static int build_table(VLC *vlc, int table_nb_bits, int nb_codes, + VLCcode *codes, int flags) +{ + int table_size, table_index, index, code_prefix, symbol, subtable_bits; + int i, j, k, n, nb, inc; + uint32_t code; + volatile VLC_TYPE (* volatile table)[2]; // the double volatile is needed to prevent an internal compiler error in gcc 4.2 + + table_size = 1 << table_nb_bits; + if (table_nb_bits > 30) + return -1; + table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC); + ff_dlog(NULL, "new table index=%d size=%d\n", table_index, table_size); + if (table_index < 0) + return table_index; + table = (volatile VLC_TYPE (*)[2])&vlc->table[table_index]; + + /* first pass: map codes and compute auxiliary table sizes */ + for (i = 0; i < nb_codes; i++) { + n = codes[i].bits; + code = codes[i].code; + symbol = codes[i].symbol; + ff_dlog(NULL, "i=%d n=%d code=0x%"PRIx32"\n", i, n, code); + if (n <= table_nb_bits) { + /* no need to add another table */ + j = code >> (32 - table_nb_bits); + nb = 1 << (table_nb_bits - n); + inc = 1; + if (flags & INIT_VLC_LE) { + j = bitswap_32(code); + inc = 1 << n; + } + for (k = 0; k < nb; k++) { + int bits = table[j][1]; + ff_dlog(NULL, "%4x: code=%d n=%d\n", j, i, n); + if (bits != 0 && bits != n) { + av_log(NULL, AV_LOG_ERROR, "incorrect codes\n"); + return AVERROR_INVALIDDATA; + } + table[j][1] = n; //bits + table[j][0] = symbol; + j += inc; + } + } else { + /* fill auxiliary table recursively */ + n -= table_nb_bits; + code_prefix = code >> (32 - table_nb_bits); + subtable_bits = n; + codes[i].bits = n; + codes[i].code = code << table_nb_bits; + for (k = i+1; k < nb_codes; k++) { + n = codes[k].bits - table_nb_bits; + if (n <= 0) + break; + code = codes[k].code; + if (code >> (32 - table_nb_bits) != code_prefix) + break; + codes[k].bits = n; + codes[k].code = code << table_nb_bits; + subtable_bits = FFMAX(subtable_bits, n); + } + subtable_bits = FFMIN(subtable_bits, table_nb_bits); + j = (flags & INIT_VLC_LE) ? bitswap_32(code_prefix) >> (32 - table_nb_bits) : code_prefix; + table[j][1] = -subtable_bits; + ff_dlog(NULL, "%4x: n=%d (subtable)\n", + j, codes[i].bits + table_nb_bits); + index = build_table(vlc, subtable_bits, k-i, codes+i, flags); + if (index < 0) + return index; + /* note: realloc has been done, so reload tables */ + table = (volatile VLC_TYPE (*)[2])&vlc->table[table_index]; + table[j][0] = index; //code + i = k-1; + } + } + + for (i = 0; i < table_size; i++) { + if (table[i][1] == 0) //bits + table[i][0] = -1; //codes + } + + return table_index; +} + + +/* Build VLC decoding tables suitable for use with get_vlc(). + + 'nb_bits' sets the decoding table size (2^nb_bits) entries. The + bigger it is, the faster is the decoding. But it should not be too + big to save memory and L1 cache. '9' is a good compromise. + + 'nb_codes' : number of vlcs codes + + 'bits' : table which gives the size (in bits) of each vlc code. + + 'codes' : table which gives the bit pattern of of each vlc code. + + 'symbols' : table which gives the values to be returned from get_vlc(). + + 'xxx_wrap' : give the number of bytes between each entry of the + 'bits' or 'codes' tables. + + 'xxx_size' : gives the number of bytes of each entry of the 'bits' + or 'codes' tables. Currently 1,2 and 4 are supported. + + 'wrap' and 'size' make it possible to use any memory configuration and types + (byte/word/long) to store the 'bits', 'codes', and 'symbols' tables. + + 'use_static' should be set to 1 for tables, which should be freed + with av_free_static(), 0 if ff_free_vlc() will be used. +*/ +int ff_init_vlc_sparse(VLC *vlc_arg, int nb_bits, int nb_codes, + const void *bits, int bits_wrap, int bits_size, + const void *codes, int codes_wrap, int codes_size, + const void *symbols, int symbols_wrap, int symbols_size, + int flags) +{ + VLCcode *buf; + int i, j, ret; + VLCcode localbuf[1500]; // the maximum currently needed is 1296 by rv34 + VLC localvlc, *vlc; + + vlc = vlc_arg; + vlc->bits = nb_bits; + if (flags & INIT_VLC_USE_NEW_STATIC) { + av_assert0(nb_codes + 1 <= FF_ARRAY_ELEMS(localbuf)); + buf = localbuf; + localvlc = *vlc_arg; + vlc = &localvlc; + vlc->table_size = 0; + } else { + vlc->table = NULL; + vlc->table_allocated = 0; + vlc->table_size = 0; + + buf = av_malloc_array((nb_codes + 1), sizeof(VLCcode)); + if (!buf) + return AVERROR(ENOMEM); + } + + + av_assert0(symbols_size <= 2 || !symbols); + j = 0; +#define COPY(condition)\ + for (i = 0; i < nb_codes; i++) { \ + GET_DATA(buf[j].bits, bits, i, bits_wrap, bits_size); \ + if (!(condition)) \ + continue; \ + if (buf[j].bits > 3*nb_bits || buf[j].bits>32) { \ + av_log(NULL, AV_LOG_ERROR, "Too long VLC (%d) in init_vlc\n", buf[j].bits);\ + if (!(flags & INIT_VLC_USE_NEW_STATIC)) \ + av_free(buf); \ + return -1; \ + } \ + GET_DATA(buf[j].code, codes, i, codes_wrap, codes_size); \ + if (buf[j].code >= (1LL<<buf[j].bits)) { \ + av_log(NULL, AV_LOG_ERROR, "Invalid code %"PRIx32" for %d in " \ + "init_vlc\n", buf[j].code, i); \ + if (!(flags & INIT_VLC_USE_NEW_STATIC)) \ + av_free(buf); \ + return -1; \ + } \ + if (flags & INIT_VLC_LE) \ + buf[j].code = bitswap_32(buf[j].code); \ + else \ + buf[j].code <<= 32 - buf[j].bits; \ + if (symbols) \ + GET_DATA(buf[j].symbol, symbols, i, symbols_wrap, symbols_size) \ + else \ + buf[j].symbol = i; \ + j++; \ + } + COPY(buf[j].bits > nb_bits); + // qsort is the slowest part of init_vlc, and could probably be improved or avoided + AV_QSORT(buf, j, struct VLCcode, compare_vlcspec); + COPY(buf[j].bits && buf[j].bits <= nb_bits); + nb_codes = j; + + ret = build_table(vlc, nb_bits, nb_codes, buf, flags); + + if (flags & INIT_VLC_USE_NEW_STATIC) { + if(vlc->table_size != vlc->table_allocated) + av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n", vlc->table_size, vlc->table_allocated); + + av_assert0(ret >= 0); + *vlc_arg = *vlc; + } else { + av_free(buf); + if (ret < 0) { + av_freep(&vlc->table); + return ret; + } + } + return 0; +} + + +void ff_free_vlc(VLC *vlc) +{ + av_freep(&vlc->table); +} diff --git a/libs/ffvpx/libavcodec/bitstream_filter.c b/libs/ffvpx/libavcodec/bitstream_filter.c new file mode 100644 index 000000000..ca11ed371 --- /dev/null +++ b/libs/ffvpx/libavcodec/bitstream_filter.c @@ -0,0 +1,185 @@ +/* + * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <string.h> + +#include "avcodec.h" +#include "libavutil/internal.h" +#include "libavutil/mem.h" +#include "libavutil/opt.h" + +#if FF_API_OLD_BSF +FF_DISABLE_DEPRECATION_WARNINGS + +const AVBitStreamFilter *av_bitstream_filter_next(const AVBitStreamFilter *f) +{ + const AVBitStreamFilter *filter = NULL; + void *opaque = NULL; + + while (filter != f) + filter = av_bsf_iterate(&opaque); + + return av_bsf_iterate(&opaque); +} + +void av_register_bitstream_filter(AVBitStreamFilter *bsf) +{ +} + +typedef struct BSFCompatContext { + AVBSFContext *ctx; + int extradata_updated; +} BSFCompatContext; + +AVBitStreamFilterContext *av_bitstream_filter_init(const char *name) +{ + AVBitStreamFilterContext *ctx = NULL; + BSFCompatContext *priv = NULL; + const AVBitStreamFilter *bsf; + + bsf = av_bsf_get_by_name(name); + if (!bsf) + return NULL; + + ctx = av_mallocz(sizeof(*ctx)); + if (!ctx) + return NULL; + + priv = av_mallocz(sizeof(*priv)); + if (!priv) + goto fail; + + + ctx->filter = bsf; + ctx->priv_data = priv; + + return ctx; + +fail: + if (priv) + av_bsf_free(&priv->ctx); + av_freep(&priv); + av_freep(&ctx); + return NULL; +} + +void av_bitstream_filter_close(AVBitStreamFilterContext *bsfc) +{ + BSFCompatContext *priv; + + if (!bsfc) + return; + + priv = bsfc->priv_data; + + av_bsf_free(&priv->ctx); + av_freep(&bsfc->priv_data); + av_free(bsfc); +} + +int av_bitstream_filter_filter(AVBitStreamFilterContext *bsfc, + AVCodecContext *avctx, const char *args, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size, int keyframe) +{ + BSFCompatContext *priv = bsfc->priv_data; + AVPacket pkt = { 0 }; + int ret; + + if (!priv->ctx) { + ret = av_bsf_alloc(bsfc->filter, &priv->ctx); + if (ret < 0) + return ret; + + ret = avcodec_parameters_from_context(priv->ctx->par_in, avctx); + if (ret < 0) + return ret; + + priv->ctx->time_base_in = avctx->time_base; + + if (bsfc->args && bsfc->filter->priv_class) { + const AVOption *opt = av_opt_next(priv->ctx->priv_data, NULL); + const char * shorthand[2] = {NULL}; + + if (opt) + shorthand[0] = opt->name; + + ret = av_opt_set_from_string(priv->ctx->priv_data, bsfc->args, shorthand, "=", ":"); + if (ret < 0) + return ret; + } + + ret = av_bsf_init(priv->ctx); + if (ret < 0) + return ret; + } + + pkt.data = (uint8_t *)buf; + pkt.size = buf_size; + + ret = av_bsf_send_packet(priv->ctx, &pkt); + if (ret < 0) + return ret; + + *poutbuf = NULL; + *poutbuf_size = 0; + + ret = av_bsf_receive_packet(priv->ctx, &pkt); + if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) + return 0; + else if (ret < 0) + return ret; + + *poutbuf = av_malloc(pkt.size + AV_INPUT_BUFFER_PADDING_SIZE); + if (!*poutbuf) { + av_packet_unref(&pkt); + return AVERROR(ENOMEM); + } + + *poutbuf_size = pkt.size; + memcpy(*poutbuf, pkt.data, pkt.size); + + av_packet_unref(&pkt); + + /* drain all the remaining packets we cannot return */ + while (ret >= 0) { + ret = av_bsf_receive_packet(priv->ctx, &pkt); + av_packet_unref(&pkt); + } + + if (!priv->extradata_updated) { + /* update extradata in avctx from the output codec parameters */ + if (priv->ctx->par_out->extradata_size && (!args || !strstr(args, "private_spspps_buf"))) { + av_freep(&avctx->extradata); + avctx->extradata_size = 0; + avctx->extradata = av_mallocz(priv->ctx->par_out->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE); + if (!avctx->extradata) + return AVERROR(ENOMEM); + memcpy(avctx->extradata, priv->ctx->par_out->extradata, priv->ctx->par_out->extradata_size); + avctx->extradata_size = priv->ctx->par_out->extradata_size; + } + + priv->extradata_updated = 1; + } + + return 1; +} +FF_ENABLE_DEPRECATION_WARNINGS +#endif diff --git a/libs/ffvpx/libavcodec/bitstream_filters.c b/libs/ffvpx/libavcodec/bitstream_filters.c new file mode 100644 index 000000000..18b698a85 --- /dev/null +++ b/libs/ffvpx/libavcodec/bitstream_filters.c @@ -0,0 +1,106 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "libavutil/common.h" +#include "libavutil/log.h" + +#include "avcodec.h" +#include "bsf.h" + +extern const AVBitStreamFilter ff_aac_adtstoasc_bsf; +extern const AVBitStreamFilter ff_chomp_bsf; +extern const AVBitStreamFilter ff_dump_extradata_bsf; +extern const AVBitStreamFilter ff_dca_core_bsf; +extern const AVBitStreamFilter ff_eac3_core_bsf; +extern const AVBitStreamFilter ff_extract_extradata_bsf; +extern const AVBitStreamFilter ff_filter_units_bsf; +extern const AVBitStreamFilter ff_h264_metadata_bsf; +extern const AVBitStreamFilter ff_h264_mp4toannexb_bsf; +extern const AVBitStreamFilter ff_h264_redundant_pps_bsf; +extern const AVBitStreamFilter ff_hapqa_extract_bsf; +extern const AVBitStreamFilter ff_hevc_metadata_bsf; +extern const AVBitStreamFilter ff_hevc_mp4toannexb_bsf; +extern const AVBitStreamFilter ff_imx_dump_header_bsf; +extern const AVBitStreamFilter ff_mjpeg2jpeg_bsf; +extern const AVBitStreamFilter ff_mjpega_dump_header_bsf; +extern const AVBitStreamFilter ff_mp3_header_decompress_bsf; +extern const AVBitStreamFilter ff_mpeg2_metadata_bsf; +extern const AVBitStreamFilter ff_mpeg4_unpack_bframes_bsf; +extern const AVBitStreamFilter ff_mov2textsub_bsf; +extern const AVBitStreamFilter ff_noise_bsf; +extern const AVBitStreamFilter ff_null_bsf; +extern const AVBitStreamFilter ff_remove_extradata_bsf; +extern const AVBitStreamFilter ff_text2movsub_bsf; +extern const AVBitStreamFilter ff_trace_headers_bsf; +extern const AVBitStreamFilter ff_vp9_raw_reorder_bsf; +extern const AVBitStreamFilter ff_vp9_superframe_bsf; +extern const AVBitStreamFilter ff_vp9_superframe_split_bsf; + +#include "libavcodec/bsf_list.c" + +const AVBitStreamFilter *av_bsf_iterate(void **opaque) +{ + uintptr_t i = (uintptr_t)*opaque; + const AVBitStreamFilter *f = bitstream_filters[i]; + + if (f) + *opaque = (void*)(i + 1); + + return f; +} + +#if FF_API_NEXT +const AVBitStreamFilter *av_bsf_next(void **opaque) { + return av_bsf_iterate(opaque); +} +#endif + +const AVBitStreamFilter *av_bsf_get_by_name(const char *name) +{ + const AVBitStreamFilter *f = NULL; + void *i = 0; + + while ((f = av_bsf_iterate(&i))) { + if (!strcmp(f->name, name)) + return f; + } + + return NULL; +} + +const AVClass *ff_bsf_child_class_next(const AVClass *prev) +{ + const AVBitStreamFilter *f = NULL; + void *i = 0; + + /* find the filter that corresponds to prev */ + while (prev && (f = av_bsf_iterate(&i))) { + if (f->priv_class == prev) { + break; + } + } + + /* find next filter with priv options */ + while ((f = av_bsf_iterate(&i))) { + if (f->priv_class) + return f->priv_class; + } + return NULL; +} diff --git a/libs/ffvpx/libavcodec/blockdsp.h b/libs/ffvpx/libavcodec/blockdsp.h new file mode 100644 index 000000000..26fc2ea13 --- /dev/null +++ b/libs/ffvpx/libavcodec/blockdsp.h @@ -0,0 +1,50 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_BLOCKDSP_H +#define AVCODEC_BLOCKDSP_H + +#include <stddef.h> +#include <stdint.h> + +#include "avcodec.h" +#include "version.h" + +/* add and put pixel (decoding) + * Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16. + * h for op_pixels_func is limited to { width / 2, width }, + * but never larger than 16 and never smaller than 4. */ +typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */, + uint8_t value, ptrdiff_t line_size, int h); + +typedef struct BlockDSPContext { + void (*clear_block)(int16_t *block /* align 32 */); + void (*clear_blocks)(int16_t *blocks /* align 32 */); + + op_fill_func fill_block_tab[2]; +} BlockDSPContext; + +void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx); + +void ff_blockdsp_init_alpha(BlockDSPContext *c); +void ff_blockdsp_init_arm(BlockDSPContext *c); +void ff_blockdsp_init_ppc(BlockDSPContext *c); +void ff_blockdsp_init_x86(BlockDSPContext *c, AVCodecContext *avctx); +void ff_blockdsp_init_mips(BlockDSPContext *c); + +#endif /* AVCODEC_BLOCKDSP_H */ diff --git a/libs/ffvpx/libavcodec/bsf.c b/libs/ffvpx/libavcodec/bsf.c new file mode 100644 index 000000000..bd611ea16 --- /dev/null +++ b/libs/ffvpx/libavcodec/bsf.c @@ -0,0 +1,547 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <string.h> + +#include "libavutil/log.h" +#include "libavutil/mem.h" +#include "libavutil/opt.h" +#include "libavutil/avstring.h" +#include "libavutil/bprint.h" + +#include "avcodec.h" +#include "bsf.h" + +struct AVBSFInternal { + AVPacket *buffer_pkt; + int eof; +}; + +void av_bsf_free(AVBSFContext **pctx) +{ + AVBSFContext *ctx; + + if (!pctx || !*pctx) + return; + ctx = *pctx; + + if (ctx->filter->close) + ctx->filter->close(ctx); + if (ctx->filter->priv_class && ctx->priv_data) + av_opt_free(ctx->priv_data); + + av_opt_free(ctx); + + av_packet_free(&ctx->internal->buffer_pkt); + av_freep(&ctx->internal); + av_freep(&ctx->priv_data); + + avcodec_parameters_free(&ctx->par_in); + avcodec_parameters_free(&ctx->par_out); + + av_freep(pctx); +} + +static void *bsf_child_next(void *obj, void *prev) +{ + AVBSFContext *ctx = obj; + if (!prev && ctx->filter->priv_class) + return ctx->priv_data; + return NULL; +} + +static const AVClass bsf_class = { + .class_name = "AVBSFContext", + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT, + .child_next = bsf_child_next, + .child_class_next = ff_bsf_child_class_next, +}; + +const AVClass *av_bsf_get_class(void) +{ + return &bsf_class; +} + +int av_bsf_alloc(const AVBitStreamFilter *filter, AVBSFContext **pctx) +{ + AVBSFContext *ctx; + int ret; + + ctx = av_mallocz(sizeof(*ctx)); + if (!ctx) + return AVERROR(ENOMEM); + + ctx->av_class = &bsf_class; + ctx->filter = filter; + + ctx->par_in = avcodec_parameters_alloc(); + ctx->par_out = avcodec_parameters_alloc(); + if (!ctx->par_in || !ctx->par_out) { + ret = AVERROR(ENOMEM); + goto fail; + } + + ctx->internal = av_mallocz(sizeof(*ctx->internal)); + if (!ctx->internal) { + ret = AVERROR(ENOMEM); + goto fail; + } + + ctx->internal->buffer_pkt = av_packet_alloc(); + if (!ctx->internal->buffer_pkt) { + ret = AVERROR(ENOMEM); + goto fail; + } + + av_opt_set_defaults(ctx); + + /* allocate priv data and init private options */ + if (filter->priv_data_size) { + ctx->priv_data = av_mallocz(filter->priv_data_size); + if (!ctx->priv_data) { + ret = AVERROR(ENOMEM); + goto fail; + } + if (filter->priv_class) { + *(const AVClass **)ctx->priv_data = filter->priv_class; + av_opt_set_defaults(ctx->priv_data); + } + } + + *pctx = ctx; + return 0; +fail: + av_bsf_free(&ctx); + return ret; +} + +int av_bsf_init(AVBSFContext *ctx) +{ + int ret, i; + + /* check that the codec is supported */ + if (ctx->filter->codec_ids) { + for (i = 0; ctx->filter->codec_ids[i] != AV_CODEC_ID_NONE; i++) + if (ctx->par_in->codec_id == ctx->filter->codec_ids[i]) + break; + if (ctx->filter->codec_ids[i] == AV_CODEC_ID_NONE) { + const AVCodecDescriptor *desc = avcodec_descriptor_get(ctx->par_in->codec_id); + av_log(ctx, AV_LOG_ERROR, "Codec '%s' (%d) is not supported by the " + "bitstream filter '%s'. Supported codecs are: ", + desc ? desc->name : "unknown", ctx->par_in->codec_id, ctx->filter->name); + for (i = 0; ctx->filter->codec_ids[i] != AV_CODEC_ID_NONE; i++) { + desc = avcodec_descriptor_get(ctx->filter->codec_ids[i]); + av_log(ctx, AV_LOG_ERROR, "%s (%d) ", + desc ? desc->name : "unknown", ctx->filter->codec_ids[i]); + } + av_log(ctx, AV_LOG_ERROR, "\n"); + return AVERROR(EINVAL); + } + } + + /* initialize output parameters to be the same as input + * init below might overwrite that */ + ret = avcodec_parameters_copy(ctx->par_out, ctx->par_in); + if (ret < 0) + return ret; + + ctx->time_base_out = ctx->time_base_in; + + if (ctx->filter->init) { + ret = ctx->filter->init(ctx); + if (ret < 0) + return ret; + } + + return 0; +} + +int av_bsf_send_packet(AVBSFContext *ctx, AVPacket *pkt) +{ + int ret; + + if (!pkt || (!pkt->data && !pkt->side_data_elems)) { + ctx->internal->eof = 1; + return 0; + } + + if (ctx->internal->eof) { + av_log(ctx, AV_LOG_ERROR, "A non-NULL packet sent after an EOF.\n"); + return AVERROR(EINVAL); + } + + if (ctx->internal->buffer_pkt->data || + ctx->internal->buffer_pkt->side_data_elems) + return AVERROR(EAGAIN); + + ret = av_packet_make_refcounted(pkt); + if (ret < 0) + return ret; + av_packet_move_ref(ctx->internal->buffer_pkt, pkt); + + return 0; +} + +int av_bsf_receive_packet(AVBSFContext *ctx, AVPacket *pkt) +{ + return ctx->filter->filter(ctx, pkt); +} + +int ff_bsf_get_packet(AVBSFContext *ctx, AVPacket **pkt) +{ + AVBSFInternal *in = ctx->internal; + AVPacket *tmp_pkt; + + if (in->eof) + return AVERROR_EOF; + + if (!ctx->internal->buffer_pkt->data && + !ctx->internal->buffer_pkt->side_data_elems) + return AVERROR(EAGAIN); + + tmp_pkt = av_packet_alloc(); + if (!tmp_pkt) + return AVERROR(ENOMEM); + + *pkt = ctx->internal->buffer_pkt; + ctx->internal->buffer_pkt = tmp_pkt; + + return 0; +} + +int ff_bsf_get_packet_ref(AVBSFContext *ctx, AVPacket *pkt) +{ + AVBSFInternal *in = ctx->internal; + + if (in->eof) + return AVERROR_EOF; + + if (!ctx->internal->buffer_pkt->data && + !ctx->internal->buffer_pkt->side_data_elems) + return AVERROR(EAGAIN); + + av_packet_move_ref(pkt, ctx->internal->buffer_pkt); + + return 0; +} + +typedef struct BSFListContext { + const AVClass *class; + + AVBSFContext **bsfs; + int nb_bsfs; + + unsigned idx; // index of currently processed BSF + unsigned flushed_idx; // index of BSF being flushed + + char * item_name; +} BSFListContext; + + +static int bsf_list_init(AVBSFContext *bsf) +{ + BSFListContext *lst = bsf->priv_data; + int ret, i; + const AVCodecParameters *cod_par = bsf->par_in; + AVRational tb = bsf->time_base_in; + + for (i = 0; i < lst->nb_bsfs; ++i) { + ret = avcodec_parameters_copy(lst->bsfs[i]->par_in, cod_par); + if (ret < 0) + goto fail; + + lst->bsfs[i]->time_base_in = tb; + + ret = av_bsf_init(lst->bsfs[i]); + if (ret < 0) + goto fail; + + cod_par = lst->bsfs[i]->par_out; + tb = lst->bsfs[i]->time_base_out; + } + + bsf->time_base_out = tb; + ret = avcodec_parameters_copy(bsf->par_out, cod_par); + +fail: + return ret; +} + +static int bsf_list_filter(AVBSFContext *bsf, AVPacket *out) +{ + BSFListContext *lst = bsf->priv_data; + int ret; + + if (!lst->nb_bsfs) + return ff_bsf_get_packet_ref(bsf, out); + + while (1) { + if (lst->idx > lst->flushed_idx) { + ret = av_bsf_receive_packet(lst->bsfs[lst->idx-1], out); + if (ret == AVERROR(EAGAIN)) { + /* no more packets from idx-1, try with previous */ + ret = 0; + lst->idx--; + continue; + } else if (ret == AVERROR_EOF) { + /* filter idx-1 is done, continue with idx...nb_bsfs */ + lst->flushed_idx = lst->idx; + continue; + }else if (ret < 0) { + /* filtering error */ + break; + } + } else { + ret = ff_bsf_get_packet_ref(bsf, out); + if (ret == AVERROR_EOF) { + lst->idx = lst->flushed_idx; + } else if (ret < 0) + break; + } + + if (lst->idx < lst->nb_bsfs) { + AVPacket *pkt; + if (ret == AVERROR_EOF && lst->idx == lst->flushed_idx) { + /* ff_bsf_get_packet_ref returned EOF and idx is first + * filter of yet not flushed filter chain */ + pkt = NULL; + } else { + pkt = out; + } + ret = av_bsf_send_packet(lst->bsfs[lst->idx], pkt); + if (ret < 0) + break; + lst->idx++; + } else { + /* The end of filter chain, break to return result */ + break; + } + } + + if (ret < 0) + av_packet_unref(out); + + return ret; +} + +static void bsf_list_close(AVBSFContext *bsf) +{ + BSFListContext *lst = bsf->priv_data; + int i; + + for (i = 0; i < lst->nb_bsfs; ++i) + av_bsf_free(&lst->bsfs[i]); + av_freep(&lst->bsfs); + av_freep(&lst->item_name); +} + +static const char *bsf_list_item_name(void *ctx) +{ + static const char *null_filter_name = "null"; + AVBSFContext *bsf_ctx = ctx; + BSFListContext *lst = bsf_ctx->priv_data; + + if (!lst->nb_bsfs) + return null_filter_name; + + if (!lst->item_name) { + int i; + AVBPrint bp; + av_bprint_init(&bp, 16, 128); + + av_bprintf(&bp, "bsf_list("); + for (i = 0; i < lst->nb_bsfs; i++) + av_bprintf(&bp, i ? ",%s" : "%s", lst->bsfs[i]->filter->name); + av_bprintf(&bp, ")"); + + av_bprint_finalize(&bp, &lst->item_name); + } + + return lst->item_name; +} + +static const AVClass bsf_list_class = { + .class_name = "bsf_list", + .item_name = bsf_list_item_name, + .version = LIBAVUTIL_VERSION_INT, +}; + +const AVBitStreamFilter ff_list_bsf = { + .name = "bsf_list", + .priv_data_size = sizeof(BSFListContext), + .priv_class = &bsf_list_class, + .init = bsf_list_init, + .filter = bsf_list_filter, + .close = bsf_list_close, +}; + +struct AVBSFList { + AVBSFContext **bsfs; + int nb_bsfs; +}; + +AVBSFList *av_bsf_list_alloc(void) +{ + return av_mallocz(sizeof(AVBSFList)); +} + +void av_bsf_list_free(AVBSFList **lst) +{ + int i; + + if (!*lst) + return; + + for (i = 0; i < (*lst)->nb_bsfs; ++i) + av_bsf_free(&(*lst)->bsfs[i]); + av_free((*lst)->bsfs); + av_freep(lst); +} + +int av_bsf_list_append(AVBSFList *lst, AVBSFContext *bsf) +{ + return av_dynarray_add_nofree(&lst->bsfs, &lst->nb_bsfs, bsf); +} + +int av_bsf_list_append2(AVBSFList *lst, const char *bsf_name, AVDictionary ** options) +{ + int ret; + const AVBitStreamFilter *filter; + AVBSFContext *bsf; + + filter = av_bsf_get_by_name(bsf_name); + if (!filter) + return AVERROR_BSF_NOT_FOUND; + + ret = av_bsf_alloc(filter, &bsf); + if (ret < 0) + return ret; + + if (options) { + ret = av_opt_set_dict2(bsf, options, AV_OPT_SEARCH_CHILDREN); + if (ret < 0) + goto end; + } + + ret = av_bsf_list_append(lst, bsf); + +end: + if (ret < 0) + av_bsf_free(&bsf); + + return ret; +} + +int av_bsf_list_finalize(AVBSFList **lst, AVBSFContext **bsf) +{ + int ret = 0; + BSFListContext *ctx; + + if ((*lst)->nb_bsfs == 1) { + *bsf = (*lst)->bsfs[0]; + av_freep(&(*lst)->bsfs); + (*lst)->nb_bsfs = 0; + goto end; + } + + ret = av_bsf_alloc(&ff_list_bsf, bsf); + if (ret < 0) + return ret; + + ctx = (*bsf)->priv_data; + + ctx->bsfs = (*lst)->bsfs; + ctx->nb_bsfs = (*lst)->nb_bsfs; + +end: + av_freep(lst); + return ret; +} + +static int bsf_parse_single(const char *str, AVBSFList *bsf_lst) +{ + char *bsf_name, *bsf_options_str, *buf; + AVDictionary *bsf_options = NULL; + int ret = 0; + + if (!(buf = av_strdup(str))) + return AVERROR(ENOMEM); + + bsf_name = av_strtok(buf, "=", &bsf_options_str); + if (!bsf_name) { + ret = AVERROR(EINVAL); + goto end; + } + + if (bsf_options_str) { + ret = av_dict_parse_string(&bsf_options, bsf_options_str, "=", ":", 0); + if (ret < 0) + goto end; + } + + ret = av_bsf_list_append2(bsf_lst, bsf_name, &bsf_options); + + av_dict_free(&bsf_options); +end: + av_free(buf); + return ret; +} + +int av_bsf_list_parse_str(const char *str, AVBSFContext **bsf_lst) +{ + AVBSFList *lst; + char *bsf_str, *buf, *dup, *saveptr; + int ret; + + if (!str) + return av_bsf_get_null_filter(bsf_lst); + + lst = av_bsf_list_alloc(); + if (!lst) + return AVERROR(ENOMEM); + + if (!(dup = buf = av_strdup(str))) { + ret = AVERROR(ENOMEM); + goto end; + } + + while (1) { + bsf_str = av_strtok(buf, ",", &saveptr); + if (!bsf_str) + break; + + ret = bsf_parse_single(bsf_str, lst); + if (ret < 0) + goto end; + + buf = NULL; + } + + ret = av_bsf_list_finalize(&lst, bsf_lst); +end: + if (ret < 0) + av_bsf_list_free(&lst); + av_free(dup); + return ret; +} + +int av_bsf_get_null_filter(AVBSFContext **bsf) +{ + return av_bsf_alloc(&ff_list_bsf, bsf); +} diff --git a/libs/ffvpx/libavcodec/bsf.h b/libs/ffvpx/libavcodec/bsf.h new file mode 100644 index 000000000..af035eee4 --- /dev/null +++ b/libs/ffvpx/libavcodec/bsf.h @@ -0,0 +1,44 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_BSF_H +#define AVCODEC_BSF_H + +#include "avcodec.h" + +/** + * Called by the bitstream filters to get the next packet for filtering. + * The filter is responsible for either freeing the packet or passing it to the + * caller. + */ +int ff_bsf_get_packet(AVBSFContext *ctx, AVPacket **pkt); + +/** + * Called by bitstream filters to get packet for filtering. + * The reference to packet is moved to provided packet structure. + * + * @param ctx pointer to AVBSFContext of filter + * @param pkt pointer to packet to move reference to + * + * @return 0>= on success, negative AVERROR in case of failure + */ +int ff_bsf_get_packet_ref(AVBSFContext *ctx, AVPacket *pkt); + +const AVClass *ff_bsf_child_class_next(const AVClass *prev); + +#endif /* AVCODEC_BSF_H */ diff --git a/libs/ffvpx/libavcodec/bsf_list.c b/libs/ffvpx/libavcodec/bsf_list.c new file mode 100644 index 000000000..92d9948b2 --- /dev/null +++ b/libs/ffvpx/libavcodec/bsf_list.c @@ -0,0 +1,6 @@ +static const AVBitStreamFilter * const bitstream_filters[] = { + &ff_null_bsf, +#if CONFIG_VP9_SUPERFRAME_SPLIT_BSF + &ff_vp9_superframe_split_bsf, +#endif + NULL }; diff --git a/libs/ffvpx/libavcodec/bytestream.h b/libs/ffvpx/libavcodec/bytestream.h new file mode 100644 index 000000000..7be7fc22f --- /dev/null +++ b/libs/ffvpx/libavcodec/bytestream.h @@ -0,0 +1,376 @@ +/* + * Bytestream functions + * copyright (c) 2006 Baptiste Coudurier <baptiste.coudurier@free.fr> + * Copyright (c) 2012 Aneesh Dogra (lionaneesh) <lionaneesh@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_BYTESTREAM_H +#define AVCODEC_BYTESTREAM_H + +#include <stdint.h> +#include <string.h> + +#include "libavutil/avassert.h" +#include "libavutil/common.h" +#include "libavutil/intreadwrite.h" + +typedef struct GetByteContext { + const uint8_t *buffer, *buffer_end, *buffer_start; +} GetByteContext; + +typedef struct PutByteContext { + uint8_t *buffer, *buffer_end, *buffer_start; + int eof; +} PutByteContext; + +#define DEF(type, name, bytes, read, write) \ +static av_always_inline type bytestream_get_ ## name(const uint8_t **b) \ +{ \ + (*b) += bytes; \ + return read(*b - bytes); \ +} \ +static av_always_inline void bytestream_put_ ## name(uint8_t **b, \ + const type value) \ +{ \ + write(*b, value); \ + (*b) += bytes; \ +} \ +static av_always_inline void bytestream2_put_ ## name ## u(PutByteContext *p, \ + const type value) \ +{ \ + bytestream_put_ ## name(&p->buffer, value); \ +} \ +static av_always_inline void bytestream2_put_ ## name(PutByteContext *p, \ + const type value) \ +{ \ + if (!p->eof && (p->buffer_end - p->buffer >= bytes)) { \ + write(p->buffer, value); \ + p->buffer += bytes; \ + } else \ + p->eof = 1; \ +} \ +static av_always_inline type bytestream2_get_ ## name ## u(GetByteContext *g) \ +{ \ + return bytestream_get_ ## name(&g->buffer); \ +} \ +static av_always_inline type bytestream2_get_ ## name(GetByteContext *g) \ +{ \ + if (g->buffer_end - g->buffer < bytes) { \ + g->buffer = g->buffer_end; \ + return 0; \ + } \ + return bytestream2_get_ ## name ## u(g); \ +} \ +static av_always_inline type bytestream2_peek_ ## name(GetByteContext *g) \ +{ \ + if (g->buffer_end - g->buffer < bytes) \ + return 0; \ + return read(g->buffer); \ +} + +DEF(uint64_t, le64, 8, AV_RL64, AV_WL64) +DEF(unsigned int, le32, 4, AV_RL32, AV_WL32) +DEF(unsigned int, le24, 3, AV_RL24, AV_WL24) +DEF(unsigned int, le16, 2, AV_RL16, AV_WL16) +DEF(uint64_t, be64, 8, AV_RB64, AV_WB64) +DEF(unsigned int, be32, 4, AV_RB32, AV_WB32) +DEF(unsigned int, be24, 3, AV_RB24, AV_WB24) +DEF(unsigned int, be16, 2, AV_RB16, AV_WB16) +DEF(unsigned int, byte, 1, AV_RB8 , AV_WB8) + +#if AV_HAVE_BIGENDIAN +# define bytestream2_get_ne16 bytestream2_get_be16 +# define bytestream2_get_ne24 bytestream2_get_be24 +# define bytestream2_get_ne32 bytestream2_get_be32 +# define bytestream2_get_ne64 bytestream2_get_be64 +# define bytestream2_get_ne16u bytestream2_get_be16u +# define bytestream2_get_ne24u bytestream2_get_be24u +# define bytestream2_get_ne32u bytestream2_get_be32u +# define bytestream2_get_ne64u bytestream2_get_be64u +# define bytestream2_put_ne16 bytestream2_put_be16 +# define bytestream2_put_ne24 bytestream2_put_be24 +# define bytestream2_put_ne32 bytestream2_put_be32 +# define bytestream2_put_ne64 bytestream2_put_be64 +# define bytestream2_peek_ne16 bytestream2_peek_be16 +# define bytestream2_peek_ne24 bytestream2_peek_be24 +# define bytestream2_peek_ne32 bytestream2_peek_be32 +# define bytestream2_peek_ne64 bytestream2_peek_be64 +#else +# define bytestream2_get_ne16 bytestream2_get_le16 +# define bytestream2_get_ne24 bytestream2_get_le24 +# define bytestream2_get_ne32 bytestream2_get_le32 +# define bytestream2_get_ne64 bytestream2_get_le64 +# define bytestream2_get_ne16u bytestream2_get_le16u +# define bytestream2_get_ne24u bytestream2_get_le24u +# define bytestream2_get_ne32u bytestream2_get_le32u +# define bytestream2_get_ne64u bytestream2_get_le64u +# define bytestream2_put_ne16 bytestream2_put_le16 +# define bytestream2_put_ne24 bytestream2_put_le24 +# define bytestream2_put_ne32 bytestream2_put_le32 +# define bytestream2_put_ne64 bytestream2_put_le64 +# define bytestream2_peek_ne16 bytestream2_peek_le16 +# define bytestream2_peek_ne24 bytestream2_peek_le24 +# define bytestream2_peek_ne32 bytestream2_peek_le32 +# define bytestream2_peek_ne64 bytestream2_peek_le64 +#endif + +static av_always_inline void bytestream2_init(GetByteContext *g, + const uint8_t *buf, + int buf_size) +{ + av_assert0(buf_size >= 0); + g->buffer = buf; + g->buffer_start = buf; + g->buffer_end = buf + buf_size; +} + +static av_always_inline void bytestream2_init_writer(PutByteContext *p, + uint8_t *buf, + int buf_size) +{ + av_assert0(buf_size >= 0); + p->buffer = buf; + p->buffer_start = buf; + p->buffer_end = buf + buf_size; + p->eof = 0; +} + +static av_always_inline unsigned int bytestream2_get_bytes_left(GetByteContext *g) +{ + return g->buffer_end - g->buffer; +} + +static av_always_inline unsigned int bytestream2_get_bytes_left_p(PutByteContext *p) +{ + return p->buffer_end - p->buffer; +} + +static av_always_inline void bytestream2_skip(GetByteContext *g, + unsigned int size) +{ + g->buffer += FFMIN(g->buffer_end - g->buffer, size); +} + +static av_always_inline void bytestream2_skipu(GetByteContext *g, + unsigned int size) +{ + g->buffer += size; +} + +static av_always_inline void bytestream2_skip_p(PutByteContext *p, + unsigned int size) +{ + int size2; + if (p->eof) + return; + size2 = FFMIN(p->buffer_end - p->buffer, size); + if (size2 != size) + p->eof = 1; + p->buffer += size2; +} + +static av_always_inline int bytestream2_tell(GetByteContext *g) +{ + return (int)(g->buffer - g->buffer_start); +} + +static av_always_inline int bytestream2_tell_p(PutByteContext *p) +{ + return (int)(p->buffer - p->buffer_start); +} + +static av_always_inline int bytestream2_size(GetByteContext *g) +{ + return (int)(g->buffer_end - g->buffer_start); +} + +static av_always_inline int bytestream2_size_p(PutByteContext *p) +{ + return (int)(p->buffer_end - p->buffer_start); +} + +static av_always_inline int bytestream2_seek(GetByteContext *g, + int offset, + int whence) +{ + switch (whence) { + case SEEK_CUR: + offset = av_clip(offset, -(g->buffer - g->buffer_start), + g->buffer_end - g->buffer); + g->buffer += offset; + break; + case SEEK_END: + offset = av_clip(offset, -(g->buffer_end - g->buffer_start), 0); + g->buffer = g->buffer_end + offset; + break; + case SEEK_SET: + offset = av_clip(offset, 0, g->buffer_end - g->buffer_start); + g->buffer = g->buffer_start + offset; + break; + default: + return AVERROR(EINVAL); + } + return bytestream2_tell(g); +} + +static av_always_inline int bytestream2_seek_p(PutByteContext *p, + int offset, + int whence) +{ + p->eof = 0; + switch (whence) { + case SEEK_CUR: + if (p->buffer_end - p->buffer < offset) + p->eof = 1; + offset = av_clip(offset, -(p->buffer - p->buffer_start), + p->buffer_end - p->buffer); + p->buffer += offset; + break; + case SEEK_END: + if (offset > 0) + p->eof = 1; + offset = av_clip(offset, -(p->buffer_end - p->buffer_start), 0); + p->buffer = p->buffer_end + offset; + break; + case SEEK_SET: + if (p->buffer_end - p->buffer_start < offset) + p->eof = 1; + offset = av_clip(offset, 0, p->buffer_end - p->buffer_start); + p->buffer = p->buffer_start + offset; + break; + default: + return AVERROR(EINVAL); + } + return bytestream2_tell_p(p); +} + +static av_always_inline unsigned int bytestream2_get_buffer(GetByteContext *g, + uint8_t *dst, + unsigned int size) +{ + int size2 = FFMIN(g->buffer_end - g->buffer, size); + memcpy(dst, g->buffer, size2); + g->buffer += size2; + return size2; +} + +static av_always_inline unsigned int bytestream2_get_bufferu(GetByteContext *g, + uint8_t *dst, + unsigned int size) +{ + memcpy(dst, g->buffer, size); + g->buffer += size; + return size; +} + +static av_always_inline unsigned int bytestream2_put_buffer(PutByteContext *p, + const uint8_t *src, + unsigned int size) +{ + int size2; + if (p->eof) + return 0; + size2 = FFMIN(p->buffer_end - p->buffer, size); + if (size2 != size) + p->eof = 1; + memcpy(p->buffer, src, size2); + p->buffer += size2; + return size2; +} + +static av_always_inline unsigned int bytestream2_put_bufferu(PutByteContext *p, + const uint8_t *src, + unsigned int size) +{ + memcpy(p->buffer, src, size); + p->buffer += size; + return size; +} + +static av_always_inline void bytestream2_set_buffer(PutByteContext *p, + const uint8_t c, + unsigned int size) +{ + int size2; + if (p->eof) + return; + size2 = FFMIN(p->buffer_end - p->buffer, size); + if (size2 != size) + p->eof = 1; + memset(p->buffer, c, size2); + p->buffer += size2; +} + +static av_always_inline void bytestream2_set_bufferu(PutByteContext *p, + const uint8_t c, + unsigned int size) +{ + memset(p->buffer, c, size); + p->buffer += size; +} + +static av_always_inline unsigned int bytestream2_get_eof(PutByteContext *p) +{ + return p->eof; +} + +static av_always_inline unsigned int bytestream2_copy_bufferu(PutByteContext *p, + GetByteContext *g, + unsigned int size) +{ + memcpy(p->buffer, g->buffer, size); + p->buffer += size; + g->buffer += size; + return size; +} + +static av_always_inline unsigned int bytestream2_copy_buffer(PutByteContext *p, + GetByteContext *g, + unsigned int size) +{ + int size2; + + if (p->eof) + return 0; + size = FFMIN(g->buffer_end - g->buffer, size); + size2 = FFMIN(p->buffer_end - p->buffer, size); + if (size2 != size) + p->eof = 1; + + return bytestream2_copy_bufferu(p, g, size2); +} + +static av_always_inline unsigned int bytestream_get_buffer(const uint8_t **b, + uint8_t *dst, + unsigned int size) +{ + memcpy(dst, *b, size); + (*b) += size; + return size; +} + +static av_always_inline void bytestream_put_buffer(uint8_t **b, + const uint8_t *src, + unsigned int size) +{ + memcpy(*b, src, size); + (*b) += size; +} + +#endif /* AVCODEC_BYTESTREAM_H */ diff --git a/libs/ffvpx/libavcodec/codec_desc.c b/libs/ffvpx/libavcodec/codec_desc.c new file mode 100644 index 000000000..79552a910 --- /dev/null +++ b/libs/ffvpx/libavcodec/codec_desc.c @@ -0,0 +1,3164 @@ +/* + * This file is part of FFmpeg. + * + * This table was generated from the long and short names of AVCodecs + * please see the respective codec sources for authorship + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <string.h> + +#include "libavutil/common.h" +#include "libavutil/internal.h" +#include "avcodec.h" +#include "profiles.h" +#include "version.h" + +#define MT(...) (const char *const[]){ __VA_ARGS__, NULL } + +static const AVCodecDescriptor codec_descriptors[] = { + /* video codecs */ + { + .id = AV_CODEC_ID_MPEG1VIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mpeg1video", + .long_name = NULL_IF_CONFIG_SMALL("MPEG-1 video"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER, + }, + { + .id = AV_CODEC_ID_MPEG2VIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mpeg2video", + .long_name = NULL_IF_CONFIG_SMALL("MPEG-2 video"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER, + .profiles = NULL_IF_CONFIG_SMALL(ff_mpeg2_video_profiles), + }, + { + .id = AV_CODEC_ID_H261, + .type = AVMEDIA_TYPE_VIDEO, + .name = "h261", + .long_name = NULL_IF_CONFIG_SMALL("H.261"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_H263, + .type = AVMEDIA_TYPE_VIDEO, + .name = "h263", + .long_name = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996, H.263+ / H.263-1998 / H.263 version 2"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER, + }, + { + .id = AV_CODEC_ID_RV10, + .type = AVMEDIA_TYPE_VIDEO, + .name = "rv10", + .long_name = NULL_IF_CONFIG_SMALL("RealVideo 1.0"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_RV20, + .type = AVMEDIA_TYPE_VIDEO, + .name = "rv20", + .long_name = NULL_IF_CONFIG_SMALL("RealVideo 2.0"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER, + }, + { + .id = AV_CODEC_ID_MJPEG, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mjpeg", + .long_name = NULL_IF_CONFIG_SMALL("Motion JPEG"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + .mime_types= MT("image/jpeg"), + }, + { + .id = AV_CODEC_ID_MJPEGB, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mjpegb", + .long_name = NULL_IF_CONFIG_SMALL("Apple MJPEG-B"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_LJPEG, + .type = AVMEDIA_TYPE_VIDEO, + .name = "ljpeg", + .long_name = NULL_IF_CONFIG_SMALL("Lossless JPEG"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_SP5X, + .type = AVMEDIA_TYPE_VIDEO, + .name = "sp5x", + .long_name = NULL_IF_CONFIG_SMALL("Sunplus JPEG (SP5X)"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_JPEGLS, + .type = AVMEDIA_TYPE_VIDEO, + .name = "jpegls", + .long_name = NULL_IF_CONFIG_SMALL("JPEG-LS"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | + AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_MPEG4, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mpeg4", + .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 part 2"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER, + .profiles = NULL_IF_CONFIG_SMALL(ff_mpeg4_video_profiles), + }, + { + .id = AV_CODEC_ID_RAWVIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "rawvideo", + .long_name = NULL_IF_CONFIG_SMALL("raw video"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_MSMPEG4V1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "msmpeg4v1", + .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 1"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MSMPEG4V2, + .type = AVMEDIA_TYPE_VIDEO, + .name = "msmpeg4v2", + .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MSMPEG4V3, + .type = AVMEDIA_TYPE_VIDEO, + .name = "msmpeg4v3", + .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_WMV1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "wmv1", + .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 7"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_WMV2, + .type = AVMEDIA_TYPE_VIDEO, + .name = "wmv2", + .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 8"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_H263P, + .type = AVMEDIA_TYPE_VIDEO, + .name = "h263p", + .long_name = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER, + }, + { + .id = AV_CODEC_ID_H263I, + .type = AVMEDIA_TYPE_VIDEO, + .name = "h263i", + .long_name = NULL_IF_CONFIG_SMALL("Intel H.263"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER, + }, + { + .id = AV_CODEC_ID_FLV1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "flv1", + .long_name = NULL_IF_CONFIG_SMALL("FLV / Sorenson Spark / Sorenson H.263 (Flash Video)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SVQ1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "svq1", + .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 1 / Sorenson Video 1 / SVQ1"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SVQ3, + .type = AVMEDIA_TYPE_VIDEO, + .name = "svq3", + .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER, + }, + { + .id = AV_CODEC_ID_DVVIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "dvvideo", + .long_name = NULL_IF_CONFIG_SMALL("DV (Digital Video)"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_HUFFYUV, + .type = AVMEDIA_TYPE_VIDEO, + .name = "huffyuv", + .long_name = NULL_IF_CONFIG_SMALL("HuffYUV"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_CYUV, + .type = AVMEDIA_TYPE_VIDEO, + .name = "cyuv", + .long_name = NULL_IF_CONFIG_SMALL("Creative YUV (CYUV)"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_H264, + .type = AVMEDIA_TYPE_VIDEO, + .name = "h264", + .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS | AV_CODEC_PROP_REORDER, + .profiles = NULL_IF_CONFIG_SMALL(ff_h264_profiles), + }, + { + .id = AV_CODEC_ID_INDEO3, + .type = AVMEDIA_TYPE_VIDEO, + .name = "indeo3", + .long_name = NULL_IF_CONFIG_SMALL("Intel Indeo 3"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_VP3, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vp3", + .long_name = NULL_IF_CONFIG_SMALL("On2 VP3"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_THEORA, + .type = AVMEDIA_TYPE_VIDEO, + .name = "theora", + .long_name = NULL_IF_CONFIG_SMALL("Theora"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ASV1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "asv1", + .long_name = NULL_IF_CONFIG_SMALL("ASUS V1"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ASV2, + .type = AVMEDIA_TYPE_VIDEO, + .name = "asv2", + .long_name = NULL_IF_CONFIG_SMALL("ASUS V2"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_FFV1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "ffv1", + .long_name = NULL_IF_CONFIG_SMALL("FFmpeg video codec #1"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_4XM, + .type = AVMEDIA_TYPE_VIDEO, + .name = "4xm", + .long_name = NULL_IF_CONFIG_SMALL("4X Movie"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_VCR1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vcr1", + .long_name = NULL_IF_CONFIG_SMALL("ATI VCR1"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_CLJR, + .type = AVMEDIA_TYPE_VIDEO, + .name = "cljr", + .long_name = NULL_IF_CONFIG_SMALL("Cirrus Logic AccuPak"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MDEC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mdec", + .long_name = NULL_IF_CONFIG_SMALL("Sony PlayStation MDEC (Motion DECoder)"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ROQ, + .type = AVMEDIA_TYPE_VIDEO, + .name = "roq", + .long_name = NULL_IF_CONFIG_SMALL("id RoQ video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_INTERPLAY_VIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "interplayvideo", + .long_name = NULL_IF_CONFIG_SMALL("Interplay MVE video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_XAN_WC3, + .type = AVMEDIA_TYPE_VIDEO, + .name = "xan_wc3", + .long_name = NULL_IF_CONFIG_SMALL("Wing Commander III / Xan"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_XAN_WC4, + .type = AVMEDIA_TYPE_VIDEO, + .name = "xan_wc4", + .long_name = NULL_IF_CONFIG_SMALL("Wing Commander IV / Xxan"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_RPZA, + .type = AVMEDIA_TYPE_VIDEO, + .name = "rpza", + .long_name = NULL_IF_CONFIG_SMALL("QuickTime video (RPZA)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_CINEPAK, + .type = AVMEDIA_TYPE_VIDEO, + .name = "cinepak", + .long_name = NULL_IF_CONFIG_SMALL("Cinepak"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_WS_VQA, + .type = AVMEDIA_TYPE_VIDEO, + .name = "ws_vqa", + .long_name = NULL_IF_CONFIG_SMALL("Westwood Studios VQA (Vector Quantized Animation) video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MSRLE, + .type = AVMEDIA_TYPE_VIDEO, + .name = "msrle", + .long_name = NULL_IF_CONFIG_SMALL("Microsoft RLE"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_MSVIDEO1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "msvideo1", + .long_name = NULL_IF_CONFIG_SMALL("Microsoft Video 1"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_IDCIN, + .type = AVMEDIA_TYPE_VIDEO, + .name = "idcin", + .long_name = NULL_IF_CONFIG_SMALL("id Quake II CIN video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_8BPS, + .type = AVMEDIA_TYPE_VIDEO, + .name = "8bps", + .long_name = NULL_IF_CONFIG_SMALL("QuickTime 8BPS video"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_SMC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "smc", + .long_name = NULL_IF_CONFIG_SMALL("QuickTime Graphics (SMC)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_FLIC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "flic", + .long_name = NULL_IF_CONFIG_SMALL("Autodesk Animator Flic video"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_TRUEMOTION1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "truemotion1", + .long_name = NULL_IF_CONFIG_SMALL("Duck TrueMotion 1.0"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_VMDVIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vmdvideo", + .long_name = NULL_IF_CONFIG_SMALL("Sierra VMD video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MSZH, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mszh", + .long_name = NULL_IF_CONFIG_SMALL("LCL (LossLess Codec Library) MSZH"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_ZLIB, + .type = AVMEDIA_TYPE_VIDEO, + .name = "zlib", + .long_name = NULL_IF_CONFIG_SMALL("LCL (LossLess Codec Library) ZLIB"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_QTRLE, + .type = AVMEDIA_TYPE_VIDEO, + .name = "qtrle", + .long_name = NULL_IF_CONFIG_SMALL("QuickTime Animation (RLE) video"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_TSCC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "tscc", + .long_name = NULL_IF_CONFIG_SMALL("TechSmith Screen Capture Codec"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_ULTI, + .type = AVMEDIA_TYPE_VIDEO, + .name = "ulti", + .long_name = NULL_IF_CONFIG_SMALL("IBM UltiMotion"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_QDRAW, + .type = AVMEDIA_TYPE_VIDEO, + .name = "qdraw", + .long_name = NULL_IF_CONFIG_SMALL("Apple QuickDraw"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_VIXL, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vixl", + .long_name = NULL_IF_CONFIG_SMALL("Miro VideoXL"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_QPEG, + .type = AVMEDIA_TYPE_VIDEO, + .name = "qpeg", + .long_name = NULL_IF_CONFIG_SMALL("Q-team QPEG"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_PNG, + .type = AVMEDIA_TYPE_VIDEO, + .name = "png", + .long_name = NULL_IF_CONFIG_SMALL("PNG (Portable Network Graphics) image"), + .props = AV_CODEC_PROP_LOSSLESS, + .mime_types= MT("image/png"), + }, + { + .id = AV_CODEC_ID_PPM, + .type = AVMEDIA_TYPE_VIDEO, + .name = "ppm", + .long_name = NULL_IF_CONFIG_SMALL("PPM (Portable PixelMap) image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PBM, + .type = AVMEDIA_TYPE_VIDEO, + .name = "pbm", + .long_name = NULL_IF_CONFIG_SMALL("PBM (Portable BitMap) image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PGM, + .type = AVMEDIA_TYPE_VIDEO, + .name = "pgm", + .long_name = NULL_IF_CONFIG_SMALL("PGM (Portable GrayMap) image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PGMYUV, + .type = AVMEDIA_TYPE_VIDEO, + .name = "pgmyuv", + .long_name = NULL_IF_CONFIG_SMALL("PGMYUV (Portable GrayMap YUV) image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PAM, + .type = AVMEDIA_TYPE_VIDEO, + .name = "pam", + .long_name = NULL_IF_CONFIG_SMALL("PAM (Portable AnyMap) image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + .mime_types= MT("image/x-portable-pixmap"), + }, + { + .id = AV_CODEC_ID_FFVHUFF, + .type = AVMEDIA_TYPE_VIDEO, + .name = "ffvhuff", + .long_name = NULL_IF_CONFIG_SMALL("Huffyuv FFmpeg variant"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_RV30, + .type = AVMEDIA_TYPE_VIDEO, + .name = "rv30", + .long_name = NULL_IF_CONFIG_SMALL("RealVideo 3.0"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER, + }, + { + .id = AV_CODEC_ID_RV40, + .type = AVMEDIA_TYPE_VIDEO, + .name = "rv40", + .long_name = NULL_IF_CONFIG_SMALL("RealVideo 4.0"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER, + }, + { + .id = AV_CODEC_ID_VC1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vc1", + .long_name = NULL_IF_CONFIG_SMALL("SMPTE VC-1"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER, + .profiles = NULL_IF_CONFIG_SMALL(ff_vc1_profiles), + }, + { + .id = AV_CODEC_ID_WMV3, + .type = AVMEDIA_TYPE_VIDEO, + .name = "wmv3", + .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER, + .profiles = NULL_IF_CONFIG_SMALL(ff_vc1_profiles), + }, + { + .id = AV_CODEC_ID_LOCO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "loco", + .long_name = NULL_IF_CONFIG_SMALL("LOCO"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_WNV1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "wnv1", + .long_name = NULL_IF_CONFIG_SMALL("Winnov WNV1"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_AASC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "aasc", + .long_name = NULL_IF_CONFIG_SMALL("Autodesk RLE"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_INDEO2, + .type = AVMEDIA_TYPE_VIDEO, + .name = "indeo2", + .long_name = NULL_IF_CONFIG_SMALL("Intel Indeo 2"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_FRAPS, + .type = AVMEDIA_TYPE_VIDEO, + .name = "fraps", + .long_name = NULL_IF_CONFIG_SMALL("Fraps"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_TRUEMOTION2, + .type = AVMEDIA_TYPE_VIDEO, + .name = "truemotion2", + .long_name = NULL_IF_CONFIG_SMALL("Duck TrueMotion 2.0"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_BMP, + .type = AVMEDIA_TYPE_VIDEO, + .name = "bmp", + .long_name = NULL_IF_CONFIG_SMALL("BMP (Windows and OS/2 bitmap)"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + .mime_types= MT("image/x-ms-bmp"), + }, + { + .id = AV_CODEC_ID_CSCD, + .type = AVMEDIA_TYPE_VIDEO, + .name = "cscd", + .long_name = NULL_IF_CONFIG_SMALL("CamStudio"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_MMVIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mmvideo", + .long_name = NULL_IF_CONFIG_SMALL("American Laser Games MM Video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ZMBV, + .type = AVMEDIA_TYPE_VIDEO, + .name = "zmbv", + .long_name = NULL_IF_CONFIG_SMALL("Zip Motion Blocks Video"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_AVS, + .type = AVMEDIA_TYPE_VIDEO, + .name = "avs", + .long_name = NULL_IF_CONFIG_SMALL("AVS (Audio Video Standard) video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SMACKVIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "smackvideo", + .long_name = NULL_IF_CONFIG_SMALL("Smacker video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_NUV, + .type = AVMEDIA_TYPE_VIDEO, + .name = "nuv", + .long_name = NULL_IF_CONFIG_SMALL("NuppelVideo/RTJPEG"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_KMVC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "kmvc", + .long_name = NULL_IF_CONFIG_SMALL("Karl Morton's video codec"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_FLASHSV, + .type = AVMEDIA_TYPE_VIDEO, + .name = "flashsv", + .long_name = NULL_IF_CONFIG_SMALL("Flash Screen Video v1"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_CAVS, + .type = AVMEDIA_TYPE_VIDEO, + .name = "cavs", + .long_name = NULL_IF_CONFIG_SMALL("Chinese AVS (Audio Video Standard) (AVS1-P2, JiZhun profile)"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER, + }, + { + .id = AV_CODEC_ID_JPEG2000, + .type = AVMEDIA_TYPE_VIDEO, + .name = "jpeg2000", + .long_name = NULL_IF_CONFIG_SMALL("JPEG 2000"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | + AV_CODEC_PROP_LOSSLESS, + .mime_types= MT("image/jp2"), + .profiles = NULL_IF_CONFIG_SMALL(ff_jpeg2000_profiles), + }, + { + .id = AV_CODEC_ID_VMNC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vmnc", + .long_name = NULL_IF_CONFIG_SMALL("VMware Screen Codec / VMware Video"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_VP5, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vp5", + .long_name = NULL_IF_CONFIG_SMALL("On2 VP5"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_VP6, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vp6", + .long_name = NULL_IF_CONFIG_SMALL("On2 VP6"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_VP6F, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vp6f", + .long_name = NULL_IF_CONFIG_SMALL("On2 VP6 (Flash version)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_TARGA, + .type = AVMEDIA_TYPE_VIDEO, + .name = "targa", + .long_name = NULL_IF_CONFIG_SMALL("Truevision Targa image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + .mime_types= MT("image/x-targa", "image/x-tga"), + }, + { + .id = AV_CODEC_ID_DSICINVIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "dsicinvideo", + .long_name = NULL_IF_CONFIG_SMALL("Delphine Software International CIN video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_TIERTEXSEQVIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "tiertexseqvideo", + .long_name = NULL_IF_CONFIG_SMALL("Tiertex Limited SEQ video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_TIFF, + .type = AVMEDIA_TYPE_VIDEO, + .name = "tiff", + .long_name = NULL_IF_CONFIG_SMALL("TIFF image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + .mime_types= MT("image/tiff"), + }, + { + .id = AV_CODEC_ID_GIF, + .type = AVMEDIA_TYPE_VIDEO, + .name = "gif", + .long_name = NULL_IF_CONFIG_SMALL("GIF (Graphics Interchange Format)"), + .props = AV_CODEC_PROP_LOSSLESS, + .mime_types= MT("image/gif"), + }, + { + .id = AV_CODEC_ID_DXA, + .type = AVMEDIA_TYPE_VIDEO, + .name = "dxa", + .long_name = NULL_IF_CONFIG_SMALL("Feeble Files/ScummVM DXA"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_DNXHD, + .type = AVMEDIA_TYPE_VIDEO, + .name = "dnxhd", + .long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + .profiles = NULL_IF_CONFIG_SMALL(ff_dnxhd_profiles), + }, + { + .id = AV_CODEC_ID_THP, + .type = AVMEDIA_TYPE_VIDEO, + .name = "thp", + .long_name = NULL_IF_CONFIG_SMALL("Nintendo Gamecube THP video"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SGI, + .type = AVMEDIA_TYPE_VIDEO, + .name = "sgi", + .long_name = NULL_IF_CONFIG_SMALL("SGI image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_C93, + .type = AVMEDIA_TYPE_VIDEO, + .name = "c93", + .long_name = NULL_IF_CONFIG_SMALL("Interplay C93"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_BETHSOFTVID, + .type = AVMEDIA_TYPE_VIDEO, + .name = "bethsoftvid", + .long_name = NULL_IF_CONFIG_SMALL("Bethesda VID video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_PTX, + .type = AVMEDIA_TYPE_VIDEO, + .name = "ptx", + .long_name = NULL_IF_CONFIG_SMALL("V.Flash PTX image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_TXD, + .type = AVMEDIA_TYPE_VIDEO, + .name = "txd", + .long_name = NULL_IF_CONFIG_SMALL("Renderware TXD (TeXture Dictionary) image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_VP6A, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vp6a", + .long_name = NULL_IF_CONFIG_SMALL("On2 VP6 (Flash version, with alpha channel)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_AMV, + .type = AVMEDIA_TYPE_VIDEO, + .name = "amv", + .long_name = NULL_IF_CONFIG_SMALL("AMV Video"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_VB, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vb", + .long_name = NULL_IF_CONFIG_SMALL("Beam Software VB"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_PCX, + .type = AVMEDIA_TYPE_VIDEO, + .name = "pcx", + .long_name = NULL_IF_CONFIG_SMALL("PC Paintbrush PCX image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + .mime_types= MT("image/x-pcx"), + }, + { + .id = AV_CODEC_ID_SUNRAST, + .type = AVMEDIA_TYPE_VIDEO, + .name = "sunrast", + .long_name = NULL_IF_CONFIG_SMALL("Sun Rasterfile image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_INDEO4, + .type = AVMEDIA_TYPE_VIDEO, + .name = "indeo4", + .long_name = NULL_IF_CONFIG_SMALL("Intel Indeo Video Interactive 4"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_INDEO5, + .type = AVMEDIA_TYPE_VIDEO, + .name = "indeo5", + .long_name = NULL_IF_CONFIG_SMALL("Intel Indeo Video Interactive 5"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MIMIC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mimic", + .long_name = NULL_IF_CONFIG_SMALL("Mimic"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_RL2, + .type = AVMEDIA_TYPE_VIDEO, + .name = "rl2", + .long_name = NULL_IF_CONFIG_SMALL("RL2 video"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ESCAPE124, + .type = AVMEDIA_TYPE_VIDEO, + .name = "escape124", + .long_name = NULL_IF_CONFIG_SMALL("Escape 124"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_DIRAC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "dirac", + .long_name = NULL_IF_CONFIG_SMALL("Dirac"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS | AV_CODEC_PROP_REORDER, + }, + { + .id = AV_CODEC_ID_BFI, + .type = AVMEDIA_TYPE_VIDEO, + .name = "bfi", + .long_name = NULL_IF_CONFIG_SMALL("Brute Force & Ignorance"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_CMV, + .type = AVMEDIA_TYPE_VIDEO, + .name = "cmv", + .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts CMV video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MOTIONPIXELS, + .type = AVMEDIA_TYPE_VIDEO, + .name = "motionpixels", + .long_name = NULL_IF_CONFIG_SMALL("Motion Pixels video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_TGV, + .type = AVMEDIA_TYPE_VIDEO, + .name = "tgv", + .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts TGV video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_TGQ, + .type = AVMEDIA_TYPE_VIDEO, + .name = "tgq", + .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts TGQ video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_TQI, + .type = AVMEDIA_TYPE_VIDEO, + .name = "tqi", + .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts TQI video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_AURA, + .type = AVMEDIA_TYPE_VIDEO, + .name = "aura", + .long_name = NULL_IF_CONFIG_SMALL("Auravision AURA"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_AURA2, + .type = AVMEDIA_TYPE_VIDEO, + .name = "aura2", + .long_name = NULL_IF_CONFIG_SMALL("Auravision Aura 2"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_V210X, + .type = AVMEDIA_TYPE_VIDEO, + .name = "v210x", + .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_TMV, + .type = AVMEDIA_TYPE_VIDEO, + .name = "tmv", + .long_name = NULL_IF_CONFIG_SMALL("8088flex TMV"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_V210, + .type = AVMEDIA_TYPE_VIDEO, + .name = "v210", + .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_DPX, + .type = AVMEDIA_TYPE_VIDEO, + .name = "dpx", + .long_name = NULL_IF_CONFIG_SMALL("DPX (Digital Picture Exchange) image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_MAD, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mad", + .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts Madcow Video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_FRWU, + .type = AVMEDIA_TYPE_VIDEO, + .name = "frwu", + .long_name = NULL_IF_CONFIG_SMALL("Forward Uncompressed"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_FLASHSV2, + .type = AVMEDIA_TYPE_VIDEO, + .name = "flashsv2", + .long_name = NULL_IF_CONFIG_SMALL("Flash Screen Video v2"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_CDGRAPHICS, + .type = AVMEDIA_TYPE_VIDEO, + .name = "cdgraphics", + .long_name = NULL_IF_CONFIG_SMALL("CD Graphics video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_R210, + .type = AVMEDIA_TYPE_VIDEO, + .name = "r210", + .long_name = NULL_IF_CONFIG_SMALL("Uncompressed RGB 10-bit"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_ANM, + .type = AVMEDIA_TYPE_VIDEO, + .name = "anm", + .long_name = NULL_IF_CONFIG_SMALL("Deluxe Paint Animation"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_BINKVIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "binkvideo", + .long_name = NULL_IF_CONFIG_SMALL("Bink video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_IFF_ILBM, + .type = AVMEDIA_TYPE_VIDEO, + .name = "iff_ilbm", + .long_name = NULL_IF_CONFIG_SMALL("IFF ACBM/ANIM/DEEP/ILBM/PBM/RGB8/RGBN"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_KGV1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "kgv1", + .long_name = NULL_IF_CONFIG_SMALL("Kega Game Video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_YOP, + .type = AVMEDIA_TYPE_VIDEO, + .name = "yop", + .long_name = NULL_IF_CONFIG_SMALL("Psygnosis YOP Video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_VP8, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vp8", + .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_PICTOR, + .type = AVMEDIA_TYPE_VIDEO, + .name = "pictor", + .long_name = NULL_IF_CONFIG_SMALL("Pictor/PC Paint"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ANSI, + .type = AVMEDIA_TYPE_VIDEO, + .name = "ansi", + .long_name = NULL_IF_CONFIG_SMALL("ASCII/ANSI art"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_A64_MULTI, + .type = AVMEDIA_TYPE_VIDEO, + .name = "a64_multi", + .long_name = NULL_IF_CONFIG_SMALL("Multicolor charset for Commodore 64"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_A64_MULTI5, + .type = AVMEDIA_TYPE_VIDEO, + .name = "a64_multi5", + .long_name = NULL_IF_CONFIG_SMALL("Multicolor charset for Commodore 64, extended with 5th color (colram)"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_R10K, + .type = AVMEDIA_TYPE_VIDEO, + .name = "r10k", + .long_name = NULL_IF_CONFIG_SMALL("AJA Kona 10-bit RGB Codec"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_MXPEG, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mxpeg", + .long_name = NULL_IF_CONFIG_SMALL("Mobotix MxPEG video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_LAGARITH, + .type = AVMEDIA_TYPE_VIDEO, + .name = "lagarith", + .long_name = NULL_IF_CONFIG_SMALL("Lagarith lossless"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PRORES, + .type = AVMEDIA_TYPE_VIDEO, + .name = "prores", + .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_JV, + .type = AVMEDIA_TYPE_VIDEO, + .name = "jv", + .long_name = NULL_IF_CONFIG_SMALL("Bitmap Brothers JV video"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_DFA, + .type = AVMEDIA_TYPE_VIDEO, + .name = "dfa", + .long_name = NULL_IF_CONFIG_SMALL("Chronomaster DFA"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_WMV3IMAGE, + .type = AVMEDIA_TYPE_VIDEO, + .name = "wmv3image", + .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9 Image"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_VC1IMAGE, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vc1image", + .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9 Image v2"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_UTVIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "utvideo", + .long_name = NULL_IF_CONFIG_SMALL("Ut Video"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_BMV_VIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "bmv_video", + .long_name = NULL_IF_CONFIG_SMALL("Discworld II BMV video"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_VBLE, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vble", + .long_name = NULL_IF_CONFIG_SMALL("VBLE Lossless Codec"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_DXTORY, + .type = AVMEDIA_TYPE_VIDEO, + .name = "dxtory", + .long_name = NULL_IF_CONFIG_SMALL("Dxtory"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_V410, + .type = AVMEDIA_TYPE_VIDEO, + .name = "v410", + .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:4:4 10-bit"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_XWD, + .type = AVMEDIA_TYPE_VIDEO, + .name = "xwd", + .long_name = NULL_IF_CONFIG_SMALL("XWD (X Window Dump) image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + .mime_types= MT("image/x-xwindowdump"), + }, + { + .id = AV_CODEC_ID_CDXL, + .type = AVMEDIA_TYPE_VIDEO, + .name = "cdxl", + .long_name = NULL_IF_CONFIG_SMALL("Commodore CDXL video"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_XBM, + .type = AVMEDIA_TYPE_VIDEO, + .name = "xbm", + .long_name = NULL_IF_CONFIG_SMALL("XBM (X BitMap) image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + .mime_types= MT("image/x-xbitmap"), + }, + { + .id = AV_CODEC_ID_ZEROCODEC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "zerocodec", + .long_name = NULL_IF_CONFIG_SMALL("ZeroCodec Lossless Video"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_MSS1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mss1", + .long_name = NULL_IF_CONFIG_SMALL("MS Screen 1"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MSA1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "msa1", + .long_name = NULL_IF_CONFIG_SMALL("MS ATC Screen"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_TSCC2, + .type = AVMEDIA_TYPE_VIDEO, + .name = "tscc2", + .long_name = NULL_IF_CONFIG_SMALL("TechSmith Screen Codec 2"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MTS2, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mts2", + .long_name = NULL_IF_CONFIG_SMALL("MS Expression Encoder Screen"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_CLLC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "cllc", + .long_name = NULL_IF_CONFIG_SMALL("Canopus Lossless Codec"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_MSS2, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mss2", + .long_name = NULL_IF_CONFIG_SMALL("MS Windows Media Video V9 Screen"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_VP9, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vp9", + .long_name = NULL_IF_CONFIG_SMALL("Google VP9"), + .props = AV_CODEC_PROP_LOSSY, + .profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles), + }, + { + .id = AV_CODEC_ID_AIC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "aic", + .long_name = NULL_IF_CONFIG_SMALL("Apple Intermediate Codec"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ESCAPE130, + .type = AVMEDIA_TYPE_VIDEO, + .name = "escape130", + .long_name = NULL_IF_CONFIG_SMALL("Escape 130"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_G2M, + .type = AVMEDIA_TYPE_VIDEO, + .name = "g2m", + .long_name = NULL_IF_CONFIG_SMALL("Go2Meeting"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_WEBP, + .type = AVMEDIA_TYPE_VIDEO, + .name = "webp", + .long_name = NULL_IF_CONFIG_SMALL("WebP"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | + AV_CODEC_PROP_LOSSLESS, + .mime_types= MT("image/webp"), + }, + { + .id = AV_CODEC_ID_HNM4_VIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "hnm4video", + .long_name = NULL_IF_CONFIG_SMALL("HNM 4 video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_HEVC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "hevc", + .long_name = NULL_IF_CONFIG_SMALL("H.265 / HEVC (High Efficiency Video Coding)"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER, + .profiles = NULL_IF_CONFIG_SMALL(ff_hevc_profiles), + }, + { + .id = AV_CODEC_ID_FIC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "fic", + .long_name = NULL_IF_CONFIG_SMALL("Mirillis FIC"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ALIAS_PIX, + .type = AVMEDIA_TYPE_VIDEO, + .name = "alias_pix", + .long_name = NULL_IF_CONFIG_SMALL("Alias/Wavefront PIX image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_BRENDER_PIX, + .type = AVMEDIA_TYPE_VIDEO, + .name = "brender_pix", + .long_name = NULL_IF_CONFIG_SMALL("BRender PIX image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PAF_VIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "paf_video", + .long_name = NULL_IF_CONFIG_SMALL("Amazing Studio Packed Animation File Video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_EXR, + .type = AVMEDIA_TYPE_VIDEO, + .name = "exr", + .long_name = NULL_IF_CONFIG_SMALL("OpenEXR image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | + AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_VP7, + .type = AVMEDIA_TYPE_VIDEO, + .name = "vp7", + .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SANM, + .type = AVMEDIA_TYPE_VIDEO, + .name = "sanm", + .long_name = NULL_IF_CONFIG_SMALL("LucasArts SANM/SMUSH video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SGIRLE, + .type = AVMEDIA_TYPE_VIDEO, + .name = "sgirle", + .long_name = NULL_IF_CONFIG_SMALL("SGI RLE 8-bit"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_MVC1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mvc1", + .long_name = NULL_IF_CONFIG_SMALL("Silicon Graphics Motion Video Compressor 1"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MVC2, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mvc2", + .long_name = NULL_IF_CONFIG_SMALL("Silicon Graphics Motion Video Compressor 2"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_HQX, + .type = AVMEDIA_TYPE_VIDEO, + .name = "hqx", + .long_name = NULL_IF_CONFIG_SMALL("Canopus HQX"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_TDSC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "tdsc", + .long_name = NULL_IF_CONFIG_SMALL("TDSC"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_HQ_HQA, + .type = AVMEDIA_TYPE_VIDEO, + .name = "hq_hqa", + .long_name = NULL_IF_CONFIG_SMALL("Canopus HQ/HQA"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_HAP, + .type = AVMEDIA_TYPE_VIDEO, + .name = "hap", + .long_name = NULL_IF_CONFIG_SMALL("Vidvox Hap"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_DDS, + .type = AVMEDIA_TYPE_VIDEO, + .name = "dds", + .long_name = NULL_IF_CONFIG_SMALL("DirectDraw Surface image decoder"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | + AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_DXV, + .type = AVMEDIA_TYPE_VIDEO, + .name = "dxv", + .long_name = NULL_IF_CONFIG_SMALL("Resolume DXV"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SCREENPRESSO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "screenpresso", + .long_name = NULL_IF_CONFIG_SMALL("Screenpresso"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_RSCC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "rscc", + .long_name = NULL_IF_CONFIG_SMALL("innoHeim/Rsupport Screen Capture Codec"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_Y41P, + .type = AVMEDIA_TYPE_VIDEO, + .name = "y41p", + .long_name = NULL_IF_CONFIG_SMALL("Uncompressed YUV 4:1:1 12-bit"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_AVRP, + .type = AVMEDIA_TYPE_VIDEO, + .name = "avrp", + .long_name = NULL_IF_CONFIG_SMALL("Avid 1:1 10-bit RGB Packer"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_012V, + .type = AVMEDIA_TYPE_VIDEO, + .name = "012v", + .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_AVUI, + .type = AVMEDIA_TYPE_VIDEO, + .name = "avui", + .long_name = NULL_IF_CONFIG_SMALL("Avid Meridien Uncompressed"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_AYUV, + .type = AVMEDIA_TYPE_VIDEO, + .name = "ayuv", + .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed MS 4:4:4:4"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_TARGA_Y216, + .type = AVMEDIA_TYPE_VIDEO, + .name = "targa_y216", + .long_name = NULL_IF_CONFIG_SMALL("Pinnacle TARGA CineWave YUV16"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_V308, + .type = AVMEDIA_TYPE_VIDEO, + .name = "v308", + .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed 4:4:4"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_V408, + .type = AVMEDIA_TYPE_VIDEO, + .name = "v408", + .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed QT 4:4:4:4"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_YUV4, + .type = AVMEDIA_TYPE_VIDEO, + .name = "yuv4", + .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed 4:2:0"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_AVRN, + .type = AVMEDIA_TYPE_VIDEO, + .name = "avrn", + .long_name = NULL_IF_CONFIG_SMALL("Avid AVI Codec"), + }, + { + .id = AV_CODEC_ID_CPIA, + .type = AVMEDIA_TYPE_VIDEO, + .name = "cpia", + .long_name = NULL_IF_CONFIG_SMALL("CPiA video format"), + }, + { + .id = AV_CODEC_ID_XFACE, + .type = AVMEDIA_TYPE_VIDEO, + .name = "xface", + .long_name = NULL_IF_CONFIG_SMALL("X-face image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SNOW, + .type = AVMEDIA_TYPE_VIDEO, + .name = "snow", + .long_name = NULL_IF_CONFIG_SMALL("Snow"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_SMVJPEG, + .type = AVMEDIA_TYPE_VIDEO, + .name = "smvjpeg", + .long_name = NULL_IF_CONFIG_SMALL("Sigmatel Motion Video"), + }, + { + .id = AV_CODEC_ID_APNG, + .type = AVMEDIA_TYPE_VIDEO, + .name = "apng", + .long_name = NULL_IF_CONFIG_SMALL("APNG (Animated Portable Network Graphics) image"), + .props = AV_CODEC_PROP_LOSSLESS, + .mime_types= MT("image/png"), + }, + { + .id = AV_CODEC_ID_DAALA, + .type = AVMEDIA_TYPE_VIDEO, + .name = "daala", + .long_name = NULL_IF_CONFIG_SMALL("Daala"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_CFHD, + .type = AVMEDIA_TYPE_VIDEO, + .name = "cfhd", + .long_name = NULL_IF_CONFIG_SMALL("Cineform HD"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_TRUEMOTION2RT, + .type = AVMEDIA_TYPE_VIDEO, + .name = "truemotion2rt", + .long_name = NULL_IF_CONFIG_SMALL("Duck TrueMotion 2.0 Real Time"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_M101, + .type = AVMEDIA_TYPE_VIDEO, + .name = "m101", + .long_name = NULL_IF_CONFIG_SMALL("Matrox Uncompressed SD"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_MAGICYUV, + .type = AVMEDIA_TYPE_VIDEO, + .name = "magicyuv", + .long_name = NULL_IF_CONFIG_SMALL("MagicYUV video"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_SHEERVIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "sheervideo", + .long_name = NULL_IF_CONFIG_SMALL("BitJazz SheerVideo"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_YLC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "ylc", + .long_name = NULL_IF_CONFIG_SMALL("YUY2 Lossless Codec"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PSD, + .type = AVMEDIA_TYPE_VIDEO, + .name = "psd", + .long_name = NULL_IF_CONFIG_SMALL("Photoshop PSD file"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PIXLET, + .type = AVMEDIA_TYPE_VIDEO, + .name = "pixlet", + .long_name = NULL_IF_CONFIG_SMALL("Apple Pixlet"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SPEEDHQ, + .type = AVMEDIA_TYPE_VIDEO, + .name = "speedhq", + .long_name = NULL_IF_CONFIG_SMALL("NewTek SpeedHQ"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_FMVC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "fmvc", + .long_name = NULL_IF_CONFIG_SMALL("FM Screen Capture Codec"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_SCPR, + .type = AVMEDIA_TYPE_VIDEO, + .name = "scpr", + .long_name = NULL_IF_CONFIG_SMALL("ScreenPressor"), + .props = AV_CODEC_PROP_LOSSLESS | AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_CLEARVIDEO, + .type = AVMEDIA_TYPE_VIDEO, + .name = "clearvideo", + .long_name = NULL_IF_CONFIG_SMALL("Iterated Systems ClearVideo"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_XPM, + .type = AVMEDIA_TYPE_VIDEO, + .name = "xpm", + .long_name = NULL_IF_CONFIG_SMALL("XPM (X PixMap) image"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + .mime_types= MT("image/x-xpixmap"), + }, + { + .id = AV_CODEC_ID_AV1, + .type = AVMEDIA_TYPE_VIDEO, + .name = "av1", + .long_name = NULL_IF_CONFIG_SMALL("Alliance for Open Media AV1"), + .props = AV_CODEC_PROP_LOSSY, + .profiles = NULL_IF_CONFIG_SMALL(ff_av1_profiles), + }, + { + .id = AV_CODEC_ID_BITPACKED, + .type = AVMEDIA_TYPE_VIDEO, + .name = "bitpacked", + .long_name = NULL_IF_CONFIG_SMALL("Bitpacked"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_MSCC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mscc", + .long_name = NULL_IF_CONFIG_SMALL("Mandsoft Screen Capture Codec"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_SRGC, + .type = AVMEDIA_TYPE_VIDEO, + .name = "srgc", + .long_name = NULL_IF_CONFIG_SMALL("Screen Recorder Gold Codec"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_SVG, + .type = AVMEDIA_TYPE_VIDEO, + .name = "svg", + .long_name = NULL_IF_CONFIG_SMALL("Scalable Vector Graphics"), + .props = AV_CODEC_PROP_LOSSLESS, + .mime_types= MT("image/svg+xml"), + }, + { + .id = AV_CODEC_ID_GDV, + .type = AVMEDIA_TYPE_VIDEO, + .name = "gdv", + .long_name = NULL_IF_CONFIG_SMALL("Gremlin Digital Video"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_FITS, + .type = AVMEDIA_TYPE_VIDEO, + .name = "fits", + .long_name = NULL_IF_CONFIG_SMALL("FITS (Flexible Image Transport System)"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + + /* various PCM "codecs" */ + { + .id = AV_CODEC_ID_PCM_S16LE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_s16le", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit little-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_S16BE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_s16be", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit big-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_U16LE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_u16le", + .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 16-bit little-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_U16BE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_u16be", + .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 16-bit big-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_S8, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_s8", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 8-bit"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_U8, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_u8", + .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 8-bit"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_MULAW, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_mulaw", + .long_name = NULL_IF_CONFIG_SMALL("PCM mu-law / G.711 mu-law"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_PCM_ALAW, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_alaw", + .long_name = NULL_IF_CONFIG_SMALL("PCM A-law / G.711 A-law"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_PCM_S32LE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_s32le", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 32-bit little-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_S32BE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_s32be", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 32-bit big-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_U32LE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_u32le", + .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 32-bit little-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_U32BE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_u32be", + .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 32-bit big-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_S24LE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_s24le", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 24-bit little-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_S24BE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_s24be", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 24-bit big-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_U24LE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_u24le", + .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 24-bit little-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_U24BE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_u24be", + .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 24-bit big-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_S24DAUD, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_s24daud", + .long_name = NULL_IF_CONFIG_SMALL("PCM D-Cinema audio signed 24-bit"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_ZORK, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_zork", + .long_name = NULL_IF_CONFIG_SMALL("PCM Zork"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_PCM_S16LE_PLANAR, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_s16le_planar", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit little-endian planar"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_DVD, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_dvd", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 20|24-bit big-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_F32BE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_f32be", + .long_name = NULL_IF_CONFIG_SMALL("PCM 32-bit floating point big-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_F32LE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_f32le", + .long_name = NULL_IF_CONFIG_SMALL("PCM 32-bit floating point little-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_F64BE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_f64be", + .long_name = NULL_IF_CONFIG_SMALL("PCM 64-bit floating point big-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_F64LE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_f64le", + .long_name = NULL_IF_CONFIG_SMALL("PCM 64-bit floating point little-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_BLURAY, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_bluray", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16|20|24-bit big-endian for Blu-ray media"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_LXF, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_lxf", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 20-bit little-endian planar"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_S302M, + .type = AVMEDIA_TYPE_AUDIO, + .name = "s302m", + .long_name = NULL_IF_CONFIG_SMALL("SMPTE 302M"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_S8_PLANAR, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_s8_planar", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 8-bit planar"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_S24LE_PLANAR, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_s24le_planar", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 24-bit little-endian planar"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_S32LE_PLANAR, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_s32le_planar", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 32-bit little-endian planar"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_S16BE_PLANAR, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_s16be_planar", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit big-endian planar"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_S64LE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_s64le", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 64-bit little-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_S64BE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_s64be", + .long_name = NULL_IF_CONFIG_SMALL("PCM signed 64-bit big-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_F16LE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_f16le", + .long_name = NULL_IF_CONFIG_SMALL("PCM 16.8 floating point little-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_PCM_F24LE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "pcm_f24le", + .long_name = NULL_IF_CONFIG_SMALL("PCM 24.0 floating point little-endian"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + + /* various ADPCM codecs */ + { + .id = AV_CODEC_ID_ADPCM_IMA_QT, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ima_qt", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA QuickTime"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_IMA_WAV, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ima_wav", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA WAV"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_IMA_DK3, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ima_dk3", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Duck DK3"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_IMA_DK4, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ima_dk4", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Duck DK4"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_IMA_WS, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ima_ws", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Westwood"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_IMA_SMJPEG, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ima_smjpeg", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Loki SDL MJPEG"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_MS, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ms", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Microsoft"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_4XM, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_4xm", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM 4X Movie"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_XA, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_xa", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM CDROM XA"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_ADX, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_adx", + .long_name = NULL_IF_CONFIG_SMALL("SEGA CRI ADX ADPCM"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_EA, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ea", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_G726, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_g726", + .long_name = NULL_IF_CONFIG_SMALL("G.726 ADPCM"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_CT, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ct", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Creative Technology"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_SWF, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_swf", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Shockwave Flash"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_YAMAHA, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_yamaha", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Yamaha"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_SBPRO_4, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_sbpro_4", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Sound Blaster Pro 4-bit"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_SBPRO_3, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_sbpro_3", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Sound Blaster Pro 2.6-bit"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_SBPRO_2, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_sbpro_2", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Sound Blaster Pro 2-bit"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_THP, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_thp", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo THP"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_IMA_AMV, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ima_amv", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA AMV"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_EA_R1, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ea_r1", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts R1"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_EA_R3, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ea_r3", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts R3"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_EA_R2, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ea_r2", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts R2"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_IMA_EA_SEAD, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ima_ea_sead", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Electronic Arts SEAD"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_IMA_EA_EACS, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ima_ea_eacs", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Electronic Arts EACS"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_EA_XAS, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ea_xas", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts XAS"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_EA_MAXIS_XA, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ea_maxis_xa", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts Maxis CDROM XA"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_IMA_ISS, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ima_iss", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Funcom ISS"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_G722, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_g722", + .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_IMA_APC, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ima_apc", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA CRYO APC"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_VIMA, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_vima", + .long_name = NULL_IF_CONFIG_SMALL("LucasArts VIMA audio"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_AFC, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_afc", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo Gamecube AFC"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_IMA_OKI, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ima_oki", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Dialogic OKI"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_DTK, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_dtk", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo Gamecube DTK"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_IMA_RAD, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ima_rad", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Radical"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_G726LE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_g726le", + .long_name = NULL_IF_CONFIG_SMALL("G.726 ADPCM little-endian"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_THP_LE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_thp_le", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo THP (Little-Endian)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_PSX, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_psx", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Playstation"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_AICA, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_aica", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM Yamaha AICA"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_IMA_DAT4, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_ima_dat4", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Eurocom DAT4"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ADPCM_MTAF, + .type = AVMEDIA_TYPE_AUDIO, + .name = "adpcm_mtaf", + .long_name = NULL_IF_CONFIG_SMALL("ADPCM MTAF"), + .props = AV_CODEC_PROP_LOSSY, + }, + + /* AMR */ + { + .id = AV_CODEC_ID_AMR_NB, + .type = AVMEDIA_TYPE_AUDIO, + .name = "amr_nb", + .long_name = NULL_IF_CONFIG_SMALL("AMR-NB (Adaptive Multi-Rate NarrowBand)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_AMR_WB, + .type = AVMEDIA_TYPE_AUDIO, + .name = "amr_wb", + .long_name = NULL_IF_CONFIG_SMALL("AMR-WB (Adaptive Multi-Rate WideBand)"), + .props = AV_CODEC_PROP_LOSSY, + }, + + /* RealAudio codecs*/ + { + .id = AV_CODEC_ID_RA_144, + .type = AVMEDIA_TYPE_AUDIO, + .name = "ra_144", + .long_name = NULL_IF_CONFIG_SMALL("RealAudio 1.0 (14.4K)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_RA_288, + .type = AVMEDIA_TYPE_AUDIO, + .name = "ra_288", + .long_name = NULL_IF_CONFIG_SMALL("RealAudio 2.0 (28.8K)"), + .props = AV_CODEC_PROP_LOSSY, + }, + + /* various DPCM codecs */ + { + .id = AV_CODEC_ID_ROQ_DPCM, + .type = AVMEDIA_TYPE_AUDIO, + .name = "roq_dpcm", + .long_name = NULL_IF_CONFIG_SMALL("DPCM id RoQ"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_INTERPLAY_DPCM, + .type = AVMEDIA_TYPE_AUDIO, + .name = "interplay_dpcm", + .long_name = NULL_IF_CONFIG_SMALL("DPCM Interplay"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_XAN_DPCM, + .type = AVMEDIA_TYPE_AUDIO, + .name = "xan_dpcm", + .long_name = NULL_IF_CONFIG_SMALL("DPCM Xan"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SOL_DPCM, + .type = AVMEDIA_TYPE_AUDIO, + .name = "sol_dpcm", + .long_name = NULL_IF_CONFIG_SMALL("DPCM Sol"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SDX2_DPCM, + .type = AVMEDIA_TYPE_AUDIO, + .name = "sdx2_dpcm", + .long_name = NULL_IF_CONFIG_SMALL("DPCM Squareroot-Delta-Exact"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_GREMLIN_DPCM, + .type = AVMEDIA_TYPE_AUDIO, + .name = "gremlin_dpcm", + .long_name = NULL_IF_CONFIG_SMALL("DPCM Gremlin"), + .props = AV_CODEC_PROP_LOSSY, + }, + + /* audio codecs */ + { + .id = AV_CODEC_ID_MP2, + .type = AVMEDIA_TYPE_AUDIO, + .name = "mp2", + .long_name = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MP3, + .type = AVMEDIA_TYPE_AUDIO, + .name = "mp3", + .long_name = NULL_IF_CONFIG_SMALL("MP3 (MPEG audio layer 3)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_AAC, + .type = AVMEDIA_TYPE_AUDIO, + .name = "aac", + .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"), + .props = AV_CODEC_PROP_LOSSY, + .profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles), + }, + { + .id = AV_CODEC_ID_AC3, + .type = AVMEDIA_TYPE_AUDIO, + .name = "ac3", + .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_DTS, + .type = AVMEDIA_TYPE_AUDIO, + .name = "dts", + .long_name = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"), + .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS, + .profiles = NULL_IF_CONFIG_SMALL(ff_dca_profiles), + }, + { + .id = AV_CODEC_ID_VORBIS, + .type = AVMEDIA_TYPE_AUDIO, + .name = "vorbis", + .long_name = NULL_IF_CONFIG_SMALL("Vorbis"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_DVAUDIO, + .type = AVMEDIA_TYPE_AUDIO, + .name = "dvaudio", + .long_name = NULL_IF_CONFIG_SMALL("DV audio"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_WMAV1, + .type = AVMEDIA_TYPE_AUDIO, + .name = "wmav1", + .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_WMAV2, + .type = AVMEDIA_TYPE_AUDIO, + .name = "wmav2", + .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MACE3, + .type = AVMEDIA_TYPE_AUDIO, + .name = "mace3", + .long_name = NULL_IF_CONFIG_SMALL("MACE (Macintosh Audio Compression/Expansion) 3:1"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MACE6, + .type = AVMEDIA_TYPE_AUDIO, + .name = "mace6", + .long_name = NULL_IF_CONFIG_SMALL("MACE (Macintosh Audio Compression/Expansion) 6:1"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_VMDAUDIO, + .type = AVMEDIA_TYPE_AUDIO, + .name = "vmdaudio", + .long_name = NULL_IF_CONFIG_SMALL("Sierra VMD audio"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_FLAC, + .type = AVMEDIA_TYPE_AUDIO, + .name = "flac", + .long_name = NULL_IF_CONFIG_SMALL("FLAC (Free Lossless Audio Codec)"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_MP3ADU, + .type = AVMEDIA_TYPE_AUDIO, + .name = "mp3adu", + .long_name = NULL_IF_CONFIG_SMALL("ADU (Application Data Unit) MP3 (MPEG audio layer 3)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MP3ON4, + .type = AVMEDIA_TYPE_AUDIO, + .name = "mp3on4", + .long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SHORTEN, + .type = AVMEDIA_TYPE_AUDIO, + .name = "shorten", + .long_name = NULL_IF_CONFIG_SMALL("Shorten"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_ALAC, + .type = AVMEDIA_TYPE_AUDIO, + .name = "alac", + .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_WESTWOOD_SND1, + .type = AVMEDIA_TYPE_AUDIO, + .name = "westwood_snd1", + .long_name = NULL_IF_CONFIG_SMALL("Westwood Audio (SND1)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_GSM, + .type = AVMEDIA_TYPE_AUDIO, + .name = "gsm", + .long_name = NULL_IF_CONFIG_SMALL("GSM"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_QDM2, + .type = AVMEDIA_TYPE_AUDIO, + .name = "qdm2", + .long_name = NULL_IF_CONFIG_SMALL("QDesign Music Codec 2"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_COOK, + .type = AVMEDIA_TYPE_AUDIO, + .name = "cook", + .long_name = NULL_IF_CONFIG_SMALL("Cook / Cooker / Gecko (RealAudio G2)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_TRUESPEECH, + .type = AVMEDIA_TYPE_AUDIO, + .name = "truespeech", + .long_name = NULL_IF_CONFIG_SMALL("DSP Group TrueSpeech"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_TTA, + .type = AVMEDIA_TYPE_AUDIO, + .name = "tta", + .long_name = NULL_IF_CONFIG_SMALL("TTA (True Audio)"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_SMACKAUDIO, + .type = AVMEDIA_TYPE_AUDIO, + .name = "smackaudio", + .long_name = NULL_IF_CONFIG_SMALL("Smacker audio"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_QCELP, + .type = AVMEDIA_TYPE_AUDIO, + .name = "qcelp", + .long_name = NULL_IF_CONFIG_SMALL("QCELP / PureVoice"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_WAVPACK, + .type = AVMEDIA_TYPE_AUDIO, + .name = "wavpack", + .long_name = NULL_IF_CONFIG_SMALL("WavPack"), + .props = AV_CODEC_PROP_INTRA_ONLY | + AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_DSICINAUDIO, + .type = AVMEDIA_TYPE_AUDIO, + .name = "dsicinaudio", + .long_name = NULL_IF_CONFIG_SMALL("Delphine Software International CIN audio"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_IMC, + .type = AVMEDIA_TYPE_AUDIO, + .name = "imc", + .long_name = NULL_IF_CONFIG_SMALL("IMC (Intel Music Coder)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MUSEPACK7, + .type = AVMEDIA_TYPE_AUDIO, + .name = "musepack7", + .long_name = NULL_IF_CONFIG_SMALL("Musepack SV7"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MLP, + .type = AVMEDIA_TYPE_AUDIO, + .name = "mlp", + .long_name = NULL_IF_CONFIG_SMALL("MLP (Meridian Lossless Packing)"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_GSM_MS, + .type = AVMEDIA_TYPE_AUDIO, + .name = "gsm_ms", + .long_name = NULL_IF_CONFIG_SMALL("GSM Microsoft variant"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ATRAC3, + .type = AVMEDIA_TYPE_AUDIO, + .name = "atrac3", + .long_name = NULL_IF_CONFIG_SMALL("ATRAC3 (Adaptive TRansform Acoustic Coding 3)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_APE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "ape", + .long_name = NULL_IF_CONFIG_SMALL("Monkey's Audio"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_NELLYMOSER, + .type = AVMEDIA_TYPE_AUDIO, + .name = "nellymoser", + .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MUSEPACK8, + .type = AVMEDIA_TYPE_AUDIO, + .name = "musepack8", + .long_name = NULL_IF_CONFIG_SMALL("Musepack SV8"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SPEEX, + .type = AVMEDIA_TYPE_AUDIO, + .name = "speex", + .long_name = NULL_IF_CONFIG_SMALL("Speex"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_WMAVOICE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "wmavoice", + .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_WMAPRO, + .type = AVMEDIA_TYPE_AUDIO, + .name = "wmapro", + .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 9 Professional"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_WMALOSSLESS, + .type = AVMEDIA_TYPE_AUDIO, + .name = "wmalossless", + .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Lossless"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_ATRAC3P, + .type = AVMEDIA_TYPE_AUDIO, + .name = "atrac3p", + .long_name = NULL_IF_CONFIG_SMALL("ATRAC3+ (Adaptive TRansform Acoustic Coding 3+)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_EAC3, + .type = AVMEDIA_TYPE_AUDIO, + .name = "eac3", + .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52B (AC-3, E-AC-3)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SIPR, + .type = AVMEDIA_TYPE_AUDIO, + .name = "sipr", + .long_name = NULL_IF_CONFIG_SMALL("RealAudio SIPR / ACELP.NET"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_MP1, + .type = AVMEDIA_TYPE_AUDIO, + .name = "mp1", + .long_name = NULL_IF_CONFIG_SMALL("MP1 (MPEG audio layer 1)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_TWINVQ, + .type = AVMEDIA_TYPE_AUDIO, + .name = "twinvq", + .long_name = NULL_IF_CONFIG_SMALL("VQF TwinVQ"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_TRUEHD, + .type = AVMEDIA_TYPE_AUDIO, + .name = "truehd", + .long_name = NULL_IF_CONFIG_SMALL("TrueHD"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_MP4ALS, + .type = AVMEDIA_TYPE_AUDIO, + .name = "mp4als", + .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 Audio Lossless Coding (ALS)"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_ATRAC1, + .type = AVMEDIA_TYPE_AUDIO, + .name = "atrac1", + .long_name = NULL_IF_CONFIG_SMALL("ATRAC1 (Adaptive TRansform Acoustic Coding)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_BINKAUDIO_RDFT, + .type = AVMEDIA_TYPE_AUDIO, + .name = "binkaudio_rdft", + .long_name = NULL_IF_CONFIG_SMALL("Bink Audio (RDFT)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_BINKAUDIO_DCT, + .type = AVMEDIA_TYPE_AUDIO, + .name = "binkaudio_dct", + .long_name = NULL_IF_CONFIG_SMALL("Bink Audio (DCT)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_AAC_LATM, + .type = AVMEDIA_TYPE_AUDIO, + .name = "aac_latm", + .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"), + .props = AV_CODEC_PROP_LOSSY, + .profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles), + }, + { + .id = AV_CODEC_ID_QDMC, + .type = AVMEDIA_TYPE_AUDIO, + .name = "qdmc", + .long_name = NULL_IF_CONFIG_SMALL("QDesign Music"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_CELT, + .type = AVMEDIA_TYPE_AUDIO, + .name = "celt", + .long_name = NULL_IF_CONFIG_SMALL("Constrained Energy Lapped Transform (CELT)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_G723_1, + .type = AVMEDIA_TYPE_AUDIO, + .name = "g723_1", + .long_name = NULL_IF_CONFIG_SMALL("G.723.1"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_G729, + .type = AVMEDIA_TYPE_AUDIO, + .name = "g729", + .long_name = NULL_IF_CONFIG_SMALL("G.729"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_8SVX_EXP, + .type = AVMEDIA_TYPE_AUDIO, + .name = "8svx_exp", + .long_name = NULL_IF_CONFIG_SMALL("8SVX exponential"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_8SVX_FIB, + .type = AVMEDIA_TYPE_AUDIO, + .name = "8svx_fib", + .long_name = NULL_IF_CONFIG_SMALL("8SVX fibonacci"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_BMV_AUDIO, + .type = AVMEDIA_TYPE_AUDIO, + .name = "bmv_audio", + .long_name = NULL_IF_CONFIG_SMALL("Discworld II BMV audio"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_RALF, + .type = AVMEDIA_TYPE_AUDIO, + .name = "ralf", + .long_name = NULL_IF_CONFIG_SMALL("RealAudio Lossless"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_IAC, + .type = AVMEDIA_TYPE_AUDIO, + .name = "iac", + .long_name = NULL_IF_CONFIG_SMALL("IAC (Indeo Audio Coder)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ILBC, + .type = AVMEDIA_TYPE_AUDIO, + .name = "ilbc", + .long_name = NULL_IF_CONFIG_SMALL("iLBC (Internet Low Bitrate Codec)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_OPUS, + .type = AVMEDIA_TYPE_AUDIO, + .name = "opus", + .long_name = NULL_IF_CONFIG_SMALL("Opus (Opus Interactive Audio Codec)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_COMFORT_NOISE, + .type = AVMEDIA_TYPE_AUDIO, + .name = "comfortnoise", + .long_name = NULL_IF_CONFIG_SMALL("RFC 3389 Comfort Noise"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_TAK, + .type = AVMEDIA_TYPE_AUDIO, + .name = "tak", + .long_name = NULL_IF_CONFIG_SMALL("TAK (Tom's lossless Audio Kompressor)"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_METASOUND, + .type = AVMEDIA_TYPE_AUDIO, + .name = "metasound", + .long_name = NULL_IF_CONFIG_SMALL("Voxware MetaSound"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_PAF_AUDIO, + .type = AVMEDIA_TYPE_AUDIO, + .name = "paf_audio", + .long_name = NULL_IF_CONFIG_SMALL("Amazing Studio Packed Animation File Audio"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_ON2AVC, + .type = AVMEDIA_TYPE_AUDIO, + .name = "avc", + .long_name = NULL_IF_CONFIG_SMALL("On2 Audio for Video Codec"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_DSS_SP, + .type = AVMEDIA_TYPE_AUDIO, + .name = "dss_sp", + .long_name = NULL_IF_CONFIG_SMALL("Digital Speech Standard - Standard Play mode (DSS SP)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_CODEC2, + .type = AVMEDIA_TYPE_AUDIO, + .name = "codec2", + .long_name = NULL_IF_CONFIG_SMALL("codec2 (very low bitrate speech codec)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_FFWAVESYNTH, + .type = AVMEDIA_TYPE_AUDIO, + .name = "wavesynth", + .long_name = NULL_IF_CONFIG_SMALL("Wave synthesis pseudo-codec"), + }, + { + .id = AV_CODEC_ID_SONIC, + .type = AVMEDIA_TYPE_AUDIO, + .name = "sonic", + .long_name = NULL_IF_CONFIG_SMALL("Sonic"), + }, + { + .id = AV_CODEC_ID_SONIC_LS, + .type = AVMEDIA_TYPE_AUDIO, + .name = "sonicls", + .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"), + }, + { + .id = AV_CODEC_ID_EVRC, + .type = AVMEDIA_TYPE_AUDIO, + .name = "evrc", + .long_name = NULL_IF_CONFIG_SMALL("EVRC (Enhanced Variable Rate Codec)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SMV, + .type = AVMEDIA_TYPE_AUDIO, + .name = "smv", + .long_name = NULL_IF_CONFIG_SMALL("SMV (Selectable Mode Vocoder)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_DSD_LSBF, + .type = AVMEDIA_TYPE_AUDIO, + .name = "dsd_lsbf", + .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), least significant bit first"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_DSD_MSBF, + .type = AVMEDIA_TYPE_AUDIO, + .name = "dsd_msbf", + .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), most significant bit first"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_DSD_LSBF_PLANAR, + .type = AVMEDIA_TYPE_AUDIO, + .name = "dsd_lsbf_planar", + .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), least significant bit first, planar"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_DSD_MSBF_PLANAR, + .type = AVMEDIA_TYPE_AUDIO, + .name = "dsd_msbf_planar", + .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), most significant bit first, planar"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_4GV, + .type = AVMEDIA_TYPE_AUDIO, + .name = "4gv", + .long_name = NULL_IF_CONFIG_SMALL("4GV (Fourth Generation Vocoder)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_INTERPLAY_ACM, + .type = AVMEDIA_TYPE_AUDIO, + .name = "interplayacm", + .long_name = NULL_IF_CONFIG_SMALL("Interplay ACM"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_XMA1, + .type = AVMEDIA_TYPE_AUDIO, + .name = "xma1", + .long_name = NULL_IF_CONFIG_SMALL("Xbox Media Audio 1"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_XMA2, + .type = AVMEDIA_TYPE_AUDIO, + .name = "xma2", + .long_name = NULL_IF_CONFIG_SMALL("Xbox Media Audio 2"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_DST, + .type = AVMEDIA_TYPE_AUDIO, + .name = "dst", + .long_name = NULL_IF_CONFIG_SMALL("DST (Direct Stream Transfer)"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_ATRAC3AL, + .type = AVMEDIA_TYPE_AUDIO, + .name = "atrac3al", + .long_name = NULL_IF_CONFIG_SMALL("ATRAC3 AL (Adaptive TRansform Acoustic Coding 3 Advanced Lossless)"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_ATRAC3PAL, + .type = AVMEDIA_TYPE_AUDIO, + .name = "atrac3pal", + .long_name = NULL_IF_CONFIG_SMALL("ATRAC3+ AL (Adaptive TRansform Acoustic Coding 3+ Advanced Lossless)"), + .props = AV_CODEC_PROP_LOSSLESS, + }, + { + .id = AV_CODEC_ID_DOLBY_E, + .type = AVMEDIA_TYPE_AUDIO, + .name = "dolby_e", + .long_name = NULL_IF_CONFIG_SMALL("Dolby E"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_APTX, + .type = AVMEDIA_TYPE_AUDIO, + .name = "aptx", + .long_name = NULL_IF_CONFIG_SMALL("aptX (Audio Processing Technology for Bluetooth)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_APTX_HD, + .type = AVMEDIA_TYPE_AUDIO, + .name = "aptx_hd", + .long_name = NULL_IF_CONFIG_SMALL("aptX HD (Audio Processing Technology for Bluetooth)"), + .props = AV_CODEC_PROP_LOSSY, + }, + { + .id = AV_CODEC_ID_SBC, + .type = AVMEDIA_TYPE_AUDIO, + .name = "sbc", + .long_name = NULL_IF_CONFIG_SMALL("SBC (low-complexity subband codec)"), + .props = AV_CODEC_PROP_LOSSY, + }, + + /* subtitle codecs */ + { + .id = AV_CODEC_ID_DVD_SUBTITLE, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "dvd_subtitle", + .long_name = NULL_IF_CONFIG_SMALL("DVD subtitles"), + .props = AV_CODEC_PROP_BITMAP_SUB, + }, + { + .id = AV_CODEC_ID_DVB_SUBTITLE, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "dvb_subtitle", + .long_name = NULL_IF_CONFIG_SMALL("DVB subtitles"), + .props = AV_CODEC_PROP_BITMAP_SUB, + }, + { + .id = AV_CODEC_ID_TEXT, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "text", + .long_name = NULL_IF_CONFIG_SMALL("raw UTF-8 text"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_XSUB, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "xsub", + .long_name = NULL_IF_CONFIG_SMALL("XSUB"), + .props = AV_CODEC_PROP_BITMAP_SUB, + }, + { + .id = AV_CODEC_ID_SSA, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "ssa", + .long_name = NULL_IF_CONFIG_SMALL("SSA (SubStation Alpha) subtitle"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_MOV_TEXT, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "mov_text", + .long_name = NULL_IF_CONFIG_SMALL("MOV text"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_HDMV_PGS_SUBTITLE, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "hdmv_pgs_subtitle", + .long_name = NULL_IF_CONFIG_SMALL("HDMV Presentation Graphic Stream subtitles"), + .props = AV_CODEC_PROP_BITMAP_SUB, + }, + { + .id = AV_CODEC_ID_DVB_TELETEXT, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "dvb_teletext", + .long_name = NULL_IF_CONFIG_SMALL("DVB teletext"), + }, + { + .id = AV_CODEC_ID_SRT, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "srt", + .long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle with embedded timing"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_MICRODVD, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "microdvd", + .long_name = NULL_IF_CONFIG_SMALL("MicroDVD subtitle"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_EIA_608, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "eia_608", + .long_name = NULL_IF_CONFIG_SMALL("EIA-608 closed captions"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_JACOSUB, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "jacosub", + .long_name = NULL_IF_CONFIG_SMALL("JACOsub subtitle"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_SAMI, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "sami", + .long_name = NULL_IF_CONFIG_SMALL("SAMI subtitle"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_REALTEXT, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "realtext", + .long_name = NULL_IF_CONFIG_SMALL("RealText subtitle"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_STL, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "stl", + .long_name = NULL_IF_CONFIG_SMALL("Spruce subtitle format"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_SUBVIEWER1, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "subviewer1", + .long_name = NULL_IF_CONFIG_SMALL("SubViewer v1 subtitle"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_SUBVIEWER, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "subviewer", + .long_name = NULL_IF_CONFIG_SMALL("SubViewer subtitle"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_SUBRIP, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "subrip", + .long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_WEBVTT, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "webvtt", + .long_name = NULL_IF_CONFIG_SMALL("WebVTT subtitle"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_MPL2, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "mpl2", + .long_name = NULL_IF_CONFIG_SMALL("MPL2 subtitle"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_VPLAYER, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "vplayer", + .long_name = NULL_IF_CONFIG_SMALL("VPlayer subtitle"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_PJS, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "pjs", + .long_name = NULL_IF_CONFIG_SMALL("PJS (Phoenix Japanimation Society) subtitle"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_ASS, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "ass", + .long_name = NULL_IF_CONFIG_SMALL("ASS (Advanced SSA) subtitle"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + { + .id = AV_CODEC_ID_HDMV_TEXT_SUBTITLE, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "hdmv_text_subtitle", + .long_name = NULL_IF_CONFIG_SMALL("HDMV Text subtitle"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, + + /* other kind of codecs and pseudo-codecs */ + { + .id = AV_CODEC_ID_TTF, + .type = AVMEDIA_TYPE_DATA, + .name = "ttf", + .long_name = NULL_IF_CONFIG_SMALL("TrueType font"), + .mime_types= MT("application/x-truetype-font", "application/x-font"), + }, + { + .id = AV_CODEC_ID_SCTE_35, + .type = AVMEDIA_TYPE_DATA, + .name = "scte_35", + .long_name = NULL_IF_CONFIG_SMALL("SCTE 35 Message Queue"), + }, + { + .id = AV_CODEC_ID_BINTEXT, + .type = AVMEDIA_TYPE_VIDEO, + .name = "bintext", + .long_name = NULL_IF_CONFIG_SMALL("Binary text"), + .props = AV_CODEC_PROP_INTRA_ONLY, + }, + { + .id = AV_CODEC_ID_XBIN, + .type = AVMEDIA_TYPE_VIDEO, + .name = "xbin", + .long_name = NULL_IF_CONFIG_SMALL("eXtended BINary text"), + .props = AV_CODEC_PROP_INTRA_ONLY, + }, + { + .id = AV_CODEC_ID_IDF, + .type = AVMEDIA_TYPE_VIDEO, + .name = "idf", + .long_name = NULL_IF_CONFIG_SMALL("iCEDraw text"), + .props = AV_CODEC_PROP_INTRA_ONLY, + }, + { + .id = AV_CODEC_ID_OTF, + .type = AVMEDIA_TYPE_DATA, + .name = "otf", + .long_name = NULL_IF_CONFIG_SMALL("OpenType font"), + .mime_types= MT("application/vnd.ms-opentype"), + }, + { + .id = AV_CODEC_ID_SMPTE_KLV, + .type = AVMEDIA_TYPE_DATA, + .name = "klv", + .long_name = NULL_IF_CONFIG_SMALL("SMPTE 336M Key-Length-Value (KLV) metadata"), + }, + { + .id = AV_CODEC_ID_DVD_NAV, + .type = AVMEDIA_TYPE_DATA, + .name = "dvd_nav_packet", + .long_name = NULL_IF_CONFIG_SMALL("DVD Nav packet"), + }, + { + .id = AV_CODEC_ID_TIMED_ID3, + .type = AVMEDIA_TYPE_DATA, + .name = "timed_id3", + .long_name = NULL_IF_CONFIG_SMALL("timed ID3 metadata"), + }, + { + .id = AV_CODEC_ID_BIN_DATA, + .type = AVMEDIA_TYPE_DATA, + .name = "bin_data", + .long_name = NULL_IF_CONFIG_SMALL("binary data"), + .mime_types= MT("application/octet-stream"), + }, + { + .id = AV_CODEC_ID_WRAPPED_AVFRAME, + .type = AVMEDIA_TYPE_VIDEO, + .name = "wrapped_avframe", + .long_name = NULL_IF_CONFIG_SMALL("AVFrame to AVPacket passthrough"), + .props = AV_CODEC_PROP_LOSSLESS, + }, +}; + +static int descriptor_compare(const void *key, const void *member) +{ + enum AVCodecID id = *(const enum AVCodecID *) key; + const AVCodecDescriptor *desc = member; + + return id - desc->id; +} + +const AVCodecDescriptor *avcodec_descriptor_get(enum AVCodecID id) +{ + return bsearch(&id, codec_descriptors, FF_ARRAY_ELEMS(codec_descriptors), + sizeof(codec_descriptors[0]), descriptor_compare); +} + +const AVCodecDescriptor *avcodec_descriptor_next(const AVCodecDescriptor *prev) +{ + if (!prev) + return &codec_descriptors[0]; + if (prev - codec_descriptors < FF_ARRAY_ELEMS(codec_descriptors) - 1) + return prev + 1; + return NULL; +} + +const AVCodecDescriptor *avcodec_descriptor_get_by_name(const char *name) +{ + const AVCodecDescriptor *desc = NULL; + + while ((desc = avcodec_descriptor_next(desc))) + if (!strcmp(desc->name, name)) + return desc; + return NULL; +} + +enum AVMediaType avcodec_get_type(enum AVCodecID codec_id) +{ + const AVCodecDescriptor *desc = avcodec_descriptor_get(codec_id); + return desc ? desc->type : AVMEDIA_TYPE_UNKNOWN; +} diff --git a/libs/ffvpx/libavcodec/codec_list.c b/libs/ffvpx/libavcodec/codec_list.c new file mode 100644 index 000000000..063f8ff78 --- /dev/null +++ b/libs/ffvpx/libavcodec/codec_list.c @@ -0,0 +1,11 @@ +static const AVCodec * const codec_list[] = { +#if CONFIG_VP8_DECODER + &ff_vp8_decoder, +#endif +#if CONFIG_VP9_DECODER + &ff_vp9_decoder, +#endif +#if CONFIG_FLAC_DECODER + &ff_flac_decoder, +#endif + NULL }; diff --git a/libs/ffvpx/libavcodec/decode.c b/libs/ffvpx/libavcodec/decode.c new file mode 100644 index 000000000..421a8f1a3 --- /dev/null +++ b/libs/ffvpx/libavcodec/decode.c @@ -0,0 +1,1981 @@ +/* + * generic decoding-related code + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> +#include <string.h> + +#include "config.h" + +#if CONFIG_ICONV +# include <iconv.h> +#endif + +#include "libavutil/avassert.h" +#include "libavutil/avstring.h" +#include "libavutil/bprint.h" +#include "libavutil/common.h" +#include "libavutil/frame.h" +#include "libavutil/hwcontext.h" +#include "libavutil/imgutils.h" +#include "libavutil/internal.h" +#include "libavutil/intmath.h" + +#include "avcodec.h" +#include "bytestream.h" +#include "decode.h" +#include "hwaccel.h" +#include "internal.h" +#include "thread.h" + +static int apply_param_change(AVCodecContext *avctx, const AVPacket *avpkt) +{ + int size = 0, ret; + const uint8_t *data; + uint32_t flags; + int64_t val; + + data = av_packet_get_side_data(avpkt, AV_PKT_DATA_PARAM_CHANGE, &size); + if (!data) + return 0; + + if (!(avctx->codec->capabilities & AV_CODEC_CAP_PARAM_CHANGE)) { + av_log(avctx, AV_LOG_ERROR, "This decoder does not support parameter " + "changes, but PARAM_CHANGE side data was sent to it.\n"); + ret = AVERROR(EINVAL); + goto fail2; + } + + if (size < 4) + goto fail; + + flags = bytestream_get_le32(&data); + size -= 4; + + if (flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT) { + if (size < 4) + goto fail; + val = bytestream_get_le32(&data); + if (val <= 0 || val > INT_MAX) { + av_log(avctx, AV_LOG_ERROR, "Invalid channel count"); + ret = AVERROR_INVALIDDATA; + goto fail2; + } + avctx->channels = val; + size -= 4; + } + if (flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT) { + if (size < 8) + goto fail; + avctx->channel_layout = bytestream_get_le64(&data); + size -= 8; + } + if (flags & AV_SIDE_DATA_PARAM_CHANGE_SAMPLE_RATE) { + if (size < 4) + goto fail; + val = bytestream_get_le32(&data); + if (val <= 0 || val > INT_MAX) { + av_log(avctx, AV_LOG_ERROR, "Invalid sample rate"); + ret = AVERROR_INVALIDDATA; + goto fail2; + } + avctx->sample_rate = val; + size -= 4; + } + if (flags & AV_SIDE_DATA_PARAM_CHANGE_DIMENSIONS) { + if (size < 8) + goto fail; + avctx->width = bytestream_get_le32(&data); + avctx->height = bytestream_get_le32(&data); + size -= 8; + ret = ff_set_dimensions(avctx, avctx->width, avctx->height); + if (ret < 0) + goto fail2; + } + + return 0; +fail: + av_log(avctx, AV_LOG_ERROR, "PARAM_CHANGE side data too small.\n"); + ret = AVERROR_INVALIDDATA; +fail2: + if (ret < 0) { + av_log(avctx, AV_LOG_ERROR, "Error applying parameter changes.\n"); + if (avctx->err_recognition & AV_EF_EXPLODE) + return ret; + } + return 0; +} + +static int extract_packet_props(AVCodecInternal *avci, const AVPacket *pkt) +{ + int ret = 0; + + av_packet_unref(avci->last_pkt_props); + if (pkt) { + ret = av_packet_copy_props(avci->last_pkt_props, pkt); + if (!ret) + avci->last_pkt_props->size = pkt->size; // HACK: Needed for ff_decode_frame_props(). + } + return ret; +} + +static int unrefcount_frame(AVCodecInternal *avci, AVFrame *frame) +{ + int ret; + + /* move the original frame to our backup */ + av_frame_unref(avci->to_free); + av_frame_move_ref(avci->to_free, frame); + + /* now copy everything except the AVBufferRefs back + * note that we make a COPY of the side data, so calling av_frame_free() on + * the caller's frame will work properly */ + ret = av_frame_copy_props(frame, avci->to_free); + if (ret < 0) + return ret; + + memcpy(frame->data, avci->to_free->data, sizeof(frame->data)); + memcpy(frame->linesize, avci->to_free->linesize, sizeof(frame->linesize)); + if (avci->to_free->extended_data != avci->to_free->data) { + int planes = avci->to_free->channels; + int size = planes * sizeof(*frame->extended_data); + + if (!size) { + av_frame_unref(frame); + return AVERROR_BUG; + } + + frame->extended_data = av_malloc(size); + if (!frame->extended_data) { + av_frame_unref(frame); + return AVERROR(ENOMEM); + } + memcpy(frame->extended_data, avci->to_free->extended_data, + size); + } else + frame->extended_data = frame->data; + + frame->format = avci->to_free->format; + frame->width = avci->to_free->width; + frame->height = avci->to_free->height; + frame->channel_layout = avci->to_free->channel_layout; + frame->nb_samples = avci->to_free->nb_samples; + frame->channels = avci->to_free->channels; + + return 0; +} + +static int bsfs_init(AVCodecContext *avctx) +{ + AVCodecInternal *avci = avctx->internal; + DecodeFilterContext *s = &avci->filter; + const char *bsfs_str; + int ret; + + if (s->nb_bsfs) + return 0; + + bsfs_str = avctx->codec->bsfs ? avctx->codec->bsfs : "null"; + while (bsfs_str && *bsfs_str) { + AVBSFContext **tmp; + const AVBitStreamFilter *filter; + char *bsf; + + bsf = av_get_token(&bsfs_str, ","); + if (!bsf) { + ret = AVERROR(ENOMEM); + goto fail; + } + + filter = av_bsf_get_by_name(bsf); + if (!filter) { + av_log(avctx, AV_LOG_ERROR, "A non-existing bitstream filter %s " + "requested by a decoder. This is a bug, please report it.\n", + bsf); + ret = AVERROR_BUG; + av_freep(&bsf); + goto fail; + } + av_freep(&bsf); + + tmp = av_realloc_array(s->bsfs, s->nb_bsfs + 1, sizeof(*s->bsfs)); + if (!tmp) { + ret = AVERROR(ENOMEM); + goto fail; + } + s->bsfs = tmp; + s->nb_bsfs++; + + ret = av_bsf_alloc(filter, &s->bsfs[s->nb_bsfs - 1]); + if (ret < 0) + goto fail; + + if (s->nb_bsfs == 1) { + /* We do not currently have an API for passing the input timebase into decoders, + * but no filters used here should actually need it. + * So we make up some plausible-looking number (the MPEG 90kHz timebase) */ + s->bsfs[s->nb_bsfs - 1]->time_base_in = (AVRational){ 1, 90000 }; + ret = avcodec_parameters_from_context(s->bsfs[s->nb_bsfs - 1]->par_in, + avctx); + } else { + s->bsfs[s->nb_bsfs - 1]->time_base_in = s->bsfs[s->nb_bsfs - 2]->time_base_out; + ret = avcodec_parameters_copy(s->bsfs[s->nb_bsfs - 1]->par_in, + s->bsfs[s->nb_bsfs - 2]->par_out); + } + if (ret < 0) + goto fail; + + ret = av_bsf_init(s->bsfs[s->nb_bsfs - 1]); + if (ret < 0) + goto fail; + } + + return 0; +fail: + ff_decode_bsfs_uninit(avctx); + return ret; +} + +/* try to get one output packet from the filter chain */ +static int bsfs_poll(AVCodecContext *avctx, AVPacket *pkt) +{ + DecodeFilterContext *s = &avctx->internal->filter; + int idx, ret; + + /* start with the last filter in the chain */ + idx = s->nb_bsfs - 1; + while (idx >= 0) { + /* request a packet from the currently selected filter */ + ret = av_bsf_receive_packet(s->bsfs[idx], pkt); + if (ret == AVERROR(EAGAIN)) { + /* no packets available, try the next filter up the chain */ + ret = 0; + idx--; + continue; + } else if (ret < 0 && ret != AVERROR_EOF) { + return ret; + } + + /* got a packet or EOF -- pass it to the caller or to the next filter + * down the chain */ + if (idx == s->nb_bsfs - 1) { + return ret; + } else { + idx++; + ret = av_bsf_send_packet(s->bsfs[idx], ret < 0 ? NULL : pkt); + if (ret < 0) { + av_log(avctx, AV_LOG_ERROR, + "Error pre-processing a packet before decoding\n"); + av_packet_unref(pkt); + return ret; + } + } + } + + return AVERROR(EAGAIN); +} + +int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt) +{ + AVCodecInternal *avci = avctx->internal; + int ret; + + if (avci->draining) + return AVERROR_EOF; + + ret = bsfs_poll(avctx, pkt); + if (ret == AVERROR_EOF) + avci->draining = 1; + if (ret < 0) + return ret; + + ret = extract_packet_props(avctx->internal, pkt); + if (ret < 0) + goto finish; + + ret = apply_param_change(avctx, pkt); + if (ret < 0) + goto finish; + + if (avctx->codec->receive_frame) + avci->compat_decode_consumed += pkt->size; + + return 0; +finish: + av_packet_unref(pkt); + return ret; +} + +/** + * Attempt to guess proper monotonic timestamps for decoded video frames + * which might have incorrect times. Input timestamps may wrap around, in + * which case the output will as well. + * + * @param pts the pts field of the decoded AVPacket, as passed through + * AVFrame.pts + * @param dts the dts field of the decoded AVPacket + * @return one of the input values, may be AV_NOPTS_VALUE + */ +static int64_t guess_correct_pts(AVCodecContext *ctx, + int64_t reordered_pts, int64_t dts) +{ + int64_t pts = AV_NOPTS_VALUE; + + if (dts != AV_NOPTS_VALUE) { + ctx->pts_correction_num_faulty_dts += dts <= ctx->pts_correction_last_dts; + ctx->pts_correction_last_dts = dts; + } else if (reordered_pts != AV_NOPTS_VALUE) + ctx->pts_correction_last_dts = reordered_pts; + + if (reordered_pts != AV_NOPTS_VALUE) { + ctx->pts_correction_num_faulty_pts += reordered_pts <= ctx->pts_correction_last_pts; + ctx->pts_correction_last_pts = reordered_pts; + } else if(dts != AV_NOPTS_VALUE) + ctx->pts_correction_last_pts = dts; + + if ((ctx->pts_correction_num_faulty_pts<=ctx->pts_correction_num_faulty_dts || dts == AV_NOPTS_VALUE) + && reordered_pts != AV_NOPTS_VALUE) + pts = reordered_pts; + else + pts = dts; + + return pts; +} + +/* + * The core of the receive_frame_wrapper for the decoders implementing + * the simple API. Certain decoders might consume partial packets without + * returning any output, so this function needs to be called in a loop until it + * returns EAGAIN. + **/ +static int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame) +{ + AVCodecInternal *avci = avctx->internal; + DecodeSimpleContext *ds = &avci->ds; + AVPacket *pkt = ds->in_pkt; + // copy to ensure we do not change pkt + int got_frame, actual_got_frame; + int ret; + + if (!pkt->data && !avci->draining) { + av_packet_unref(pkt); + ret = ff_decode_get_packet(avctx, pkt); + if (ret < 0 && ret != AVERROR_EOF) + return ret; + } + + // Some codecs (at least wma lossless) will crash when feeding drain packets + // after EOF was signaled. + if (avci->draining_done) + return AVERROR_EOF; + + if (!pkt->data && + !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY || + avctx->active_thread_type & FF_THREAD_FRAME)) + return AVERROR_EOF; + + got_frame = 0; + + if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME) { + ret = ff_thread_decode_frame(avctx, frame, &got_frame, pkt); + } else { + ret = avctx->codec->decode(avctx, frame, &got_frame, pkt); + + if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS)) + frame->pkt_dts = pkt->dts; + if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) { + if(!avctx->has_b_frames) + frame->pkt_pos = pkt->pos; + //FIXME these should be under if(!avctx->has_b_frames) + /* get_buffer is supposed to set frame parameters */ + if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) { + if (!frame->sample_aspect_ratio.num) frame->sample_aspect_ratio = avctx->sample_aspect_ratio; + if (!frame->width) frame->width = avctx->width; + if (!frame->height) frame->height = avctx->height; + if (frame->format == AV_PIX_FMT_NONE) frame->format = avctx->pix_fmt; + } + } + } + emms_c(); + actual_got_frame = got_frame; + + if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) { + if (frame->flags & AV_FRAME_FLAG_DISCARD) + got_frame = 0; + if (got_frame) + frame->best_effort_timestamp = guess_correct_pts(avctx, + frame->pts, + frame->pkt_dts); + } else if (avctx->codec->type == AVMEDIA_TYPE_AUDIO) { + uint8_t *side; + int side_size; + uint32_t discard_padding = 0; + uint8_t skip_reason = 0; + uint8_t discard_reason = 0; + + if (ret >= 0 && got_frame) { + frame->best_effort_timestamp = guess_correct_pts(avctx, + frame->pts, + frame->pkt_dts); + if (frame->format == AV_SAMPLE_FMT_NONE) + frame->format = avctx->sample_fmt; + if (!frame->channel_layout) + frame->channel_layout = avctx->channel_layout; + if (!frame->channels) + frame->channels = avctx->channels; + if (!frame->sample_rate) + frame->sample_rate = avctx->sample_rate; + } + + side= av_packet_get_side_data(avci->last_pkt_props, AV_PKT_DATA_SKIP_SAMPLES, &side_size); + if(side && side_size>=10) { + avctx->internal->skip_samples = AV_RL32(side) * avctx->internal->skip_samples_multiplier; + discard_padding = AV_RL32(side + 4); + av_log(avctx, AV_LOG_DEBUG, "skip %d / discard %d samples due to side data\n", + avctx->internal->skip_samples, (int)discard_padding); + skip_reason = AV_RL8(side + 8); + discard_reason = AV_RL8(side + 9); + } + + if ((frame->flags & AV_FRAME_FLAG_DISCARD) && got_frame && + !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) { + avctx->internal->skip_samples = FFMAX(0, avctx->internal->skip_samples - frame->nb_samples); + got_frame = 0; + } + + if (avctx->internal->skip_samples > 0 && got_frame && + !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) { + if(frame->nb_samples <= avctx->internal->skip_samples){ + got_frame = 0; + avctx->internal->skip_samples -= frame->nb_samples; + av_log(avctx, AV_LOG_DEBUG, "skip whole frame, skip left: %d\n", + avctx->internal->skip_samples); + } else { + av_samples_copy(frame->extended_data, frame->extended_data, 0, avctx->internal->skip_samples, + frame->nb_samples - avctx->internal->skip_samples, avctx->channels, frame->format); + if(avctx->pkt_timebase.num && avctx->sample_rate) { + int64_t diff_ts = av_rescale_q(avctx->internal->skip_samples, + (AVRational){1, avctx->sample_rate}, + avctx->pkt_timebase); + if(frame->pts!=AV_NOPTS_VALUE) + frame->pts += diff_ts; +#if FF_API_PKT_PTS +FF_DISABLE_DEPRECATION_WARNINGS + if(frame->pkt_pts!=AV_NOPTS_VALUE) + frame->pkt_pts += diff_ts; +FF_ENABLE_DEPRECATION_WARNINGS +#endif + if(frame->pkt_dts!=AV_NOPTS_VALUE) + frame->pkt_dts += diff_ts; + if (frame->pkt_duration >= diff_ts) + frame->pkt_duration -= diff_ts; + } else { + av_log(avctx, AV_LOG_WARNING, "Could not update timestamps for skipped samples.\n"); + } + av_log(avctx, AV_LOG_DEBUG, "skip %d/%d samples\n", + avctx->internal->skip_samples, frame->nb_samples); + frame->nb_samples -= avctx->internal->skip_samples; + avctx->internal->skip_samples = 0; + } + } + + if (discard_padding > 0 && discard_padding <= frame->nb_samples && got_frame && + !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) { + if (discard_padding == frame->nb_samples) { + got_frame = 0; + } else { + if(avctx->pkt_timebase.num && avctx->sample_rate) { + int64_t diff_ts = av_rescale_q(frame->nb_samples - discard_padding, + (AVRational){1, avctx->sample_rate}, + avctx->pkt_timebase); + frame->pkt_duration = diff_ts; + } else { + av_log(avctx, AV_LOG_WARNING, "Could not update timestamps for discarded samples.\n"); + } + av_log(avctx, AV_LOG_DEBUG, "discard %d/%d samples\n", + (int)discard_padding, frame->nb_samples); + frame->nb_samples -= discard_padding; + } + } + + if ((avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL) && got_frame) { + AVFrameSideData *fside = av_frame_new_side_data(frame, AV_FRAME_DATA_SKIP_SAMPLES, 10); + if (fside) { + AV_WL32(fside->data, avctx->internal->skip_samples); + AV_WL32(fside->data + 4, discard_padding); + AV_WL8(fside->data + 8, skip_reason); + AV_WL8(fside->data + 9, discard_reason); + avctx->internal->skip_samples = 0; + } + } + } + + if (avctx->codec->type == AVMEDIA_TYPE_AUDIO && + !avci->showed_multi_packet_warning && + ret >= 0 && ret != pkt->size && !(avctx->codec->capabilities & AV_CODEC_CAP_SUBFRAMES)) { + av_log(avctx, AV_LOG_WARNING, "Multiple frames in a packet.\n"); + avci->showed_multi_packet_warning = 1; + } + + if (!got_frame) + av_frame_unref(frame); + + if (ret >= 0 && avctx->codec->type == AVMEDIA_TYPE_VIDEO && !(avctx->flags & AV_CODEC_FLAG_TRUNCATED)) + ret = pkt->size; + +#if FF_API_AVCTX_TIMEBASE + if (avctx->framerate.num > 0 && avctx->framerate.den > 0) + avctx->time_base = av_inv_q(av_mul_q(avctx->framerate, (AVRational){avctx->ticks_per_frame, 1})); +#endif + + /* do not stop draining when actual_got_frame != 0 or ret < 0 */ + /* got_frame == 0 but actual_got_frame != 0 when frame is discarded */ + if (avctx->internal->draining && !actual_got_frame) { + if (ret < 0) { + /* prevent infinite loop if a decoder wrongly always return error on draining */ + /* reasonable nb_errors_max = maximum b frames + thread count */ + int nb_errors_max = 20 + (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME ? + avctx->thread_count : 1); + + if (avci->nb_draining_errors++ >= nb_errors_max) { + av_log(avctx, AV_LOG_ERROR, "Too many errors when draining, this is a bug. " + "Stop draining and force EOF.\n"); + avci->draining_done = 1; + ret = AVERROR_BUG; + } + } else { + avci->draining_done = 1; + } + } + + avci->compat_decode_consumed += ret; + + if (ret >= pkt->size || ret < 0) { + av_packet_unref(pkt); + } else { + int consumed = ret; + + pkt->data += consumed; + pkt->size -= consumed; + avci->last_pkt_props->size -= consumed; // See extract_packet_props() comment. + pkt->pts = AV_NOPTS_VALUE; + pkt->dts = AV_NOPTS_VALUE; + avci->last_pkt_props->pts = AV_NOPTS_VALUE; + avci->last_pkt_props->dts = AV_NOPTS_VALUE; + } + + if (got_frame) + av_assert0(frame->buf[0]); + + return ret < 0 ? ret : 0; +} + +static int decode_simple_receive_frame(AVCodecContext *avctx, AVFrame *frame) +{ + int ret; + + while (!frame->buf[0]) { + ret = decode_simple_internal(avctx, frame); + if (ret < 0) + return ret; + } + + return 0; +} + +static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame) +{ + AVCodecInternal *avci = avctx->internal; + int ret; + + av_assert0(!frame->buf[0]); + + if (avctx->codec->receive_frame) + ret = avctx->codec->receive_frame(avctx, frame); + else + ret = decode_simple_receive_frame(avctx, frame); + + if (ret == AVERROR_EOF) + avci->draining_done = 1; + + if (!ret) { + /* the only case where decode data is not set should be decoders + * that do not call ff_get_buffer() */ + av_assert0((frame->private_ref && frame->private_ref->size == sizeof(FrameDecodeData)) || + !(avctx->codec->capabilities & AV_CODEC_CAP_DR1)); + + if (frame->private_ref) { + FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data; + + if (fdd->post_process) { + ret = fdd->post_process(avctx, frame); + if (ret < 0) { + av_frame_unref(frame); + return ret; + } + } + } + } + + /* free the per-frame decode data */ + av_buffer_unref(&frame->private_ref); + + return ret; +} + +int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt) +{ + AVCodecInternal *avci = avctx->internal; + int ret; + + if (!avcodec_is_open(avctx) || !av_codec_is_decoder(avctx->codec)) + return AVERROR(EINVAL); + + if (avctx->internal->draining) + return AVERROR_EOF; + + if (avpkt && !avpkt->size && avpkt->data) + return AVERROR(EINVAL); + + ret = bsfs_init(avctx); + if (ret < 0) + return ret; + + av_packet_unref(avci->buffer_pkt); + if (avpkt && (avpkt->data || avpkt->side_data_elems)) { + ret = av_packet_ref(avci->buffer_pkt, avpkt); + if (ret < 0) + return ret; + } + + ret = av_bsf_send_packet(avci->filter.bsfs[0], avci->buffer_pkt); + if (ret < 0) { + av_packet_unref(avci->buffer_pkt); + return ret; + } + + if (!avci->buffer_frame->buf[0]) { + ret = decode_receive_frame_internal(avctx, avci->buffer_frame); + if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) + return ret; + } + + return 0; +} + +static int apply_cropping(AVCodecContext *avctx, AVFrame *frame) +{ + /* make sure we are noisy about decoders returning invalid cropping data */ + if (frame->crop_left >= INT_MAX - frame->crop_right || + frame->crop_top >= INT_MAX - frame->crop_bottom || + (frame->crop_left + frame->crop_right) >= frame->width || + (frame->crop_top + frame->crop_bottom) >= frame->height) { + av_log(avctx, AV_LOG_WARNING, + "Invalid cropping information set by a decoder: " + "%"SIZE_SPECIFIER"/%"SIZE_SPECIFIER"/%"SIZE_SPECIFIER"/%"SIZE_SPECIFIER" " + "(frame size %dx%d). This is a bug, please report it\n", + frame->crop_left, frame->crop_right, frame->crop_top, frame->crop_bottom, + frame->width, frame->height); + frame->crop_left = 0; + frame->crop_right = 0; + frame->crop_top = 0; + frame->crop_bottom = 0; + return 0; + } + + if (!avctx->apply_cropping) + return 0; + + return av_frame_apply_cropping(frame, avctx->flags & AV_CODEC_FLAG_UNALIGNED ? + AV_FRAME_CROP_UNALIGNED : 0); +} + +int attribute_align_arg avcodec_receive_frame(AVCodecContext *avctx, AVFrame *frame) +{ + AVCodecInternal *avci = avctx->internal; + int ret; + + av_frame_unref(frame); + + if (!avcodec_is_open(avctx) || !av_codec_is_decoder(avctx->codec)) + return AVERROR(EINVAL); + + ret = bsfs_init(avctx); + if (ret < 0) + return ret; + + if (avci->buffer_frame->buf[0]) { + av_frame_move_ref(frame, avci->buffer_frame); + } else { + ret = decode_receive_frame_internal(avctx, frame); + if (ret < 0) + return ret; + } + + if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) { + ret = apply_cropping(avctx, frame); + if (ret < 0) { + av_frame_unref(frame); + return ret; + } + } + + avctx->frame_number++; + + return 0; +} + +static int compat_decode(AVCodecContext *avctx, AVFrame *frame, + int *got_frame, const AVPacket *pkt) +{ + AVCodecInternal *avci = avctx->internal; + int ret = 0; + + av_assert0(avci->compat_decode_consumed == 0); + + if (avci->draining_done && pkt && pkt->size != 0) { + av_log(avctx, AV_LOG_WARNING, "Got unexpected packet after EOF\n"); + avcodec_flush_buffers(avctx); + } + + *got_frame = 0; + avci->compat_decode = 1; + + if (avci->compat_decode_partial_size > 0 && + avci->compat_decode_partial_size != pkt->size) { + av_log(avctx, AV_LOG_ERROR, + "Got unexpected packet size after a partial decode\n"); + ret = AVERROR(EINVAL); + goto finish; + } + + if (!avci->compat_decode_partial_size) { + ret = avcodec_send_packet(avctx, pkt); + if (ret == AVERROR_EOF) + ret = 0; + else if (ret == AVERROR(EAGAIN)) { + /* we fully drain all the output in each decode call, so this should not + * ever happen */ + ret = AVERROR_BUG; + goto finish; + } else if (ret < 0) + goto finish; + } + + while (ret >= 0) { + ret = avcodec_receive_frame(avctx, frame); + if (ret < 0) { + if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) + ret = 0; + goto finish; + } + + if (frame != avci->compat_decode_frame) { + if (!avctx->refcounted_frames) { + ret = unrefcount_frame(avci, frame); + if (ret < 0) + goto finish; + } + + *got_frame = 1; + frame = avci->compat_decode_frame; + } else { + if (!avci->compat_decode_warned) { + av_log(avctx, AV_LOG_WARNING, "The deprecated avcodec_decode_* " + "API cannot return all the frames for this decoder. " + "Some frames will be dropped. Update your code to the " + "new decoding API to fix this.\n"); + avci->compat_decode_warned = 1; + } + } + + if (avci->draining || (!avctx->codec->bsfs && avci->compat_decode_consumed < pkt->size)) + break; + } + +finish: + if (ret == 0) { + /* if there are any bsfs then assume full packet is always consumed */ + if (avctx->codec->bsfs) + ret = pkt->size; + else + ret = FFMIN(avci->compat_decode_consumed, pkt->size); + } + avci->compat_decode_consumed = 0; + avci->compat_decode_partial_size = (ret >= 0) ? pkt->size - ret : 0; + + return ret; +} + +int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *picture, + int *got_picture_ptr, + const AVPacket *avpkt) +{ + return compat_decode(avctx, picture, got_picture_ptr, avpkt); +} + +int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx, + AVFrame *frame, + int *got_frame_ptr, + const AVPacket *avpkt) +{ + return compat_decode(avctx, frame, got_frame_ptr, avpkt); +} + +static void get_subtitle_defaults(AVSubtitle *sub) +{ + memset(sub, 0, sizeof(*sub)); + sub->pts = AV_NOPTS_VALUE; +} + +#define UTF8_MAX_BYTES 4 /* 5 and 6 bytes sequences should not be used */ +static int recode_subtitle(AVCodecContext *avctx, + AVPacket *outpkt, const AVPacket *inpkt) +{ +#if CONFIG_ICONV + iconv_t cd = (iconv_t)-1; + int ret = 0; + char *inb, *outb; + size_t inl, outl; + AVPacket tmp; +#endif + + if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_PRE_DECODER || inpkt->size == 0) + return 0; + +#if CONFIG_ICONV + cd = iconv_open("UTF-8", avctx->sub_charenc); + av_assert0(cd != (iconv_t)-1); + + inb = inpkt->data; + inl = inpkt->size; + + if (inl >= INT_MAX / UTF8_MAX_BYTES - AV_INPUT_BUFFER_PADDING_SIZE) { + av_log(avctx, AV_LOG_ERROR, "Subtitles packet is too big for recoding\n"); + ret = AVERROR(ENOMEM); + goto end; + } + + ret = av_new_packet(&tmp, inl * UTF8_MAX_BYTES); + if (ret < 0) + goto end; + outpkt->buf = tmp.buf; + outpkt->data = tmp.data; + outpkt->size = tmp.size; + outb = outpkt->data; + outl = outpkt->size; + + if (iconv(cd, &inb, &inl, &outb, &outl) == (size_t)-1 || + iconv(cd, NULL, NULL, &outb, &outl) == (size_t)-1 || + outl >= outpkt->size || inl != 0) { + ret = FFMIN(AVERROR(errno), -1); + av_log(avctx, AV_LOG_ERROR, "Unable to recode subtitle event \"%s\" " + "from %s to UTF-8\n", inpkt->data, avctx->sub_charenc); + av_packet_unref(&tmp); + goto end; + } + outpkt->size -= outl; + memset(outpkt->data + outpkt->size, 0, outl); + +end: + if (cd != (iconv_t)-1) + iconv_close(cd); + return ret; +#else + av_log(avctx, AV_LOG_ERROR, "requesting subtitles recoding without iconv"); + return AVERROR(EINVAL); +#endif +} + +static int utf8_check(const uint8_t *str) +{ + const uint8_t *byte; + uint32_t codepoint, min; + + while (*str) { + byte = str; + GET_UTF8(codepoint, *(byte++), return 0;); + min = byte - str == 1 ? 0 : byte - str == 2 ? 0x80 : + 1 << (5 * (byte - str) - 4); + if (codepoint < min || codepoint >= 0x110000 || + codepoint == 0xFFFE /* BOM */ || + codepoint >= 0xD800 && codepoint <= 0xDFFF /* surrogates */) + return 0; + str = byte; + } + return 1; +} + +#if FF_API_ASS_TIMING +static void insert_ts(AVBPrint *buf, int ts) +{ + if (ts == -1) { + av_bprintf(buf, "9:59:59.99,"); + } else { + int h, m, s; + + h = ts/360000; ts -= 360000*h; + m = ts/ 6000; ts -= 6000*m; + s = ts/ 100; ts -= 100*s; + av_bprintf(buf, "%d:%02d:%02d.%02d,", h, m, s, ts); + } +} + +static int convert_sub_to_old_ass_form(AVSubtitle *sub, const AVPacket *pkt, AVRational tb) +{ + int i; + AVBPrint buf; + + av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED); + + for (i = 0; i < sub->num_rects; i++) { + char *final_dialog; + const char *dialog; + AVSubtitleRect *rect = sub->rects[i]; + int ts_start, ts_duration = -1; + long int layer; + + if (rect->type != SUBTITLE_ASS || !strncmp(rect->ass, "Dialogue: ", 10)) + continue; + + av_bprint_clear(&buf); + + /* skip ReadOrder */ + dialog = strchr(rect->ass, ','); + if (!dialog) + continue; + dialog++; + + /* extract Layer or Marked */ + layer = strtol(dialog, (char**)&dialog, 10); + if (*dialog != ',') + continue; + dialog++; + + /* rescale timing to ASS time base (ms) */ + ts_start = av_rescale_q(pkt->pts, tb, av_make_q(1, 100)); + if (pkt->duration != -1) + ts_duration = av_rescale_q(pkt->duration, tb, av_make_q(1, 100)); + sub->end_display_time = FFMAX(sub->end_display_time, 10 * ts_duration); + + /* construct ASS (standalone file form with timestamps) string */ + av_bprintf(&buf, "Dialogue: %ld,", layer); + insert_ts(&buf, ts_start); + insert_ts(&buf, ts_duration == -1 ? -1 : ts_start + ts_duration); + av_bprintf(&buf, "%s\r\n", dialog); + + final_dialog = av_strdup(buf.str); + if (!av_bprint_is_complete(&buf) || !final_dialog) { + av_freep(&final_dialog); + av_bprint_finalize(&buf, NULL); + return AVERROR(ENOMEM); + } + av_freep(&rect->ass); + rect->ass = final_dialog; + } + + av_bprint_finalize(&buf, NULL); + return 0; +} +#endif + +int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub, + int *got_sub_ptr, + AVPacket *avpkt) +{ + int i, ret = 0; + + if (!avpkt->data && avpkt->size) { + av_log(avctx, AV_LOG_ERROR, "invalid packet: NULL data, size != 0\n"); + return AVERROR(EINVAL); + } + if (!avctx->codec) + return AVERROR(EINVAL); + if (avctx->codec->type != AVMEDIA_TYPE_SUBTITLE) { + av_log(avctx, AV_LOG_ERROR, "Invalid media type for subtitles\n"); + return AVERROR(EINVAL); + } + + *got_sub_ptr = 0; + get_subtitle_defaults(sub); + + if ((avctx->codec->capabilities & AV_CODEC_CAP_DELAY) || avpkt->size) { + AVPacket pkt_recoded = *avpkt; + + ret = recode_subtitle(avctx, &pkt_recoded, avpkt); + if (ret < 0) { + *got_sub_ptr = 0; + } else { + ret = extract_packet_props(avctx->internal, &pkt_recoded); + if (ret < 0) + return ret; + + if (avctx->pkt_timebase.num && avpkt->pts != AV_NOPTS_VALUE) + sub->pts = av_rescale_q(avpkt->pts, + avctx->pkt_timebase, AV_TIME_BASE_Q); + ret = avctx->codec->decode(avctx, sub, got_sub_ptr, &pkt_recoded); + av_assert1((ret >= 0) >= !!*got_sub_ptr && + !!*got_sub_ptr >= !!sub->num_rects); + +#if FF_API_ASS_TIMING + if (avctx->sub_text_format == FF_SUB_TEXT_FMT_ASS_WITH_TIMINGS + && *got_sub_ptr && sub->num_rects) { + const AVRational tb = avctx->pkt_timebase.num ? avctx->pkt_timebase + : avctx->time_base; + int err = convert_sub_to_old_ass_form(sub, avpkt, tb); + if (err < 0) + ret = err; + } +#endif + + if (sub->num_rects && !sub->end_display_time && avpkt->duration && + avctx->pkt_timebase.num) { + AVRational ms = { 1, 1000 }; + sub->end_display_time = av_rescale_q(avpkt->duration, + avctx->pkt_timebase, ms); + } + + if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB) + sub->format = 0; + else if (avctx->codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB) + sub->format = 1; + + for (i = 0; i < sub->num_rects; i++) { + if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_IGNORE && + sub->rects[i]->ass && !utf8_check(sub->rects[i]->ass)) { + av_log(avctx, AV_LOG_ERROR, + "Invalid UTF-8 in decoded subtitles text; " + "maybe missing -sub_charenc option\n"); + avsubtitle_free(sub); + ret = AVERROR_INVALIDDATA; + break; + } + } + + if (avpkt->data != pkt_recoded.data) { // did we recode? + /* prevent from destroying side data from original packet */ + pkt_recoded.side_data = NULL; + pkt_recoded.side_data_elems = 0; + + av_packet_unref(&pkt_recoded); + } + } + + if (*got_sub_ptr) + avctx->frame_number++; + } + + return ret; +} + +enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *avctx, + const enum AVPixelFormat *fmt) +{ + const AVPixFmtDescriptor *desc; + const AVCodecHWConfig *config; + int i, n; + + // If a device was supplied when the codec was opened, assume that the + // user wants to use it. + if (avctx->hw_device_ctx && avctx->codec->hw_configs) { + AVHWDeviceContext *device_ctx = + (AVHWDeviceContext*)avctx->hw_device_ctx->data; + for (i = 0;; i++) { + config = &avctx->codec->hw_configs[i]->public; + if (!config) + break; + if (!(config->methods & + AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) + continue; + if (device_ctx->type != config->device_type) + continue; + for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) { + if (config->pix_fmt == fmt[n]) + return fmt[n]; + } + } + } + // No device or other setup, so we have to choose from things which + // don't any other external information. + + // If the last element of the list is a software format, choose it + // (this should be best software format if any exist). + for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++); + desc = av_pix_fmt_desc_get(fmt[n - 1]); + if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL)) + return fmt[n - 1]; + + // Finally, traverse the list in order and choose the first entry + // with no external dependencies (if there is no hardware configuration + // information available then this just picks the first entry). + for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) { + for (i = 0;; i++) { + config = avcodec_get_hw_config(avctx->codec, i); + if (!config) + break; + if (config->pix_fmt == fmt[n]) + break; + } + if (!config) { + // No specific config available, so the decoder must be able + // to handle this format without any additional setup. + return fmt[n]; + } + if (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL) { + // Usable with only internal setup. + return fmt[n]; + } + } + + // Nothing is usable, give up. + return AV_PIX_FMT_NONE; +} + +int ff_decode_get_hw_frames_ctx(AVCodecContext *avctx, + enum AVHWDeviceType dev_type) +{ + AVHWDeviceContext *device_ctx; + AVHWFramesContext *frames_ctx; + int ret; + + if (!avctx->hwaccel) + return AVERROR(ENOSYS); + + if (avctx->hw_frames_ctx) + return 0; + if (!avctx->hw_device_ctx) { + av_log(avctx, AV_LOG_ERROR, "A hardware frames or device context is " + "required for hardware accelerated decoding.\n"); + return AVERROR(EINVAL); + } + + device_ctx = (AVHWDeviceContext *)avctx->hw_device_ctx->data; + if (device_ctx->type != dev_type) { + av_log(avctx, AV_LOG_ERROR, "Device type %s expected for hardware " + "decoding, but got %s.\n", av_hwdevice_get_type_name(dev_type), + av_hwdevice_get_type_name(device_ctx->type)); + return AVERROR(EINVAL); + } + + ret = avcodec_get_hw_frames_parameters(avctx, + avctx->hw_device_ctx, + avctx->hwaccel->pix_fmt, + &avctx->hw_frames_ctx); + if (ret < 0) + return ret; + + frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data; + + + if (frames_ctx->initial_pool_size) { + // We guarantee 4 base work surfaces. The function above guarantees 1 + // (the absolute minimum), so add the missing count. + frames_ctx->initial_pool_size += 3; + } + + ret = av_hwframe_ctx_init(avctx->hw_frames_ctx); + if (ret < 0) { + av_buffer_unref(&avctx->hw_frames_ctx); + return ret; + } + + return 0; +} + +int avcodec_get_hw_frames_parameters(AVCodecContext *avctx, + AVBufferRef *device_ref, + enum AVPixelFormat hw_pix_fmt, + AVBufferRef **out_frames_ref) +{ + AVBufferRef *frames_ref = NULL; + const AVCodecHWConfigInternal *hw_config; + const AVHWAccel *hwa; + int i, ret; + + for (i = 0;; i++) { + hw_config = avctx->codec->hw_configs[i]; + if (!hw_config) + return AVERROR(ENOENT); + if (hw_config->public.pix_fmt == hw_pix_fmt) + break; + } + + hwa = hw_config->hwaccel; + if (!hwa || !hwa->frame_params) + return AVERROR(ENOENT); + + frames_ref = av_hwframe_ctx_alloc(device_ref); + if (!frames_ref) + return AVERROR(ENOMEM); + + ret = hwa->frame_params(avctx, frames_ref); + if (ret >= 0) { + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)frames_ref->data; + + if (frames_ctx->initial_pool_size) { + // If the user has requested that extra output surfaces be + // available then add them here. + if (avctx->extra_hw_frames > 0) + frames_ctx->initial_pool_size += avctx->extra_hw_frames; + + // If frame threading is enabled then an extra surface per thread + // is also required. + if (avctx->active_thread_type & FF_THREAD_FRAME) + frames_ctx->initial_pool_size += avctx->thread_count; + } + + *out_frames_ref = frames_ref; + } else { + av_buffer_unref(&frames_ref); + } + return ret; +} + +static int hwaccel_init(AVCodecContext *avctx, + const AVCodecHWConfigInternal *hw_config) +{ + const AVHWAccel *hwaccel; + int err; + + hwaccel = hw_config->hwaccel; + if (hwaccel->capabilities & AV_HWACCEL_CODEC_CAP_EXPERIMENTAL && + avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) { + av_log(avctx, AV_LOG_WARNING, "Ignoring experimental hwaccel: %s\n", + hwaccel->name); + return AVERROR_PATCHWELCOME; + } + + if (hwaccel->priv_data_size) { + avctx->internal->hwaccel_priv_data = + av_mallocz(hwaccel->priv_data_size); + if (!avctx->internal->hwaccel_priv_data) + return AVERROR(ENOMEM); + } + + avctx->hwaccel = hwaccel; + if (hwaccel->init) { + err = hwaccel->init(avctx); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed setup for format %s: " + "hwaccel initialisation returned error.\n", + av_get_pix_fmt_name(hw_config->public.pix_fmt)); + av_freep(&avctx->internal->hwaccel_priv_data); + avctx->hwaccel = NULL; + return err; + } + } + + return 0; +} + +static void hwaccel_uninit(AVCodecContext *avctx) +{ + if (avctx->hwaccel && avctx->hwaccel->uninit) + avctx->hwaccel->uninit(avctx); + + av_freep(&avctx->internal->hwaccel_priv_data); + + avctx->hwaccel = NULL; + + av_buffer_unref(&avctx->hw_frames_ctx); +} + +int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt) +{ + const AVPixFmtDescriptor *desc; + enum AVPixelFormat *choices; + enum AVPixelFormat ret, user_choice; + const AVCodecHWConfigInternal *hw_config; + const AVCodecHWConfig *config; + int i, n, err; + + // Find end of list. + for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++); + // Must contain at least one entry. + av_assert0(n >= 1); + // If a software format is available, it must be the last entry. + desc = av_pix_fmt_desc_get(fmt[n - 1]); + if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) { + // No software format is available. + } else { + avctx->sw_pix_fmt = fmt[n - 1]; + } + + choices = av_malloc_array(n + 1, sizeof(*choices)); + if (!choices) + return AV_PIX_FMT_NONE; + + memcpy(choices, fmt, (n + 1) * sizeof(*choices)); + + for (;;) { + // Remove the previous hwaccel, if there was one. + hwaccel_uninit(avctx); + + user_choice = avctx->get_format(avctx, choices); + if (user_choice == AV_PIX_FMT_NONE) { + // Explicitly chose nothing, give up. + ret = AV_PIX_FMT_NONE; + break; + } + + desc = av_pix_fmt_desc_get(user_choice); + if (!desc) { + av_log(avctx, AV_LOG_ERROR, "Invalid format returned by " + "get_format() callback.\n"); + ret = AV_PIX_FMT_NONE; + break; + } + av_log(avctx, AV_LOG_DEBUG, "Format %s chosen by get_format().\n", + desc->name); + + for (i = 0; i < n; i++) { + if (choices[i] == user_choice) + break; + } + if (i == n) { + av_log(avctx, AV_LOG_ERROR, "Invalid return from get_format(): " + "%s not in possible list.\n", desc->name); + break; + } + + if (avctx->codec->hw_configs) { + for (i = 0;; i++) { + hw_config = avctx->codec->hw_configs[i]; + if (!hw_config) + break; + if (hw_config->public.pix_fmt == user_choice) + break; + } + } else { + hw_config = NULL; + } + + if (!hw_config) { + // No config available, so no extra setup required. + ret = user_choice; + break; + } + config = &hw_config->public; + + if (config->methods & + AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX && + avctx->hw_frames_ctx) { + const AVHWFramesContext *frames_ctx = + (AVHWFramesContext*)avctx->hw_frames_ctx->data; + if (frames_ctx->format != user_choice) { + av_log(avctx, AV_LOG_ERROR, "Invalid setup for format %s: " + "does not match the format of the provided frames " + "context.\n", desc->name); + goto try_again; + } + } else if (config->methods & + AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX && + avctx->hw_device_ctx) { + const AVHWDeviceContext *device_ctx = + (AVHWDeviceContext*)avctx->hw_device_ctx->data; + if (device_ctx->type != config->device_type) { + av_log(avctx, AV_LOG_ERROR, "Invalid setup for format %s: " + "does not match the type of the provided device " + "context.\n", desc->name); + goto try_again; + } + } else if (config->methods & + AV_CODEC_HW_CONFIG_METHOD_INTERNAL) { + // Internal-only setup, no additional configuration. + } else if (config->methods & + AV_CODEC_HW_CONFIG_METHOD_AD_HOC) { + // Some ad-hoc configuration we can't see and can't check. + } else { + av_log(avctx, AV_LOG_ERROR, "Invalid setup for format %s: " + "missing configuration.\n", desc->name); + goto try_again; + } + if (hw_config->hwaccel) { + av_log(avctx, AV_LOG_DEBUG, "Format %s requires hwaccel " + "initialisation.\n", desc->name); + err = hwaccel_init(avctx, hw_config); + if (err < 0) + goto try_again; + } + ret = user_choice; + break; + + try_again: + av_log(avctx, AV_LOG_DEBUG, "Format %s not usable, retrying " + "get_format() without it.\n", desc->name); + for (i = 0; i < n; i++) { + if (choices[i] == user_choice) + break; + } + for (; i + 1 < n; i++) + choices[i] = choices[i + 1]; + --n; + } + + av_freep(&choices); + return ret; +} + +static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame) +{ + FramePool *pool = avctx->internal->pool; + int i, ret; + + switch (avctx->codec_type) { + case AVMEDIA_TYPE_VIDEO: { + uint8_t *data[4]; + int linesize[4]; + int size[4] = { 0 }; + int w = frame->width; + int h = frame->height; + int tmpsize, unaligned; + + if (pool->format == frame->format && + pool->width == frame->width && pool->height == frame->height) + return 0; + + avcodec_align_dimensions2(avctx, &w, &h, pool->stride_align); + + do { + // NOTE: do not align linesizes individually, this breaks e.g. assumptions + // that linesize[0] == 2*linesize[1] in the MPEG-encoder for 4:2:2 + ret = av_image_fill_linesizes(linesize, avctx->pix_fmt, w); + if (ret < 0) + return ret; + // increase alignment of w for next try (rhs gives the lowest bit set in w) + w += w & ~(w - 1); + + unaligned = 0; + for (i = 0; i < 4; i++) + unaligned |= linesize[i] % pool->stride_align[i]; + } while (unaligned); + + tmpsize = av_image_fill_pointers(data, avctx->pix_fmt, h, + NULL, linesize); + if (tmpsize < 0) + return -1; + + for (i = 0; i < 3 && data[i + 1]; i++) + size[i] = data[i + 1] - data[i]; + size[i] = tmpsize - (data[i] - data[0]); + + for (i = 0; i < 4; i++) { + av_buffer_pool_uninit(&pool->pools[i]); + pool->linesize[i] = linesize[i]; + if (size[i]) { + pool->pools[i] = av_buffer_pool_init(size[i] + 16 + STRIDE_ALIGN - 1, + CONFIG_MEMORY_POISONING ? + NULL : + av_buffer_allocz); + if (!pool->pools[i]) { + ret = AVERROR(ENOMEM); + goto fail; + } + } + } + pool->format = frame->format; + pool->width = frame->width; + pool->height = frame->height; + + break; + } + case AVMEDIA_TYPE_AUDIO: { + int ch = frame->channels; //av_get_channel_layout_nb_channels(frame->channel_layout); + int planar = av_sample_fmt_is_planar(frame->format); + int planes = planar ? ch : 1; + + if (pool->format == frame->format && pool->planes == planes && + pool->channels == ch && frame->nb_samples == pool->samples) + return 0; + + av_buffer_pool_uninit(&pool->pools[0]); + ret = av_samples_get_buffer_size(&pool->linesize[0], ch, + frame->nb_samples, frame->format, 0); + if (ret < 0) + goto fail; + + pool->pools[0] = av_buffer_pool_init(pool->linesize[0], NULL); + if (!pool->pools[0]) { + ret = AVERROR(ENOMEM); + goto fail; + } + + pool->format = frame->format; + pool->planes = planes; + pool->channels = ch; + pool->samples = frame->nb_samples; + break; + } + default: av_assert0(0); + } + return 0; +fail: + for (i = 0; i < 4; i++) + av_buffer_pool_uninit(&pool->pools[i]); + pool->format = -1; + pool->planes = pool->channels = pool->samples = 0; + pool->width = pool->height = 0; + return ret; +} + +static int audio_get_buffer(AVCodecContext *avctx, AVFrame *frame) +{ + FramePool *pool = avctx->internal->pool; + int planes = pool->planes; + int i; + + frame->linesize[0] = pool->linesize[0]; + + if (planes > AV_NUM_DATA_POINTERS) { + frame->extended_data = av_mallocz_array(planes, sizeof(*frame->extended_data)); + frame->nb_extended_buf = planes - AV_NUM_DATA_POINTERS; + frame->extended_buf = av_mallocz_array(frame->nb_extended_buf, + sizeof(*frame->extended_buf)); + if (!frame->extended_data || !frame->extended_buf) { + av_freep(&frame->extended_data); + av_freep(&frame->extended_buf); + return AVERROR(ENOMEM); + } + } else { + frame->extended_data = frame->data; + av_assert0(frame->nb_extended_buf == 0); + } + + for (i = 0; i < FFMIN(planes, AV_NUM_DATA_POINTERS); i++) { + frame->buf[i] = av_buffer_pool_get(pool->pools[0]); + if (!frame->buf[i]) + goto fail; + frame->extended_data[i] = frame->data[i] = frame->buf[i]->data; + } + for (i = 0; i < frame->nb_extended_buf; i++) { + frame->extended_buf[i] = av_buffer_pool_get(pool->pools[0]); + if (!frame->extended_buf[i]) + goto fail; + frame->extended_data[i + AV_NUM_DATA_POINTERS] = frame->extended_buf[i]->data; + } + + if (avctx->debug & FF_DEBUG_BUFFERS) + av_log(avctx, AV_LOG_DEBUG, "default_get_buffer called on frame %p", frame); + + return 0; +fail: + av_frame_unref(frame); + return AVERROR(ENOMEM); +} + +static int video_get_buffer(AVCodecContext *s, AVFrame *pic) +{ + FramePool *pool = s->internal->pool; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pic->format); + int i; + + if (pic->data[0] || pic->data[1] || pic->data[2] || pic->data[3]) { + av_log(s, AV_LOG_ERROR, "pic->data[*]!=NULL in avcodec_default_get_buffer\n"); + return -1; + } + + if (!desc) { + av_log(s, AV_LOG_ERROR, + "Unable to get pixel format descriptor for format %s\n", + av_get_pix_fmt_name(pic->format)); + return AVERROR(EINVAL); + } + + memset(pic->data, 0, sizeof(pic->data)); + pic->extended_data = pic->data; + + for (i = 0; i < 4 && pool->pools[i]; i++) { + pic->linesize[i] = pool->linesize[i]; + + pic->buf[i] = av_buffer_pool_get(pool->pools[i]); + if (!pic->buf[i]) + goto fail; + + pic->data[i] = pic->buf[i]->data; + } + for (; i < AV_NUM_DATA_POINTERS; i++) { + pic->data[i] = NULL; + pic->linesize[i] = 0; + } + if (desc->flags & AV_PIX_FMT_FLAG_PAL || + ((desc->flags & FF_PSEUDOPAL) && pic->data[1])) + avpriv_set_systematic_pal2((uint32_t *)pic->data[1], pic->format); + + if (s->debug & FF_DEBUG_BUFFERS) + av_log(s, AV_LOG_DEBUG, "default_get_buffer called on pic %p\n", pic); + + return 0; +fail: + av_frame_unref(pic); + return AVERROR(ENOMEM); +} + +int avcodec_default_get_buffer2(AVCodecContext *avctx, AVFrame *frame, int flags) +{ + int ret; + + if (avctx->hw_frames_ctx) { + ret = av_hwframe_get_buffer(avctx->hw_frames_ctx, frame, 0); + frame->width = avctx->coded_width; + frame->height = avctx->coded_height; + return ret; + } + + if ((ret = update_frame_pool(avctx, frame)) < 0) + return ret; + + switch (avctx->codec_type) { + case AVMEDIA_TYPE_VIDEO: + return video_get_buffer(avctx, frame); + case AVMEDIA_TYPE_AUDIO: + return audio_get_buffer(avctx, frame); + default: + return -1; + } +} + +static int add_metadata_from_side_data(const AVPacket *avpkt, AVFrame *frame) +{ + int size; + const uint8_t *side_metadata; + + AVDictionary **frame_md = &frame->metadata; + + side_metadata = av_packet_get_side_data(avpkt, + AV_PKT_DATA_STRINGS_METADATA, &size); + return av_packet_unpack_dictionary(side_metadata, size, frame_md); +} + +int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame) +{ + const AVPacket *pkt = avctx->internal->last_pkt_props; + int i; + static const struct { + enum AVPacketSideDataType packet; + enum AVFrameSideDataType frame; + } sd[] = { + { AV_PKT_DATA_REPLAYGAIN , AV_FRAME_DATA_REPLAYGAIN }, + { AV_PKT_DATA_DISPLAYMATRIX, AV_FRAME_DATA_DISPLAYMATRIX }, + { AV_PKT_DATA_SPHERICAL, AV_FRAME_DATA_SPHERICAL }, + { AV_PKT_DATA_STEREO3D, AV_FRAME_DATA_STEREO3D }, + { AV_PKT_DATA_AUDIO_SERVICE_TYPE, AV_FRAME_DATA_AUDIO_SERVICE_TYPE }, + { AV_PKT_DATA_MASTERING_DISPLAY_METADATA, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA }, + { AV_PKT_DATA_CONTENT_LIGHT_LEVEL, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL }, + { AV_PKT_DATA_A53_CC, AV_FRAME_DATA_A53_CC }, + }; + + if (pkt) { + frame->pts = pkt->pts; +#if FF_API_PKT_PTS +FF_DISABLE_DEPRECATION_WARNINGS + frame->pkt_pts = pkt->pts; +FF_ENABLE_DEPRECATION_WARNINGS +#endif + frame->pkt_pos = pkt->pos; + frame->pkt_duration = pkt->duration; + frame->pkt_size = pkt->size; + + for (i = 0; i < FF_ARRAY_ELEMS(sd); i++) { + int size; + uint8_t *packet_sd = av_packet_get_side_data(pkt, sd[i].packet, &size); + if (packet_sd) { + AVFrameSideData *frame_sd = av_frame_new_side_data(frame, + sd[i].frame, + size); + if (!frame_sd) + return AVERROR(ENOMEM); + + memcpy(frame_sd->data, packet_sd, size); + } + } + add_metadata_from_side_data(pkt, frame); + + if (pkt->flags & AV_PKT_FLAG_DISCARD) { + frame->flags |= AV_FRAME_FLAG_DISCARD; + } else { + frame->flags = (frame->flags & ~AV_FRAME_FLAG_DISCARD); + } + } + frame->reordered_opaque = avctx->reordered_opaque; + + if (frame->color_primaries == AVCOL_PRI_UNSPECIFIED) + frame->color_primaries = avctx->color_primaries; + if (frame->color_trc == AVCOL_TRC_UNSPECIFIED) + frame->color_trc = avctx->color_trc; + if (frame->colorspace == AVCOL_SPC_UNSPECIFIED) + frame->colorspace = avctx->colorspace; + if (frame->color_range == AVCOL_RANGE_UNSPECIFIED) + frame->color_range = avctx->color_range; + if (frame->chroma_location == AVCHROMA_LOC_UNSPECIFIED) + frame->chroma_location = avctx->chroma_sample_location; + + switch (avctx->codec->type) { + case AVMEDIA_TYPE_VIDEO: + frame->format = avctx->pix_fmt; + if (!frame->sample_aspect_ratio.num) + frame->sample_aspect_ratio = avctx->sample_aspect_ratio; + + if (frame->width && frame->height && + av_image_check_sar(frame->width, frame->height, + frame->sample_aspect_ratio) < 0) { + av_log(avctx, AV_LOG_WARNING, "ignoring invalid SAR: %u/%u\n", + frame->sample_aspect_ratio.num, + frame->sample_aspect_ratio.den); + frame->sample_aspect_ratio = (AVRational){ 0, 1 }; + } + + break; + case AVMEDIA_TYPE_AUDIO: + if (!frame->sample_rate) + frame->sample_rate = avctx->sample_rate; + if (frame->format < 0) + frame->format = avctx->sample_fmt; + if (!frame->channel_layout) { + if (avctx->channel_layout) { + if (av_get_channel_layout_nb_channels(avctx->channel_layout) != + avctx->channels) { + av_log(avctx, AV_LOG_ERROR, "Inconsistent channel " + "configuration.\n"); + return AVERROR(EINVAL); + } + + frame->channel_layout = avctx->channel_layout; + } else { + if (avctx->channels > FF_SANE_NB_CHANNELS) { + av_log(avctx, AV_LOG_ERROR, "Too many channels: %d.\n", + avctx->channels); + return AVERROR(ENOSYS); + } + } + } + frame->channels = avctx->channels; + break; + } + return 0; +} + +static void validate_avframe_allocation(AVCodecContext *avctx, AVFrame *frame) +{ + if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) { + int i; + int num_planes = av_pix_fmt_count_planes(frame->format); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); + int flags = desc ? desc->flags : 0; + if (num_planes == 1 && (flags & AV_PIX_FMT_FLAG_PAL)) + num_planes = 2; + if ((flags & FF_PSEUDOPAL) && frame->data[1]) + num_planes = 2; + for (i = 0; i < num_planes; i++) { + av_assert0(frame->data[i]); + } + // For formats without data like hwaccel allow unused pointers to be non-NULL. + for (i = num_planes; num_planes > 0 && i < FF_ARRAY_ELEMS(frame->data); i++) { + if (frame->data[i]) + av_log(avctx, AV_LOG_ERROR, "Buffer returned by get_buffer2() did not zero unused plane pointers\n"); + frame->data[i] = NULL; + } + } +} + +static void decode_data_free(void *opaque, uint8_t *data) +{ + FrameDecodeData *fdd = (FrameDecodeData*)data; + + if (fdd->post_process_opaque_free) + fdd->post_process_opaque_free(fdd->post_process_opaque); + + if (fdd->hwaccel_priv_free) + fdd->hwaccel_priv_free(fdd->hwaccel_priv); + + av_freep(&fdd); +} + +int ff_attach_decode_data(AVFrame *frame) +{ + AVBufferRef *fdd_buf; + FrameDecodeData *fdd; + + av_assert1(!frame->private_ref); + av_buffer_unref(&frame->private_ref); + + fdd = av_mallocz(sizeof(*fdd)); + if (!fdd) + return AVERROR(ENOMEM); + + fdd_buf = av_buffer_create((uint8_t*)fdd, sizeof(*fdd), decode_data_free, + NULL, AV_BUFFER_FLAG_READONLY); + if (!fdd_buf) { + av_freep(&fdd); + return AVERROR(ENOMEM); + } + + frame->private_ref = fdd_buf; + + return 0; +} + +static int get_buffer_internal(AVCodecContext *avctx, AVFrame *frame, int flags) +{ + const AVHWAccel *hwaccel = avctx->hwaccel; + int override_dimensions = 1; + int ret; + + if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) { + if ((ret = av_image_check_size2(avctx->width, avctx->height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx)) < 0 || avctx->pix_fmt<0) { + av_log(avctx, AV_LOG_ERROR, "video_get_buffer: image parameters invalid\n"); + return AVERROR(EINVAL); + } + + if (frame->width <= 0 || frame->height <= 0) { + frame->width = FFMAX(avctx->width, AV_CEIL_RSHIFT(avctx->coded_width, avctx->lowres)); + frame->height = FFMAX(avctx->height, AV_CEIL_RSHIFT(avctx->coded_height, avctx->lowres)); + override_dimensions = 0; + } + + if (frame->data[0] || frame->data[1] || frame->data[2] || frame->data[3]) { + av_log(avctx, AV_LOG_ERROR, "pic->data[*]!=NULL in get_buffer_internal\n"); + return AVERROR(EINVAL); + } + } + ret = ff_decode_frame_props(avctx, frame); + if (ret < 0) + return ret; + + if (hwaccel) { + if (hwaccel->alloc_frame) { + ret = hwaccel->alloc_frame(avctx, frame); + goto end; + } + } else + avctx->sw_pix_fmt = avctx->pix_fmt; + + ret = avctx->get_buffer2(avctx, frame, flags); + if (ret < 0) + goto end; + + validate_avframe_allocation(avctx, frame); + + ret = ff_attach_decode_data(frame); + if (ret < 0) + goto end; + +end: + if (avctx->codec_type == AVMEDIA_TYPE_VIDEO && !override_dimensions && + !(avctx->codec->caps_internal & FF_CODEC_CAP_EXPORTS_CROPPING)) { + frame->width = avctx->width; + frame->height = avctx->height; + } + + if (ret < 0) + av_frame_unref(frame); + + return ret; +} + +int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags) +{ + int ret = get_buffer_internal(avctx, frame, flags); + if (ret < 0) { + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + frame->width = frame->height = 0; + } + return ret; +} + +static int reget_buffer_internal(AVCodecContext *avctx, AVFrame *frame) +{ + AVFrame *tmp; + int ret; + + av_assert0(avctx->codec_type == AVMEDIA_TYPE_VIDEO); + + if (frame->data[0] && (frame->width != avctx->width || frame->height != avctx->height || frame->format != avctx->pix_fmt)) { + av_log(avctx, AV_LOG_WARNING, "Picture changed from size:%dx%d fmt:%s to size:%dx%d fmt:%s in reget buffer()\n", + frame->width, frame->height, av_get_pix_fmt_name(frame->format), avctx->width, avctx->height, av_get_pix_fmt_name(avctx->pix_fmt)); + av_frame_unref(frame); + } + + if (!frame->data[0]) + return ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF); + + if (av_frame_is_writable(frame)) + return ff_decode_frame_props(avctx, frame); + + tmp = av_frame_alloc(); + if (!tmp) + return AVERROR(ENOMEM); + + av_frame_move_ref(tmp, frame); + + ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF); + if (ret < 0) { + av_frame_free(&tmp); + return ret; + } + + av_frame_copy(frame, tmp); + av_frame_free(&tmp); + + return 0; +} + +int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame) +{ + int ret = reget_buffer_internal(avctx, frame); + if (ret < 0) + av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n"); + return ret; +} + +void avcodec_flush_buffers(AVCodecContext *avctx) +{ + avctx->internal->draining = 0; + avctx->internal->draining_done = 0; + avctx->internal->nb_draining_errors = 0; + av_frame_unref(avctx->internal->buffer_frame); + av_frame_unref(avctx->internal->compat_decode_frame); + av_packet_unref(avctx->internal->buffer_pkt); + avctx->internal->buffer_pkt_valid = 0; + + av_packet_unref(avctx->internal->ds.in_pkt); + + if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME) + ff_thread_flush(avctx); + else if (avctx->codec->flush) + avctx->codec->flush(avctx); + + avctx->pts_correction_last_pts = + avctx->pts_correction_last_dts = INT64_MIN; + + ff_decode_bsfs_uninit(avctx); + + if (!avctx->refcounted_frames) + av_frame_unref(avctx->internal->to_free); +} + +void ff_decode_bsfs_uninit(AVCodecContext *avctx) +{ + DecodeFilterContext *s = &avctx->internal->filter; + int i; + + for (i = 0; i < s->nb_bsfs; i++) + av_bsf_free(&s->bsfs[i]); + av_freep(&s->bsfs); + s->nb_bsfs = 0; +} diff --git a/libs/ffvpx/libavcodec/decode.h b/libs/ffvpx/libavcodec/decode.h new file mode 100644 index 000000000..15271c529 --- /dev/null +++ b/libs/ffvpx/libavcodec/decode.h @@ -0,0 +1,79 @@ +/* + * generic decoding-related code + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_DECODE_H +#define AVCODEC_DECODE_H + +#include "libavutil/buffer.h" +#include "libavutil/frame.h" +#include "libavutil/hwcontext.h" + +#include "avcodec.h" + +/** + * This struct stores per-frame lavc-internal data and is attached to it via + * private_ref. + */ +typedef struct FrameDecodeData { + /** + * The callback to perform some delayed processing on the frame right + * before it is returned to the caller. + * + * @note This code is called at some unspecified point after the frame is + * returned from the decoder's decode/receive_frame call. Therefore it cannot rely + * on AVCodecContext being in any specific state, so it does not get to + * access AVCodecContext directly at all. All the state it needs must be + * stored in the post_process_opaque object. + */ + int (*post_process)(void *logctx, AVFrame *frame); + void *post_process_opaque; + void (*post_process_opaque_free)(void *opaque); + + /** + * Per-frame private data for hwaccels. + */ + void *hwaccel_priv; + void (*hwaccel_priv_free)(void *priv); +} FrameDecodeData; + +/** + * Called by decoders to get the next packet for decoding. + * + * @param pkt An empty packet to be filled with data. + * @return 0 if a new reference has been successfully written to pkt + * AVERROR(EAGAIN) if no data is currently available + * AVERROR_EOF if and end of stream has been reached, so no more data + * will be available + */ +int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt); + +void ff_decode_bsfs_uninit(AVCodecContext *avctx); + +/** + * Make sure avctx.hw_frames_ctx is set. If it's not set, the function will + * try to allocate it from hw_device_ctx. If that is not possible, an error + * message is printed, and an error code is returned. + */ +int ff_decode_get_hw_frames_ctx(AVCodecContext *avctx, + enum AVHWDeviceType dev_type); + +int ff_attach_decode_data(AVFrame *frame); + +#endif /* AVCODEC_DECODE_H */ diff --git a/libs/ffvpx/libavcodec/dummy_funcs.c b/libs/ffvpx/libavcodec/dummy_funcs.c new file mode 100644 index 000000000..295ee84e1 --- /dev/null +++ b/libs/ffvpx/libavcodec/dummy_funcs.c @@ -0,0 +1,851 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "avcodec.h" + +typedef struct H264PredContext H264PredContext; +typedef struct VideoDSPContext VideoDSPContext; +typedef struct VP8DSPContext VP8DSPContext; +typedef struct VP9DSPContext VP9DSPContext; +typedef struct FLACDSPContext FLACDSPContext; + +AVHWAccel ff_h263_vaapi_hwaccel; +AVHWAccel ff_h263_videotoolbox_hwaccel; +AVHWAccel ff_h264_d3d11va_hwaccel; +AVHWAccel ff_h264_dxva2_hwaccel; +AVHWAccel ff_h264_vaapi_hwaccel; +AVHWAccel ff_h264_vdpau_hwaccel; +AVHWAccel ff_h264_videotoolbox_hwaccel; +AVHWAccel ff_hevc_d3d11va_hwaccel; +AVHWAccel ff_hevc_dxva2_hwaccel; +AVHWAccel ff_hevc_vaapi_hwaccel; +AVHWAccel ff_hevc_vdpau_hwaccel; +AVHWAccel ff_mpeg1_xvmc_hwaccel; +AVHWAccel ff_mpeg1_vdpau_hwaccel; +AVHWAccel ff_mpeg1_videotoolbox_hwaccel; +AVHWAccel ff_mpeg2_xvmc_hwaccel; +AVHWAccel ff_mpeg2_d3d11va_hwaccel; +AVHWAccel ff_mpeg2_dxva2_hwaccel; +AVHWAccel ff_mpeg2_vaapi_hwaccel; +AVHWAccel ff_mpeg2_vdpau_hwaccel; +AVHWAccel ff_mpeg2_videotoolbox_hwaccel; +AVHWAccel ff_mpeg4_vaapi_hwaccel; +AVHWAccel ff_mpeg4_vdpau_hwaccel; +AVHWAccel ff_mpeg4_videotoolbox_hwaccel; +AVHWAccel ff_vc1_d3d11va_hwaccel; +AVHWAccel ff_vc1_dxva2_hwaccel; +AVHWAccel ff_vc1_vaapi_hwaccel; +AVHWAccel ff_vc1_vdpau_hwaccel; +AVHWAccel ff_wmv3_d3d11va_hwaccel; +AVHWAccel ff_wmv3_dxva2_hwaccel; +AVHWAccel ff_wmv3_vaapi_hwaccel; +AVHWAccel ff_wmv3_vdpau_hwaccel; +AVHWAccel ff_vp9_d3d11va_hwaccel; +AVHWAccel ff_vp9_dxva2_hwaccel; +AVHWAccel ff_vp9_vaapi_hwaccel; +/* Added by FFmpeg 3.4 */ +AVHWAccel ff_h264_d3d11va2_hwaccel; +AVHWAccel ff_hevc_d3d11va2_hwaccel; +AVHWAccel ff_hevc_videotoolbox_hwaccel; +AVHWAccel ff_mpeg2_d3d11va2_hwaccel; +AVHWAccel ff_vc1_d3d11va2_hwaccel; +AVHWAccel ff_vp9_d3d11va2_hwaccel; +AVHWAccel ff_wmv3_d3d11va2_hwaccel; + +AVCodec ff_a64multi_encoder; +AVCodec ff_a64multi5_encoder; +AVCodec ff_aasc_decoder; +AVCodec ff_aic_decoder; +AVCodec ff_alias_pix_encoder; +AVCodec ff_alias_pix_decoder; +AVCodec ff_amv_encoder; +AVCodec ff_amv_decoder; +AVCodec ff_anm_decoder; +AVCodec ff_ansi_decoder; +AVCodec ff_apng_encoder; +AVCodec ff_apng_decoder; +AVCodec ff_asv1_encoder; +AVCodec ff_asv1_decoder; +AVCodec ff_asv2_encoder; +AVCodec ff_asv2_decoder; +AVCodec ff_aura_decoder; +AVCodec ff_aura2_decoder; +AVCodec ff_avrp_encoder; +AVCodec ff_avrp_decoder; +AVCodec ff_avrn_decoder; +AVCodec ff_avs_decoder; +AVCodec ff_avui_encoder; +AVCodec ff_avui_decoder; +AVCodec ff_ayuv_encoder; +AVCodec ff_ayuv_decoder; +AVCodec ff_bethsoftvid_decoder; +AVCodec ff_bfi_decoder; +AVCodec ff_bink_decoder; +AVCodec ff_bmp_encoder; +AVCodec ff_bmp_decoder; +AVCodec ff_bmv_video_decoder; +AVCodec ff_brender_pix_decoder; +AVCodec ff_c93_decoder; +AVCodec ff_cavs_decoder; +AVCodec ff_cdgraphics_decoder; +AVCodec ff_cdxl_decoder; +AVCodec ff_cfhd_decoder; +AVCodec ff_cinepak_encoder; +AVCodec ff_cinepak_decoder; +AVCodec ff_cljr_encoder; +AVCodec ff_cljr_decoder; +AVCodec ff_cllc_decoder; +AVCodec ff_comfortnoise_encoder; +AVCodec ff_comfortnoise_decoder; +AVCodec ff_cpia_decoder; +AVCodec ff_cscd_decoder; +AVCodec ff_cyuv_decoder; +AVCodec ff_dds_decoder; +AVCodec ff_dfa_decoder; +AVCodec ff_dirac_decoder; +AVCodec ff_dnxhd_encoder; +AVCodec ff_dnxhd_decoder; +AVCodec ff_dpx_encoder; +AVCodec ff_dpx_decoder; +AVCodec ff_dsicinvideo_decoder; +AVCodec ff_dvaudio_decoder; +AVCodec ff_dvvideo_encoder; +AVCodec ff_dvvideo_decoder; +AVCodec ff_dxa_decoder; +AVCodec ff_dxtory_decoder; +AVCodec ff_eacmv_decoder; +AVCodec ff_eamad_decoder; +AVCodec ff_eatgq_decoder; +AVCodec ff_eatgv_decoder; +AVCodec ff_eatqi_decoder; +AVCodec ff_eightbps_decoder; +AVCodec ff_eightsvx_exp_decoder; +AVCodec ff_eightsvx_fib_decoder; +AVCodec ff_escape124_decoder; +AVCodec ff_escape130_decoder; +AVCodec ff_exr_decoder; +AVCodec ff_ffv1_encoder; +AVCodec ff_ffv1_decoder; +AVCodec ff_ffvhuff_encoder; +AVCodec ff_ffvhuff_decoder; +AVCodec ff_fic_decoder; +AVCodec ff_flashsv_encoder; +AVCodec ff_flashsv_decoder; +AVCodec ff_flashsv2_encoder; +AVCodec ff_flashsv2_decoder; +AVCodec ff_flic_decoder; +AVCodec ff_flv_encoder; +AVCodec ff_flv_decoder; +AVCodec ff_fourxm_decoder; +AVCodec ff_fraps_decoder; +AVCodec ff_frwu_decoder; +AVCodec ff_g2m_decoder; +AVCodec ff_gif_encoder; +AVCodec ff_gif_decoder; +AVCodec ff_h261_encoder; +AVCodec ff_h261_decoder; +AVCodec ff_h263_encoder; +AVCodec ff_h263_decoder; +AVCodec ff_h263i_decoder; +AVCodec ff_h263p_encoder; +AVCodec ff_h263p_decoder; +AVCodec ff_h264_decoder; +AVCodec ff_h264_crystalhd_decoder; +AVCodec ff_h264_mmal_decoder; +AVCodec ff_h264_qsv_decoder; +AVCodec ff_hap_encoder; +AVCodec ff_hap_decoder; +AVCodec ff_hevc_decoder; +AVCodec ff_hevc_qsv_decoder; +AVCodec ff_hnm4_video_decoder; +AVCodec ff_hq_hqa_decoder; +AVCodec ff_hqx_decoder; +AVCodec ff_huffyuv_encoder; +AVCodec ff_huffyuv_decoder; +AVCodec ff_idcin_decoder; +AVCodec ff_iff_byterun1_decoder; +AVCodec ff_iff_ilbm_decoder; +AVCodec ff_indeo2_decoder; +AVCodec ff_indeo3_decoder; +AVCodec ff_indeo4_decoder; +AVCodec ff_indeo5_decoder; +AVCodec ff_interplay_video_decoder; +AVCodec ff_jpeg2000_encoder; +AVCodec ff_jpeg2000_decoder; +AVCodec ff_jpegls_encoder; +AVCodec ff_jpegls_decoder; +AVCodec ff_jv_decoder; +AVCodec ff_kgv1_decoder; +AVCodec ff_kmvc_decoder; +AVCodec ff_lagarith_decoder; +AVCodec ff_ljpeg_encoder; +AVCodec ff_loco_decoder; +AVCodec ff_mdec_decoder; +AVCodec ff_mimic_decoder; +AVCodec ff_mjpeg_encoder; +AVCodec ff_mjpeg_decoder; +AVCodec ff_mjpegb_decoder; +AVCodec ff_mmvideo_decoder; +AVCodec ff_motionpixels_decoder; +AVCodec ff_mpeg1video_encoder; +AVCodec ff_mpeg1video_decoder; +AVCodec ff_mpeg2video_encoder; +AVCodec ff_mpeg2video_decoder; +AVCodec ff_mpeg4_encoder; +AVCodec ff_mpeg4_decoder; +AVCodec ff_mpeg4_crystalhd_decoder; +AVCodec ff_mpeg4_mmal_decoder; +AVCodec ff_mpegvideo_decoder; +AVCodec ff_mpeg2_crystalhd_decoder; +AVCodec ff_mpeg2_qsv_decoder; +AVCodec ff_msa1_decoder; +AVCodec ff_msmpeg4_crystalhd_decoder; +AVCodec ff_msmpeg4v1_decoder; +AVCodec ff_msmpeg4v2_encoder; +AVCodec ff_msmpeg4v2_decoder; +AVCodec ff_msmpeg4v3_encoder; +AVCodec ff_msmpeg4v3_decoder; +AVCodec ff_msrle_decoder; +AVCodec ff_mss1_decoder; +AVCodec ff_mss2_decoder; +AVCodec ff_msvideo1_encoder; +AVCodec ff_msvideo1_decoder; +AVCodec ff_mszh_decoder; +AVCodec ff_mts2_decoder; +AVCodec ff_mvc1_decoder; +AVCodec ff_mvc2_decoder; +AVCodec ff_mxpeg_decoder; +AVCodec ff_nuv_decoder; +AVCodec ff_paf_video_decoder; +AVCodec ff_pam_encoder; +AVCodec ff_pam_decoder; +AVCodec ff_pbm_encoder; +AVCodec ff_pbm_decoder; +AVCodec ff_pcx_encoder; +AVCodec ff_pcx_decoder; +AVCodec ff_pgm_encoder; +AVCodec ff_pgm_decoder; +AVCodec ff_pgmyuv_encoder; +AVCodec ff_pgmyuv_decoder; +AVCodec ff_pictor_decoder; +AVCodec ff_png_encoder; +AVCodec ff_png_decoder; +AVCodec ff_ppm_encoder; +AVCodec ff_ppm_decoder; +AVCodec ff_prores_encoder; +AVCodec ff_prores_decoder; +AVCodec ff_prores_aw_encoder; +AVCodec ff_prores_ks_encoder; +AVCodec ff_prores_lgpl_decoder; +AVCodec ff_ptx_decoder; +AVCodec ff_qdraw_decoder; +AVCodec ff_qpeg_decoder; +AVCodec ff_qtrle_encoder; +AVCodec ff_qtrle_decoder; +AVCodec ff_r10k_encoder; +AVCodec ff_r10k_decoder; +AVCodec ff_r210_encoder; +AVCodec ff_r210_decoder; +AVCodec ff_rawvideo_encoder; +AVCodec ff_rawvideo_decoder; +AVCodec ff_rl2_decoder; +AVCodec ff_roq_encoder; +AVCodec ff_roq_decoder; +AVCodec ff_rpza_decoder; +AVCodec ff_rv10_encoder; +AVCodec ff_rv10_decoder; +AVCodec ff_rv20_encoder; +AVCodec ff_rv20_decoder; +AVCodec ff_rv30_decoder; +AVCodec ff_rv40_decoder; +AVCodec ff_s302m_encoder; +AVCodec ff_s302m_decoder; +AVCodec ff_sanm_decoder; +AVCodec ff_sgi_encoder; +AVCodec ff_sgi_decoder; +AVCodec ff_sgirle_decoder; +AVCodec ff_smacker_decoder; +AVCodec ff_smc_decoder; +AVCodec ff_smvjpeg_decoder; +AVCodec ff_snow_encoder; +AVCodec ff_snow_decoder; +AVCodec ff_sp5x_decoder; +AVCodec ff_sunrast_encoder; +AVCodec ff_sunrast_decoder; +AVCodec ff_svq1_encoder; +AVCodec ff_svq1_decoder; +AVCodec ff_svq3_decoder; +AVCodec ff_targa_encoder; +AVCodec ff_targa_decoder; +AVCodec ff_targa_y216_decoder; +AVCodec ff_tdsc_decoder; +AVCodec ff_theora_decoder; +AVCodec ff_thp_decoder; +AVCodec ff_tiertexseqvideo_decoder; +AVCodec ff_tiff_encoder; +AVCodec ff_tiff_decoder; +AVCodec ff_tmv_decoder; +AVCodec ff_truemotion1_decoder; +AVCodec ff_truemotion2_decoder; +AVCodec ff_tscc_decoder; +AVCodec ff_tscc2_decoder; +AVCodec ff_txd_decoder; +AVCodec ff_ulti_decoder; +AVCodec ff_utvideo_encoder; +AVCodec ff_utvideo_decoder; +AVCodec ff_v210_encoder; +AVCodec ff_v210_decoder; +AVCodec ff_v210x_decoder; +AVCodec ff_v308_encoder; +AVCodec ff_v308_decoder; +AVCodec ff_v408_encoder; +AVCodec ff_v408_decoder; +AVCodec ff_v410_encoder; +AVCodec ff_v410_decoder; +AVCodec ff_vb_decoder; +AVCodec ff_vble_decoder; +AVCodec ff_vc1_decoder; +AVCodec ff_vc1_crystalhd_decoder; +AVCodec ff_vc1image_decoder; +AVCodec ff_vc1_qsv_decoder; +AVCodec ff_vc2_encoder; +AVCodec ff_vcr1_decoder; +AVCodec ff_vmdvideo_decoder; +AVCodec ff_vmnc_decoder; +AVCodec ff_vp3_decoder; +AVCodec ff_vp5_decoder; +AVCodec ff_vp6_decoder; +AVCodec ff_vp6a_decoder; +AVCodec ff_vp6f_decoder; +AVCodec ff_vp7_decoder; +AVCodec ff_vqa_decoder; +AVCodec ff_webp_decoder; +AVCodec ff_wmv1_encoder; +AVCodec ff_wmv1_decoder; +AVCodec ff_wmv2_encoder; +AVCodec ff_wmv2_decoder; +AVCodec ff_wmv3_decoder; +AVCodec ff_wmv3_crystalhd_decoder; +AVCodec ff_wmv3image_decoder; +AVCodec ff_wnv1_decoder; +AVCodec ff_xan_wc3_decoder; +AVCodec ff_xan_wc4_decoder; +AVCodec ff_xbm_encoder; +AVCodec ff_xbm_decoder; +AVCodec ff_xface_encoder; +AVCodec ff_xface_decoder; +AVCodec ff_xl_decoder; +AVCodec ff_xwd_encoder; +AVCodec ff_xwd_decoder; +AVCodec ff_y41p_encoder; +AVCodec ff_y41p_decoder; +AVCodec ff_yop_decoder; +AVCodec ff_yuv4_encoder; +AVCodec ff_yuv4_decoder; +AVCodec ff_zero12v_decoder; +AVCodec ff_zerocodec_decoder; +AVCodec ff_zlib_encoder; +AVCodec ff_zlib_decoder; +AVCodec ff_zmbv_encoder; +AVCodec ff_zmbv_decoder; +AVCodec ff_aac_encoder; +AVCodec ff_aac_decoder; +AVCodec ff_aac_fixed_decoder; +AVCodec ff_aac_latm_decoder; +AVCodec ff_ac3_encoder; +AVCodec ff_ac3_decoder; +AVCodec ff_ac3_fixed_encoder; +AVCodec ff_ac3_fixed_decoder; +AVCodec ff_alac_encoder; +AVCodec ff_alac_decoder; +AVCodec ff_als_decoder; +AVCodec ff_amrnb_decoder; +AVCodec ff_amrwb_decoder; +AVCodec ff_ape_decoder; +AVCodec ff_atrac1_decoder; +AVCodec ff_atrac3_decoder; +AVCodec ff_atrac3p_decoder; +AVCodec ff_binkaudio_dct_decoder; +AVCodec ff_binkaudio_rdft_decoder; +AVCodec ff_bmv_audio_decoder; +AVCodec ff_cook_decoder; +AVCodec ff_dca_encoder; +AVCodec ff_dca_decoder; +AVCodec ff_dsd_lsbf_decoder; +AVCodec ff_dsd_msbf_decoder; +AVCodec ff_dsd_lsbf_planar_decoder; +AVCodec ff_dsd_msbf_planar_decoder; +AVCodec ff_dsicinaudio_decoder; +AVCodec ff_dss_sp_decoder; +AVCodec ff_eac3_encoder; +AVCodec ff_eac3_decoder; +AVCodec ff_evrc_decoder; +AVCodec ff_ffwavesynth_decoder; +AVCodec ff_flac_encoder; +AVCodec ff_g723_1_encoder; +AVCodec ff_g723_1_decoder; +AVCodec ff_g729_decoder; +AVCodec ff_gsm_decoder; +AVCodec ff_gsm_ms_decoder; +AVCodec ff_iac_decoder; +AVCodec ff_imc_decoder; +AVCodec ff_mace3_decoder; +AVCodec ff_mace6_decoder; +AVCodec ff_metasound_decoder; +AVCodec ff_mlp_decoder; +AVCodec ff_mp1_decoder; +AVCodec ff_mp1float_decoder; +AVCodec ff_mp2_encoder; +AVCodec ff_mp2_decoder; +AVCodec ff_mp2float_decoder; +AVCodec ff_mp2fixed_encoder; +AVCodec ff_mp3_decoder; +AVCodec ff_mp3float_decoder; +AVCodec ff_mp3adu_decoder; +AVCodec ff_mp3adufloat_decoder; +AVCodec ff_mp3on4_decoder; +AVCodec ff_mp3on4float_decoder; +AVCodec ff_mpc7_decoder; +AVCodec ff_mpc8_decoder; +AVCodec ff_nellymoser_encoder; +AVCodec ff_nellymoser_decoder; +AVCodec ff_on2avc_decoder; +AVCodec ff_opus_decoder; +AVCodec ff_paf_audio_decoder; +AVCodec ff_qcelp_decoder; +AVCodec ff_qdm2_decoder; +AVCodec ff_ra_144_encoder; +AVCodec ff_ra_144_decoder; +AVCodec ff_ra_288_decoder; +AVCodec ff_ralf_decoder; +AVCodec ff_shorten_decoder; +AVCodec ff_sipr_decoder; +AVCodec ff_smackaud_decoder; +AVCodec ff_sonic_encoder; +AVCodec ff_sonic_decoder; +AVCodec ff_sonic_ls_encoder; +AVCodec ff_tak_decoder; +AVCodec ff_truehd_decoder; +AVCodec ff_truespeech_decoder; +AVCodec ff_tta_encoder; +AVCodec ff_tta_decoder; +AVCodec ff_twinvq_decoder; +AVCodec ff_vmdaudio_decoder; +AVCodec ff_vorbis_encoder; +AVCodec ff_vorbis_decoder; +AVCodec ff_wavpack_encoder; +AVCodec ff_wavpack_decoder; +AVCodec ff_wmalossless_decoder; +AVCodec ff_wmapro_decoder; +AVCodec ff_wmav1_encoder; +AVCodec ff_wmav1_decoder; +AVCodec ff_wmav2_encoder; +AVCodec ff_wmav2_decoder; +AVCodec ff_wmavoice_decoder; +AVCodec ff_ws_snd1_decoder; +AVCodec ff_pcm_alaw_encoder; +AVCodec ff_pcm_alaw_decoder; +AVCodec ff_pcm_bluray_decoder; +AVCodec ff_pcm_dvd_decoder; +AVCodec ff_pcm_f32be_encoder; +AVCodec ff_pcm_f32be_decoder; +AVCodec ff_pcm_f32le_encoder; +AVCodec ff_pcm_f32le_decoder; +AVCodec ff_pcm_f64be_encoder; +AVCodec ff_pcm_f64be_decoder; +AVCodec ff_pcm_f64le_encoder; +AVCodec ff_pcm_f64le_decoder; +AVCodec ff_pcm_lxf_decoder; +AVCodec ff_pcm_mulaw_encoder; +AVCodec ff_pcm_mulaw_decoder; +AVCodec ff_pcm_s8_encoder; +AVCodec ff_pcm_s8_decoder; +AVCodec ff_pcm_s8_planar_encoder; +AVCodec ff_pcm_s8_planar_decoder; +AVCodec ff_pcm_s16be_encoder; +AVCodec ff_pcm_s16be_decoder; +AVCodec ff_pcm_s16be_planar_encoder; +AVCodec ff_pcm_s16be_planar_decoder; +AVCodec ff_pcm_s16le_encoder; +AVCodec ff_pcm_s16le_decoder; +AVCodec ff_pcm_s16le_planar_encoder; +AVCodec ff_pcm_s16le_planar_decoder; +AVCodec ff_pcm_s24be_encoder; +AVCodec ff_pcm_s24be_decoder; +AVCodec ff_pcm_s24daud_encoder; +AVCodec ff_pcm_s24daud_decoder; +AVCodec ff_pcm_s24le_encoder; +AVCodec ff_pcm_s24le_decoder; +AVCodec ff_pcm_s24le_planar_encoder; +AVCodec ff_pcm_s24le_planar_decoder; +AVCodec ff_pcm_s32be_encoder; +AVCodec ff_pcm_s32be_decoder; +AVCodec ff_pcm_s32le_encoder; +AVCodec ff_pcm_s32le_decoder; +AVCodec ff_pcm_s32le_planar_encoder; +AVCodec ff_pcm_s32le_planar_decoder; +AVCodec ff_pcm_u8_encoder; +AVCodec ff_pcm_u8_decoder; +AVCodec ff_pcm_u16be_encoder; +AVCodec ff_pcm_u16be_decoder; +AVCodec ff_pcm_u16le_encoder; +AVCodec ff_pcm_u16le_decoder; +AVCodec ff_pcm_u24be_encoder; +AVCodec ff_pcm_u24be_decoder; +AVCodec ff_pcm_u24le_encoder; +AVCodec ff_pcm_u24le_decoder; +AVCodec ff_pcm_u32be_encoder; +AVCodec ff_pcm_u32be_decoder; +AVCodec ff_pcm_u32le_encoder; +AVCodec ff_pcm_u32le_decoder; +AVCodec ff_pcm_zork_decoder; +AVCodec ff_interplay_dpcm_decoder; +AVCodec ff_roq_dpcm_encoder; +AVCodec ff_roq_dpcm_decoder; +AVCodec ff_sol_dpcm_decoder; +AVCodec ff_xan_dpcm_decoder; +AVCodec ff_adpcm_4xm_decoder; +AVCodec ff_adpcm_adx_encoder; +AVCodec ff_adpcm_adx_decoder; +AVCodec ff_adpcm_afc_decoder; +AVCodec ff_adpcm_ct_decoder; +AVCodec ff_adpcm_dtk_decoder; +AVCodec ff_adpcm_ea_decoder; +AVCodec ff_adpcm_ea_maxis_xa_decoder; +AVCodec ff_adpcm_ea_r1_decoder; +AVCodec ff_adpcm_ea_r2_decoder; +AVCodec ff_adpcm_ea_r3_decoder; +AVCodec ff_adpcm_ea_xas_decoder; +AVCodec ff_adpcm_g722_encoder; +AVCodec ff_adpcm_g722_decoder; +AVCodec ff_adpcm_g726_encoder; +AVCodec ff_adpcm_g726_decoder; +AVCodec ff_adpcm_g726le_decoder; +AVCodec ff_adpcm_ima_amv_decoder; +AVCodec ff_adpcm_ima_apc_decoder; +AVCodec ff_adpcm_ima_dk3_decoder; +AVCodec ff_adpcm_ima_dk4_decoder; +AVCodec ff_adpcm_ima_ea_eacs_decoder; +AVCodec ff_adpcm_ima_ea_sead_decoder; +AVCodec ff_adpcm_ima_iss_decoder; +AVCodec ff_adpcm_ima_oki_decoder; +AVCodec ff_adpcm_ima_qt_encoder; +AVCodec ff_adpcm_ima_qt_decoder; +AVCodec ff_adpcm_ima_rad_decoder; +AVCodec ff_adpcm_ima_smjpeg_decoder; +AVCodec ff_adpcm_ima_wav_encoder; +AVCodec ff_adpcm_ima_wav_decoder; +AVCodec ff_adpcm_ima_ws_decoder; +AVCodec ff_adpcm_ms_encoder; +AVCodec ff_adpcm_ms_decoder; +AVCodec ff_adpcm_sbpro_2_decoder; +AVCodec ff_adpcm_sbpro_3_decoder; +AVCodec ff_adpcm_sbpro_4_decoder; +AVCodec ff_adpcm_swf_encoder; +AVCodec ff_adpcm_swf_decoder; +AVCodec ff_adpcm_thp_decoder; +AVCodec ff_adpcm_thp_le_decoder; +AVCodec ff_adpcm_vima_decoder; +AVCodec ff_adpcm_xa_decoder; +AVCodec ff_adpcm_yamaha_encoder; +AVCodec ff_adpcm_yamaha_decoder; +AVCodec ff_vima_decoder; +AVCodec ff_ssa_encoder; +AVCodec ff_ssa_decoder; +AVCodec ff_ass_encoder; +AVCodec ff_ass_decoder; +AVCodec ff_ccaption_decoder; +AVCodec ff_dvbsub_encoder; +AVCodec ff_dvbsub_decoder; +AVCodec ff_dvdsub_encoder; +AVCodec ff_dvdsub_decoder; +AVCodec ff_jacosub_decoder; +AVCodec ff_microdvd_decoder; +AVCodec ff_movtext_encoder; +AVCodec ff_movtext_decoder; +AVCodec ff_mpl2_decoder; +AVCodec ff_pgssub_decoder; +AVCodec ff_pjs_decoder; +AVCodec ff_realtext_decoder; +AVCodec ff_sami_decoder; +AVCodec ff_srt_encoder; +AVCodec ff_srt_decoder; +AVCodec ff_stl_decoder; +AVCodec ff_subrip_encoder; +AVCodec ff_subrip_decoder; +AVCodec ff_subviewer_decoder; +AVCodec ff_subviewer1_decoder; +AVCodec ff_text_decoder; +AVCodec ff_vplayer_decoder; +AVCodec ff_webvtt_encoder; +AVCodec ff_webvtt_decoder; +AVCodec ff_xsub_encoder; +AVCodec ff_xsub_decoder; +AVCodec ff_libcelt_decoder; +AVCodec ff_libdcadec_decoder; +AVCodec ff_libfaac_encoder; +AVCodec ff_libfdk_aac_encoder; +AVCodec ff_libfdk_aac_decoder; +AVCodec ff_libgsm_encoder; +AVCodec ff_libgsm_decoder; +AVCodec ff_libgsm_ms_encoder; +AVCodec ff_libgsm_ms_decoder; +AVCodec ff_libilbc_encoder; +AVCodec ff_libilbc_decoder; +AVCodec ff_libmp3lame_encoder; +AVCodec ff_libopencore_amrnb_encoder; +AVCodec ff_libopencore_amrnb_decoder; +AVCodec ff_libopencore_amrwb_decoder; +AVCodec ff_libopenjpeg_encoder; +AVCodec ff_libopenjpeg_decoder; +AVCodec ff_libopus_encoder; +AVCodec ff_libopus_decoder; +AVCodec ff_libschroedinger_encoder; +AVCodec ff_libschroedinger_decoder; +AVCodec ff_libshine_encoder; +AVCodec ff_libspeex_encoder; +AVCodec ff_libspeex_decoder; +AVCodec ff_libstagefright_h264_decoder; +AVCodec ff_libtheora_encoder; +AVCodec ff_libtwolame_encoder; +AVCodec ff_libutvideo_encoder; +AVCodec ff_libutvideo_decoder; +AVCodec ff_libvo_aacenc_encoder; +AVCodec ff_libvo_amrwbenc_encoder; +AVCodec ff_libvorbis_encoder; +AVCodec ff_libvorbis_decoder; +AVCodec ff_libvpx_vp8_encoder; +AVCodec ff_libvpx_vp8_decoder; +AVCodec ff_libvpx_vp9_encoder; +AVCodec ff_libvpx_vp9_decoder; +AVCodec ff_libwavpack_encoder; +AVCodec ff_libwebp_anim_encoder; +AVCodec ff_libwebp_encoder; +AVCodec ff_libx262_encoder; +AVCodec ff_libx264_encoder; +AVCodec ff_libx264rgb_encoder; +AVCodec ff_libx265_encoder; +AVCodec ff_libxavs_encoder; +AVCodec ff_libxvid_encoder; +AVCodec ff_libzvbi_teletext_decoder; +AVCodec ff_libaacplus_encoder; +AVCodec ff_bintext_decoder; +AVCodec ff_xbin_decoder; +AVCodec ff_idf_decoder; +AVCodec ff_libopenh264_encoder; +AVCodec ff_h264_qsv_encoder; +AVCodec ff_nvenc_encoder; +AVCodec ff_nvenc_h264_encoder; +AVCodec ff_nvenc_hevc_encoder; +AVCodec ff_hevc_qsv_encoder; +AVCodec ff_libkvazaar_encoder; +AVCodec ff_mpeg2_qsv_encoder; +AVCodec ff_dxv_decoder; +AVCodec ff_mpeg2_mmal_decoder; +AVCodec ff_rscc_decoder; +AVCodec ff_screenpresso_decoder; +AVCodec ff_sdx2_dpcm_decoder; +AVCodec ff_vc1_mmal_decoder; +AVCodec ff_wrapped_avframe_encoder; +AVCodec ff_interplay_acm_decoder; +AVCodec ff_xma1_decoder; +AVCodec ff_xma2_decoder; +AVCodec ff_adpcm_aica_decoder; +AVCodec ff_adpcm_psx_decoder; +AVCodec ff_text_encoder; +AVCodec ff_vp9_cuvid_decoder; +AVCodec ff_vp8_cuvid_decoder; +AVCodec ff_vc1_cuvid_decoder; +AVCodec ff_mjpeg_vaapi_encoder; +AVCodec ff_hevc_vaapi_encoder; +AVCodec ff_hevc_nvenc_encoder; +AVCodec ff_hevc_cuvid_decoder; +AVCodec ff_h264_videotoolbox_encoder; +AVCodec ff_h264_vaapi_encoder; +AVCodec ff_h264_omx_encoder; +AVCodec ff_h264_nvenc_encoder; +AVCodec ff_h264_cuvid_decoder; +AVCodec ff_qdm2_at_decoder; +AVCodec ff_qdmc_at_decoder; +AVCodec ff_pcm_mulaw_at_decoder; +AVCodec ff_pcm_mulaw_at_encoder; +AVCodec ff_pcm_alaw_at_decoder; +AVCodec ff_pcm_alaw_at_encoder; +AVCodec ff_mp3_at_decoder; +AVCodec ff_mp2_at_decoder; +AVCodec ff_mp1_at_decoder; +AVCodec ff_ilbc_at_decoder; +AVCodec ff_ilbc_at_encoder; +AVCodec ff_gsm_ms_at_decoder; +AVCodec ff_eac3_at_decoder; +AVCodec ff_amr_nb_at_decoder; +AVCodec ff_alac_at_decoder; +AVCodec ff_alac_at_encoder; +AVCodec ff_adpcm_ima_qt_at_decoder; +AVCodec ff_ac3_at_decoder; +AVCodec ff_aac_at_decoder; +AVCodec ff_aac_at_encoder; +AVCodec ff_adpcm_mtaf_decoder; +AVCodec ff_adpcm_ima_dat4_decoder; +AVCodec ff_dst_decoder; +AVCodec ff_ylc_decoder; +AVCodec ff_truemotion2rt_decoder; +AVCodec ff_sheervideo_decoder; +AVCodec ff_magicyuv_decoder; +AVCodec ff_m101_decoder; +AVCodec ff_h264_mediacodec_decoder; +/* Added by FFmpeg 3.2 */ +AVCodec ff_vp9_mediacodec_decoder; +AVCodec ff_vp8_mediacodec_decoder; +AVCodec ff_mpeg4_mediacodec_decoder; +AVCodec ff_mpeg4_cuvid_decoder; +AVCodec ff_mpeg2_cuvid_decoder; +AVCodec ff_mjpeg_cuvid_decoder; +AVCodec ff_hevc_mediacodec_decoder; +AVCodec ff_libopenh264_decoder; +AVCodec ff_pcm_s64le_decoder; +AVCodec ff_pcm_s64le_encoder; +AVCodec ff_pcm_s64be_decoder; +AVCodec ff_pcm_s64be_encoder; +AVCodec ff_truehd_encoder; +AVCodec ff_mlp_encoder; +/* Added by FFmpeg 3.4 */ +AVCodec ff_clearvideo_decoder; +AVCodec ff_fits_encoder; +AVCodec ff_fits_decoder; +AVCodec ff_fmvc_decoder; +AVCodec ff_gdv_decoder; +AVCodec ff_h263_v4l2m2m_decoder; +AVCodec ff_h264_v4l2m2m_decoder; +AVCodec ff_h264_rkmpp_decoder; +AVCodec ff_hevc_rkmpp_decoder; +AVCodec ff_hevc_v4l2m2m_decoder; +AVCodec ff_mpeg4_v4l2m2m_decoder; +AVCodec ff_mpeg1_v4l2m2m_decoder; +AVCodec ff_mpeg2_v4l2m2m_decoder; +AVCodec ff_mpeg2_mediacodec_decoder; +AVCodec ff_mscc_decoder; +AVCodec ff_pixlet_decoder; +AVCodec ff_psd_decoder; +AVCodec ff_scpr_decoder; +AVCodec ff_speedhq_decoder; +AVCodec ff_srgc_decoder; +AVCodec ff_vc1_v4l2m2m_decoder; +AVCodec ff_vp8_rkmpp_decoder; +AVCodec ff_vp8_v4l2m2m_decoder; +AVCodec ff_vp9_rkmpp_decoder; +AVCodec ff_vp9_v4l2m2m_decoder; +AVCodec ff_bitpacked_decoder; +AVCodec ff_wrapped_avframe_decoder; +AVCodec ff_xpm_decoder; +AVCodec ff_atrac3al_decoder; +AVCodec ff_atrac3pal_decoder; +AVCodec ff_dolby_e_decoder; +AVCodec ff_opus_encoder; +AVCodec ff_qdmc_decoder; +AVCodec ff_pcm_f16le_decoder; +AVCodec ff_pcm_f24le_decoder; +AVCodec ff_gremlin_dpcm_decoder; +AVCodec ff_adpcm_g726le_encoder; +AVCodec ff_librsvg_decoder; +AVCodec ff_h263_v4l2m2m_encoder; +AVCodec ff_h264_v4l2m2m_encoder; +AVCodec ff_hevc_v4l2m2m_encoder; +AVCodec ff_mpeg2_vaapi_encoder; +AVCodec ff_mpeg4_v4l2m2m_encoder; +AVCodec ff_vp8_qsv_decoder; +AVCodec ff_vp8_v4l2m2m_encoder; +AVCodec ff_vp8_vaapi_encoder; +AVCodec ff_vp9_vaapi_encoder; + + +AVCodecParser ff_aac_parser; +AVCodecParser ff_aac_latm_parser; +AVCodecParser ff_ac3_parser; +AVCodecParser ff_adx_parser; +AVCodecParser ff_bmp_parser; +AVCodecParser ff_cavsvideo_parser; +AVCodecParser ff_cook_parser; +AVCodecParser ff_dca_parser; +AVCodecParser ff_dirac_parser; +AVCodecParser ff_dnxhd_parser; +AVCodecParser ff_dpx_parser; +AVCodecParser ff_dvaudio_parser; +AVCodecParser ff_dvbsub_parser; +AVCodecParser ff_dvdsub_parser; +AVCodecParser ff_dvd_nav_parser; +AVCodecParser ff_flac_parser; +AVCodecParser ff_g729_parser; +AVCodecParser ff_gsm_parser; +AVCodecParser ff_h261_parser; +AVCodecParser ff_h263_parser; +AVCodecParser ff_h264_parser; +AVCodecParser ff_hevc_parser; +AVCodecParser ff_mjpeg_parser; +AVCodecParser ff_mlp_parser; +AVCodecParser ff_mpeg4video_parser; +AVCodecParser ff_mpegaudio_parser; +AVCodecParser ff_mpegvideo_parser; +AVCodecParser ff_opus_parser; +AVCodecParser ff_png_parser; +AVCodecParser ff_pnm_parser; +AVCodecParser ff_rv30_parser; +AVCodecParser ff_rv40_parser; +AVCodecParser ff_tak_parser; +AVCodecParser ff_vc1_parser; +AVCodecParser ff_vorbis_parser; +AVCodecParser ff_vp3_parser; +/* Added by FFmpeg 3.4 */ +AVCodecParser ff_sipr_parser; +AVCodecParser ff_xma_parser; + +AVBitStreamFilter ff_aac_adtstoasc_bsf; +AVBitStreamFilter ff_chomp_bsf; +AVBitStreamFilter ff_dump_extradata_bsf; +AVBitStreamFilter ff_h264_mp4toannexb_bsf; +AVBitStreamFilter ff_hevc_mp4toannexb_bsf; +AVBitStreamFilter ff_imx_dump_header_bsf; +AVBitStreamFilter ff_mjpeg2jpeg_bsf; +AVBitStreamFilter ff_mjpega_dump_header_bsf; +AVBitStreamFilter ff_mp3_header_decompress_bsf; +AVBitStreamFilter ff_mpeg4_unpack_bframes_bsf; +AVBitStreamFilter ff_mov2textsub_bsf; +AVBitStreamFilter ff_noise_bsf; +AVBitStreamFilter ff_remove_extradata_bsf; +AVBitStreamFilter ff_text2movsub_bsf; + +void ff_h264_pred_init_aarch64(H264PredContext *h, int codec_id, + const int bit_depth, + const int chroma_format_idc) {} +void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc) {} +void ff_h264_pred_init_mips(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc) {} +void ff_me_cmp_init_static(void) {} +int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options) { return 0; } +void ff_frame_thread_encoder_free(AVCodecContext *avctx) {} +int ff_thread_video_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame, int *got_packet_ptr) { return 0; } +void ff_videodsp_init_aarch64(VideoDSPContext *ctx, int bpc) {} +void ff_videodsp_init_arm(VideoDSPContext *ctx, int bpc) {} +void ff_videodsp_init_ppc(VideoDSPContext *ctx, int bpc) {} +void ff_videodsp_init_mips(VideoDSPContext *ctx, int bpc) {} +void ff_vp7dsp_init(VP8DSPContext *c) {} +void ff_vp78dsp_init_arm(VP8DSPContext *c) {} +void ff_vp78dsp_init_ppc(VP8DSPContext *c) {} +void ff_vp8dsp_init_arm(VP8DSPContext *c) {} +void ff_vp8dsp_init_mips(VP8DSPContext *c) {} +void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp) {} +void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp) {} +void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp) {} +#if !defined(__arm__) +void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, int bps) {} +#endif +#if !defined(HAVE_64BIT_BUILD) +void ff_flac_decorrelate_indep8_16_sse2(uint8_t **out, int32_t **in, int channels, int len, int shift) {} +void ff_flac_decorrelate_indep8_32_avx(uint8_t **out, int32_t **in, int channels, int len, int shift) {} +void ff_flac_decorrelate_indep8_16_avx(uint8_t **out, int32_t **in, int channels, int len, int shift) {} +void ff_flac_decorrelate_indep8_32_sse2(uint8_t **out, int32_t **in, int channels, int len, int shift) {} +#endif diff --git a/libs/ffvpx/libavcodec/error_resilience.h b/libs/ffvpx/libavcodec/error_resilience.h new file mode 100644 index 000000000..664a76565 --- /dev/null +++ b/libs/ffvpx/libavcodec/error_resilience.h @@ -0,0 +1,97 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_ERROR_RESILIENCE_H +#define AVCODEC_ERROR_RESILIENCE_H + +#include <stdint.h> +#include <stdatomic.h> + +#include "avcodec.h" +#include "me_cmp.h" +#include "thread.h" + +///< current MB is the first after a resync marker +#define VP_START 1 +#define ER_AC_ERROR 2 +#define ER_DC_ERROR 4 +#define ER_MV_ERROR 8 +#define ER_AC_END 16 +#define ER_DC_END 32 +#define ER_MV_END 64 + +#define ER_MB_ERROR (ER_AC_ERROR|ER_DC_ERROR|ER_MV_ERROR) +#define ER_MB_END (ER_AC_END|ER_DC_END|ER_MV_END) + +typedef struct ERPicture { + AVFrame *f; + ThreadFrame *tf; + + // it is the caller's responsibility to allocate these buffers + int16_t (*motion_val[2])[2]; + int8_t *ref_index[2]; + + uint32_t *mb_type; + int field_picture; +} ERPicture; + +typedef struct ERContext { + AVCodecContext *avctx; + MECmpContext mecc; + int mecc_inited; + + int *mb_index2xy; + int mb_num; + int mb_width, mb_height; + ptrdiff_t mb_stride; + ptrdiff_t b8_stride; + + atomic_int error_count; + int error_occurred; + uint8_t *error_status_table; + uint8_t *er_temp_buffer; + int16_t *dc_val[3]; + uint8_t *mbskip_table; + uint8_t *mbintra_table; + int mv[2][4][2]; + + ERPicture cur_pic; + ERPicture last_pic; + ERPicture next_pic; + + AVBufferRef *ref_index_buf[2]; + AVBufferRef *motion_val_buf[2]; + + uint16_t pp_time; + uint16_t pb_time; + int quarter_sample; + int partitioned_frame; + int ref_count; + + void (*decode_mb)(void *opaque, int ref, int mv_dir, int mv_type, + int (*mv)[2][4][2], + int mb_x, int mb_y, int mb_intra, int mb_skipped); + void *opaque; +} ERContext; + +void ff_er_frame_start(ERContext *s); +void ff_er_frame_end(ERContext *s); +void ff_er_add_slice(ERContext *s, int startx, int starty, int endx, int endy, + int status); + +#endif /* AVCODEC_ERROR_RESILIENCE_H */ diff --git a/libs/ffvpx/libavcodec/fdctdsp.h b/libs/ffvpx/libavcodec/fdctdsp.h new file mode 100644 index 000000000..3e1f683b9 --- /dev/null +++ b/libs/ffvpx/libavcodec/fdctdsp.h @@ -0,0 +1,37 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_FDCTDSP_H +#define AVCODEC_FDCTDSP_H + +#include <stdint.h> + +#include "avcodec.h" + +typedef struct FDCTDSPContext { + void (*fdct)(int16_t *block /* align 16 */); + void (*fdct248)(int16_t *block /* align 16 */); +} FDCTDSPContext; + +void ff_fdctdsp_init(FDCTDSPContext *c, AVCodecContext *avctx); +void ff_fdctdsp_init_ppc(FDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_fdctdsp_init_x86(FDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); + +#endif /* AVCODEC_FDCTDSP_H */ diff --git a/libs/ffvpx/libavcodec/flac.c b/libs/ffvpx/libavcodec/flac.c new file mode 100644 index 000000000..5ffbf9319 --- /dev/null +++ b/libs/ffvpx/libavcodec/flac.c @@ -0,0 +1,246 @@ +/* + * FLAC common code + * Copyright (c) 2009 Justin Ruggles + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/channel_layout.h" +#include "libavutil/crc.h" +#include "libavutil/log.h" +#include "bytestream.h" +#include "get_bits.h" +#include "flac.h" +#include "flacdata.h" + +static const int8_t sample_size_table[] = { 0, 8, 12, 0, 16, 20, 24, 0 }; + +static const uint64_t flac_channel_layouts[8] = { + AV_CH_LAYOUT_MONO, + AV_CH_LAYOUT_STEREO, + AV_CH_LAYOUT_SURROUND, + AV_CH_LAYOUT_QUAD, + AV_CH_LAYOUT_5POINT0, + AV_CH_LAYOUT_5POINT1, + AV_CH_LAYOUT_6POINT1, + AV_CH_LAYOUT_7POINT1 +}; + +static int64_t get_utf8(GetBitContext *gb) +{ + int64_t val; + GET_UTF8(val, get_bits(gb, 8), return -1;) + return val; +} + +int ff_flac_decode_frame_header(AVCodecContext *avctx, GetBitContext *gb, + FLACFrameInfo *fi, int log_level_offset) +{ + int bs_code, sr_code, bps_code; + + /* frame sync code */ + if ((get_bits(gb, 15) & 0x7FFF) != 0x7FFC) { + av_log(avctx, AV_LOG_ERROR + log_level_offset, "invalid sync code\n"); + return AVERROR_INVALIDDATA; + } + + /* variable block size stream code */ + fi->is_var_size = get_bits1(gb); + + /* block size and sample rate codes */ + bs_code = get_bits(gb, 4); + sr_code = get_bits(gb, 4); + + /* channels and decorrelation */ + fi->ch_mode = get_bits(gb, 4); + if (fi->ch_mode < FLAC_MAX_CHANNELS) { + fi->channels = fi->ch_mode + 1; + fi->ch_mode = FLAC_CHMODE_INDEPENDENT; + } else if (fi->ch_mode < FLAC_MAX_CHANNELS + FLAC_CHMODE_MID_SIDE) { + fi->channels = 2; + fi->ch_mode -= FLAC_MAX_CHANNELS - 1; + } else { + av_log(avctx, AV_LOG_ERROR + log_level_offset, + "invalid channel mode: %d\n", fi->ch_mode); + return AVERROR_INVALIDDATA; + } + + /* bits per sample */ + bps_code = get_bits(gb, 3); + if (bps_code == 3 || bps_code == 7) { + av_log(avctx, AV_LOG_ERROR + log_level_offset, + "invalid sample size code (%d)\n", + bps_code); + return AVERROR_INVALIDDATA; + } + fi->bps = sample_size_table[bps_code]; + + /* reserved bit */ + if (get_bits1(gb)) { + av_log(avctx, AV_LOG_ERROR + log_level_offset, + "broken stream, invalid padding\n"); + return AVERROR_INVALIDDATA; + } + + /* sample or frame count */ + fi->frame_or_sample_num = get_utf8(gb); + if (fi->frame_or_sample_num < 0) { + av_log(avctx, AV_LOG_ERROR + log_level_offset, + "sample/frame number invalid; utf8 fscked\n"); + return AVERROR_INVALIDDATA; + } + + /* blocksize */ + if (bs_code == 0) { + av_log(avctx, AV_LOG_ERROR + log_level_offset, + "reserved blocksize code: 0\n"); + return AVERROR_INVALIDDATA; + } else if (bs_code == 6) { + fi->blocksize = get_bits(gb, 8) + 1; + } else if (bs_code == 7) { + fi->blocksize = get_bits(gb, 16) + 1; + } else { + fi->blocksize = ff_flac_blocksize_table[bs_code]; + } + + /* sample rate */ + if (sr_code < 12) { + fi->samplerate = ff_flac_sample_rate_table[sr_code]; + } else if (sr_code == 12) { + fi->samplerate = get_bits(gb, 8) * 1000; + } else if (sr_code == 13) { + fi->samplerate = get_bits(gb, 16); + } else if (sr_code == 14) { + fi->samplerate = get_bits(gb, 16) * 10; + } else { + av_log(avctx, AV_LOG_ERROR + log_level_offset, + "illegal sample rate code %d\n", + sr_code); + return AVERROR_INVALIDDATA; + } + + /* header CRC-8 check */ + skip_bits(gb, 8); + if (av_crc(av_crc_get_table(AV_CRC_8_ATM), 0, gb->buffer, + get_bits_count(gb)/8)) { + av_log(avctx, AV_LOG_ERROR + log_level_offset, + "header crc mismatch\n"); + return AVERROR_INVALIDDATA; + } + + return 0; +} + +int ff_flac_get_max_frame_size(int blocksize, int ch, int bps) +{ + /* Technically, there is no limit to FLAC frame size, but an encoder + should not write a frame that is larger than if verbatim encoding mode + were to be used. */ + + int count; + + count = 16; /* frame header */ + count += ch * ((7+bps+7)/8); /* subframe headers */ + if (ch == 2) { + /* for stereo, need to account for using decorrelation */ + count += (( 2*bps+1) * blocksize + 7) / 8; + } else { + count += ( ch*bps * blocksize + 7) / 8; + } + count += 2; /* frame footer */ + + return count; +} + +int ff_flac_is_extradata_valid(AVCodecContext *avctx, + enum FLACExtradataFormat *format, + uint8_t **streaminfo_start) +{ + if (!avctx->extradata || avctx->extradata_size < FLAC_STREAMINFO_SIZE) { + av_log(avctx, AV_LOG_ERROR, "extradata NULL or too small.\n"); + return 0; + } + if (AV_RL32(avctx->extradata) != MKTAG('f','L','a','C')) { + /* extradata contains STREAMINFO only */ + if (avctx->extradata_size != FLAC_STREAMINFO_SIZE) { + av_log(avctx, AV_LOG_WARNING, "extradata contains %d bytes too many.\n", + FLAC_STREAMINFO_SIZE-avctx->extradata_size); + } + *format = FLAC_EXTRADATA_FORMAT_STREAMINFO; + *streaminfo_start = avctx->extradata; + } else { + if (avctx->extradata_size < 8+FLAC_STREAMINFO_SIZE) { + av_log(avctx, AV_LOG_ERROR, "extradata too small.\n"); + return 0; + } + *format = FLAC_EXTRADATA_FORMAT_FULL_HEADER; + *streaminfo_start = &avctx->extradata[8]; + } + return 1; +} + +void ff_flac_set_channel_layout(AVCodecContext *avctx) +{ + if (avctx->channels <= FF_ARRAY_ELEMS(flac_channel_layouts)) + avctx->channel_layout = flac_channel_layouts[avctx->channels - 1]; + else + avctx->channel_layout = 0; +} + +int ff_flac_parse_streaminfo(AVCodecContext *avctx, struct FLACStreaminfo *s, + const uint8_t *buffer) +{ + GetBitContext gb; + init_get_bits(&gb, buffer, FLAC_STREAMINFO_SIZE*8); + + skip_bits(&gb, 16); /* skip min blocksize */ + s->max_blocksize = get_bits(&gb, 16); + if (s->max_blocksize < FLAC_MIN_BLOCKSIZE) { + av_log(avctx, AV_LOG_WARNING, "invalid max blocksize: %d\n", + s->max_blocksize); + s->max_blocksize = 16; + return AVERROR_INVALIDDATA; + } + + skip_bits(&gb, 24); /* skip min frame size */ + s->max_framesize = get_bits_long(&gb, 24); + + s->samplerate = get_bits_long(&gb, 20); + s->channels = get_bits(&gb, 3) + 1; + s->bps = get_bits(&gb, 5) + 1; + + if (s->bps < 4) { + av_log(avctx, AV_LOG_ERROR, "invalid bps: %d\n", s->bps); + s->bps = 16; + return AVERROR_INVALIDDATA; + } + + avctx->channels = s->channels; + avctx->sample_rate = s->samplerate; + avctx->bits_per_raw_sample = s->bps; + + if (!avctx->channel_layout || + av_get_channel_layout_nb_channels(avctx->channel_layout) != avctx->channels) + ff_flac_set_channel_layout(avctx); + + s->samples = get_bits64(&gb, 36); + + skip_bits_long(&gb, 64); /* md5 sum */ + skip_bits_long(&gb, 64); /* md5 sum */ + + return 0; +} diff --git a/libs/ffvpx/libavcodec/flac.h b/libs/ffvpx/libavcodec/flac.h new file mode 100644 index 000000000..991ab43f3 --- /dev/null +++ b/libs/ffvpx/libavcodec/flac.h @@ -0,0 +1,155 @@ +/* + * FLAC (Free Lossless Audio Codec) decoder/demuxer common functions + * Copyright (c) 2008 Justin Ruggles + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * FLAC (Free Lossless Audio Codec) decoder/demuxer common functions + */ + +#ifndef AVCODEC_FLAC_H +#define AVCODEC_FLAC_H + +#include "avcodec.h" +#include "bytestream.h" +#include "get_bits.h" + +#define FLAC_STREAMINFO_SIZE 34 +#define FLAC_MAX_CHANNELS 8 +#define FLAC_MIN_BLOCKSIZE 16 +#define FLAC_MAX_BLOCKSIZE 65535 +#define FLAC_MIN_FRAME_SIZE 11 + +enum { + FLAC_CHMODE_INDEPENDENT = 0, + FLAC_CHMODE_LEFT_SIDE = 1, + FLAC_CHMODE_RIGHT_SIDE = 2, + FLAC_CHMODE_MID_SIDE = 3, +}; + +enum { + FLAC_METADATA_TYPE_STREAMINFO = 0, + FLAC_METADATA_TYPE_PADDING, + FLAC_METADATA_TYPE_APPLICATION, + FLAC_METADATA_TYPE_SEEKTABLE, + FLAC_METADATA_TYPE_VORBIS_COMMENT, + FLAC_METADATA_TYPE_CUESHEET, + FLAC_METADATA_TYPE_PICTURE, + FLAC_METADATA_TYPE_INVALID = 127 +}; + +enum FLACExtradataFormat { + FLAC_EXTRADATA_FORMAT_STREAMINFO = 0, + FLAC_EXTRADATA_FORMAT_FULL_HEADER = 1 +}; + +#define FLACCOMMONINFO \ + int samplerate; /**< sample rate */\ + int channels; /**< number of channels */\ + int bps; /**< bits-per-sample */\ + +/** + * Data needed from the Streaminfo header for use by the raw FLAC demuxer + * and/or the FLAC decoder. + */ +#define FLACSTREAMINFO \ + FLACCOMMONINFO \ + int max_blocksize; /**< maximum block size, in samples */\ + int max_framesize; /**< maximum frame size, in bytes */\ + int64_t samples; /**< total number of samples */\ + +typedef struct FLACStreaminfo { + FLACSTREAMINFO +} FLACStreaminfo; + +typedef struct FLACFrameInfo { + FLACCOMMONINFO + int blocksize; /**< block size of the frame */ + int ch_mode; /**< channel decorrelation mode */ + int64_t frame_or_sample_num; /**< frame number or sample number */ + int is_var_size; /**< specifies if the stream uses variable + block sizes or a fixed block size; + also determines the meaning of + frame_or_sample_num */ +} FLACFrameInfo; + +/** + * Parse the Streaminfo metadata block + * @param[out] avctx codec context to set basic stream parameters + * @param[out] s where parsed information is stored + * @param[in] buffer pointer to start of 34-byte streaminfo data + * + * @return negative error code on faiure or >= 0 on success + */ +int ff_flac_parse_streaminfo(AVCodecContext *avctx, struct FLACStreaminfo *s, + const uint8_t *buffer); + +/** + * Validate the FLAC extradata. + * @param[in] avctx codec context containing the extradata. + * @param[out] format extradata format. + * @param[out] streaminfo_start pointer to start of 34-byte STREAMINFO data. + * @return 1 if valid, 0 if not valid. + */ +int ff_flac_is_extradata_valid(AVCodecContext *avctx, + enum FLACExtradataFormat *format, + uint8_t **streaminfo_start); + +/** + * Calculate an estimate for the maximum frame size based on verbatim mode. + * @param blocksize block size, in samples + * @param ch number of channels + * @param bps bits-per-sample + */ +int ff_flac_get_max_frame_size(int blocksize, int ch, int bps); + +/** + * Validate and decode a frame header. + * @param avctx AVCodecContext to use as av_log() context + * @param gb GetBitContext from which to read frame header + * @param[out] fi frame information + * @param log_level_offset log level offset. can be used to silence error messages. + * @return non-zero on error, 0 if ok + */ +int ff_flac_decode_frame_header(AVCodecContext *avctx, GetBitContext *gb, + FLACFrameInfo *fi, int log_level_offset); + +void ff_flac_set_channel_layout(AVCodecContext *avctx); + +/** + * Parse the metadata block parameters from the header. + * @param[in] block_header header data, at least 4 bytes + * @param[out] last indicator for last metadata block + * @param[out] type metadata block type + * @param[out] size metadata block size + */ +static av_always_inline void flac_parse_block_header(const uint8_t *block_header, + int *last, int *type, int *size) +{ + int tmp = bytestream_get_byte(&block_header); + if (last) + *last = tmp & 0x80; + if (type) + *type = tmp & 0x7F; + if (size) + *size = bytestream_get_be24(&block_header); +} + +#endif /* AVCODEC_FLAC_H */ diff --git a/libs/ffvpx/libavcodec/flac_parser.c b/libs/ffvpx/libavcodec/flac_parser.c new file mode 100644 index 000000000..272128646 --- /dev/null +++ b/libs/ffvpx/libavcodec/flac_parser.c @@ -0,0 +1,762 @@ +/* + * FLAC parser + * Copyright (c) 2010 Michael Chinen + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * FLAC parser + * + * The FLAC parser buffers input until FLAC_MIN_HEADERS has been found. + * Each time it finds and verifies a CRC-8 header it sees which of the + * FLAC_MAX_SEQUENTIAL_HEADERS that came before it have a valid CRC-16 footer + * that ends at the newly found header. + * Headers are scored by FLAC_HEADER_BASE_SCORE plus the max of its crc-verified + * children, penalized by changes in sample rate, frame number, etc. + * The parser returns the frame with the highest score. + **/ + +#include "libavutil/attributes.h" +#include "libavutil/crc.h" +#include "libavutil/fifo.h" +#include "bytestream.h" +#include "parser.h" +#include "flac.h" + +/** maximum number of adjacent headers that compare CRCs against each other */ +#define FLAC_MAX_SEQUENTIAL_HEADERS 4 +/** minimum number of headers buffered and checked before returning frames */ +#define FLAC_MIN_HEADERS 10 +/** estimate for average size of a FLAC frame */ +#define FLAC_AVG_FRAME_SIZE 8192 + +/** scoring settings for score_header */ +#define FLAC_HEADER_BASE_SCORE 10 +#define FLAC_HEADER_CHANGED_PENALTY 7 +#define FLAC_HEADER_CRC_FAIL_PENALTY 50 +#define FLAC_HEADER_NOT_PENALIZED_YET 100000 +#define FLAC_HEADER_NOT_SCORED_YET -100000 + +/** largest possible size of flac header */ +#define MAX_FRAME_HEADER_SIZE 16 + +typedef struct FLACHeaderMarker { + int offset; /**< byte offset from start of FLACParseContext->buffer */ + int *link_penalty; /**< pointer to array of local scores between this header + and the one at a distance equal array position */ + int max_score; /**< maximum score found after checking each child that + has a valid CRC */ + FLACFrameInfo fi; /**< decoded frame header info */ + struct FLACHeaderMarker *next; /**< next CRC-8 verified header that + immediately follows this one in + the bytestream */ + struct FLACHeaderMarker *best_child; /**< following frame header with + which this frame has the best + score with */ +} FLACHeaderMarker; + +typedef struct FLACParseContext { + AVCodecParserContext *pc; /**< parent context */ + AVCodecContext *avctx; /**< codec context pointer for logging */ + FLACHeaderMarker *headers; /**< linked-list that starts at the first + CRC-8 verified header within buffer */ + FLACHeaderMarker *best_header; /**< highest scoring header within buffer */ + int nb_headers_found; /**< number of headers found in the last + flac_parse() call */ + int nb_headers_buffered; /**< number of headers that are buffered */ + int best_header_valid; /**< flag set when the parser returns junk; + if set return best_header next time */ + AVFifoBuffer *fifo_buf; /**< buffer to store all data until headers + can be verified */ + int end_padded; /**< specifies if fifo_buf's end is padded */ + uint8_t *wrap_buf; /**< general fifo read buffer when wrapped */ + int wrap_buf_allocated_size; /**< actual allocated size of the buffer */ + FLACFrameInfo last_fi; /**< last decoded frame header info */ + int last_fi_valid; /**< set if last_fi is valid */ +} FLACParseContext; + +static int frame_header_is_valid(AVCodecContext *avctx, const uint8_t *buf, + FLACFrameInfo *fi) +{ + GetBitContext gb; + init_get_bits(&gb, buf, MAX_FRAME_HEADER_SIZE * 8); + return !ff_flac_decode_frame_header(avctx, &gb, fi, 127); +} + +/** + * Non-destructive fast fifo pointer fetching + * Returns a pointer from the specified offset. + * If possible the pointer points within the fifo buffer. + * Otherwise (if it would cause a wrap around,) a pointer to a user-specified + * buffer is used. + * The pointer can be NULL. In any case it will be reallocated to hold the size. + * If the returned pointer will be used after subsequent calls to flac_fifo_read_wrap + * then the subsequent calls should pass in a different wrap_buf so as to not + * overwrite the contents of the previous wrap_buf. + * This function is based on av_fifo_generic_read, which is why there is a comment + * about a memory barrier for SMP. + */ +static uint8_t* flac_fifo_read_wrap(FLACParseContext *fpc, int offset, int len, + uint8_t** wrap_buf, int* allocated_size) +{ + AVFifoBuffer *f = fpc->fifo_buf; + uint8_t *start = f->rptr + offset; + uint8_t *tmp_buf; + + if (start >= f->end) + start -= f->end - f->buffer; + if (f->end - start >= len) + return start; + + tmp_buf = av_fast_realloc(*wrap_buf, allocated_size, len); + + if (!tmp_buf) { + av_log(fpc->avctx, AV_LOG_ERROR, + "couldn't reallocate wrap buffer of size %d", len); + return NULL; + } + *wrap_buf = tmp_buf; + do { + int seg_len = FFMIN(f->end - start, len); + memcpy(tmp_buf, start, seg_len); + tmp_buf = (uint8_t*)tmp_buf + seg_len; +// memory barrier needed for SMP here in theory + + start += seg_len - (f->end - f->buffer); + len -= seg_len; + } while (len > 0); + + return *wrap_buf; +} + +/** + * Return a pointer in the fifo buffer where the offset starts at until + * the wrap point or end of request. + * len will contain the valid length of the returned buffer. + * A second call to flac_fifo_read (with new offset and len) should be called + * to get the post-wrap buf if the returned len is less than the requested. + **/ +static uint8_t* flac_fifo_read(FLACParseContext *fpc, int offset, int *len) +{ + AVFifoBuffer *f = fpc->fifo_buf; + uint8_t *start = f->rptr + offset; + + if (start >= f->end) + start -= f->end - f->buffer; + *len = FFMIN(*len, f->end - start); + return start; +} + +static int find_headers_search_validate(FLACParseContext *fpc, int offset) +{ + FLACFrameInfo fi; + uint8_t *header_buf; + int size = 0; + header_buf = flac_fifo_read_wrap(fpc, offset, + MAX_FRAME_HEADER_SIZE, + &fpc->wrap_buf, + &fpc->wrap_buf_allocated_size); + if (frame_header_is_valid(fpc->avctx, header_buf, &fi)) { + FLACHeaderMarker **end_handle = &fpc->headers; + int i; + + size = 0; + while (*end_handle) { + end_handle = &(*end_handle)->next; + size++; + } + + *end_handle = av_mallocz(sizeof(**end_handle)); + if (!*end_handle) { + av_log(fpc->avctx, AV_LOG_ERROR, + "couldn't allocate FLACHeaderMarker\n"); + return AVERROR(ENOMEM); + } + (*end_handle)->fi = fi; + (*end_handle)->offset = offset; + (*end_handle)->link_penalty = av_malloc(sizeof(int) * + FLAC_MAX_SEQUENTIAL_HEADERS); + if (!(*end_handle)->link_penalty) { + av_freep(end_handle); + av_log(fpc->avctx, AV_LOG_ERROR, + "couldn't allocate link_penalty\n"); + return AVERROR(ENOMEM); + } + + for (i = 0; i < FLAC_MAX_SEQUENTIAL_HEADERS; i++) + (*end_handle)->link_penalty[i] = FLAC_HEADER_NOT_PENALIZED_YET; + + fpc->nb_headers_found++; + size++; + } + return size; +} + +static int find_headers_search(FLACParseContext *fpc, uint8_t *buf, int buf_size, + int search_start) + +{ + int size = 0, mod_offset = (buf_size - 1) % 4, i, j; + uint32_t x; + + for (i = 0; i < mod_offset; i++) { + if ((AV_RB16(buf + i) & 0xFFFE) == 0xFFF8) + size = find_headers_search_validate(fpc, search_start + i); + } + + for (; i < buf_size - 1; i += 4) { + x = AV_RB32(buf + i); + if (((x & ~(x + 0x01010101)) & 0x80808080)) { + for (j = 0; j < 4; j++) { + if ((AV_RB16(buf + i + j) & 0xFFFE) == 0xFFF8) + size = find_headers_search_validate(fpc, search_start + i + j); + } + } + } + return size; +} + +static int find_new_headers(FLACParseContext *fpc, int search_start) +{ + FLACHeaderMarker *end; + int search_end, size = 0, read_len, temp; + uint8_t *buf; + fpc->nb_headers_found = 0; + + /* Search for a new header of at most 16 bytes. */ + search_end = av_fifo_size(fpc->fifo_buf) - (MAX_FRAME_HEADER_SIZE - 1); + read_len = search_end - search_start + 1; + buf = flac_fifo_read(fpc, search_start, &read_len); + size = find_headers_search(fpc, buf, read_len, search_start); + search_start += read_len - 1; + + /* If fifo end was hit do the wrap around. */ + if (search_start != search_end) { + uint8_t wrap[2]; + + wrap[0] = buf[read_len - 1]; + read_len = search_end - search_start + 1; + + /* search_start + 1 is the post-wrap offset in the fifo. */ + buf = flac_fifo_read(fpc, search_start + 1, &read_len); + wrap[1] = buf[0]; + + if ((AV_RB16(wrap) & 0xFFFE) == 0xFFF8) { + temp = find_headers_search_validate(fpc, search_start); + size = FFMAX(size, temp); + } + search_start++; + + /* Continue to do the last half of the wrap. */ + temp = find_headers_search(fpc, buf, read_len, search_start); + size = FFMAX(size, temp); + search_start += read_len - 1; + } + + /* Return the size even if no new headers were found. */ + if (!size && fpc->headers) + for (end = fpc->headers; end; end = end->next) + size++; + return size; +} + +static int check_header_fi_mismatch(FLACParseContext *fpc, + FLACFrameInfo *header_fi, + FLACFrameInfo *child_fi, + int log_level_offset) +{ + int deduction = 0; + if (child_fi->samplerate != header_fi->samplerate) { + deduction += FLAC_HEADER_CHANGED_PENALTY; + av_log(fpc->avctx, AV_LOG_WARNING + log_level_offset, + "sample rate change detected in adjacent frames\n"); + } + if (child_fi->bps != header_fi->bps) { + deduction += FLAC_HEADER_CHANGED_PENALTY; + av_log(fpc->avctx, AV_LOG_WARNING + log_level_offset, + "bits per sample change detected in adjacent frames\n"); + } + if (child_fi->is_var_size != header_fi->is_var_size) { + /* Changing blocking strategy not allowed per the spec */ + deduction += FLAC_HEADER_BASE_SCORE; + av_log(fpc->avctx, AV_LOG_WARNING + log_level_offset, + "blocking strategy change detected in adjacent frames\n"); + } + if (child_fi->channels != header_fi->channels) { + deduction += FLAC_HEADER_CHANGED_PENALTY; + av_log(fpc->avctx, AV_LOG_WARNING + log_level_offset, + "number of channels change detected in adjacent frames\n"); + } + return deduction; +} + +static int check_header_mismatch(FLACParseContext *fpc, + FLACHeaderMarker *header, + FLACHeaderMarker *child, + int log_level_offset) +{ + FLACFrameInfo *header_fi = &header->fi, *child_fi = &child->fi; + int deduction, deduction_expected = 0, i; + deduction = check_header_fi_mismatch(fpc, header_fi, child_fi, + log_level_offset); + /* Check sample and frame numbers. */ + if ((child_fi->frame_or_sample_num - header_fi->frame_or_sample_num + != header_fi->blocksize) && + (child_fi->frame_or_sample_num + != header_fi->frame_or_sample_num + 1)) { + FLACHeaderMarker *curr; + int expected_frame_num, expected_sample_num; + /* If there are frames in the middle we expect this deduction, + as they are probably valid and this one follows it */ + + expected_frame_num = expected_sample_num = header_fi->frame_or_sample_num; + curr = header; + while (curr != child) { + /* Ignore frames that failed all crc checks */ + for (i = 0; i < FLAC_MAX_SEQUENTIAL_HEADERS; i++) { + if (curr->link_penalty[i] < FLAC_HEADER_CRC_FAIL_PENALTY) { + expected_frame_num++; + expected_sample_num += curr->fi.blocksize; + break; + } + } + curr = curr->next; + } + + if (expected_frame_num == child_fi->frame_or_sample_num || + expected_sample_num == child_fi->frame_or_sample_num) + deduction_expected = deduction ? 0 : 1; + + deduction += FLAC_HEADER_CHANGED_PENALTY; + av_log(fpc->avctx, AV_LOG_WARNING + log_level_offset, + "sample/frame number mismatch in adjacent frames\n"); + } + + /* If we have suspicious headers, check the CRC between them */ + if (deduction && !deduction_expected) { + FLACHeaderMarker *curr; + int read_len; + uint8_t *buf; + uint32_t crc = 1; + int inverted_test = 0; + + /* Since CRC is expensive only do it if we haven't yet. + This assumes a CRC penalty is greater than all other check penalties */ + curr = header->next; + for (i = 0; i < FLAC_MAX_SEQUENTIAL_HEADERS && curr != child; i++) + curr = curr->next; + + if (header->link_penalty[i] < FLAC_HEADER_CRC_FAIL_PENALTY || + header->link_penalty[i] == FLAC_HEADER_NOT_PENALIZED_YET) { + FLACHeaderMarker *start, *end; + + /* Although overlapping chains are scored, the crc should never + have to be computed twice for a single byte. */ + start = header; + end = child; + if (i > 0 && + header->link_penalty[i - 1] >= FLAC_HEADER_CRC_FAIL_PENALTY) { + while (start->next != child) + start = start->next; + inverted_test = 1; + } else if (i > 0 && + header->next->link_penalty[i-1] >= + FLAC_HEADER_CRC_FAIL_PENALTY ) { + end = header->next; + inverted_test = 1; + } + + read_len = end->offset - start->offset; + buf = flac_fifo_read(fpc, start->offset, &read_len); + crc = av_crc(av_crc_get_table(AV_CRC_16_ANSI), 0, buf, read_len); + read_len = (end->offset - start->offset) - read_len; + + if (read_len) { + buf = flac_fifo_read(fpc, end->offset - read_len, &read_len); + crc = av_crc(av_crc_get_table(AV_CRC_16_ANSI), crc, buf, read_len); + } + } + + if (!crc ^ !inverted_test) { + deduction += FLAC_HEADER_CRC_FAIL_PENALTY; + av_log(fpc->avctx, AV_LOG_WARNING + log_level_offset, + "crc check failed from offset %i (frame %"PRId64") to %i (frame %"PRId64")\n", + header->offset, header_fi->frame_or_sample_num, + child->offset, child_fi->frame_or_sample_num); + } + } + return deduction; +} + +/** + * Score a header. + * + * Give FLAC_HEADER_BASE_SCORE points to a frame for existing. + * If it has children, (subsequent frames of which the preceding CRC footer + * validates against this one,) then take the maximum score of the children, + * with a penalty of FLAC_HEADER_CHANGED_PENALTY applied for each change to + * bps, sample rate, channels, but not decorrelation mode, or blocksize, + * because it can change often. + **/ +static int score_header(FLACParseContext *fpc, FLACHeaderMarker *header) +{ + FLACHeaderMarker *child; + int dist = 0; + int child_score; + int base_score = FLAC_HEADER_BASE_SCORE; + if (header->max_score != FLAC_HEADER_NOT_SCORED_YET) + return header->max_score; + + /* Modify the base score with changes from the last output header */ + if (fpc->last_fi_valid) { + /* Silence the log since this will be repeated if selected */ + base_score -= check_header_fi_mismatch(fpc, &fpc->last_fi, &header->fi, + AV_LOG_DEBUG); + } + + header->max_score = base_score; + + /* Check and compute the children's scores. */ + child = header->next; + for (dist = 0; dist < FLAC_MAX_SEQUENTIAL_HEADERS && child; dist++) { + /* Look at the child's frame header info and penalize suspicious + changes between the headers. */ + if (header->link_penalty[dist] == FLAC_HEADER_NOT_PENALIZED_YET) { + header->link_penalty[dist] = check_header_mismatch(fpc, header, + child, AV_LOG_DEBUG); + } + child_score = score_header(fpc, child) - header->link_penalty[dist]; + + if (FLAC_HEADER_BASE_SCORE + child_score > header->max_score) { + /* Keep the child because the frame scoring is dynamic. */ + header->best_child = child; + header->max_score = base_score + child_score; + } + child = child->next; + } + + return header->max_score; +} + +static void score_sequences(FLACParseContext *fpc) +{ + FLACHeaderMarker *curr; + int best_score = 0;//FLAC_HEADER_NOT_SCORED_YET; + /* First pass to clear all old scores. */ + for (curr = fpc->headers; curr; curr = curr->next) + curr->max_score = FLAC_HEADER_NOT_SCORED_YET; + + /* Do a second pass to score them all. */ + for (curr = fpc->headers; curr; curr = curr->next) { + if (score_header(fpc, curr) > best_score) { + fpc->best_header = curr; + best_score = curr->max_score; + } + } +} + +static int get_best_header(FLACParseContext* fpc, const uint8_t **poutbuf, + int *poutbuf_size) +{ + FLACHeaderMarker *header = fpc->best_header; + FLACHeaderMarker *child = header->best_child; + if (!child) { + *poutbuf_size = av_fifo_size(fpc->fifo_buf) - header->offset; + } else { + *poutbuf_size = child->offset - header->offset; + + /* If the child has suspicious changes, log them */ + check_header_mismatch(fpc, header, child, 0); + } + + if (header->fi.channels != fpc->avctx->channels || + !fpc->avctx->channel_layout) { + fpc->avctx->channels = header->fi.channels; + ff_flac_set_channel_layout(fpc->avctx); + } + fpc->avctx->sample_rate = header->fi.samplerate; + fpc->pc->duration = header->fi.blocksize; + *poutbuf = flac_fifo_read_wrap(fpc, header->offset, *poutbuf_size, + &fpc->wrap_buf, + &fpc->wrap_buf_allocated_size); + + + if (fpc->pc->flags & PARSER_FLAG_USE_CODEC_TS){ + if (header->fi.is_var_size) + fpc->pc->pts = header->fi.frame_or_sample_num; + else if (header->best_child) + fpc->pc->pts = header->fi.frame_or_sample_num * header->fi.blocksize; + } + + fpc->best_header_valid = 0; + fpc->last_fi_valid = 1; + fpc->last_fi = header->fi; + + /* Return the negative overread index so the client can compute pos. + This should be the amount overread to the beginning of the child */ + if (child) + return child->offset - av_fifo_size(fpc->fifo_buf); + return 0; +} + +static int flac_parse(AVCodecParserContext *s, AVCodecContext *avctx, + const uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + FLACParseContext *fpc = s->priv_data; + FLACHeaderMarker *curr; + int nb_headers; + const uint8_t *read_end = buf; + const uint8_t *read_start = buf; + + if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) { + FLACFrameInfo fi; + if (frame_header_is_valid(avctx, buf, &fi)) { + s->duration = fi.blocksize; + if (!avctx->sample_rate) + avctx->sample_rate = fi.samplerate; + if (fpc->pc->flags & PARSER_FLAG_USE_CODEC_TS){ + fpc->pc->pts = fi.frame_or_sample_num; + if (!fi.is_var_size) + fpc->pc->pts *= fi.blocksize; + } + } + *poutbuf = buf; + *poutbuf_size = buf_size; + return buf_size; + } + + fpc->avctx = avctx; + if (fpc->best_header_valid) + return get_best_header(fpc, poutbuf, poutbuf_size); + + /* If a best_header was found last call remove it with the buffer data. */ + if (fpc->best_header && fpc->best_header->best_child) { + FLACHeaderMarker *temp; + FLACHeaderMarker *best_child = fpc->best_header->best_child; + + /* Remove headers in list until the end of the best_header. */ + for (curr = fpc->headers; curr != best_child; curr = temp) { + if (curr != fpc->best_header) { + av_log(avctx, AV_LOG_DEBUG, + "dropping low score %i frame header from offset %i to %i\n", + curr->max_score, curr->offset, curr->next->offset); + } + temp = curr->next; + av_freep(&curr->link_penalty); + av_free(curr); + fpc->nb_headers_buffered--; + } + /* Release returned data from ring buffer. */ + av_fifo_drain(fpc->fifo_buf, best_child->offset); + + /* Fix the offset for the headers remaining to match the new buffer. */ + for (curr = best_child->next; curr; curr = curr->next) + curr->offset -= best_child->offset; + + fpc->nb_headers_buffered--; + best_child->offset = 0; + fpc->headers = best_child; + if (fpc->nb_headers_buffered >= FLAC_MIN_HEADERS) { + fpc->best_header = best_child; + return get_best_header(fpc, poutbuf, poutbuf_size); + } + fpc->best_header = NULL; + } else if (fpc->best_header) { + /* No end frame no need to delete the buffer; probably eof */ + FLACHeaderMarker *temp; + + for (curr = fpc->headers; curr != fpc->best_header; curr = temp) { + temp = curr->next; + av_freep(&curr->link_penalty); + av_free(curr); + fpc->nb_headers_buffered--; + } + fpc->headers = fpc->best_header->next; + av_freep(&fpc->best_header->link_penalty); + av_freep(&fpc->best_header); + fpc->nb_headers_buffered--; + } + + /* Find and score new headers. */ + /* buf_size is to zero when padding, so check for this since we do */ + /* not want to try to read more input once we have found the end. */ + /* Note that as (non-modified) parameters, buf can be non-NULL, */ + /* while buf_size is 0. */ + while ((buf && buf_size && read_end < buf + buf_size && + fpc->nb_headers_buffered < FLAC_MIN_HEADERS) + || ((!buf || !buf_size) && !fpc->end_padded)) { + int start_offset; + + /* Pad the end once if EOF, to check the final region for headers. */ + if (!buf || !buf_size) { + fpc->end_padded = 1; + buf_size = MAX_FRAME_HEADER_SIZE; + read_end = read_start + MAX_FRAME_HEADER_SIZE; + } else { + /* The maximum read size is the upper-bound of what the parser + needs to have the required number of frames buffered */ + int nb_desired = FLAC_MIN_HEADERS - fpc->nb_headers_buffered + 1; + read_end = read_end + FFMIN(buf + buf_size - read_end, + nb_desired * FLAC_AVG_FRAME_SIZE); + } + + if (!av_fifo_space(fpc->fifo_buf) && + av_fifo_size(fpc->fifo_buf) / FLAC_AVG_FRAME_SIZE > + fpc->nb_headers_buffered * 20) { + /* There is less than one valid flac header buffered for 20 headers + * buffered. Therefore the fifo is most likely filled with invalid + * data and the input is not a flac file. */ + goto handle_error; + } + + /* Fill the buffer. */ + if ( av_fifo_space(fpc->fifo_buf) < read_end - read_start + && av_fifo_realloc2(fpc->fifo_buf, (read_end - read_start) + 2*av_fifo_size(fpc->fifo_buf)) < 0) { + av_log(avctx, AV_LOG_ERROR, + "couldn't reallocate buffer of size %"PTRDIFF_SPECIFIER"\n", + (read_end - read_start) + av_fifo_size(fpc->fifo_buf)); + goto handle_error; + } + + if (buf && buf_size) { + av_fifo_generic_write(fpc->fifo_buf, (void*) read_start, + read_end - read_start, NULL); + } else { + int8_t pad[MAX_FRAME_HEADER_SIZE] = { 0 }; + av_fifo_generic_write(fpc->fifo_buf, pad, sizeof(pad), NULL); + } + + /* Tag headers and update sequences. */ + start_offset = av_fifo_size(fpc->fifo_buf) - + ((read_end - read_start) + (MAX_FRAME_HEADER_SIZE - 1)); + start_offset = FFMAX(0, start_offset); + nb_headers = find_new_headers(fpc, start_offset); + + if (nb_headers < 0) { + av_log(avctx, AV_LOG_ERROR, + "find_new_headers couldn't allocate FLAC header\n"); + goto handle_error; + } + + fpc->nb_headers_buffered = nb_headers; + /* Wait till FLAC_MIN_HEADERS to output a valid frame. */ + if (!fpc->end_padded && fpc->nb_headers_buffered < FLAC_MIN_HEADERS) { + if (buf && read_end < buf + buf_size) { + read_start = read_end; + continue; + } else { + goto handle_error; + } + } + + /* If headers found, update the scores since we have longer chains. */ + if (fpc->end_padded || fpc->nb_headers_found) + score_sequences(fpc); + + /* restore the state pre-padding */ + if (fpc->end_padded) { + int warp = fpc->fifo_buf->wptr - fpc->fifo_buf->buffer < MAX_FRAME_HEADER_SIZE; + /* HACK: drain the tail of the fifo */ + fpc->fifo_buf->wptr -= MAX_FRAME_HEADER_SIZE; + fpc->fifo_buf->wndx -= MAX_FRAME_HEADER_SIZE; + if (warp) { + fpc->fifo_buf->wptr += fpc->fifo_buf->end - + fpc->fifo_buf->buffer; + } + buf_size = 0; + read_start = read_end = NULL; + } + } + + for (curr = fpc->headers; curr; curr = curr->next) { + if (!fpc->best_header || curr->max_score > fpc->best_header->max_score) { + fpc->best_header = curr; + } + } + + if (fpc->best_header && fpc->best_header->max_score <= 0) { + // Only accept a bad header if there is no other option to continue + if (!buf_size || !buf || read_end != buf || fpc->nb_headers_buffered < FLAC_MIN_HEADERS) + fpc->best_header = NULL; + } + + if (fpc->best_header) { + fpc->best_header_valid = 1; + if (fpc->best_header->offset > 0) { + /* Output a junk frame. */ + av_log(avctx, AV_LOG_DEBUG, "Junk frame till offset %i\n", + fpc->best_header->offset); + + /* Set duration to 0. It is unknown or invalid in a junk frame. */ + s->duration = 0; + *poutbuf_size = fpc->best_header->offset; + *poutbuf = flac_fifo_read_wrap(fpc, 0, *poutbuf_size, + &fpc->wrap_buf, + &fpc->wrap_buf_allocated_size); + return buf_size ? (read_end - buf) : (fpc->best_header->offset - + av_fifo_size(fpc->fifo_buf)); + } + if (!buf_size) + return get_best_header(fpc, poutbuf, poutbuf_size); + } + +handle_error: + *poutbuf = NULL; + *poutbuf_size = 0; + return buf_size ? read_end - buf : 0; +} + +static av_cold int flac_parse_init(AVCodecParserContext *c) +{ + FLACParseContext *fpc = c->priv_data; + fpc->pc = c; + /* There will generally be FLAC_MIN_HEADERS buffered in the fifo before + it drains. This is allocated early to avoid slow reallocation. */ + fpc->fifo_buf = av_fifo_alloc_array(FLAC_MIN_HEADERS + 3, FLAC_AVG_FRAME_SIZE); + if (!fpc->fifo_buf) { + av_log(fpc->avctx, AV_LOG_ERROR, + "couldn't allocate fifo_buf\n"); + return AVERROR(ENOMEM); + } + return 0; +} + +static void flac_parse_close(AVCodecParserContext *c) +{ + FLACParseContext *fpc = c->priv_data; + FLACHeaderMarker *curr = fpc->headers, *temp; + + while (curr) { + temp = curr->next; + av_freep(&curr->link_penalty); + av_free(curr); + curr = temp; + } + av_fifo_freep(&fpc->fifo_buf); + av_freep(&fpc->wrap_buf); +} + +AVCodecParser ff_flac_parser = { + .codec_ids = { AV_CODEC_ID_FLAC }, + .priv_data_size = sizeof(FLACParseContext), + .parser_init = flac_parse_init, + .parser_parse = flac_parse, + .parser_close = flac_parse_close, +}; diff --git a/libs/ffvpx/libavcodec/flacdata.c b/libs/ffvpx/libavcodec/flacdata.c new file mode 100644 index 000000000..1954f32d3 --- /dev/null +++ b/libs/ffvpx/libavcodec/flacdata.c @@ -0,0 +1,33 @@ +/* + * FLAC data + * Copyright (c) 2003 Alex Beregszaszi + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "internal.h" + +const int ff_flac_sample_rate_table[16] = +{ 0, + 88200, 176400, 192000, + 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000, + 0, 0, 0, 0 }; + +const int32_t ff_flac_blocksize_table[16] = { + 0, 192, 576<<0, 576<<1, 576<<2, 576<<3, 0, 0, +256<<0, 256<<1, 256<<2, 256<<3, 256<<4, 256<<5, 256<<6, 256<<7 +}; diff --git a/libs/ffvpx/libavcodec/flacdata.h b/libs/ffvpx/libavcodec/flacdata.h new file mode 100644 index 000000000..e2c1e5d7f --- /dev/null +++ b/libs/ffvpx/libavcodec/flacdata.h @@ -0,0 +1,31 @@ +/* + * FLAC data header + * Copyright (c) 2003 Alex Beregszaszi + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_FLACDATA_H +#define AVCODEC_FLACDATA_H + +#include "internal.h" + +extern const int ff_flac_sample_rate_table[16]; + +extern const int32_t ff_flac_blocksize_table[16]; + +#endif /* AVCODEC_FLACDATA_H */ diff --git a/libs/ffvpx/libavcodec/flacdec.c b/libs/ffvpx/libavcodec/flacdec.c new file mode 100644 index 000000000..c8eb45604 --- /dev/null +++ b/libs/ffvpx/libavcodec/flacdec.c @@ -0,0 +1,693 @@ +/* + * FLAC (Free Lossless Audio Codec) decoder + * Copyright (c) 2003 Alex Beregszaszi + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * FLAC (Free Lossless Audio Codec) decoder + * @author Alex Beregszaszi + * @see http://flac.sourceforge.net/ + * + * This decoder can be used in 1 of 2 ways: Either raw FLAC data can be fed + * through, starting from the initial 'fLaC' signature; or by passing the + * 34-byte streaminfo structure through avctx->extradata[_size] followed + * by data starting with the 0xFFF8 marker. + */ + +#include <limits.h> + +#include "libavutil/avassert.h" +#include "libavutil/crc.h" +#include "libavutil/opt.h" +#include "avcodec.h" +#include "internal.h" +#include "get_bits.h" +#include "bytestream.h" +#include "golomb.h" +#include "flac.h" +#include "flacdata.h" +#include "flacdsp.h" +#include "thread.h" +#include "unary.h" + + +typedef struct FLACContext { + AVClass *class; + struct FLACStreaminfo flac_stream_info; + + AVCodecContext *avctx; ///< parent AVCodecContext + GetBitContext gb; ///< GetBitContext initialized to start at the current frame + + int blocksize; ///< number of samples in the current frame + int sample_shift; ///< shift required to make output samples 16-bit or 32-bit + int ch_mode; ///< channel decorrelation type in the current frame + int got_streaminfo; ///< indicates if the STREAMINFO has been read + + int32_t *decoded[FLAC_MAX_CHANNELS]; ///< decoded samples + uint8_t *decoded_buffer; + unsigned int decoded_buffer_size; + int buggy_lpc; ///< use workaround for old lavc encoded files + + FLACDSPContext dsp; +} FLACContext; + +static int allocate_buffers(FLACContext *s); + +static void flac_set_bps(FLACContext *s) +{ + enum AVSampleFormat req = s->avctx->request_sample_fmt; + int need32 = s->flac_stream_info.bps > 16; + int want32 = av_get_bytes_per_sample(req) > 2; + int planar = av_sample_fmt_is_planar(req); + + if (need32 || want32) { + if (planar) + s->avctx->sample_fmt = AV_SAMPLE_FMT_S32P; + else + s->avctx->sample_fmt = AV_SAMPLE_FMT_S32; + s->sample_shift = 32 - s->flac_stream_info.bps; + } else { + if (planar) + s->avctx->sample_fmt = AV_SAMPLE_FMT_S16P; + else + s->avctx->sample_fmt = AV_SAMPLE_FMT_S16; + s->sample_shift = 16 - s->flac_stream_info.bps; + } +} + +static av_cold int flac_decode_init(AVCodecContext *avctx) +{ + enum FLACExtradataFormat format; + uint8_t *streaminfo; + int ret; + FLACContext *s = avctx->priv_data; + s->avctx = avctx; + + /* for now, the raw FLAC header is allowed to be passed to the decoder as + frame data instead of extradata. */ + if (!avctx->extradata) + return 0; + + if (!ff_flac_is_extradata_valid(avctx, &format, &streaminfo)) + return AVERROR_INVALIDDATA; + + /* initialize based on the demuxer-supplied streamdata header */ + ret = ff_flac_parse_streaminfo(avctx, &s->flac_stream_info, streaminfo); + if (ret < 0) + return ret; + ret = allocate_buffers(s); + if (ret < 0) + return ret; + flac_set_bps(s); + ff_flacdsp_init(&s->dsp, avctx->sample_fmt, + s->flac_stream_info.channels, s->flac_stream_info.bps); + s->got_streaminfo = 1; + + return 0; +} + +static void dump_headers(AVCodecContext *avctx, FLACStreaminfo *s) +{ + av_log(avctx, AV_LOG_DEBUG, " Max Blocksize: %d\n", s->max_blocksize); + av_log(avctx, AV_LOG_DEBUG, " Max Framesize: %d\n", s->max_framesize); + av_log(avctx, AV_LOG_DEBUG, " Samplerate: %d\n", s->samplerate); + av_log(avctx, AV_LOG_DEBUG, " Channels: %d\n", s->channels); + av_log(avctx, AV_LOG_DEBUG, " Bits: %d\n", s->bps); +} + +static int allocate_buffers(FLACContext *s) +{ + int buf_size; + int ret; + + av_assert0(s->flac_stream_info.max_blocksize); + + buf_size = av_samples_get_buffer_size(NULL, s->flac_stream_info.channels, + s->flac_stream_info.max_blocksize, + AV_SAMPLE_FMT_S32P, 0); + if (buf_size < 0) + return buf_size; + + av_fast_malloc(&s->decoded_buffer, &s->decoded_buffer_size, buf_size); + if (!s->decoded_buffer) + return AVERROR(ENOMEM); + + ret = av_samples_fill_arrays((uint8_t **)s->decoded, NULL, + s->decoded_buffer, + s->flac_stream_info.channels, + s->flac_stream_info.max_blocksize, + AV_SAMPLE_FMT_S32P, 0); + return ret < 0 ? ret : 0; +} + +/** + * Parse the STREAMINFO from an inline header. + * @param s the flac decoding context + * @param buf input buffer, starting with the "fLaC" marker + * @param buf_size buffer size + * @return non-zero if metadata is invalid + */ +static int parse_streaminfo(FLACContext *s, const uint8_t *buf, int buf_size) +{ + int metadata_type, metadata_size, ret; + + if (buf_size < FLAC_STREAMINFO_SIZE+8) { + /* need more data */ + return 0; + } + flac_parse_block_header(&buf[4], NULL, &metadata_type, &metadata_size); + if (metadata_type != FLAC_METADATA_TYPE_STREAMINFO || + metadata_size != FLAC_STREAMINFO_SIZE) { + return AVERROR_INVALIDDATA; + } + ret = ff_flac_parse_streaminfo(s->avctx, &s->flac_stream_info, &buf[8]); + if (ret < 0) + return ret; + ret = allocate_buffers(s); + if (ret < 0) + return ret; + flac_set_bps(s); + ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt, + s->flac_stream_info.channels, s->flac_stream_info.bps); + s->got_streaminfo = 1; + + return 0; +} + +/** + * Determine the size of an inline header. + * @param buf input buffer, starting with the "fLaC" marker + * @param buf_size buffer size + * @return number of bytes in the header, or 0 if more data is needed + */ +static int get_metadata_size(const uint8_t *buf, int buf_size) +{ + int metadata_last, metadata_size; + const uint8_t *buf_end = buf + buf_size; + + buf += 4; + do { + if (buf_end - buf < 4) + return AVERROR_INVALIDDATA; + flac_parse_block_header(buf, &metadata_last, NULL, &metadata_size); + buf += 4; + if (buf_end - buf < metadata_size) { + /* need more data in order to read the complete header */ + return AVERROR_INVALIDDATA; + } + buf += metadata_size; + } while (!metadata_last); + + return buf_size - (buf_end - buf); +} + +static int decode_residuals(FLACContext *s, int32_t *decoded, int pred_order) +{ + GetBitContext gb = s->gb; + int i, tmp, partition, method_type, rice_order; + int rice_bits, rice_esc; + int samples; + + method_type = get_bits(&gb, 2); + rice_order = get_bits(&gb, 4); + + samples = s->blocksize >> rice_order; + rice_bits = 4 + method_type; + rice_esc = (1 << rice_bits) - 1; + + decoded += pred_order; + i = pred_order; + + if (method_type > 1) { + av_log(s->avctx, AV_LOG_ERROR, "illegal residual coding method %d\n", + method_type); + return AVERROR_INVALIDDATA; + } + + if (samples << rice_order != s->blocksize) { + av_log(s->avctx, AV_LOG_ERROR, "invalid rice order: %i blocksize %i\n", + rice_order, s->blocksize); + return AVERROR_INVALIDDATA; + } + + if (pred_order > samples) { + av_log(s->avctx, AV_LOG_ERROR, "invalid predictor order: %i > %i\n", + pred_order, samples); + return AVERROR_INVALIDDATA; + } + + for (partition = 0; partition < (1 << rice_order); partition++) { + tmp = get_bits(&gb, rice_bits); + if (tmp == rice_esc) { + tmp = get_bits(&gb, 5); + for (; i < samples; i++) + *decoded++ = get_sbits_long(&gb, tmp); + } else { + int real_limit = tmp ? (INT_MAX >> tmp) + 2 : INT_MAX; + for (; i < samples; i++) { + int v = get_sr_golomb_flac(&gb, tmp, real_limit, 0); + if (v == 0x80000000){ + av_log(s->avctx, AV_LOG_ERROR, "invalid residual\n"); + return AVERROR_INVALIDDATA; + } + + *decoded++ = v; + } + } + i= 0; + } + + s->gb = gb; + + return 0; +} + +static int decode_subframe_fixed(FLACContext *s, int32_t *decoded, + int pred_order, int bps) +{ + const int blocksize = s->blocksize; + unsigned av_uninit(a), av_uninit(b), av_uninit(c), av_uninit(d); + int i; + int ret; + + /* warm up samples */ + for (i = 0; i < pred_order; i++) { + decoded[i] = get_sbits_long(&s->gb, bps); + } + + if ((ret = decode_residuals(s, decoded, pred_order)) < 0) + return ret; + + if (pred_order > 0) + a = decoded[pred_order-1]; + if (pred_order > 1) + b = a - decoded[pred_order-2]; + if (pred_order > 2) + c = b - decoded[pred_order-2] + decoded[pred_order-3]; + if (pred_order > 3) + d = c - decoded[pred_order-2] + 2U*decoded[pred_order-3] - decoded[pred_order-4]; + + switch (pred_order) { + case 0: + break; + case 1: + for (i = pred_order; i < blocksize; i++) + decoded[i] = a += decoded[i]; + break; + case 2: + for (i = pred_order; i < blocksize; i++) + decoded[i] = a += b += decoded[i]; + break; + case 3: + for (i = pred_order; i < blocksize; i++) + decoded[i] = a += b += c += decoded[i]; + break; + case 4: + for (i = pred_order; i < blocksize; i++) + decoded[i] = a += b += c += d += decoded[i]; + break; + default: + av_log(s->avctx, AV_LOG_ERROR, "illegal pred order %d\n", pred_order); + return AVERROR_INVALIDDATA; + } + + return 0; +} + +static void lpc_analyze_remodulate(SUINT32 *decoded, const int coeffs[32], + int order, int qlevel, int len, int bps) +{ + int i, j; + int ebps = 1 << (bps-1); + unsigned sigma = 0; + + for (i = order; i < len; i++) + sigma |= decoded[i] + ebps; + + if (sigma < 2*ebps) + return; + + for (i = len - 1; i >= order; i--) { + int64_t p = 0; + for (j = 0; j < order; j++) + p += coeffs[j] * (int64_t)(int32_t)decoded[i-order+j]; + decoded[i] -= p >> qlevel; + } + for (i = order; i < len; i++, decoded++) { + int32_t p = 0; + for (j = 0; j < order; j++) + p += coeffs[j] * (uint32_t)decoded[j]; + decoded[j] += p >> qlevel; + } +} + +static int decode_subframe_lpc(FLACContext *s, int32_t *decoded, int pred_order, + int bps) +{ + int i, ret; + int coeff_prec, qlevel; + int coeffs[32]; + + /* warm up samples */ + for (i = 0; i < pred_order; i++) { + decoded[i] = get_sbits_long(&s->gb, bps); + } + + coeff_prec = get_bits(&s->gb, 4) + 1; + if (coeff_prec == 16) { + av_log(s->avctx, AV_LOG_ERROR, "invalid coeff precision\n"); + return AVERROR_INVALIDDATA; + } + qlevel = get_sbits(&s->gb, 5); + if (qlevel < 0) { + av_log(s->avctx, AV_LOG_ERROR, "qlevel %d not supported, maybe buggy stream\n", + qlevel); + return AVERROR_INVALIDDATA; + } + + for (i = 0; i < pred_order; i++) { + coeffs[pred_order - i - 1] = get_sbits(&s->gb, coeff_prec); + } + + if ((ret = decode_residuals(s, decoded, pred_order)) < 0) + return ret; + + if ( ( s->buggy_lpc && s->flac_stream_info.bps <= 16) + || ( !s->buggy_lpc && bps <= 16 + && bps + coeff_prec + av_log2(pred_order) <= 32)) { + s->dsp.lpc16(decoded, coeffs, pred_order, qlevel, s->blocksize); + } else { + s->dsp.lpc32(decoded, coeffs, pred_order, qlevel, s->blocksize); + if (s->flac_stream_info.bps <= 16) + lpc_analyze_remodulate(decoded, coeffs, pred_order, qlevel, s->blocksize, bps); + } + + return 0; +} + +static inline int decode_subframe(FLACContext *s, int channel) +{ + int32_t *decoded = s->decoded[channel]; + int type, wasted = 0; + int bps = s->flac_stream_info.bps; + int i, tmp, ret; + + if (channel == 0) { + if (s->ch_mode == FLAC_CHMODE_RIGHT_SIDE) + bps++; + } else { + if (s->ch_mode == FLAC_CHMODE_LEFT_SIDE || s->ch_mode == FLAC_CHMODE_MID_SIDE) + bps++; + } + + if (get_bits1(&s->gb)) { + av_log(s->avctx, AV_LOG_ERROR, "invalid subframe padding\n"); + return AVERROR_INVALIDDATA; + } + type = get_bits(&s->gb, 6); + + if (get_bits1(&s->gb)) { + int left = get_bits_left(&s->gb); + if ( left <= 0 || + (left < bps && !show_bits_long(&s->gb, left)) || + !show_bits_long(&s->gb, bps)) { + av_log(s->avctx, AV_LOG_ERROR, + "Invalid number of wasted bits > available bits (%d) - left=%d\n", + bps, left); + return AVERROR_INVALIDDATA; + } + wasted = 1 + get_unary(&s->gb, 1, get_bits_left(&s->gb)); + bps -= wasted; + } + if (bps > 32) { + avpriv_report_missing_feature(s->avctx, "Decorrelated bit depth > 32"); + return AVERROR_PATCHWELCOME; + } + +//FIXME use av_log2 for types + if (type == 0) { + tmp = get_sbits_long(&s->gb, bps); + for (i = 0; i < s->blocksize; i++) + decoded[i] = tmp; + } else if (type == 1) { + for (i = 0; i < s->blocksize; i++) + decoded[i] = get_sbits_long(&s->gb, bps); + } else if ((type >= 8) && (type <= 12)) { + if ((ret = decode_subframe_fixed(s, decoded, type & ~0x8, bps)) < 0) + return ret; + } else if (type >= 32) { + if ((ret = decode_subframe_lpc(s, decoded, (type & ~0x20)+1, bps)) < 0) + return ret; + } else { + av_log(s->avctx, AV_LOG_ERROR, "invalid coding type\n"); + return AVERROR_INVALIDDATA; + } + + if (wasted && wasted < 32) { + int i; + for (i = 0; i < s->blocksize; i++) + decoded[i] = (unsigned)decoded[i] << wasted; + } + + return 0; +} + +static int decode_frame(FLACContext *s) +{ + int i, ret; + GetBitContext *gb = &s->gb; + FLACFrameInfo fi; + + if ((ret = ff_flac_decode_frame_header(s->avctx, gb, &fi, 0)) < 0) { + av_log(s->avctx, AV_LOG_ERROR, "invalid frame header\n"); + return ret; + } + + if ( s->flac_stream_info.channels + && fi.channels != s->flac_stream_info.channels + && s->got_streaminfo) { + s->flac_stream_info.channels = s->avctx->channels = fi.channels; + ff_flac_set_channel_layout(s->avctx); + ret = allocate_buffers(s); + if (ret < 0) + return ret; + } + s->flac_stream_info.channels = s->avctx->channels = fi.channels; + if (!s->avctx->channel_layout) + ff_flac_set_channel_layout(s->avctx); + s->ch_mode = fi.ch_mode; + + if (!s->flac_stream_info.bps && !fi.bps) { + av_log(s->avctx, AV_LOG_ERROR, "bps not found in STREAMINFO or frame header\n"); + return AVERROR_INVALIDDATA; + } + if (!fi.bps) { + fi.bps = s->flac_stream_info.bps; + } else if (s->flac_stream_info.bps && fi.bps != s->flac_stream_info.bps) { + av_log(s->avctx, AV_LOG_ERROR, "switching bps mid-stream is not " + "supported\n"); + return AVERROR_INVALIDDATA; + } + + if (!s->flac_stream_info.bps) { + s->flac_stream_info.bps = s->avctx->bits_per_raw_sample = fi.bps; + flac_set_bps(s); + } + + if (!s->flac_stream_info.max_blocksize) + s->flac_stream_info.max_blocksize = FLAC_MAX_BLOCKSIZE; + if (fi.blocksize > s->flac_stream_info.max_blocksize) { + av_log(s->avctx, AV_LOG_ERROR, "blocksize %d > %d\n", fi.blocksize, + s->flac_stream_info.max_blocksize); + return AVERROR_INVALIDDATA; + } + s->blocksize = fi.blocksize; + + if (!s->flac_stream_info.samplerate && !fi.samplerate) { + av_log(s->avctx, AV_LOG_ERROR, "sample rate not found in STREAMINFO" + " or frame header\n"); + return AVERROR_INVALIDDATA; + } + if (fi.samplerate == 0) + fi.samplerate = s->flac_stream_info.samplerate; + s->flac_stream_info.samplerate = s->avctx->sample_rate = fi.samplerate; + + if (!s->got_streaminfo) { + ret = allocate_buffers(s); + if (ret < 0) + return ret; + s->got_streaminfo = 1; + dump_headers(s->avctx, &s->flac_stream_info); + } + ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt, + s->flac_stream_info.channels, s->flac_stream_info.bps); + +// dump_headers(s->avctx, &s->flac_stream_info); + + /* subframes */ + for (i = 0; i < s->flac_stream_info.channels; i++) { + if ((ret = decode_subframe(s, i)) < 0) + return ret; + } + + align_get_bits(gb); + + /* frame footer */ + skip_bits(gb, 16); /* data crc */ + + return 0; +} + +static int flac_decode_frame(AVCodecContext *avctx, void *data, + int *got_frame_ptr, AVPacket *avpkt) +{ + AVFrame *frame = data; + ThreadFrame tframe = { .f = data }; + const uint8_t *buf = avpkt->data; + int buf_size = avpkt->size; + FLACContext *s = avctx->priv_data; + int bytes_read = 0; + int ret; + + *got_frame_ptr = 0; + + if (s->flac_stream_info.max_framesize == 0) { + s->flac_stream_info.max_framesize = + ff_flac_get_max_frame_size(s->flac_stream_info.max_blocksize ? s->flac_stream_info.max_blocksize : FLAC_MAX_BLOCKSIZE, + FLAC_MAX_CHANNELS, 32); + } + + if (buf_size > 5 && !memcmp(buf, "\177FLAC", 5)) { + av_log(s->avctx, AV_LOG_DEBUG, "skipping flac header packet 1\n"); + return buf_size; + } + + if (buf_size > 0 && (*buf & 0x7F) == FLAC_METADATA_TYPE_VORBIS_COMMENT) { + av_log(s->avctx, AV_LOG_DEBUG, "skipping vorbis comment\n"); + return buf_size; + } + + /* check that there is at least the smallest decodable amount of data. + this amount corresponds to the smallest valid FLAC frame possible. + FF F8 69 02 00 00 9A 00 00 34 46 */ + if (buf_size < FLAC_MIN_FRAME_SIZE) + return buf_size; + + /* check for inline header */ + if (AV_RB32(buf) == MKBETAG('f','L','a','C')) { + if (!s->got_streaminfo && (ret = parse_streaminfo(s, buf, buf_size))) { + av_log(s->avctx, AV_LOG_ERROR, "invalid header\n"); + return ret; + } + return get_metadata_size(buf, buf_size); + } + + /* decode frame */ + if ((ret = init_get_bits8(&s->gb, buf, buf_size)) < 0) + return ret; + if ((ret = decode_frame(s)) < 0) { + av_log(s->avctx, AV_LOG_ERROR, "decode_frame() failed\n"); + return ret; + } + bytes_read = get_bits_count(&s->gb)/8; + + if ((s->avctx->err_recognition & (AV_EF_CRCCHECK|AV_EF_COMPLIANT)) && + av_crc(av_crc_get_table(AV_CRC_16_ANSI), + 0, buf, bytes_read)) { + av_log(s->avctx, AV_LOG_ERROR, "CRC error at PTS %"PRId64"\n", avpkt->pts); + if (s->avctx->err_recognition & AV_EF_EXPLODE) + return AVERROR_INVALIDDATA; + } + + /* get output buffer */ + frame->nb_samples = s->blocksize; + if ((ret = ff_thread_get_buffer(avctx, &tframe, 0)) < 0) + return ret; + + s->dsp.decorrelate[s->ch_mode](frame->data, s->decoded, + s->flac_stream_info.channels, + s->blocksize, s->sample_shift); + + if (bytes_read > buf_size) { + av_log(s->avctx, AV_LOG_ERROR, "overread: %d\n", bytes_read - buf_size); + return AVERROR_INVALIDDATA; + } + if (bytes_read < buf_size) { + av_log(s->avctx, AV_LOG_DEBUG, "underread: %d orig size: %d\n", + buf_size - bytes_read, buf_size); + } + + *got_frame_ptr = 1; + + return bytes_read; +} + +#if HAVE_THREADS +static int init_thread_copy(AVCodecContext *avctx) +{ + FLACContext *s = avctx->priv_data; + s->decoded_buffer = NULL; + s->decoded_buffer_size = 0; + s->avctx = avctx; + if (s->flac_stream_info.max_blocksize) + return allocate_buffers(s); + return 0; +} +#endif + +static av_cold int flac_decode_close(AVCodecContext *avctx) +{ + FLACContext *s = avctx->priv_data; + + av_freep(&s->decoded_buffer); + + return 0; +} + +static const AVOption options[] = { +{ "use_buggy_lpc", "emulate old buggy lavc behavior", offsetof(FLACContext, buggy_lpc), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM }, +{ NULL }, +}; + +static const AVClass flac_decoder_class = { + "FLAC decoder", + av_default_item_name, + options, + LIBAVUTIL_VERSION_INT, +}; + +AVCodec ff_flac_decoder = { + .name = "flac", + .long_name = NULL_IF_CONFIG_SMALL("FLAC (Free Lossless Audio Codec)"), + .type = AVMEDIA_TYPE_AUDIO, + .id = AV_CODEC_ID_FLAC, + .priv_data_size = sizeof(FLACContext), + .init = flac_decode_init, + .close = flac_decode_close, + .decode = flac_decode_frame, + .init_thread_copy = ONLY_IF_THREADS_ENABLED(init_thread_copy), + .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS, + .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16, + AV_SAMPLE_FMT_S16P, + AV_SAMPLE_FMT_S32, + AV_SAMPLE_FMT_S32P, + AV_SAMPLE_FMT_NONE }, + .priv_class = &flac_decoder_class, +}; diff --git a/libs/ffvpx/libavcodec/flacdsp.c b/libs/ffvpx/libavcodec/flacdsp.c new file mode 100644 index 000000000..bc9a5dbed --- /dev/null +++ b/libs/ffvpx/libavcodec/flacdsp.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2012 Mans Rullgard <mans@mansr.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/samplefmt.h" +#include "flacdsp.h" +#include "config.h" + +#define SAMPLE_SIZE 16 +#define PLANAR 0 +#include "flacdsp_template.c" +#include "flacdsp_lpc_template.c" + +#undef PLANAR +#define PLANAR 1 +#include "flacdsp_template.c" + +#undef SAMPLE_SIZE +#undef PLANAR +#define SAMPLE_SIZE 32 +#define PLANAR 0 +#include "flacdsp_template.c" +#include "flacdsp_lpc_template.c" + +#undef PLANAR +#define PLANAR 1 +#include "flacdsp_template.c" + +static void flac_lpc_16_c(int32_t *decoded, const int coeffs[32], + int pred_order, int qlevel, int len) +{ + int i, j; + + for (i = pred_order; i < len - 1; i += 2, decoded += 2) { + SUINT c = coeffs[0]; + SUINT d = decoded[0]; + int s0 = 0, s1 = 0; + for (j = 1; j < pred_order; j++) { + s0 += c*d; + d = decoded[j]; + s1 += c*d; + c = coeffs[j]; + } + s0 += c*d; + d = decoded[j] += (SUINT)(s0 >> qlevel); + s1 += c*d; + decoded[j + 1] += (SUINT)(s1 >> qlevel); + } + if (i < len) { + int sum = 0; + for (j = 0; j < pred_order; j++) + sum += coeffs[j] * (SUINT)decoded[j]; + decoded[j] = decoded[j] + (unsigned)(sum >> qlevel); + } +} + +static void flac_lpc_32_c(int32_t *decoded, const int coeffs[32], + int pred_order, int qlevel, int len) +{ + int i, j; + + for (i = pred_order; i < len; i++, decoded++) { + int64_t sum = 0; + for (j = 0; j < pred_order; j++) + sum += (int64_t)coeffs[j] * decoded[j]; + decoded[j] += sum >> qlevel; + } + +} + +av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, + int bps) +{ + c->lpc16 = flac_lpc_16_c; + c->lpc32 = flac_lpc_32_c; + c->lpc16_encode = flac_lpc_encode_c_16; + c->lpc32_encode = flac_lpc_encode_c_32; + + switch (fmt) { + case AV_SAMPLE_FMT_S32: + c->decorrelate[0] = flac_decorrelate_indep_c_32; + c->decorrelate[1] = flac_decorrelate_ls_c_32; + c->decorrelate[2] = flac_decorrelate_rs_c_32; + c->decorrelate[3] = flac_decorrelate_ms_c_32; + break; + + case AV_SAMPLE_FMT_S32P: + c->decorrelate[0] = flac_decorrelate_indep_c_32p; + c->decorrelate[1] = flac_decorrelate_ls_c_32p; + c->decorrelate[2] = flac_decorrelate_rs_c_32p; + c->decorrelate[3] = flac_decorrelate_ms_c_32p; + break; + + case AV_SAMPLE_FMT_S16: + c->decorrelate[0] = flac_decorrelate_indep_c_16; + c->decorrelate[1] = flac_decorrelate_ls_c_16; + c->decorrelate[2] = flac_decorrelate_rs_c_16; + c->decorrelate[3] = flac_decorrelate_ms_c_16; + break; + + case AV_SAMPLE_FMT_S16P: + c->decorrelate[0] = flac_decorrelate_indep_c_16p; + c->decorrelate[1] = flac_decorrelate_ls_c_16p; + c->decorrelate[2] = flac_decorrelate_rs_c_16p; + c->decorrelate[3] = flac_decorrelate_ms_c_16p; + break; + } + + if (ARCH_ARM) + ff_flacdsp_init_arm(c, fmt, channels, bps); + if (ARCH_X86) + ff_flacdsp_init_x86(c, fmt, channels, bps); +} diff --git a/libs/ffvpx/libavcodec/flacdsp.h b/libs/ffvpx/libavcodec/flacdsp.h new file mode 100644 index 000000000..7bb0dd0e9 --- /dev/null +++ b/libs/ffvpx/libavcodec/flacdsp.h @@ -0,0 +1,43 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_FLACDSP_H +#define AVCODEC_FLACDSP_H + +#include <stdint.h> +#include "libavutil/internal.h" +#include "libavutil/samplefmt.h" + +typedef struct FLACDSPContext { + void (*decorrelate[4])(uint8_t **out, int32_t **in, int channels, + int len, int shift); + void (*lpc16)(int32_t *samples, const int coeffs[32], int order, + int qlevel, int len); + void (*lpc32)(int32_t *samples, const int coeffs[32], int order, + int qlevel, int len); + void (*lpc16_encode)(int32_t *res, const int32_t *smp, int len, int order, + const int32_t coefs[32], int shift); + void (*lpc32_encode)(int32_t *res, const int32_t *smp, int len, int order, + const int32_t coefs[32], int shift); +} FLACDSPContext; + +void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, int bps); +void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, int bps); +void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, int bps); + +#endif /* AVCODEC_FLACDSP_H */ diff --git a/libs/ffvpx/libavcodec/flacdsp_lpc_template.c b/libs/ffvpx/libavcodec/flacdsp_lpc_template.c new file mode 100644 index 000000000..5d532e067 --- /dev/null +++ b/libs/ffvpx/libavcodec/flacdsp_lpc_template.c @@ -0,0 +1,159 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> +#include "libavutil/avutil.h" +#include "mathops.h" + +#undef FUNC +#undef sum_type +#undef MUL +#undef CLIP +#undef FSUF + +#define FUNC(n) AV_JOIN(n ## _, SAMPLE_SIZE) + +#if SAMPLE_SIZE == 32 +# define sum_type int64_t +# define MUL(a, b) MUL64(a, b) +# define CLIP(x) av_clipl_int32(x) +#else +# define sum_type int32_t +# define MUL(a, b) ((a) * (b)) +# define CLIP(x) (x) +#endif + +#define LPC1(x) { \ + int c = coefs[(x)-1]; \ + p0 += MUL(c, s); \ + s = smp[i-(x)+1]; \ + p1 += MUL(c, s); \ +} + +static av_always_inline void FUNC(lpc_encode_unrolled)(int32_t *res, + const int32_t *smp, int len, int order, + const int32_t *coefs, int shift, int big) +{ + int i; + for (i = order; i < len; i += 2) { + int s = smp[i-order]; + sum_type p0 = 0, p1 = 0; + if (big) { + switch (order) { + case 32: LPC1(32) + case 31: LPC1(31) + case 30: LPC1(30) + case 29: LPC1(29) + case 28: LPC1(28) + case 27: LPC1(27) + case 26: LPC1(26) + case 25: LPC1(25) + case 24: LPC1(24) + case 23: LPC1(23) + case 22: LPC1(22) + case 21: LPC1(21) + case 20: LPC1(20) + case 19: LPC1(19) + case 18: LPC1(18) + case 17: LPC1(17) + case 16: LPC1(16) + case 15: LPC1(15) + case 14: LPC1(14) + case 13: LPC1(13) + case 12: LPC1(12) + case 11: LPC1(11) + case 10: LPC1(10) + case 9: LPC1( 9) + LPC1( 8) + LPC1( 7) + LPC1( 6) + LPC1( 5) + LPC1( 4) + LPC1( 3) + LPC1( 2) + LPC1( 1) + } + } else { + switch (order) { + case 8: LPC1( 8) + case 7: LPC1( 7) + case 6: LPC1( 6) + case 5: LPC1( 5) + case 4: LPC1( 4) + case 3: LPC1( 3) + case 2: LPC1( 2) + case 1: LPC1( 1) + } + } + res[i ] = smp[i ] - CLIP(p0 >> shift); + res[i+1] = smp[i+1] - CLIP(p1 >> shift); + } +} + +static void FUNC(flac_lpc_encode_c)(int32_t *res, const int32_t *smp, int len, + int order, const int32_t *coefs, int shift) +{ + int i; + for (i = 0; i < order; i++) + res[i] = smp[i]; +#if CONFIG_SMALL + for (i = order; i < len; i += 2) { + int j; + int s = smp[i]; + sum_type p0 = 0, p1 = 0; + for (j = 0; j < order; j++) { + int c = coefs[j]; + p1 += MUL(c, s); + s = smp[i-j-1]; + p0 += MUL(c, s); + } + res[i ] = smp[i ] - CLIP(p0 >> shift); + res[i+1] = smp[i+1] - CLIP(p1 >> shift); + } +#else + switch (order) { + case 1: FUNC(lpc_encode_unrolled)(res, smp, len, 1, coefs, shift, 0); break; + case 2: FUNC(lpc_encode_unrolled)(res, smp, len, 2, coefs, shift, 0); break; + case 3: FUNC(lpc_encode_unrolled)(res, smp, len, 3, coefs, shift, 0); break; + case 4: FUNC(lpc_encode_unrolled)(res, smp, len, 4, coefs, shift, 0); break; + case 5: FUNC(lpc_encode_unrolled)(res, smp, len, 5, coefs, shift, 0); break; + case 6: FUNC(lpc_encode_unrolled)(res, smp, len, 6, coefs, shift, 0); break; + case 7: FUNC(lpc_encode_unrolled)(res, smp, len, 7, coefs, shift, 0); break; + case 8: FUNC(lpc_encode_unrolled)(res, smp, len, 8, coefs, shift, 0); break; + default: FUNC(lpc_encode_unrolled)(res, smp, len, order, coefs, shift, 1); break; + } +#endif +} + +/* Comment for clarity/de-obfuscation. + * + * for (int i = order; i < len; i++) { + * int32_t p = 0; + * for (int j = 0; j < order; j++) { + * int c = coefs[j]; + * int s = smp[(i-1)-j]; + * p += c*s; + * } + * res[i] = smp[i] - (p >> shift); + * } + * + * The CONFIG_SMALL code above simplifies to this, in the case of SAMPLE_SIZE + * not being equal to 32 (at the present time that means for 16-bit audio). The + * code above does 2 samples per iteration. Commit bfdd5bc (made all the way + * back in 2007) says that way is faster. + */ diff --git a/libs/ffvpx/libavcodec/flacdsp_template.c b/libs/ffvpx/libavcodec/flacdsp_template.c new file mode 100644 index 000000000..776c78da7 --- /dev/null +++ b/libs/ffvpx/libavcodec/flacdsp_template.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2012 Mans Rullgard <mans@mansr.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> +#include "libavutil/avutil.h" + +#undef FUNC +#undef FSUF +#undef sample +#undef sample_type +#undef OUT +#undef S + +#if SAMPLE_SIZE == 32 +# define sample_type int32_t +#else +# define sample_type int16_t +#endif + +#if PLANAR +# define FSUF AV_JOIN(SAMPLE_SIZE, p) +# define sample sample_type * +# define OUT(n) n +# define S(s, c, i) (s[c][i]) +#else +# define FSUF SAMPLE_SIZE +# define sample sample_type +# define OUT(n) n[0] +# define S(s, c, i) (*s++) +#endif + +#define FUNC(n) AV_JOIN(n ## _, FSUF) + +static void FUNC(flac_decorrelate_indep_c)(uint8_t **out, int32_t **in, + int channels, int len, int shift) +{ + sample *samples = (sample *) OUT(out); + int i, j; + + for (j = 0; j < len; j++) + for (i = 0; i < channels; i++) + S(samples, i, j) = (int)((unsigned)in[i][j] << shift); +} + +static void FUNC(flac_decorrelate_ls_c)(uint8_t **out, int32_t **in, + int channels, int len, int shift) +{ + sample *samples = (sample *) OUT(out); + int i; + + for (i = 0; i < len; i++) { + int a = in[0][i]; + int b = in[1][i]; + S(samples, 0, i) = a << shift; + S(samples, 1, i) = (a - b) << shift; + } +} + +static void FUNC(flac_decorrelate_rs_c)(uint8_t **out, int32_t **in, + int channels, int len, int shift) +{ + sample *samples = (sample *) OUT(out); + int i; + + for (i = 0; i < len; i++) { + int a = in[0][i]; + int b = in[1][i]; + S(samples, 0, i) = (a + b) << shift; + S(samples, 1, i) = b << shift; + } +} + +static void FUNC(flac_decorrelate_ms_c)(uint8_t **out, int32_t **in, + int channels, int len, int shift) +{ + sample *samples = (sample *) OUT(out); + int i; + + for (i = 0; i < len; i++) { + int a = in[0][i]; + int b = in[1][i]; + a -= b >> 1; + S(samples, 0, i) = (a + b) << shift; + S(samples, 1, i) = a << shift; + } +} diff --git a/libs/ffvpx/libavcodec/frame_thread_encoder.h b/libs/ffvpx/libavcodec/frame_thread_encoder.h new file mode 100644 index 000000000..1f79553f2 --- /dev/null +++ b/libs/ffvpx/libavcodec/frame_thread_encoder.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2012 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_FRAME_THREAD_ENCODER_H +#define AVCODEC_FRAME_THREAD_ENCODER_H + +#include "avcodec.h" + +int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options); +void ff_frame_thread_encoder_free(AVCodecContext *avctx); +int ff_thread_video_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame, int *got_packet_ptr); + +#endif /* AVCODEC_FRAME_THREAD_ENCODER_H */ diff --git a/libs/ffvpx/libavcodec/get_bits.h b/libs/ffvpx/libavcodec/get_bits.h new file mode 100644 index 000000000..56ef5f0cb --- /dev/null +++ b/libs/ffvpx/libavcodec/get_bits.h @@ -0,0 +1,615 @@ +/* + * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * bitstream reader API header. + */ + +#ifndef AVCODEC_GET_BITS_H +#define AVCODEC_GET_BITS_H + +#include <stdint.h> + +#include "libavutil/common.h" +#include "libavutil/intreadwrite.h" +#include "libavutil/log.h" +#include "libavutil/avassert.h" +#include "avcodec.h" +#include "mathops.h" +#include "vlc.h" + +/* + * Safe bitstream reading: + * optionally, the get_bits API can check to ensure that we + * don't read past input buffer boundaries. This is protected + * with CONFIG_SAFE_BITSTREAM_READER at the global level, and + * then below that with UNCHECKED_BITSTREAM_READER at the per- + * decoder level. This means that decoders that check internally + * can "#define UNCHECKED_BITSTREAM_READER 1" to disable + * overread checks. + * Boundary checking causes a minor performance penalty so for + * applications that won't want/need this, it can be disabled + * globally using "#define CONFIG_SAFE_BITSTREAM_READER 0". + */ +#ifndef UNCHECKED_BITSTREAM_READER +#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER +#endif + +typedef struct GetBitContext { + const uint8_t *buffer, *buffer_end; + int index; + int size_in_bits; + int size_in_bits_plus8; +} GetBitContext; + +/* Bitstream reader API docs: + * name + * arbitrary name which is used as prefix for the internal variables + * + * gb + * getbitcontext + * + * OPEN_READER(name, gb) + * load gb into local variables + * + * CLOSE_READER(name, gb) + * store local vars in gb + * + * UPDATE_CACHE(name, gb) + * Refill the internal cache from the bitstream. + * After this call at least MIN_CACHE_BITS will be available. + * + * GET_CACHE(name, gb) + * Will output the contents of the internal cache, + * next bit is MSB of 32 or 64 bits (FIXME 64 bits). + * + * SHOW_UBITS(name, gb, num) + * Will return the next num bits. + * + * SHOW_SBITS(name, gb, num) + * Will return the next num bits and do sign extension. + * + * SKIP_BITS(name, gb, num) + * Will skip over the next num bits. + * Note, this is equivalent to SKIP_CACHE; SKIP_COUNTER. + * + * SKIP_CACHE(name, gb, num) + * Will remove the next num bits from the cache (note SKIP_COUNTER + * MUST be called before UPDATE_CACHE / CLOSE_READER). + * + * SKIP_COUNTER(name, gb, num) + * Will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS). + * + * LAST_SKIP_BITS(name, gb, num) + * Like SKIP_BITS, to be used if next call is UPDATE_CACHE or CLOSE_READER. + * + * BITS_LEFT(name, gb) + * Return the number of bits left + * + * For examples see get_bits, show_bits, skip_bits, get_vlc. + */ + +#ifdef LONG_BITSTREAM_READER +# define MIN_CACHE_BITS 32 +#else +# define MIN_CACHE_BITS 25 +#endif + +#define OPEN_READER_NOSIZE(name, gb) \ + unsigned int name ## _index = (gb)->index; \ + unsigned int av_unused name ## _cache + +#if UNCHECKED_BITSTREAM_READER +#define OPEN_READER(name, gb) OPEN_READER_NOSIZE(name, gb) + +#define BITS_AVAILABLE(name, gb) 1 +#else +#define OPEN_READER(name, gb) \ + OPEN_READER_NOSIZE(name, gb); \ + unsigned int name ## _size_plus8 = (gb)->size_in_bits_plus8 + +#define BITS_AVAILABLE(name, gb) name ## _index < name ## _size_plus8 +#endif + +#define CLOSE_READER(name, gb) (gb)->index = name ## _index + +# ifdef LONG_BITSTREAM_READER + +# define UPDATE_CACHE_LE(name, gb) name ## _cache = \ + AV_RL64((gb)->buffer + (name ## _index >> 3)) >> (name ## _index & 7) + +# define UPDATE_CACHE_BE(name, gb) name ## _cache = \ + AV_RB64((gb)->buffer + (name ## _index >> 3)) >> (32 - (name ## _index & 7)) + +#else + +# define UPDATE_CACHE_LE(name, gb) name ## _cache = \ + AV_RL32((gb)->buffer + (name ## _index >> 3)) >> (name ## _index & 7) + +# define UPDATE_CACHE_BE(name, gb) name ## _cache = \ + AV_RB32((gb)->buffer + (name ## _index >> 3)) << (name ## _index & 7) + +#endif + + +#ifdef BITSTREAM_READER_LE + +# define UPDATE_CACHE(name, gb) UPDATE_CACHE_LE(name, gb) + +# define SKIP_CACHE(name, gb, num) name ## _cache >>= (num) + +#else + +# define UPDATE_CACHE(name, gb) UPDATE_CACHE_BE(name, gb) + +# define SKIP_CACHE(name, gb, num) name ## _cache <<= (num) + +#endif + +#if UNCHECKED_BITSTREAM_READER +# define SKIP_COUNTER(name, gb, num) name ## _index += (num) +#else +# define SKIP_COUNTER(name, gb, num) \ + name ## _index = FFMIN(name ## _size_plus8, name ## _index + (num)) +#endif + +#define BITS_LEFT(name, gb) ((int)((gb)->size_in_bits - name ## _index)) + +#define SKIP_BITS(name, gb, num) \ + do { \ + SKIP_CACHE(name, gb, num); \ + SKIP_COUNTER(name, gb, num); \ + } while (0) + +#define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num) + +#define SHOW_UBITS_LE(name, gb, num) zero_extend(name ## _cache, num) +#define SHOW_SBITS_LE(name, gb, num) sign_extend(name ## _cache, num) + +#define SHOW_UBITS_BE(name, gb, num) NEG_USR32(name ## _cache, num) +#define SHOW_SBITS_BE(name, gb, num) NEG_SSR32(name ## _cache, num) + +#ifdef BITSTREAM_READER_LE +# define SHOW_UBITS(name, gb, num) SHOW_UBITS_LE(name, gb, num) +# define SHOW_SBITS(name, gb, num) SHOW_SBITS_LE(name, gb, num) +#else +# define SHOW_UBITS(name, gb, num) SHOW_UBITS_BE(name, gb, num) +# define SHOW_SBITS(name, gb, num) SHOW_SBITS_BE(name, gb, num) +#endif + +#define GET_CACHE(name, gb) ((uint32_t) name ## _cache) + +static inline int get_bits_count(const GetBitContext *s) +{ + return s->index; +} + +/** + * Skips the specified number of bits. + * @param n the number of bits to skip, + * For the UNCHECKED_BITSTREAM_READER this must not cause the distance + * from the start to overflow int32_t. Staying within the bitstream + padding + * is sufficient, too. + */ +static inline void skip_bits_long(GetBitContext *s, int n) +{ +#if UNCHECKED_BITSTREAM_READER + s->index += n; +#else + s->index += av_clip(n, -s->index, s->size_in_bits_plus8 - s->index); +#endif +} + +/** + * Read MPEG-1 dc-style VLC (sign bit + mantissa with no MSB). + * if MSB not set it is negative + * @param n length in bits + */ +static inline int get_xbits(GetBitContext *s, int n) +{ + register int sign; + register int32_t cache; + OPEN_READER(re, s); + av_assert2(n>0 && n<=25); + UPDATE_CACHE(re, s); + cache = GET_CACHE(re, s); + sign = ~cache >> 31; + LAST_SKIP_BITS(re, s, n); + CLOSE_READER(re, s); + return (NEG_USR32(sign ^ cache, n) ^ sign) - sign; +} + +static inline int get_xbits_le(GetBitContext *s, int n) +{ + register int sign; + register int32_t cache; + OPEN_READER(re, s); + av_assert2(n>0 && n<=25); + UPDATE_CACHE_LE(re, s); + cache = GET_CACHE(re, s); + sign = sign_extend(~cache, n) >> 31; + LAST_SKIP_BITS(re, s, n); + CLOSE_READER(re, s); + return (zero_extend(sign ^ cache, n) ^ sign) - sign; +} + +static inline int get_sbits(GetBitContext *s, int n) +{ + register int tmp; + OPEN_READER(re, s); + av_assert2(n>0 && n<=25); + UPDATE_CACHE(re, s); + tmp = SHOW_SBITS(re, s, n); + LAST_SKIP_BITS(re, s, n); + CLOSE_READER(re, s); + return tmp; +} + +/** + * Read 1-25 bits. + */ +static inline unsigned int get_bits(GetBitContext *s, int n) +{ + register int tmp; + OPEN_READER(re, s); + av_assert2(n>0 && n<=25); + UPDATE_CACHE(re, s); + tmp = SHOW_UBITS(re, s, n); + LAST_SKIP_BITS(re, s, n); + CLOSE_READER(re, s); + return tmp; +} + +/** + * Read 0-25 bits. + */ +static av_always_inline int get_bitsz(GetBitContext *s, int n) +{ + return n ? get_bits(s, n) : 0; +} + +static inline unsigned int get_bits_le(GetBitContext *s, int n) +{ + register int tmp; + OPEN_READER(re, s); + av_assert2(n>0 && n<=25); + UPDATE_CACHE_LE(re, s); + tmp = SHOW_UBITS_LE(re, s, n); + LAST_SKIP_BITS(re, s, n); + CLOSE_READER(re, s); + return tmp; +} + +/** + * Show 1-25 bits. + */ +static inline unsigned int show_bits(GetBitContext *s, int n) +{ + register int tmp; + OPEN_READER_NOSIZE(re, s); + av_assert2(n>0 && n<=25); + UPDATE_CACHE(re, s); + tmp = SHOW_UBITS(re, s, n); + return tmp; +} + +static inline void skip_bits(GetBitContext *s, int n) +{ + OPEN_READER(re, s); + LAST_SKIP_BITS(re, s, n); + CLOSE_READER(re, s); +} + +static inline unsigned int get_bits1(GetBitContext *s) +{ + unsigned int index = s->index; + uint8_t result = s->buffer[index >> 3]; +#ifdef BITSTREAM_READER_LE + result >>= index & 7; + result &= 1; +#else + result <<= index & 7; + result >>= 8 - 1; +#endif +#if !UNCHECKED_BITSTREAM_READER + if (s->index < s->size_in_bits_plus8) +#endif + index++; + s->index = index; + + return result; +} + +static inline unsigned int show_bits1(GetBitContext *s) +{ + return show_bits(s, 1); +} + +static inline void skip_bits1(GetBitContext *s) +{ + skip_bits(s, 1); +} + +/** + * Read 0-32 bits. + */ +static inline unsigned int get_bits_long(GetBitContext *s, int n) +{ + av_assert2(n>=0 && n<=32); + if (!n) { + return 0; + } else if (n <= MIN_CACHE_BITS) { + return get_bits(s, n); + } else { +#ifdef BITSTREAM_READER_LE + unsigned ret = get_bits(s, 16); + return ret | (get_bits(s, n - 16) << 16); +#else + unsigned ret = get_bits(s, 16) << (n - 16); + return ret | get_bits(s, n - 16); +#endif + } +} + +/** + * Read 0-64 bits. + */ +static inline uint64_t get_bits64(GetBitContext *s, int n) +{ + if (n <= 32) { + return get_bits_long(s, n); + } else { +#ifdef BITSTREAM_READER_LE + uint64_t ret = get_bits_long(s, 32); + return ret | (uint64_t) get_bits_long(s, n - 32) << 32; +#else + uint64_t ret = (uint64_t) get_bits_long(s, n - 32) << 32; + return ret | get_bits_long(s, 32); +#endif + } +} + +/** + * Read 0-32 bits as a signed integer. + */ +static inline int get_sbits_long(GetBitContext *s, int n) +{ + // sign_extend(x, 0) is undefined + if (!n) + return 0; + + return sign_extend(get_bits_long(s, n), n); +} + +/** + * Show 0-32 bits. + */ +static inline unsigned int show_bits_long(GetBitContext *s, int n) +{ + if (n <= MIN_CACHE_BITS) { + return show_bits(s, n); + } else { + GetBitContext gb = *s; + return get_bits_long(&gb, n); + } +} + +static inline int check_marker(void *logctx, GetBitContext *s, const char *msg) +{ + int bit = get_bits1(s); + if (!bit) + av_log(logctx, AV_LOG_INFO, "Marker bit missing at %d of %d %s\n", + get_bits_count(s) - 1, s->size_in_bits, msg); + + return bit; +} + +/** + * Initialize GetBitContext. + * @param buffer bitstream buffer, must be AV_INPUT_BUFFER_PADDING_SIZE bytes + * larger than the actual read bits because some optimized bitstream + * readers read 32 or 64 bit at once and could read over the end + * @param bit_size the size of the buffer in bits + * @return 0 on success, AVERROR_INVALIDDATA if the buffer_size would overflow. + */ +static inline int init_get_bits(GetBitContext *s, const uint8_t *buffer, + int bit_size) +{ + int buffer_size; + int ret = 0; + + if (bit_size >= INT_MAX - FFMAX(7, AV_INPUT_BUFFER_PADDING_SIZE*8) || bit_size < 0 || !buffer) { + bit_size = 0; + buffer = NULL; + ret = AVERROR_INVALIDDATA; + } + + buffer_size = (bit_size + 7) >> 3; + + s->buffer = buffer; + s->size_in_bits = bit_size; + s->size_in_bits_plus8 = bit_size + 8; + s->buffer_end = buffer + buffer_size; + s->index = 0; + + return ret; +} + +/** + * Initialize GetBitContext. + * @param buffer bitstream buffer, must be AV_INPUT_BUFFER_PADDING_SIZE bytes + * larger than the actual read bits because some optimized bitstream + * readers read 32 or 64 bit at once and could read over the end + * @param byte_size the size of the buffer in bytes + * @return 0 on success, AVERROR_INVALIDDATA if the buffer_size would overflow. + */ +static inline int init_get_bits8(GetBitContext *s, const uint8_t *buffer, + int byte_size) +{ + if (byte_size > INT_MAX / 8 || byte_size < 0) + byte_size = -1; + return init_get_bits(s, buffer, byte_size * 8); +} + +static inline const uint8_t *align_get_bits(GetBitContext *s) +{ + int n = -get_bits_count(s) & 7; + if (n) + skip_bits(s, n); + return s->buffer + (s->index >> 3); +} + +/** + * If the vlc code is invalid and max_depth=1, then no bits will be removed. + * If the vlc code is invalid and max_depth>1, then the number of bits removed + * is undefined. + */ +#define GET_VLC(code, name, gb, table, bits, max_depth) \ + do { \ + int n, nb_bits; \ + unsigned int index; \ + \ + index = SHOW_UBITS(name, gb, bits); \ + code = table[index][0]; \ + n = table[index][1]; \ + \ + if (max_depth > 1 && n < 0) { \ + LAST_SKIP_BITS(name, gb, bits); \ + UPDATE_CACHE(name, gb); \ + \ + nb_bits = -n; \ + \ + index = SHOW_UBITS(name, gb, nb_bits) + code; \ + code = table[index][0]; \ + n = table[index][1]; \ + if (max_depth > 2 && n < 0) { \ + LAST_SKIP_BITS(name, gb, nb_bits); \ + UPDATE_CACHE(name, gb); \ + \ + nb_bits = -n; \ + \ + index = SHOW_UBITS(name, gb, nb_bits) + code; \ + code = table[index][0]; \ + n = table[index][1]; \ + } \ + } \ + SKIP_BITS(name, gb, n); \ + } while (0) + +#define GET_RL_VLC(level, run, name, gb, table, bits, \ + max_depth, need_update) \ + do { \ + int n, nb_bits; \ + unsigned int index; \ + \ + index = SHOW_UBITS(name, gb, bits); \ + level = table[index].level; \ + n = table[index].len; \ + \ + if (max_depth > 1 && n < 0) { \ + SKIP_BITS(name, gb, bits); \ + if (need_update) { \ + UPDATE_CACHE(name, gb); \ + } \ + \ + nb_bits = -n; \ + \ + index = SHOW_UBITS(name, gb, nb_bits) + level; \ + level = table[index].level; \ + n = table[index].len; \ + if (max_depth > 2 && n < 0) { \ + LAST_SKIP_BITS(name, gb, nb_bits); \ + if (need_update) { \ + UPDATE_CACHE(name, gb); \ + } \ + nb_bits = -n; \ + \ + index = SHOW_UBITS(name, gb, nb_bits) + level; \ + level = table[index].level; \ + n = table[index].len; \ + } \ + } \ + run = table[index].run; \ + SKIP_BITS(name, gb, n); \ + } while (0) + +/** + * Parse a vlc code. + * @param bits is the number of bits which will be read at once, must be + * identical to nb_bits in init_vlc() + * @param max_depth is the number of times bits bits must be read to completely + * read the longest vlc code + * = (max_vlc_length + bits - 1) / bits + * @returns the code parsed or -1 if no vlc matches + */ +static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2], + int bits, int max_depth) +{ + int code; + + OPEN_READER(re, s); + UPDATE_CACHE(re, s); + + GET_VLC(code, re, s, table, bits, max_depth); + + CLOSE_READER(re, s); + + return code; +} + +static inline int decode012(GetBitContext *gb) +{ + int n; + n = get_bits1(gb); + if (n == 0) + return 0; + else + return get_bits1(gb) + 1; +} + +static inline int decode210(GetBitContext *gb) +{ + if (get_bits1(gb)) + return 0; + else + return 2 - get_bits1(gb); +} + +static inline int get_bits_left(GetBitContext *gb) +{ + return gb->size_in_bits - get_bits_count(gb); +} + +static inline int skip_1stop_8data_bits(GetBitContext *gb) +{ + if (get_bits_left(gb) <= 0) + return AVERROR_INVALIDDATA; + + while (get_bits1(gb)) { + skip_bits(gb, 8); + if (get_bits_left(gb) <= 0) + return AVERROR_INVALIDDATA; + } + + return 0; +} + +#endif /* AVCODEC_GET_BITS_H */ diff --git a/libs/ffvpx/libavcodec/golomb.c b/libs/ffvpx/libavcodec/golomb.c new file mode 100644 index 000000000..937ac22ce --- /dev/null +++ b/libs/ffvpx/libavcodec/golomb.c @@ -0,0 +1,173 @@ +/* + * exp golomb vlc stuff + * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * @brief + * exp golomb vlc stuff + * @author Michael Niedermayer <michaelni@gmx.at> + */ + +#include "libavutil/common.h" + +const uint8_t ff_golomb_vlc_len[512]={ +19,17,15,15,13,13,13,13,11,11,11,11,11,11,11,11,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, +7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 +}; + +const uint8_t ff_ue_golomb_vlc_code[512]={ +32,32,32,32,32,32,32,32,31,32,32,32,32,32,32,32,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30, + 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9,10,10,10,10,11,11,11,11,12,12,12,12,13,13,13,13,14,14,14,14, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +const int8_t ff_se_golomb_vlc_code[512]={ + 17, 17, 17, 17, 17, 17, 17, 17, 16, 17, 17, 17, 17, 17, 17, 17, 8, -8, 9, -9, 10,-10, 11,-11, 12,-12, 13,-13, 14,-14, 15,-15, + 4, 4, 4, 4, -4, -4, -4, -4, 5, 5, 5, 5, -5, -5, -5, -5, 6, 6, 6, 6, -6, -6, -6, -6, 7, 7, 7, 7, -7, -7, -7, -7, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + + +const uint8_t ff_ue_golomb_len[256]={ + 1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,11, +11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,13, +13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13, +13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,15, +15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, +15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, +15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, +15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,17, +}; + +const uint8_t ff_interleaved_golomb_vlc_len[256]={ +9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5, +9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5, +9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +}; + +const uint8_t ff_interleaved_ue_golomb_vlc_code[256]={ + 15,16,7, 7, 17,18,8, 8, 3, 3, 3, 3, 3, 3, 3, 3, + 19,20,9, 9, 21,22,10,10,4, 4, 4, 4, 4, 4, 4, 4, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 23,24,11,11,25,26,12,12,5, 5, 5, 5, 5, 5, 5, 5, + 27,28,13,13,29,30,14,14,6, 6, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +const int8_t ff_interleaved_se_golomb_vlc_code[256]={ + 8, -8, 4, 4, 9, -9, -4, -4, 2, 2, 2, 2, 2, 2, 2, 2, + 10,-10, 5, 5, 11,-11, -5, -5, -2, -2, -2, -2, -2, -2, -2, -2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 12,-12, 6, 6, 13,-13, -6, -6, 3, 3, 3, 3, 3, 3, 3, 3, + 14,-14, 7, 7, 15,-15, -7, -7, -3, -3, -3, -3, -3, -3, -3, -3, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +const uint8_t ff_interleaved_dirac_golomb_vlc_code[256]={ +0, 1, 0, 0, 2, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, +4, 5, 2, 2, 6, 7, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +8, 9, 4, 4, 10,11,5, 5, 2, 2, 2, 2, 2, 2, 2, 2, +12,13,6, 6, 14,15,7, 7, 3, 3, 3, 3, 3, 3, 3, 3, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}; diff --git a/libs/ffvpx/libavcodec/golomb.h b/libs/ffvpx/libavcodec/golomb.h new file mode 100644 index 000000000..efb1eff8a --- /dev/null +++ b/libs/ffvpx/libavcodec/golomb.h @@ -0,0 +1,590 @@ +/* + * exp golomb vlc stuff + * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (c) 2004 Alex Beregszaszi + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * @brief + * exp golomb vlc stuff + * @author Michael Niedermayer <michaelni@gmx.at> and Alex Beregszaszi + */ + +#ifndef AVCODEC_GOLOMB_H +#define AVCODEC_GOLOMB_H + +#include <stdint.h> + +#include "get_bits.h" +#include "put_bits.h" + +#define INVALID_VLC 0x80000000 + +extern const uint8_t ff_golomb_vlc_len[512]; +extern const uint8_t ff_ue_golomb_vlc_code[512]; +extern const int8_t ff_se_golomb_vlc_code[512]; +extern const uint8_t ff_ue_golomb_len[256]; + +extern const uint8_t ff_interleaved_golomb_vlc_len[256]; +extern const uint8_t ff_interleaved_ue_golomb_vlc_code[256]; +extern const int8_t ff_interleaved_se_golomb_vlc_code[256]; +extern const uint8_t ff_interleaved_dirac_golomb_vlc_code[256]; + +/** + * Read an unsigned Exp-Golomb code in the range 0 to 8190. + */ +static inline int get_ue_golomb(GetBitContext *gb) +{ + unsigned int buf; + + OPEN_READER(re, gb); + UPDATE_CACHE(re, gb); + buf = GET_CACHE(re, gb); + + if (buf >= (1 << 27)) { + buf >>= 32 - 9; + LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]); + CLOSE_READER(re, gb); + + return ff_ue_golomb_vlc_code[buf]; + } else { + int log = 2 * av_log2(buf) - 31; + LAST_SKIP_BITS(re, gb, 32 - log); + CLOSE_READER(re, gb); + if (log < 7) { + av_log(NULL, AV_LOG_ERROR, "Invalid UE golomb code\n"); + return AVERROR_INVALIDDATA; + } + buf >>= log; + buf--; + + return buf; + } +} + +/** + * Read an unsigned Exp-Golomb code in the range 0 to UINT32_MAX-1. + */ +static inline unsigned get_ue_golomb_long(GetBitContext *gb) +{ + unsigned buf, log; + + buf = show_bits_long(gb, 32); + log = 31 - av_log2(buf); + skip_bits_long(gb, log); + + return get_bits_long(gb, log + 1) - 1; +} + +/** + * read unsigned exp golomb code, constraint to a max of 31. + * the return value is undefined if the stored value exceeds 31. + */ +static inline int get_ue_golomb_31(GetBitContext *gb) +{ + unsigned int buf; + + OPEN_READER(re, gb); + UPDATE_CACHE(re, gb); + buf = GET_CACHE(re, gb); + + buf >>= 32 - 9; + LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]); + CLOSE_READER(re, gb); + + return ff_ue_golomb_vlc_code[buf]; +} + +static inline unsigned get_interleaved_ue_golomb(GetBitContext *gb) +{ + uint32_t buf; + + OPEN_READER(re, gb); + UPDATE_CACHE(re, gb); + buf = GET_CACHE(re, gb); + + if (buf & 0xAA800000) { + buf >>= 32 - 8; + LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]); + CLOSE_READER(re, gb); + + return ff_interleaved_ue_golomb_vlc_code[buf]; + } else { + unsigned ret = 1; + + do { + buf >>= 32 - 8; + LAST_SKIP_BITS(re, gb, + FFMIN(ff_interleaved_golomb_vlc_len[buf], 8)); + + if (ff_interleaved_golomb_vlc_len[buf] != 9) { + ret <<= (ff_interleaved_golomb_vlc_len[buf] - 1) >> 1; + ret |= ff_interleaved_dirac_golomb_vlc_code[buf]; + break; + } + ret = (ret << 4) | ff_interleaved_dirac_golomb_vlc_code[buf]; + UPDATE_CACHE(re, gb); + buf = GET_CACHE(re, gb); + } while (ret<0x8000000U && BITS_AVAILABLE(re, gb)); + + CLOSE_READER(re, gb); + return ret - 1; + } +} + +/** + * read unsigned truncated exp golomb code. + */ +static inline int get_te0_golomb(GetBitContext *gb, int range) +{ + av_assert2(range >= 1); + + if (range == 1) + return 0; + else if (range == 2) + return get_bits1(gb) ^ 1; + else + return get_ue_golomb(gb); +} + +/** + * read unsigned truncated exp golomb code. + */ +static inline int get_te_golomb(GetBitContext *gb, int range) +{ + av_assert2(range >= 1); + + if (range == 2) + return get_bits1(gb) ^ 1; + else + return get_ue_golomb(gb); +} + +/** + * read signed exp golomb code. + */ +static inline int get_se_golomb(GetBitContext *gb) +{ + unsigned int buf; + + OPEN_READER(re, gb); + UPDATE_CACHE(re, gb); + buf = GET_CACHE(re, gb); + + if (buf >= (1 << 27)) { + buf >>= 32 - 9; + LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]); + CLOSE_READER(re, gb); + + return ff_se_golomb_vlc_code[buf]; + } else { + int log = av_log2(buf), sign; + LAST_SKIP_BITS(re, gb, 31 - log); + UPDATE_CACHE(re, gb); + buf = GET_CACHE(re, gb); + + buf >>= log; + + LAST_SKIP_BITS(re, gb, 32 - log); + CLOSE_READER(re, gb); + + sign = -(buf & 1); + buf = ((buf >> 1) ^ sign) - sign; + + return buf; + } +} + +static inline int get_se_golomb_long(GetBitContext *gb) +{ + unsigned int buf = get_ue_golomb_long(gb); + int sign = (buf & 1) - 1; + return ((buf >> 1) ^ sign) + 1; +} + +static inline int get_interleaved_se_golomb(GetBitContext *gb) +{ + unsigned int buf; + + OPEN_READER(re, gb); + UPDATE_CACHE(re, gb); + buf = GET_CACHE(re, gb); + + if (buf & 0xAA800000) { + buf >>= 32 - 8; + LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]); + CLOSE_READER(re, gb); + + return ff_interleaved_se_golomb_vlc_code[buf]; + } else { + int log; + LAST_SKIP_BITS(re, gb, 8); + UPDATE_CACHE(re, gb); + buf |= 1 | (GET_CACHE(re, gb) >> 8); + + if ((buf & 0xAAAAAAAA) == 0) + return INVALID_VLC; + + for (log = 31; (buf & 0x80000000) == 0; log--) + buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30); + + LAST_SKIP_BITS(re, gb, 63 - 2 * log - 8); + CLOSE_READER(re, gb); + + return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1; + } +} + +static inline int dirac_get_se_golomb(GetBitContext *gb) +{ + uint32_t ret = get_interleaved_ue_golomb(gb); + + if (ret) { + int sign = -get_bits1(gb); + ret = (ret ^ sign) - sign; + } + + return ret; +} + +/** + * read unsigned golomb rice code (ffv1). + */ +static inline int get_ur_golomb(GetBitContext *gb, int k, int limit, + int esc_len) +{ + unsigned int buf; + int log; + + OPEN_READER(re, gb); + UPDATE_CACHE(re, gb); + buf = GET_CACHE(re, gb); + + log = av_log2(buf); + + if (log > 31 - limit) { + buf >>= log - k; + buf += (30U - log) << k; + LAST_SKIP_BITS(re, gb, 32 + k - log); + CLOSE_READER(re, gb); + + return buf; + } else { + LAST_SKIP_BITS(re, gb, limit); + UPDATE_CACHE(re, gb); + + buf = SHOW_UBITS(re, gb, esc_len); + + LAST_SKIP_BITS(re, gb, esc_len); + CLOSE_READER(re, gb); + + return buf + limit - 1; + } +} + +/** + * read unsigned golomb rice code (jpegls). + */ +static inline int get_ur_golomb_jpegls(GetBitContext *gb, int k, int limit, + int esc_len) +{ + unsigned int buf; + int log; + + OPEN_READER(re, gb); + UPDATE_CACHE(re, gb); + buf = GET_CACHE(re, gb); + + log = av_log2(buf); + + av_assert2(k <= 31); + + if (log - k >= 32 - MIN_CACHE_BITS + (MIN_CACHE_BITS == 32) && + 32 - log < limit) { + buf >>= log - k; + buf += (30U - log) << k; + LAST_SKIP_BITS(re, gb, 32 + k - log); + CLOSE_READER(re, gb); + + return buf; + } else { + int i; + for (i = 0; i < limit && SHOW_UBITS(re, gb, 1) == 0; i++) { + if (gb->size_in_bits <= re_index) { + CLOSE_READER(re, gb); + return -1; + } + LAST_SKIP_BITS(re, gb, 1); + UPDATE_CACHE(re, gb); + } + SKIP_BITS(re, gb, 1); + + if (i < limit - 1) { + if (k) { + if (k > MIN_CACHE_BITS - 1) { + buf = SHOW_UBITS(re, gb, 16) << (k-16); + LAST_SKIP_BITS(re, gb, 16); + UPDATE_CACHE(re, gb); + buf |= SHOW_UBITS(re, gb, k-16); + LAST_SKIP_BITS(re, gb, k-16); + } else { + buf = SHOW_UBITS(re, gb, k); + LAST_SKIP_BITS(re, gb, k); + } + } else { + buf = 0; + } + + buf += ((SUINT)i << k); + } else if (i == limit - 1) { + buf = SHOW_UBITS(re, gb, esc_len); + LAST_SKIP_BITS(re, gb, esc_len); + + buf ++; + } else { + buf = -1; + } + CLOSE_READER(re, gb); + return buf; + } +} + +/** + * read signed golomb rice code (ffv1). + */ +static inline int get_sr_golomb(GetBitContext *gb, int k, int limit, + int esc_len) +{ + unsigned v = get_ur_golomb(gb, k, limit, esc_len); + return (v >> 1) ^ -(v & 1); +} + +/** + * read signed golomb rice code (flac). + */ +static inline int get_sr_golomb_flac(GetBitContext *gb, int k, int limit, + int esc_len) +{ + unsigned v = get_ur_golomb_jpegls(gb, k, limit, esc_len); + return (v >> 1) ^ -(v & 1); +} + +/** + * read unsigned golomb rice code (shorten). + */ +static inline unsigned int get_ur_golomb_shorten(GetBitContext *gb, int k) +{ + return get_ur_golomb_jpegls(gb, k, INT_MAX, 0); +} + +/** + * read signed golomb rice code (shorten). + */ +static inline int get_sr_golomb_shorten(GetBitContext *gb, int k) +{ + int uvar = get_ur_golomb_jpegls(gb, k + 1, INT_MAX, 0); + return (uvar >> 1) ^ -(uvar & 1); +} + +#ifdef TRACE + +static inline int get_ue(GetBitContext *s, const char *file, const char *func, + int line) +{ + int show = show_bits(s, 24); + int pos = get_bits_count(s); + int i = get_ue_golomb(s); + int len = get_bits_count(s) - pos; + int bits = show >> (24 - len); + + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d ue @%5d in %s %s:%d\n", + bits, len, i, pos, file, func, line); + + return i; +} + +static inline int get_se(GetBitContext *s, const char *file, const char *func, + int line) +{ + int show = show_bits(s, 24); + int pos = get_bits_count(s); + int i = get_se_golomb(s); + int len = get_bits_count(s) - pos; + int bits = show >> (24 - len); + + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d se @%5d in %s %s:%d\n", + bits, len, i, pos, file, func, line); + + return i; +} + +static inline int get_te(GetBitContext *s, int r, char *file, const char *func, + int line) +{ + int show = show_bits(s, 24); + int pos = get_bits_count(s); + int i = get_te0_golomb(s, r); + int len = get_bits_count(s) - pos; + int bits = show >> (24 - len); + + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d te @%5d in %s %s:%d\n", + bits, len, i, pos, file, func, line); + + return i; +} + +#define get_ue_golomb(a) get_ue(a, __FILE__, __func__, __LINE__) +#define get_se_golomb(a) get_se(a, __FILE__, __func__, __LINE__) +#define get_te_golomb(a, r) get_te(a, r, __FILE__, __func__, __LINE__) +#define get_te0_golomb(a, r) get_te(a, r, __FILE__, __func__, __LINE__) + +#endif /* TRACE */ + +/** + * write unsigned exp golomb code. 2^16 - 2 at most + */ +static inline void set_ue_golomb(PutBitContext *pb, int i) +{ + av_assert2(i >= 0); + av_assert2(i <= 0xFFFE); + + if (i < 256) + put_bits(pb, ff_ue_golomb_len[i], i + 1); + else { + int e = av_log2(i + 1); + put_bits(pb, 2 * e + 1, i + 1); + } +} + +/** + * write unsigned exp golomb code. 2^32-2 at most. + */ +static inline void set_ue_golomb_long(PutBitContext *pb, uint32_t i) +{ + av_assert2(i <= (UINT32_MAX - 1)); + + if (i < 256) + put_bits(pb, ff_ue_golomb_len[i], i + 1); + else { + int e = av_log2(i + 1); + put_bits64(pb, 2 * e + 1, i + 1); + } +} + +/** + * write truncated unsigned exp golomb code. + */ +static inline void set_te_golomb(PutBitContext *pb, int i, int range) +{ + av_assert2(range >= 1); + av_assert2(i <= range); + + if (range == 2) + put_bits(pb, 1, i ^ 1); + else + set_ue_golomb(pb, i); +} + +/** + * write signed exp golomb code. 16 bits at most. + */ +static inline void set_se_golomb(PutBitContext *pb, int i) +{ + i = 2 * i - 1; + if (i < 0) + i ^= -1; //FIXME check if gcc does the right thing + set_ue_golomb(pb, i); +} + +/** + * write unsigned golomb rice code (ffv1). + */ +static inline void set_ur_golomb(PutBitContext *pb, int i, int k, int limit, + int esc_len) +{ + int e; + + av_assert2(i >= 0); + + e = i >> k; + if (e < limit) + put_bits(pb, e + k + 1, (1 << k) + av_mod_uintp2(i, k)); + else + put_bits(pb, limit + esc_len, i - limit + 1); +} + +/** + * write unsigned golomb rice code (jpegls). + */ +static inline void set_ur_golomb_jpegls(PutBitContext *pb, int i, int k, + int limit, int esc_len) +{ + int e; + + av_assert2(i >= 0); + + e = (i >> k) + 1; + if (e < limit) { + while (e > 31) { + put_bits(pb, 31, 0); + e -= 31; + } + put_bits(pb, e, 1); + if (k) + put_sbits(pb, k, i); + } else { + while (limit > 31) { + put_bits(pb, 31, 0); + limit -= 31; + } + put_bits(pb, limit, 1); + put_bits(pb, esc_len, i - 1); + } +} + +/** + * write signed golomb rice code (ffv1). + */ +static inline void set_sr_golomb(PutBitContext *pb, int i, int k, int limit, + int esc_len) +{ + int v; + + v = -2 * i - 1; + v ^= (v >> 31); + + set_ur_golomb(pb, v, k, limit, esc_len); +} + +/** + * write signed golomb rice code (flac). + */ +static inline void set_sr_golomb_flac(PutBitContext *pb, int i, int k, + int limit, int esc_len) +{ + int v; + + v = -2 * i - 1; + v ^= (v >> 31); + + set_ur_golomb_jpegls(pb, v, k, limit, esc_len); +} + +#endif /* AVCODEC_GOLOMB_H */ diff --git a/libs/ffvpx/libavcodec/h263dsp.h b/libs/ffvpx/libavcodec/h263dsp.h new file mode 100644 index 000000000..1abea3ca8 --- /dev/null +++ b/libs/ffvpx/libavcodec/h263dsp.h @@ -0,0 +1,35 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_H263DSP_H +#define AVCODEC_H263DSP_H + +#include <stdint.h> + +extern const uint8_t ff_h263_loop_filter_strength[32]; + +typedef struct H263DSPContext { + void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); + void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); +} H263DSPContext; + +void ff_h263dsp_init(H263DSPContext *ctx); +void ff_h263dsp_init_x86(H263DSPContext *ctx); +void ff_h263dsp_init_mips(H263DSPContext *ctx); + +#endif /* AVCODEC_H263DSP_H */ diff --git a/libs/ffvpx/libavcodec/h264chroma.h b/libs/ffvpx/libavcodec/h264chroma.h new file mode 100644 index 000000000..5c89fd12d --- /dev/null +++ b/libs/ffvpx/libavcodec/h264chroma.h @@ -0,0 +1,40 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_H264CHROMA_H +#define AVCODEC_H264CHROMA_H + +#include <stddef.h> +#include <stdint.h> + +typedef void (*h264_chroma_mc_func)(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/, ptrdiff_t srcStride, int h, int x, int y); + +typedef struct H264ChromaContext { + h264_chroma_mc_func put_h264_chroma_pixels_tab[4]; + h264_chroma_mc_func avg_h264_chroma_pixels_tab[4]; +} H264ChromaContext; + +void ff_h264chroma_init(H264ChromaContext *c, int bit_depth); + +void ff_h264chroma_init_aarch64(H264ChromaContext *c, int bit_depth); +void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth); +void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth); +void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth); +void ff_h264chroma_init_mips(H264ChromaContext *c, int bit_depth); + +#endif /* AVCODEC_H264CHROMA_H */ diff --git a/libs/ffvpx/libavcodec/h264dsp.h b/libs/ffvpx/libavcodec/h264dsp.h new file mode 100644 index 000000000..bcd76abcc --- /dev/null +++ b/libs/ffvpx/libavcodec/h264dsp.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * H.264 DSP functions. + * @author Michael Niedermayer <michaelni@gmx.at> + */ + +#ifndef AVCODEC_H264DSP_H +#define AVCODEC_H264DSP_H + +#include <stdint.h> +#include <stddef.h> + +typedef void (*h264_weight_func)(uint8_t *block, ptrdiff_t stride, int height, + int log2_denom, int weight, int offset); +typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, + ptrdiff_t stride, int height, int log2_denom, + int weightd, int weights, int offset); + +/** + * Context for storing H.264 DSP functions + */ +typedef struct H264DSPContext { + /* weighted MC */ + h264_weight_func weight_h264_pixels_tab[4]; + h264_biweight_func biweight_h264_pixels_tab[4]; + + /* loop filter */ + void (*h264_v_loop_filter_luma)(uint8_t *pix /*align 16*/, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_luma)(uint8_t *pix /*align 4 */, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, int stride, + int alpha, int beta, int8_t *tc0); + /* v/h_loop_filter_luma_intra: align 16 */ + void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, + int alpha, int beta); + void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, + int alpha, int beta); + void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix /*align 16*/, + int stride, int alpha, int beta); + void (*h264_v_loop_filter_chroma)(uint8_t *pix /*align 8*/, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_chroma)(uint8_t *pix /*align 4*/, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta, + int8_t *tc0); + void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta); + void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta); + void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta); + // h264_loop_filter_strength: simd only. the C version is inlined in h264_loopfilter.c + void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], + int8_t ref[2][40], int16_t mv[2][40][2], + int bidir, int edges, int step, + int mask_mv0, int mask_mv1, int field); + + /* IDCT */ + void (*h264_idct_add)(uint8_t *dst /*align 4*/, + int16_t *block /*align 16*/, int stride); + void (*h264_idct8_add)(uint8_t *dst /*align 8*/, + int16_t *block /*align 16*/, int stride); + void (*h264_idct_dc_add)(uint8_t *dst /*align 4*/, + int16_t *block /*align 16*/, int stride); + void (*h264_idct8_dc_add)(uint8_t *dst /*align 8*/, + int16_t *block /*align 16*/, int stride); + + void (*h264_idct_add16)(uint8_t *dst /*align 16*/, const int *blockoffset, + int16_t *block /*align 16*/, int stride, + const uint8_t nnzc[15 * 8]); + void (*h264_idct8_add4)(uint8_t *dst /*align 16*/, const int *blockoffset, + int16_t *block /*align 16*/, int stride, + const uint8_t nnzc[15 * 8]); + void (*h264_idct_add8)(uint8_t **dst /*align 16*/, const int *blockoffset, + int16_t *block /*align 16*/, int stride, + const uint8_t nnzc[15 * 8]); + void (*h264_idct_add16intra)(uint8_t *dst /*align 16*/, const int *blockoffset, + int16_t *block /*align 16*/, + int stride, const uint8_t nnzc[15 * 8]); + void (*h264_luma_dc_dequant_idct)(int16_t *output, + int16_t *input /*align 16*/, int qmul); + void (*h264_chroma_dc_dequant_idct)(int16_t *block, int qmul); + + /* bypass-transform */ + void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride); + void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride); + + /** + * Search buf from the start for up to size bytes. Return the index + * of a zero byte, or >= size if not found. Ideally, use lookahead + * to filter out any zero bytes that are known to not be followed by + * one or more further zero bytes and a one byte. Better still, filter + * out any bytes that form the trailing_zero_8bits syntax element too. + */ + int (*startcode_find_candidate)(const uint8_t *buf, int size); +} H264DSPContext; + +void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_mips(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); + +#endif /* AVCODEC_H264DSP_H */ diff --git a/libs/ffvpx/libavcodec/h264pred.c b/libs/ffvpx/libavcodec/h264pred.c new file mode 100644 index 000000000..5632a58fd --- /dev/null +++ b/libs/ffvpx/libavcodec/h264pred.c @@ -0,0 +1,603 @@ +/* + * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder + * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * H.264 / AVC / MPEG-4 part10 prediction functions. + * @author Michael Niedermayer <michaelni@gmx.at> + */ + +#include "libavutil/attributes.h" +#include "libavutil/avassert.h" +#include "libavutil/intreadwrite.h" +#include "avcodec.h" +#include "h264pred.h" + +#define BIT_DEPTH 8 +#include "h264pred_template.c" +#undef BIT_DEPTH + +#define BIT_DEPTH 9 +#include "h264pred_template.c" +#undef BIT_DEPTH + +#define BIT_DEPTH 10 +#include "h264pred_template.c" +#undef BIT_DEPTH + +#define BIT_DEPTH 12 +#include "h264pred_template.c" +#undef BIT_DEPTH + +#define BIT_DEPTH 14 +#include "h264pred_template.c" +#undef BIT_DEPTH + +static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, + ptrdiff_t stride) +{ + const unsigned lt = src[-1-1*stride]; + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + uint32_t v = PACK_4U8((lt + 2*t0 + t1 + 2) >> 2, + (t0 + 2*t1 + t2 + 2) >> 2, + (t1 + 2*t2 + t3 + 2) >> 2, + (t2 + 2*t3 + t4 + 2) >> 2); + + AV_WN32A(src+0*stride, v); + AV_WN32A(src+1*stride, v); + AV_WN32A(src+2*stride, v); + AV_WN32A(src+3*stride, v); +} + +static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright, + ptrdiff_t stride) +{ + const unsigned lt = src[-1-1*stride]; + LOAD_LEFT_EDGE + + AV_WN32A(src+0*stride, ((lt + 2*l0 + l1 + 2) >> 2)*0x01010101); + AV_WN32A(src+1*stride, ((l0 + 2*l1 + l2 + 2) >> 2)*0x01010101); + AV_WN32A(src+2*stride, ((l1 + 2*l2 + l3 + 2) >> 2)*0x01010101); + AV_WN32A(src+3*stride, ((l2 + 2*l3 + l3 + 2) >> 2)*0x01010101); +} + +static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright, + ptrdiff_t stride) +{ + LOAD_TOP_EDGE + LOAD_LEFT_EDGE + + src[0+0*stride]=(l1 + t1)>>1; + src[1+0*stride]= + src[0+1*stride]=(l2 + t2)>>1; + src[2+0*stride]= + src[1+1*stride]= + src[0+2*stride]= + src[3+0*stride]= + src[2+1*stride]= + src[1+2*stride]= + src[0+3*stride]= + src[3+1*stride]= + src[2+2*stride]= + src[1+3*stride]= + src[3+2*stride]= + src[2+3*stride]= + src[3+3*stride]=(l3 + t3)>>1; +} + +static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright, + ptrdiff_t stride) +{ + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + LOAD_LEFT_EDGE + LOAD_DOWN_LEFT_EDGE + + src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3; + src[1+0*stride]= + src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3; + src[2+0*stride]= + src[1+1*stride]= + src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + l4 + 2*l3 + 2)>>3; + src[3+0*stride]= + src[2+1*stride]= + src[1+2*stride]= + src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3 + l5 + 2*l4 + 2)>>3; + src[3+1*stride]= + src[2+2*stride]= + src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l4 + l6 + 2*l5 + 2)>>3; + src[3+2*stride]= + src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l5 + l7 + 2*l6 + 2)>>3; + src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2; +} + +static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, + const uint8_t *topright, + ptrdiff_t stride) +{ + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + LOAD_LEFT_EDGE + + src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3; + src[1+0*stride]= + src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3; + src[2+0*stride]= + src[1+1*stride]= + src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + 3*l3 + 2)>>3; + src[3+0*stride]= + src[2+1*stride]= + src[1+2*stride]= + src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3*4 + 2)>>3; + src[3+1*stride]= + src[2+2*stride]= + src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l3*4 + 2)>>3; + src[3+2*stride]= + src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l3*4 + 2)>>3; + src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2; +} + +static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright, + ptrdiff_t stride, + const int l0, const int l1, const int l2, + const int l3, const int l4) +{ + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + + src[0+0*stride]=(2*t0 + 2*t1 + l1 + 2*l2 + l3 + 4)>>3; + src[1+0*stride]= + src[0+2*stride]=(t1 + t2 + 1)>>1; + src[2+0*stride]= + src[1+2*stride]=(t2 + t3 + 1)>>1; + src[3+0*stride]= + src[2+2*stride]=(t3 + t4+ 1)>>1; + src[3+2*stride]=(t4 + t5+ 1)>>1; + src[0+1*stride]=(t0 + 2*t1 + t2 + l2 + 2*l3 + l4 + 4)>>3; + src[1+1*stride]= + src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2; + src[2+1*stride]= + src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2; + src[3+1*stride]= + src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2; + src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2; +} + +static void pred4x4_vertical_left_rv40_c(uint8_t *src, const uint8_t *topright, + ptrdiff_t stride) +{ + LOAD_LEFT_EDGE + LOAD_DOWN_LEFT_EDGE + + pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l4); +} + +static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src, + const uint8_t *topright, + ptrdiff_t stride) +{ + LOAD_LEFT_EDGE + + pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l3); +} + +static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright, + ptrdiff_t stride) +{ + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + + src[0+0*stride]=(t0 + t1 + 1)>>1; + src[1+0*stride]= + src[0+2*stride]=(t1 + t2 + 1)>>1; + src[2+0*stride]= + src[1+2*stride]=(t2 + t3 + 1)>>1; + src[3+0*stride]= + src[2+2*stride]=(t3 + t4 + 1)>>1; + src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2; + src[1+1*stride]= + src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2; + src[2+1*stride]= + src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2; + src[3+1*stride]= + src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2; + src[3+2*stride]=(t4 + 2*t5 + t6 + 2)>>2; + src[3+3*stride]=(t5 + 2*t6 + t7 + 2)>>2; +} + +static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright, + ptrdiff_t stride) +{ + LOAD_LEFT_EDGE + LOAD_DOWN_LEFT_EDGE + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + + src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3; + src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3; + src[2+0*stride]= + src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3; + src[3+0*stride]= + src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3; + src[2+1*stride]= + src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3; + src[3+1*stride]= + src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3; + src[3+2*stride]= + src[1+3*stride]=(l3 + 2*l4 + l5 + 2)>>2; + src[0+3*stride]= + src[2+2*stride]=(t6 + t7 + l3 + l4 + 2)>>2; + src[2+3*stride]=(l4 + l5 + 1)>>1; + src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2; +} + +static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, + const uint8_t *topright, + ptrdiff_t stride) +{ + LOAD_LEFT_EDGE + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + + src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3; + src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3; + src[2+0*stride]= + src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3; + src[3+0*stride]= + src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3; + src[2+1*stride]= + src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3; + src[3+1*stride]= + src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3; + src[3+2*stride]= + src[1+3*stride]=l3; + src[0+3*stride]= + src[2+2*stride]=(t6 + t7 + 2*l3 + 2)>>2; + src[2+3*stride]= + src[3+3*stride]=l3; +} + +static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, + ptrdiff_t stride) +{ + const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP - src[-1-stride]; + uint8_t *top = src-stride; + int y; + + for (y = 0; y < 4; y++) { + const uint8_t *cm_in = cm + src[-1]; + src[0] = cm_in[top[0]]; + src[1] = cm_in[top[1]]; + src[2] = cm_in[top[2]]; + src[3] = cm_in[top[3]]; + src += stride; + } +} + +static void pred16x16_plane_svq3_c(uint8_t *src, ptrdiff_t stride) +{ + pred16x16_plane_compat_8_c(src, stride, 1, 0); +} + +static void pred16x16_plane_rv40_c(uint8_t *src, ptrdiff_t stride) +{ + pred16x16_plane_compat_8_c(src, stride, 0, 1); +} + +static void pred16x16_tm_vp8_c(uint8_t *src, ptrdiff_t stride) +{ + const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP - src[-1-stride]; + uint8_t *top = src-stride; + int y; + + for (y = 0; y < 16; y++) { + const uint8_t *cm_in = cm + src[-1]; + src[0] = cm_in[top[0]]; + src[1] = cm_in[top[1]]; + src[2] = cm_in[top[2]]; + src[3] = cm_in[top[3]]; + src[4] = cm_in[top[4]]; + src[5] = cm_in[top[5]]; + src[6] = cm_in[top[6]]; + src[7] = cm_in[top[7]]; + src[8] = cm_in[top[8]]; + src[9] = cm_in[top[9]]; + src[10] = cm_in[top[10]]; + src[11] = cm_in[top[11]]; + src[12] = cm_in[top[12]]; + src[13] = cm_in[top[13]]; + src[14] = cm_in[top[14]]; + src[15] = cm_in[top[15]]; + src += stride; + } +} + +static void pred8x8_left_dc_rv40_c(uint8_t *src, ptrdiff_t stride) +{ + int i; + unsigned dc0; + + dc0=0; + for(i=0;i<8; i++) + dc0+= src[-1+i*stride]; + dc0= 0x01010101*((dc0 + 4)>>3); + + for(i=0; i<8; i++){ + ((uint32_t*)(src+i*stride))[0]= + ((uint32_t*)(src+i*stride))[1]= dc0; + } +} + +static void pred8x8_top_dc_rv40_c(uint8_t *src, ptrdiff_t stride) +{ + int i; + unsigned dc0; + + dc0=0; + for(i=0;i<8; i++) + dc0+= src[i-stride]; + dc0= 0x01010101*((dc0 + 4)>>3); + + for(i=0; i<8; i++){ + ((uint32_t*)(src+i*stride))[0]= + ((uint32_t*)(src+i*stride))[1]= dc0; + } +} + +static void pred8x8_dc_rv40_c(uint8_t *src, ptrdiff_t stride) +{ + int i; + unsigned dc0 = 0; + + for(i=0;i<4; i++){ + dc0+= src[-1+i*stride] + src[i-stride]; + dc0+= src[4+i-stride]; + dc0+= src[-1+(i+4)*stride]; + } + dc0= 0x01010101*((dc0 + 8)>>4); + + for(i=0; i<4; i++){ + ((uint32_t*)(src+i*stride))[0]= dc0; + ((uint32_t*)(src+i*stride))[1]= dc0; + } + for(i=4; i<8; i++){ + ((uint32_t*)(src+i*stride))[0]= dc0; + ((uint32_t*)(src+i*stride))[1]= dc0; + } +} + +static void pred8x8_tm_vp8_c(uint8_t *src, ptrdiff_t stride) +{ + const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP - src[-1-stride]; + uint8_t *top = src-stride; + int y; + + for (y = 0; y < 8; y++) { + const uint8_t *cm_in = cm + src[-1]; + src[0] = cm_in[top[0]]; + src[1] = cm_in[top[1]]; + src[2] = cm_in[top[2]]; + src[3] = cm_in[top[3]]; + src[4] = cm_in[top[4]]; + src[5] = cm_in[top[5]]; + src[6] = cm_in[top[6]]; + src[7] = cm_in[top[7]]; + src += stride; + } +} + +/** + * Set the intra prediction function pointers. + */ +av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id, + const int bit_depth, + int chroma_format_idc) +{ +#undef FUNC +#undef FUNCC +#define FUNC(a, depth) a ## _ ## depth +#define FUNCC(a, depth) a ## _ ## depth ## _c +#define FUNCD(a) a ## _c + +#define H264_PRED(depth) \ + if(codec_id != AV_CODEC_ID_RV40){\ + if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {\ + h->pred4x4[VERT_PRED ]= FUNCD(pred4x4_vertical_vp8);\ + h->pred4x4[HOR_PRED ]= FUNCD(pred4x4_horizontal_vp8);\ + } else {\ + h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical , depth);\ + h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal , depth);\ + }\ + h->pred4x4[DC_PRED ]= FUNCC(pred4x4_dc , depth);\ + if(codec_id == AV_CODEC_ID_SVQ3)\ + h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_svq3);\ + else\ + h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred4x4_down_left , depth);\ + h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right , depth);\ + h->pred4x4[VERT_RIGHT_PRED ]= FUNCC(pred4x4_vertical_right , depth);\ + h->pred4x4[HOR_DOWN_PRED ]= FUNCC(pred4x4_horizontal_down , depth);\ + if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {\ + h->pred4x4[VERT_LEFT_PRED ]= FUNCD(pred4x4_vertical_left_vp8);\ + } else\ + h->pred4x4[VERT_LEFT_PRED ]= FUNCC(pred4x4_vertical_left , depth);\ + h->pred4x4[HOR_UP_PRED ]= FUNCC(pred4x4_horizontal_up , depth);\ + if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8) {\ + h->pred4x4[LEFT_DC_PRED ]= FUNCC(pred4x4_left_dc , depth);\ + h->pred4x4[TOP_DC_PRED ]= FUNCC(pred4x4_top_dc , depth);\ + } else {\ + h->pred4x4[TM_VP8_PRED ]= FUNCD(pred4x4_tm_vp8);\ + h->pred4x4[DC_127_PRED ]= FUNCC(pred4x4_127_dc , depth);\ + h->pred4x4[DC_129_PRED ]= FUNCC(pred4x4_129_dc , depth);\ + h->pred4x4[VERT_VP8_PRED ]= FUNCC(pred4x4_vertical , depth);\ + h->pred4x4[HOR_VP8_PRED ]= FUNCC(pred4x4_horizontal , depth);\ + }\ + if (codec_id != AV_CODEC_ID_VP8)\ + h->pred4x4[DC_128_PRED ]= FUNCC(pred4x4_128_dc , depth);\ + }else{\ + h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical , depth);\ + h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal , depth);\ + h->pred4x4[DC_PRED ]= FUNCC(pred4x4_dc , depth);\ + h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_rv40);\ + h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right , depth);\ + h->pred4x4[VERT_RIGHT_PRED ]= FUNCC(pred4x4_vertical_right , depth);\ + h->pred4x4[HOR_DOWN_PRED ]= FUNCC(pred4x4_horizontal_down , depth);\ + h->pred4x4[VERT_LEFT_PRED ]= FUNCD(pred4x4_vertical_left_rv40);\ + h->pred4x4[HOR_UP_PRED ]= FUNCD(pred4x4_horizontal_up_rv40);\ + h->pred4x4[LEFT_DC_PRED ]= FUNCC(pred4x4_left_dc , depth);\ + h->pred4x4[TOP_DC_PRED ]= FUNCC(pred4x4_top_dc , depth);\ + h->pred4x4[DC_128_PRED ]= FUNCC(pred4x4_128_dc , depth);\ + h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_down_left_rv40_nodown);\ + h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= FUNCD(pred4x4_horizontal_up_rv40_nodown);\ + h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_vertical_left_rv40_nodown);\ + }\ +\ + h->pred8x8l[VERT_PRED ]= FUNCC(pred8x8l_vertical , depth);\ + h->pred8x8l[HOR_PRED ]= FUNCC(pred8x8l_horizontal , depth);\ + h->pred8x8l[DC_PRED ]= FUNCC(pred8x8l_dc , depth);\ + h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred8x8l_down_left , depth);\ + h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred8x8l_down_right , depth);\ + h->pred8x8l[VERT_RIGHT_PRED ]= FUNCC(pred8x8l_vertical_right , depth);\ + h->pred8x8l[HOR_DOWN_PRED ]= FUNCC(pred8x8l_horizontal_down , depth);\ + h->pred8x8l[VERT_LEFT_PRED ]= FUNCC(pred8x8l_vertical_left , depth);\ + h->pred8x8l[HOR_UP_PRED ]= FUNCC(pred8x8l_horizontal_up , depth);\ + h->pred8x8l[LEFT_DC_PRED ]= FUNCC(pred8x8l_left_dc , depth);\ + h->pred8x8l[TOP_DC_PRED ]= FUNCC(pred8x8l_top_dc , depth);\ + h->pred8x8l[DC_128_PRED ]= FUNCC(pred8x8l_128_dc , depth);\ +\ + if (chroma_format_idc <= 1) {\ + h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x8_vertical , depth);\ + h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x8_horizontal , depth);\ + } else {\ + h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x16_vertical , depth);\ + h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x16_horizontal , depth);\ + }\ + if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8) {\ + if (chroma_format_idc <= 1) {\ + h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane , depth);\ + } else {\ + h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x16_plane , depth);\ + }\ + } else\ + h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\ + if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_ID_VP7 && \ + codec_id != AV_CODEC_ID_VP8) {\ + if (chroma_format_idc <= 1) {\ + h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc , depth);\ + h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc , depth);\ + h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc , depth);\ + h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\ + h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ + h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ + h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ + } else {\ + h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x16_dc , depth);\ + h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc , depth);\ + h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc , depth);\ + h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l0t, depth);\ + h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0lt, depth);\ + h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l00, depth);\ + h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0l0, depth);\ + }\ + }else{\ + h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\ + h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\ + h->pred8x8[TOP_DC_PRED8x8 ]= FUNCD(pred8x8_top_dc_rv40);\ + if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {\ + h->pred8x8[DC_127_PRED8x8]= FUNCC(pred8x8_127_dc , depth);\ + h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc , depth);\ + }\ + }\ + if (chroma_format_idc <= 1) {\ + h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc , depth);\ + } else {\ + h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x16_128_dc , depth);\ + }\ +\ + h->pred16x16[DC_PRED8x8 ]= FUNCC(pred16x16_dc , depth);\ + h->pred16x16[VERT_PRED8x8 ]= FUNCC(pred16x16_vertical , depth);\ + h->pred16x16[HOR_PRED8x8 ]= FUNCC(pred16x16_horizontal , depth);\ + switch(codec_id){\ + case AV_CODEC_ID_SVQ3:\ + h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_plane_svq3);\ + break;\ + case AV_CODEC_ID_RV40:\ + h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_plane_rv40);\ + break;\ + case AV_CODEC_ID_VP7:\ + case AV_CODEC_ID_VP8:\ + h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_tm_vp8);\ + h->pred16x16[DC_127_PRED8x8]= FUNCC(pred16x16_127_dc , depth);\ + h->pred16x16[DC_129_PRED8x8]= FUNCC(pred16x16_129_dc , depth);\ + break;\ + default:\ + h->pred16x16[PLANE_PRED8x8 ]= FUNCC(pred16x16_plane , depth);\ + break;\ + }\ + h->pred16x16[LEFT_DC_PRED8x8]= FUNCC(pred16x16_left_dc , depth);\ + h->pred16x16[TOP_DC_PRED8x8 ]= FUNCC(pred16x16_top_dc , depth);\ + h->pred16x16[DC_128_PRED8x8 ]= FUNCC(pred16x16_128_dc , depth);\ +\ + /* special lossless h/v prediction for H.264 */ \ + h->pred4x4_add [VERT_PRED ]= FUNCC(pred4x4_vertical_add , depth);\ + h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\ + h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\ + h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\ + h->pred8x8l_filter_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_filter_add , depth);\ + h->pred8x8l_filter_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_filter_add , depth);\ + if (chroma_format_idc <= 1) {\ + h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add , depth);\ + h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\ + } else {\ + h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x16_vertical_add , depth);\ + h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x16_horizontal_add , depth);\ + }\ + h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add , depth);\ + h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add , depth);\ + + switch (bit_depth) { + case 9: + H264_PRED(9) + break; + case 10: + H264_PRED(10) + break; + case 12: + H264_PRED(12) + break; + case 14: + H264_PRED(14) + break; + default: + av_assert0(bit_depth<=8); + H264_PRED(8) + break; + } + + if (ARCH_AARCH64) + ff_h264_pred_init_aarch64(h, codec_id, bit_depth, chroma_format_idc); + if (ARCH_ARM) + ff_h264_pred_init_arm(h, codec_id, bit_depth, chroma_format_idc); + if (ARCH_X86) + ff_h264_pred_init_x86(h, codec_id, bit_depth, chroma_format_idc); + if (ARCH_MIPS) + ff_h264_pred_init_mips(h, codec_id, bit_depth, chroma_format_idc); +} diff --git a/libs/ffvpx/libavcodec/h264pred.h b/libs/ffvpx/libavcodec/h264pred.h new file mode 100644 index 000000000..2863dc9bd --- /dev/null +++ b/libs/ffvpx/libavcodec/h264pred.h @@ -0,0 +1,126 @@ +/* + * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder + * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * H.264 / AVC / MPEG-4 prediction functions. + * @author Michael Niedermayer <michaelni@gmx.at> + */ + +#ifndef AVCODEC_H264PRED_H +#define AVCODEC_H264PRED_H + +#include <stddef.h> +#include <stdint.h> + +/** + * Prediction types + */ +//@{ +#define VERT_PRED 0 +#define HOR_PRED 1 +#define DC_PRED 2 +#define DIAG_DOWN_LEFT_PRED 3 +#define DIAG_DOWN_RIGHT_PRED 4 +#define VERT_RIGHT_PRED 5 +#define HOR_DOWN_PRED 6 +#define VERT_LEFT_PRED 7 +#define HOR_UP_PRED 8 + +// DC edge (not for VP8) +#define LEFT_DC_PRED 9 +#define TOP_DC_PRED 10 +#define DC_128_PRED 11 + +// RV40 specific +#define DIAG_DOWN_LEFT_PRED_RV40_NODOWN 12 +#define HOR_UP_PRED_RV40_NODOWN 13 +#define VERT_LEFT_PRED_RV40_NODOWN 14 + +// VP8 specific +#define TM_VP8_PRED 9 ///< "True Motion", used instead of plane +#define VERT_VP8_PRED 10 ///< for VP8, #VERT_PRED is the average of + ///< (left col+cur col x2+right col) / 4; + ///< this is the "unaveraged" one +#define HOR_VP8_PRED 14 ///< unaveraged version of #HOR_PRED, see + ///< #VERT_VP8_PRED for details +#define DC_127_PRED 12 +#define DC_129_PRED 13 + +#define DC_PRED8x8 0 +#define HOR_PRED8x8 1 +#define VERT_PRED8x8 2 +#define PLANE_PRED8x8 3 + +// DC edge +#define LEFT_DC_PRED8x8 4 +#define TOP_DC_PRED8x8 5 +#define DC_128_PRED8x8 6 + +// H.264/SVQ3 (8x8) specific +#define ALZHEIMER_DC_L0T_PRED8x8 7 +#define ALZHEIMER_DC_0LT_PRED8x8 8 +#define ALZHEIMER_DC_L00_PRED8x8 9 +#define ALZHEIMER_DC_0L0_PRED8x8 10 + +// VP8 specific +#define DC_127_PRED8x8 7 +#define DC_129_PRED8x8 8 +//@} + +/** + * Context for storing H.264 prediction functions + */ +typedef struct H264PredContext { + void(*pred4x4[9 + 3 + 3])(uint8_t *src, const uint8_t *topright, + ptrdiff_t stride); + void(*pred8x8l[9 + 3])(uint8_t *src, int topleft, int topright, + ptrdiff_t stride); + void(*pred8x8[4 + 3 + 4])(uint8_t *src, ptrdiff_t stride); + void(*pred16x16[4 + 3 + 2])(uint8_t *src, ptrdiff_t stride); + + void(*pred4x4_add[2])(uint8_t *pix /*align 4*/, + int16_t *block /*align 16*/, ptrdiff_t stride); + void(*pred8x8l_add[2])(uint8_t *pix /*align 8*/, + int16_t *block /*align 16*/, ptrdiff_t stride); + void(*pred8x8l_filter_add[2])(uint8_t *pix /*align 8*/, + int16_t *block /*align 16*/, int topleft, int topright, ptrdiff_t stride); + void(*pred8x8_add[3])(uint8_t *pix /*align 8*/, + const int *block_offset, + int16_t *block /*align 16*/, ptrdiff_t stride); + void(*pred16x16_add[3])(uint8_t *pix /*align 16*/, + const int *block_offset, + int16_t *block /*align 16*/, ptrdiff_t stride); +} H264PredContext; + +void ff_h264_pred_init(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init_aarch64(H264PredContext *h, int codec_id, + const int bit_depth, + const int chroma_format_idc); +void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init_mips(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); + +#endif /* AVCODEC_H264PRED_H */ diff --git a/libs/ffvpx/libavcodec/h264pred_template.c b/libs/ffvpx/libavcodec/h264pred_template.c new file mode 100644 index 000000000..2b30fff70 --- /dev/null +++ b/libs/ffvpx/libavcodec/h264pred_template.c @@ -0,0 +1,1355 @@ +/* + * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder + * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * H.264 / AVC / MPEG-4 part10 prediction functions. + * @author Michael Niedermayer <michaelni@gmx.at> + */ + +#include "libavutil/intreadwrite.h" + +#include "mathops.h" + +#include "bit_depth_template.c" + +static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, + ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const pixel4 a= AV_RN4PA(src-stride); + + AV_WN4PA(src+0*stride, a); + AV_WN4PA(src+1*stride, a); + AV_WN4PA(src+2*stride, a); + AV_WN4PA(src+3*stride, a); +} + +static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, + ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride])); + AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4(src[-1+1*stride])); + AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4(src[-1+2*stride])); + AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride])); +} + +static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, + ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3; + const pixel4 a = PIXEL_SPLAT_X4(dc); + + AV_WN4PA(src+0*stride, a); + AV_WN4PA(src+1*stride, a); + AV_WN4PA(src+2*stride, a); + AV_WN4PA(src+3*stride, a); +} + +static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, + ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2; + const pixel4 a = PIXEL_SPLAT_X4(dc); + + AV_WN4PA(src+0*stride, a); + AV_WN4PA(src+1*stride, a); + AV_WN4PA(src+2*stride, a); + AV_WN4PA(src+3*stride, a); +} + +static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, + ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2; + const pixel4 a = PIXEL_SPLAT_X4(dc); + + AV_WN4PA(src+0*stride, a); + AV_WN4PA(src+1*stride, a); + AV_WN4PA(src+2*stride, a); + AV_WN4PA(src+3*stride, a); +} + +static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, + ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)); + + AV_WN4PA(src+0*stride, a); + AV_WN4PA(src+1*stride, a); + AV_WN4PA(src+2*stride, a); + AV_WN4PA(src+3*stride, a); +} + +static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, + ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1); + + AV_WN4PA(src+0*stride, a); + AV_WN4PA(src+1*stride, a); + AV_WN4PA(src+2*stride, a); + AV_WN4PA(src+3*stride, a); +} + +static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, + ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1); + + AV_WN4PA(src+0*stride, a); + AV_WN4PA(src+1*stride, a); + AV_WN4PA(src+2*stride, a); + AV_WN4PA(src+3*stride, a); +} + + +#define LOAD_TOP_RIGHT_EDGE\ + const unsigned av_unused t4 = topright[0];\ + const unsigned av_unused t5 = topright[1];\ + const unsigned av_unused t6 = topright[2];\ + const unsigned av_unused t7 = topright[3];\ + +#define LOAD_DOWN_LEFT_EDGE\ + const unsigned av_unused l4 = src[-1+4*stride];\ + const unsigned av_unused l5 = src[-1+5*stride];\ + const unsigned av_unused l6 = src[-1+6*stride];\ + const unsigned av_unused l7 = src[-1+7*stride];\ + +#define LOAD_LEFT_EDGE\ + const unsigned av_unused l0 = src[-1+0*stride];\ + const unsigned av_unused l1 = src[-1+1*stride];\ + const unsigned av_unused l2 = src[-1+2*stride];\ + const unsigned av_unused l3 = src[-1+3*stride];\ + +#define LOAD_TOP_EDGE\ + const unsigned av_unused t0 = src[ 0-1*stride];\ + const unsigned av_unused t1 = src[ 1-1*stride];\ + const unsigned av_unused t2 = src[ 2-1*stride];\ + const unsigned av_unused t3 = src[ 3-1*stride];\ + +static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright, + ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const int lt= src[-1-1*stride]; + LOAD_TOP_EDGE + LOAD_LEFT_EDGE + + src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2; + src[0+2*stride]= + src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2; + src[0+1*stride]= + src[1+2*stride]= + src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2; + src[0+0*stride]= + src[1+1*stride]= + src[2+2*stride]= + src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2; + src[1+0*stride]= + src[2+1*stride]= + src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2; + src[2+0*stride]= + src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2; + src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2; +} + +static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright, + ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + const pixel *topright = (const pixel*)_topright; + int stride = _stride>>(sizeof(pixel)-1); + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE +// LOAD_LEFT_EDGE + + src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2; + src[1+0*stride]= + src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2; + src[2+0*stride]= + src[1+1*stride]= + src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2; + src[3+0*stride]= + src[2+1*stride]= + src[1+2*stride]= + src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2; + src[3+1*stride]= + src[2+2*stride]= + src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2; + src[3+2*stride]= + src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2; + src[3+3*stride]=(t6 + 3*t7 + 2)>>2; +} + +static void FUNCC(pred4x4_vertical_right)(uint8_t *_src, + const uint8_t *topright, + ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const int lt= src[-1-1*stride]; + LOAD_TOP_EDGE + LOAD_LEFT_EDGE + + src[0+0*stride]= + src[1+2*stride]=(lt + t0 + 1)>>1; + src[1+0*stride]= + src[2+2*stride]=(t0 + t1 + 1)>>1; + src[2+0*stride]= + src[3+2*stride]=(t1 + t2 + 1)>>1; + src[3+0*stride]=(t2 + t3 + 1)>>1; + src[0+1*stride]= + src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2; + src[1+1*stride]= + src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2; + src[2+1*stride]= + src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2; + src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2; + src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2; + src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; +} + +static void FUNCC(pred4x4_vertical_left)(uint8_t *_src, + const uint8_t *_topright, + ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + const pixel *topright = (const pixel*)_topright; + int stride = _stride>>(sizeof(pixel)-1); + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + + src[0+0*stride]=(t0 + t1 + 1)>>1; + src[1+0*stride]= + src[0+2*stride]=(t1 + t2 + 1)>>1; + src[2+0*stride]= + src[1+2*stride]=(t2 + t3 + 1)>>1; + src[3+0*stride]= + src[2+2*stride]=(t3 + t4+ 1)>>1; + src[3+2*stride]=(t4 + t5+ 1)>>1; + src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2; + src[1+1*stride]= + src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2; + src[2+1*stride]= + src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2; + src[3+1*stride]= + src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2; + src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2; +} + +static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright, + ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + LOAD_LEFT_EDGE + + src[0+0*stride]=(l0 + l1 + 1)>>1; + src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2; + src[2+0*stride]= + src[0+1*stride]=(l1 + l2 + 1)>>1; + src[3+0*stride]= + src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2; + src[2+1*stride]= + src[0+2*stride]=(l2 + l3 + 1)>>1; + src[3+1*stride]= + src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2; + src[3+2*stride]= + src[1+3*stride]= + src[0+3*stride]= + src[2+2*stride]= + src[2+3*stride]= + src[3+3*stride]=l3; +} + +static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src, + const uint8_t *topright, + ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const int lt= src[-1-1*stride]; + LOAD_TOP_EDGE + LOAD_LEFT_EDGE + + src[0+0*stride]= + src[2+1*stride]=(lt + l0 + 1)>>1; + src[1+0*stride]= + src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2; + src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2; + src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2; + src[0+1*stride]= + src[2+2*stride]=(l0 + l1 + 1)>>1; + src[1+1*stride]= + src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2; + src[0+2*stride]= + src[2+3*stride]=(l1 + l2+ 1)>>1; + src[1+2*stride]= + src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; + src[0+3*stride]=(l2 + l3 + 1)>>1; + src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2; +} + +static void FUNCC(pred16x16_vertical)(uint8_t *_src, ptrdiff_t _stride) +{ + int i; + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0); + const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1); + const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2); + const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3); + + for(i=0; i<16; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, a); + AV_WN4PA(((pixel4*)(src+i*stride))+1, b); + AV_WN4PA(((pixel4*)(src+i*stride))+2, c); + AV_WN4PA(((pixel4*)(src+i*stride))+3, d); + } +} + +static void FUNCC(pred16x16_horizontal)(uint8_t *_src, ptrdiff_t stride) +{ + int i; + pixel *src = (pixel*)_src; + stride >>= sizeof(pixel)-1; + + for(i=0; i<16; i++){ + const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]); + + AV_WN4PA(((pixel4*)(src+i*stride))+0, a); + AV_WN4PA(((pixel4*)(src+i*stride))+1, a); + AV_WN4PA(((pixel4*)(src+i*stride))+2, a); + AV_WN4PA(((pixel4*)(src+i*stride))+3, a); + } +} + +#define PREDICT_16x16_DC(v)\ + for(i=0; i<16; i++){\ + AV_WN4PA(src+ 0, v);\ + AV_WN4PA(src+ 4, v);\ + AV_WN4PA(src+ 8, v);\ + AV_WN4PA(src+12, v);\ + src += stride;\ + } + +static void FUNCC(pred16x16_dc)(uint8_t *_src, ptrdiff_t stride) +{ + int i, dc=0; + pixel *src = (pixel*)_src; + pixel4 dcsplat; + stride >>= sizeof(pixel)-1; + + for(i=0;i<16; i++){ + dc+= src[-1+i*stride]; + } + + for(i=0;i<16; i++){ + dc+= src[i-stride]; + } + + dcsplat = PIXEL_SPLAT_X4((dc+16)>>5); + PREDICT_16x16_DC(dcsplat); +} + +static void FUNCC(pred16x16_left_dc)(uint8_t *_src, ptrdiff_t stride) +{ + int i, dc=0; + pixel *src = (pixel*)_src; + pixel4 dcsplat; + stride >>= sizeof(pixel)-1; + + for(i=0;i<16; i++){ + dc+= src[-1+i*stride]; + } + + dcsplat = PIXEL_SPLAT_X4((dc+8)>>4); + PREDICT_16x16_DC(dcsplat); +} + +static void FUNCC(pred16x16_top_dc)(uint8_t *_src, ptrdiff_t stride) +{ + int i, dc=0; + pixel *src = (pixel*)_src; + pixel4 dcsplat; + stride >>= sizeof(pixel)-1; + + for(i=0;i<16; i++){ + dc+= src[i-stride]; + } + + dcsplat = PIXEL_SPLAT_X4((dc+8)>>4); + PREDICT_16x16_DC(dcsplat); +} + +#define PRED16x16_X(n, v) \ +static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\ +{\ + int i;\ + pixel *src = (pixel*)_src;\ + stride >>= sizeof(pixel)-1;\ + PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\ +} + +PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1) +PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0) +PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1) + +static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src, + ptrdiff_t _stride, + const int svq3, + const int rv40) +{ + int i, j, k; + int a; + INIT_CLIP + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const pixel * const src0 = src +7-stride; + const pixel * src1 = src +8*stride-1; + const pixel * src2 = src1-2*stride; // == src+6*stride-1; + int H = src0[1] - src0[-1]; + int V = src1[0] - src2[ 0]; + for(k=2; k<=8; ++k) { + src1 += stride; src2 -= stride; + H += k*(src0[k] - src0[-k]); + V += k*(src1[0] - src2[ 0]); + } + if(svq3){ + H = ( 5*(H/4) ) / 16; + V = ( 5*(V/4) ) / 16; + + /* required for 100% accuracy */ + i = H; H = V; V = i; + }else if(rv40){ + H = ( H + (H>>2) ) >> 4; + V = ( V + (V>>2) ) >> 4; + }else{ + H = ( 5*H+32 ) >> 6; + V = ( 5*V+32 ) >> 6; + } + + a = 16*(src1[0] + src2[16] + 1) - 7*(V+H); + for(j=16; j>0; --j) { + int b = a; + a += V; + for(i=-16; i<0; i+=4) { + src[16+i] = CLIP((b ) >> 5); + src[17+i] = CLIP((b+ H) >> 5); + src[18+i] = CLIP((b+2*H) >> 5); + src[19+i] = CLIP((b+3*H) >> 5); + b += 4*H; + } + src += stride; + } +} + +static void FUNCC(pred16x16_plane)(uint8_t *src, ptrdiff_t stride) +{ + FUNCC(pred16x16_plane_compat)(src, stride, 0, 0); +} + +static void FUNCC(pred8x8_vertical)(uint8_t *_src, ptrdiff_t _stride) +{ + int i; + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0); + const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1); + + for(i=0; i<8; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, a); + AV_WN4PA(((pixel4*)(src+i*stride))+1, b); + } +} + +static void FUNCC(pred8x16_vertical)(uint8_t *_src, ptrdiff_t _stride) +{ + int i; + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0); + const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1); + + for(i=0; i<16; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, a); + AV_WN4PA(((pixel4*)(src+i*stride))+1, b); + } +} + +static void FUNCC(pred8x8_horizontal)(uint8_t *_src, ptrdiff_t stride) +{ + int i; + pixel *src = (pixel*)_src; + stride >>= sizeof(pixel)-1; + + for(i=0; i<8; i++){ + const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]); + AV_WN4PA(((pixel4*)(src+i*stride))+0, a); + AV_WN4PA(((pixel4*)(src+i*stride))+1, a); + } +} + +static void FUNCC(pred8x16_horizontal)(uint8_t *_src, ptrdiff_t stride) +{ + int i; + pixel *src = (pixel*)_src; + stride >>= sizeof(pixel)-1; + for(i=0; i<16; i++){ + const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]); + AV_WN4PA(((pixel4*)(src+i*stride))+0, a); + AV_WN4PA(((pixel4*)(src+i*stride))+1, a); + } +} + +#define PRED8x8_X(n, v)\ +static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\ +{\ + int i;\ + const pixel4 a = PIXEL_SPLAT_X4(v);\ + pixel *src = (pixel*)_src;\ + stride >>= sizeof(pixel)-1;\ + for(i=0; i<8; i++){\ + AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\ + AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\ + }\ +} + +PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1) +PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0) +PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1) + +static void FUNCC(pred8x16_128_dc)(uint8_t *_src, ptrdiff_t stride) +{ + FUNCC(pred8x8_128_dc)(_src, stride); + FUNCC(pred8x8_128_dc)(_src+8*stride, stride); +} + +static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride) +{ + int i; + int dc0, dc2; + pixel4 dc0splat, dc2splat; + pixel *src = (pixel*)_src; + stride >>= sizeof(pixel)-1; + + dc0=dc2=0; + for(i=0;i<4; i++){ + dc0+= src[-1+i*stride]; + dc2+= src[-1+(i+4)*stride]; + } + dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2); + dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2); + + for(i=0; i<4; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat); + } + for(i=4; i<8; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat); + } +} + +static void FUNCC(pred8x16_left_dc)(uint8_t *_src, ptrdiff_t stride) +{ + FUNCC(pred8x8_left_dc)(_src, stride); + FUNCC(pred8x8_left_dc)(_src+8*stride, stride); +} + +static void FUNCC(pred8x8_top_dc)(uint8_t *_src, ptrdiff_t stride) +{ + int i; + int dc0, dc1; + pixel4 dc0splat, dc1splat; + pixel *src = (pixel*)_src; + stride >>= sizeof(pixel)-1; + + dc0=dc1=0; + for(i=0;i<4; i++){ + dc0+= src[i-stride]; + dc1+= src[4+i-stride]; + } + dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2); + dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); + + for(i=0; i<4; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); + } + for(i=4; i<8; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); + } +} + +static void FUNCC(pred8x16_top_dc)(uint8_t *_src, ptrdiff_t stride) +{ + int i; + int dc0, dc1; + pixel4 dc0splat, dc1splat; + pixel *src = (pixel*)_src; + stride >>= sizeof(pixel)-1; + + dc0=dc1=0; + for(i=0;i<4; i++){ + dc0+= src[i-stride]; + dc1+= src[4+i-stride]; + } + dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2); + dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); + + for(i=0; i<16; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); + } +} + +static void FUNCC(pred8x8_dc)(uint8_t *_src, ptrdiff_t stride) +{ + int i; + int dc0, dc1, dc2; + pixel4 dc0splat, dc1splat, dc2splat, dc3splat; + pixel *src = (pixel*)_src; + stride >>= sizeof(pixel)-1; + + dc0=dc1=dc2=0; + for(i=0;i<4; i++){ + dc0+= src[-1+i*stride] + src[i-stride]; + dc1+= src[4+i-stride]; + dc2+= src[-1+(i+4)*stride]; + } + dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3); + dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); + dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2); + dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3); + + for(i=0; i<4; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); + } + for(i=4; i<8; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat); + } +} + +static void FUNCC(pred8x16_dc)(uint8_t *_src, ptrdiff_t stride) +{ + int i; + int dc0, dc1, dc2, dc3, dc4; + pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat; + pixel *src = (pixel*)_src; + stride >>= sizeof(pixel)-1; + + dc0=dc1=dc2=dc3=dc4=0; + for(i=0;i<4; i++){ + dc0+= src[-1+i*stride] + src[i-stride]; + dc1+= src[4+i-stride]; + dc2+= src[-1+(i+4)*stride]; + dc3+= src[-1+(i+8)*stride]; + dc4+= src[-1+(i+12)*stride]; + } + dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3); + dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); + dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2); + dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3); + dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2); + dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3); + dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2); + dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3); + + for(i=0; i<4; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); + } + for(i=4; i<8; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat); + } + for(i=8; i<12; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat); + } + for(i=12; i<16; i++){ + AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat); + AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat); + } +} + +//the following 4 function should not be optimized! +static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride) +{ + FUNCC(pred8x8_top_dc)(src, stride); + FUNCC(pred4x4_dc)(src, NULL, stride); +} + +static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride) +{ + FUNCC(pred8x16_top_dc)(src, stride); + FUNCC(pred4x4_dc)(src, NULL, stride); +} + +static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride) +{ + FUNCC(pred8x8_dc)(src, stride); + FUNCC(pred4x4_top_dc)(src, NULL, stride); +} + +static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride) +{ + FUNCC(pred8x16_dc)(src, stride); + FUNCC(pred4x4_top_dc)(src, NULL, stride); +} + +static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride) +{ + FUNCC(pred8x8_left_dc)(src, stride); + FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride); + FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); +} + +static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride) +{ + FUNCC(pred8x16_left_dc)(src, stride); + FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride); + FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); +} + +static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride) +{ + FUNCC(pred8x8_left_dc)(src, stride); + FUNCC(pred4x4_128_dc)(src , NULL, stride); + FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); +} + +static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride) +{ + FUNCC(pred8x16_left_dc)(src, stride); + FUNCC(pred4x4_128_dc)(src , NULL, stride); + FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); +} + +static void FUNCC(pred8x8_plane)(uint8_t *_src, ptrdiff_t _stride) +{ + int j, k; + int a; + INIT_CLIP + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const pixel * const src0 = src +3-stride; + const pixel * src1 = src +4*stride-1; + const pixel * src2 = src1-2*stride; // == src+2*stride-1; + int H = src0[1] - src0[-1]; + int V = src1[0] - src2[ 0]; + for(k=2; k<=4; ++k) { + src1 += stride; src2 -= stride; + H += k*(src0[k] - src0[-k]); + V += k*(src1[0] - src2[ 0]); + } + H = ( 17*H+16 ) >> 5; + V = ( 17*V+16 ) >> 5; + + a = 16*(src1[0] + src2[8]+1) - 3*(V+H); + for(j=8; j>0; --j) { + int b = a; + a += V; + src[0] = CLIP((b ) >> 5); + src[1] = CLIP((b+ H) >> 5); + src[2] = CLIP((b+2*H) >> 5); + src[3] = CLIP((b+3*H) >> 5); + src[4] = CLIP((b+4*H) >> 5); + src[5] = CLIP((b+5*H) >> 5); + src[6] = CLIP((b+6*H) >> 5); + src[7] = CLIP((b+7*H) >> 5); + src += stride; + } +} + +static void FUNCC(pred8x16_plane)(uint8_t *_src, ptrdiff_t _stride) +{ + int j, k; + int a; + INIT_CLIP + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + const pixel * const src0 = src +3-stride; + const pixel * src1 = src +8*stride-1; + const pixel * src2 = src1-2*stride; // == src+6*stride-1; + int H = src0[1] - src0[-1]; + int V = src1[0] - src2[ 0]; + + for (k = 2; k <= 4; ++k) { + src1 += stride; src2 -= stride; + H += k*(src0[k] - src0[-k]); + V += k*(src1[0] - src2[ 0]); + } + for (; k <= 8; ++k) { + src1 += stride; src2 -= stride; + V += k*(src1[0] - src2[0]); + } + + H = (17*H+16) >> 5; + V = (5*V+32) >> 6; + + a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H; + for(j=16; j>0; --j) { + int b = a; + a += V; + src[0] = CLIP((b ) >> 5); + src[1] = CLIP((b+ H) >> 5); + src[2] = CLIP((b+2*H) >> 5); + src[3] = CLIP((b+3*H) >> 5); + src[4] = CLIP((b+4*H) >> 5); + src[5] = CLIP((b+5*H) >> 5); + src[6] = CLIP((b+6*H) >> 5); + src[7] = CLIP((b+7*H) >> 5); + src += stride; + } +} + +#define SRC(x,y) src[(x)+(y)*stride] +#define PL(y) \ + const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2; +#define PREDICT_8x8_LOAD_LEFT \ + const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \ + + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \ + PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \ + const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2 + +#define PT(x) \ + const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; +#define PREDICT_8x8_LOAD_TOP \ + const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \ + + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \ + PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \ + const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \ + + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2 + +#define PTR(x) \ + t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; +#define PREDICT_8x8_LOAD_TOPRIGHT \ + int t8, t9, t10, t11, t12, t13, t14, t15; \ + if(has_topright) { \ + PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \ + t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \ + } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1); + +#define PREDICT_8x8_LOAD_TOPLEFT \ + const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2 + +#define PREDICT_8x8_DC(v) \ + int y; \ + for( y = 0; y < 8; y++ ) { \ + AV_WN4PA(((pixel4*)src)+0, v); \ + AV_WN4PA(((pixel4*)src)+1, v); \ + src += stride; \ + } + +static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + + PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1))); +} +static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + + PREDICT_8x8_LOAD_LEFT; + const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3); + PREDICT_8x8_DC(dc); +} +static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + + PREDICT_8x8_LOAD_TOP; + const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3); + PREDICT_8x8_DC(dc); +} +static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOP; + const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7 + +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4); + PREDICT_8x8_DC(dc); +} +static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + pixel4 a; + + PREDICT_8x8_LOAD_LEFT; +#define ROW(y) a = PIXEL_SPLAT_X4(l##y); \ + AV_WN4PA(src+y*stride, a); \ + AV_WN4PA(src+y*stride+4, a); + ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7); +#undef ROW +} +static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + int y; + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + pixel4 a, b; + + PREDICT_8x8_LOAD_TOP; + src[0] = t0; + src[1] = t1; + src[2] = t2; + src[3] = t3; + src[4] = t4; + src[5] = t5; + src[6] = t6; + src[7] = t7; + a = AV_RN4PA(((pixel4*)src)+0); + b = AV_RN4PA(((pixel4*)src)+1); + for( y = 1; y < 8; y++ ) { + AV_WN4PA(((pixel4*)(src+y*stride))+0, a); + AV_WN4PA(((pixel4*)(src+y*stride))+1, b); + } +} +static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_TOPRIGHT; + SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2; + SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2; + SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2; + SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2; + SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2; + SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2; + SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2; + SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2; + SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2; + SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2; + SRC(7,7)= (t14 + 3*t15 + 2) >> 2; +} +static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOPLEFT; + SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2; + SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2; + SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2; + SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2; + SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2; + SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2; + SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2; + SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2; + SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2; + SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2; + SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2; +} +static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOPLEFT; + SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2; + SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2; + SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2; + SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2; + SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2; + SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2; + SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2; + SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1; + SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2; + SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1; + SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1; + SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1; + SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2; + SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1; + SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1; + SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1; + SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2; + SRC(7,0)= (t6 + t7 + 1) >> 1; +} +static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOPLEFT; + SRC(0,7)= (l6 + l7 + 1) >> 1; + SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2; + SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1; + SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2; + SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1; + SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2; + SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1; + SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2; + SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1; + SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2; + SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1; + SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2; + SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1; + SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2; + SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1; + SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2; + SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2; + SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2; + SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2; + SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2; + SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2; + SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2; +} +static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_TOPRIGHT; + SRC(0,0)= (t0 + t1 + 1) >> 1; + SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1; + SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1; + SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2; + SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1; + SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1; + SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1; + SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2; + SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1; + SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2; + SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1; + SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2; + SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1; + SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2; + SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1; + SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2; + SRC(7,6)= (t10 + t11 + 1) >> 1; + SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2; +} +static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + pixel *src = (pixel*)_src; + int stride = _stride>>(sizeof(pixel)-1); + PREDICT_8x8_LOAD_LEFT; + SRC(0,0)= (l0 + l1 + 1) >> 1; + SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2; + SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1; + SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2; + SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1; + SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2; + SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1; + SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2; + SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1; + SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2; + SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1; + SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2; + SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1; + SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2; + SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)= + SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)= + SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)= + SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7; +} + +static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + int i; + pixel *src = (pixel*)_src; + const dctcoef *block = (const dctcoef*)_block; + pixel pix[8]; + int stride = _stride>>(sizeof(pixel)-1); + PREDICT_8x8_LOAD_TOP; + + pix[0] = t0; + pix[1] = t1; + pix[2] = t2; + pix[3] = t3; + pix[4] = t4; + pix[5] = t5; + pix[6] = t6; + pix[7] = t7; + + for(i=0; i<8; i++){ + pixel v = pix[i]; + src[0*stride]= v += block[0]; + src[1*stride]= v += block[8]; + src[2*stride]= v += block[16]; + src[3*stride]= v += block[24]; + src[4*stride]= v += block[32]; + src[5*stride]= v += block[40]; + src[6*stride]= v += block[48]; + src[7*stride]= v + block[56]; + src++; + block++; + } + + memset(_block, 0, sizeof(dctcoef) * 64); +} + +static void FUNCC(pred8x8l_horizontal_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + int i; + pixel *src = (pixel*)_src; + const dctcoef *block = (const dctcoef*)_block; + pixel pix[8]; + int stride = _stride>>(sizeof(pixel)-1); + PREDICT_8x8_LOAD_LEFT; + + pix[0] = l0; + pix[1] = l1; + pix[2] = l2; + pix[3] = l3; + pix[4] = l4; + pix[5] = l5; + pix[6] = l6; + pix[7] = l7; + + for(i=0; i<8; i++){ + pixel v = pix[i]; + src[0]= v += block[0]; + src[1]= v += block[1]; + src[2]= v += block[2]; + src[3]= v += block[3]; + src[4]= v += block[4]; + src[5]= v += block[5]; + src[6]= v += block[6]; + src[7]= v + block[7]; + src+= stride; + block+= 8; + } + + memset(_block, 0, sizeof(dctcoef) * 64); +} + +#undef PREDICT_8x8_LOAD_LEFT +#undef PREDICT_8x8_LOAD_TOP +#undef PREDICT_8x8_LOAD_TOPLEFT +#undef PREDICT_8x8_LOAD_TOPRIGHT +#undef PREDICT_8x8_DC +#undef PTR +#undef PT +#undef PL +#undef SRC + +static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, int16_t *_block, + ptrdiff_t stride) +{ + int i; + pixel *pix = (pixel*)_pix; + const dctcoef *block = (const dctcoef*)_block; + stride >>= sizeof(pixel)-1; + pix -= stride; + for(i=0; i<4; i++){ + pixel v = pix[0]; + pix[1*stride]= v += block[0]; + pix[2*stride]= v += block[4]; + pix[3*stride]= v += block[8]; + pix[4*stride]= v + block[12]; + pix++; + block++; + } + + memset(_block, 0, sizeof(dctcoef) * 16); +} + +static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, int16_t *_block, + ptrdiff_t stride) +{ + int i; + pixel *pix = (pixel*)_pix; + const dctcoef *block = (const dctcoef*)_block; + stride >>= sizeof(pixel)-1; + for(i=0; i<4; i++){ + pixel v = pix[-1]; + pix[0]= v += block[0]; + pix[1]= v += block[1]; + pix[2]= v += block[2]; + pix[3]= v + block[3]; + pix+= stride; + block+= 4; + } + + memset(_block, 0, sizeof(dctcoef) * 16); +} + +static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, int16_t *_block, + ptrdiff_t stride) +{ + int i; + pixel *pix = (pixel*)_pix; + const dctcoef *block = (const dctcoef*)_block; + stride >>= sizeof(pixel)-1; + pix -= stride; + for(i=0; i<8; i++){ + pixel v = pix[0]; + pix[1*stride]= v += block[0]; + pix[2*stride]= v += block[8]; + pix[3*stride]= v += block[16]; + pix[4*stride]= v += block[24]; + pix[5*stride]= v += block[32]; + pix[6*stride]= v += block[40]; + pix[7*stride]= v += block[48]; + pix[8*stride]= v + block[56]; + pix++; + block++; + } + + memset(_block, 0, sizeof(dctcoef) * 64); +} + +static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, int16_t *_block, + ptrdiff_t stride) +{ + int i; + pixel *pix = (pixel*)_pix; + const dctcoef *block = (const dctcoef*)_block; + stride >>= sizeof(pixel)-1; + for(i=0; i<8; i++){ + pixel v = pix[-1]; + pix[0]= v += block[0]; + pix[1]= v += block[1]; + pix[2]= v += block[2]; + pix[3]= v += block[3]; + pix[4]= v += block[4]; + pix[5]= v += block[5]; + pix[6]= v += block[6]; + pix[7]= v + block[7]; + pix+= stride; + block+= 8; + } + + memset(_block, 0, sizeof(dctcoef) * 64); +} + +static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, + int16_t *block, + ptrdiff_t stride) +{ + int i; + for(i=0; i<16; i++) + FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); +} + +static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, + const int *block_offset, + int16_t *block, + ptrdiff_t stride) +{ + int i; + for(i=0; i<16; i++) + FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); +} + +static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, + int16_t *block, ptrdiff_t stride) +{ + int i; + for(i=0; i<4; i++) + FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); +} + +static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset, + int16_t *block, ptrdiff_t stride) +{ + int i; + for(i=0; i<4; i++) + FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); + for(i=4; i<8; i++) + FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride); +} + +static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, + int16_t *block, + ptrdiff_t stride) +{ + int i; + for(i=0; i<4; i++) + FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); +} + +static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix, + const int *block_offset, + int16_t *block, ptrdiff_t stride) +{ + int i; + for(i=0; i<4; i++) + FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); + for(i=4; i<8; i++) + FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride); +} diff --git a/libs/ffvpx/libavcodec/hpeldsp.h b/libs/ffvpx/libavcodec/hpeldsp.h new file mode 100644 index 000000000..768139bfc --- /dev/null +++ b/libs/ffvpx/libavcodec/hpeldsp.h @@ -0,0 +1,106 @@ +/* + * Half-pel DSP functions. + * Copyright (c) 2000, 2001, 2002 Fabrice Bellard + * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Half-pel DSP functions. + */ + +#ifndef AVCODEC_HPELDSP_H +#define AVCODEC_HPELDSP_H + +#include <stdint.h> +#include <stddef.h> + +/* add and put pixel (decoding) */ +// blocksizes for hpel_pixels_func are 8x4,8x8 16x8 16x16 +// h for hpel_pixels_func is limited to {width/2, width} but never larger +// than 16 and never smaller than 4 +typedef void (*op_pixels_func)(uint8_t *block /*align width (8 or 16)*/, + const uint8_t *pixels /*align 1*/, + ptrdiff_t line_size, int h); + +/** + * Half-pel DSP context. + */ +typedef struct HpelDSPContext { + /** + * Halfpel motion compensation with rounding (a+b+1)>>1. + * this is an array[4][4] of motion compensation functions for 4 + * horizontal blocksizes (8,16) and the 4 halfpel positions<br> + * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] + * @param block destination where the result is stored + * @param pixels source + * @param line_size number of bytes in a horizontal line of block + * @param h height + */ + op_pixels_func put_pixels_tab[4][4]; + + /** + * Halfpel motion compensation with rounding (a+b+1)>>1. + * This is an array[4][4] of motion compensation functions for 4 + * horizontal blocksizes (8,16) and the 4 halfpel positions<br> + * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] + * @param block destination into which the result is averaged (a+b+1)>>1 + * @param pixels source + * @param line_size number of bytes in a horizontal line of block + * @param h height + */ + op_pixels_func avg_pixels_tab[4][4]; + + /** + * Halfpel motion compensation with no rounding (a+b)>>1. + * this is an array[4][4] of motion compensation functions for 2 + * horizontal blocksizes (8,16) and the 4 halfpel positions<br> + * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] + * @param block destination where the result is stored + * @param pixels source + * @param line_size number of bytes in a horizontal line of block + * @param h height + * @note The size is kept at [4][4] to match the above pixel_tabs and avoid + * out of bounds reads in the motion estimation code. + */ + op_pixels_func put_no_rnd_pixels_tab[4][4]; + + /** + * Halfpel motion compensation with no rounding (a+b)>>1. + * this is an array[4] of motion compensation functions for 1 + * horizontal blocksize (16) and the 4 halfpel positions<br> + * *pixels_tab[0][ xhalfpel + 2*yhalfpel ] + * @param block destination into which the result is averaged (a+b)>>1 + * @param pixels source + * @param line_size number of bytes in a horizontal line of block + * @param h height + */ + op_pixels_func avg_no_rnd_pixels_tab[4]; +} HpelDSPContext; + +void ff_hpeldsp_init(HpelDSPContext *c, int flags); + +void ff_hpeldsp_init_aarch64(HpelDSPContext *c, int flags); +void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags); +void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags); +void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags); +void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags); +void ff_hpeldsp_init_mips(HpelDSPContext *c, int flags); + +#endif /* AVCODEC_HPELDSP_H */ diff --git a/libs/ffvpx/libavcodec/hwaccel.h b/libs/ffvpx/libavcodec/hwaccel.h new file mode 100644 index 000000000..3aaa92571 --- /dev/null +++ b/libs/ffvpx/libavcodec/hwaccel.h @@ -0,0 +1,84 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_HWACCEL_H +#define AVCODEC_HWACCEL_H + +#include "avcodec.h" +#include "hwaccels.h" + + +#define HWACCEL_CAP_ASYNC_SAFE (1 << 0) + + +typedef struct AVCodecHWConfigInternal { + /** + * This is the structure which will be returned to the user by + * avcodec_get_hw_config(). + */ + AVCodecHWConfig public; + /** + * If this configuration uses a hwaccel, a pointer to it. + * If not, NULL. + */ + const AVHWAccel *hwaccel; +} AVCodecHWConfigInternal; + + +// These macros are used to simplify AVCodecHWConfigInternal definitions. + +#define HW_CONFIG_HWACCEL(device, frames, ad_hoc, format, device_type_, name) \ + &(const AVCodecHWConfigInternal) { \ + .public = { \ + .pix_fmt = AV_PIX_FMT_ ## format, \ + .methods = (device ? AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX : 0) | \ + (frames ? AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX : 0) | \ + (ad_hoc ? AV_CODEC_HW_CONFIG_METHOD_AD_HOC : 0), \ + .device_type = AV_HWDEVICE_TYPE_ ## device_type_, \ + }, \ + .hwaccel = &name, \ + } + +#define HW_CONFIG_INTERNAL(format) \ + &(const AVCodecHWConfigInternal) { \ + .public = { \ + .pix_fmt = AV_PIX_FMT_ ## format, \ + .methods = AV_CODEC_HW_CONFIG_METHOD_INTERNAL, \ + .device_type = AV_HWDEVICE_TYPE_NONE, \ + }, \ + .hwaccel = NULL, \ + } + +#define HWACCEL_DXVA2(codec) \ + HW_CONFIG_HWACCEL(1, 1, 1, DXVA2_VLD, DXVA2, ff_ ## codec ## _dxva2_hwaccel) +#define HWACCEL_D3D11VA2(codec) \ + HW_CONFIG_HWACCEL(1, 1, 0, D3D11, D3D11VA, ff_ ## codec ## _d3d11va2_hwaccel) +#define HWACCEL_NVDEC(codec) \ + HW_CONFIG_HWACCEL(1, 1, 0, CUDA, CUDA, ff_ ## codec ## _nvdec_hwaccel) +#define HWACCEL_VAAPI(codec) \ + HW_CONFIG_HWACCEL(1, 1, 1, VAAPI, VAAPI, ff_ ## codec ## _vaapi_hwaccel) +#define HWACCEL_VDPAU(codec) \ + HW_CONFIG_HWACCEL(1, 1, 1, VDPAU, VDPAU, ff_ ## codec ## _vdpau_hwaccel) +#define HWACCEL_VIDEOTOOLBOX(codec) \ + HW_CONFIG_HWACCEL(1, 1, 1, VIDEOTOOLBOX, VIDEOTOOLBOX, ff_ ## codec ## _videotoolbox_hwaccel) +#define HWACCEL_D3D11VA(codec) \ + HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD, NONE, ff_ ## codec ## _d3d11va_hwaccel) +#define HWACCEL_XVMC(codec) \ + HW_CONFIG_HWACCEL(0, 0, 1, XVMC, NONE, ff_ ## codec ## _xvmc_hwaccel) + +#endif /* AVCODEC_HWACCEL_H */ diff --git a/libs/ffvpx/libavcodec/hwaccels.h b/libs/ffvpx/libavcodec/hwaccels.h new file mode 100644 index 000000000..7d73da867 --- /dev/null +++ b/libs/ffvpx/libavcodec/hwaccels.h @@ -0,0 +1,78 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_HWACCELS_H +#define AVCODEC_HWACCELS_H + +#include "avcodec.h" + +extern const AVHWAccel ff_h263_vaapi_hwaccel; +extern const AVHWAccel ff_h263_videotoolbox_hwaccel; +extern const AVHWAccel ff_h264_d3d11va_hwaccel; +extern const AVHWAccel ff_h264_d3d11va2_hwaccel; +extern const AVHWAccel ff_h264_dxva2_hwaccel; +extern const AVHWAccel ff_h264_nvdec_hwaccel; +extern const AVHWAccel ff_h264_vaapi_hwaccel; +extern const AVHWAccel ff_h264_vdpau_hwaccel; +extern const AVHWAccel ff_h264_videotoolbox_hwaccel; +extern const AVHWAccel ff_hevc_d3d11va_hwaccel; +extern const AVHWAccel ff_hevc_d3d11va2_hwaccel; +extern const AVHWAccel ff_hevc_dxva2_hwaccel; +extern const AVHWAccel ff_hevc_nvdec_hwaccel; +extern const AVHWAccel ff_hevc_vaapi_hwaccel; +extern const AVHWAccel ff_hevc_vdpau_hwaccel; +extern const AVHWAccel ff_hevc_videotoolbox_hwaccel; +extern const AVHWAccel ff_mjpeg_nvdec_hwaccel; +extern const AVHWAccel ff_mjpeg_vaapi_hwaccel; +extern const AVHWAccel ff_mpeg1_nvdec_hwaccel; +extern const AVHWAccel ff_mpeg1_vdpau_hwaccel; +extern const AVHWAccel ff_mpeg1_videotoolbox_hwaccel; +extern const AVHWAccel ff_mpeg1_xvmc_hwaccel; +extern const AVHWAccel ff_mpeg2_d3d11va_hwaccel; +extern const AVHWAccel ff_mpeg2_d3d11va2_hwaccel; +extern const AVHWAccel ff_mpeg2_nvdec_hwaccel; +extern const AVHWAccel ff_mpeg2_dxva2_hwaccel; +extern const AVHWAccel ff_mpeg2_vaapi_hwaccel; +extern const AVHWAccel ff_mpeg2_vdpau_hwaccel; +extern const AVHWAccel ff_mpeg2_videotoolbox_hwaccel; +extern const AVHWAccel ff_mpeg2_xvmc_hwaccel; +extern const AVHWAccel ff_mpeg4_nvdec_hwaccel; +extern const AVHWAccel ff_mpeg4_vaapi_hwaccel; +extern const AVHWAccel ff_mpeg4_vdpau_hwaccel; +extern const AVHWAccel ff_mpeg4_videotoolbox_hwaccel; +extern const AVHWAccel ff_vc1_d3d11va_hwaccel; +extern const AVHWAccel ff_vc1_d3d11va2_hwaccel; +extern const AVHWAccel ff_vc1_dxva2_hwaccel; +extern const AVHWAccel ff_vc1_nvdec_hwaccel; +extern const AVHWAccel ff_vc1_vaapi_hwaccel; +extern const AVHWAccel ff_vc1_vdpau_hwaccel; +extern const AVHWAccel ff_vp8_nvdec_hwaccel; +extern const AVHWAccel ff_vp8_vaapi_hwaccel; +extern const AVHWAccel ff_vp9_d3d11va_hwaccel; +extern const AVHWAccel ff_vp9_d3d11va2_hwaccel; +extern const AVHWAccel ff_vp9_dxva2_hwaccel; +extern const AVHWAccel ff_vp9_nvdec_hwaccel; +extern const AVHWAccel ff_vp9_vaapi_hwaccel; +extern const AVHWAccel ff_wmv3_d3d11va_hwaccel; +extern const AVHWAccel ff_wmv3_d3d11va2_hwaccel; +extern const AVHWAccel ff_wmv3_dxva2_hwaccel; +extern const AVHWAccel ff_wmv3_nvdec_hwaccel; +extern const AVHWAccel ff_wmv3_vaapi_hwaccel; +extern const AVHWAccel ff_wmv3_vdpau_hwaccel; + +#endif /* AVCODEC_HWACCELS_H */ diff --git a/libs/ffvpx/libavcodec/idctdsp.h b/libs/ffvpx/libavcodec/idctdsp.h new file mode 100644 index 000000000..ca21a31a0 --- /dev/null +++ b/libs/ffvpx/libavcodec/idctdsp.h @@ -0,0 +1,122 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_IDCTDSP_H +#define AVCODEC_IDCTDSP_H + +#include <stdint.h> + +#include "config.h" + +#include "avcodec.h" + +/** + * Scantable. + */ +typedef struct ScanTable { + const uint8_t *scantable; + uint8_t permutated[64]; + uint8_t raster_end[64]; +} ScanTable; + +enum idct_permutation_type { + FF_IDCT_PERM_NONE, + FF_IDCT_PERM_LIBMPEG2, + FF_IDCT_PERM_SIMPLE, + FF_IDCT_PERM_TRANSPOSE, + FF_IDCT_PERM_PARTTRANS, + FF_IDCT_PERM_SSE2, +}; + +void ff_init_scantable(uint8_t *permutation, ScanTable *st, + const uint8_t *src_scantable); +void ff_init_scantable_permutation(uint8_t *idct_permutation, + enum idct_permutation_type perm_type); +int ff_init_scantable_permutation_x86(uint8_t *idct_permutation, + enum idct_permutation_type perm_type); + +typedef struct IDCTDSPContext { + /* pixel ops : interface with DCT */ + void (*put_pixels_clamped)(const int16_t *block /* align 16 */, + uint8_t *av_restrict pixels /* align 8 */, + ptrdiff_t line_size); + void (*put_signed_pixels_clamped)(const int16_t *block /* align 16 */, + uint8_t *av_restrict pixels /* align 8 */, + ptrdiff_t line_size); + void (*add_pixels_clamped)(const int16_t *block /* align 16 */, + uint8_t *av_restrict pixels /* align 8 */, + ptrdiff_t line_size); + + void (*idct)(int16_t *block /* align 16 */); + + /** + * block -> idct -> clip to unsigned 8 bit -> dest. + * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...) + * @param line_size size in bytes of a horizontal line of dest + */ + void (*idct_put)(uint8_t *dest /* align 8 */, + ptrdiff_t line_size, int16_t *block /* align 16 */); + + /** + * block -> idct -> add dest -> clip to unsigned 8 bit -> dest. + * @param line_size size in bytes of a horizontal line of dest + */ + void (*idct_add)(uint8_t *dest /* align 8 */, + ptrdiff_t line_size, int16_t *block /* align 16 */); + + /** + * IDCT input permutation. + * Several optimized IDCTs need a permutated input (relative to the + * normal order of the reference IDCT). + * This permutation must be performed before the idct_put/add. + * Note, normally this can be merged with the zigzag/alternate scan<br> + * An example to avoid confusion: + * - (->decode coeffs -> zigzag reorder -> dequant -> reference IDCT -> ...) + * - (x -> reference DCT -> reference IDCT -> x) + * - (x -> reference DCT -> simple_mmx_perm = idct_permutation + * -> simple_idct_mmx -> x) + * - (-> decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant + * -> simple_idct_mmx -> ...) + */ + uint8_t idct_permutation[64]; + enum idct_permutation_type perm_type; + + int mpeg4_studio_profile; +} IDCTDSPContext; + +void ff_put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels, + ptrdiff_t line_size); +void ff_add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels, + ptrdiff_t line_size); + +void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx); + +void ff_idctdsp_init_aarch64(IDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_idctdsp_init_alpha(IDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_idctdsp_init_mips(IDCTDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); + +#endif /* AVCODEC_IDCTDSP_H */ diff --git a/libs/ffvpx/libavcodec/imgconvert.c b/libs/ffvpx/libavcodec/imgconvert.c new file mode 100644 index 000000000..1fd636c83 --- /dev/null +++ b/libs/ffvpx/libavcodec/imgconvert.c @@ -0,0 +1,232 @@ +/* + * Misc image conversion routines + * Copyright (c) 2001, 2002, 2003 Fabrice Bellard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * misc image conversion routines + */ + +#include "avcodec.h" +#include "internal.h" +#include "mathops.h" +#include "libavutil/avassert.h" +#include "libavutil/colorspace.h" +#include "libavutil/common.h" +#include "libavutil/pixdesc.h" +#include "libavutil/internal.h" +#include "libavutil/imgutils.h" + +#if FF_API_GETCHROMA +void avcodec_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); + av_assert0(desc); + *h_shift = desc->log2_chroma_w; + *v_shift = desc->log2_chroma_h; +} +#endif + +int avcodec_get_pix_fmt_loss(enum AVPixelFormat dst_pix_fmt, + enum AVPixelFormat src_pix_fmt, + int has_alpha) +{ + return av_get_pix_fmt_loss(dst_pix_fmt, src_pix_fmt, has_alpha); +} + +enum AVPixelFormat avcodec_find_best_pix_fmt_of_2(enum AVPixelFormat dst_pix_fmt1, enum AVPixelFormat dst_pix_fmt2, + enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr) +{ + return av_find_best_pix_fmt_of_2(dst_pix_fmt1, dst_pix_fmt2, src_pix_fmt, has_alpha, loss_ptr); +} + +enum AVPixelFormat avcodec_find_best_pix_fmt2(enum AVPixelFormat dst_pix_fmt1, enum AVPixelFormat dst_pix_fmt2, + enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr) +{ + return avcodec_find_best_pix_fmt_of_2(dst_pix_fmt1, dst_pix_fmt2, src_pix_fmt, has_alpha, loss_ptr); +} + +enum AVPixelFormat avcodec_find_best_pix_fmt_of_list(const enum AVPixelFormat *pix_fmt_list, + enum AVPixelFormat src_pix_fmt, + int has_alpha, int *loss_ptr){ + int i; + + enum AVPixelFormat best = AV_PIX_FMT_NONE; + int loss; + + for (i=0; pix_fmt_list[i] != AV_PIX_FMT_NONE; i++) { + loss = loss_ptr ? *loss_ptr : 0; + best = avcodec_find_best_pix_fmt_of_2(best, pix_fmt_list[i], src_pix_fmt, has_alpha, &loss); + } + + if (loss_ptr) + *loss_ptr = loss; + return best; +} + +#if FF_API_AVPICTURE +FF_DISABLE_DEPRECATION_WARNINGS +/* return true if yuv planar */ +static inline int is_yuv_planar(const AVPixFmtDescriptor *desc) +{ + int i; + int planes[4] = { 0 }; + + if ( desc->flags & AV_PIX_FMT_FLAG_RGB + || !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) + return 0; + + /* set the used planes */ + for (i = 0; i < desc->nb_components; i++) + planes[desc->comp[i].plane] = 1; + + /* if there is an unused plane, the format is not planar */ + for (i = 0; i < desc->nb_components; i++) + if (!planes[i]) + return 0; + return 1; +} + +int av_picture_crop(AVPicture *dst, const AVPicture *src, + enum AVPixelFormat pix_fmt, int top_band, int left_band) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); + int y_shift; + int x_shift; + int max_step[4]; + + if (pix_fmt < 0 || pix_fmt >= AV_PIX_FMT_NB) + return -1; + + y_shift = desc->log2_chroma_h; + x_shift = desc->log2_chroma_w; + av_image_fill_max_pixsteps(max_step, NULL, desc); + + if (is_yuv_planar(desc)) { + dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + left_band; + dst->data[1] = src->data[1] + ((top_band >> y_shift) * src->linesize[1]) + (left_band >> x_shift); + dst->data[2] = src->data[2] + ((top_band >> y_shift) * src->linesize[2]) + (left_band >> x_shift); + } else{ + if(top_band % (1<<y_shift) || left_band % (1<<x_shift)) + return -1; + dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + (left_band * max_step[0]); + } + + dst->linesize[0] = src->linesize[0]; + dst->linesize[1] = src->linesize[1]; + dst->linesize[2] = src->linesize[2]; + return 0; +} + +int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width, + enum AVPixelFormat pix_fmt, int padtop, int padbottom, int padleft, int padright, + int *color) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); + uint8_t *optr; + int y_shift; + int x_shift; + int yheight; + int i, y; + int max_step[4]; + + if (pix_fmt < 0 || pix_fmt >= AV_PIX_FMT_NB) + return -1; + + if (!is_yuv_planar(desc)) { + if (src) + return -1; //TODO: Not yet implemented + + av_image_fill_max_pixsteps(max_step, NULL, desc); + + if (padtop || padleft) { + memset(dst->data[0], color[0], + dst->linesize[0] * padtop + (padleft * max_step[0])); + } + + if (padleft || padright) { + optr = dst->data[0] + dst->linesize[0] * padtop + + (dst->linesize[0] - (padright * max_step[0])); + yheight = height - 1 - (padtop + padbottom); + for (y = 0; y < yheight; y++) { + memset(optr, color[0], (padleft + padright) * max_step[0]); + optr += dst->linesize[0]; + } + } + + if (padbottom || padright) { + optr = dst->data[0] + dst->linesize[0] * (height - padbottom) - + (padright * max_step[0]); + memset(optr, color[0], dst->linesize[0] * padbottom + + (padright * max_step[0])); + } + + return 0; + } + + for (i = 0; i < 3; i++) { + x_shift = i ? desc->log2_chroma_w : 0; + y_shift = i ? desc->log2_chroma_h : 0; + + if (padtop || padleft) { + memset(dst->data[i], color[i], + dst->linesize[i] * (padtop >> y_shift) + (padleft >> x_shift)); + } + + if (padleft || padright) { + optr = dst->data[i] + dst->linesize[i] * (padtop >> y_shift) + + (dst->linesize[i] - (padright >> x_shift)); + yheight = (height - 1 - (padtop + padbottom)) >> y_shift; + for (y = 0; y < yheight; y++) { + memset(optr, color[i], (padleft + padright) >> x_shift); + optr += dst->linesize[i]; + } + } + + if (src) { /* first line */ + uint8_t *iptr = src->data[i]; + optr = dst->data[i] + dst->linesize[i] * (padtop >> y_shift) + + (padleft >> x_shift); + memcpy(optr, iptr, (width - padleft - padright) >> x_shift); + iptr += src->linesize[i]; + optr = dst->data[i] + dst->linesize[i] * (padtop >> y_shift) + + (dst->linesize[i] - (padright >> x_shift)); + yheight = (height - 1 - (padtop + padbottom)) >> y_shift; + for (y = 0; y < yheight; y++) { + memset(optr, color[i], (padleft + padright) >> x_shift); + memcpy(optr + ((padleft + padright) >> x_shift), iptr, + (width - padleft - padright) >> x_shift); + iptr += src->linesize[i]; + optr += dst->linesize[i]; + } + } + + if (padbottom || padright) { + optr = dst->data[i] + dst->linesize[i] * + ((height - padbottom) >> y_shift) - (padright >> x_shift); + memset(optr, color[i],dst->linesize[i] * + (padbottom >> y_shift) + (padright >> x_shift)); + } + } + + return 0; +} +FF_ENABLE_DEPRECATION_WARNINGS +#endif /* FF_API_AVPICTURE */ diff --git a/libs/ffvpx/libavcodec/internal.h b/libs/ffvpx/libavcodec/internal.h new file mode 100644 index 000000000..bb92873d7 --- /dev/null +++ b/libs/ffvpx/libavcodec/internal.h @@ -0,0 +1,413 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * common internal api header. + */ + +#ifndef AVCODEC_INTERNAL_H +#define AVCODEC_INTERNAL_H + +#include <stdint.h> + +#include "libavutil/buffer.h" +#include "libavutil/channel_layout.h" +#include "libavutil/mathematics.h" +#include "libavutil/pixfmt.h" +#include "avcodec.h" +#include "config.h" + +/** + * The codec does not modify any global variables in the init function, + * allowing to call the init function without locking any global mutexes. + */ +#define FF_CODEC_CAP_INIT_THREADSAFE (1 << 0) +/** + * The codec allows calling the close function for deallocation even if + * the init function returned a failure. Without this capability flag, a + * codec does such cleanup internally when returning failures from the + * init function and does not expect the close function to be called at + * all. + */ +#define FF_CODEC_CAP_INIT_CLEANUP (1 << 1) +/** + * Decoders marked with FF_CODEC_CAP_SETS_PKT_DTS want to set + * AVFrame.pkt_dts manually. If the flag is set, decode.c won't overwrite + * this field. If it's unset, decode.c tries to guess the pkt_dts field + * from the input AVPacket. + */ +#define FF_CODEC_CAP_SETS_PKT_DTS (1 << 2) +/** + * The decoder extracts and fills its parameters even if the frame is + * skipped due to the skip_frame setting. + */ +#define FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM (1 << 3) +/** + * The decoder sets the cropping fields in the output frames manually. + * If this cap is set, the generic code will initialize output frame + * dimensions to coded rather than display values. + */ +#define FF_CODEC_CAP_EXPORTS_CROPPING (1 << 4) +/** + * Codec initializes slice-based threading with a main function + */ +#define FF_CODEC_CAP_SLICE_THREAD_HAS_MF (1 << 5) + +#ifdef TRACE +# define ff_tlog(ctx, ...) av_log(ctx, AV_LOG_TRACE, __VA_ARGS__) +#else +# define ff_tlog(ctx, ...) do { } while(0) +#endif + + +#define FF_DEFAULT_QUANT_BIAS 999999 + +#define FF_QSCALE_TYPE_MPEG1 0 +#define FF_QSCALE_TYPE_MPEG2 1 +#define FF_QSCALE_TYPE_H264 2 +#define FF_QSCALE_TYPE_VP56 3 + +#define FF_SANE_NB_CHANNELS 64U + +#define FF_SIGNBIT(x) ((x) >> CHAR_BIT * sizeof(x) - 1) + +#if HAVE_SIMD_ALIGN_64 +# define STRIDE_ALIGN 64 /* AVX-512 */ +#elif HAVE_SIMD_ALIGN_32 +# define STRIDE_ALIGN 32 +#elif HAVE_SIMD_ALIGN_16 +# define STRIDE_ALIGN 16 +#else +# define STRIDE_ALIGN 8 +#endif + +typedef struct FramePool { + /** + * Pools for each data plane. For audio all the planes have the same size, + * so only pools[0] is used. + */ + AVBufferPool *pools[4]; + + /* + * Pool parameters + */ + int format; + int width, height; + int stride_align[AV_NUM_DATA_POINTERS]; + int linesize[4]; + int planes; + int channels; + int samples; +} FramePool; + +typedef struct DecodeSimpleContext { + AVPacket *in_pkt; + AVFrame *out_frame; +} DecodeSimpleContext; + +typedef struct DecodeFilterContext { + AVBSFContext **bsfs; + int nb_bsfs; +} DecodeFilterContext; + +typedef struct AVCodecInternal { + /** + * Whether the parent AVCodecContext is a copy of the context which had + * init() called on it. + * This is used by multithreading - shared tables and picture pointers + * should be freed from the original context only. + */ + int is_copy; + + /** + * Whether to allocate progress for frame threading. + * + * The codec must set it to 1 if it uses ff_thread_await/report_progress(), + * then progress will be allocated in ff_thread_get_buffer(). The frames + * then MUST be freed with ff_thread_release_buffer(). + * + * If the codec does not need to call the progress functions (there are no + * dependencies between the frames), it should leave this at 0. Then it can + * decode straight to the user-provided frames (which the user will then + * free with av_frame_unref()), there is no need to call + * ff_thread_release_buffer(). + */ + int allocate_progress; + + /** + * An audio frame with less than required samples has been submitted and + * padded with silence. Reject all subsequent frames. + */ + int last_audio_frame; + + AVFrame *to_free; + + FramePool *pool; + + void *thread_ctx; + + DecodeSimpleContext ds; + DecodeFilterContext filter; + + /** + * Properties (timestamps+side data) extracted from the last packet passed + * for decoding. + */ + AVPacket *last_pkt_props; + + /** + * temporary buffer used for encoders to store their bitstream + */ + uint8_t *byte_buffer; + unsigned int byte_buffer_size; + + void *frame_thread_encoder; + + /** + * Number of audio samples to skip at the start of the next decoded frame + */ + int skip_samples; + + /** + * hwaccel-specific private data + */ + void *hwaccel_priv_data; + + /** + * checks API usage: after codec draining, flush is required to resume operation + */ + int draining; + + /** + * buffers for using new encode/decode API through legacy API + */ + AVPacket *buffer_pkt; + int buffer_pkt_valid; // encoding: packet without data can be valid + AVFrame *buffer_frame; + int draining_done; + /* set to 1 when the caller is using the old decoding API */ + int compat_decode; + int compat_decode_warned; + /* this variable is set by the decoder internals to signal to the old + * API compat wrappers the amount of data consumed from the last packet */ + size_t compat_decode_consumed; + /* when a partial packet has been consumed, this stores the remaining size + * of the packet (that should be submitted in the next decode call */ + size_t compat_decode_partial_size; + AVFrame *compat_decode_frame; + + int showed_multi_packet_warning; + + int skip_samples_multiplier; + + /* to prevent infinite loop on errors when draining */ + int nb_draining_errors; +} AVCodecInternal; + +struct AVCodecDefault { + const uint8_t *key; + const uint8_t *value; +}; + +extern const uint8_t ff_log2_run[41]; + +/** + * Return the index into tab at which {a,b} match elements {[0],[1]} of tab. + * If there is no such matching pair then size is returned. + */ +int ff_match_2uint16(const uint16_t (*tab)[2], int size, int a, int b); + +unsigned int avpriv_toupper4(unsigned int x); + +void ff_color_frame(AVFrame *frame, const int color[4]); + +/** + * Maximum size in bytes of extradata. + * This value was chosen such that every bit of the buffer is + * addressable by a 32-bit signed integer as used by get_bits. + */ +#define FF_MAX_EXTRADATA_SIZE ((1 << 28) - AV_INPUT_BUFFER_PADDING_SIZE) + +/** + * Check AVPacket size and/or allocate data. + * + * Encoders supporting AVCodec.encode2() can use this as a convenience to + * ensure the output packet data is large enough, whether provided by the user + * or allocated in this function. + * + * @param avctx the AVCodecContext of the encoder + * @param avpkt the AVPacket + * If avpkt->data is already set, avpkt->size is checked + * to ensure it is large enough. + * If avpkt->data is NULL, a new buffer is allocated. + * avpkt->size is set to the specified size. + * All other AVPacket fields will be reset with av_init_packet(). + * @param size the minimum required packet size + * @param min_size This is a hint to the allocation algorithm, which indicates + * to what minimal size the caller might later shrink the packet + * to. Encoders often allocate packets which are larger than the + * amount of data that is written into them as the exact amount is + * not known at the time of allocation. min_size represents the + * size a packet might be shrunk to by the caller. Can be set to + * 0. setting this roughly correctly allows the allocation code + * to choose between several allocation strategies to improve + * speed slightly. + * @return non negative on success, negative error code on failure + */ +int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size); + +attribute_deprecated int ff_alloc_packet(AVPacket *avpkt, int size); + +/** + * Rescale from sample rate to AVCodecContext.time_base. + */ +static av_always_inline int64_t ff_samples_to_time_base(AVCodecContext *avctx, + int64_t samples) +{ + if(samples == AV_NOPTS_VALUE) + return AV_NOPTS_VALUE; + return av_rescale_q(samples, (AVRational){ 1, avctx->sample_rate }, + avctx->time_base); +} + +/** + * 2^(x) for integer x + * @return correctly rounded float + */ +static av_always_inline float ff_exp2fi(int x) { + /* Normal range */ + if (-126 <= x && x <= 128) + return av_int2float((x+127) << 23); + /* Too large */ + else if (x > 128) + return INFINITY; + /* Subnormal numbers */ + else if (x > -150) + return av_int2float(1 << (x+149)); + /* Negligibly small */ + else + return 0; +} + +/** + * Get a buffer for a frame. This is a wrapper around + * AVCodecContext.get_buffer() and should be used instead calling get_buffer() + * directly. + */ +int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags); + +/** + * Identical in function to av_frame_make_writable(), except it uses + * ff_get_buffer() to allocate the buffer when needed. + */ +int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame); + +int ff_thread_can_start_frame(AVCodecContext *avctx); + +int avpriv_h264_has_num_reorder_frames(AVCodecContext *avctx); + +/** + * Call avcodec_open2 recursively by decrementing counter, unlocking mutex, + * calling the function and then restoring again. Assumes the mutex is + * already locked + */ +int ff_codec_open2_recursive(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options); + +/** + * Finalize buf into extradata and set its size appropriately. + */ +int avpriv_bprint_to_extradata(AVCodecContext *avctx, struct AVBPrint *buf); + +const uint8_t *avpriv_find_start_code(const uint8_t *p, + const uint8_t *end, + uint32_t *state); + +int avpriv_codec_get_cap_skip_frame_fill_param(const AVCodec *codec); + +/** + * Check that the provided frame dimensions are valid and set them on the codec + * context. + */ +int ff_set_dimensions(AVCodecContext *s, int width, int height); + +/** + * Check that the provided sample aspect ratio is valid and set it on the codec + * context. + */ +int ff_set_sar(AVCodecContext *avctx, AVRational sar); + +/** + * Add or update AV_FRAME_DATA_MATRIXENCODING side data. + */ +int ff_side_data_update_matrix_encoding(AVFrame *frame, + enum AVMatrixEncoding matrix_encoding); + +/** + * Select the (possibly hardware accelerated) pixel format. + * This is a wrapper around AVCodecContext.get_format() and should be used + * instead of calling get_format() directly. + * + * The list of pixel formats must contain at least one valid entry, and is + * terminated with AV_PIX_FMT_NONE. If it is possible to decode to software, + * the last entry in the list must be the most accurate software format. + * If it is not possible to decode to software, AVCodecContext.sw_pix_fmt + * must be set before calling this function. + */ +int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt); + +/** + * Set various frame properties from the codec context / packet data. + */ +int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame); + +/** + * Add a CPB properties side data to an encoding context. + */ +AVCPBProperties *ff_add_cpb_side_data(AVCodecContext *avctx); + +int ff_side_data_set_encoder_stats(AVPacket *pkt, int quality, int64_t *error, int error_count, int pict_type); + +/** + * Check AVFrame for A53 side data and allocate and fill SEI message with A53 info + * + * @param frame Raw frame to get A53 side data from + * @param prefix_len Number of bytes to allocate before SEI message + * @param data Pointer to a variable to store allocated memory + * Upon return the variable will hold NULL on error or if frame has no A53 info. + * Otherwise it will point to prefix_len uninitialized bytes followed by + * *sei_size SEI message + * @param sei_size Pointer to a variable to store generated SEI message length + * @return Zero on success, negative error code on failure + */ +int ff_alloc_a53_sei(const AVFrame *frame, size_t prefix_len, + void **data, size_t *sei_size); + +/** + * Get an estimated video bitrate based on frame size, frame rate and coded + * bits per pixel. + */ +int64_t ff_guess_coded_bitrate(AVCodecContext *avctx); + +#if defined(_WIN32) && CONFIG_SHARED && !defined(BUILDING_avcodec) +# define av_export_avcodec __declspec(dllimport) +#else +# define av_export_avcodec +#endif + +#endif /* AVCODEC_INTERNAL_H */ diff --git a/libs/ffvpx/libavcodec/log2_tab.c b/libs/ffvpx/libavcodec/log2_tab.c new file mode 100644 index 000000000..47a1df03b --- /dev/null +++ b/libs/ffvpx/libavcodec/log2_tab.c @@ -0,0 +1 @@ +#include "libavutil/log2_tab.c" diff --git a/libs/ffvpx/libavcodec/mathops.h b/libs/ffvpx/libavcodec/mathops.h new file mode 100644 index 000000000..1c3566431 --- /dev/null +++ b/libs/ffvpx/libavcodec/mathops.h @@ -0,0 +1,251 @@ +/* + * simple math operations + * Copyright (c) 2001, 2002 Fabrice Bellard + * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef AVCODEC_MATHOPS_H +#define AVCODEC_MATHOPS_H + +#include <stdint.h> + +#include "libavutil/common.h" +#include "libavutil/reverse.h" +#include "config.h" + +#define MAX_NEG_CROP 1024 + +extern const uint32_t ff_inverse[257]; +extern const uint8_t ff_sqrt_tab[256]; +extern const uint8_t ff_crop_tab[256 + 2 * MAX_NEG_CROP]; +extern const uint8_t ff_zigzag_direct[64]; +extern const uint8_t ff_zigzag_scan[16+1]; + +#if ARCH_ARM +# include "arm/mathops.h" +#elif ARCH_AVR32 +# include "avr32/mathops.h" +#elif ARCH_MIPS +# include "mips/mathops.h" +#elif ARCH_PPC +# include "ppc/mathops.h" +#elif ARCH_X86 +# include "x86/mathops.h" +#endif + +/* generic implementation */ + +#ifndef MUL64 +# define MUL64(a,b) ((int64_t)(a) * (int64_t)(b)) +#endif + +#ifndef MULL +# define MULL(a,b,s) (MUL64(a, b) >> (s)) +#endif + +#ifndef MULH +static av_always_inline int MULH(int a, int b){ + return MUL64(a, b) >> 32; +} +#endif + +#ifndef UMULH +static av_always_inline unsigned UMULH(unsigned a, unsigned b){ + return ((uint64_t)(a) * (uint64_t)(b))>>32; +} +#endif + +#ifndef MAC64 +# define MAC64(d, a, b) ((d) += MUL64(a, b)) +#endif + +#ifndef MLS64 +# define MLS64(d, a, b) ((d) -= MUL64(a, b)) +#endif + +/* signed 16x16 -> 32 multiply add accumulate */ +#ifndef MAC16 +# define MAC16(rt, ra, rb) rt += (ra) * (rb) +#endif + +/* signed 16x16 -> 32 multiply */ +#ifndef MUL16 +# define MUL16(ra, rb) ((ra) * (rb)) +#endif + +#ifndef MLS16 +# define MLS16(rt, ra, rb) ((rt) -= (ra) * (rb)) +#endif + +/* median of 3 */ +#ifndef mid_pred +#define mid_pred mid_pred +static inline av_const int mid_pred(int a, int b, int c) +{ + if(a>b){ + if(c>b){ + if(c>a) b=a; + else b=c; + } + }else{ + if(b>c){ + if(c>a) b=c; + else b=a; + } + } + return b; +} +#endif + +#ifndef median4 +#define median4 median4 +static inline av_const int median4(int a, int b, int c, int d) +{ + if (a < b) { + if (c < d) return (FFMIN(b, d) + FFMAX(a, c)) / 2; + else return (FFMIN(b, c) + FFMAX(a, d)) / 2; + } else { + if (c < d) return (FFMIN(a, d) + FFMAX(b, c)) / 2; + else return (FFMIN(a, c) + FFMAX(b, d)) / 2; + } +} +#endif + +#ifndef sign_extend +static inline av_const int sign_extend(int val, unsigned bits) +{ + unsigned shift = 8 * sizeof(int) - bits; + union { unsigned u; int s; } v = { (unsigned) val << shift }; + return v.s >> shift; +} +#endif + +#ifndef zero_extend +static inline av_const unsigned zero_extend(unsigned val, unsigned bits) +{ + return (val << ((8 * sizeof(int)) - bits)) >> ((8 * sizeof(int)) - bits); +} +#endif + +#ifndef COPY3_IF_LT +#define COPY3_IF_LT(x, y, a, b, c, d)\ +if ((y) < (x)) {\ + (x) = (y);\ + (a) = (b);\ + (c) = (d);\ +} +#endif + +#ifndef MASK_ABS +#define MASK_ABS(mask, level) do { \ + mask = level >> 31; \ + level = (level ^ mask) - mask; \ + } while (0) +#endif + +#ifndef NEG_SSR32 +# define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s))) +#endif + +#ifndef NEG_USR32 +# define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s))) +#endif + +#if HAVE_BIGENDIAN +# ifndef PACK_2U8 +# define PACK_2U8(a,b) (((a) << 8) | (b)) +# endif +# ifndef PACK_4U8 +# define PACK_4U8(a,b,c,d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d)) +# endif +# ifndef PACK_2U16 +# define PACK_2U16(a,b) (((a) << 16) | (b)) +# endif +#else +# ifndef PACK_2U8 +# define PACK_2U8(a,b) (((b) << 8) | (a)) +# endif +# ifndef PACK_4U2 +# define PACK_4U8(a,b,c,d) (((d) << 24) | ((c) << 16) | ((b) << 8) | (a)) +# endif +# ifndef PACK_2U16 +# define PACK_2U16(a,b) (((b) << 16) | (a)) +# endif +#endif + +#ifndef PACK_2S8 +# define PACK_2S8(a,b) PACK_2U8((a)&255, (b)&255) +#endif +#ifndef PACK_4S8 +# define PACK_4S8(a,b,c,d) PACK_4U8((a)&255, (b)&255, (c)&255, (d)&255) +#endif +#ifndef PACK_2S16 +# define PACK_2S16(a,b) PACK_2U16((a)&0xffff, (b)&0xffff) +#endif + +#ifndef FASTDIV +# define FASTDIV(a,b) ((uint32_t)((((uint64_t)a) * ff_inverse[b]) >> 32)) +#endif /* FASTDIV */ + +#ifndef ff_sqrt +#define ff_sqrt ff_sqrt +static inline av_const unsigned int ff_sqrt(unsigned int a) +{ + unsigned int b; + + if (a < 255) return (ff_sqrt_tab[a + 1] - 1) >> 4; + else if (a < (1 << 12)) b = ff_sqrt_tab[a >> 4] >> 2; +#if !CONFIG_SMALL + else if (a < (1 << 14)) b = ff_sqrt_tab[a >> 6] >> 1; + else if (a < (1 << 16)) b = ff_sqrt_tab[a >> 8] ; +#endif + else { + int s = av_log2_16bit(a >> 16) >> 1; + unsigned int c = a >> (s + 2); + b = ff_sqrt_tab[c >> (s + 8)]; + b = FASTDIV(c,b) + (b << s); + } + + return b - (a < b * b); +} +#endif + +static inline av_const float ff_sqrf(float a) +{ + return a*a; +} + +static inline int8_t ff_u8_to_s8(uint8_t a) +{ + union { + uint8_t u8; + int8_t s8; + } b; + b.u8 = a; + return b.s8; +} + +static av_always_inline uint32_t bitswap_32(uint32_t x) +{ + return (uint32_t)ff_reverse[ x & 0xFF] << 24 | + (uint32_t)ff_reverse[(x >> 8) & 0xFF] << 16 | + (uint32_t)ff_reverse[(x >> 16) & 0xFF] << 8 | + (uint32_t)ff_reverse[ x >> 24]; +} + +#endif /* AVCODEC_MATHOPS_H */ diff --git a/libs/ffvpx/libavcodec/mathtables.c b/libs/ffvpx/libavcodec/mathtables.c new file mode 100644 index 000000000..81eabc7a6 --- /dev/null +++ b/libs/ffvpx/libavcodec/mathtables.c @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "mathops.h" + +/* a*inverse[b]>>32 == a/b for all 0<=a<=16909558 && 2<=b<=256 + * for a>16909558, is an overestimate by less than 1 part in 1<<24 */ +const uint32_t ff_inverse[257]={ + 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, + 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, + 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, + 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, + 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, + 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, + 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, + 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, + 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, + 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, + 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, + 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, + 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, + 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, + 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, + 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, + 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, + 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, + 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, + 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, + 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, + 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, + 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, + 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, + 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, + 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, + 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, + 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, + 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, + 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, + 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, + 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, + 16777216 +}; + +const uint8_t ff_sqrt_tab[256]={ + 0, 16, 23, 28, 32, 36, 40, 43, 46, 48, 51, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 77, 79, 80, 82, 84, 85, 87, 88, 90, + 91, 92, 94, 95, 96, 98, 99,100,102,103,104,105,107,108,109,110,111,112,114,115,116,117,118,119,120,121,122,123,124,125,126,127, +128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,144,145,146,147,148,149,150,151,151,152,153,154,155,156,156, +157,158,159,160,160,161,162,163,164,164,165,166,167,168,168,169,170,171,171,172,173,174,174,175,176,176,177,178,179,179,180,181, +182,182,183,184,184,185,186,186,187,188,188,189,190,190,191,192,192,193,194,194,195,196,196,197,198,198,199,200,200,201,202,202, +203,204,204,205,205,206,207,207,208,208,209,210,210,211,212,212,213,213,214,215,215,216,216,217,218,218,219,219,220,220,221,222, +222,223,223,224,224,225,226,226,227,227,228,228,229,230,230,231,231,232,232,233,233,234,235,235,236,236,237,237,238,238,239,239, +240,240,241,242,242,243,243,244,244,245,245,246,246,247,247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255,255,255 +}; + +#define times4(x) x, x, x, x +#define times256(x) times4(times4(times4(times4(times4(x))))) + +const uint8_t ff_crop_tab[256 + 2 * MAX_NEG_CROP] = { +times256(0x00), +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, +0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, +0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, +0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, +0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, +0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, +0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF, +times256(0xFF) +}; + +const uint8_t ff_zigzag_direct[64] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; + +const uint8_t ff_zigzag_scan[16+1] = { + 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, + 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, + 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, + 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, +}; diff --git a/libs/ffvpx/libavcodec/me_cmp.h b/libs/ffvpx/libavcodec/me_cmp.h new file mode 100644 index 000000000..0a589e3c3 --- /dev/null +++ b/libs/ffvpx/libavcodec/me_cmp.h @@ -0,0 +1,95 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_ME_CMP_H +#define AVCODEC_ME_CMP_H + +#include <stdint.h> + +#include "avcodec.h" + +extern const uint32_t ff_square_tab[512]; + + +/* minimum alignment rules ;) + * If you notice errors in the align stuff, need more alignment for some ASM code + * for some CPU or need to use a function with less aligned data then send a mail + * to the ffmpeg-devel mailing list, ... + * + * !warning These alignments might not match reality, (missing attribute((align)) + * stuff somewhere possible). + * I (Michael) did not check them, these are just the alignments which I think + * could be reached easily ... + * + * !future video codecs might need functions with less strict alignment + */ + +struct MpegEncContext; +/* Motion estimation: + * h is limited to { width / 2, width, 2 * width }, + * but never larger than 16 and never smaller than 2. + * Although currently h < 4 is not used as functions with + * width < 8 are neither used nor implemented. */ +typedef int (*me_cmp_func)(struct MpegEncContext *c, + uint8_t *blk1 /* align width (8 or 16) */, + uint8_t *blk2 /* align 1 */, ptrdiff_t stride, + int h); + +typedef struct MECmpContext { + int (*sum_abs_dctelem)(int16_t *block /* align 16 */); + + me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */ + me_cmp_func sse[6]; + me_cmp_func hadamard8_diff[6]; + me_cmp_func dct_sad[6]; + me_cmp_func quant_psnr[6]; + me_cmp_func bit[6]; + me_cmp_func rd[6]; + me_cmp_func vsad[6]; + me_cmp_func vsse[6]; + me_cmp_func nsse[6]; + me_cmp_func w53[6]; + me_cmp_func w97[6]; + me_cmp_func dct_max[6]; + me_cmp_func dct264_sad[6]; + + me_cmp_func me_pre_cmp[6]; + me_cmp_func me_cmp[6]; + me_cmp_func me_sub_cmp[6]; + me_cmp_func mb_cmp[6]; + me_cmp_func ildct_cmp[6]; // only width 16 used + me_cmp_func frame_skip_cmp[6]; // only width 8 used + + me_cmp_func pix_abs[2][4]; + me_cmp_func median_sad[6]; +} MECmpContext; + +int ff_check_alignment(void); + +void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx); +void ff_me_cmp_init_alpha(MECmpContext *c, AVCodecContext *avctx); +void ff_me_cmp_init_arm(MECmpContext *c, AVCodecContext *avctx); +void ff_me_cmp_init_ppc(MECmpContext *c, AVCodecContext *avctx); +void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx); +void ff_me_cmp_init_mips(MECmpContext *c, AVCodecContext *avctx); + +void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type); + +void ff_dsputil_init_dwt(MECmpContext *c); + +#endif /* AVCODEC_ME_CMP_H */ diff --git a/libs/ffvpx/libavcodec/motion_est.h b/libs/ffvpx/libavcodec/motion_est.h new file mode 100644 index 000000000..3b3a8d734 --- /dev/null +++ b/libs/ffvpx/libavcodec/motion_est.h @@ -0,0 +1,135 @@ +/* + * Motion estimation + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_MOTION_EST_H +#define AVCODEC_MOTION_EST_H + +#include <stdint.h> + +#include "avcodec.h" +#include "hpeldsp.h" +#include "qpeldsp.h" + +struct MpegEncContext; + +#if ARCH_IA64 // Limit static arrays to avoid gcc failing "short data segment overflowed" +#define MAX_MV 1024 +#else +#define MAX_MV 4096 +#endif +#define MAX_DMV (2*MAX_MV) +#define ME_MAP_SIZE 64 + +#define FF_ME_ZERO 0 +#define FF_ME_EPZS 1 +#define FF_ME_XONE 2 + +/** + * Motion estimation context. + */ +typedef struct MotionEstContext { + AVCodecContext *avctx; + int skip; ///< set if ME is skipped for the current MB + int co_located_mv[4][2]; ///< mv from last P-frame for direct mode ME + int direct_basis_mv[4][2]; + uint8_t *scratchpad; /**< data area for the ME algo, so that + * the ME does not need to malloc/free. */ + uint8_t *best_mb; + uint8_t *temp_mb[2]; + uint8_t *temp; + int best_bits; + uint32_t *map; ///< map to avoid duplicate evaluations + uint32_t *score_map; ///< map to store the scores + unsigned map_generation; + int pre_penalty_factor; + int penalty_factor; /**< an estimate of the bits required to + * code a given mv value, e.g. (1,0) takes + * more bits than (0,0). We have to + * estimate whether any reduction in + * residual is worth the extra bits. */ + int sub_penalty_factor; + int mb_penalty_factor; + int flags; + int sub_flags; + int mb_flags; + int pre_pass; ///< = 1 for the pre pass + int dia_size; + int xmin; + int xmax; + int ymin; + int ymax; + int pred_x; + int pred_y; + uint8_t *src[4][4]; + uint8_t *ref[4][4]; + int stride; + int uvstride; + /* temp variables for picture complexity calculation */ + int64_t mc_mb_var_sum_temp; + int64_t mb_var_sum_temp; + int scene_change_score; + + op_pixels_func(*hpel_put)[4]; + op_pixels_func(*hpel_avg)[4]; + qpel_mc_func(*qpel_put)[16]; + qpel_mc_func(*qpel_avg)[16]; + uint8_t (*mv_penalty)[MAX_DMV * 2 + 1]; ///< bit amount needed to encode a MV + uint8_t *current_mv_penalty; + int (*sub_motion_search)(struct MpegEncContext *s, + int *mx_ptr, int *my_ptr, int dmin, + int src_index, int ref_index, + int size, int h); +} MotionEstContext; + +static inline int ff_h263_round_chroma(int x) +{ + //FIXME static or not? + static const uint8_t h263_chroma_roundtab[16] = { + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, + }; + return h263_chroma_roundtab[x & 0xf] + (x >> 3); +} + +int ff_init_me(struct MpegEncContext *s); + +void ff_estimate_p_frame_motion(struct MpegEncContext *s, int mb_x, int mb_y); +void ff_estimate_b_frame_motion(struct MpegEncContext *s, int mb_x, int mb_y); + +int ff_pre_estimate_p_frame_motion(struct MpegEncContext *s, + int mb_x, int mb_y); + +int ff_epzs_motion_search(struct MpegEncContext *s, int *mx_ptr, int *my_ptr, + int P[10][2], int src_index, int ref_index, + int16_t (*last_mv)[2], int ref_mv_scale, int size, + int h); + +int ff_get_mb_score(struct MpegEncContext *s, int mx, int my, int src_index, + int ref_index, int size, int h, int add_rate); + +int ff_get_best_fcode(struct MpegEncContext *s, + int16_t (*mv_table)[2], int type); + +void ff_fix_long_p_mvs(struct MpegEncContext *s); +void ff_fix_long_mvs(struct MpegEncContext *s, uint8_t *field_select_table, + int field_select, int16_t (*mv_table)[2], int f_code, + int type, int truncate); + +#endif /* AVCODEC_MOTION_EST_H */ diff --git a/libs/ffvpx/libavcodec/moz.build b/libs/ffvpx/libavcodec/moz.build new file mode 100644 index 000000000..0dbabcf75 --- /dev/null +++ b/libs/ffvpx/libavcodec/moz.build @@ -0,0 +1,78 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Due to duplicate file names, we compile libavutil/x86 in its own +# moz.build file. +if CONFIG['FFVPX_ASFLAGS']: + DIRS += ['x86'] + +SharedLibrary('mozavcodec') +SOURCES += [ + 'allcodecs.c', + 'avpacket.c', + 'avpicture.c', + 'bitstream.c', + 'bitstream_filter.c', + 'bitstream_filters.c', + 'bsf.c', + 'codec_desc.c', + 'decode.c', + 'dummy_funcs.c', + 'flac.c', + 'flac_parser.c', + 'flacdata.c', + 'flacdec.c', + 'flacdsp.c', + 'golomb.c', + 'h264pred.c', + 'imgconvert.c', + 'log2_tab.c', + 'mathtables.c', + 'null_bsf.c', + 'options.c', + 'parser.c', + 'profiles.c', + 'pthread.c', + 'pthread_frame.c', + 'pthread_slice.c', + 'qsv_api.c', + 'raw.c', + 'reverse.c', + 'utils.c', + 'videodsp.c', + 'vorbis_parser.c', + 'vp56rac.c', + 'vp8.c', + 'vp8_parser.c', + 'vp8dsp.c', + 'vp9.c', + 'vp9_parser.c', + 'vp9_superframe_split_bsf.c', + 'vp9block.c', + 'vp9data.c', + 'vp9dsp.c', + 'vp9dsp_10bpp.c', + 'vp9dsp_12bpp.c', + 'vp9dsp_8bpp.c', + 'vp9lpf.c', + 'vp9mvs.c', + 'vp9prob.c', + 'vp9recon.c', + 'xiph.c' +] + +SYMBOLS_FILE = 'avcodec.symbols' +NO_VISIBILITY_FLAGS = True + +# GCC 10 defaults -fno-common, we don't care to solve this "properly" yet +# so use GCC < 10 behavior. +if CONFIG['GNU_CC'] and CONFIG['CC_VERSION'] >= '10.0.0': + CFLAGS += ['-fcommon'] + +USE_LIBS += [ + 'mozavutil' +] + +include("../ffvpxcommon.mozbuild") diff --git a/libs/ffvpx/libavcodec/mpeg12data.h b/libs/ffvpx/libavcodec/mpeg12data.h new file mode 100644 index 000000000..f51faf460 --- /dev/null +++ b/libs/ffvpx/libavcodec/mpeg12data.h @@ -0,0 +1,57 @@ +/* + * MPEG-1/2 tables + * copyright (c) 2000,2001 Fabrice Bellard + * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * MPEG-1/2 tables. + */ + +#ifndef AVCODEC_MPEG12DATA_H +#define AVCODEC_MPEG12DATA_H + +#include <stdint.h> +#include "libavutil/rational.h" +#include "rl.h" + +extern const uint16_t ff_mpeg1_default_intra_matrix[]; +extern const uint16_t ff_mpeg1_default_non_intra_matrix[64]; + +extern const uint16_t ff_mpeg12_vlc_dc_lum_code[12]; +extern const unsigned char ff_mpeg12_vlc_dc_lum_bits[12]; +extern const uint16_t ff_mpeg12_vlc_dc_chroma_code[12]; +extern const unsigned char ff_mpeg12_vlc_dc_chroma_bits[12]; + +extern RLTable ff_rl_mpeg1; +extern RLTable ff_rl_mpeg2; + +extern const uint8_t ff_mpeg12_mbAddrIncrTable[36][2]; +extern const uint8_t ff_mpeg12_mbPatTable[64][2]; + +extern const uint8_t ff_mpeg12_mbMotionVectorTable[17][2]; + +extern const AVRational ff_mpeg12_frame_rate_tab[]; +extern const AVRational ff_mpeg2_frame_rate_tab[]; + +extern const float ff_mpeg1_aspect[16]; +extern const AVRational ff_mpeg2_aspect[16]; + +#endif /* AVCODEC_MPEG12DATA_H */ diff --git a/libs/ffvpx/libavcodec/mpegpicture.h b/libs/ffvpx/libavcodec/mpegpicture.h new file mode 100644 index 000000000..2db3d6733 --- /dev/null +++ b/libs/ffvpx/libavcodec/mpegpicture.h @@ -0,0 +1,114 @@ +/* + * Mpeg video formats-related defines and utility functions + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_MPEGPICTURE_H +#define AVCODEC_MPEGPICTURE_H + +#include <stdint.h> + +#include "libavutil/frame.h" + +#include "avcodec.h" +#include "motion_est.h" +#include "thread.h" + +#define MAX_PICTURE_COUNT 36 +#define EDGE_WIDTH 16 + +typedef struct ScratchpadContext { + uint8_t *edge_emu_buffer; ///< temporary buffer for if MVs point to out-of-frame data + uint8_t *rd_scratchpad; ///< scratchpad for rate distortion mb decision + uint8_t *obmc_scratchpad; + uint8_t *b_scratchpad; ///< scratchpad used for writing into write only buffers +} ScratchpadContext; + +/** + * Picture. + */ +typedef struct Picture { + struct AVFrame *f; + ThreadFrame tf; + + AVBufferRef *qscale_table_buf; + int8_t *qscale_table; + + AVBufferRef *motion_val_buf[2]; + int16_t (*motion_val[2])[2]; + + AVBufferRef *mb_type_buf; + uint32_t *mb_type; ///< types and macros are defined in mpegutils.h + + AVBufferRef *mbskip_table_buf; + uint8_t *mbskip_table; + + AVBufferRef *ref_index_buf[2]; + int8_t *ref_index[2]; + + AVBufferRef *mb_var_buf; + uint16_t *mb_var; ///< Table for MB variances + + AVBufferRef *mc_mb_var_buf; + uint16_t *mc_mb_var; ///< Table for motion compensated MB variances + + int alloc_mb_width; ///< mb_width used to allocate tables + int alloc_mb_height; ///< mb_height used to allocate tables + + AVBufferRef *mb_mean_buf; + uint8_t *mb_mean; ///< Table for MB luminance + + AVBufferRef *hwaccel_priv_buf; + void *hwaccel_picture_private; ///< Hardware accelerator private data + + int field_picture; ///< whether or not the picture was encoded in separate fields + + int64_t mb_var_sum; ///< sum of MB variance for current frame + int64_t mc_mb_var_sum; ///< motion compensated MB variance for current frame + + int b_frame_score; + int needs_realloc; ///< Picture needs to be reallocated (eg due to a frame size change) + + int reference; + int shared; + + uint64_t encoding_error[AV_NUM_DATA_POINTERS]; +} Picture; + +/** + * Allocate a Picture. + * The pixels are allocated/set by calling get_buffer() if shared = 0. + */ +int ff_alloc_picture(AVCodecContext *avctx, Picture *pic, MotionEstContext *me, + ScratchpadContext *sc, int shared, int encoding, + int chroma_x_shift, int chroma_y_shift, int out_format, + int mb_stride, int mb_width, int mb_height, int b8_stride, + ptrdiff_t *linesize, ptrdiff_t *uvlinesize); + +int ff_mpeg_framesize_alloc(AVCodecContext *avctx, MotionEstContext *me, + ScratchpadContext *sc, int linesize); + +int ff_mpeg_ref_picture(AVCodecContext *avctx, Picture *dst, Picture *src); +void ff_mpeg_unref_picture(AVCodecContext *avctx, Picture *picture); + +void ff_free_picture_tables(Picture *pic); +int ff_update_picture_tables(Picture *dst, Picture *src); + +int ff_find_unused_picture(AVCodecContext *avctx, Picture *picture, int shared); + +#endif /* AVCODEC_MPEGPICTURE_H */ diff --git a/libs/ffvpx/libavcodec/mpegutils.h b/libs/ffvpx/libavcodec/mpegutils.h new file mode 100644 index 000000000..1ed21c19b --- /dev/null +++ b/libs/ffvpx/libavcodec/mpegutils.h @@ -0,0 +1,148 @@ +/* + * Mpeg video formats-related defines and utility functions + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_MPEGUTILS_H +#define AVCODEC_MPEGUTILS_H + +#include <stdint.h> + +#include "libavutil/frame.h" + +#include "avcodec.h" +#include "version.h" + +/** + * Return value for header parsers if frame is not coded. + * */ +#define FRAME_SKIPPED 100 + +/* picture type */ +#define PICT_TOP_FIELD 1 +#define PICT_BOTTOM_FIELD 2 +#define PICT_FRAME 3 + +/** + * Value of Picture.reference when Picture is not a reference picture, but + * is held for delayed output. + */ +#define DELAYED_PIC_REF 4 + +#define MAX_MB_BYTES (30 * 16 * 16 * 3 / 8 + 120) +#define MAX_FCODE 7 + +/* MB types */ +#define MB_TYPE_INTRA4x4 (1 << 0) +#define MB_TYPE_INTRA16x16 (1 << 1) // FIXME H.264-specific +#define MB_TYPE_INTRA_PCM (1 << 2) // FIXME H.264-specific +#define MB_TYPE_16x16 (1 << 3) +#define MB_TYPE_16x8 (1 << 4) +#define MB_TYPE_8x16 (1 << 5) +#define MB_TYPE_8x8 (1 << 6) +#define MB_TYPE_INTERLACED (1 << 7) +#define MB_TYPE_DIRECT2 (1 << 8) // FIXME +#define MB_TYPE_ACPRED (1 << 9) +#define MB_TYPE_GMC (1 << 10) +#define MB_TYPE_SKIP (1 << 11) +#define MB_TYPE_P0L0 (1 << 12) +#define MB_TYPE_P1L0 (1 << 13) +#define MB_TYPE_P0L1 (1 << 14) +#define MB_TYPE_P1L1 (1 << 15) +#define MB_TYPE_L0 (MB_TYPE_P0L0 | MB_TYPE_P1L0) +#define MB_TYPE_L1 (MB_TYPE_P0L1 | MB_TYPE_P1L1) +#define MB_TYPE_L0L1 (MB_TYPE_L0 | MB_TYPE_L1) +#define MB_TYPE_QUANT (1 << 16) +#define MB_TYPE_CBP (1 << 17) + +#define MB_TYPE_INTRA MB_TYPE_INTRA4x4 // default mb_type if there is just one type + +#define IS_INTRA4x4(a) ((a) & MB_TYPE_INTRA4x4) +#define IS_INTRA16x16(a) ((a) & MB_TYPE_INTRA16x16) +#define IS_PCM(a) ((a) & MB_TYPE_INTRA_PCM) +#define IS_INTRA(a) ((a) & 7) +#define IS_INTER(a) ((a) & (MB_TYPE_16x16 | MB_TYPE_16x8 | \ + MB_TYPE_8x16 | MB_TYPE_8x8)) +#define IS_SKIP(a) ((a) & MB_TYPE_SKIP) +#define IS_INTRA_PCM(a) ((a) & MB_TYPE_INTRA_PCM) +#define IS_INTERLACED(a) ((a) & MB_TYPE_INTERLACED) +#define IS_DIRECT(a) ((a) & MB_TYPE_DIRECT2) +#define IS_GMC(a) ((a) & MB_TYPE_GMC) +#define IS_16X16(a) ((a) & MB_TYPE_16x16) +#define IS_16X8(a) ((a) & MB_TYPE_16x8) +#define IS_8X16(a) ((a) & MB_TYPE_8x16) +#define IS_8X8(a) ((a) & MB_TYPE_8x8) +#define IS_SUB_8X8(a) ((a) & MB_TYPE_16x16) // note reused +#define IS_SUB_8X4(a) ((a) & MB_TYPE_16x8) // note reused +#define IS_SUB_4X8(a) ((a) & MB_TYPE_8x16) // note reused +#define IS_SUB_4X4(a) ((a) & MB_TYPE_8x8) // note reused +#define IS_ACPRED(a) ((a) & MB_TYPE_ACPRED) +#define IS_QUANT(a) ((a) & MB_TYPE_QUANT) +#define IS_DIR(a, part, list) ((a) & (MB_TYPE_P0L0 << ((part) + 2 * (list)))) + +// does this mb use listX, note does not work if subMBs +#define USES_LIST(a, list) ((a) & ((MB_TYPE_P0L0 | MB_TYPE_P1L0) << (2 * (list)))) + +#define HAS_CBP(a) ((a) & MB_TYPE_CBP) + +/* MB types for encoding */ +#define CANDIDATE_MB_TYPE_INTRA (1 << 0) +#define CANDIDATE_MB_TYPE_INTER (1 << 1) +#define CANDIDATE_MB_TYPE_INTER4V (1 << 2) +#define CANDIDATE_MB_TYPE_SKIPPED (1 << 3) + +#define CANDIDATE_MB_TYPE_DIRECT (1 << 4) +#define CANDIDATE_MB_TYPE_FORWARD (1 << 5) +#define CANDIDATE_MB_TYPE_BACKWARD (1 << 6) +#define CANDIDATE_MB_TYPE_BIDIR (1 << 7) + +#define CANDIDATE_MB_TYPE_INTER_I (1 << 8) +#define CANDIDATE_MB_TYPE_FORWARD_I (1 << 9) +#define CANDIDATE_MB_TYPE_BACKWARD_I (1 << 10) +#define CANDIDATE_MB_TYPE_BIDIR_I (1 << 11) + +#define CANDIDATE_MB_TYPE_DIRECT0 (1 << 12) + +#define INPLACE_OFFSET 16 + +enum OutputFormat { + FMT_MPEG1, + FMT_H261, + FMT_H263, + FMT_MJPEG, +}; + + +/** + * Draw a horizontal band if supported. + * + * @param h is the normal height, this will be reduced automatically if needed + */ +void ff_draw_horiz_band(AVCodecContext *avctx, AVFrame *cur, AVFrame *last, + int y, int h, int picture_structure, int first_field, + int low_delay); + +/** + * Print debugging info for the given picture. + */ +void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_table, + uint32_t *mbtype_table, int8_t *qscale_table, int16_t (*motion_val[2])[2], + int *low_delay, + int mb_width, int mb_height, int mb_stride, int quarter_sample); + +#endif /* AVCODEC_MPEGUTILS_H */ diff --git a/libs/ffvpx/libavcodec/mpegvideo.h b/libs/ffvpx/libavcodec/mpegvideo.h new file mode 100644 index 000000000..541909cbb --- /dev/null +++ b/libs/ffvpx/libavcodec/mpegvideo.h @@ -0,0 +1,772 @@ +/* + * Generic DCT based hybrid video encoder + * Copyright (c) 2000, 2001, 2002 Fabrice Bellard + * Copyright (c) 2002-2004 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * mpegvideo header. + */ + +#ifndef AVCODEC_MPEGVIDEO_H +#define AVCODEC_MPEGVIDEO_H + +#include <float.h> + +#include "avcodec.h" +#include "blockdsp.h" +#include "error_resilience.h" +#include "fdctdsp.h" +#include "get_bits.h" +#include "h264chroma.h" +#include "h263dsp.h" +#include "hpeldsp.h" +#include "idctdsp.h" +#include "internal.h" +#include "me_cmp.h" +#include "motion_est.h" +#include "mpegpicture.h" +#include "mpegvideodsp.h" +#include "mpegvideoencdsp.h" +#include "mpegvideodata.h" +#include "pixblockdsp.h" +#include "put_bits.h" +#include "ratecontrol.h" +#include "parser.h" +#include "mpegutils.h" +#include "mpeg12data.h" +#include "qpeldsp.h" +#include "thread.h" +#include "videodsp.h" + +#include "libavutil/opt.h" +#include "libavutil/timecode.h" + +#define MAX_THREADS 32 + +#define MAX_B_FRAMES 16 + +/* Start codes. */ +#define SEQ_END_CODE 0x000001b7 +#define SEQ_START_CODE 0x000001b3 +#define GOP_START_CODE 0x000001b8 +#define PICTURE_START_CODE 0x00000100 +#define SLICE_MIN_START_CODE 0x00000101 +#define SLICE_MAX_START_CODE 0x000001af +#define EXT_START_CODE 0x000001b5 +#define USER_START_CODE 0x000001b2 +#define SLICE_START_CODE 0x000001b7 + + +/** + * MpegEncContext. + */ +typedef struct MpegEncContext { + AVClass *class; + + int y_dc_scale, c_dc_scale; + int ac_pred; + int block_last_index[12]; ///< last non zero coefficient in block + int h263_aic; ///< Advanced INTRA Coding (AIC) + + /* scantables */ + ScanTable inter_scantable; ///< if inter == intra then intra should be used to reduce the cache usage + ScanTable intra_scantable; + ScanTable intra_h_scantable; + ScanTable intra_v_scantable; + + /* WARNING: changes above this line require updates to hardcoded + * offsets used in ASM. */ + + struct AVCodecContext *avctx; + /* the following parameters must be initialized before encoding */ + int width, height;///< picture size. must be a multiple of 16 + int gop_size; + int intra_only; ///< if true, only intra pictures are generated + int64_t bit_rate; ///< wanted bit rate + enum OutputFormat out_format; ///< output format + int h263_pred; ///< use MPEG-4/H.263 ac/dc predictions + int pb_frame; ///< PB-frame mode (0 = none, 1 = base, 2 = improved) + +/* the following codec id fields are deprecated in favor of codec_id */ + int h263_plus; ///< H.263+ headers + int h263_flv; ///< use flv H.263 header + + enum AVCodecID codec_id; /* see AV_CODEC_ID_xxx */ + int fixed_qscale; ///< fixed qscale if non zero + int encoding; ///< true if we are encoding (vs decoding) + int max_b_frames; ///< max number of B-frames for encoding + int luma_elim_threshold; + int chroma_elim_threshold; + int strict_std_compliance; ///< strictly follow the std (MPEG-4, ...) + int workaround_bugs; ///< workaround bugs in encoders which cannot be detected automatically + int codec_tag; ///< internal codec_tag upper case converted from avctx codec_tag + /* the following fields are managed internally by the encoder */ + + /* sequence parameters */ + int context_initialized; + int input_picture_number; ///< used to set pic->display_picture_number, should not be used for/by anything else + int coded_picture_number; ///< used to set pic->coded_picture_number, should not be used for/by anything else + int picture_number; //FIXME remove, unclear definition + int picture_in_gop_number; ///< 0-> first pic in gop, ... + int mb_width, mb_height; ///< number of MBs horizontally & vertically + int mb_stride; ///< mb_width+1 used for some arrays to allow simple addressing of left & top MBs without sig11 + int b8_stride; ///< 2*mb_width+1 used for some 8x8 block arrays to allow simple addressing + int h_edge_pos, v_edge_pos;///< horizontal / vertical position of the right/bottom edge (pixel replication) + int mb_num; ///< number of MBs of a picture + ptrdiff_t linesize; ///< line size, in bytes, may be different from width + ptrdiff_t uvlinesize; ///< line size, for chroma in bytes, may be different from width + Picture *picture; ///< main picture buffer + Picture **input_picture; ///< next pictures on display order for encoding + Picture **reordered_input_picture; ///< pointer to the next pictures in coded order for encoding + + int64_t user_specified_pts; ///< last non-zero pts from AVFrame which was passed into avcodec_encode_video2() + /** + * pts difference between the first and second input frame, used for + * calculating dts of the first frame when there's a delay */ + int64_t dts_delta; + /** + * reordered pts to be used as dts for the next output frame when there's + * a delay */ + int64_t reordered_pts; + + /** bit output */ + PutBitContext pb; + + int start_mb_y; ///< start mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y) + int end_mb_y; ///< end mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y) + struct MpegEncContext *thread_context[MAX_THREADS]; + int slice_context_count; ///< number of used thread_contexts + + /** + * copy of the previous picture structure. + * note, linesize & data, might not match the previous picture (for field pictures) + */ + Picture last_picture; + + /** + * copy of the next picture structure. + * note, linesize & data, might not match the next picture (for field pictures) + */ + Picture next_picture; + + /** + * copy of the source picture structure for encoding. + * note, linesize & data, might not match the source picture (for field pictures) + */ + Picture new_picture; + + /** + * copy of the current picture structure. + * note, linesize & data, might not match the current picture (for field pictures) + */ + Picture current_picture; ///< buffer to store the decompressed current picture + + Picture *last_picture_ptr; ///< pointer to the previous picture. + Picture *next_picture_ptr; ///< pointer to the next picture (for bidir pred) + Picture *current_picture_ptr; ///< pointer to the current picture + int last_dc[3]; ///< last DC values for MPEG-1 + int16_t *dc_val_base; + int16_t *dc_val[3]; ///< used for MPEG-4 DC prediction, all 3 arrays must be continuous + const uint8_t *y_dc_scale_table; ///< qscale -> y_dc_scale table + const uint8_t *c_dc_scale_table; ///< qscale -> c_dc_scale table + const uint8_t *chroma_qscale_table; ///< qscale -> chroma_qscale (H.263) + uint8_t *coded_block_base; + uint8_t *coded_block; ///< used for coded block pattern prediction (msmpeg4v3, wmv1) + int16_t (*ac_val_base)[16]; + int16_t (*ac_val[3])[16]; ///< used for MPEG-4 AC prediction, all 3 arrays must be continuous + int mb_skipped; ///< MUST BE SET only during DECODING + uint8_t *mbskip_table; /**< used to avoid copy if macroblock skipped (for black regions for example) + and used for B-frame encoding & decoding (contains skip table of next P-frame) */ + uint8_t *mbintra_table; ///< used to avoid setting {ac, dc, cbp}-pred stuff to zero on inter MB decoding + uint8_t *cbp_table; ///< used to store cbp, ac_pred for partitioned decoding + uint8_t *pred_dir_table; ///< used to store pred_dir for partitioned decoding + + ScratchpadContext sc; + + int qscale; ///< QP + int chroma_qscale; ///< chroma QP + unsigned int lambda; ///< Lagrange multiplier used in rate distortion + unsigned int lambda2; ///< (lambda*lambda) >> FF_LAMBDA_SHIFT + int *lambda_table; + int adaptive_quant; ///< use adaptive quantization + int dquant; ///< qscale difference to prev qscale + int closed_gop; ///< MPEG1/2 GOP is closed + int pict_type; ///< AV_PICTURE_TYPE_I, AV_PICTURE_TYPE_P, AV_PICTURE_TYPE_B, ... + int vbv_delay; + int last_pict_type; //FIXME removes + int last_non_b_pict_type; ///< used for MPEG-4 gmc B-frames & ratecontrol + int droppable; + int frame_rate_index; + AVRational mpeg2_frame_rate_ext; + int last_lambda_for[5]; ///< last lambda for a specific pict type + int skipdct; ///< skip dct and code zero residual + + /* motion compensation */ + int unrestricted_mv; ///< mv can point outside of the coded picture + int h263_long_vectors; ///< use horrible H.263v1 long vector mode + + BlockDSPContext bdsp; + FDCTDSPContext fdsp; + H264ChromaContext h264chroma; + HpelDSPContext hdsp; + IDCTDSPContext idsp; + MECmpContext mecc; + MpegVideoDSPContext mdsp; + MpegvideoEncDSPContext mpvencdsp; + PixblockDSPContext pdsp; + QpelDSPContext qdsp; + VideoDSPContext vdsp; + H263DSPContext h263dsp; + int f_code; ///< forward MV resolution + int b_code; ///< backward MV resolution for B-frames (MPEG-4) + int16_t (*p_mv_table_base)[2]; + int16_t (*b_forw_mv_table_base)[2]; + int16_t (*b_back_mv_table_base)[2]; + int16_t (*b_bidir_forw_mv_table_base)[2]; + int16_t (*b_bidir_back_mv_table_base)[2]; + int16_t (*b_direct_mv_table_base)[2]; + int16_t (*p_field_mv_table_base[2][2])[2]; + int16_t (*b_field_mv_table_base[2][2][2])[2]; + int16_t (*p_mv_table)[2]; ///< MV table (1MV per MB) P-frame encoding + int16_t (*b_forw_mv_table)[2]; ///< MV table (1MV per MB) forward mode B-frame encoding + int16_t (*b_back_mv_table)[2]; ///< MV table (1MV per MB) backward mode B-frame encoding + int16_t (*b_bidir_forw_mv_table)[2]; ///< MV table (1MV per MB) bidir mode B-frame encoding + int16_t (*b_bidir_back_mv_table)[2]; ///< MV table (1MV per MB) bidir mode B-frame encoding + int16_t (*b_direct_mv_table)[2]; ///< MV table (1MV per MB) direct mode B-frame encoding + int16_t (*p_field_mv_table[2][2])[2]; ///< MV table (2MV per MB) interlaced P-frame encoding + int16_t (*b_field_mv_table[2][2][2])[2];///< MV table (4MV per MB) interlaced B-frame encoding + uint8_t (*p_field_select_table[2]); + uint8_t (*b_field_select_table[2][2]); + int motion_est; ///< ME algorithm + int me_penalty_compensation; + int me_pre; ///< prepass for motion estimation + int mv_dir; +#define MV_DIR_FORWARD 1 +#define MV_DIR_BACKWARD 2 +#define MV_DIRECT 4 ///< bidirectional mode where the difference equals the MV of the last P/S/I-Frame (MPEG-4) + int mv_type; +#define MV_TYPE_16X16 0 ///< 1 vector for the whole mb +#define MV_TYPE_8X8 1 ///< 4 vectors (H.263, MPEG-4 4MV) +#define MV_TYPE_16X8 2 ///< 2 vectors, one per 16x8 block +#define MV_TYPE_FIELD 3 ///< 2 vectors, one per field +#define MV_TYPE_DMV 4 ///< 2 vectors, special mpeg2 Dual Prime Vectors + /**motion vectors for a macroblock + first coordinate : 0 = forward 1 = backward + second " : depend on type + third " : 0 = x, 1 = y + */ + int mv[2][4][2]; + int field_select[2][2]; + int last_mv[2][2][2]; ///< last MV, used for MV prediction in MPEG-1 & B-frame MPEG-4 + uint8_t *fcode_tab; ///< smallest fcode needed for each MV + int16_t direct_scale_mv[2][64]; ///< precomputed to avoid divisions in ff_mpeg4_set_direct_mv + + MotionEstContext me; + + int no_rounding; /**< apply no rounding to motion compensation (MPEG-4, msmpeg4, ...) + for B-frames rounding mode is always 0 */ + + /* macroblock layer */ + int mb_x, mb_y; + int mb_skip_run; + int mb_intra; + uint16_t *mb_type; ///< Table for candidate MB types for encoding (defines in mpegutils.h) + + int block_index[6]; ///< index to current MB in block based arrays with edges + int block_wrap[6]; + uint8_t *dest[3]; + + int *mb_index2xy; ///< mb_index -> mb_x + mb_y*mb_stride + + /** matrix transmitted in the bitstream */ + uint16_t intra_matrix[64]; + uint16_t chroma_intra_matrix[64]; + uint16_t inter_matrix[64]; + uint16_t chroma_inter_matrix[64]; + int force_duplicated_matrix; ///< Force duplication of mjpeg matrices, useful for rtp streaming + + int intra_quant_bias; ///< bias for the quantizer + int inter_quant_bias; ///< bias for the quantizer + int min_qcoeff; ///< minimum encodable coefficient + int max_qcoeff; ///< maximum encodable coefficient + int ac_esc_length; ///< num of bits needed to encode the longest esc + uint8_t *intra_ac_vlc_length; + uint8_t *intra_ac_vlc_last_length; + uint8_t *intra_chroma_ac_vlc_length; + uint8_t *intra_chroma_ac_vlc_last_length; + uint8_t *inter_ac_vlc_length; + uint8_t *inter_ac_vlc_last_length; + uint8_t *luma_dc_vlc_length; +#define UNI_AC_ENC_INDEX(run,level) ((run)*128 + (level)) + + int coded_score[12]; + + /** precomputed matrix (combine qscale and DCT renorm) */ + int (*q_intra_matrix)[64]; + int (*q_chroma_intra_matrix)[64]; + int (*q_inter_matrix)[64]; + /** identical to the above but for MMX & these are not permutated, second 64 entries are bias*/ + uint16_t (*q_intra_matrix16)[2][64]; + uint16_t (*q_chroma_intra_matrix16)[2][64]; + uint16_t (*q_inter_matrix16)[2][64]; + + /* noise reduction */ + int (*dct_error_sum)[64]; + int dct_count[2]; + uint16_t (*dct_offset)[64]; + + /* bit rate control */ + int64_t total_bits; + int frame_bits; ///< bits used for the current frame + int stuffing_bits; ///< bits used for stuffing + int next_lambda; ///< next lambda used for retrying to encode a frame + RateControlContext rc_context; ///< contains stuff only accessed in ratecontrol.c + + /* statistics, used for 2-pass encoding */ + int mv_bits; + int header_bits; + int i_tex_bits; + int p_tex_bits; + int i_count; + int f_count; + int b_count; + int skip_count; + int misc_bits; ///< cbp, mb_type + int last_bits; ///< temp var used for calculating the above vars + + /* error concealment / resync */ + int resync_mb_x; ///< x position of last resync marker + int resync_mb_y; ///< y position of last resync marker + GetBitContext last_resync_gb; ///< used to search for the next resync marker + int mb_num_left; ///< number of MBs left in this video packet (for partitioned Slices only) + int next_p_frame_damaged; ///< set if the next p frame is damaged, to avoid showing trashed B-frames + + ParseContext parse_context; + + /* H.263 specific */ + int gob_index; + int obmc; ///< overlapped block motion compensation + int mb_info; ///< interval for outputting info about mb offsets as side data + int prev_mb_info, last_mb_info; + uint8_t *mb_info_ptr; + int mb_info_size; + int ehc_mode; + int rc_strategy; + + /* H.263+ specific */ + int umvplus; ///< == H.263+ && unrestricted_mv + int h263_aic_dir; ///< AIC direction: 0 = left, 1 = top + int h263_slice_structured; + int alt_inter_vlc; ///< alternative inter vlc + int modified_quant; + int loop_filter; + int custom_pcf; + + /* MPEG-4 specific */ + int studio_profile; + int dct_precision; + ///< number of bits to represent the fractional part of time (encoder only) + int time_increment_bits; + int last_time_base; + int time_base; ///< time in seconds of last I,P,S Frame + int64_t time; ///< time of current frame + int64_t last_non_b_time; + uint16_t pp_time; ///< time distance between the last 2 p,s,i frames + uint16_t pb_time; ///< time distance between the last b and p,s,i frame + uint16_t pp_field_time; + uint16_t pb_field_time; ///< like above, just for interlaced + int real_sprite_warping_points; + int sprite_offset[2][2]; ///< sprite offset[isChroma][isMVY] + int sprite_delta[2][2]; ///< sprite_delta [isY][isMVY] + int mcsel; + int quant_precision; + int quarter_sample; ///< 1->qpel, 0->half pel ME/MC + int aspect_ratio_info; //FIXME remove + int sprite_warping_accuracy; + int data_partitioning; ///< data partitioning flag from header + int partitioned_frame; ///< is current frame partitioned + int low_delay; ///< no reordering needed / has no B-frames + int vo_type; + PutBitContext tex_pb; ///< used for data partitioned VOPs + PutBitContext pb2; ///< used for data partitioned VOPs + int mpeg_quant; + int padding_bug_score; ///< used to detect the VERY common padding bug in MPEG-4 + + /* divx specific, used to workaround (many) bugs in divx5 */ + int divx_packed; + uint8_t *bitstream_buffer; //Divx 5.01 puts several frames in a single one, this is used to reorder them + int bitstream_buffer_size; + unsigned int allocated_bitstream_buffer_size; + + /* RV10 specific */ + int rv10_version; ///< RV10 version: 0 or 3 + int rv10_first_dc_coded[3]; + + /* MJPEG specific */ + struct MJpegContext *mjpeg_ctx; + int esc_pos; + int pred; + int huffman; + + /* MSMPEG4 specific */ + int mv_table_index; + int rl_table_index; + int rl_chroma_table_index; + int dc_table_index; + int use_skip_mb_code; + int slice_height; ///< in macroblocks + int first_slice_line; ///< used in MPEG-4 too to handle resync markers + int flipflop_rounding; + int msmpeg4_version; ///< 0=not msmpeg4, 1=mp41, 2=mp42, 3=mp43/divx3 4=wmv1/7 5=wmv2/8 + int per_mb_rl_table; + int esc3_level_length; + int esc3_run_length; + /** [mb_intra][isChroma][level][run][last] */ + int (*ac_stats)[2][MAX_LEVEL+1][MAX_RUN+1][2]; + int inter_intra_pred; + int mspel; + + /* decompression specific */ + GetBitContext gb; + + /* MPEG-1 specific */ + int gop_picture_number; ///< index of the first picture of a GOP based on fake_pic_num & MPEG-1 specific + int last_mv_dir; ///< last mv_dir, used for B-frame encoding + uint8_t *vbv_delay_ptr; ///< pointer to vbv_delay in the bitstream + + /* MPEG-2-specific - I wished not to have to support this mess. */ + int progressive_sequence; + int mpeg_f_code[2][2]; + + // picture structure defines are loaded from mpegutils.h + int picture_structure; + + int64_t timecode_frame_start; ///< GOP timecode frame start number, in non drop frame format + int intra_dc_precision; + int frame_pred_frame_dct; + int top_field_first; + int concealment_motion_vectors; + int q_scale_type; + int brd_scale; + int intra_vlc_format; + int alternate_scan; + int seq_disp_ext; + int video_format; +#define VIDEO_FORMAT_COMPONENT 0 +#define VIDEO_FORMAT_PAL 1 +#define VIDEO_FORMAT_NTSC 2 +#define VIDEO_FORMAT_SECAM 3 +#define VIDEO_FORMAT_MAC 4 +#define VIDEO_FORMAT_UNSPECIFIED 5 + int repeat_first_field; + int chroma_420_type; + int chroma_format; +#define CHROMA_420 1 +#define CHROMA_422 2 +#define CHROMA_444 3 + int chroma_x_shift;//depend on pix_format, that depend on chroma_format + int chroma_y_shift; + + int progressive_frame; + int full_pel[2]; + int interlaced_dct; + int first_field; ///< is 1 for the first field of a field picture 0 otherwise + int drop_frame_timecode; ///< timecode is in drop frame format. + int scan_offset; ///< reserve space for SVCD scan offset user data. + + /* RTP specific */ + int rtp_mode; + int rtp_payload_size; + + char *tc_opt_str; ///< timecode option string + AVTimecode tc; ///< timecode context + + uint8_t *ptr_lastgob; + int swap_uv; //vcr2 codec is an MPEG-2 variant with U and V swapped + int pack_pblocks; //xvmc needs to keep blocks without gaps. + int16_t (*pblocks[12])[64]; + + int16_t (*block)[64]; ///< points to one of the following blocks + int16_t (*blocks)[12][64]; // for HQ mode we need to keep the best block + int (*decode_mb)(struct MpegEncContext *s, int16_t block[12][64]); // used by some codecs to avoid a switch() + + int32_t (*block32)[12][64]; + +#define SLICE_OK 0 +#define SLICE_ERROR -1 +#define SLICE_END -2 ///<end marker found +#define SLICE_NOEND -3 ///<no end marker or error found but mb count exceeded + + void (*dct_unquantize_mpeg1_intra)(struct MpegEncContext *s, + int16_t *block/*align 16*/, int n, int qscale); + void (*dct_unquantize_mpeg1_inter)(struct MpegEncContext *s, + int16_t *block/*align 16*/, int n, int qscale); + void (*dct_unquantize_mpeg2_intra)(struct MpegEncContext *s, + int16_t *block/*align 16*/, int n, int qscale); + void (*dct_unquantize_mpeg2_inter)(struct MpegEncContext *s, + int16_t *block/*align 16*/, int n, int qscale); + void (*dct_unquantize_h263_intra)(struct MpegEncContext *s, + int16_t *block/*align 16*/, int n, int qscale); + void (*dct_unquantize_h263_inter)(struct MpegEncContext *s, + int16_t *block/*align 16*/, int n, int qscale); + void (*dct_unquantize_intra)(struct MpegEncContext *s, // unquantizer to use (MPEG-4 can use both) + int16_t *block/*align 16*/, int n, int qscale); + void (*dct_unquantize_inter)(struct MpegEncContext *s, // unquantizer to use (MPEG-4 can use both) + int16_t *block/*align 16*/, int n, int qscale); + int (*dct_quantize)(struct MpegEncContext *s, int16_t *block/*align 16*/, int n, int qscale, int *overflow); + int (*fast_dct_quantize)(struct MpegEncContext *s, int16_t *block/*align 16*/, int n, int qscale, int *overflow); + void (*denoise_dct)(struct MpegEncContext *s, int16_t *block); + + int mpv_flags; ///< flags set by private options + int quantizer_noise_shaping; + + /** + * ratecontrol qmin qmax limiting method + * 0-> clipping, 1-> use a nice continuous function to limit qscale within qmin/qmax. + */ + float rc_qsquish; + float rc_qmod_amp; + int rc_qmod_freq; + float rc_initial_cplx; + float rc_buffer_aggressivity; + float border_masking; + int lmin, lmax; + int vbv_ignore_qmax; + + char *rc_eq; + + /* temp buffers for rate control */ + float *cplx_tab, *bits_tab; + + /* flag to indicate a reinitialization is required, e.g. after + * a frame size change */ + int context_reinit; + + ERContext er; + + int error_rate; + + /* temporary frames used by b_frame_strategy = 2 */ + AVFrame *tmp_frames[MAX_B_FRAMES + 2]; + int b_frame_strategy; + int b_sensitivity; + + /* frame skip options for encoding */ + int frame_skip_threshold; + int frame_skip_factor; + int frame_skip_exp; + int frame_skip_cmp; + + int scenechange_threshold; + int noise_reduction; +} MpegEncContext; + +/* mpegvideo_enc common options */ +#define FF_MPV_FLAG_SKIP_RD 0x0001 +#define FF_MPV_FLAG_STRICT_GOP 0x0002 +#define FF_MPV_FLAG_QP_RD 0x0004 +#define FF_MPV_FLAG_CBP_RD 0x0008 +#define FF_MPV_FLAG_NAQ 0x0010 +#define FF_MPV_FLAG_MV0 0x0020 + +enum rc_strategy { + MPV_RC_STRATEGY_FFMPEG, + MPV_RC_STRATEGY_XVID, + NB_MPV_RC_STRATEGY +}; + +#define FF_MPV_OPT_CMP_FUNC \ +{ "sad", "Sum of absolute differences, fast", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SAD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \ +{ "sse", "Sum of squared errors", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SSE }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \ +{ "satd", "Sum of absolute Hadamard transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SATD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \ +{ "dct", "Sum of absolute DCT transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCT }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \ +{ "psnr", "Sum of squared quantization errors, low quality", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_PSNR }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \ +{ "bit", "Number of bits needed for the block", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_BIT }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \ +{ "rd", "Rate distortion optimal, slow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_RD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \ +{ "zero", "Zero", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_ZERO }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \ +{ "vsad", "Sum of absolute vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSAD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \ +{ "vsse", "Sum of squared vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSSE }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \ +{ "nsse", "Noise preserving sum of squared differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_NSSE }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \ +{ "dct264", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCT264 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \ +{ "dctmax", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \ +{ "chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \ +{ "msad", "Sum of absolute differences, median predicted", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_MEDIAN_SAD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" } + +#ifndef FF_MPV_OFFSET +#define FF_MPV_OFFSET(x) offsetof(MpegEncContext, x) +#endif +#define FF_MPV_OPT_FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) +#define FF_MPV_COMMON_OPTS \ +FF_MPV_OPT_CMP_FUNC, \ +{ "mpv_flags", "Flags common for all mpegvideo-based encoders.", FF_MPV_OFFSET(mpv_flags), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "mpv_flags" },\ +{ "skip_rd", "RD optimal MB level residual skipping", 0, AV_OPT_TYPE_CONST, { .i64 = FF_MPV_FLAG_SKIP_RD }, 0, 0, FF_MPV_OPT_FLAGS, "mpv_flags" },\ +{ "strict_gop", "Strictly enforce gop size", 0, AV_OPT_TYPE_CONST, { .i64 = FF_MPV_FLAG_STRICT_GOP }, 0, 0, FF_MPV_OPT_FLAGS, "mpv_flags" },\ +{ "qp_rd", "Use rate distortion optimization for qp selection", 0, AV_OPT_TYPE_CONST, { .i64 = FF_MPV_FLAG_QP_RD }, 0, 0, FF_MPV_OPT_FLAGS, "mpv_flags" },\ +{ "cbp_rd", "use rate distortion optimization for CBP", 0, AV_OPT_TYPE_CONST, { .i64 = FF_MPV_FLAG_CBP_RD }, 0, 0, FF_MPV_OPT_FLAGS, "mpv_flags" },\ +{ "naq", "normalize adaptive quantization", 0, AV_OPT_TYPE_CONST, { .i64 = FF_MPV_FLAG_NAQ }, 0, 0, FF_MPV_OPT_FLAGS, "mpv_flags" },\ +{ "mv0", "always try a mb with mv=<0,0>", 0, AV_OPT_TYPE_CONST, { .i64 = FF_MPV_FLAG_MV0 }, 0, 0, FF_MPV_OPT_FLAGS, "mpv_flags" },\ +{ "luma_elim_threshold", "single coefficient elimination threshold for luminance (negative values also consider dc coefficient)",\ + FF_MPV_OFFSET(luma_elim_threshold), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS },\ +{ "chroma_elim_threshold", "single coefficient elimination threshold for chrominance (negative values also consider dc coefficient)",\ + FF_MPV_OFFSET(chroma_elim_threshold), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS },\ +{ "quantizer_noise_shaping", NULL, FF_MPV_OFFSET(quantizer_noise_shaping), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FF_MPV_OPT_FLAGS },\ +{ "error_rate", "Simulate errors in the bitstream to test error concealment.", \ + FF_MPV_OFFSET(error_rate), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FF_MPV_OPT_FLAGS },\ +{"qsquish", "how to keep quantizer between qmin and qmax (0 = clip, 1 = use differentiable function)", \ + FF_MPV_OFFSET(rc_qsquish), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, 0, 99, FF_MPV_OPT_FLAGS}, \ +{"rc_qmod_amp", "experimental quantizer modulation", FF_MPV_OFFSET(rc_qmod_amp), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, FF_MPV_OPT_FLAGS}, \ +{"rc_qmod_freq", "experimental quantizer modulation", FF_MPV_OFFSET(rc_qmod_freq), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS}, \ +{"rc_eq", "Set rate control equation. When computing the expression, besides the standard functions " \ + "defined in the section 'Expression Evaluation', the following functions are available: " \ + "bits2qp(bits), qp2bits(qp). Also the following constants are available: iTex pTex tex mv " \ + "fCode iCount mcVar var isI isP isB avgQP qComp avgIITex avgPITex avgPPTex avgBPTex avgTex.", \ + FF_MPV_OFFSET(rc_eq), AV_OPT_TYPE_STRING, .flags = FF_MPV_OPT_FLAGS }, \ +{"rc_init_cplx", "initial complexity for 1-pass encoding", FF_MPV_OFFSET(rc_initial_cplx), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, FF_MPV_OPT_FLAGS}, \ +{"rc_buf_aggressivity", "currently useless", FF_MPV_OFFSET(rc_buffer_aggressivity), AV_OPT_TYPE_FLOAT, {.dbl = 1.0 }, -FLT_MAX, FLT_MAX, FF_MPV_OPT_FLAGS}, \ +{"border_mask", "increase the quantizer for macroblocks close to borders", FF_MPV_OFFSET(border_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, FF_MPV_OPT_FLAGS}, \ +{"lmin", "minimum Lagrange factor (VBR)", FF_MPV_OFFSET(lmin), AV_OPT_TYPE_INT, {.i64 = 2*FF_QP2LAMBDA }, 0, INT_MAX, FF_MPV_OPT_FLAGS }, \ +{"lmax", "maximum Lagrange factor (VBR)", FF_MPV_OFFSET(lmax), AV_OPT_TYPE_INT, {.i64 = 31*FF_QP2LAMBDA }, 0, INT_MAX, FF_MPV_OPT_FLAGS }, \ +{"ibias", "intra quant bias", FF_MPV_OFFSET(intra_quant_bias), AV_OPT_TYPE_INT, {.i64 = FF_DEFAULT_QUANT_BIAS }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \ +{"pbias", "inter quant bias", FF_MPV_OFFSET(inter_quant_bias), AV_OPT_TYPE_INT, {.i64 = FF_DEFAULT_QUANT_BIAS }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \ +{"rc_strategy", "ratecontrol method", FF_MPV_OFFSET(rc_strategy), AV_OPT_TYPE_INT, {.i64 = MPV_RC_STRATEGY_FFMPEG }, 0, NB_MPV_RC_STRATEGY-1, FF_MPV_OPT_FLAGS, "rc_strategy" }, \ + { "ffmpeg", "default native rate control", 0, AV_OPT_TYPE_CONST, { .i64 = MPV_RC_STRATEGY_FFMPEG }, 0, 0, FF_MPV_OPT_FLAGS, "rc_strategy" }, \ + { "xvid", "libxvid (2 pass only)", 0, AV_OPT_TYPE_CONST, { .i64 = MPV_RC_STRATEGY_XVID }, 0, 0, FF_MPV_OPT_FLAGS, "rc_strategy" }, \ +{"motion_est", "motion estimation algorithm", FF_MPV_OFFSET(motion_est), AV_OPT_TYPE_INT, {.i64 = FF_ME_EPZS }, FF_ME_ZERO, FF_ME_XONE, FF_MPV_OPT_FLAGS, "motion_est" }, \ +{ "zero", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_ZERO }, 0, 0, FF_MPV_OPT_FLAGS, "motion_est" }, \ +{ "epzs", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_EPZS }, 0, 0, FF_MPV_OPT_FLAGS, "motion_est" }, \ +{ "xone", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_XONE }, 0, 0, FF_MPV_OPT_FLAGS, "motion_est" }, \ +{ "force_duplicated_matrix", "Always write luma and chroma matrix for mjpeg, useful for rtp streaming.", FF_MPV_OFFSET(force_duplicated_matrix), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FF_MPV_OPT_FLAGS }, \ +{"b_strategy", "Strategy to choose between I/P/B-frames", FF_MPV_OFFSET(b_frame_strategy), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 2, FF_MPV_OPT_FLAGS }, \ +{"b_sensitivity", "Adjust sensitivity of b_frame_strategy 1", FF_MPV_OFFSET(b_sensitivity), AV_OPT_TYPE_INT, {.i64 = 40 }, 1, INT_MAX, FF_MPV_OPT_FLAGS }, \ +{"brd_scale", "Downscale frames for dynamic B-frame decision", FF_MPV_OFFSET(brd_scale), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 3, FF_MPV_OPT_FLAGS }, \ +{"skip_threshold", "Frame skip threshold", FF_MPV_OFFSET(frame_skip_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \ +{"skip_factor", "Frame skip factor", FF_MPV_OFFSET(frame_skip_factor), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \ +{"skip_exp", "Frame skip exponent", FF_MPV_OFFSET(frame_skip_exp), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \ +{"skip_cmp", "Frame skip compare function", FF_MPV_OFFSET(frame_skip_cmp), AV_OPT_TYPE_INT, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \ +{"sc_threshold", "Scene change threshold", FF_MPV_OFFSET(scenechange_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \ +{"noise_reduction", "Noise reduction", FF_MPV_OFFSET(noise_reduction), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \ +{"mpeg_quant", "Use MPEG quantizers instead of H.263", FF_MPV_OFFSET(mpeg_quant), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, FF_MPV_OPT_FLAGS }, \ +{"ps", "RTP payload size in bytes", FF_MPV_OFFSET(rtp_payload_size), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \ +{"mepc", "Motion estimation bitrate penalty compensation (1.0 = 256)", FF_MPV_OFFSET(me_penalty_compensation), AV_OPT_TYPE_INT, {.i64 = 256 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \ +{"mepre", "pre motion estimation", FF_MPV_OFFSET(me_pre), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \ + +extern const AVOption ff_mpv_generic_options[]; + +/** + * Set the given MpegEncContext to common defaults (same for encoding + * and decoding). The changed fields will not depend upon the prior + * state of the MpegEncContext. + */ +void ff_mpv_common_defaults(MpegEncContext *s); + +void ff_dct_encode_init_x86(MpegEncContext *s); + +int ff_mpv_common_init(MpegEncContext *s); +void ff_mpv_common_init_arm(MpegEncContext *s); +void ff_mpv_common_init_axp(MpegEncContext *s); +void ff_mpv_common_init_neon(MpegEncContext *s); +void ff_mpv_common_init_ppc(MpegEncContext *s); +void ff_mpv_common_init_x86(MpegEncContext *s); +void ff_mpv_common_init_mips(MpegEncContext *s); + +int ff_mpv_common_frame_size_change(MpegEncContext *s); +void ff_mpv_common_end(MpegEncContext *s); + +void ff_mpv_decode_defaults(MpegEncContext *s); +void ff_mpv_decode_init(MpegEncContext *s, AVCodecContext *avctx); +void ff_mpv_reconstruct_mb(MpegEncContext *s, int16_t block[12][64]); +void ff_mpv_report_decode_progress(MpegEncContext *s); + +int ff_mpv_frame_start(MpegEncContext *s, AVCodecContext *avctx); +void ff_mpv_frame_end(MpegEncContext *s); + +int ff_mpv_encode_init(AVCodecContext *avctx); +void ff_mpv_encode_init_x86(MpegEncContext *s); + +int ff_mpv_encode_end(AVCodecContext *avctx); +int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt, + const AVFrame *frame, int *got_packet); +int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase); + +void ff_clean_intra_table_entries(MpegEncContext *s); +void ff_mpeg_draw_horiz_band(MpegEncContext *s, int y, int h); +void ff_mpeg_flush(AVCodecContext *avctx); + +void ff_print_debug_info(MpegEncContext *s, Picture *p, AVFrame *pict); + +int ff_mpv_export_qp_table(MpegEncContext *s, AVFrame *f, Picture *p, int qp_type); + +void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix); + +int ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src); +int ff_mpeg_update_thread_context(AVCodecContext *dst, const AVCodecContext *src); +void ff_set_qscale(MpegEncContext * s, int qscale); + +void ff_mpv_idct_init(MpegEncContext *s); +int ff_dct_encode_init(MpegEncContext *s); +void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[2][64], + const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra); +int ff_dct_quantize_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow); +void ff_block_permute(int16_t *block, uint8_t *permutation, + const uint8_t *scantable, int last); +void ff_init_block_index(MpegEncContext *s); + +void ff_mpv_motion(MpegEncContext *s, + uint8_t *dest_y, uint8_t *dest_cb, + uint8_t *dest_cr, int dir, + uint8_t **ref_picture, + op_pixels_func (*pix_op)[4], + qpel_mc_func (*qpix_op)[16]); + +static inline void ff_update_block_index(MpegEncContext *s){ + const int bytes_per_pixel = 1 + (s->avctx->bits_per_raw_sample > 8); + const int block_size= (8*bytes_per_pixel) >> s->avctx->lowres; + + s->block_index[0]+=2; + s->block_index[1]+=2; + s->block_index[2]+=2; + s->block_index[3]+=2; + s->block_index[4]++; + s->block_index[5]++; + s->dest[0]+= 2*block_size; + s->dest[1]+= (2 >> s->chroma_x_shift) * block_size; + s->dest[2]+= (2 >> s->chroma_x_shift) * block_size; +} + +static inline int get_bits_diff(MpegEncContext *s){ + const int bits= put_bits_count(&s->pb); + const int last= s->last_bits; + + s->last_bits = bits; + + return bits - last; +} + +static inline int mpeg_get_qscale(MpegEncContext *s) +{ + int qscale = get_bits(&s->gb, 5); + if (s->q_scale_type) + return ff_mpeg2_non_linear_qscale[qscale]; + else + return qscale << 1; +} + +#endif /* AVCODEC_MPEGVIDEO_H */ diff --git a/libs/ffvpx/libavcodec/mpegvideodata.h b/libs/ffvpx/libavcodec/mpegvideodata.h new file mode 100644 index 000000000..14f4806d6 --- /dev/null +++ b/libs/ffvpx/libavcodec/mpegvideodata.h @@ -0,0 +1,35 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_MPEGVIDEODATA_H +#define AVCODEC_MPEGVIDEODATA_H + +#include <stdint.h> + +/* encoding scans */ +extern const uint8_t ff_alternate_horizontal_scan[64]; +extern const uint8_t ff_alternate_vertical_scan[64]; + +extern const uint8_t ff_mpeg1_dc_scale_table[128]; +extern const uint8_t * const ff_mpeg2_dc_scale_table[4]; + +extern const uint8_t ff_mpeg2_non_linear_qscale[32]; + +extern const uint8_t ff_default_chroma_qscale_table[32]; + +#endif /* AVCODEC_MPEGVIDEODATA_H */ diff --git a/libs/ffvpx/libavcodec/mpegvideodsp.h b/libs/ffvpx/libavcodec/mpegvideodsp.h new file mode 100644 index 000000000..293e2548d --- /dev/null +++ b/libs/ffvpx/libavcodec/mpegvideodsp.h @@ -0,0 +1,47 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_MPEGVIDEODSP_H +#define AVCODEC_MPEGVIDEODSP_H + +#include <stdint.h> + +void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, int shift, int r, + int width, int height); + +typedef struct MpegVideoDSPContext { + /** + * translational global motion compensation. + */ + void (*gmc1)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */, + int srcStride, int h, int x16, int y16, int rounder); + /** + * global motion compensation. + */ + void (*gmc)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */, + int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, + int shift, int r, int width, int height); +} MpegVideoDSPContext; + +void ff_mpegvideodsp_init(MpegVideoDSPContext *c); +void ff_mpegvideodsp_init_ppc(MpegVideoDSPContext *c); +void ff_mpegvideodsp_init_x86(MpegVideoDSPContext *c); + +#endif /* AVCODEC_MPEGVIDEODSP_H */ diff --git a/libs/ffvpx/libavcodec/mpegvideoencdsp.h b/libs/ffvpx/libavcodec/mpegvideoencdsp.h new file mode 100644 index 000000000..33f0282fc --- /dev/null +++ b/libs/ffvpx/libavcodec/mpegvideoencdsp.h @@ -0,0 +1,58 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_MPEGVIDEOENCDSP_H +#define AVCODEC_MPEGVIDEOENCDSP_H + +#include <stdint.h> + +#include "avcodec.h" + +#define BASIS_SHIFT 16 +#define RECON_SHIFT 6 + +#define EDGE_TOP 1 +#define EDGE_BOTTOM 2 + +typedef struct MpegvideoEncDSPContext { + int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], + int16_t basis[64], int scale); + void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); + + int (*pix_sum)(uint8_t *pix, int line_size); + int (*pix_norm1)(uint8_t *pix, int line_size); + + void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, + int src_wrap, int width, int height); + + void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides); +} MpegvideoEncDSPContext; + +void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, + AVCodecContext *avctx); +void ff_mpegvideoencdsp_init_arm(MpegvideoEncDSPContext *c, + AVCodecContext *avctx); +void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c, + AVCodecContext *avctx); +void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, + AVCodecContext *avctx); +void ff_mpegvideoencdsp_init_mips(MpegvideoEncDSPContext *c, + AVCodecContext *avctx); + +#endif /* AVCODEC_MPEGVIDEOENCDSP_H */ diff --git a/libs/ffvpx/libavcodec/null_bsf.c b/libs/ffvpx/libavcodec/null_bsf.c new file mode 100644 index 000000000..24d26dfb1 --- /dev/null +++ b/libs/ffvpx/libavcodec/null_bsf.c @@ -0,0 +1,35 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Null bitstream filter -- pass the input through unchanged. + */ + +#include "avcodec.h" +#include "bsf.h" + +static int null_filter(AVBSFContext *ctx, AVPacket *pkt) +{ + return ff_bsf_get_packet_ref(ctx, pkt); +} + +const AVBitStreamFilter ff_null_bsf = { + .name = "null", + .filter = null_filter, +}; diff --git a/libs/ffvpx/libavcodec/options.c b/libs/ffvpx/libavcodec/options.c new file mode 100644 index 000000000..41b60521c --- /dev/null +++ b/libs/ffvpx/libavcodec/options.c @@ -0,0 +1,518 @@ +/* + * Copyright (c) 2001 Fabrice Bellard + * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Options definition for AVCodecContext. + */ + +#include "avcodec.h" +#include "internal.h" +#include "libavutil/avassert.h" +#include "libavutil/internal.h" +#include "libavutil/mem.h" +#include "libavutil/opt.h" +#include <string.h> + +FF_DISABLE_DEPRECATION_WARNINGS +#include "options_table.h" +FF_ENABLE_DEPRECATION_WARNINGS + +static const char* context_to_name(void* ptr) { + AVCodecContext *avc= ptr; + + if(avc && avc->codec && avc->codec->name) + return avc->codec->name; + else + return "NULL"; +} + +static void *codec_child_next(void *obj, void *prev) +{ + AVCodecContext *s = obj; + if (!prev && s->codec && s->codec->priv_class && s->priv_data) + return s->priv_data; + return NULL; +} + +static const AVClass *codec_child_class_next(const AVClass *prev) +{ + AVCodec *c = NULL; + + /* find the codec that corresponds to prev */ + while (prev && (c = av_codec_next(c))) + if (c->priv_class == prev) + break; + + /* find next codec with priv options */ + while (c = av_codec_next(c)) + if (c->priv_class) + return c->priv_class; + return NULL; +} + +static AVClassCategory get_category(void *ptr) +{ + AVCodecContext* avctx = ptr; + if(avctx->codec && avctx->codec->decode) return AV_CLASS_CATEGORY_DECODER; + else return AV_CLASS_CATEGORY_ENCODER; +} + +static const AVClass av_codec_context_class = { + .class_name = "AVCodecContext", + .item_name = context_to_name, + .option = avcodec_options, + .version = LIBAVUTIL_VERSION_INT, + .log_level_offset_offset = offsetof(AVCodecContext, log_level_offset), + .child_next = codec_child_next, + .child_class_next = codec_child_class_next, + .category = AV_CLASS_CATEGORY_ENCODER, + .get_category = get_category, +}; + +static int init_context_defaults(AVCodecContext *s, const AVCodec *codec) +{ + int flags=0; + memset(s, 0, sizeof(AVCodecContext)); + + s->av_class = &av_codec_context_class; + + s->codec_type = codec ? codec->type : AVMEDIA_TYPE_UNKNOWN; + if (codec) { + s->codec = codec; + s->codec_id = codec->id; + } + + if(s->codec_type == AVMEDIA_TYPE_AUDIO) + flags= AV_OPT_FLAG_AUDIO_PARAM; + else if(s->codec_type == AVMEDIA_TYPE_VIDEO) + flags= AV_OPT_FLAG_VIDEO_PARAM; + else if(s->codec_type == AVMEDIA_TYPE_SUBTITLE) + flags= AV_OPT_FLAG_SUBTITLE_PARAM; + av_opt_set_defaults2(s, flags, flags); + + s->time_base = (AVRational){0,1}; + s->framerate = (AVRational){ 0, 1 }; + s->pkt_timebase = (AVRational){ 0, 1 }; + s->get_buffer2 = avcodec_default_get_buffer2; + s->get_format = avcodec_default_get_format; + s->execute = avcodec_default_execute; + s->execute2 = avcodec_default_execute2; + s->sample_aspect_ratio = (AVRational){0,1}; + s->pix_fmt = AV_PIX_FMT_NONE; + s->sw_pix_fmt = AV_PIX_FMT_NONE; + s->sample_fmt = AV_SAMPLE_FMT_NONE; + + s->reordered_opaque = AV_NOPTS_VALUE; + if(codec && codec->priv_data_size){ + if(!s->priv_data){ + s->priv_data= av_mallocz(codec->priv_data_size); + if (!s->priv_data) { + return AVERROR(ENOMEM); + } + } + if(codec->priv_class){ + *(const AVClass**)s->priv_data = codec->priv_class; + av_opt_set_defaults(s->priv_data); + } + } + if (codec && codec->defaults) { + int ret; + const AVCodecDefault *d = codec->defaults; + while (d->key) { + ret = av_opt_set(s, d->key, d->value, 0); + av_assert0(ret >= 0); + d++; + } + } + return 0; +} + +#if FF_API_GET_CONTEXT_DEFAULTS +int avcodec_get_context_defaults3(AVCodecContext *s, const AVCodec *codec) +{ + return init_context_defaults(s, codec); +} +#endif + +AVCodecContext *avcodec_alloc_context3(const AVCodec *codec) +{ + AVCodecContext *avctx= av_malloc(sizeof(AVCodecContext)); + + if (!avctx) + return NULL; + + if (init_context_defaults(avctx, codec) < 0) { + av_free(avctx); + return NULL; + } + + return avctx; +} + +void avcodec_free_context(AVCodecContext **pavctx) +{ + AVCodecContext *avctx = *pavctx; + + if (!avctx) + return; + + avcodec_close(avctx); + + av_freep(&avctx->extradata); + av_freep(&avctx->subtitle_header); + av_freep(&avctx->intra_matrix); + av_freep(&avctx->inter_matrix); + av_freep(&avctx->rc_override); + + av_freep(pavctx); +} + +#if FF_API_COPY_CONTEXT +static void copy_context_reset(AVCodecContext *avctx) +{ + int i; + + av_opt_free(avctx); +#if FF_API_CODED_FRAME +FF_DISABLE_DEPRECATION_WARNINGS + av_frame_free(&avctx->coded_frame); +FF_ENABLE_DEPRECATION_WARNINGS +#endif + av_freep(&avctx->rc_override); + av_freep(&avctx->intra_matrix); + av_freep(&avctx->inter_matrix); + av_freep(&avctx->extradata); + av_freep(&avctx->subtitle_header); + av_buffer_unref(&avctx->hw_frames_ctx); + av_buffer_unref(&avctx->hw_device_ctx); + for (i = 0; i < avctx->nb_coded_side_data; i++) + av_freep(&avctx->coded_side_data[i].data); + av_freep(&avctx->coded_side_data); + avctx->subtitle_header_size = 0; + avctx->nb_coded_side_data = 0; + avctx->extradata_size = 0; +} + +int avcodec_copy_context(AVCodecContext *dest, const AVCodecContext *src) +{ + const AVCodec *orig_codec = dest->codec; + uint8_t *orig_priv_data = dest->priv_data; + + if (avcodec_is_open(dest)) { // check that the dest context is uninitialized + av_log(dest, AV_LOG_ERROR, + "Tried to copy AVCodecContext %p into already-initialized %p\n", + src, dest); + return AVERROR(EINVAL); + } + + copy_context_reset(dest); + + memcpy(dest, src, sizeof(*dest)); + av_opt_copy(dest, src); + + dest->priv_data = orig_priv_data; + dest->codec = orig_codec; + + if (orig_priv_data && src->codec && src->codec->priv_class && + dest->codec && dest->codec->priv_class) + av_opt_copy(orig_priv_data, src->priv_data); + + + /* set values specific to opened codecs back to their default state */ + dest->slice_offset = NULL; + dest->hwaccel = NULL; + dest->internal = NULL; +#if FF_API_CODED_FRAME +FF_DISABLE_DEPRECATION_WARNINGS + dest->coded_frame = NULL; +FF_ENABLE_DEPRECATION_WARNINGS +#endif + + /* reallocate values that should be allocated separately */ + dest->extradata = NULL; + dest->coded_side_data = NULL; + dest->intra_matrix = NULL; + dest->inter_matrix = NULL; + dest->rc_override = NULL; + dest->subtitle_header = NULL; + dest->hw_frames_ctx = NULL; + dest->hw_device_ctx = NULL; + dest->nb_coded_side_data = 0; + +#define alloc_and_copy_or_fail(obj, size, pad) \ + if (src->obj && size > 0) { \ + dest->obj = av_malloc(size + pad); \ + if (!dest->obj) \ + goto fail; \ + memcpy(dest->obj, src->obj, size); \ + if (pad) \ + memset(((uint8_t *) dest->obj) + size, 0, pad); \ + } + alloc_and_copy_or_fail(extradata, src->extradata_size, + AV_INPUT_BUFFER_PADDING_SIZE); + dest->extradata_size = src->extradata_size; + alloc_and_copy_or_fail(intra_matrix, 64 * sizeof(int16_t), 0); + alloc_and_copy_or_fail(inter_matrix, 64 * sizeof(int16_t), 0); + alloc_and_copy_or_fail(rc_override, src->rc_override_count * sizeof(*src->rc_override), 0); + alloc_and_copy_or_fail(subtitle_header, src->subtitle_header_size, 1); + av_assert0(dest->subtitle_header_size == src->subtitle_header_size); +#undef alloc_and_copy_or_fail + + if (src->hw_frames_ctx) { + dest->hw_frames_ctx = av_buffer_ref(src->hw_frames_ctx); + if (!dest->hw_frames_ctx) + goto fail; + } + + return 0; + +fail: + copy_context_reset(dest); + return AVERROR(ENOMEM); +} +#endif + +const AVClass *avcodec_get_class(void) +{ + return &av_codec_context_class; +} + +#define FOFFSET(x) offsetof(AVFrame,x) + +static const AVOption frame_options[]={ +{"best_effort_timestamp", "", FOFFSET(best_effort_timestamp), AV_OPT_TYPE_INT64, {.i64 = AV_NOPTS_VALUE }, INT64_MIN, INT64_MAX, 0}, +{"pkt_pos", "", FOFFSET(pkt_pos), AV_OPT_TYPE_INT64, {.i64 = -1 }, INT64_MIN, INT64_MAX, 0}, +{"pkt_size", "", FOFFSET(pkt_size), AV_OPT_TYPE_INT64, {.i64 = -1 }, INT64_MIN, INT64_MAX, 0}, +{"sample_aspect_ratio", "", FOFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0, INT_MAX, 0}, +{"width", "", FOFFSET(width), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0}, +{"height", "", FOFFSET(height), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0}, +{"format", "", FOFFSET(format), AV_OPT_TYPE_INT, {.i64 = -1 }, 0, INT_MAX, 0}, +{"channel_layout", "", FOFFSET(channel_layout), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, INT64_MAX, 0}, +{"sample_rate", "", FOFFSET(sample_rate), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0}, +{NULL}, +}; + +static const AVClass av_frame_class = { + .class_name = "AVFrame", + .item_name = NULL, + .option = frame_options, + .version = LIBAVUTIL_VERSION_INT, +}; + +const AVClass *avcodec_get_frame_class(void) +{ + return &av_frame_class; +} + +#define SROFFSET(x) offsetof(AVSubtitleRect,x) + +static const AVOption subtitle_rect_options[]={ +{"x", "", SROFFSET(x), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0}, +{"y", "", SROFFSET(y), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0}, +{"w", "", SROFFSET(w), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0}, +{"h", "", SROFFSET(h), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0}, +{"type", "", SROFFSET(type), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0}, +{"flags", "", SROFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = 0}, 0, 1, 0, "flags"}, +{"forced", "", SROFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = 0}, 0, 1, 0}, +{NULL}, +}; + +static const AVClass av_subtitle_rect_class = { + .class_name = "AVSubtitleRect", + .item_name = NULL, + .option = subtitle_rect_options, + .version = LIBAVUTIL_VERSION_INT, +}; + +const AVClass *avcodec_get_subtitle_rect_class(void) +{ + return &av_subtitle_rect_class; +} + +#ifdef TEST +static int dummy_init(AVCodecContext *ctx) +{ + //TODO: this code should set every possible pointer that could be set by codec and is not an option; + ctx->extradata_size = 8; + ctx->extradata = av_malloc(ctx->extradata_size); + return 0; +} + +static int dummy_close(AVCodecContext *ctx) +{ + av_freep(&ctx->extradata); + ctx->extradata_size = 0; + return 0; +} + +static int dummy_encode(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame, int *got_packet) +{ + return AVERROR(ENOSYS); +} + +typedef struct Dummy12Context { + AVClass *av_class; + int num; + char* str; +} Dummy12Context; + +typedef struct Dummy3Context { + void *fake_av_class; + int num; + char* str; +} Dummy3Context; + +#define OFFSET(x) offsetof(Dummy12Context, x) +#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM +static const AVOption dummy_options[] = { + { "str", "set str", OFFSET(str), AV_OPT_TYPE_STRING, { .str = "i'm src default value" }, 0, 0, VE}, + { "num", "set num", OFFSET(num), AV_OPT_TYPE_INT, { .i64 = 1500100900 }, 0, INT_MAX, VE}, + { NULL }, +}; + +static const AVClass dummy_v1_class = { + .class_name = "dummy_v1_class", + .item_name = av_default_item_name, + .option = dummy_options, + .version = LIBAVUTIL_VERSION_INT, +}; + +static const AVClass dummy_v2_class = { + .class_name = "dummy_v2_class", + .item_name = av_default_item_name, + .option = dummy_options, + .version = LIBAVUTIL_VERSION_INT, +}; + +/* codec with options */ +static AVCodec dummy_v1_encoder = { + .name = "dummy_v1_codec", + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_NONE - 1, + .encode2 = dummy_encode, + .init = dummy_init, + .close = dummy_close, + .priv_class = &dummy_v1_class, + .priv_data_size = sizeof(Dummy12Context), +}; + +/* codec with options, different class */ +static AVCodec dummy_v2_encoder = { + .name = "dummy_v2_codec", + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_NONE - 2, + .encode2 = dummy_encode, + .init = dummy_init, + .close = dummy_close, + .priv_class = &dummy_v2_class, + .priv_data_size = sizeof(Dummy12Context), +}; + +/* codec with priv data, but no class */ +static AVCodec dummy_v3_encoder = { + .name = "dummy_v3_codec", + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_NONE - 3, + .encode2 = dummy_encode, + .init = dummy_init, + .close = dummy_close, + .priv_data_size = sizeof(Dummy3Context), +}; + +/* codec without priv data */ +static AVCodec dummy_v4_encoder = { + .name = "dummy_v4_codec", + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_NONE - 4, + .encode2 = dummy_encode, + .init = dummy_init, + .close = dummy_close, +}; + +static void test_copy_print_codec(const AVCodecContext *ctx) +{ + printf("%-14s: %dx%d prv: %s", + ctx->codec ? ctx->codec->name : "NULL", + ctx->width, ctx->height, + ctx->priv_data ? "set" : "null"); + if (ctx->codec && ctx->codec->priv_class && ctx->codec->priv_data_size) { + int64_t i64; + char *str = NULL; + av_opt_get_int(ctx->priv_data, "num", 0, &i64); + av_opt_get(ctx->priv_data, "str", 0, (uint8_t**)&str); + printf(" opts: %"PRId64" %s", i64, str); + av_free(str); + } + printf("\n"); +} + +static void test_copy(const AVCodec *c1, const AVCodec *c2) +{ + AVCodecContext *ctx1, *ctx2; + printf("%s -> %s\nclosed:\n", c1 ? c1->name : "NULL", c2 ? c2->name : "NULL"); + ctx1 = avcodec_alloc_context3(c1); + ctx2 = avcodec_alloc_context3(c2); + ctx1->width = ctx1->height = 128; + if (ctx2->codec && ctx2->codec->priv_class && ctx2->codec->priv_data_size) { + av_opt_set(ctx2->priv_data, "num", "667", 0); + av_opt_set(ctx2->priv_data, "str", "i'm dest value before copy", 0); + } + avcodec_copy_context(ctx2, ctx1); + test_copy_print_codec(ctx1); + test_copy_print_codec(ctx2); + if (ctx1->codec) { + printf("opened:\n"); + avcodec_open2(ctx1, ctx1->codec, NULL); + if (ctx2->codec && ctx2->codec->priv_class && ctx2->codec->priv_data_size) { + av_opt_set(ctx2->priv_data, "num", "667", 0); + av_opt_set(ctx2->priv_data, "str", "i'm dest value before copy", 0); + } + avcodec_copy_context(ctx2, ctx1); + test_copy_print_codec(ctx1); + test_copy_print_codec(ctx2); + avcodec_close(ctx1); + } + avcodec_free_context(&ctx1); + avcodec_free_context(&ctx2); +} + +int main(void) +{ + AVCodec *dummy_codec[] = { + &dummy_v1_encoder, + &dummy_v2_encoder, + &dummy_v3_encoder, + &dummy_v4_encoder, + NULL, + }; + int i, j; + + for (i = 0; dummy_codec[i]; i++) + avcodec_register(dummy_codec[i]); + + printf("testing avcodec_copy_context()\n"); + for (i = 0; i < FF_ARRAY_ELEMS(dummy_codec); i++) + for (j = 0; j < FF_ARRAY_ELEMS(dummy_codec); j++) + test_copy(dummy_codec[i], dummy_codec[j]); + return 0; +} +#endif diff --git a/libs/ffvpx/libavcodec/options_table.h b/libs/ffvpx/libavcodec/options_table.h new file mode 100644 index 000000000..099261e16 --- /dev/null +++ b/libs/ffvpx/libavcodec/options_table.h @@ -0,0 +1,493 @@ +/* + * Copyright (c) 2001 Fabrice Bellard + * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_OPTIONS_TABLE_H +#define AVCODEC_OPTIONS_TABLE_H + +#include <float.h> +#include <limits.h> +#include <stdint.h> + +#include "libavutil/opt.h" +#include "avcodec.h" +#include "version.h" + +#define OFFSET(x) offsetof(AVCodecContext,x) +#define DEFAULT 0 //should be NAN but it does not work as it is not a constant in glibc as required by ANSI/ISO C +//these names are too long to be readable +#define V AV_OPT_FLAG_VIDEO_PARAM +#define A AV_OPT_FLAG_AUDIO_PARAM +#define S AV_OPT_FLAG_SUBTITLE_PARAM +#define E AV_OPT_FLAG_ENCODING_PARAM +#define D AV_OPT_FLAG_DECODING_PARAM + +#define AV_CODEC_DEFAULT_BITRATE 200*1000 + +static const AVOption avcodec_options[] = { +{"b", "set bitrate (in bits/s)", OFFSET(bit_rate), AV_OPT_TYPE_INT64, {.i64 = AV_CODEC_DEFAULT_BITRATE }, 0, INT64_MAX, A|V|E}, +{"ab", "set bitrate (in bits/s)", OFFSET(bit_rate), AV_OPT_TYPE_INT64, {.i64 = 128*1000 }, 0, INT_MAX, A|E}, +{"bt", "Set video bitrate tolerance (in bits/s). In 1-pass mode, bitrate tolerance specifies how far " + "ratecontrol is willing to deviate from the target average bitrate value. This is not related " + "to minimum/maximum bitrate. Lowering tolerance too much has an adverse effect on quality.", + OFFSET(bit_rate_tolerance), AV_OPT_TYPE_INT, {.i64 = AV_CODEC_DEFAULT_BITRATE*20 }, 1, INT_MAX, V|E}, +{"flags", NULL, OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT }, 0, UINT_MAX, V|A|S|E|D, "flags"}, +{"unaligned", "allow decoders to produce unaligned output", 0, AV_OPT_TYPE_CONST, { .i64 = AV_CODEC_FLAG_UNALIGNED }, INT_MIN, INT_MAX, V | D, "flags" }, +{"mv4", "use four motion vectors per macroblock (MPEG-4)", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_4MV }, INT_MIN, INT_MAX, V|E, "flags"}, +{"qpel", "use 1/4-pel motion compensation", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_QPEL }, INT_MIN, INT_MAX, V|E, "flags"}, +{"loop", "use loop filter", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_LOOP_FILTER }, INT_MIN, INT_MAX, V|E, "flags"}, +{"qscale", "use fixed qscale", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_QSCALE }, INT_MIN, INT_MAX, 0, "flags"}, +{"pass1", "use internal 2-pass ratecontrol in first pass mode", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_PASS1 }, INT_MIN, INT_MAX, 0, "flags"}, +{"pass2", "use internal 2-pass ratecontrol in second pass mode", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_PASS2 }, INT_MIN, INT_MAX, 0, "flags"}, +{"gray", "only decode/encode grayscale", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_GRAY }, INT_MIN, INT_MAX, V|E|D, "flags"}, +{"psnr", "error[?] variables will be set during encoding", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_PSNR }, INT_MIN, INT_MAX, V|E, "flags"}, +{"truncated", "Input bitstream might be randomly truncated", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_TRUNCATED }, INT_MIN, INT_MAX, V|D, "flags"}, +{"ildct", "use interlaced DCT", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_INTERLACED_DCT }, INT_MIN, INT_MAX, V|E, "flags"}, +{"low_delay", "force low delay", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_LOW_DELAY }, INT_MIN, INT_MAX, V|D|E, "flags"}, +{"global_header", "place global headers in extradata instead of every keyframe", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_GLOBAL_HEADER }, INT_MIN, INT_MAX, V|A|E, "flags"}, +{"bitexact", "use only bitexact functions (except (I)DCT)", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_BITEXACT }, INT_MIN, INT_MAX, A|V|S|D|E, "flags"}, +{"aic", "H.263 advanced intra coding / MPEG-4 AC prediction", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_AC_PRED }, INT_MIN, INT_MAX, V|E, "flags"}, +{"ilme", "interlaced motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_INTERLACED_ME }, INT_MIN, INT_MAX, V|E, "flags"}, +{"cgop", "closed GOP", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_CLOSED_GOP }, INT_MIN, INT_MAX, V|E, "flags"}, +{"output_corrupt", "Output even potentially corrupted frames", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_OUTPUT_CORRUPT }, INT_MIN, INT_MAX, V|D, "flags"}, +{"fast", "allow non-spec-compliant speedup tricks", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_FAST }, INT_MIN, INT_MAX, V|E, "flags2"}, +{"noout", "skip bitstream encoding", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_NO_OUTPUT }, INT_MIN, INT_MAX, V|E, "flags2"}, +{"ignorecrop", "ignore cropping information from sps", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_IGNORE_CROP }, INT_MIN, INT_MAX, V|D, "flags2"}, +{"local_header", "place global headers at every keyframe instead of in extradata", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_LOCAL_HEADER }, INT_MIN, INT_MAX, V|E, "flags2"}, +{"chunks", "Frame data might be split into multiple chunks", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_CHUNKS }, INT_MIN, INT_MAX, V|D, "flags2"}, +{"showall", "Show all frames before the first keyframe", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_SHOW_ALL }, INT_MIN, INT_MAX, V|D, "flags2"}, +{"export_mvs", "export motion vectors through frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_EXPORT_MVS}, INT_MIN, INT_MAX, V|D, "flags2"}, +{"skip_manual", "do not skip samples and export skip information as frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_SKIP_MANUAL}, INT_MIN, INT_MAX, V|D, "flags2"}, +{"ass_ro_flush_noop", "do not reset ASS ReadOrder field on flush", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_RO_FLUSH_NOOP}, INT_MIN, INT_MAX, S|D, "flags2"}, +{"time_base", NULL, OFFSET(time_base), AV_OPT_TYPE_RATIONAL, {.dbl = 0}, 0, INT_MAX}, +{"g", "set the group of picture (GOP) size", OFFSET(gop_size), AV_OPT_TYPE_INT, {.i64 = 12 }, INT_MIN, INT_MAX, V|E}, +{"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, A|D|E}, +{"ac", "set number of audio channels", OFFSET(channels), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, A|D|E}, +{"cutoff", "set cutoff bandwidth", OFFSET(cutoff), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, A|E}, +{"frame_size", NULL, OFFSET(frame_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, A|E}, +{"frame_number", NULL, OFFSET(frame_number), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX}, +{"delay", NULL, OFFSET(delay), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX}, +{"qcomp", "video quantizer scale compression (VBR). Constant of ratecontrol equation. " + "Recommended range for default rc_eq: 0.0-1.0", + OFFSET(qcompress), AV_OPT_TYPE_FLOAT, {.dbl = 0.5 }, -FLT_MAX, FLT_MAX, V|E}, +{"qblur", "video quantizer scale blur (VBR)", OFFSET(qblur), AV_OPT_TYPE_FLOAT, {.dbl = 0.5 }, -1, FLT_MAX, V|E}, +{"qmin", "minimum video quantizer scale (VBR)", OFFSET(qmin), AV_OPT_TYPE_INT, {.i64 = 2 }, -1, 69, V|E}, +{"qmax", "maximum video quantizer scale (VBR)", OFFSET(qmax), AV_OPT_TYPE_INT, {.i64 = 31 }, -1, 1024, V|E}, +{"qdiff", "maximum difference between the quantizer scales (VBR)", OFFSET(max_qdiff), AV_OPT_TYPE_INT, {.i64 = 3 }, INT_MIN, INT_MAX, V|E}, +{"bf", "set maximum number of B-frames between non-B-frames", OFFSET(max_b_frames), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, -1, INT_MAX, V|E}, +{"b_qfactor", "QP factor between P- and B-frames", OFFSET(b_quant_factor), AV_OPT_TYPE_FLOAT, {.dbl = 1.25 }, -FLT_MAX, FLT_MAX, V|E}, +#if FF_API_PRIVATE_OPT +{"b_strategy", "strategy to choose between I/P/B-frames", OFFSET(b_frame_strategy), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, V|E}, +{"ps", "RTP payload size in bytes", OFFSET(rtp_payload_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E}, +#endif +#if FF_API_STAT_BITS +{"mv_bits", NULL, OFFSET(mv_bits), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX}, +{"header_bits", NULL, OFFSET(header_bits), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX}, +{"i_tex_bits", NULL, OFFSET(i_tex_bits), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX}, +{"p_tex_bits", NULL, OFFSET(p_tex_bits), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX}, +{"i_count", NULL, OFFSET(i_count), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX}, +{"p_count", NULL, OFFSET(p_count), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX}, +{"skip_count", NULL, OFFSET(skip_count), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX}, +{"misc_bits", NULL, OFFSET(misc_bits), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX}, +{"frame_bits", NULL, OFFSET(frame_bits), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX}, +#endif +{"codec_tag", NULL, OFFSET(codec_tag), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX}, +{"bug", "work around not autodetected encoder bugs", OFFSET(workaround_bugs), AV_OPT_TYPE_FLAGS, {.i64 = FF_BUG_AUTODETECT }, INT_MIN, INT_MAX, V|D, "bug"}, +{"autodetect", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_AUTODETECT }, INT_MIN, INT_MAX, V|D, "bug"}, +{"xvid_ilace", "Xvid interlacing bug (autodetected if FOURCC == XVIX)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_XVID_ILACE }, INT_MIN, INT_MAX, V|D, "bug"}, +{"ump4", "(autodetected if FOURCC == UMP4)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_UMP4 }, INT_MIN, INT_MAX, V|D, "bug"}, +{"no_padding", "padding bug (autodetected)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_NO_PADDING }, INT_MIN, INT_MAX, V|D, "bug"}, +{"amv", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_AMV }, INT_MIN, INT_MAX, V|D, "bug"}, +{"qpel_chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_QPEL_CHROMA }, INT_MIN, INT_MAX, V|D, "bug"}, +{"std_qpel", "old standard qpel (autodetected per FOURCC/version)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_STD_QPEL }, INT_MIN, INT_MAX, V|D, "bug"}, +{"qpel_chroma2", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_QPEL_CHROMA2 }, INT_MIN, INT_MAX, V|D, "bug"}, +{"direct_blocksize", "direct-qpel-blocksize bug (autodetected per FOURCC/version)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_DIRECT_BLOCKSIZE }, INT_MIN, INT_MAX, V|D, "bug"}, +{"edge", "edge padding bug (autodetected per FOURCC/version)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_EDGE }, INT_MIN, INT_MAX, V|D, "bug"}, +{"hpel_chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_HPEL_CHROMA }, INT_MIN, INT_MAX, V|D, "bug"}, +{"dc_clip", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_DC_CLIP }, INT_MIN, INT_MAX, V|D, "bug"}, +{"ms", "work around various bugs in Microsoft's broken decoders", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_MS }, INT_MIN, INT_MAX, V|D, "bug"}, +{"trunc", "truncated frames", 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_TRUNCATED}, INT_MIN, INT_MAX, V|D, "bug"}, +{"iedge", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_IEDGE }, INT_MIN, INT_MAX, V|D, "bug"}, +{"strict", "how strictly to follow the standards", OFFSET(strict_std_compliance), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, A|V|D|E, "strict"}, +{"very", "strictly conform to a older more strict version of the spec or reference software", 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_VERY_STRICT }, INT_MIN, INT_MAX, A|V|D|E, "strict"}, +{"strict", "strictly conform to all the things in the spec no matter what the consequences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_STRICT }, INT_MIN, INT_MAX, A|V|D|E, "strict"}, +{"normal", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_NORMAL }, INT_MIN, INT_MAX, A|V|D|E, "strict"}, +{"unofficial", "allow unofficial extensions", 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_UNOFFICIAL }, INT_MIN, INT_MAX, A|V|D|E, "strict"}, +{"experimental", "allow non-standardized experimental things", 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_EXPERIMENTAL }, INT_MIN, INT_MAX, A|V|D|E, "strict"}, +{"b_qoffset", "QP offset between P- and B-frames", OFFSET(b_quant_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.25 }, -FLT_MAX, FLT_MAX, V|E}, +{"err_detect", "set error detection flags", OFFSET(err_recognition), AV_OPT_TYPE_FLAGS, {.i64 = 0 }, INT_MIN, INT_MAX, A|V|D, "err_detect"}, +{"crccheck", "verify embedded CRCs", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_CRCCHECK }, INT_MIN, INT_MAX, A|V|D, "err_detect"}, +{"bitstream", "detect bitstream specification deviations", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_BITSTREAM }, INT_MIN, INT_MAX, A|V|D, "err_detect"}, +{"buffer", "detect improper bitstream length", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_BUFFER }, INT_MIN, INT_MAX, A|V|D, "err_detect"}, +{"explode", "abort decoding on minor error detection", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_EXPLODE }, INT_MIN, INT_MAX, A|V|D, "err_detect"}, +{"ignore_err", "ignore errors", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_IGNORE_ERR }, INT_MIN, INT_MAX, A|V|D, "err_detect"}, +{"careful", "consider things that violate the spec, are fast to check and have not been seen in the wild as errors", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_CAREFUL }, INT_MIN, INT_MAX, A|V|D, "err_detect"}, +{"compliant", "consider all spec non compliancies as errors", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_COMPLIANT }, INT_MIN, INT_MAX, A|V|D, "err_detect"}, +{"aggressive", "consider things that a sane encoder should not do as an error", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_AGGRESSIVE }, INT_MIN, INT_MAX, A|V|D, "err_detect"}, +{"has_b_frames", NULL, OFFSET(has_b_frames), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX}, +{"block_align", NULL, OFFSET(block_align), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX}, +#if FF_API_PRIVATE_OPT +{"mpeg_quant", "use MPEG quantizers instead of H.263", OFFSET(mpeg_quant), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E}, +#endif +{"rc_override_count", NULL, OFFSET(rc_override_count), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX}, +{"maxrate", "maximum bitrate (in bits/s). Used for VBV together with bufsize.", OFFSET(rc_max_rate), AV_OPT_TYPE_INT64, {.i64 = DEFAULT }, 0, INT_MAX, V|A|E}, +{"minrate", "minimum bitrate (in bits/s). Most useful in setting up a CBR encode. It is of little use otherwise.", + OFFSET(rc_min_rate), AV_OPT_TYPE_INT64, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E}, +{"bufsize", "set ratecontrol buffer size (in bits)", OFFSET(rc_buffer_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, A|V|E}, +{"i_qfactor", "QP factor between P- and I-frames", OFFSET(i_quant_factor), AV_OPT_TYPE_FLOAT, {.dbl = -0.8 }, -FLT_MAX, FLT_MAX, V|E}, +{"i_qoffset", "QP offset between P- and I-frames", OFFSET(i_quant_offset), AV_OPT_TYPE_FLOAT, {.dbl = 0.0 }, -FLT_MAX, FLT_MAX, V|E}, +{"dct", "DCT algorithm", OFFSET(dct_algo), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|E, "dct"}, +{"auto", "autoselect a good one", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_AUTO }, INT_MIN, INT_MAX, V|E, "dct"}, +{"fastint", "fast integer", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FASTINT }, INT_MIN, INT_MAX, V|E, "dct"}, +{"int", "accurate integer", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_INT }, INT_MIN, INT_MAX, V|E, "dct"}, +{"mmx", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_MMX }, INT_MIN, INT_MAX, V|E, "dct"}, +{"altivec", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_ALTIVEC }, INT_MIN, INT_MAX, V|E, "dct"}, +{"faan", "floating point AAN DCT", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FAAN }, INT_MIN, INT_MAX, V|E, "dct"}, +{"lumi_mask", "compresses bright areas stronger than medium ones", OFFSET(lumi_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E}, +{"tcplx_mask", "temporal complexity masking", OFFSET(temporal_cplx_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E}, +{"scplx_mask", "spatial complexity masking", OFFSET(spatial_cplx_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E}, +{"p_mask", "inter masking", OFFSET(p_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E}, +{"dark_mask", "compresses dark areas stronger than medium ones", OFFSET(dark_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E}, +{"idct", "select IDCT implementation", OFFSET(idct_algo), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|E|D, "idct"}, +{"auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_AUTO }, INT_MIN, INT_MAX, V|E|D, "idct"}, +{"int", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_INT }, INT_MIN, INT_MAX, V|E|D, "idct"}, +{"simple", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLE }, INT_MIN, INT_MAX, V|E|D, "idct"}, +{"simplemmx", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEMMX }, INT_MIN, INT_MAX, V|E|D, "idct"}, +{"arm", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_ARM }, INT_MIN, INT_MAX, V|E|D, "idct"}, +{"altivec", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_ALTIVEC }, INT_MIN, INT_MAX, V|E|D, "idct"}, +{"simplearm", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARM }, INT_MIN, INT_MAX, V|E|D, "idct"}, +{"simplearmv5te", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV5TE }, INT_MIN, INT_MAX, V|E|D, "idct"}, +{"simplearmv6", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV6 }, INT_MIN, INT_MAX, V|E|D, "idct"}, +{"simpleneon", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLENEON }, INT_MIN, INT_MAX, V|E|D, "idct"}, +{"xvid", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVID }, INT_MIN, INT_MAX, V|E|D, "idct"}, +{"xvidmmx", "deprecated, for compatibility only", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVID }, INT_MIN, INT_MAX, V|E|D, "idct"}, +{"faani", "floating point AAN IDCT", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_FAAN }, INT_MIN, INT_MAX, V|D|E, "idct"}, +{"simpleauto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEAUTO }, INT_MIN, INT_MAX, V|E|D, "idct"}, +{"slice_count", NULL, OFFSET(slice_count), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX}, +{"ec", "set error concealment strategy", OFFSET(error_concealment), AV_OPT_TYPE_FLAGS, {.i64 = 3 }, INT_MIN, INT_MAX, V|D, "ec"}, +{"guess_mvs", "iterative motion vector (MV) search (slow)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_EC_GUESS_MVS }, INT_MIN, INT_MAX, V|D, "ec"}, +{"deblock", "use strong deblock filter for damaged MBs", 0, AV_OPT_TYPE_CONST, {.i64 = FF_EC_DEBLOCK }, INT_MIN, INT_MAX, V|D, "ec"}, +{"favor_inter", "favor predicting from the previous frame", 0, AV_OPT_TYPE_CONST, {.i64 = FF_EC_FAVOR_INTER }, INT_MIN, INT_MAX, V|D, "ec"}, +{"bits_per_coded_sample", NULL, OFFSET(bits_per_coded_sample), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX}, +#if FF_API_PRIVATE_OPT +{"pred", "prediction method", OFFSET(prediction_method), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "pred"}, +{"left", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PRED_LEFT }, INT_MIN, INT_MAX, V|E, "pred"}, +{"plane", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PRED_PLANE }, INT_MIN, INT_MAX, V|E, "pred"}, +{"median", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PRED_MEDIAN }, INT_MIN, INT_MAX, V|E, "pred"}, +#endif +{"aspect", "sample aspect ratio", OFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL, {.dbl = 0}, 0, 10, V|E}, +{"sar", "sample aspect ratio", OFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL, {.dbl = 0}, 0, 10, V|E}, +{"debug", "print specific debug info", OFFSET(debug), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT }, 0, INT_MAX, V|A|S|E|D, "debug"}, +{"pict", "picture info", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_PICT_INFO }, INT_MIN, INT_MAX, V|D, "debug"}, +{"rc", "rate control", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_RC }, INT_MIN, INT_MAX, V|E, "debug"}, +{"bitstream", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_BITSTREAM }, INT_MIN, INT_MAX, V|D, "debug"}, +{"mb_type", "macroblock (MB) type", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_MB_TYPE }, INT_MIN, INT_MAX, V|D, "debug"}, +{"qp", "per-block quantization parameter (QP)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_QP }, INT_MIN, INT_MAX, V|D, "debug"}, +#if FF_API_DEBUG_MV +{"mv", "motion vector", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_MV }, INT_MIN, INT_MAX, V|D, "debug"}, +#endif +{"dct_coeff", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_DCT_COEFF }, INT_MIN, INT_MAX, V|D, "debug"}, +{"green_metadata", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_GREEN_MD }, INT_MIN, INT_MAX, V|D, "debug"}, +{"skip", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_SKIP }, INT_MIN, INT_MAX, V|D, "debug"}, +{"startcode", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_STARTCODE }, INT_MIN, INT_MAX, V|D, "debug"}, +{"er", "error recognition", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_ER }, INT_MIN, INT_MAX, V|D, "debug"}, +{"mmco", "memory management control operations (H.264)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_MMCO }, INT_MIN, INT_MAX, V|D, "debug"}, +{"bugs", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_BUGS }, INT_MIN, INT_MAX, V|D, "debug"}, +#if FF_API_DEBUG_MV +{"vis_qp", "visualize quantization parameter (QP), lower QP are tinted greener", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_VIS_QP }, INT_MIN, INT_MAX, V|D, "debug"}, +{"vis_mb_type", "visualize block types", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_VIS_MB_TYPE }, INT_MIN, INT_MAX, V|D, "debug"}, +#endif +{"buffers", "picture buffer allocations", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_BUFFERS }, INT_MIN, INT_MAX, V|D, "debug"}, +{"thread_ops", "threading operations", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_THREADS }, INT_MIN, INT_MAX, V|A|D, "debug"}, +{"nomc", "skip motion compensation", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_NOMC }, INT_MIN, INT_MAX, V|A|D, "debug"}, +{"cmp", "full-pel ME compare function", OFFSET(me_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"subcmp", "sub-pel ME compare function", OFFSET(me_sub_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"mbcmp", "macroblock compare function", OFFSET(mb_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"ildctcmp", "interlaced DCT compare function", OFFSET(ildct_cmp), AV_OPT_TYPE_INT, {.i64 = FF_CMP_VSAD }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"dia_size", "diamond type & size for motion estimation", OFFSET(dia_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E}, +{"last_pred", "amount of motion predictors from the previous frame", OFFSET(last_predictor_count), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E}, +#if FF_API_PRIVATE_OPT +{"preme", "pre motion estimation", OFFSET(pre_me), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E}, +#endif +{"precmp", "pre motion estimation compare function", OFFSET(me_pre_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"sad", "sum of absolute differences, fast", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SAD }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"sse", "sum of squared errors", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SSE }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"satd", "sum of absolute Hadamard transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SATD }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"dct", "sum of absolute DCT transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCT }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"psnr", "sum of squared quantization errors (avoid, low quality)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_PSNR }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"bit", "number of bits needed for the block", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_BIT }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"rd", "rate distortion optimal, slow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_RD }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"zero", "0", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_ZERO }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"vsad", "sum of absolute vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSAD }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"vsse", "sum of squared vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSSE }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"nsse", "noise preserving sum of squared differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_NSSE }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +#if CONFIG_SNOW_ENCODER +{"w53", "5/3 wavelet, only used in snow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_W53 }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"w97", "9/7 wavelet, only used in snow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_W97 }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +#endif +{"dctmax", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"msad", "sum of absolute differences, median predicted", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_MEDIAN_SAD }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +{"pre_dia_size", "diamond type & size for motion estimation pre-pass", OFFSET(pre_dia_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E}, +{"subq", "sub-pel motion estimation quality", OFFSET(me_subpel_quality), AV_OPT_TYPE_INT, {.i64 = 8 }, INT_MIN, INT_MAX, V|E}, +{"me_range", "limit motion vectors range (1023 for DivX player)", OFFSET(me_range), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E}, +{"global_quality", NULL, OFFSET(global_quality), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E}, +#if FF_API_CODER_TYPE +{"coder", NULL, OFFSET(coder_type), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "coder"}, +{"vlc", "variable length coder / Huffman coder", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CODER_TYPE_VLC }, INT_MIN, INT_MAX, V|E, "coder"}, +{"ac", "arithmetic coder", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CODER_TYPE_AC }, INT_MIN, INT_MAX, V|E, "coder"}, +{"raw", "raw (no encoding)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CODER_TYPE_RAW }, INT_MIN, INT_MAX, V|E, "coder"}, +{"rle", "run-length coder", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CODER_TYPE_RLE }, INT_MIN, INT_MAX, V|E, "coder"}, +#endif /* FF_API_CODER_TYPE */ +#if FF_API_PRIVATE_OPT +{"context", "context model", OFFSET(context_model), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E}, +#endif +{"slice_flags", NULL, OFFSET(slice_flags), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX}, +{"mbd", "macroblock decision algorithm (high quality mode)", OFFSET(mb_decision), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, 2, V|E, "mbd"}, +{"simple", "use mbcmp", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_SIMPLE }, INT_MIN, INT_MAX, V|E, "mbd"}, +{"bits", "use fewest bits", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_BITS }, INT_MIN, INT_MAX, V|E, "mbd"}, +{"rd", "use best rate distortion", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_RD }, INT_MIN, INT_MAX, V|E, "mbd"}, +#if FF_API_PRIVATE_OPT +{"sc_threshold", "scene change threshold", OFFSET(scenechange_threshold), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E}, +#endif +#if FF_API_PRIVATE_OPT +{"nr", "noise reduction", OFFSET(noise_reduction), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E}, +#endif +{"rc_init_occupancy", "number of bits which should be loaded into the rc buffer before decoding starts", OFFSET(rc_initial_buffer_occupancy), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E}, +{"flags2", NULL, OFFSET(flags2), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT}, 0, UINT_MAX, V|A|E|D, "flags2"}, +{"threads", "set the number of threads", OFFSET(thread_count), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, INT_MAX, V|A|E|D, "threads"}, +{"auto", "autodetect a suitable number of threads to use", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, V|E|D, "threads"}, +{"dc", "intra_dc_precision", OFFSET(intra_dc_precision), AV_OPT_TYPE_INT, {.i64 = 0 }, -8, 16, V|E}, +{"nssew", "nsse weight", OFFSET(nsse_weight), AV_OPT_TYPE_INT, {.i64 = 8 }, INT_MIN, INT_MAX, V|E}, +{"skip_top", "number of macroblock rows at the top which are skipped", OFFSET(skip_top), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|D}, +{"skip_bottom", "number of macroblock rows at the bottom which are skipped", OFFSET(skip_bottom), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|D}, +{"profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT, {.i64 = FF_PROFILE_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "profile"}, +{"unknown", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "profile"}, +{"aac_main", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_AAC_MAIN }, INT_MIN, INT_MAX, A|E, "profile"}, +{"aac_low", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_AAC_LOW }, INT_MIN, INT_MAX, A|E, "profile"}, +{"aac_ssr", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_AAC_SSR }, INT_MIN, INT_MAX, A|E, "profile"}, +{"aac_ltp", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_AAC_LTP }, INT_MIN, INT_MAX, A|E, "profile"}, +{"aac_he", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_AAC_HE }, INT_MIN, INT_MAX, A|E, "profile"}, +{"aac_he_v2", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_AAC_HE_V2 }, INT_MIN, INT_MAX, A|E, "profile"}, +{"aac_ld", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_AAC_LD }, INT_MIN, INT_MAX, A|E, "profile"}, +{"aac_eld", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_AAC_ELD }, INT_MIN, INT_MAX, A|E, "profile"}, +{"mpeg2_aac_low", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_MPEG2_AAC_LOW }, INT_MIN, INT_MAX, A|E, "profile"}, +{"mpeg2_aac_he", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_MPEG2_AAC_HE }, INT_MIN, INT_MAX, A|E, "profile"}, +{"dts", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_DTS }, INT_MIN, INT_MAX, A|E, "profile"}, +{"dts_es", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_DTS_ES }, INT_MIN, INT_MAX, A|E, "profile"}, +{"dts_96_24", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_DTS_96_24 }, INT_MIN, INT_MAX, A|E, "profile"}, +{"dts_hd_hra", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_DTS_HD_HRA }, INT_MIN, INT_MAX, A|E, "profile"}, +{"dts_hd_ma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_DTS_HD_MA }, INT_MIN, INT_MAX, A|E, "profile"}, +{"mpeg4_sp", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_MPEG4_SIMPLE }, INT_MIN, INT_MAX, V|E, "profile"}, +{"mpeg4_core", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_MPEG4_CORE }, INT_MIN, INT_MAX, V|E, "profile"}, +{"mpeg4_main", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_MPEG4_MAIN }, INT_MIN, INT_MAX, V|E, "profile"}, +{"mpeg4_asp", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_MPEG4_ADVANCED_SIMPLE }, INT_MIN, INT_MAX, V|E, "profile"}, +{"main10", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_HEVC_MAIN_10 }, INT_MIN, INT_MAX, V|E, "profile"}, +{"msbc", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_SBC_MSBC }, INT_MIN, INT_MAX, A|E, "profile"}, +{"level", NULL, OFFSET(level), AV_OPT_TYPE_INT, {.i64 = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "level"}, +{"unknown", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "level"}, +{"lowres", "decode at 1= 1/2, 2=1/4, 3=1/8 resolutions", OFFSET(lowres), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, V|A|D}, +#if FF_API_PRIVATE_OPT +{"skip_threshold", "frame skip threshold", OFFSET(frame_skip_threshold), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E}, +{"skip_factor", "frame skip factor", OFFSET(frame_skip_factor), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E}, +{"skip_exp", "frame skip exponent", OFFSET(frame_skip_exp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E}, +{"skipcmp", "frame skip compare function", OFFSET(frame_skip_cmp), AV_OPT_TYPE_INT, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, V|E, "cmp_func"}, +#endif +{"mblmin", "minimum macroblock Lagrange factor (VBR)", OFFSET(mb_lmin), AV_OPT_TYPE_INT, {.i64 = FF_QP2LAMBDA * 2 }, 1, FF_LAMBDA_MAX, V|E}, +{"mblmax", "maximum macroblock Lagrange factor (VBR)", OFFSET(mb_lmax), AV_OPT_TYPE_INT, {.i64 = FF_QP2LAMBDA * 31 }, 1, FF_LAMBDA_MAX, V|E}, +#if FF_API_PRIVATE_OPT +{"mepc", "motion estimation bitrate penalty compensation (1.0 = 256)", OFFSET(me_penalty_compensation), AV_OPT_TYPE_INT, {.i64 = 256 }, INT_MIN, INT_MAX, V|E}, +#endif +{"skip_loop_filter", "skip loop filtering process for the selected frames", OFFSET(skip_loop_filter), AV_OPT_TYPE_INT, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"}, +{"skip_idct" , "skip IDCT/dequantization for the selected frames", OFFSET(skip_idct), AV_OPT_TYPE_INT, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"}, +{"skip_frame" , "skip decoding for the selected frames", OFFSET(skip_frame), AV_OPT_TYPE_INT, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"}, +{"none" , "discard no frame", 0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONE }, INT_MIN, INT_MAX, V|D, "avdiscard"}, +{"default" , "discard useless frames", 0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"}, +{"noref" , "discard all non-reference frames", 0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONREF }, INT_MIN, INT_MAX, V|D, "avdiscard"}, +{"bidir" , "discard all bidirectional frames", 0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_BIDIR }, INT_MIN, INT_MAX, V|D, "avdiscard"}, +{"nokey" , "discard all frames except keyframes", 0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONKEY }, INT_MIN, INT_MAX, V|D, "avdiscard"}, +{"nointra" , "discard all frames except I frames", 0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONINTRA}, INT_MIN, INT_MAX, V|D, "avdiscard"}, +{"all" , "discard all frames", 0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_ALL }, INT_MIN, INT_MAX, V|D, "avdiscard"}, +{"bidir_refine", "refine the two motion vectors used in bidirectional macroblocks", OFFSET(bidir_refine), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, 4, V|E}, +#if FF_API_PRIVATE_OPT +{"brd_scale", "downscale frames for dynamic B-frame decision", OFFSET(brd_scale), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, 10, V|E}, +#endif +{"keyint_min", "minimum interval between IDR-frames", OFFSET(keyint_min), AV_OPT_TYPE_INT, {.i64 = 25 }, INT_MIN, INT_MAX, V|E}, +{"refs", "reference frames to consider for motion compensation", OFFSET(refs), AV_OPT_TYPE_INT, {.i64 = 1 }, INT_MIN, INT_MAX, V|E}, +#if FF_API_PRIVATE_OPT +{"chromaoffset", "chroma QP offset from luma", OFFSET(chromaoffset), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E}, +#endif +{"trellis", "rate-distortion optimal quantization", OFFSET(trellis), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E}, +{"mv0_threshold", NULL, OFFSET(mv0_threshold), AV_OPT_TYPE_INT, {.i64 = 256 }, 0, INT_MAX, V|E}, +#if FF_API_PRIVATE_OPT +{"b_sensitivity", "adjust sensitivity of b_frame_strategy 1", OFFSET(b_sensitivity), AV_OPT_TYPE_INT, {.i64 = 40 }, 1, INT_MAX, V|E}, +#endif +{"compression_level", NULL, OFFSET(compression_level), AV_OPT_TYPE_INT, {.i64 = FF_COMPRESSION_DEFAULT }, INT_MIN, INT_MAX, V|A|E}, +#if FF_API_PRIVATE_OPT +{"min_prediction_order", NULL, OFFSET(min_prediction_order), AV_OPT_TYPE_INT, {.i64 = -1 }, INT_MIN, INT_MAX, A|E}, +{"max_prediction_order", NULL, OFFSET(max_prediction_order), AV_OPT_TYPE_INT, {.i64 = -1 }, INT_MIN, INT_MAX, A|E}, +{"timecode_frame_start", "GOP timecode frame start number, in non-drop-frame format", OFFSET(timecode_frame_start), AV_OPT_TYPE_INT64, {.i64 = -1 }, -1, INT64_MAX, V|E}, +#endif +{"bits_per_raw_sample", NULL, OFFSET(bits_per_raw_sample), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX}, +{"channel_layout", NULL, OFFSET(channel_layout), AV_OPT_TYPE_UINT64, {.i64 = DEFAULT }, 0, UINT64_MAX, A|E|D, "channel_layout"}, +{"request_channel_layout", NULL, OFFSET(request_channel_layout), AV_OPT_TYPE_UINT64, {.i64 = DEFAULT }, 0, UINT64_MAX, A|D, "request_channel_layout"}, +{"rc_max_vbv_use", NULL, OFFSET(rc_max_available_vbv_use), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, 0.0, FLT_MAX, V|E}, +{"rc_min_vbv_use", NULL, OFFSET(rc_min_vbv_overflow_use), AV_OPT_TYPE_FLOAT, {.dbl = 3 }, 0.0, FLT_MAX, V|E}, +{"ticks_per_frame", NULL, OFFSET(ticks_per_frame), AV_OPT_TYPE_INT, {.i64 = 1 }, 1, INT_MAX, A|V|E|D}, +{"color_primaries", "color primaries", OFFSET(color_primaries), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_UNSPECIFIED }, 1, INT_MAX, V|E|D, "color_primaries_type"}, +{"bt709", "BT.709", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709 }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"}, +{"unknown", "Unspecified", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_UNSPECIFIED }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"}, +{"bt470m", "BT.470 M", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT470M }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"}, +{"bt470bg", "BT.470 BG", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT470BG }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"}, +{"smpte170m", "SMPTE 170 M", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE170M }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"}, +{"smpte240m", "SMPTE 240 M", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE240M }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"}, +{"film", "Film", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_FILM }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"}, +{"bt2020", "BT.2020", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020 }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"}, +{"smpte428", "SMPTE 428-1", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE428 }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"}, +{"smpte428_1", "SMPTE 428-1", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE428 }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"}, +{"smpte431", "SMPTE 431-2", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE431 }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"}, +{"smpte432", "SMPTE 422-1", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE432 }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"}, +{"jedec-p22", "JEDEC P22", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_JEDEC_P22 }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"}, +{"unspecified", "Unspecified", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_UNSPECIFIED }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"}, +{"color_trc", "color transfer characteristics", OFFSET(color_trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_UNSPECIFIED }, 1, INT_MAX, V|E|D, "color_trc_type"}, +{"bt709", "BT.709", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"unknown", "Unspecified", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_UNSPECIFIED }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"gamma22", "BT.470 M", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_GAMMA22 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"gamma28", "BT.470 BG", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_GAMMA28 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"smpte170m", "SMPTE 170 M", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTE170M }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"smpte240m", "SMPTE 240 M", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTE240M }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"linear", "Linear", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_LINEAR }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"log100", "Log", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_LOG }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"log316", "Log square root", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_LOG_SQRT }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"iec61966-2-4", "IEC 61966-2-4", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_IEC61966_2_4 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"bt1361e", "BT.1361", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT1361_ECG }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"iec61966-2-1", "IEC 61966-2-1", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_IEC61966_2_1 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"bt2020-10", "BT.2020 - 10 bit", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"bt2020-12", "BT.2020 - 12 bit", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_12 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"smpte2084", "SMPTE 2084", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTE2084 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"smpte428", "SMPTE 428-1", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTE428 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"arib-std-b67", "ARIB STD-B67", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_ARIB_STD_B67 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"unspecified", "Unspecified", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_UNSPECIFIED }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"log", "Log", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_LOG }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"log_sqrt", "Log square root", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_LOG_SQRT }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"iec61966_2_4", "IEC 61966-2-4", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_IEC61966_2_4 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"bt1361", "BT.1361", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT1361_ECG }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"iec61966_2_1", "IEC 61966-2-1", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_IEC61966_2_1 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"bt2020_10bit", "BT.2020 - 10 bit", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"bt2020_12bit", "BT.2020 - 12 bit", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_12 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"smpte428_1", "SMPTE 428-1", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTE428 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"}, +{"colorspace", "color space", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = AVCOL_SPC_UNSPECIFIED }, 0, INT_MAX, V|E|D, "colorspace_type"}, +{"rgb", "RGB", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_RGB }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"}, +{"bt709", "BT.709", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709 }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"}, +{"unknown", "Unspecified", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_UNSPECIFIED }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"}, +{"fcc", "FCC", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_FCC }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"}, +{"bt470bg", "BT.470 BG", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT470BG }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"}, +{"smpte170m", "SMPTE 170 M", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_SMPTE170M }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"}, +{"smpte240m", "SMPTE 240 M", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_SMPTE240M }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"}, +{"ycgco", "YCGCO", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_YCGCO }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"}, +{"bt2020nc", "BT.2020 NCL", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"}, +{"bt2020c", "BT.2020 CL", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_CL }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"}, +{"smpte2085", "SMPTE 2085", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_SMPTE2085 }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"}, +{"unspecified", "Unspecified", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_UNSPECIFIED }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"}, +{"ycocg", "YCGCO", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_YCGCO }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"}, +{"bt2020_ncl", "BT.2020 NCL", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"}, +{"bt2020_cl", "BT.2020 CL", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_CL }, INT_MIN, INT_MAX, V|E|D, "colorspace_type"}, +{"color_range", "color range", OFFSET(color_range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_UNSPECIFIED }, 0, INT_MAX, V|E|D, "color_range_type"}, +{"unknown", "Unspecified", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_UNSPECIFIED }, INT_MIN, INT_MAX, V|E|D, "color_range_type"}, +{"tv", "MPEG (219*2^(n-8))", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG }, INT_MIN, INT_MAX, V|E|D, "color_range_type"}, +{"pc", "JPEG (2^n-1)", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG }, INT_MIN, INT_MAX, V|E|D, "color_range_type"}, +{"unspecified", "Unspecified", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_UNSPECIFIED }, INT_MIN, INT_MAX, V|E|D, "color_range_type"}, +{"mpeg", "MPEG (219*2^(n-8))", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG }, INT_MIN, INT_MAX, V|E|D, "color_range_type"}, +{"jpeg", "JPEG (2^n-1)", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG }, INT_MIN, INT_MAX, V|E|D, "color_range_type"}, +{"chroma_sample_location", "chroma sample location", OFFSET(chroma_sample_location), AV_OPT_TYPE_INT, {.i64 = AVCHROMA_LOC_UNSPECIFIED }, 0, INT_MAX, V|E|D, "chroma_sample_location_type"}, +{"unknown", "Unspecified", 0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_UNSPECIFIED }, INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"}, +{"left", "Left", 0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_LEFT }, INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"}, +{"center", "Center", 0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_CENTER }, INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"}, +{"topleft", "Top-left", 0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_TOPLEFT }, INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"}, +{"top", "Top", 0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_TOP }, INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"}, +{"bottomleft", "Bottom-left", 0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_BOTTOMLEFT }, INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"}, +{"bottom", "Bottom", 0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_BOTTOM }, INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"}, +{"unspecified", "Unspecified", 0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_UNSPECIFIED }, INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"}, +{"log_level_offset", "set the log level offset", OFFSET(log_level_offset), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX }, +{"slices", "set the number of slices, used in parallelized encoding", OFFSET(slices), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, V|E}, +{"thread_type", "select multithreading type", OFFSET(thread_type), AV_OPT_TYPE_FLAGS, {.i64 = FF_THREAD_SLICE|FF_THREAD_FRAME }, 0, INT_MAX, V|A|E|D, "thread_type"}, +{"slice", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_THREAD_SLICE }, INT_MIN, INT_MAX, V|E|D, "thread_type"}, +{"frame", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_THREAD_FRAME }, INT_MIN, INT_MAX, V|E|D, "thread_type"}, +{"audio_service_type", "audio service type", OFFSET(audio_service_type), AV_OPT_TYPE_INT, {.i64 = AV_AUDIO_SERVICE_TYPE_MAIN }, 0, AV_AUDIO_SERVICE_TYPE_NB-1, A|E, "audio_service_type"}, +{"ma", "Main Audio Service", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_MAIN }, INT_MIN, INT_MAX, A|E, "audio_service_type"}, +{"ef", "Effects", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_EFFECTS }, INT_MIN, INT_MAX, A|E, "audio_service_type"}, +{"vi", "Visually Impaired", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_VISUALLY_IMPAIRED }, INT_MIN, INT_MAX, A|E, "audio_service_type"}, +{"hi", "Hearing Impaired", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_HEARING_IMPAIRED }, INT_MIN, INT_MAX, A|E, "audio_service_type"}, +{"di", "Dialogue", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_DIALOGUE }, INT_MIN, INT_MAX, A|E, "audio_service_type"}, +{"co", "Commentary", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_COMMENTARY }, INT_MIN, INT_MAX, A|E, "audio_service_type"}, +{"em", "Emergency", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_EMERGENCY }, INT_MIN, INT_MAX, A|E, "audio_service_type"}, +{"vo", "Voice Over", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_VOICE_OVER }, INT_MIN, INT_MAX, A|E, "audio_service_type"}, +{"ka", "Karaoke", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_KARAOKE }, INT_MIN, INT_MAX, A|E, "audio_service_type"}, +{"request_sample_fmt", "sample format audio decoders should prefer", OFFSET(request_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT, {.i64=AV_SAMPLE_FMT_NONE}, -1, INT_MAX, A|D, "request_sample_fmt"}, +{"pkt_timebase", NULL, OFFSET(pkt_timebase), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0, INT_MAX, 0}, +{"sub_charenc", "set input text subtitles character encoding", OFFSET(sub_charenc), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, S|D}, +{"sub_charenc_mode", "set input text subtitles character encoding mode", OFFSET(sub_charenc_mode), AV_OPT_TYPE_FLAGS, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC}, -1, INT_MAX, S|D, "sub_charenc_mode"}, +{"do_nothing", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_DO_NOTHING}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"}, +{"auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"}, +{"pre_decoder", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_PRE_DECODER}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"}, +{"ignore", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_IGNORE}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"}, +#if FF_API_ASS_TIMING +{"sub_text_format", "set decoded text subtitle format", OFFSET(sub_text_format), AV_OPT_TYPE_INT, {.i64 = FF_SUB_TEXT_FMT_ASS_WITH_TIMINGS}, 0, 1, S|D, "sub_text_format"}, +#else +{"sub_text_format", "set decoded text subtitle format", OFFSET(sub_text_format), AV_OPT_TYPE_INT, {.i64 = FF_SUB_TEXT_FMT_ASS}, 0, 1, S|D, "sub_text_format"}, +#endif +{"ass", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_TEXT_FMT_ASS}, INT_MIN, INT_MAX, S|D, "sub_text_format"}, +#if FF_API_ASS_TIMING +{"ass_with_timings", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_TEXT_FMT_ASS_WITH_TIMINGS}, INT_MIN, INT_MAX, S|D, "sub_text_format"}, +#endif +{"refcounted_frames", NULL, OFFSET(refcounted_frames), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, A|V|D }, +#if FF_API_SIDEDATA_ONLY_PKT +{"side_data_only_packets", NULL, OFFSET(side_data_only_packets), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, A|V|E }, +#endif +{"apply_cropping", NULL, OFFSET(apply_cropping), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, V | D }, +{"skip_alpha", "Skip processing alpha", OFFSET(skip_alpha), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, V|D }, +{"field_order", "Field order", OFFSET(field_order), AV_OPT_TYPE_INT, {.i64 = AV_FIELD_UNKNOWN }, 0, 5, V|D|E, "field_order" }, +{"progressive", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_PROGRESSIVE }, 0, 0, V|D|E, "field_order" }, +{"tt", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_TT }, 0, 0, V|D|E, "field_order" }, +{"bb", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_BB }, 0, 0, V|D|E, "field_order" }, +{"tb", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_TB }, 0, 0, V|D|E, "field_order" }, +{"bt", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_BT }, 0, 0, V|D|E, "field_order" }, +{"dump_separator", "set information dump field separator", OFFSET(dump_separator), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, A|V|S|D|E}, +{"codec_whitelist", "List of decoders that are allowed to be used", OFFSET(codec_whitelist), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, A|V|S|D }, +{"pixel_format", "set pixel format", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64=AV_PIX_FMT_NONE}, -1, INT_MAX, 0 }, +{"video_size", "set video size", OFFSET(width), AV_OPT_TYPE_IMAGE_SIZE, {.str=NULL}, 0, INT_MAX, 0 }, +{"max_pixels", "Maximum number of pixels", OFFSET(max_pixels), AV_OPT_TYPE_INT64, {.i64 = INT_MAX }, 0, INT_MAX, A|V|S|D|E }, +{"hwaccel_flags", NULL, OFFSET(hwaccel_flags), AV_OPT_TYPE_FLAGS, {.i64 = AV_HWACCEL_FLAG_IGNORE_LEVEL }, 0, UINT_MAX, V|D, "hwaccel_flags"}, +{"ignore_level", "ignore level even if the codec level used is unknown or higher than the maximum supported level reported by the hardware driver", 0, AV_OPT_TYPE_CONST, { .i64 = AV_HWACCEL_FLAG_IGNORE_LEVEL }, INT_MIN, INT_MAX, V | D, "hwaccel_flags" }, +{"allow_high_depth", "allow to output YUV pixel formats with a different chroma sampling than 4:2:0 and/or other than 8 bits per component", 0, AV_OPT_TYPE_CONST, {.i64 = AV_HWACCEL_FLAG_ALLOW_HIGH_DEPTH }, INT_MIN, INT_MAX, V | D, "hwaccel_flags"}, +{"allow_profile_mismatch", "attempt to decode anyway if HW accelerated decoder's supported profiles do not exactly match the stream", 0, AV_OPT_TYPE_CONST, {.i64 = AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH }, INT_MIN, INT_MAX, V | D, "hwaccel_flags"}, +{"extra_hw_frames", "Number of extra hardware frames to allocate for the user", OFFSET(extra_hw_frames), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, V|D }, +{NULL}, +}; + +#undef A +#undef V +#undef S +#undef E +#undef D +#undef DEFAULT +#undef OFFSET + +#endif /* AVCODEC_OPTIONS_TABLE_H */ diff --git a/libs/ffvpx/libavcodec/parser.c b/libs/ffvpx/libavcodec/parser.c new file mode 100644 index 000000000..f43b197d5 --- /dev/null +++ b/libs/ffvpx/libavcodec/parser.c @@ -0,0 +1,416 @@ +/* + * Audio and Video frame extraction + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2003 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <inttypes.h> +#include <stdint.h> +#include <string.h> + +#include "libavutil/avassert.h" +#include "libavutil/internal.h" +#include "libavutil/mem.h" +#include "libavutil/thread.h" + +#include "internal.h" +#include "parser.h" + +/* Parsers */ +extern AVCodecParser ff_aac_parser; +extern AVCodecParser ff_aac_latm_parser; +extern AVCodecParser ff_ac3_parser; +extern AVCodecParser ff_adx_parser; +extern AVCodecParser ff_bmp_parser; +extern AVCodecParser ff_cavsvideo_parser; +extern AVCodecParser ff_cook_parser; +extern AVCodecParser ff_dca_parser; +extern AVCodecParser ff_dirac_parser; +extern AVCodecParser ff_dnxhd_parser; +extern AVCodecParser ff_dpx_parser; +extern AVCodecParser ff_dvaudio_parser; +extern AVCodecParser ff_dvbsub_parser; +extern AVCodecParser ff_dvdsub_parser; +extern AVCodecParser ff_dvd_nav_parser; +extern AVCodecParser ff_flac_parser; +extern AVCodecParser ff_g729_parser; +extern AVCodecParser ff_gsm_parser; +extern AVCodecParser ff_h261_parser; +extern AVCodecParser ff_h263_parser; +extern AVCodecParser ff_h264_parser; +extern AVCodecParser ff_hevc_parser; +extern AVCodecParser ff_mjpeg_parser; +extern AVCodecParser ff_mlp_parser; +extern AVCodecParser ff_mpeg4video_parser; +extern AVCodecParser ff_mpegaudio_parser; +extern AVCodecParser ff_mpegvideo_parser; +extern AVCodecParser ff_opus_parser; +extern AVCodecParser ff_png_parser; +extern AVCodecParser ff_pnm_parser; +extern AVCodecParser ff_rv30_parser; +extern AVCodecParser ff_rv40_parser; +extern AVCodecParser ff_sbc_parser; +extern AVCodecParser ff_sipr_parser; +extern AVCodecParser ff_tak_parser; +extern AVCodecParser ff_vc1_parser; +extern AVCodecParser ff_vorbis_parser; +extern AVCodecParser ff_vp3_parser; +extern AVCodecParser ff_vp8_parser; +extern AVCodecParser ff_vp9_parser; +extern AVCodecParser ff_xma_parser; + +#include "libavcodec/parser_list.c" + +static AVOnce av_parser_next_init = AV_ONCE_INIT; + +static void av_parser_init_next(void) +{ + AVCodecParser *prev = NULL, *p; + int i = 0; + while ((p = (AVCodecParser*)parser_list[i++])) { + if (prev) + prev->next = p; + prev = p; + } +} + +AVCodecParser *av_parser_next(const AVCodecParser *p) +{ + ff_thread_once(&av_parser_next_init, av_parser_init_next); + + if (p) + return p->next; + else + return (AVCodecParser*)parser_list[0]; +} + +const AVCodecParser *av_parser_iterate(void **opaque) +{ + uintptr_t i = (uintptr_t)*opaque; + const AVCodecParser *p = parser_list[i]; + + if (p) + *opaque = (void*)(i + 1); + + return p; +} + +void av_register_codec_parser(AVCodecParser *parser) +{ + ff_thread_once(&av_parser_next_init, av_parser_init_next); +} + +AVCodecParserContext *av_parser_init(int codec_id) +{ + AVCodecParserContext *s = NULL; + const AVCodecParser *parser; + void *i = 0; + int ret; + + if (codec_id == AV_CODEC_ID_NONE) + return NULL; + + while ((parser = av_parser_iterate(&i))) { + if (parser->codec_ids[0] == codec_id || + parser->codec_ids[1] == codec_id || + parser->codec_ids[2] == codec_id || + parser->codec_ids[3] == codec_id || + parser->codec_ids[4] == codec_id) + goto found; + } + return NULL; + +found: + s = av_mallocz(sizeof(AVCodecParserContext)); + if (!s) + goto err_out; + s->parser = (AVCodecParser*)parser; + s->priv_data = av_mallocz(parser->priv_data_size); + if (!s->priv_data) + goto err_out; + s->fetch_timestamp=1; + s->pict_type = AV_PICTURE_TYPE_I; + if (parser->parser_init) { + ret = parser->parser_init(s); + if (ret != 0) + goto err_out; + } + s->key_frame = -1; +#if FF_API_CONVERGENCE_DURATION +FF_DISABLE_DEPRECATION_WARNINGS + s->convergence_duration = 0; +FF_ENABLE_DEPRECATION_WARNINGS +#endif + s->dts_sync_point = INT_MIN; + s->dts_ref_dts_delta = INT_MIN; + s->pts_dts_delta = INT_MIN; + s->format = -1; + + return s; + +err_out: + if (s) + av_freep(&s->priv_data); + av_free(s); + return NULL; +} + +void ff_fetch_timestamp(AVCodecParserContext *s, int off, int remove, int fuzzy) +{ + int i; + + if (!fuzzy) { + s->dts = + s->pts = AV_NOPTS_VALUE; + s->pos = -1; + s->offset = 0; + } + for (i = 0; i < AV_PARSER_PTS_NB; i++) { + if (s->cur_offset + off >= s->cur_frame_offset[i] && + (s->frame_offset < s->cur_frame_offset[i] || + (!s->frame_offset && !s->next_frame_offset)) && // first field/frame + // check disabled since MPEG-TS does not send complete PES packets + /*s->next_frame_offset + off <*/ s->cur_frame_end[i]){ + + if (!fuzzy || s->cur_frame_dts[i] != AV_NOPTS_VALUE) { + s->dts = s->cur_frame_dts[i]; + s->pts = s->cur_frame_pts[i]; + s->pos = s->cur_frame_pos[i]; + s->offset = s->next_frame_offset - s->cur_frame_offset[i]; + } + if (remove) + s->cur_frame_offset[i] = INT64_MAX; + if (s->cur_offset + off < s->cur_frame_end[i]) + break; + } + } +} + +int av_parser_parse2(AVCodecParserContext *s, AVCodecContext *avctx, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size, + int64_t pts, int64_t dts, int64_t pos) +{ + int index, i; + uint8_t dummy_buf[AV_INPUT_BUFFER_PADDING_SIZE]; + + av_assert1(avctx->codec_id != AV_CODEC_ID_NONE); + + /* Parsers only work for the specified codec ids. */ + av_assert1(avctx->codec_id == s->parser->codec_ids[0] || + avctx->codec_id == s->parser->codec_ids[1] || + avctx->codec_id == s->parser->codec_ids[2] || + avctx->codec_id == s->parser->codec_ids[3] || + avctx->codec_id == s->parser->codec_ids[4]); + + if (!(s->flags & PARSER_FLAG_FETCHED_OFFSET)) { + s->next_frame_offset = + s->cur_offset = pos; + s->flags |= PARSER_FLAG_FETCHED_OFFSET; + } + + if (buf_size == 0) { + /* padding is always necessary even if EOF, so we add it here */ + memset(dummy_buf, 0, sizeof(dummy_buf)); + buf = dummy_buf; + } else if (s->cur_offset + buf_size != s->cur_frame_end[s->cur_frame_start_index]) { /* skip remainder packets */ + /* add a new packet descriptor */ + i = (s->cur_frame_start_index + 1) & (AV_PARSER_PTS_NB - 1); + s->cur_frame_start_index = i; + s->cur_frame_offset[i] = s->cur_offset; + s->cur_frame_end[i] = s->cur_offset + buf_size; + s->cur_frame_pts[i] = pts; + s->cur_frame_dts[i] = dts; + s->cur_frame_pos[i] = pos; + } + + if (s->fetch_timestamp) { + s->fetch_timestamp = 0; + s->last_pts = s->pts; + s->last_dts = s->dts; + s->last_pos = s->pos; + ff_fetch_timestamp(s, 0, 0, 0); + } + /* WARNING: the returned index can be negative */ + index = s->parser->parser_parse(s, avctx, (const uint8_t **) poutbuf, + poutbuf_size, buf, buf_size); + av_assert0(index > -0x20000000); // The API does not allow returning AVERROR codes +#define FILL(name) if(s->name > 0 && avctx->name <= 0) avctx->name = s->name + if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) { + FILL(field_order); + } + + /* update the file pointer */ + if (*poutbuf_size) { + /* fill the data for the current frame */ + s->frame_offset = s->next_frame_offset; + + /* offset of the next frame */ + s->next_frame_offset = s->cur_offset + index; + s->fetch_timestamp = 1; + } + if (index < 0) + index = 0; + s->cur_offset += index; + return index; +} + +int av_parser_change(AVCodecParserContext *s, AVCodecContext *avctx, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size, int keyframe) +{ + if (s && s->parser->split) { + if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER || + avctx->flags2 & AV_CODEC_FLAG2_LOCAL_HEADER) { + int i = s->parser->split(avctx, buf, buf_size); + buf += i; + buf_size -= i; + } + } + + /* cast to avoid warning about discarding qualifiers */ + *poutbuf = (uint8_t *) buf; + *poutbuf_size = buf_size; + if (avctx->extradata) { + if (keyframe && (avctx->flags2 & AV_CODEC_FLAG2_LOCAL_HEADER)) { + int size = buf_size + avctx->extradata_size; + + *poutbuf_size = size; + *poutbuf = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE); + if (!*poutbuf) + return AVERROR(ENOMEM); + + memcpy(*poutbuf, avctx->extradata, avctx->extradata_size); + memcpy(*poutbuf + avctx->extradata_size, buf, + buf_size + AV_INPUT_BUFFER_PADDING_SIZE); + return 1; + } + } + + return 0; +} + +void av_parser_close(AVCodecParserContext *s) +{ + if (s) { + if (s->parser->parser_close) + s->parser->parser_close(s); + av_freep(&s->priv_data); + av_free(s); + } +} + +int ff_combine_frame(ParseContext *pc, int next, + const uint8_t **buf, int *buf_size) +{ + if (pc->overread) { + ff_dlog(NULL, "overread %d, state:%"PRIX32" next:%d index:%d o_index:%d\n", + pc->overread, pc->state, next, pc->index, pc->overread_index); + ff_dlog(NULL, "%X %X %X %X\n", + (*buf)[0], (*buf)[1], (*buf)[2], (*buf)[3]); + } + + /* Copy overread bytes from last frame into buffer. */ + for (; pc->overread > 0; pc->overread--) + pc->buffer[pc->index++] = pc->buffer[pc->overread_index++]; + + /* flush remaining if EOF */ + if (!*buf_size && next == END_NOT_FOUND) + next = 0; + + pc->last_index = pc->index; + + /* copy into buffer end return */ + if (next == END_NOT_FOUND) { + void *new_buffer = av_fast_realloc(pc->buffer, &pc->buffer_size, + *buf_size + pc->index + + AV_INPUT_BUFFER_PADDING_SIZE); + + if (!new_buffer) { + av_log(NULL, AV_LOG_ERROR, "Failed to reallocate parser buffer to %d\n", *buf_size + pc->index + AV_INPUT_BUFFER_PADDING_SIZE); + pc->index = 0; + return AVERROR(ENOMEM); + } + pc->buffer = new_buffer; + memcpy(&pc->buffer[pc->index], *buf, *buf_size); + pc->index += *buf_size; + return -1; + } + + av_assert0(next >= 0 || pc->buffer); + + *buf_size = + pc->overread_index = pc->index + next; + + /* append to buffer */ + if (pc->index) { + void *new_buffer = av_fast_realloc(pc->buffer, &pc->buffer_size, + next + pc->index + + AV_INPUT_BUFFER_PADDING_SIZE); + if (!new_buffer) { + av_log(NULL, AV_LOG_ERROR, "Failed to reallocate parser buffer to %d\n", next + pc->index + AV_INPUT_BUFFER_PADDING_SIZE); + pc->overread_index = + pc->index = 0; + return AVERROR(ENOMEM); + } + pc->buffer = new_buffer; + if (next > -AV_INPUT_BUFFER_PADDING_SIZE) + memcpy(&pc->buffer[pc->index], *buf, + next + AV_INPUT_BUFFER_PADDING_SIZE); + pc->index = 0; + *buf = pc->buffer; + } + + /* store overread bytes */ + for (; next < 0; next++) { + pc->state = pc->state << 8 | pc->buffer[pc->last_index + next]; + pc->state64 = pc->state64 << 8 | pc->buffer[pc->last_index + next]; + pc->overread++; + } + + if (pc->overread) { + ff_dlog(NULL, "overread %d, state:%"PRIX32" next:%d index:%d o_index:%d\n", + pc->overread, pc->state, next, pc->index, pc->overread_index); + ff_dlog(NULL, "%X %X %X %X\n", + (*buf)[0], (*buf)[1], (*buf)[2], (*buf)[3]); + } + + return 0; +} + +void ff_parse_close(AVCodecParserContext *s) +{ + ParseContext *pc = s->priv_data; + + av_freep(&pc->buffer); +} + +int ff_mpeg4video_split(AVCodecContext *avctx, const uint8_t *buf, int buf_size) +{ + uint32_t state = -1; + const uint8_t *ptr = buf, *end = buf + buf_size; + + while (ptr < end) { + ptr = avpriv_find_start_code(ptr, end, &state); + if (state == 0x1B3 || state == 0x1B6) + return ptr - 4 - buf; + } + + return 0; +} diff --git a/libs/ffvpx/libavcodec/parser.h b/libs/ffvpx/libavcodec/parser.h new file mode 100644 index 000000000..ef35547e9 --- /dev/null +++ b/libs/ffvpx/libavcodec/parser.h @@ -0,0 +1,60 @@ +/* + * AVCodecParser prototypes and definitions + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2003 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_PARSER_H +#define AVCODEC_PARSER_H + +#include "avcodec.h" + +typedef struct ParseContext{ + uint8_t *buffer; + int index; + int last_index; + unsigned int buffer_size; + uint32_t state; ///< contains the last few bytes in MSB order + int frame_start_found; + int overread; ///< the number of bytes which where irreversibly read from the next frame + int overread_index; ///< the index into ParseContext.buffer of the overread bytes + uint64_t state64; ///< contains the last 8 bytes in MSB order +} ParseContext; + +#define END_NOT_FOUND (-100) + +/** + * Combine the (truncated) bitstream to a complete frame. + * @return -1 if no complete frame could be created, + * AVERROR(ENOMEM) if there was a memory allocation error + */ +int ff_combine_frame(ParseContext *pc, int next, const uint8_t **buf, int *buf_size); +int ff_mpeg4video_split(AVCodecContext *avctx, const uint8_t *buf, + int buf_size); +void ff_parse_close(AVCodecParserContext *s); + +/** + * Fetch timestamps for a specific byte within the current access unit. + * @param off byte position within the access unit + * @param remove Found timestamps will be removed if set to 1, kept if set to 0. + * @param fuzzy Only use found value if it is more informative than what we already have + */ +void ff_fetch_timestamp(AVCodecParserContext *s, int off, int remove, int fuzzy); + +#endif /* AVCODEC_PARSER_H */ diff --git a/libs/ffvpx/libavcodec/parser_list.c b/libs/ffvpx/libavcodec/parser_list.c new file mode 100644 index 000000000..b60c60bce --- /dev/null +++ b/libs/ffvpx/libavcodec/parser_list.c @@ -0,0 +1,8 @@ +static const AVCodecParser * const parser_list[] = { +#if CONFIG_VP8_PARSER + &ff_vp8_parser, +#endif +#if CONFIG_VP9_PARSER + &ff_vp9_parser, +#endif + NULL }; diff --git a/libs/ffvpx/libavcodec/pixblockdsp.h b/libs/ffvpx/libavcodec/pixblockdsp.h new file mode 100644 index 000000000..e036700ff --- /dev/null +++ b/libs/ffvpx/libavcodec/pixblockdsp.h @@ -0,0 +1,55 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_PIXBLOCKDSP_H +#define AVCODEC_PIXBLOCKDSP_H + +#include <stdint.h> + +#include "config.h" + +#include "avcodec.h" + +typedef struct PixblockDSPContext { + void (*get_pixels)(int16_t *av_restrict block /* align 16 */, + const uint8_t *pixels /* align 8 */, + ptrdiff_t stride); + void (*diff_pixels)(int16_t *av_restrict block /* align 16 */, + const uint8_t *s1 /* align 8 */, + const uint8_t *s2 /* align 8 */, + ptrdiff_t stride); + void (*diff_pixels_unaligned)(int16_t *av_restrict block /* align 16 */, + const uint8_t *s1, + const uint8_t *s2, + ptrdiff_t stride); + +} PixblockDSPContext; + +void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx); +void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); + +#endif /* AVCODEC_PIXBLOCKDSP_H */ diff --git a/libs/ffvpx/libavcodec/profiles.c b/libs/ffvpx/libavcodec/profiles.c new file mode 100644 index 000000000..d7dc960f3 --- /dev/null +++ b/libs/ffvpx/libavcodec/profiles.c @@ -0,0 +1,155 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "avcodec.h" +#include "profiles.h" + +#if !CONFIG_SMALL + +const AVProfile ff_aac_profiles[] = { + { FF_PROFILE_AAC_LOW, "LC" }, + { FF_PROFILE_AAC_HE, "HE-AAC" }, + { FF_PROFILE_AAC_HE_V2, "HE-AACv2" }, + { FF_PROFILE_AAC_LD, "LD" }, + { FF_PROFILE_AAC_ELD, "ELD" }, + { FF_PROFILE_AAC_MAIN, "Main" }, + { FF_PROFILE_AAC_LOW, "LC" }, + { FF_PROFILE_AAC_SSR, "SSR" }, + { FF_PROFILE_AAC_LTP, "LTP" }, + { FF_PROFILE_UNKNOWN }, +}; + +const AVProfile ff_dca_profiles[] = { + { FF_PROFILE_DTS, "DTS" }, + { FF_PROFILE_DTS_ES, "DTS-ES" }, + { FF_PROFILE_DTS_96_24, "DTS 96/24" }, + { FF_PROFILE_DTS_HD_HRA, "DTS-HD HRA" }, + { FF_PROFILE_DTS_HD_MA, "DTS-HD MA" }, + { FF_PROFILE_DTS_EXPRESS, "DTS Express" }, + { FF_PROFILE_UNKNOWN }, +}; + +const AVProfile ff_dnxhd_profiles[] = { + { FF_PROFILE_DNXHD, "DNXHD"}, + { FF_PROFILE_DNXHR_LB, "DNXHR LB"}, + { FF_PROFILE_DNXHR_SQ, "DNXHR SQ"}, + { FF_PROFILE_DNXHR_HQ, "DNXHR HQ" }, + { FF_PROFILE_DNXHR_HQX, "DNXHR HQX"}, + { FF_PROFILE_DNXHR_444, "DNXHR 444"}, + { FF_PROFILE_UNKNOWN }, +}; + +const AVProfile ff_h264_profiles[] = { + { FF_PROFILE_H264_BASELINE, "Baseline" }, + { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline" }, + { FF_PROFILE_H264_MAIN, "Main" }, + { FF_PROFILE_H264_EXTENDED, "Extended" }, + { FF_PROFILE_H264_HIGH, "High" }, + { FF_PROFILE_H264_HIGH_10, "High 10" }, + { FF_PROFILE_H264_HIGH_10_INTRA, "High 10 Intra" }, + { FF_PROFILE_H264_HIGH_422, "High 4:2:2" }, + { FF_PROFILE_H264_HIGH_422_INTRA, "High 4:2:2 Intra" }, + { FF_PROFILE_H264_HIGH_444, "High 4:4:4" }, + { FF_PROFILE_H264_HIGH_444_PREDICTIVE, "High 4:4:4 Predictive" }, + { FF_PROFILE_H264_HIGH_444_INTRA, "High 4:4:4 Intra" }, + { FF_PROFILE_H264_CAVLC_444, "CAVLC 4:4:4" }, + { FF_PROFILE_H264_MULTIVIEW_HIGH, "Multiview High" }, + { FF_PROFILE_H264_STEREO_HIGH, "Stereo High" }, + { FF_PROFILE_UNKNOWN }, +}; + +const AVProfile ff_hevc_profiles[] = { + { FF_PROFILE_HEVC_MAIN, "Main" }, + { FF_PROFILE_HEVC_MAIN_10, "Main 10" }, + { FF_PROFILE_HEVC_MAIN_STILL_PICTURE, "Main Still Picture" }, + { FF_PROFILE_HEVC_REXT, "Rext" }, + { FF_PROFILE_UNKNOWN }, +}; + +const AVProfile ff_jpeg2000_profiles[] = { + { FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_0, "JPEG 2000 codestream restriction 0" }, + { FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_1, "JPEG 2000 codestream restriction 1" }, + { FF_PROFILE_JPEG2000_CSTREAM_NO_RESTRICTION, "JPEG 2000 no codestream restrictions" }, + { FF_PROFILE_JPEG2000_DCINEMA_2K, "JPEG 2000 digital cinema 2K" }, + { FF_PROFILE_JPEG2000_DCINEMA_4K, "JPEG 2000 digital cinema 4K" }, + { FF_PROFILE_UNKNOWN }, +}; + +const AVProfile ff_mpeg2_video_profiles[] = { + { FF_PROFILE_MPEG2_422, "4:2:2" }, + { FF_PROFILE_MPEG2_HIGH, "High" }, + { FF_PROFILE_MPEG2_SS, "Spatially Scalable" }, + { FF_PROFILE_MPEG2_SNR_SCALABLE, "SNR Scalable" }, + { FF_PROFILE_MPEG2_MAIN, "Main" }, + { FF_PROFILE_MPEG2_SIMPLE, "Simple" }, + { FF_PROFILE_RESERVED, "Reserved" }, + { FF_PROFILE_RESERVED, "Reserved" }, + { FF_PROFILE_UNKNOWN }, +}; + +const AVProfile ff_mpeg4_video_profiles[] = { + { FF_PROFILE_MPEG4_SIMPLE, "Simple Profile" }, + { FF_PROFILE_MPEG4_SIMPLE_SCALABLE, "Simple Scalable Profile" }, + { FF_PROFILE_MPEG4_CORE, "Core Profile" }, + { FF_PROFILE_MPEG4_MAIN, "Main Profile" }, + { FF_PROFILE_MPEG4_N_BIT, "N-bit Profile" }, + { FF_PROFILE_MPEG4_SCALABLE_TEXTURE, "Scalable Texture Profile" }, + { FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION, "Simple Face Animation Profile" }, + { FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE, "Basic Animated Texture Profile" }, + { FF_PROFILE_MPEG4_HYBRID, "Hybrid Profile" }, + { FF_PROFILE_MPEG4_ADVANCED_REAL_TIME, "Advanced Real Time Simple Profile" }, + { FF_PROFILE_MPEG4_CORE_SCALABLE, "Code Scalable Profile" }, + { FF_PROFILE_MPEG4_ADVANCED_CODING, "Advanced Coding Profile" }, + { FF_PROFILE_MPEG4_ADVANCED_CORE, "Advanced Core Profile" }, + { FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE, "Advanced Scalable Texture Profile" }, + { FF_PROFILE_MPEG4_SIMPLE_STUDIO, "Simple Studio Profile" }, + { FF_PROFILE_MPEG4_ADVANCED_SIMPLE, "Advanced Simple Profile" }, + { FF_PROFILE_UNKNOWN }, +}; + +const AVProfile ff_vc1_profiles[] = { + { FF_PROFILE_VC1_SIMPLE, "Simple" }, + { FF_PROFILE_VC1_MAIN, "Main" }, + { FF_PROFILE_VC1_COMPLEX, "Complex" }, + { FF_PROFILE_VC1_ADVANCED, "Advanced" }, + { FF_PROFILE_UNKNOWN }, +}; + +const AVProfile ff_vp9_profiles[] = { + { FF_PROFILE_VP9_0, "Profile 0" }, + { FF_PROFILE_VP9_1, "Profile 1" }, + { FF_PROFILE_VP9_2, "Profile 2" }, + { FF_PROFILE_VP9_3, "Profile 3" }, + { FF_PROFILE_UNKNOWN }, +}; + +const AVProfile ff_av1_profiles[] = { + { FF_PROFILE_AV1_MAIN, "Main" }, + { FF_PROFILE_AV1_HIGH, "High" }, + { FF_PROFILE_AV1_PROFESSIONAL, "Professional" }, + { FF_PROFILE_UNKNOWN }, +}; + +const AVProfile ff_sbc_profiles[] = { + { FF_PROFILE_SBC_MSBC, "mSBC" }, + { FF_PROFILE_UNKNOWN }, +}; + +#endif /* !CONFIG_SMALL */ diff --git a/libs/ffvpx/libavcodec/profiles.h b/libs/ffvpx/libavcodec/profiles.h new file mode 100644 index 000000000..9d7e211e1 --- /dev/null +++ b/libs/ffvpx/libavcodec/profiles.h @@ -0,0 +1,37 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_PROFILES_H +#define AVCODEC_PROFILES_H + +#include "avcodec.h" + +extern const AVProfile ff_aac_profiles[]; +extern const AVProfile ff_dca_profiles[]; +extern const AVProfile ff_dnxhd_profiles[]; +extern const AVProfile ff_h264_profiles[]; +extern const AVProfile ff_hevc_profiles[]; +extern const AVProfile ff_jpeg2000_profiles[]; +extern const AVProfile ff_mpeg2_video_profiles[]; +extern const AVProfile ff_mpeg4_video_profiles[]; +extern const AVProfile ff_vc1_profiles[]; +extern const AVProfile ff_vp9_profiles[]; +extern const AVProfile ff_av1_profiles[]; +extern const AVProfile ff_sbc_profiles[]; + +#endif /* AVCODEC_PROFILES_H */ diff --git a/libs/ffvpx/libavcodec/pthread.c b/libs/ffvpx/libavcodec/pthread.c new file mode 100644 index 000000000..572471586 --- /dev/null +++ b/libs/ffvpx/libavcodec/pthread.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2004 Roman Shaposhnik + * Copyright (c) 2008 Alexander Strange (astrange@ithinksw.com) + * + * Many thanks to Steven M. Schultz for providing clever ideas and + * to Michael Niedermayer <michaelni@gmx.at> for writing initial + * implementation. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Multithreading support functions + * @see doc/multithreading.txt + */ + +#include "avcodec.h" +#include "internal.h" +#include "pthread_internal.h" +#include "thread.h" + +/** + * Set the threading algorithms used. + * + * Threading requires more than one thread. + * Frame threading requires entire frames to be passed to the codec, + * and introduces extra decoding delay, so is incompatible with low_delay. + * + * @param avctx The context. + */ +static void validate_thread_parameters(AVCodecContext *avctx) +{ + int frame_threading_supported = (avctx->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS) + && !(avctx->flags & AV_CODEC_FLAG_TRUNCATED) + && !(avctx->flags & AV_CODEC_FLAG_LOW_DELAY) + && !(avctx->flags2 & AV_CODEC_FLAG2_CHUNKS); + if (avctx->thread_count == 1) { + avctx->active_thread_type = 0; + } else if (frame_threading_supported && (avctx->thread_type & FF_THREAD_FRAME)) { + avctx->active_thread_type = FF_THREAD_FRAME; + } else if (avctx->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS && + avctx->thread_type & FF_THREAD_SLICE) { + avctx->active_thread_type = FF_THREAD_SLICE; + } else if (!(avctx->codec->capabilities & AV_CODEC_CAP_AUTO_THREADS)) { + avctx->thread_count = 1; + avctx->active_thread_type = 0; + } + + if (avctx->thread_count > MAX_AUTO_THREADS) + av_log(avctx, AV_LOG_WARNING, + "Application has requested %d threads. Using a thread count greater than %d is not recommended.\n", + avctx->thread_count, MAX_AUTO_THREADS); +} + +int ff_thread_init(AVCodecContext *avctx) +{ + validate_thread_parameters(avctx); + + if (avctx->active_thread_type&FF_THREAD_SLICE) + return ff_slice_thread_init(avctx); + else if (avctx->active_thread_type&FF_THREAD_FRAME) + return ff_frame_thread_init(avctx); + + return 0; +} + +void ff_thread_free(AVCodecContext *avctx) +{ + if (avctx->active_thread_type&FF_THREAD_FRAME) + ff_frame_thread_free(avctx, avctx->thread_count); + else + ff_slice_thread_free(avctx); +} diff --git a/libs/ffvpx/libavcodec/pthread_frame.c b/libs/ffvpx/libavcodec/pthread_frame.c new file mode 100644 index 000000000..5104b1beb --- /dev/null +++ b/libs/ffvpx/libavcodec/pthread_frame.c @@ -0,0 +1,1015 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Frame multithreading support functions + * @see doc/multithreading.txt + */ + +#include "config.h" + +#include <stdatomic.h> +#include <stdint.h> + +#include "avcodec.h" +#include "hwaccel.h" +#include "internal.h" +#include "pthread_internal.h" +#include "thread.h" +#include "version.h" + +#include "libavutil/avassert.h" +#include "libavutil/buffer.h" +#include "libavutil/common.h" +#include "libavutil/cpu.h" +#include "libavutil/frame.h" +#include "libavutil/internal.h" +#include "libavutil/log.h" +#include "libavutil/mem.h" +#include "libavutil/opt.h" +#include "libavutil/thread.h" + +enum { + ///< Set when the thread is awaiting a packet. + STATE_INPUT_READY, + ///< Set before the codec has called ff_thread_finish_setup(). + STATE_SETTING_UP, + /** + * Set when the codec calls get_buffer(). + * State is returned to STATE_SETTING_UP afterwards. + */ + STATE_GET_BUFFER, + /** + * Set when the codec calls get_format(). + * State is returned to STATE_SETTING_UP afterwards. + */ + STATE_GET_FORMAT, + ///< Set after the codec has called ff_thread_finish_setup(). + STATE_SETUP_FINISHED, +}; + +/** + * Context used by codec threads and stored in their AVCodecInternal thread_ctx. + */ +typedef struct PerThreadContext { + struct FrameThreadContext *parent; + + pthread_t thread; + int thread_init; + pthread_cond_t input_cond; ///< Used to wait for a new packet from the main thread. + pthread_cond_t progress_cond; ///< Used by child threads to wait for progress to change. + pthread_cond_t output_cond; ///< Used by the main thread to wait for frames to finish. + + pthread_mutex_t mutex; ///< Mutex used to protect the contents of the PerThreadContext. + pthread_mutex_t progress_mutex; ///< Mutex used to protect frame progress values and progress_cond. + + AVCodecContext *avctx; ///< Context used to decode packets passed to this thread. + + AVPacket avpkt; ///< Input packet (for decoding) or output (for encoding). + + AVFrame *frame; ///< Output frame (for decoding) or input (for encoding). + int got_frame; ///< The output of got_picture_ptr from the last avcodec_decode_video() call. + int result; ///< The result of the last codec decode/encode() call. + + atomic_int state; + + /** + * Array of frames passed to ff_thread_release_buffer(). + * Frames are released after all threads referencing them are finished. + */ + AVFrame *released_buffers; + int num_released_buffers; + int released_buffers_allocated; + + AVFrame *requested_frame; ///< AVFrame the codec passed to get_buffer() + int requested_flags; ///< flags passed to get_buffer() for requested_frame + + const enum AVPixelFormat *available_formats; ///< Format array for get_format() + enum AVPixelFormat result_format; ///< get_format() result + + int die; ///< Set when the thread should exit. + + int hwaccel_serializing; + int async_serializing; + + atomic_int debug_threads; ///< Set if the FF_DEBUG_THREADS option is set. +} PerThreadContext; + +/** + * Context stored in the client AVCodecInternal thread_ctx. + */ +typedef struct FrameThreadContext { + PerThreadContext *threads; ///< The contexts for each thread. + PerThreadContext *prev_thread; ///< The last thread submit_packet() was called on. + + pthread_mutex_t buffer_mutex; ///< Mutex used to protect get/release_buffer(). + /** + * This lock is used for ensuring threads run in serial when hwaccel + * is used. + */ + pthread_mutex_t hwaccel_mutex; + pthread_mutex_t async_mutex; + pthread_cond_t async_cond; + int async_lock; + + int next_decoding; ///< The next context to submit a packet to. + int next_finished; ///< The next context to return output from. + + int delaying; /**< + * Set for the first N packets, where N is the number of threads. + * While it is set, ff_thread_en/decode_frame won't return any results. + */ +} FrameThreadContext; + +#define THREAD_SAFE_CALLBACKS(avctx) \ +((avctx)->thread_safe_callbacks || (avctx)->get_buffer2 == avcodec_default_get_buffer2) + +static void async_lock(FrameThreadContext *fctx) +{ + pthread_mutex_lock(&fctx->async_mutex); + while (fctx->async_lock) + pthread_cond_wait(&fctx->async_cond, &fctx->async_mutex); + fctx->async_lock = 1; + pthread_mutex_unlock(&fctx->async_mutex); +} + +static void async_unlock(FrameThreadContext *fctx) +{ + pthread_mutex_lock(&fctx->async_mutex); + av_assert0(fctx->async_lock); + fctx->async_lock = 0; + pthread_cond_broadcast(&fctx->async_cond); + pthread_mutex_unlock(&fctx->async_mutex); +} + +/** + * Codec worker thread. + * + * Automatically calls ff_thread_finish_setup() if the codec does + * not provide an update_thread_context method, or if the codec returns + * before calling it. + */ +static attribute_align_arg void *frame_worker_thread(void *arg) +{ + PerThreadContext *p = arg; + AVCodecContext *avctx = p->avctx; + const AVCodec *codec = avctx->codec; + + pthread_mutex_lock(&p->mutex); + while (1) { + while (atomic_load(&p->state) == STATE_INPUT_READY && !p->die) + pthread_cond_wait(&p->input_cond, &p->mutex); + + if (p->die) break; + + if (!codec->update_thread_context && THREAD_SAFE_CALLBACKS(avctx)) + ff_thread_finish_setup(avctx); + + /* If a decoder supports hwaccel, then it must call ff_get_format(). + * Since that call must happen before ff_thread_finish_setup(), the + * decoder is required to implement update_thread_context() and call + * ff_thread_finish_setup() manually. Therefore the above + * ff_thread_finish_setup() call did not happen and hwaccel_serializing + * cannot be true here. */ + av_assert0(!p->hwaccel_serializing); + + /* if the previous thread uses hwaccel then we take the lock to ensure + * the threads don't run concurrently */ + if (avctx->hwaccel) { + pthread_mutex_lock(&p->parent->hwaccel_mutex); + p->hwaccel_serializing = 1; + } + + av_frame_unref(p->frame); + p->got_frame = 0; + p->result = codec->decode(avctx, p->frame, &p->got_frame, &p->avpkt); + + if ((p->result < 0 || !p->got_frame) && p->frame->buf[0]) { + if (avctx->internal->allocate_progress) + av_log(avctx, AV_LOG_ERROR, "A frame threaded decoder did not " + "free the frame on failure. This is a bug, please report it.\n"); + av_frame_unref(p->frame); + } + + if (atomic_load(&p->state) == STATE_SETTING_UP) + ff_thread_finish_setup(avctx); + + if (p->hwaccel_serializing) { + p->hwaccel_serializing = 0; + pthread_mutex_unlock(&p->parent->hwaccel_mutex); + } + + if (p->async_serializing) { + p->async_serializing = 0; + + async_unlock(p->parent); + } + + pthread_mutex_lock(&p->progress_mutex); + + atomic_store(&p->state, STATE_INPUT_READY); + + pthread_cond_broadcast(&p->progress_cond); + pthread_cond_signal(&p->output_cond); + pthread_mutex_unlock(&p->progress_mutex); + } + pthread_mutex_unlock(&p->mutex); + + return NULL; +} + +/** + * Update the next thread's AVCodecContext with values from the reference thread's context. + * + * @param dst The destination context. + * @param src The source context. + * @param for_user 0 if the destination is a codec thread, 1 if the destination is the user's thread + * @return 0 on success, negative error code on failure + */ +static int update_context_from_thread(AVCodecContext *dst, AVCodecContext *src, int for_user) +{ + int err = 0; + + if (dst != src && (for_user || !(src->codec_descriptor->props & AV_CODEC_PROP_INTRA_ONLY))) { + dst->time_base = src->time_base; + dst->framerate = src->framerate; + dst->width = src->width; + dst->height = src->height; + dst->pix_fmt = src->pix_fmt; + dst->sw_pix_fmt = src->sw_pix_fmt; + + dst->coded_width = src->coded_width; + dst->coded_height = src->coded_height; + + dst->has_b_frames = src->has_b_frames; + dst->idct_algo = src->idct_algo; + + dst->bits_per_coded_sample = src->bits_per_coded_sample; + dst->sample_aspect_ratio = src->sample_aspect_ratio; + + dst->profile = src->profile; + dst->level = src->level; + + dst->bits_per_raw_sample = src->bits_per_raw_sample; + dst->ticks_per_frame = src->ticks_per_frame; + dst->color_primaries = src->color_primaries; + + dst->color_trc = src->color_trc; + dst->colorspace = src->colorspace; + dst->color_range = src->color_range; + dst->chroma_sample_location = src->chroma_sample_location; + + dst->hwaccel = src->hwaccel; + dst->hwaccel_context = src->hwaccel_context; + + dst->channels = src->channels; + dst->sample_rate = src->sample_rate; + dst->sample_fmt = src->sample_fmt; + dst->channel_layout = src->channel_layout; + dst->internal->hwaccel_priv_data = src->internal->hwaccel_priv_data; + + if (!!dst->hw_frames_ctx != !!src->hw_frames_ctx || + (dst->hw_frames_ctx && dst->hw_frames_ctx->data != src->hw_frames_ctx->data)) { + av_buffer_unref(&dst->hw_frames_ctx); + + if (src->hw_frames_ctx) { + dst->hw_frames_ctx = av_buffer_ref(src->hw_frames_ctx); + if (!dst->hw_frames_ctx) + return AVERROR(ENOMEM); + } + } + + dst->hwaccel_flags = src->hwaccel_flags; + } + + if (for_user) { + dst->delay = src->thread_count - 1; +#if FF_API_CODED_FRAME +FF_DISABLE_DEPRECATION_WARNINGS + dst->coded_frame = src->coded_frame; +FF_ENABLE_DEPRECATION_WARNINGS +#endif + } else { + if (dst->codec->update_thread_context) + err = dst->codec->update_thread_context(dst, src); + } + + return err; +} + +/** + * Update the next thread's AVCodecContext with values set by the user. + * + * @param dst The destination context. + * @param src The source context. + * @return 0 on success, negative error code on failure + */ +static int update_context_from_user(AVCodecContext *dst, AVCodecContext *src) +{ +#define copy_fields(s, e) memcpy(&dst->s, &src->s, (char*)&dst->e - (char*)&dst->s); + dst->flags = src->flags; + + dst->draw_horiz_band= src->draw_horiz_band; + dst->get_buffer2 = src->get_buffer2; + + dst->opaque = src->opaque; + dst->debug = src->debug; + dst->debug_mv = src->debug_mv; + + dst->slice_flags = src->slice_flags; + dst->flags2 = src->flags2; + + copy_fields(skip_loop_filter, subtitle_header); + + dst->frame_number = src->frame_number; + dst->reordered_opaque = src->reordered_opaque; + dst->thread_safe_callbacks = src->thread_safe_callbacks; + + if (src->slice_count && src->slice_offset) { + if (dst->slice_count < src->slice_count) { + int err = av_reallocp_array(&dst->slice_offset, src->slice_count, + sizeof(*dst->slice_offset)); + if (err < 0) + return err; + } + memcpy(dst->slice_offset, src->slice_offset, + src->slice_count * sizeof(*dst->slice_offset)); + } + dst->slice_count = src->slice_count; + return 0; +#undef copy_fields +} + +/// Releases the buffers that this decoding thread was the last user of. +static void release_delayed_buffers(PerThreadContext *p) +{ + FrameThreadContext *fctx = p->parent; + + while (p->num_released_buffers > 0) { + AVFrame *f; + + pthread_mutex_lock(&fctx->buffer_mutex); + + // fix extended data in case the caller screwed it up + av_assert0(p->avctx->codec_type == AVMEDIA_TYPE_VIDEO || + p->avctx->codec_type == AVMEDIA_TYPE_AUDIO); + f = &p->released_buffers[--p->num_released_buffers]; + f->extended_data = f->data; + av_frame_unref(f); + + pthread_mutex_unlock(&fctx->buffer_mutex); + } +} + +static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx, + AVPacket *avpkt) +{ + FrameThreadContext *fctx = p->parent; + PerThreadContext *prev_thread = fctx->prev_thread; + const AVCodec *codec = p->avctx->codec; + int ret; + + if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY)) + return 0; + + pthread_mutex_lock(&p->mutex); + + ret = update_context_from_user(p->avctx, user_avctx); + if (ret) { + pthread_mutex_unlock(&p->mutex); + return ret; + } + atomic_store_explicit(&p->debug_threads, + (p->avctx->debug & FF_DEBUG_THREADS) != 0, + memory_order_relaxed); + + release_delayed_buffers(p); + + if (prev_thread) { + int err; + if (atomic_load(&prev_thread->state) == STATE_SETTING_UP) { + pthread_mutex_lock(&prev_thread->progress_mutex); + while (atomic_load(&prev_thread->state) == STATE_SETTING_UP) + pthread_cond_wait(&prev_thread->progress_cond, &prev_thread->progress_mutex); + pthread_mutex_unlock(&prev_thread->progress_mutex); + } + + err = update_context_from_thread(p->avctx, prev_thread->avctx, 0); + if (err) { + pthread_mutex_unlock(&p->mutex); + return err; + } + } + + av_packet_unref(&p->avpkt); + ret = av_packet_ref(&p->avpkt, avpkt); + if (ret < 0) { + pthread_mutex_unlock(&p->mutex); + av_log(p->avctx, AV_LOG_ERROR, "av_packet_ref() failed in submit_packet()\n"); + return ret; + } + + atomic_store(&p->state, STATE_SETTING_UP); + pthread_cond_signal(&p->input_cond); + pthread_mutex_unlock(&p->mutex); + + /* + * If the client doesn't have a thread-safe get_buffer(), + * then decoding threads call back to the main thread, + * and it calls back to the client here. + */ + + if (!p->avctx->thread_safe_callbacks && ( + p->avctx->get_format != avcodec_default_get_format || + p->avctx->get_buffer2 != avcodec_default_get_buffer2)) { + while (atomic_load(&p->state) != STATE_SETUP_FINISHED && atomic_load(&p->state) != STATE_INPUT_READY) { + int call_done = 1; + pthread_mutex_lock(&p->progress_mutex); + while (atomic_load(&p->state) == STATE_SETTING_UP) + pthread_cond_wait(&p->progress_cond, &p->progress_mutex); + + switch (atomic_load_explicit(&p->state, memory_order_acquire)) { + case STATE_GET_BUFFER: + p->result = ff_get_buffer(p->avctx, p->requested_frame, p->requested_flags); + break; + case STATE_GET_FORMAT: + p->result_format = ff_get_format(p->avctx, p->available_formats); + break; + default: + call_done = 0; + break; + } + if (call_done) { + atomic_store(&p->state, STATE_SETTING_UP); + pthread_cond_signal(&p->progress_cond); + } + pthread_mutex_unlock(&p->progress_mutex); + } + } + + fctx->prev_thread = p; + fctx->next_decoding++; + + return 0; +} + +int ff_thread_decode_frame(AVCodecContext *avctx, + AVFrame *picture, int *got_picture_ptr, + AVPacket *avpkt) +{ + FrameThreadContext *fctx = avctx->internal->thread_ctx; + int finished = fctx->next_finished; + PerThreadContext *p; + int err; + + /* release the async lock, permitting blocked hwaccel threads to + * go forward while we are in this function */ + async_unlock(fctx); + + /* + * Submit a packet to the next decoding thread. + */ + + p = &fctx->threads[fctx->next_decoding]; + err = submit_packet(p, avctx, avpkt); + if (err) + goto finish; + + /* + * If we're still receiving the initial packets, don't return a frame. + */ + + if (fctx->next_decoding > (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1))) + fctx->delaying = 0; + + if (fctx->delaying) { + *got_picture_ptr=0; + if (avpkt->size) { + err = avpkt->size; + goto finish; + } + } + + /* + * Return the next available frame from the oldest thread. + * If we're at the end of the stream, then we have to skip threads that + * didn't output a frame/error, because we don't want to accidentally signal + * EOF (avpkt->size == 0 && *got_picture_ptr == 0 && err >= 0). + */ + + do { + p = &fctx->threads[finished++]; + + if (atomic_load(&p->state) != STATE_INPUT_READY) { + pthread_mutex_lock(&p->progress_mutex); + while (atomic_load_explicit(&p->state, memory_order_relaxed) != STATE_INPUT_READY) + pthread_cond_wait(&p->output_cond, &p->progress_mutex); + pthread_mutex_unlock(&p->progress_mutex); + } + + av_frame_move_ref(picture, p->frame); + *got_picture_ptr = p->got_frame; + picture->pkt_dts = p->avpkt.dts; + err = p->result; + + /* + * A later call with avkpt->size == 0 may loop over all threads, + * including this one, searching for a frame/error to return before being + * stopped by the "finished != fctx->next_finished" condition. + * Make sure we don't mistakenly return the same frame/error again. + */ + p->got_frame = 0; + p->result = 0; + + if (finished >= avctx->thread_count) finished = 0; + } while (!avpkt->size && !*got_picture_ptr && err >= 0 && finished != fctx->next_finished); + + update_context_from_thread(avctx, p->avctx, 1); + + if (fctx->next_decoding >= avctx->thread_count) fctx->next_decoding = 0; + + fctx->next_finished = finished; + + /* return the size of the consumed packet if no error occurred */ + if (err >= 0) + err = avpkt->size; +finish: + async_lock(fctx); + return err; +} + +void ff_thread_report_progress(ThreadFrame *f, int n, int field) +{ + PerThreadContext *p; + atomic_int *progress = f->progress ? (atomic_int*)f->progress->data : NULL; + + if (!progress || + atomic_load_explicit(&progress[field], memory_order_relaxed) >= n) + return; + + p = f->owner[field]->internal->thread_ctx; + + if (atomic_load_explicit(&p->debug_threads, memory_order_relaxed)) + av_log(f->owner[field], AV_LOG_DEBUG, + "%p finished %d field %d\n", progress, n, field); + + pthread_mutex_lock(&p->progress_mutex); + + atomic_store_explicit(&progress[field], n, memory_order_release); + + pthread_cond_broadcast(&p->progress_cond); + pthread_mutex_unlock(&p->progress_mutex); +} + +void ff_thread_await_progress(ThreadFrame *f, int n, int field) +{ + PerThreadContext *p; + atomic_int *progress = f->progress ? (atomic_int*)f->progress->data : NULL; + + if (!progress || + atomic_load_explicit(&progress[field], memory_order_acquire) >= n) + return; + + p = f->owner[field]->internal->thread_ctx; + + if (atomic_load_explicit(&p->debug_threads, memory_order_relaxed)) + av_log(f->owner[field], AV_LOG_DEBUG, + "thread awaiting %d field %d from %p\n", n, field, progress); + + pthread_mutex_lock(&p->progress_mutex); + while (atomic_load_explicit(&progress[field], memory_order_relaxed) < n) + pthread_cond_wait(&p->progress_cond, &p->progress_mutex); + pthread_mutex_unlock(&p->progress_mutex); +} + +void ff_thread_finish_setup(AVCodecContext *avctx) { + PerThreadContext *p = avctx->internal->thread_ctx; + + if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return; + + if (avctx->hwaccel && !p->hwaccel_serializing) { + pthread_mutex_lock(&p->parent->hwaccel_mutex); + p->hwaccel_serializing = 1; + } + + /* this assumes that no hwaccel calls happen before ff_thread_finish_setup() */ + if (avctx->hwaccel && + !(avctx->hwaccel->caps_internal & HWACCEL_CAP_ASYNC_SAFE)) { + p->async_serializing = 1; + + async_lock(p->parent); + } + + pthread_mutex_lock(&p->progress_mutex); + if(atomic_load(&p->state) == STATE_SETUP_FINISHED){ + av_log(avctx, AV_LOG_WARNING, "Multiple ff_thread_finish_setup() calls\n"); + } + + atomic_store(&p->state, STATE_SETUP_FINISHED); + + pthread_cond_broadcast(&p->progress_cond); + pthread_mutex_unlock(&p->progress_mutex); +} + +/// Waits for all threads to finish. +static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count) +{ + int i; + + async_unlock(fctx); + + for (i = 0; i < thread_count; i++) { + PerThreadContext *p = &fctx->threads[i]; + + if (atomic_load(&p->state) != STATE_INPUT_READY) { + pthread_mutex_lock(&p->progress_mutex); + while (atomic_load(&p->state) != STATE_INPUT_READY) + pthread_cond_wait(&p->output_cond, &p->progress_mutex); + pthread_mutex_unlock(&p->progress_mutex); + } + p->got_frame = 0; + } + + async_lock(fctx); +} + +void ff_frame_thread_free(AVCodecContext *avctx, int thread_count) +{ + FrameThreadContext *fctx = avctx->internal->thread_ctx; + const AVCodec *codec = avctx->codec; + int i; + + park_frame_worker_threads(fctx, thread_count); + + if (fctx->prev_thread && fctx->prev_thread != fctx->threads) + if (update_context_from_thread(fctx->threads->avctx, fctx->prev_thread->avctx, 0) < 0) { + av_log(avctx, AV_LOG_ERROR, "Final thread update failed\n"); + fctx->prev_thread->avctx->internal->is_copy = fctx->threads->avctx->internal->is_copy; + fctx->threads->avctx->internal->is_copy = 1; + } + + for (i = 0; i < thread_count; i++) { + PerThreadContext *p = &fctx->threads[i]; + + pthread_mutex_lock(&p->mutex); + p->die = 1; + pthread_cond_signal(&p->input_cond); + pthread_mutex_unlock(&p->mutex); + + if (p->thread_init) + pthread_join(p->thread, NULL); + p->thread_init=0; + + if (codec->close && p->avctx) + codec->close(p->avctx); + + release_delayed_buffers(p); + av_frame_free(&p->frame); + } + + for (i = 0; i < thread_count; i++) { + PerThreadContext *p = &fctx->threads[i]; + + pthread_mutex_destroy(&p->mutex); + pthread_mutex_destroy(&p->progress_mutex); + pthread_cond_destroy(&p->input_cond); + pthread_cond_destroy(&p->progress_cond); + pthread_cond_destroy(&p->output_cond); + av_packet_unref(&p->avpkt); + av_freep(&p->released_buffers); + + if (i && p->avctx) { + av_freep(&p->avctx->priv_data); + av_freep(&p->avctx->slice_offset); + } + + if (p->avctx) { + av_freep(&p->avctx->internal); + av_buffer_unref(&p->avctx->hw_frames_ctx); + } + + av_freep(&p->avctx); + } + + av_freep(&fctx->threads); + pthread_mutex_destroy(&fctx->buffer_mutex); + pthread_mutex_destroy(&fctx->hwaccel_mutex); + pthread_mutex_destroy(&fctx->async_mutex); + pthread_cond_destroy(&fctx->async_cond); + + av_freep(&avctx->internal->thread_ctx); + + if (avctx->priv_data && avctx->codec && avctx->codec->priv_class) + av_opt_free(avctx->priv_data); + avctx->codec = NULL; +} + +int ff_frame_thread_init(AVCodecContext *avctx) +{ + int thread_count = avctx->thread_count; + const AVCodec *codec = avctx->codec; + AVCodecContext *src = avctx; + FrameThreadContext *fctx; + int i, err = 0; + + if (!thread_count) { + int nb_cpus = av_cpu_count(); +#if FF_API_DEBUG_MV + if ((avctx->debug & (FF_DEBUG_VIS_QP | FF_DEBUG_VIS_MB_TYPE)) || avctx->debug_mv) + nb_cpus = 1; +#endif + // use number of cores + 1 as thread count if there is more than one + if (nb_cpus > 1) + thread_count = avctx->thread_count = FFMIN(nb_cpus + 1, MAX_AUTO_THREADS); + else + thread_count = avctx->thread_count = 1; + } + + if (thread_count <= 1) { + avctx->active_thread_type = 0; + return 0; + } + + avctx->internal->thread_ctx = fctx = av_mallocz(sizeof(FrameThreadContext)); + if (!fctx) + return AVERROR(ENOMEM); + + fctx->threads = av_mallocz_array(thread_count, sizeof(PerThreadContext)); + if (!fctx->threads) { + av_freep(&avctx->internal->thread_ctx); + return AVERROR(ENOMEM); + } + + pthread_mutex_init(&fctx->buffer_mutex, NULL); + pthread_mutex_init(&fctx->hwaccel_mutex, NULL); + pthread_mutex_init(&fctx->async_mutex, NULL); + pthread_cond_init(&fctx->async_cond, NULL); + + fctx->async_lock = 1; + fctx->delaying = 1; + + for (i = 0; i < thread_count; i++) { + AVCodecContext *copy = av_malloc(sizeof(AVCodecContext)); + PerThreadContext *p = &fctx->threads[i]; + + pthread_mutex_init(&p->mutex, NULL); + pthread_mutex_init(&p->progress_mutex, NULL); + pthread_cond_init(&p->input_cond, NULL); + pthread_cond_init(&p->progress_cond, NULL); + pthread_cond_init(&p->output_cond, NULL); + + p->frame = av_frame_alloc(); + if (!p->frame) { + av_freep(©); + err = AVERROR(ENOMEM); + goto error; + } + + p->parent = fctx; + p->avctx = copy; + + if (!copy) { + err = AVERROR(ENOMEM); + goto error; + } + + *copy = *src; + + copy->internal = av_malloc(sizeof(AVCodecInternal)); + if (!copy->internal) { + copy->priv_data = NULL; + err = AVERROR(ENOMEM); + goto error; + } + *copy->internal = *src->internal; + copy->internal->thread_ctx = p; + copy->internal->last_pkt_props = &p->avpkt; + + if (!i) { + src = copy; + + if (codec->init) + err = codec->init(copy); + + update_context_from_thread(avctx, copy, 1); + } else { + copy->priv_data = av_malloc(codec->priv_data_size); + if (!copy->priv_data) { + err = AVERROR(ENOMEM); + goto error; + } + memcpy(copy->priv_data, src->priv_data, codec->priv_data_size); + copy->internal->is_copy = 1; + + if (codec->init_thread_copy) + err = codec->init_thread_copy(copy); + } + + if (err) goto error; + + atomic_init(&p->debug_threads, (copy->debug & FF_DEBUG_THREADS) != 0); + + err = AVERROR(pthread_create(&p->thread, NULL, frame_worker_thread, p)); + p->thread_init= !err; + if(!p->thread_init) + goto error; + } + + return 0; + +error: + ff_frame_thread_free(avctx, i+1); + + return err; +} + +void ff_thread_flush(AVCodecContext *avctx) +{ + int i; + FrameThreadContext *fctx = avctx->internal->thread_ctx; + + if (!fctx) return; + + park_frame_worker_threads(fctx, avctx->thread_count); + if (fctx->prev_thread) { + if (fctx->prev_thread != &fctx->threads[0]) + update_context_from_thread(fctx->threads[0].avctx, fctx->prev_thread->avctx, 0); + } + + fctx->next_decoding = fctx->next_finished = 0; + fctx->delaying = 1; + fctx->prev_thread = NULL; + for (i = 0; i < avctx->thread_count; i++) { + PerThreadContext *p = &fctx->threads[i]; + // Make sure decode flush calls with size=0 won't return old frames + p->got_frame = 0; + av_frame_unref(p->frame); + p->result = 0; + + release_delayed_buffers(p); + + if (avctx->codec->flush) + avctx->codec->flush(p->avctx); + } +} + +int ff_thread_can_start_frame(AVCodecContext *avctx) +{ + PerThreadContext *p = avctx->internal->thread_ctx; + if ((avctx->active_thread_type&FF_THREAD_FRAME) && atomic_load(&p->state) != STATE_SETTING_UP && + (avctx->codec->update_thread_context || !THREAD_SAFE_CALLBACKS(avctx))) { + return 0; + } + return 1; +} + +static int thread_get_buffer_internal(AVCodecContext *avctx, ThreadFrame *f, int flags) +{ + PerThreadContext *p = avctx->internal->thread_ctx; + int err; + + f->owner[0] = f->owner[1] = avctx; + + if (!(avctx->active_thread_type & FF_THREAD_FRAME)) + return ff_get_buffer(avctx, f->f, flags); + + if (atomic_load(&p->state) != STATE_SETTING_UP && + (avctx->codec->update_thread_context || !THREAD_SAFE_CALLBACKS(avctx))) { + av_log(avctx, AV_LOG_ERROR, "get_buffer() cannot be called after ff_thread_finish_setup()\n"); + return -1; + } + + if (avctx->internal->allocate_progress) { + atomic_int *progress; + f->progress = av_buffer_alloc(2 * sizeof(*progress)); + if (!f->progress) { + return AVERROR(ENOMEM); + } + progress = (atomic_int*)f->progress->data; + + atomic_init(&progress[0], -1); + atomic_init(&progress[1], -1); + } + + pthread_mutex_lock(&p->parent->buffer_mutex); + if (avctx->thread_safe_callbacks || + avctx->get_buffer2 == avcodec_default_get_buffer2) { + err = ff_get_buffer(avctx, f->f, flags); + } else { + pthread_mutex_lock(&p->progress_mutex); + p->requested_frame = f->f; + p->requested_flags = flags; + atomic_store_explicit(&p->state, STATE_GET_BUFFER, memory_order_release); + pthread_cond_broadcast(&p->progress_cond); + + while (atomic_load(&p->state) != STATE_SETTING_UP) + pthread_cond_wait(&p->progress_cond, &p->progress_mutex); + + err = p->result; + + pthread_mutex_unlock(&p->progress_mutex); + + } + if (!THREAD_SAFE_CALLBACKS(avctx) && !avctx->codec->update_thread_context) + ff_thread_finish_setup(avctx); + if (err) + av_buffer_unref(&f->progress); + + pthread_mutex_unlock(&p->parent->buffer_mutex); + + return err; +} + +enum AVPixelFormat ff_thread_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt) +{ + enum AVPixelFormat res; + PerThreadContext *p = avctx->internal->thread_ctx; + if (!(avctx->active_thread_type & FF_THREAD_FRAME) || avctx->thread_safe_callbacks || + avctx->get_format == avcodec_default_get_format) + return ff_get_format(avctx, fmt); + if (atomic_load(&p->state) != STATE_SETTING_UP) { + av_log(avctx, AV_LOG_ERROR, "get_format() cannot be called after ff_thread_finish_setup()\n"); + return -1; + } + pthread_mutex_lock(&p->progress_mutex); + p->available_formats = fmt; + atomic_store(&p->state, STATE_GET_FORMAT); + pthread_cond_broadcast(&p->progress_cond); + + while (atomic_load(&p->state) != STATE_SETTING_UP) + pthread_cond_wait(&p->progress_cond, &p->progress_mutex); + + res = p->result_format; + + pthread_mutex_unlock(&p->progress_mutex); + + return res; +} + +int ff_thread_get_buffer(AVCodecContext *avctx, ThreadFrame *f, int flags) +{ + int ret = thread_get_buffer_internal(avctx, f, flags); + if (ret < 0) + av_log(avctx, AV_LOG_ERROR, "thread_get_buffer() failed\n"); + return ret; +} + +void ff_thread_release_buffer(AVCodecContext *avctx, ThreadFrame *f) +{ + PerThreadContext *p = avctx->internal->thread_ctx; + FrameThreadContext *fctx; + AVFrame *dst, *tmp; + int can_direct_free = !(avctx->active_thread_type & FF_THREAD_FRAME) || + avctx->thread_safe_callbacks || + avctx->get_buffer2 == avcodec_default_get_buffer2; + + if (!f->f || !f->f->buf[0]) + return; + + if (avctx->debug & FF_DEBUG_BUFFERS) + av_log(avctx, AV_LOG_DEBUG, "thread_release_buffer called on pic %p\n", f); + + av_buffer_unref(&f->progress); + f->owner[0] = f->owner[1] = NULL; + + if (can_direct_free) { + av_frame_unref(f->f); + return; + } + + fctx = p->parent; + pthread_mutex_lock(&fctx->buffer_mutex); + + if (p->num_released_buffers + 1 >= INT_MAX / sizeof(*p->released_buffers)) + goto fail; + tmp = av_fast_realloc(p->released_buffers, &p->released_buffers_allocated, + (p->num_released_buffers + 1) * + sizeof(*p->released_buffers)); + if (!tmp) + goto fail; + p->released_buffers = tmp; + + dst = &p->released_buffers[p->num_released_buffers]; + av_frame_move_ref(dst, f->f); + + p->num_released_buffers++; + +fail: + pthread_mutex_unlock(&fctx->buffer_mutex); +} diff --git a/libs/ffvpx/libavcodec/pthread_internal.h b/libs/ffvpx/libavcodec/pthread_internal.h new file mode 100644 index 000000000..d2115cbba --- /dev/null +++ b/libs/ffvpx/libavcodec/pthread_internal.h @@ -0,0 +1,34 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_PTHREAD_INTERNAL_H +#define AVCODEC_PTHREAD_INTERNAL_H + +#include "avcodec.h" + +/* H.264 slice threading seems to be buggy with more than 16 threads, + * limit the number of threads to 16 for automatic detection */ +#define MAX_AUTO_THREADS 16 + +int ff_slice_thread_init(AVCodecContext *avctx); +void ff_slice_thread_free(AVCodecContext *avctx); + +int ff_frame_thread_init(AVCodecContext *avctx); +void ff_frame_thread_free(AVCodecContext *avctx, int thread_count); + +#endif // AVCODEC_PTHREAD_INTERNAL_H diff --git a/libs/ffvpx/libavcodec/pthread_slice.c b/libs/ffvpx/libavcodec/pthread_slice.c new file mode 100644 index 000000000..77cfe3c9f --- /dev/null +++ b/libs/ffvpx/libavcodec/pthread_slice.c @@ -0,0 +1,242 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Slice multithreading support functions + * @see doc/multithreading.txt + */ + +#include "config.h" + +#include "avcodec.h" +#include "internal.h" +#include "pthread_internal.h" +#include "thread.h" + +#include "libavutil/avassert.h" +#include "libavutil/common.h" +#include "libavutil/cpu.h" +#include "libavutil/mem.h" +#include "libavutil/thread.h" +#include "libavutil/slicethread.h" + +typedef int (action_func)(AVCodecContext *c, void *arg); +typedef int (action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr); +typedef int (main_func)(AVCodecContext *c); + +typedef struct SliceThreadContext { + AVSliceThread *thread; + action_func *func; + action_func2 *func2; + main_func *mainfunc; + void *args; + int *rets; + int job_size; + + int *entries; + int entries_count; + int thread_count; + pthread_cond_t *progress_cond; + pthread_mutex_t *progress_mutex; +} SliceThreadContext; + +static void main_function(void *priv) { + AVCodecContext *avctx = priv; + SliceThreadContext *c = avctx->internal->thread_ctx; + c->mainfunc(avctx); +} + +static void worker_func(void *priv, int jobnr, int threadnr, int nb_jobs, int nb_threads) +{ + AVCodecContext *avctx = priv; + SliceThreadContext *c = avctx->internal->thread_ctx; + int ret; + + ret = c->func ? c->func(avctx, (char *)c->args + c->job_size * jobnr) + : c->func2(avctx, c->args, jobnr, threadnr); + if (c->rets) + c->rets[jobnr] = ret; +} + +void ff_slice_thread_free(AVCodecContext *avctx) +{ + SliceThreadContext *c = avctx->internal->thread_ctx; + int i; + + avpriv_slicethread_free(&c->thread); + + for (i = 0; i < c->thread_count; i++) { + pthread_mutex_destroy(&c->progress_mutex[i]); + pthread_cond_destroy(&c->progress_cond[i]); + } + + av_freep(&c->entries); + av_freep(&c->progress_mutex); + av_freep(&c->progress_cond); + av_freep(&avctx->internal->thread_ctx); +} + +static int thread_execute(AVCodecContext *avctx, action_func* func, void *arg, int *ret, int job_count, int job_size) +{ + SliceThreadContext *c = avctx->internal->thread_ctx; + + if (!(avctx->active_thread_type&FF_THREAD_SLICE) || avctx->thread_count <= 1) + return avcodec_default_execute(avctx, func, arg, ret, job_count, job_size); + + if (job_count <= 0) + return 0; + + c->job_size = job_size; + c->args = arg; + c->func = func; + c->rets = ret; + + avpriv_slicethread_execute(c->thread, job_count, !!c->mainfunc ); + return 0; +} + +static int thread_execute2(AVCodecContext *avctx, action_func2* func2, void *arg, int *ret, int job_count) +{ + SliceThreadContext *c = avctx->internal->thread_ctx; + c->func2 = func2; + return thread_execute(avctx, NULL, arg, ret, job_count, 0); +} + +int ff_slice_thread_execute_with_mainfunc(AVCodecContext *avctx, action_func2* func2, main_func *mainfunc, void *arg, int *ret, int job_count) +{ + SliceThreadContext *c = avctx->internal->thread_ctx; + c->func2 = func2; + c->mainfunc = mainfunc; + return thread_execute(avctx, NULL, arg, ret, job_count, 0); +} + +int ff_slice_thread_init(AVCodecContext *avctx) +{ + SliceThreadContext *c; + int thread_count = avctx->thread_count; + static void (*mainfunc)(void *); + + // We cannot do this in the encoder init as the threads are created before + if (av_codec_is_encoder(avctx->codec) && + avctx->codec_id == AV_CODEC_ID_MPEG1VIDEO && + avctx->height > 2800) + thread_count = avctx->thread_count = 1; + + if (!thread_count) { + int nb_cpus = av_cpu_count(); + if (avctx->height) + nb_cpus = FFMIN(nb_cpus, (avctx->height+15)/16); + // use number of cores + 1 as thread count if there is more than one + if (nb_cpus > 1) + thread_count = avctx->thread_count = FFMIN(nb_cpus + 1, MAX_AUTO_THREADS); + else + thread_count = avctx->thread_count = 1; + } + + if (thread_count <= 1) { + avctx->active_thread_type = 0; + return 0; + } + + avctx->internal->thread_ctx = c = av_mallocz(sizeof(*c)); + mainfunc = avctx->codec->caps_internal & FF_CODEC_CAP_SLICE_THREAD_HAS_MF ? &main_function : NULL; + if (!c || (thread_count = avpriv_slicethread_create(&c->thread, avctx, worker_func, mainfunc, thread_count)) <= 1) { + if (c) + avpriv_slicethread_free(&c->thread); + av_freep(&avctx->internal->thread_ctx); + avctx->thread_count = 1; + avctx->active_thread_type = 0; + return 0; + } + avctx->thread_count = thread_count; + + avctx->execute = thread_execute; + avctx->execute2 = thread_execute2; + return 0; +} + +void ff_thread_report_progress2(AVCodecContext *avctx, int field, int thread, int n) +{ + SliceThreadContext *p = avctx->internal->thread_ctx; + int *entries = p->entries; + + pthread_mutex_lock(&p->progress_mutex[thread]); + entries[field] +=n; + pthread_cond_signal(&p->progress_cond[thread]); + pthread_mutex_unlock(&p->progress_mutex[thread]); +} + +void ff_thread_await_progress2(AVCodecContext *avctx, int field, int thread, int shift) +{ + SliceThreadContext *p = avctx->internal->thread_ctx; + int *entries = p->entries; + + if (!entries || !field) return; + + thread = thread ? thread - 1 : p->thread_count - 1; + + pthread_mutex_lock(&p->progress_mutex[thread]); + while ((entries[field - 1] - entries[field]) < shift){ + pthread_cond_wait(&p->progress_cond[thread], &p->progress_mutex[thread]); + } + pthread_mutex_unlock(&p->progress_mutex[thread]); +} + +int ff_alloc_entries(AVCodecContext *avctx, int count) +{ + int i; + + if (avctx->active_thread_type & FF_THREAD_SLICE) { + SliceThreadContext *p = avctx->internal->thread_ctx; + + if (p->entries) { + av_assert0(p->thread_count == avctx->thread_count); + av_freep(&p->entries); + } + + p->thread_count = avctx->thread_count; + p->entries = av_mallocz_array(count, sizeof(int)); + + if (!p->progress_mutex) { + p->progress_mutex = av_malloc_array(p->thread_count, sizeof(pthread_mutex_t)); + p->progress_cond = av_malloc_array(p->thread_count, sizeof(pthread_cond_t)); + } + + if (!p->entries || !p->progress_mutex || !p->progress_cond) { + av_freep(&p->entries); + av_freep(&p->progress_mutex); + av_freep(&p->progress_cond); + return AVERROR(ENOMEM); + } + p->entries_count = count; + + for (i = 0; i < p->thread_count; i++) { + pthread_mutex_init(&p->progress_mutex[i], NULL); + pthread_cond_init(&p->progress_cond[i], NULL); + } + } + + return 0; +} + +void ff_reset_entries(AVCodecContext *avctx) +{ + SliceThreadContext *p = avctx->internal->thread_ctx; + memset(p->entries, 0, p->entries_count * sizeof(int)); +} diff --git a/libs/ffvpx/libavcodec/put_bits.h b/libs/ffvpx/libavcodec/put_bits.h new file mode 100644 index 000000000..1ceb1cc76 --- /dev/null +++ b/libs/ffvpx/libavcodec/put_bits.h @@ -0,0 +1,365 @@ +/* + * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * bitstream writer API + */ + +#ifndef AVCODEC_PUT_BITS_H +#define AVCODEC_PUT_BITS_H + +#include <stdint.h> +#include <stddef.h> + +#include "libavutil/intreadwrite.h" +#include "libavutil/avassert.h" + +typedef struct PutBitContext { + uint32_t bit_buf; + int bit_left; + uint8_t *buf, *buf_ptr, *buf_end; + int size_in_bits; +} PutBitContext; + +/** + * Initialize the PutBitContext s. + * + * @param buffer the buffer where to put bits + * @param buffer_size the size in bytes of buffer + */ +static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, + int buffer_size) +{ + if (buffer_size < 0) { + buffer_size = 0; + buffer = NULL; + } + + s->size_in_bits = 8 * buffer_size; + s->buf = buffer; + s->buf_end = s->buf + buffer_size; + s->buf_ptr = s->buf; + s->bit_left = 32; + s->bit_buf = 0; +} + +/** + * Rebase the bit writer onto a reallocated buffer. + * + * @param buffer the buffer where to put bits + * @param buffer_size the size in bytes of buffer, + * must be larger than the previous size + */ +static inline void rebase_put_bits(PutBitContext *s, uint8_t *buffer, + int buffer_size) +{ + av_assert0(8*buffer_size > s->size_in_bits); + + s->buf_end = buffer + buffer_size; + s->buf_ptr = buffer + (s->buf_ptr - s->buf); + s->buf = buffer; + s->size_in_bits = 8 * buffer_size; +} + +/** + * @return the total number of bits written to the bitstream. + */ +static inline int put_bits_count(PutBitContext *s) +{ + return (s->buf_ptr - s->buf) * 8 + 32 - s->bit_left; +} + +/** + * @return the number of bits available in the bitstream. + */ +static inline int put_bits_left(PutBitContext* s) +{ + return (s->buf_end - s->buf_ptr) * 8 - 32 + s->bit_left; +} + +/** + * Pad the end of the output stream with zeros. + */ +static inline void flush_put_bits(PutBitContext *s) +{ +#ifndef BITSTREAM_WRITER_LE + if (s->bit_left < 32) + s->bit_buf <<= s->bit_left; +#endif + while (s->bit_left < 32) { + av_assert0(s->buf_ptr < s->buf_end); +#ifdef BITSTREAM_WRITER_LE + *s->buf_ptr++ = s->bit_buf; + s->bit_buf >>= 8; +#else + *s->buf_ptr++ = s->bit_buf >> 24; + s->bit_buf <<= 8; +#endif + s->bit_left += 8; + } + s->bit_left = 32; + s->bit_buf = 0; +} + +static inline void flush_put_bits_le(PutBitContext *s) +{ + while (s->bit_left < 32) { + av_assert0(s->buf_ptr < s->buf_end); + *s->buf_ptr++ = s->bit_buf; + s->bit_buf >>= 8; + s->bit_left += 8; + } + s->bit_left = 32; + s->bit_buf = 0; +} + +#ifdef BITSTREAM_WRITER_LE +#define avpriv_align_put_bits align_put_bits_unsupported_here +#define avpriv_put_string ff_put_string_unsupported_here +#define avpriv_copy_bits avpriv_copy_bits_unsupported_here +#else +/** + * Pad the bitstream with zeros up to the next byte boundary. + */ +void avpriv_align_put_bits(PutBitContext *s); + +/** + * Put the string string in the bitstream. + * + * @param terminate_string 0-terminates the written string if value is 1 + */ +void avpriv_put_string(PutBitContext *pb, const char *string, + int terminate_string); + +/** + * Copy the content of src to the bitstream. + * + * @param length the number of bits of src to copy + */ +void avpriv_copy_bits(PutBitContext *pb, const uint8_t *src, int length); +#endif + +/** + * Write up to 31 bits into a bitstream. + * Use put_bits32 to write 32 bits. + */ +static inline void put_bits(PutBitContext *s, int n, unsigned int value) +{ + unsigned int bit_buf; + int bit_left; + + av_assert2(n <= 31 && value < (1U << n)); + + bit_buf = s->bit_buf; + bit_left = s->bit_left; + + /* XXX: optimize */ +#ifdef BITSTREAM_WRITER_LE + bit_buf |= value << (32 - bit_left); + if (n >= bit_left) { + if (3 < s->buf_end - s->buf_ptr) { + AV_WL32(s->buf_ptr, bit_buf); + s->buf_ptr += 4; + } else { + av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n"); + av_assert2(0); + } + bit_buf = value >> bit_left; + bit_left += 32; + } + bit_left -= n; +#else + if (n < bit_left) { + bit_buf = (bit_buf << n) | value; + bit_left -= n; + } else { + bit_buf <<= bit_left; + bit_buf |= value >> (n - bit_left); + if (3 < s->buf_end - s->buf_ptr) { + AV_WB32(s->buf_ptr, bit_buf); + s->buf_ptr += 4; + } else { + av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n"); + av_assert2(0); + } + bit_left += 32 - n; + bit_buf = value; + } +#endif + + s->bit_buf = bit_buf; + s->bit_left = bit_left; +} + +static inline void put_bits_le(PutBitContext *s, int n, unsigned int value) +{ + unsigned int bit_buf; + int bit_left; + + av_assert2(n <= 31 && value < (1U << n)); + + bit_buf = s->bit_buf; + bit_left = s->bit_left; + + bit_buf |= value << (32 - bit_left); + if (n >= bit_left) { + if (3 < s->buf_end - s->buf_ptr) { + AV_WL32(s->buf_ptr, bit_buf); + s->buf_ptr += 4; + } else { + av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n"); + av_assert2(0); + } + bit_buf = value >> bit_left; + bit_left += 32; + } + bit_left -= n; + + s->bit_buf = bit_buf; + s->bit_left = bit_left; +} + +static inline void put_sbits(PutBitContext *pb, int n, int32_t value) +{ + av_assert2(n >= 0 && n <= 31); + + put_bits(pb, n, av_mod_uintp2(value, n)); +} + +/** + * Write exactly 32 bits into a bitstream. + */ +static void av_unused put_bits32(PutBitContext *s, uint32_t value) +{ + unsigned int bit_buf; + int bit_left; + + bit_buf = s->bit_buf; + bit_left = s->bit_left; + +#ifdef BITSTREAM_WRITER_LE + bit_buf |= value << (32 - bit_left); + if (3 < s->buf_end - s->buf_ptr) { + AV_WL32(s->buf_ptr, bit_buf); + s->buf_ptr += 4; + } else { + av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n"); + av_assert2(0); + } + bit_buf = (uint64_t)value >> bit_left; +#else + bit_buf = (uint64_t)bit_buf << bit_left; + bit_buf |= value >> (32 - bit_left); + if (3 < s->buf_end - s->buf_ptr) { + AV_WB32(s->buf_ptr, bit_buf); + s->buf_ptr += 4; + } else { + av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n"); + av_assert2(0); + } + bit_buf = value; +#endif + + s->bit_buf = bit_buf; + s->bit_left = bit_left; +} + +/** + * Write up to 64 bits into a bitstream. + */ +static inline void put_bits64(PutBitContext *s, int n, uint64_t value) +{ + av_assert2((n == 64) || (n < 64 && value < (UINT64_C(1) << n))); + + if (n < 32) + put_bits(s, n, value); + else if (n == 32) + put_bits32(s, value); + else if (n < 64) { + uint32_t lo = value & 0xffffffff; + uint32_t hi = value >> 32; +#ifdef BITSTREAM_WRITER_LE + put_bits32(s, lo); + put_bits(s, n - 32, hi); +#else + put_bits(s, n - 32, hi); + put_bits32(s, lo); +#endif + } else { + uint32_t lo = value & 0xffffffff; + uint32_t hi = value >> 32; +#ifdef BITSTREAM_WRITER_LE + put_bits32(s, lo); + put_bits32(s, hi); +#else + put_bits32(s, hi); + put_bits32(s, lo); +#endif + + } +} + +/** + * Return the pointer to the byte where the bitstream writer will put + * the next bit. + */ +static inline uint8_t *put_bits_ptr(PutBitContext *s) +{ + return s->buf_ptr; +} + +/** + * Skip the given number of bytes. + * PutBitContext must be flushed & aligned to a byte boundary before calling this. + */ +static inline void skip_put_bytes(PutBitContext *s, int n) +{ + av_assert2((put_bits_count(s) & 7) == 0); + av_assert2(s->bit_left == 32); + av_assert0(n <= s->buf_end - s->buf_ptr); + s->buf_ptr += n; +} + +/** + * Skip the given number of bits. + * Must only be used if the actual values in the bitstream do not matter. + * If n is 0 the behavior is undefined. + */ +static inline void skip_put_bits(PutBitContext *s, int n) +{ + s->bit_left -= n; + s->buf_ptr -= 4 * (s->bit_left >> 5); + s->bit_left &= 31; +} + +/** + * Change the end of the buffer. + * + * @param size the new size in bytes of the buffer where to put bits + */ +static inline void set_put_bits_buffer_size(PutBitContext *s, int size) +{ + av_assert0(size <= INT_MAX/8 - 32); + s->buf_end = s->buf + size; + s->size_in_bits = 8*size; +} + +#endif /* AVCODEC_PUT_BITS_H */ diff --git a/libs/ffvpx/libavcodec/qpeldsp.h b/libs/ffvpx/libavcodec/qpeldsp.h new file mode 100644 index 000000000..91019eda9 --- /dev/null +++ b/libs/ffvpx/libavcodec/qpeldsp.h @@ -0,0 +1,83 @@ +/* + * quarterpel DSP functions + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * quarterpel DSP functions + */ + +#ifndef AVCODEC_QPELDSP_H +#define AVCODEC_QPELDSP_H + +#include <stddef.h> +#include <stdint.h> + +void ff_put_pixels8x8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_avg_pixels8x8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_put_pixels16x16_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); +void ff_avg_pixels16x16_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); + +void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, + int dst_stride, int src_stride1, int src_stride2, + int h); + +#define DEF_OLD_QPEL(name) \ +void ff_put_ ## name(uint8_t *dst /* align width (8 or 16) */, \ + const uint8_t *src /* align 1 */, \ + ptrdiff_t stride); \ +void ff_put_no_rnd_ ## name(uint8_t *dst /* align width (8 or 16) */, \ + const uint8_t *src /* align 1 */, \ + ptrdiff_t stride); \ +void ff_avg_ ## name(uint8_t *dst /* align width (8 or 16) */, \ + const uint8_t *src /* align 1 */, \ + ptrdiff_t stride); + +DEF_OLD_QPEL(qpel16_mc11_old_c) +DEF_OLD_QPEL(qpel16_mc31_old_c) +DEF_OLD_QPEL(qpel16_mc12_old_c) +DEF_OLD_QPEL(qpel16_mc32_old_c) +DEF_OLD_QPEL(qpel16_mc13_old_c) +DEF_OLD_QPEL(qpel16_mc33_old_c) +DEF_OLD_QPEL(qpel8_mc11_old_c) +DEF_OLD_QPEL(qpel8_mc31_old_c) +DEF_OLD_QPEL(qpel8_mc12_old_c) +DEF_OLD_QPEL(qpel8_mc32_old_c) +DEF_OLD_QPEL(qpel8_mc13_old_c) +DEF_OLD_QPEL(qpel8_mc33_old_c) + +typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */, + const uint8_t *src /* align 1 */, + ptrdiff_t stride); + +/** + * quarterpel DSP context + */ +typedef struct QpelDSPContext { + qpel_mc_func put_qpel_pixels_tab[2][16]; + qpel_mc_func avg_qpel_pixels_tab[2][16]; + qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; +} QpelDSPContext; + +void ff_qpeldsp_init(QpelDSPContext *c); + +void ff_qpeldsp_init_x86(QpelDSPContext *c); +void ff_qpeldsp_init_mips(QpelDSPContext *c); + +#endif /* AVCODEC_QPELDSP_H */ diff --git a/libs/ffvpx/libavcodec/qsv_api.c b/libs/ffvpx/libavcodec/qsv_api.c new file mode 100644 index 000000000..327ff7d81 --- /dev/null +++ b/libs/ffvpx/libavcodec/qsv_api.c @@ -0,0 +1,42 @@ +/* + * Intel MediaSDK QSV public API functions + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include <stddef.h> + +#include "libavutil/mem.h" + +#if CONFIG_QSV +#include "qsv.h" + +AVQSVContext *av_qsv_alloc_context(void) +{ + return av_mallocz(sizeof(AVQSVContext)); +} +#else + +struct AVQSVContext *av_qsv_alloc_context(void); + +struct AVQSVContext *av_qsv_alloc_context(void) +{ + return NULL; +} +#endif diff --git a/libs/ffvpx/libavcodec/ratecontrol.h b/libs/ffvpx/libavcodec/ratecontrol.h new file mode 100644 index 000000000..2a7aaec64 --- /dev/null +++ b/libs/ffvpx/libavcodec/ratecontrol.h @@ -0,0 +1,99 @@ +/* + * Ratecontrol + * Copyright (c) 2000, 2001, 2002 Fabrice Bellard + * Copyright (c) 2002-2004 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_RATECONTROL_H +#define AVCODEC_RATECONTROL_H + +/** + * @file + * ratecontrol header. + */ + +#include <stdio.h> +#include <stdint.h> +#include "libavutil/eval.h" + +typedef struct Predictor{ + double coeff; + double count; + double decay; +} Predictor; + +typedef struct RateControlEntry{ + int pict_type; + float qscale; + int mv_bits; + int i_tex_bits; + int p_tex_bits; + int misc_bits; + int header_bits; + uint64_t expected_bits; + int new_pict_type; + float new_qscale; + int64_t mc_mb_var_sum; + int64_t mb_var_sum; + int i_count; + int skip_count; + int f_code; + int b_code; +}RateControlEntry; + +/** + * rate control context. + */ +typedef struct RateControlContext{ + int num_entries; ///< number of RateControlEntries + RateControlEntry *entry; + double buffer_index; ///< amount of bits in the video/audio buffer + Predictor pred[5]; + double short_term_qsum; ///< sum of recent qscales + double short_term_qcount; ///< count of recent qscales + double pass1_rc_eq_output_sum;///< sum of the output of the rc equation, this is used for normalization + double pass1_wanted_bits; ///< bits which should have been output by the pass1 code (including complexity init) + double last_qscale; + double last_qscale_for[5]; ///< last qscale for a specific pict type, used for max_diff & ipb factor stuff + int64_t last_mc_mb_var_sum; + int64_t last_mb_var_sum; + uint64_t i_cplx_sum[5]; + uint64_t p_cplx_sum[5]; + uint64_t mv_bits_sum[5]; + uint64_t qscale_sum[5]; + int frame_count[5]; + int last_non_b_pict_type; + + void *non_lavc_opaque; ///< context for non lavc rc code (for example xvid) + float dry_run_qscale; ///< for xvid rc + int last_picture_number; ///< for xvid rc + AVExpr * rc_eq_eval; +}RateControlContext; + +struct MpegEncContext; + +/* rate control */ +int ff_rate_control_init(struct MpegEncContext *s); +float ff_rate_estimate_qscale(struct MpegEncContext *s, int dry_run); +void ff_write_pass1_stats(struct MpegEncContext *s); +void ff_rate_control_uninit(struct MpegEncContext *s); +int ff_vbv_update(struct MpegEncContext *s, int frame_size); +void ff_get_2pass_fcode(struct MpegEncContext *s); + +#endif /* AVCODEC_RATECONTROL_H */ diff --git a/libs/ffvpx/libavcodec/raw.c b/libs/ffvpx/libavcodec/raw.c new file mode 100644 index 000000000..8da2a9735 --- /dev/null +++ b/libs/ffvpx/libavcodec/raw.c @@ -0,0 +1,332 @@ +/* + * Raw Video Codec + * Copyright (c) 2001 Fabrice Bellard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Raw Video Codec + */ + +#include "avcodec.h" +#include "raw.h" +#include "libavutil/common.h" + +const PixelFormatTag ff_raw_pix_fmt_tags[] = { + { AV_PIX_FMT_YUV420P, MKTAG('I', '4', '2', '0') }, /* Planar formats */ + { AV_PIX_FMT_YUV420P, MKTAG('I', 'Y', 'U', 'V') }, + { AV_PIX_FMT_YUV420P, MKTAG('y', 'v', '1', '2') }, + { AV_PIX_FMT_YUV420P, MKTAG('Y', 'V', '1', '2') }, + { AV_PIX_FMT_YUV410P, MKTAG('Y', 'U', 'V', '9') }, + { AV_PIX_FMT_YUV410P, MKTAG('Y', 'V', 'U', '9') }, + { AV_PIX_FMT_YUV411P, MKTAG('Y', '4', '1', 'B') }, + { AV_PIX_FMT_YUV422P, MKTAG('Y', '4', '2', 'B') }, + { AV_PIX_FMT_YUV422P, MKTAG('P', '4', '2', '2') }, + { AV_PIX_FMT_YUV422P, MKTAG('Y', 'V', '1', '6') }, + /* yuvjXXX formats are deprecated hacks specific to libav*, + they are identical to yuvXXX */ + { AV_PIX_FMT_YUVJ420P, MKTAG('I', '4', '2', '0') }, /* Planar formats */ + { AV_PIX_FMT_YUVJ420P, MKTAG('I', 'Y', 'U', 'V') }, + { AV_PIX_FMT_YUVJ420P, MKTAG('Y', 'V', '1', '2') }, + { AV_PIX_FMT_YUVJ422P, MKTAG('Y', '4', '2', 'B') }, + { AV_PIX_FMT_YUVJ422P, MKTAG('P', '4', '2', '2') }, + { AV_PIX_FMT_GRAY8, MKTAG('Y', '8', '0', '0') }, + { AV_PIX_FMT_GRAY8, MKTAG('Y', '8', ' ', ' ') }, + + { AV_PIX_FMT_YUYV422, MKTAG('Y', 'U', 'Y', '2') }, /* Packed formats */ + { AV_PIX_FMT_YUYV422, MKTAG('Y', '4', '2', '2') }, + { AV_PIX_FMT_YUYV422, MKTAG('V', '4', '2', '2') }, + { AV_PIX_FMT_YUYV422, MKTAG('V', 'Y', 'U', 'Y') }, + { AV_PIX_FMT_YUYV422, MKTAG('Y', 'U', 'N', 'V') }, + { AV_PIX_FMT_YUYV422, MKTAG('Y', 'U', 'Y', 'V') }, + { AV_PIX_FMT_YVYU422, MKTAG('Y', 'V', 'Y', 'U') }, /* Philips */ + { AV_PIX_FMT_UYVY422, MKTAG('U', 'Y', 'V', 'Y') }, + { AV_PIX_FMT_UYVY422, MKTAG('H', 'D', 'Y', 'C') }, + { AV_PIX_FMT_UYVY422, MKTAG('U', 'Y', 'N', 'V') }, + { AV_PIX_FMT_UYVY422, MKTAG('U', 'Y', 'N', 'Y') }, + { AV_PIX_FMT_UYVY422, MKTAG('u', 'y', 'v', '1') }, + { AV_PIX_FMT_UYVY422, MKTAG('2', 'V', 'u', '1') }, + { AV_PIX_FMT_UYVY422, MKTAG('A', 'V', 'R', 'n') }, /* Avid AVI Codec 1:1 */ + { AV_PIX_FMT_UYVY422, MKTAG('A', 'V', '1', 'x') }, /* Avid 1:1x */ + { AV_PIX_FMT_UYVY422, MKTAG('A', 'V', 'u', 'p') }, + { AV_PIX_FMT_UYVY422, MKTAG('V', 'D', 'T', 'Z') }, /* SoftLab-NSK VideoTizer */ + { AV_PIX_FMT_UYVY422, MKTAG('a', 'u', 'v', '2') }, + { AV_PIX_FMT_UYVY422, MKTAG('c', 'y', 'u', 'v') }, /* CYUV is also Creative YUV */ + { AV_PIX_FMT_UYYVYY411, MKTAG('Y', '4', '1', '1') }, + { AV_PIX_FMT_GRAY8, MKTAG('G', 'R', 'E', 'Y') }, + { AV_PIX_FMT_NV12, MKTAG('N', 'V', '1', '2') }, + { AV_PIX_FMT_NV21, MKTAG('N', 'V', '2', '1') }, + + /* nut */ + { AV_PIX_FMT_RGB555LE, MKTAG('R', 'G', 'B', 15) }, + { AV_PIX_FMT_BGR555LE, MKTAG('B', 'G', 'R', 15) }, + { AV_PIX_FMT_RGB565LE, MKTAG('R', 'G', 'B', 16) }, + { AV_PIX_FMT_BGR565LE, MKTAG('B', 'G', 'R', 16) }, + { AV_PIX_FMT_RGB555BE, MKTAG(15 , 'B', 'G', 'R') }, + { AV_PIX_FMT_BGR555BE, MKTAG(15 , 'R', 'G', 'B') }, + { AV_PIX_FMT_RGB565BE, MKTAG(16 , 'B', 'G', 'R') }, + { AV_PIX_FMT_BGR565BE, MKTAG(16 , 'R', 'G', 'B') }, + { AV_PIX_FMT_RGB444LE, MKTAG('R', 'G', 'B', 12) }, + { AV_PIX_FMT_BGR444LE, MKTAG('B', 'G', 'R', 12) }, + { AV_PIX_FMT_RGB444BE, MKTAG(12 , 'B', 'G', 'R') }, + { AV_PIX_FMT_BGR444BE, MKTAG(12 , 'R', 'G', 'B') }, + { AV_PIX_FMT_RGBA64LE, MKTAG('R', 'B', 'A', 64 ) }, + { AV_PIX_FMT_BGRA64LE, MKTAG('B', 'R', 'A', 64 ) }, + { AV_PIX_FMT_RGBA64BE, MKTAG(64 , 'R', 'B', 'A') }, + { AV_PIX_FMT_BGRA64BE, MKTAG(64 , 'B', 'R', 'A') }, + { AV_PIX_FMT_RGBA, MKTAG('R', 'G', 'B', 'A') }, + { AV_PIX_FMT_RGB0, MKTAG('R', 'G', 'B', 0 ) }, + { AV_PIX_FMT_BGRA, MKTAG('B', 'G', 'R', 'A') }, + { AV_PIX_FMT_BGR0, MKTAG('B', 'G', 'R', 0 ) }, + { AV_PIX_FMT_ABGR, MKTAG('A', 'B', 'G', 'R') }, + { AV_PIX_FMT_0BGR, MKTAG( 0 , 'B', 'G', 'R') }, + { AV_PIX_FMT_ARGB, MKTAG('A', 'R', 'G', 'B') }, + { AV_PIX_FMT_0RGB, MKTAG( 0 , 'R', 'G', 'B') }, + { AV_PIX_FMT_RGB24, MKTAG('R', 'G', 'B', 24 ) }, + { AV_PIX_FMT_BGR24, MKTAG('B', 'G', 'R', 24 ) }, + { AV_PIX_FMT_YUV411P, MKTAG('4', '1', '1', 'P') }, + { AV_PIX_FMT_YUV422P, MKTAG('4', '2', '2', 'P') }, + { AV_PIX_FMT_YUVJ422P, MKTAG('4', '2', '2', 'P') }, + { AV_PIX_FMT_YUV440P, MKTAG('4', '4', '0', 'P') }, + { AV_PIX_FMT_YUVJ440P, MKTAG('4', '4', '0', 'P') }, + { AV_PIX_FMT_YUV444P, MKTAG('4', '4', '4', 'P') }, + { AV_PIX_FMT_YUVJ444P, MKTAG('4', '4', '4', 'P') }, + { AV_PIX_FMT_MONOWHITE,MKTAG('B', '1', 'W', '0') }, + { AV_PIX_FMT_MONOBLACK,MKTAG('B', '0', 'W', '1') }, + { AV_PIX_FMT_BGR8, MKTAG('B', 'G', 'R', 8 ) }, + { AV_PIX_FMT_RGB8, MKTAG('R', 'G', 'B', 8 ) }, + { AV_PIX_FMT_BGR4, MKTAG('B', 'G', 'R', 4 ) }, + { AV_PIX_FMT_RGB4, MKTAG('R', 'G', 'B', 4 ) }, + { AV_PIX_FMT_RGB4_BYTE,MKTAG('B', '4', 'B', 'Y') }, + { AV_PIX_FMT_BGR4_BYTE,MKTAG('R', '4', 'B', 'Y') }, + { AV_PIX_FMT_RGB48LE, MKTAG('R', 'G', 'B', 48 ) }, + { AV_PIX_FMT_RGB48BE, MKTAG( 48, 'R', 'G', 'B') }, + { AV_PIX_FMT_BGR48LE, MKTAG('B', 'G', 'R', 48 ) }, + { AV_PIX_FMT_BGR48BE, MKTAG( 48, 'B', 'G', 'R') }, + { AV_PIX_FMT_GRAY9LE, MKTAG('Y', '1', 0 , 9 ) }, + { AV_PIX_FMT_GRAY9BE, MKTAG( 9 , 0 , '1', 'Y') }, + { AV_PIX_FMT_GRAY10LE, MKTAG('Y', '1', 0 , 10 ) }, + { AV_PIX_FMT_GRAY10BE, MKTAG(10 , 0 , '1', 'Y') }, + { AV_PIX_FMT_GRAY12LE, MKTAG('Y', '1', 0 , 12 ) }, + { AV_PIX_FMT_GRAY12BE, MKTAG(12 , 0 , '1', 'Y') }, + { AV_PIX_FMT_GRAY16LE, MKTAG('Y', '1', 0 , 16 ) }, + { AV_PIX_FMT_GRAY16BE, MKTAG(16 , 0 , '1', 'Y') }, + { AV_PIX_FMT_YUV420P9LE, MKTAG('Y', '3', 11 , 9 ) }, + { AV_PIX_FMT_YUV420P9BE, MKTAG( 9 , 11 , '3', 'Y') }, + { AV_PIX_FMT_YUV422P9LE, MKTAG('Y', '3', 10 , 9 ) }, + { AV_PIX_FMT_YUV422P9BE, MKTAG( 9 , 10 , '3', 'Y') }, + { AV_PIX_FMT_YUV444P9LE, MKTAG('Y', '3', 0 , 9 ) }, + { AV_PIX_FMT_YUV444P9BE, MKTAG( 9 , 0 , '3', 'Y') }, + { AV_PIX_FMT_YUV420P10LE, MKTAG('Y', '3', 11 , 10 ) }, + { AV_PIX_FMT_YUV420P10BE, MKTAG(10 , 11 , '3', 'Y') }, + { AV_PIX_FMT_YUV422P10LE, MKTAG('Y', '3', 10 , 10 ) }, + { AV_PIX_FMT_YUV422P10BE, MKTAG(10 , 10 , '3', 'Y') }, + { AV_PIX_FMT_YUV444P10LE, MKTAG('Y', '3', 0 , 10 ) }, + { AV_PIX_FMT_YUV444P10BE, MKTAG(10 , 0 , '3', 'Y') }, + { AV_PIX_FMT_YUV420P12LE, MKTAG('Y', '3', 11 , 12 ) }, + { AV_PIX_FMT_YUV420P12BE, MKTAG(12 , 11 , '3', 'Y') }, + { AV_PIX_FMT_YUV422P12LE, MKTAG('Y', '3', 10 , 12 ) }, + { AV_PIX_FMT_YUV422P12BE, MKTAG(12 , 10 , '3', 'Y') }, + { AV_PIX_FMT_YUV444P12LE, MKTAG('Y', '3', 0 , 12 ) }, + { AV_PIX_FMT_YUV444P12BE, MKTAG(12 , 0 , '3', 'Y') }, + { AV_PIX_FMT_YUV420P14LE, MKTAG('Y', '3', 11 , 14 ) }, + { AV_PIX_FMT_YUV420P14BE, MKTAG(14 , 11 , '3', 'Y') }, + { AV_PIX_FMT_YUV422P14LE, MKTAG('Y', '3', 10 , 14 ) }, + { AV_PIX_FMT_YUV422P14BE, MKTAG(14 , 10 , '3', 'Y') }, + { AV_PIX_FMT_YUV444P14LE, MKTAG('Y', '3', 0 , 14 ) }, + { AV_PIX_FMT_YUV444P14BE, MKTAG(14 , 0 , '3', 'Y') }, + { AV_PIX_FMT_YUV420P16LE, MKTAG('Y', '3', 11 , 16 ) }, + { AV_PIX_FMT_YUV420P16BE, MKTAG(16 , 11 , '3', 'Y') }, + { AV_PIX_FMT_YUV422P16LE, MKTAG('Y', '3', 10 , 16 ) }, + { AV_PIX_FMT_YUV422P16BE, MKTAG(16 , 10 , '3', 'Y') }, + { AV_PIX_FMT_YUV444P16LE, MKTAG('Y', '3', 0 , 16 ) }, + { AV_PIX_FMT_YUV444P16BE, MKTAG(16 , 0 , '3', 'Y') }, + { AV_PIX_FMT_YUVA420P, MKTAG('Y', '4', 11 , 8 ) }, + { AV_PIX_FMT_YUVA422P, MKTAG('Y', '4', 10 , 8 ) }, + { AV_PIX_FMT_YUVA444P, MKTAG('Y', '4', 0 , 8 ) }, + { AV_PIX_FMT_YA8, MKTAG('Y', '2', 0 , 8 ) }, + { AV_PIX_FMT_PAL8, MKTAG('P', 'A', 'L', 8 ) }, + + { AV_PIX_FMT_YUVA420P9LE, MKTAG('Y', '4', 11 , 9 ) }, + { AV_PIX_FMT_YUVA420P9BE, MKTAG( 9 , 11 , '4', 'Y') }, + { AV_PIX_FMT_YUVA422P9LE, MKTAG('Y', '4', 10 , 9 ) }, + { AV_PIX_FMT_YUVA422P9BE, MKTAG( 9 , 10 , '4', 'Y') }, + { AV_PIX_FMT_YUVA444P9LE, MKTAG('Y', '4', 0 , 9 ) }, + { AV_PIX_FMT_YUVA444P9BE, MKTAG( 9 , 0 , '4', 'Y') }, + { AV_PIX_FMT_YUVA420P10LE, MKTAG('Y', '4', 11 , 10 ) }, + { AV_PIX_FMT_YUVA420P10BE, MKTAG(10 , 11 , '4', 'Y') }, + { AV_PIX_FMT_YUVA422P10LE, MKTAG('Y', '4', 10 , 10 ) }, + { AV_PIX_FMT_YUVA422P10BE, MKTAG(10 , 10 , '4', 'Y') }, + { AV_PIX_FMT_YUVA444P10LE, MKTAG('Y', '4', 0 , 10 ) }, + { AV_PIX_FMT_YUVA444P10BE, MKTAG(10 , 0 , '4', 'Y') }, + { AV_PIX_FMT_YUVA420P16LE, MKTAG('Y', '4', 11 , 16 ) }, + { AV_PIX_FMT_YUVA420P16BE, MKTAG(16 , 11 , '4', 'Y') }, + { AV_PIX_FMT_YUVA422P16LE, MKTAG('Y', '4', 10 , 16 ) }, + { AV_PIX_FMT_YUVA422P16BE, MKTAG(16 , 10 , '4', 'Y') }, + { AV_PIX_FMT_YUVA444P16LE, MKTAG('Y', '4', 0 , 16 ) }, + { AV_PIX_FMT_YUVA444P16BE, MKTAG(16 , 0 , '4', 'Y') }, + + { AV_PIX_FMT_GBRP, MKTAG('G', '3', 00 , 8 ) }, + { AV_PIX_FMT_GBRP9LE, MKTAG('G', '3', 00 , 9 ) }, + { AV_PIX_FMT_GBRP9BE, MKTAG( 9 , 00 , '3', 'G') }, + { AV_PIX_FMT_GBRP10LE, MKTAG('G', '3', 00 , 10 ) }, + { AV_PIX_FMT_GBRP10BE, MKTAG(10 , 00 , '3', 'G') }, + { AV_PIX_FMT_GBRP12LE, MKTAG('G', '3', 00 , 12 ) }, + { AV_PIX_FMT_GBRP12BE, MKTAG(12 , 00 , '3', 'G') }, + { AV_PIX_FMT_GBRP14LE, MKTAG('G', '3', 00 , 14 ) }, + { AV_PIX_FMT_GBRP14BE, MKTAG(14 , 00 , '3', 'G') }, + { AV_PIX_FMT_GBRP16LE, MKTAG('G', '3', 00 , 16 ) }, + { AV_PIX_FMT_GBRP16BE, MKTAG(16 , 00 , '3', 'G') }, + + { AV_PIX_FMT_GBRAP, MKTAG('G', '4', 00 , 8 ) }, + { AV_PIX_FMT_GBRAP10LE, MKTAG('G', '4', 00 , 10 ) }, + { AV_PIX_FMT_GBRAP10BE, MKTAG(10 , 00 , '4', 'G') }, + { AV_PIX_FMT_GBRAP12LE, MKTAG('G', '4', 00 , 12 ) }, + { AV_PIX_FMT_GBRAP12BE, MKTAG(12 , 00 , '4', 'G') }, + { AV_PIX_FMT_GBRAP16LE, MKTAG('G', '4', 00 , 16 ) }, + { AV_PIX_FMT_GBRAP16BE, MKTAG(16 , 00 , '4', 'G') }, + + { AV_PIX_FMT_XYZ12LE, MKTAG('X', 'Y', 'Z' , 36 ) }, + { AV_PIX_FMT_XYZ12BE, MKTAG(36 , 'Z' , 'Y', 'X') }, + + { AV_PIX_FMT_BAYER_BGGR8, MKTAG(0xBA, 'B', 'G', 8 ) }, + { AV_PIX_FMT_BAYER_BGGR16LE, MKTAG(0xBA, 'B', 'G', 16 ) }, + { AV_PIX_FMT_BAYER_BGGR16BE, MKTAG(16 , 'G', 'B', 0xBA) }, + { AV_PIX_FMT_BAYER_RGGB8, MKTAG(0xBA, 'R', 'G', 8 ) }, + { AV_PIX_FMT_BAYER_RGGB16LE, MKTAG(0xBA, 'R', 'G', 16 ) }, + { AV_PIX_FMT_BAYER_RGGB16BE, MKTAG(16 , 'G', 'R', 0xBA) }, + { AV_PIX_FMT_BAYER_GBRG8, MKTAG(0xBA, 'G', 'B', 8 ) }, + { AV_PIX_FMT_BAYER_GBRG16LE, MKTAG(0xBA, 'G', 'B', 16 ) }, + { AV_PIX_FMT_BAYER_GBRG16BE, MKTAG(16, 'B', 'G', 0xBA) }, + { AV_PIX_FMT_BAYER_GRBG8, MKTAG(0xBA, 'G', 'R', 8 ) }, + { AV_PIX_FMT_BAYER_GRBG16LE, MKTAG(0xBA, 'G', 'R', 16 ) }, + { AV_PIX_FMT_BAYER_GRBG16BE, MKTAG(16, 'R', 'G', 0xBA) }, + + /* quicktime */ + { AV_PIX_FMT_YUV420P, MKTAG('R', '4', '2', '0') }, /* Radius DV YUV PAL */ + { AV_PIX_FMT_YUV411P, MKTAG('R', '4', '1', '1') }, /* Radius DV YUV NTSC */ + { AV_PIX_FMT_UYVY422, MKTAG('2', 'v', 'u', 'y') }, + { AV_PIX_FMT_UYVY422, MKTAG('2', 'V', 'u', 'y') }, + { AV_PIX_FMT_UYVY422, MKTAG('A', 'V', 'U', 'I') }, /* FIXME merge both fields */ + { AV_PIX_FMT_UYVY422, MKTAG('b', 'x', 'y', 'v') }, + { AV_PIX_FMT_YUYV422, MKTAG('y', 'u', 'v', '2') }, + { AV_PIX_FMT_YUYV422, MKTAG('y', 'u', 'v', 's') }, + { AV_PIX_FMT_YUYV422, MKTAG('D', 'V', 'O', 'O') }, /* Digital Voodoo SD 8 Bit */ + { AV_PIX_FMT_RGB555LE,MKTAG('L', '5', '5', '5') }, + { AV_PIX_FMT_RGB565LE,MKTAG('L', '5', '6', '5') }, + { AV_PIX_FMT_RGB565BE,MKTAG('B', '5', '6', '5') }, + { AV_PIX_FMT_BGR24, MKTAG('2', '4', 'B', 'G') }, + { AV_PIX_FMT_BGR24, MKTAG('b', 'x', 'b', 'g') }, + { AV_PIX_FMT_BGRA, MKTAG('B', 'G', 'R', 'A') }, + { AV_PIX_FMT_RGBA, MKTAG('R', 'G', 'B', 'A') }, + { AV_PIX_FMT_RGB24, MKTAG('b', 'x', 'r', 'g') }, + { AV_PIX_FMT_ABGR, MKTAG('A', 'B', 'G', 'R') }, + { AV_PIX_FMT_GRAY16BE,MKTAG('b', '1', '6', 'g') }, + { AV_PIX_FMT_RGB48BE, MKTAG('b', '4', '8', 'r') }, + { AV_PIX_FMT_RGBA64BE,MKTAG('b', '6', '4', 'a') }, + + /* vlc */ + { AV_PIX_FMT_YUV410P, MKTAG('I', '4', '1', '0') }, + { AV_PIX_FMT_YUV411P, MKTAG('I', '4', '1', '1') }, + { AV_PIX_FMT_YUV422P, MKTAG('I', '4', '2', '2') }, + { AV_PIX_FMT_YUV440P, MKTAG('I', '4', '4', '0') }, + { AV_PIX_FMT_YUV444P, MKTAG('I', '4', '4', '4') }, + { AV_PIX_FMT_YUVJ420P, MKTAG('J', '4', '2', '0') }, + { AV_PIX_FMT_YUVJ422P, MKTAG('J', '4', '2', '2') }, + { AV_PIX_FMT_YUVJ440P, MKTAG('J', '4', '4', '0') }, + { AV_PIX_FMT_YUVJ444P, MKTAG('J', '4', '4', '4') }, + { AV_PIX_FMT_YUVA444P, MKTAG('Y', 'U', 'V', 'A') }, + { AV_PIX_FMT_YUVA420P, MKTAG('I', '4', '0', 'A') }, + { AV_PIX_FMT_YUVA422P, MKTAG('I', '4', '2', 'A') }, + { AV_PIX_FMT_RGB8, MKTAG('R', 'G', 'B', '2') }, + { AV_PIX_FMT_RGB555LE, MKTAG('R', 'V', '1', '5') }, + { AV_PIX_FMT_RGB565LE, MKTAG('R', 'V', '1', '6') }, + { AV_PIX_FMT_BGR24, MKTAG('R', 'V', '2', '4') }, + { AV_PIX_FMT_BGR0, MKTAG('R', 'V', '3', '2') }, + { AV_PIX_FMT_RGBA, MKTAG('A', 'V', '3', '2') }, + { AV_PIX_FMT_YUV420P9LE, MKTAG('I', '0', '9', 'L') }, + { AV_PIX_FMT_YUV420P9BE, MKTAG('I', '0', '9', 'B') }, + { AV_PIX_FMT_YUV422P9LE, MKTAG('I', '2', '9', 'L') }, + { AV_PIX_FMT_YUV422P9BE, MKTAG('I', '2', '9', 'B') }, + { AV_PIX_FMT_YUV444P9LE, MKTAG('I', '4', '9', 'L') }, + { AV_PIX_FMT_YUV444P9BE, MKTAG('I', '4', '9', 'B') }, + { AV_PIX_FMT_YUV420P10LE, MKTAG('I', '0', 'A', 'L') }, + { AV_PIX_FMT_YUV420P10BE, MKTAG('I', '0', 'A', 'B') }, + { AV_PIX_FMT_YUV422P10LE, MKTAG('I', '2', 'A', 'L') }, + { AV_PIX_FMT_YUV422P10BE, MKTAG('I', '2', 'A', 'B') }, + { AV_PIX_FMT_YUV444P10LE, MKTAG('I', '4', 'A', 'L') }, + { AV_PIX_FMT_YUV444P10BE, MKTAG('I', '4', 'A', 'B') }, + { AV_PIX_FMT_YUV420P12LE, MKTAG('I', '0', 'C', 'L') }, + { AV_PIX_FMT_YUV420P12BE, MKTAG('I', '0', 'C', 'B') }, + { AV_PIX_FMT_YUV422P12LE, MKTAG('I', '2', 'C', 'L') }, + { AV_PIX_FMT_YUV422P12BE, MKTAG('I', '2', 'C', 'B') }, + { AV_PIX_FMT_YUV444P12LE, MKTAG('I', '4', 'C', 'L') }, + { AV_PIX_FMT_YUV444P12BE, MKTAG('I', '4', 'C', 'B') }, + { AV_PIX_FMT_YUV420P16LE, MKTAG('I', '0', 'F', 'L') }, + { AV_PIX_FMT_YUV420P16BE, MKTAG('I', '0', 'F', 'B') }, + { AV_PIX_FMT_YUV444P16LE, MKTAG('I', '4', 'F', 'L') }, + { AV_PIX_FMT_YUV444P16BE, MKTAG('I', '4', 'F', 'B') }, + + /* special */ + { AV_PIX_FMT_RGB565LE,MKTAG( 3 , 0 , 0 , 0 ) }, /* flipped RGB565LE */ + { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */ + + { AV_PIX_FMT_NONE, 0 }, +}; + +const struct PixelFormatTag *avpriv_get_raw_pix_fmt_tags(void) +{ + return ff_raw_pix_fmt_tags; +} + +unsigned int avcodec_pix_fmt_to_codec_tag(enum AVPixelFormat fmt) +{ + const PixelFormatTag *tags = ff_raw_pix_fmt_tags; + while (tags->pix_fmt >= 0) { + if (tags->pix_fmt == fmt) + return tags->fourcc; + tags++; + } + return 0; +} + +const PixelFormatTag avpriv_pix_fmt_bps_avi[] = { + { AV_PIX_FMT_PAL8, 1 }, + { AV_PIX_FMT_PAL8, 2 }, + { AV_PIX_FMT_PAL8, 4 }, + { AV_PIX_FMT_PAL8, 8 }, + { AV_PIX_FMT_RGB444LE, 12 }, + { AV_PIX_FMT_RGB555LE, 15 }, + { AV_PIX_FMT_RGB555LE, 16 }, + { AV_PIX_FMT_BGR24, 24 }, + { AV_PIX_FMT_BGRA, 32 }, + { AV_PIX_FMT_NONE, 0 }, +}; + +const PixelFormatTag avpriv_pix_fmt_bps_mov[] = { + { AV_PIX_FMT_PAL8, 1 }, + { AV_PIX_FMT_PAL8, 2 }, + { AV_PIX_FMT_PAL8, 4 }, + { AV_PIX_FMT_PAL8, 8 }, + { AV_PIX_FMT_RGB555BE, 16 }, + { AV_PIX_FMT_RGB24, 24 }, + { AV_PIX_FMT_ARGB, 32 }, + { AV_PIX_FMT_PAL8, 33 }, + { AV_PIX_FMT_NONE, 0 }, +}; diff --git a/libs/ffvpx/libavcodec/raw.h b/libs/ffvpx/libavcodec/raw.h new file mode 100644 index 000000000..28a27b1f9 --- /dev/null +++ b/libs/ffvpx/libavcodec/raw.h @@ -0,0 +1,48 @@ +/* + * Raw Video Codec + * Copyright (c) 2001 Fabrice Bellard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Raw Video Codec + */ + +#ifndef AVCODEC_RAW_H +#define AVCODEC_RAW_H + +#include "avcodec.h" +#include "internal.h" +#include "libavutil/internal.h" + +typedef struct PixelFormatTag { + enum AVPixelFormat pix_fmt; + unsigned int fourcc; +} PixelFormatTag; + +extern const PixelFormatTag ff_raw_pix_fmt_tags[]; // exposed through avpriv_get_raw_pix_fmt_tags() + +const struct PixelFormatTag *avpriv_get_raw_pix_fmt_tags(void); + +enum AVPixelFormat avpriv_find_pix_fmt(const PixelFormatTag *tags, unsigned int fourcc); + +extern av_export_avcodec const PixelFormatTag avpriv_pix_fmt_bps_avi[]; +extern av_export_avcodec const PixelFormatTag avpriv_pix_fmt_bps_mov[]; + +#endif /* AVCODEC_RAW_H */ diff --git a/libs/ffvpx/libavcodec/rectangle.h b/libs/ffvpx/libavcodec/rectangle.h new file mode 100644 index 000000000..df7c18a4e --- /dev/null +++ b/libs/ffvpx/libavcodec/rectangle.h @@ -0,0 +1,124 @@ +/* + * rectangle filling function + * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * useful rectangle filling function + * @author Michael Niedermayer <michaelni@gmx.at> + */ + +#ifndef AVCODEC_RECTANGLE_H +#define AVCODEC_RECTANGLE_H + +#include "config.h" +#include "libavutil/common.h" +#include "libavutil/avassert.h" + +/** + * fill a rectangle. + * @param h height of the rectangle, should be a constant + * @param w width of the rectangle, should be a constant + * @param size the size of val (1, 2 or 4), should be a constant + */ +static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ + uint8_t *p= (uint8_t*)vp; + av_assert2(size==1 || size==2 || size==4); + av_assert2(w<=4); + + w *= size; + stride *= size; + + av_assert2((((long)vp)&(FFMIN(w, 8<<(HAVE_NEON|ARCH_PPC|HAVE_MMX))-1)) == 0); + av_assert2((stride&(w-1))==0); + if(w==2){ + const uint16_t v= size==4 ? val : val*0x0101; + *(uint16_t*)(p + 0*stride)= v; + if(h==1) return; + *(uint16_t*)(p + 1*stride)= v; + if(h==2) return; + *(uint16_t*)(p + 2*stride)= v; + *(uint16_t*)(p + 3*stride)= v; + }else if(w==4){ + const uint32_t v= size==4 ? val : size==2 ? val*0x00010001 : val*0x01010101; + *(uint32_t*)(p + 0*stride)= v; + if(h==1) return; + *(uint32_t*)(p + 1*stride)= v; + if(h==2) return; + *(uint32_t*)(p + 2*stride)= v; + *(uint32_t*)(p + 3*stride)= v; + }else if(w==8){ + // gcc cannot optimize 64-bit math on x86_32 +#if HAVE_FAST_64BIT + const uint64_t v= size==2 ? val*0x0001000100010001ULL : val*0x0100000001ULL; + *(uint64_t*)(p + 0*stride)= v; + if(h==1) return; + *(uint64_t*)(p + 1*stride)= v; + if(h==2) return; + *(uint64_t*)(p + 2*stride)= v; + *(uint64_t*)(p + 3*stride)= v; + }else if(w==16){ + const uint64_t v= val*0x0100000001ULL; + *(uint64_t*)(p + 0+0*stride)= v; + *(uint64_t*)(p + 8+0*stride)= v; + *(uint64_t*)(p + 0+1*stride)= v; + *(uint64_t*)(p + 8+1*stride)= v; + if(h==2) return; + *(uint64_t*)(p + 0+2*stride)= v; + *(uint64_t*)(p + 8+2*stride)= v; + *(uint64_t*)(p + 0+3*stride)= v; + *(uint64_t*)(p + 8+3*stride)= v; +#else + const uint32_t v= size==2 ? val*0x00010001 : val; + *(uint32_t*)(p + 0+0*stride)= v; + *(uint32_t*)(p + 4+0*stride)= v; + if(h==1) return; + *(uint32_t*)(p + 0+1*stride)= v; + *(uint32_t*)(p + 4+1*stride)= v; + if(h==2) return; + *(uint32_t*)(p + 0+2*stride)= v; + *(uint32_t*)(p + 4+2*stride)= v; + *(uint32_t*)(p + 0+3*stride)= v; + *(uint32_t*)(p + 4+3*stride)= v; + }else if(w==16){ + *(uint32_t*)(p + 0+0*stride)= val; + *(uint32_t*)(p + 4+0*stride)= val; + *(uint32_t*)(p + 8+0*stride)= val; + *(uint32_t*)(p +12+0*stride)= val; + *(uint32_t*)(p + 0+1*stride)= val; + *(uint32_t*)(p + 4+1*stride)= val; + *(uint32_t*)(p + 8+1*stride)= val; + *(uint32_t*)(p +12+1*stride)= val; + if(h==2) return; + *(uint32_t*)(p + 0+2*stride)= val; + *(uint32_t*)(p + 4+2*stride)= val; + *(uint32_t*)(p + 8+2*stride)= val; + *(uint32_t*)(p +12+2*stride)= val; + *(uint32_t*)(p + 0+3*stride)= val; + *(uint32_t*)(p + 4+3*stride)= val; + *(uint32_t*)(p + 8+3*stride)= val; + *(uint32_t*)(p +12+3*stride)= val; +#endif + }else + av_assert2(0); + av_assert2(h==4); +} + +#endif /* AVCODEC_RECTANGLE_H */ diff --git a/libs/ffvpx/libavcodec/reverse.c b/libs/ffvpx/libavcodec/reverse.c new file mode 100644 index 000000000..440badaf3 --- /dev/null +++ b/libs/ffvpx/libavcodec/reverse.c @@ -0,0 +1 @@ +#include "libavutil/reverse.c" diff --git a/libs/ffvpx/libavcodec/rl.h b/libs/ffvpx/libavcodec/rl.h new file mode 100644 index 000000000..9a767bc5f --- /dev/null +++ b/libs/ffvpx/libavcodec/rl.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2000-2002 Fabrice Bellard + * Copyright (c) 2002-2004 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * rl header. + */ + +#ifndef AVCODEC_RL_H +#define AVCODEC_RL_H + +#include <stdint.h> + +#include "vlc.h" + +/* run length table */ +#define MAX_RUN 64 +#define MAX_LEVEL 64 + +/** RLTable. */ +typedef struct RLTable { + int n; ///< number of entries of table_vlc minus 1 + int last; ///< number of values for last = 0 + const uint16_t (*table_vlc)[2]; + const int8_t *table_run; + const int8_t *table_level; + uint8_t *index_run[2]; ///< encoding only + int8_t *max_level[2]; ///< encoding & decoding + int8_t *max_run[2]; ///< encoding & decoding + RL_VLC_ELEM *rl_vlc[32]; ///< decoding only +} RLTable; + +/** + * @param static_store static uint8_t array[2][2*MAX_RUN + MAX_LEVEL + 3] which will hold + * the level and run tables, if this is NULL av_malloc() will be used + */ +int ff_rl_init(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3]); +void ff_rl_init_vlc(RLTable *rl, unsigned static_size); + +/** + * Free the contents of a dynamically allocated table. + */ +void ff_rl_free(RLTable *rl); + +#define INIT_VLC_RL(rl, static_size)\ +{\ + int q;\ + static RL_VLC_ELEM rl_vlc_table[32][static_size];\ +\ + if(!rl.rl_vlc[0]){\ + for(q=0; q<32; q++)\ + rl.rl_vlc[q]= rl_vlc_table[q];\ +\ + ff_rl_init_vlc(&rl, static_size);\ + }\ +} + +static inline int get_rl_index(const RLTable *rl, int last, int run, int level) +{ + int index; + index = rl->index_run[last][run]; + if (index >= rl->n) + return rl->n; + if (level > rl->max_level[last][run]) + return rl->n; + return index + level - 1; +} + +#endif /* AVCODEC_RL_H */ diff --git a/libs/ffvpx/libavcodec/rnd_avg.h b/libs/ffvpx/libavcodec/rnd_avg.h new file mode 100644 index 000000000..344775e31 --- /dev/null +++ b/libs/ffvpx/libavcodec/rnd_avg.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp> + * Copyright (c) 2011 Oskar Arvidsson + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_RND_AVG_H +#define AVCODEC_RND_AVG_H + +#include <stddef.h> +#include <stdint.h> + +#define BYTE_VEC32(c) ((c) * 0x01010101UL) +#define BYTE_VEC64(c) ((c) * 0x0001000100010001UL) + +static inline uint32_t rnd_avg32(uint32_t a, uint32_t b) +{ + return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); +} + +static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b) +{ + return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); +} + +static inline uint64_t rnd_avg64(uint64_t a, uint64_t b) +{ + return (a | b) - (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1); +} + +static inline uint64_t no_rnd_avg64(uint64_t a, uint64_t b) +{ + return (a & b) + (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1); +} + +#endif /* AVCODEC_RND_AVG_H */ diff --git a/libs/ffvpx/libavcodec/thread.h b/libs/ffvpx/libavcodec/thread.h new file mode 100644 index 000000000..540135fbc --- /dev/null +++ b/libs/ffvpx/libavcodec/thread.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2008 Alexander Strange <astrange@ithinksw.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Multithreading support functions + * @author Alexander Strange <astrange@ithinksw.com> + */ + +#ifndef AVCODEC_THREAD_H +#define AVCODEC_THREAD_H + +#include "libavutil/buffer.h" + +#include "avcodec.h" + +typedef struct ThreadFrame { + AVFrame *f; + AVCodecContext *owner[2]; + // progress->data is an array of 2 ints holding progress for top/bottom + // fields + AVBufferRef *progress; +} ThreadFrame; + +/** + * Wait for decoding threads to finish and reset internal state. + * Called by avcodec_flush_buffers(). + * + * @param avctx The context. + */ +void ff_thread_flush(AVCodecContext *avctx); + +/** + * Submit a new frame to a decoding thread. + * Returns the next available frame in picture. *got_picture_ptr + * will be 0 if none is available. + * The return value on success is the size of the consumed packet for + * compatibility with avcodec_decode_video2(). This means the decoder + * has to consume the full packet. + * + * Parameters are the same as avcodec_decode_video2(). + */ +int ff_thread_decode_frame(AVCodecContext *avctx, AVFrame *picture, + int *got_picture_ptr, AVPacket *avpkt); + +/** + * If the codec defines update_thread_context(), call this + * when they are ready for the next thread to start decoding + * the next frame. After calling it, do not change any variables + * read by the update_thread_context() method, or call ff_thread_get_buffer(). + * + * @param avctx The context. + */ +void ff_thread_finish_setup(AVCodecContext *avctx); + +/** + * Notify later decoding threads when part of their reference picture is ready. + * Call this when some part of the picture is finished decoding. + * Later calls with lower values of progress have no effect. + * + * @param f The picture being decoded. + * @param progress Value, in arbitrary units, of how much of the picture has decoded. + * @param field The field being decoded, for field-picture codecs. + * 0 for top field or frame pictures, 1 for bottom field. + */ +void ff_thread_report_progress(ThreadFrame *f, int progress, int field); + +/** + * Wait for earlier decoding threads to finish reference pictures. + * Call this before accessing some part of a picture, with a given + * value for progress, and it will return after the responsible decoding + * thread calls ff_thread_report_progress() with the same or + * higher value for progress. + * + * @param f The picture being referenced. + * @param progress Value, in arbitrary units, to wait for. + * @param field The field being referenced, for field-picture codecs. + * 0 for top field or frame pictures, 1 for bottom field. + */ +void ff_thread_await_progress(ThreadFrame *f, int progress, int field); + +/** + * Wrapper around get_format() for frame-multithreaded codecs. + * Call this function instead of avctx->get_format(). + * Cannot be called after the codec has called ff_thread_finish_setup(). + * + * @param avctx The current context. + * @param fmt The list of available formats. + */ +enum AVPixelFormat ff_thread_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt); + +/** + * Wrapper around get_buffer() for frame-multithreaded codecs. + * Call this function instead of ff_get_buffer(f). + * Cannot be called after the codec has called ff_thread_finish_setup(). + * + * @param avctx The current context. + * @param f The frame to write into. + */ +int ff_thread_get_buffer(AVCodecContext *avctx, ThreadFrame *f, int flags); + +/** + * Wrapper around release_buffer() frame-for multithreaded codecs. + * Call this function instead of avctx->release_buffer(f). + * The AVFrame will be copied and the actual release_buffer() call + * will be performed later. The contents of data pointed to by the + * AVFrame should not be changed until ff_thread_get_buffer() is called + * on it. + * + * @param avctx The current context. + * @param f The picture being released. + */ +void ff_thread_release_buffer(AVCodecContext *avctx, ThreadFrame *f); + +int ff_thread_ref_frame(ThreadFrame *dst, ThreadFrame *src); + +int ff_thread_init(AVCodecContext *s); +int ff_slice_thread_execute_with_mainfunc(AVCodecContext *avctx, + int (*action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr), + int (*main_func)(AVCodecContext *c), void *arg, int *ret, int job_count); +void ff_thread_free(AVCodecContext *s); +int ff_alloc_entries(AVCodecContext *avctx, int count); +void ff_reset_entries(AVCodecContext *avctx); +void ff_thread_report_progress2(AVCodecContext *avctx, int field, int thread, int n); +void ff_thread_await_progress2(AVCodecContext *avctx, int field, int thread, int shift); + +#endif /* AVCODEC_THREAD_H */ diff --git a/libs/ffvpx/libavcodec/unary.h b/libs/ffvpx/libavcodec/unary.h new file mode 100644 index 000000000..908dc9350 --- /dev/null +++ b/libs/ffvpx/libavcodec/unary.h @@ -0,0 +1,56 @@ +/* + * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_UNARY_H +#define AVCODEC_UNARY_H + +#include "get_bits.h" + +/** + * Get unary code of limited length + * @param gb GetBitContext + * @param[in] stop The bitstop value (unary code of 1's or 0's) + * @param[in] len Maximum length + * @return Unary length/index + */ +static inline int get_unary(GetBitContext *gb, int stop, int len) +{ + int i; + + for(i = 0; i < len && get_bits1(gb) != stop; i++); + return i; +} + +/** + * Get unary code terminated by a 0 with a maximum length of 33 + * @param gb GetBitContext + * @return Unary length/index + */ +static inline int get_unary_0_33(GetBitContext *gb) +{ + return get_unary(gb, 0, 33); +} + +static inline int get_unary_0_9(GetBitContext *gb) +{ + return get_unary(gb, 0, 9); +} + +#endif /* AVCODEC_UNARY_H */ diff --git a/libs/ffvpx/libavcodec/utils.c b/libs/ffvpx/libavcodec/utils.c new file mode 100644 index 000000000..59d41ccbb --- /dev/null +++ b/libs/ffvpx/libavcodec/utils.c @@ -0,0 +1,2197 @@ +/* + * utils for libavcodec + * Copyright (c) 2001 Fabrice Bellard + * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * utils. + */ + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/avassert.h" +#include "libavutil/avstring.h" +#include "libavutil/bprint.h" +#include "libavutil/channel_layout.h" +#include "libavutil/crc.h" +#include "libavutil/frame.h" +#include "libavutil/hwcontext.h" +#include "libavutil/internal.h" +#include "libavutil/mathematics.h" +#include "libavutil/mem_internal.h" +#include "libavutil/pixdesc.h" +#include "libavutil/imgutils.h" +#include "libavutil/samplefmt.h" +#include "libavutil/dict.h" +#include "libavutil/thread.h" +#include "avcodec.h" +#include "decode.h" +#include "hwaccel.h" +#include "libavutil/opt.h" +#include "mpegvideo.h" +#include "thread.h" +#include "frame_thread_encoder.h" +#include "internal.h" +#include "raw.h" +#include "bytestream.h" +#include "version.h" +#include <stdlib.h> +#include <stdarg.h> +#include <stdatomic.h> +#include <limits.h> +#include <float.h> +#if CONFIG_ICONV +# include <iconv.h> +#endif + +#include "libavutil/ffversion.h" +const char av_codec_ffversion[] = "FFmpeg version " FFMPEG_VERSION; + +static AVMutex codec_mutex = AV_MUTEX_INITIALIZER; + +void av_fast_padded_malloc(void *ptr, unsigned int *size, size_t min_size) +{ + uint8_t **p = ptr; + if (min_size > SIZE_MAX - AV_INPUT_BUFFER_PADDING_SIZE) { + av_freep(p); + *size = 0; + return; + } + if (!ff_fast_malloc(p, size, min_size + AV_INPUT_BUFFER_PADDING_SIZE, 1)) + memset(*p + min_size, 0, AV_INPUT_BUFFER_PADDING_SIZE); +} + +void av_fast_padded_mallocz(void *ptr, unsigned int *size, size_t min_size) +{ + uint8_t **p = ptr; + if (min_size > SIZE_MAX - AV_INPUT_BUFFER_PADDING_SIZE) { + av_freep(p); + *size = 0; + return; + } + if (!ff_fast_malloc(p, size, min_size + AV_INPUT_BUFFER_PADDING_SIZE, 1)) + memset(*p, 0, min_size + AV_INPUT_BUFFER_PADDING_SIZE); +} + +int av_codec_is_encoder(const AVCodec *codec) +{ + return codec && (codec->encode_sub || codec->encode2 ||codec->send_frame); +} + +int av_codec_is_decoder(const AVCodec *codec) +{ + return codec && (codec->decode || codec->receive_frame); +} + +int ff_set_dimensions(AVCodecContext *s, int width, int height) +{ + int ret = av_image_check_size2(width, height, s->max_pixels, AV_PIX_FMT_NONE, 0, s); + + if (ret < 0) + width = height = 0; + + s->coded_width = width; + s->coded_height = height; + s->width = AV_CEIL_RSHIFT(width, s->lowres); + s->height = AV_CEIL_RSHIFT(height, s->lowres); + + return ret; +} + +int ff_set_sar(AVCodecContext *avctx, AVRational sar) +{ + int ret = av_image_check_sar(avctx->width, avctx->height, sar); + + if (ret < 0) { + av_log(avctx, AV_LOG_WARNING, "ignoring invalid SAR: %d/%d\n", + sar.num, sar.den); + avctx->sample_aspect_ratio = (AVRational){ 0, 1 }; + return ret; + } else { + avctx->sample_aspect_ratio = sar; + } + return 0; +} + +int ff_side_data_update_matrix_encoding(AVFrame *frame, + enum AVMatrixEncoding matrix_encoding) +{ + AVFrameSideData *side_data; + enum AVMatrixEncoding *data; + + side_data = av_frame_get_side_data(frame, AV_FRAME_DATA_MATRIXENCODING); + if (!side_data) + side_data = av_frame_new_side_data(frame, AV_FRAME_DATA_MATRIXENCODING, + sizeof(enum AVMatrixEncoding)); + + if (!side_data) + return AVERROR(ENOMEM); + + data = (enum AVMatrixEncoding*)side_data->data; + *data = matrix_encoding; + + return 0; +} + +void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, + int linesize_align[AV_NUM_DATA_POINTERS]) +{ + int i; + int w_align = 1; + int h_align = 1; + AVPixFmtDescriptor const *desc = av_pix_fmt_desc_get(s->pix_fmt); + + if (desc) { + w_align = 1 << desc->log2_chroma_w; + h_align = 1 << desc->log2_chroma_h; + } + + switch (s->pix_fmt) { + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUYV422: + case AV_PIX_FMT_YVYU422: + case AV_PIX_FMT_UYVY422: + case AV_PIX_FMT_YUV422P: + case AV_PIX_FMT_YUV440P: + case AV_PIX_FMT_YUV444P: + case AV_PIX_FMT_GBRP: + case AV_PIX_FMT_GBRAP: + case AV_PIX_FMT_GRAY8: + case AV_PIX_FMT_GRAY16BE: + case AV_PIX_FMT_GRAY16LE: + case AV_PIX_FMT_YUVJ420P: + case AV_PIX_FMT_YUVJ422P: + case AV_PIX_FMT_YUVJ440P: + case AV_PIX_FMT_YUVJ444P: + case AV_PIX_FMT_YUVA420P: + case AV_PIX_FMT_YUVA422P: + case AV_PIX_FMT_YUVA444P: + case AV_PIX_FMT_YUV420P9LE: + case AV_PIX_FMT_YUV420P9BE: + case AV_PIX_FMT_YUV420P10LE: + case AV_PIX_FMT_YUV420P10BE: + case AV_PIX_FMT_YUV420P12LE: + case AV_PIX_FMT_YUV420P12BE: + case AV_PIX_FMT_YUV420P14LE: + case AV_PIX_FMT_YUV420P14BE: + case AV_PIX_FMT_YUV420P16LE: + case AV_PIX_FMT_YUV420P16BE: + case AV_PIX_FMT_YUVA420P9LE: + case AV_PIX_FMT_YUVA420P9BE: + case AV_PIX_FMT_YUVA420P10LE: + case AV_PIX_FMT_YUVA420P10BE: + case AV_PIX_FMT_YUVA420P16LE: + case AV_PIX_FMT_YUVA420P16BE: + case AV_PIX_FMT_YUV422P9LE: + case AV_PIX_FMT_YUV422P9BE: + case AV_PIX_FMT_YUV422P10LE: + case AV_PIX_FMT_YUV422P10BE: + case AV_PIX_FMT_YUV422P12LE: + case AV_PIX_FMT_YUV422P12BE: + case AV_PIX_FMT_YUV422P14LE: + case AV_PIX_FMT_YUV422P14BE: + case AV_PIX_FMT_YUV422P16LE: + case AV_PIX_FMT_YUV422P16BE: + case AV_PIX_FMT_YUVA422P9LE: + case AV_PIX_FMT_YUVA422P9BE: + case AV_PIX_FMT_YUVA422P10LE: + case AV_PIX_FMT_YUVA422P10BE: + case AV_PIX_FMT_YUVA422P16LE: + case AV_PIX_FMT_YUVA422P16BE: + case AV_PIX_FMT_YUV440P10LE: + case AV_PIX_FMT_YUV440P10BE: + case AV_PIX_FMT_YUV440P12LE: + case AV_PIX_FMT_YUV440P12BE: + case AV_PIX_FMT_YUV444P9LE: + case AV_PIX_FMT_YUV444P9BE: + case AV_PIX_FMT_YUV444P10LE: + case AV_PIX_FMT_YUV444P10BE: + case AV_PIX_FMT_YUV444P12LE: + case AV_PIX_FMT_YUV444P12BE: + case AV_PIX_FMT_YUV444P14LE: + case AV_PIX_FMT_YUV444P14BE: + case AV_PIX_FMT_YUV444P16LE: + case AV_PIX_FMT_YUV444P16BE: + case AV_PIX_FMT_YUVA444P9LE: + case AV_PIX_FMT_YUVA444P9BE: + case AV_PIX_FMT_YUVA444P10LE: + case AV_PIX_FMT_YUVA444P10BE: + case AV_PIX_FMT_YUVA444P16LE: + case AV_PIX_FMT_YUVA444P16BE: + case AV_PIX_FMT_GBRP9LE: + case AV_PIX_FMT_GBRP9BE: + case AV_PIX_FMT_GBRP10LE: + case AV_PIX_FMT_GBRP10BE: + case AV_PIX_FMT_GBRP12LE: + case AV_PIX_FMT_GBRP12BE: + case AV_PIX_FMT_GBRP14LE: + case AV_PIX_FMT_GBRP14BE: + case AV_PIX_FMT_GBRP16LE: + case AV_PIX_FMT_GBRP16BE: + case AV_PIX_FMT_GBRAP12LE: + case AV_PIX_FMT_GBRAP12BE: + case AV_PIX_FMT_GBRAP16LE: + case AV_PIX_FMT_GBRAP16BE: + w_align = 16; //FIXME assume 16 pixel per macroblock + h_align = 16 * 2; // interlaced needs 2 macroblocks height + break; + case AV_PIX_FMT_YUV411P: + case AV_PIX_FMT_YUVJ411P: + case AV_PIX_FMT_UYYVYY411: + w_align = 32; + h_align = 16 * 2; + break; + case AV_PIX_FMT_YUV410P: + if (s->codec_id == AV_CODEC_ID_SVQ1) { + w_align = 64; + h_align = 64; + } + break; + case AV_PIX_FMT_RGB555: + if (s->codec_id == AV_CODEC_ID_RPZA) { + w_align = 4; + h_align = 4; + } + if (s->codec_id == AV_CODEC_ID_INTERPLAY_VIDEO) { + w_align = 8; + h_align = 8; + } + break; + case AV_PIX_FMT_PAL8: + case AV_PIX_FMT_BGR8: + case AV_PIX_FMT_RGB8: + if (s->codec_id == AV_CODEC_ID_SMC || + s->codec_id == AV_CODEC_ID_CINEPAK) { + w_align = 4; + h_align = 4; + } + if (s->codec_id == AV_CODEC_ID_JV || + s->codec_id == AV_CODEC_ID_INTERPLAY_VIDEO) { + w_align = 8; + h_align = 8; + } + break; + case AV_PIX_FMT_BGR24: + if ((s->codec_id == AV_CODEC_ID_MSZH) || + (s->codec_id == AV_CODEC_ID_ZLIB)) { + w_align = 4; + h_align = 4; + } + break; + case AV_PIX_FMT_RGB24: + if (s->codec_id == AV_CODEC_ID_CINEPAK) { + w_align = 4; + h_align = 4; + } + break; + default: + break; + } + + if (s->codec_id == AV_CODEC_ID_IFF_ILBM) { + w_align = FFMAX(w_align, 8); + } + + *width = FFALIGN(*width, w_align); + *height = FFALIGN(*height, h_align); + if (s->codec_id == AV_CODEC_ID_H264 || s->lowres || + s->codec_id == AV_CODEC_ID_VP5 || s->codec_id == AV_CODEC_ID_VP6 || + s->codec_id == AV_CODEC_ID_VP6F || s->codec_id == AV_CODEC_ID_VP6A + ) { + // some of the optimized chroma MC reads one line too much + // which is also done in mpeg decoders with lowres > 0 + *height += 2; + + // H.264 uses edge emulation for out of frame motion vectors, for this + // it requires a temporary area large enough to hold a 21x21 block, + // increasing witdth ensure that the temporary area is large enough, + // the next rounded up width is 32 + *width = FFMAX(*width, 32); + } + + for (i = 0; i < 4; i++) + linesize_align[i] = STRIDE_ALIGN; +} + +void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->pix_fmt); + int chroma_shift = desc->log2_chroma_w; + int linesize_align[AV_NUM_DATA_POINTERS]; + int align; + + avcodec_align_dimensions2(s, width, height, linesize_align); + align = FFMAX(linesize_align[0], linesize_align[3]); + linesize_align[1] <<= chroma_shift; + linesize_align[2] <<= chroma_shift; + align = FFMAX3(align, linesize_align[1], linesize_align[2]); + *width = FFALIGN(*width, align); +} + +int avcodec_enum_to_chroma_pos(int *xpos, int *ypos, enum AVChromaLocation pos) +{ + if (pos <= AVCHROMA_LOC_UNSPECIFIED || pos >= AVCHROMA_LOC_NB) + return AVERROR(EINVAL); + pos--; + + *xpos = (pos&1) * 128; + *ypos = ((pos>>1)^(pos<4)) * 128; + + return 0; +} + +enum AVChromaLocation avcodec_chroma_pos_to_enum(int xpos, int ypos) +{ + int pos, xout, yout; + + for (pos = AVCHROMA_LOC_UNSPECIFIED + 1; pos < AVCHROMA_LOC_NB; pos++) { + if (avcodec_enum_to_chroma_pos(&xout, &yout, pos) == 0 && xout == xpos && yout == ypos) + return pos; + } + return AVCHROMA_LOC_UNSPECIFIED; +} + +int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels, + enum AVSampleFormat sample_fmt, const uint8_t *buf, + int buf_size, int align) +{ + int ch, planar, needed_size, ret = 0; + + needed_size = av_samples_get_buffer_size(NULL, nb_channels, + frame->nb_samples, sample_fmt, + align); + if (buf_size < needed_size) + return AVERROR(EINVAL); + + planar = av_sample_fmt_is_planar(sample_fmt); + if (planar && nb_channels > AV_NUM_DATA_POINTERS) { + if (!(frame->extended_data = av_mallocz_array(nb_channels, + sizeof(*frame->extended_data)))) + return AVERROR(ENOMEM); + } else { + frame->extended_data = frame->data; + } + + if ((ret = av_samples_fill_arrays(frame->extended_data, &frame->linesize[0], + (uint8_t *)(intptr_t)buf, nb_channels, frame->nb_samples, + sample_fmt, align)) < 0) { + if (frame->extended_data != frame->data) + av_freep(&frame->extended_data); + return ret; + } + if (frame->extended_data != frame->data) { + for (ch = 0; ch < AV_NUM_DATA_POINTERS; ch++) + frame->data[ch] = frame->extended_data[ch]; + } + + return ret; +} + +void ff_color_frame(AVFrame *frame, const int c[4]) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); + int p, y, x; + + av_assert0(desc->flags & AV_PIX_FMT_FLAG_PLANAR); + + for (p = 0; p<desc->nb_components; p++) { + uint8_t *dst = frame->data[p]; + int is_chroma = p == 1 || p == 2; + int bytes = is_chroma ? AV_CEIL_RSHIFT(frame->width, desc->log2_chroma_w) : frame->width; + int height = is_chroma ? AV_CEIL_RSHIFT(frame->height, desc->log2_chroma_h) : frame->height; + for (y = 0; y < height; y++) { + if (desc->comp[0].depth >= 9) { + for (x = 0; x<bytes; x++) + ((uint16_t*)dst)[x] = c[p]; + }else + memset(dst, c[p], bytes); + dst += frame->linesize[p]; + } + } +} + +int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2), void *arg, int *ret, int count, int size) +{ + int i; + + for (i = 0; i < count; i++) { + int r = func(c, (char *)arg + i * size); + if (ret) + ret[i] = r; + } + emms_c(); + return 0; +} + +int avcodec_default_execute2(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2, int jobnr, int threadnr), void *arg, int *ret, int count) +{ + int i; + + for (i = 0; i < count; i++) { + int r = func(c, arg, i, 0); + if (ret) + ret[i] = r; + } + emms_c(); + return 0; +} + +enum AVPixelFormat avpriv_find_pix_fmt(const PixelFormatTag *tags, + unsigned int fourcc) +{ + while (tags->pix_fmt >= 0) { + if (tags->fourcc == fourcc) + return tags->pix_fmt; + tags++; + } + return AV_PIX_FMT_NONE; +} + +#if FF_API_CODEC_GET_SET +MAKE_ACCESSORS(AVCodecContext, codec, AVRational, pkt_timebase) +MAKE_ACCESSORS(AVCodecContext, codec, const AVCodecDescriptor *, codec_descriptor) +MAKE_ACCESSORS(AVCodecContext, codec, int, lowres) +MAKE_ACCESSORS(AVCodecContext, codec, int, seek_preroll) +MAKE_ACCESSORS(AVCodecContext, codec, uint16_t*, chroma_intra_matrix) + +unsigned av_codec_get_codec_properties(const AVCodecContext *codec) +{ + return codec->properties; +} + +int av_codec_get_max_lowres(const AVCodec *codec) +{ + return codec->max_lowres; +} +#endif + +int avpriv_codec_get_cap_skip_frame_fill_param(const AVCodec *codec){ + return !!(codec->caps_internal & FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM); +} + +static int64_t get_bit_rate(AVCodecContext *ctx) +{ + int64_t bit_rate; + int bits_per_sample; + + switch (ctx->codec_type) { + case AVMEDIA_TYPE_VIDEO: + case AVMEDIA_TYPE_DATA: + case AVMEDIA_TYPE_SUBTITLE: + case AVMEDIA_TYPE_ATTACHMENT: + bit_rate = ctx->bit_rate; + break; + case AVMEDIA_TYPE_AUDIO: + bits_per_sample = av_get_bits_per_sample(ctx->codec_id); + bit_rate = bits_per_sample ? ctx->sample_rate * (int64_t)ctx->channels * bits_per_sample : ctx->bit_rate; + break; + default: + bit_rate = 0; + break; + } + return bit_rate; +} + + +static void ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec) +{ + if (!(codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE) && codec->init) + ff_mutex_lock(&codec_mutex); +} + +static void ff_unlock_avcodec(const AVCodec *codec) +{ + if (!(codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE) && codec->init) + ff_mutex_unlock(&codec_mutex); +} + +int attribute_align_arg ff_codec_open2_recursive(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options) +{ + int ret = 0; + + ff_unlock_avcodec(codec); + + ret = avcodec_open2(avctx, codec, options); + + ff_lock_avcodec(avctx, codec); + return ret; +} + +int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options) +{ + int ret = 0; + AVDictionary *tmp = NULL; + const AVPixFmtDescriptor *pixdesc; + + if (avcodec_is_open(avctx)) + return 0; + + if ((!codec && !avctx->codec)) { + av_log(avctx, AV_LOG_ERROR, "No codec provided to avcodec_open2()\n"); + return AVERROR(EINVAL); + } + if ((codec && avctx->codec && codec != avctx->codec)) { + av_log(avctx, AV_LOG_ERROR, "This AVCodecContext was allocated for %s, " + "but %s passed to avcodec_open2()\n", avctx->codec->name, codec->name); + return AVERROR(EINVAL); + } + if (!codec) + codec = avctx->codec; + + if (avctx->extradata_size < 0 || avctx->extradata_size >= FF_MAX_EXTRADATA_SIZE) + return AVERROR(EINVAL); + + if (options) + av_dict_copy(&tmp, *options, 0); + + ff_lock_avcodec(avctx, codec); + + avctx->internal = av_mallocz(sizeof(*avctx->internal)); + if (!avctx->internal) { + ret = AVERROR(ENOMEM); + goto end; + } + + avctx->internal->pool = av_mallocz(sizeof(*avctx->internal->pool)); + if (!avctx->internal->pool) { + ret = AVERROR(ENOMEM); + goto free_and_end; + } + + avctx->internal->to_free = av_frame_alloc(); + if (!avctx->internal->to_free) { + ret = AVERROR(ENOMEM); + goto free_and_end; + } + + avctx->internal->compat_decode_frame = av_frame_alloc(); + if (!avctx->internal->compat_decode_frame) { + ret = AVERROR(ENOMEM); + goto free_and_end; + } + + avctx->internal->buffer_frame = av_frame_alloc(); + if (!avctx->internal->buffer_frame) { + ret = AVERROR(ENOMEM); + goto free_and_end; + } + + avctx->internal->buffer_pkt = av_packet_alloc(); + if (!avctx->internal->buffer_pkt) { + ret = AVERROR(ENOMEM); + goto free_and_end; + } + + avctx->internal->ds.in_pkt = av_packet_alloc(); + if (!avctx->internal->ds.in_pkt) { + ret = AVERROR(ENOMEM); + goto free_and_end; + } + + avctx->internal->last_pkt_props = av_packet_alloc(); + if (!avctx->internal->last_pkt_props) { + ret = AVERROR(ENOMEM); + goto free_and_end; + } + + avctx->internal->skip_samples_multiplier = 1; + + if (codec->priv_data_size > 0) { + if (!avctx->priv_data) { + avctx->priv_data = av_mallocz(codec->priv_data_size); + if (!avctx->priv_data) { + ret = AVERROR(ENOMEM); + goto end; + } + if (codec->priv_class) { + *(const AVClass **)avctx->priv_data = codec->priv_class; + av_opt_set_defaults(avctx->priv_data); + } + } + if (codec->priv_class && (ret = av_opt_set_dict(avctx->priv_data, &tmp)) < 0) + goto free_and_end; + } else { + avctx->priv_data = NULL; + } + if ((ret = av_opt_set_dict(avctx, &tmp)) < 0) + goto free_and_end; + + if (avctx->codec_whitelist && av_match_list(codec->name, avctx->codec_whitelist, ',') <= 0) { + av_log(avctx, AV_LOG_ERROR, "Codec (%s) not on whitelist \'%s\'\n", codec->name, avctx->codec_whitelist); + ret = AVERROR(EINVAL); + goto free_and_end; + } + + // only call ff_set_dimensions() for non H.264/VP6F/DXV codecs so as not to overwrite previously setup dimensions + if (!(avctx->coded_width && avctx->coded_height && avctx->width && avctx->height && + (avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_VP6F || avctx->codec_id == AV_CODEC_ID_DXV))) { + if (avctx->coded_width && avctx->coded_height) + ret = ff_set_dimensions(avctx, avctx->coded_width, avctx->coded_height); + else if (avctx->width && avctx->height) + ret = ff_set_dimensions(avctx, avctx->width, avctx->height); + if (ret < 0) + goto free_and_end; + } + + if ((avctx->coded_width || avctx->coded_height || avctx->width || avctx->height) + && ( av_image_check_size2(avctx->coded_width, avctx->coded_height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx) < 0 + || av_image_check_size2(avctx->width, avctx->height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx) < 0)) { + av_log(avctx, AV_LOG_WARNING, "Ignoring invalid width/height values\n"); + ff_set_dimensions(avctx, 0, 0); + } + + if (avctx->width > 0 && avctx->height > 0) { + if (av_image_check_sar(avctx->width, avctx->height, + avctx->sample_aspect_ratio) < 0) { + av_log(avctx, AV_LOG_WARNING, "ignoring invalid SAR: %u/%u\n", + avctx->sample_aspect_ratio.num, + avctx->sample_aspect_ratio.den); + avctx->sample_aspect_ratio = (AVRational){ 0, 1 }; + } + } + + /* if the decoder init function was already called previously, + * free the already allocated subtitle_header before overwriting it */ + if (av_codec_is_decoder(codec)) + av_freep(&avctx->subtitle_header); + + if (avctx->channels > FF_SANE_NB_CHANNELS) { + ret = AVERROR(EINVAL); + goto free_and_end; + } + + avctx->codec = codec; + if ((avctx->codec_type == AVMEDIA_TYPE_UNKNOWN || avctx->codec_type == codec->type) && + avctx->codec_id == AV_CODEC_ID_NONE) { + avctx->codec_type = codec->type; + avctx->codec_id = codec->id; + } + if (avctx->codec_id != codec->id || (avctx->codec_type != codec->type + && avctx->codec_type != AVMEDIA_TYPE_ATTACHMENT)) { + av_log(avctx, AV_LOG_ERROR, "Codec type or id mismatches\n"); + ret = AVERROR(EINVAL); + goto free_and_end; + } + avctx->frame_number = 0; + avctx->codec_descriptor = avcodec_descriptor_get(avctx->codec_id); + + if ((avctx->codec->capabilities & AV_CODEC_CAP_EXPERIMENTAL) && + avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) { + const char *codec_string = av_codec_is_encoder(codec) ? "encoder" : "decoder"; + AVCodec *codec2; + av_log(avctx, AV_LOG_ERROR, + "The %s '%s' is experimental but experimental codecs are not enabled, " + "add '-strict %d' if you want to use it.\n", + codec_string, codec->name, FF_COMPLIANCE_EXPERIMENTAL); + codec2 = av_codec_is_encoder(codec) ? avcodec_find_encoder(codec->id) : avcodec_find_decoder(codec->id); + if (!(codec2->capabilities & AV_CODEC_CAP_EXPERIMENTAL)) + av_log(avctx, AV_LOG_ERROR, "Alternatively use the non experimental %s '%s'.\n", + codec_string, codec2->name); + ret = AVERROR_EXPERIMENTAL; + goto free_and_end; + } + + if (avctx->codec_type == AVMEDIA_TYPE_AUDIO && + (!avctx->time_base.num || !avctx->time_base.den)) { + avctx->time_base.num = 1; + avctx->time_base.den = avctx->sample_rate; + } + + if (!HAVE_THREADS) + av_log(avctx, AV_LOG_WARNING, "Warning: not compiled with thread support, using thread emulation\n"); + + if (CONFIG_FRAME_THREAD_ENCODER && av_codec_is_encoder(avctx->codec)) { + ff_unlock_avcodec(codec); //we will instantiate a few encoders thus kick the counter to prevent false detection of a problem + ret = ff_frame_thread_encoder_init(avctx, options ? *options : NULL); + ff_lock_avcodec(avctx, codec); + if (ret < 0) + goto free_and_end; + } + + if (HAVE_THREADS + && !(avctx->internal->frame_thread_encoder && (avctx->active_thread_type&FF_THREAD_FRAME))) { + ret = ff_thread_init(avctx); + if (ret < 0) { + goto free_and_end; + } + } + if (!HAVE_THREADS && !(codec->capabilities & AV_CODEC_CAP_AUTO_THREADS)) + avctx->thread_count = 1; + + if (avctx->codec->max_lowres < avctx->lowres || avctx->lowres < 0) { + av_log(avctx, AV_LOG_WARNING, "The maximum value for lowres supported by the decoder is %d\n", + avctx->codec->max_lowres); + avctx->lowres = avctx->codec->max_lowres; + } + + if (av_codec_is_encoder(avctx->codec)) { + int i; +#if FF_API_CODED_FRAME +FF_DISABLE_DEPRECATION_WARNINGS + avctx->coded_frame = av_frame_alloc(); + if (!avctx->coded_frame) { + ret = AVERROR(ENOMEM); + goto free_and_end; + } +FF_ENABLE_DEPRECATION_WARNINGS +#endif + + if (avctx->time_base.num <= 0 || avctx->time_base.den <= 0) { + av_log(avctx, AV_LOG_ERROR, "The encoder timebase is not set.\n"); + ret = AVERROR(EINVAL); + goto free_and_end; + } + + if (avctx->codec->sample_fmts) { + for (i = 0; avctx->codec->sample_fmts[i] != AV_SAMPLE_FMT_NONE; i++) { + if (avctx->sample_fmt == avctx->codec->sample_fmts[i]) + break; + if (avctx->channels == 1 && + av_get_planar_sample_fmt(avctx->sample_fmt) == + av_get_planar_sample_fmt(avctx->codec->sample_fmts[i])) { + avctx->sample_fmt = avctx->codec->sample_fmts[i]; + break; + } + } + if (avctx->codec->sample_fmts[i] == AV_SAMPLE_FMT_NONE) { + char buf[128]; + snprintf(buf, sizeof(buf), "%d", avctx->sample_fmt); + av_log(avctx, AV_LOG_ERROR, "Specified sample format %s is invalid or not supported\n", + (char *)av_x_if_null(av_get_sample_fmt_name(avctx->sample_fmt), buf)); + ret = AVERROR(EINVAL); + goto free_and_end; + } + } + if (avctx->codec->pix_fmts) { + for (i = 0; avctx->codec->pix_fmts[i] != AV_PIX_FMT_NONE; i++) + if (avctx->pix_fmt == avctx->codec->pix_fmts[i]) + break; + if (avctx->codec->pix_fmts[i] == AV_PIX_FMT_NONE + && !((avctx->codec_id == AV_CODEC_ID_MJPEG || avctx->codec_id == AV_CODEC_ID_LJPEG) + && avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL)) { + char buf[128]; + snprintf(buf, sizeof(buf), "%d", avctx->pix_fmt); + av_log(avctx, AV_LOG_ERROR, "Specified pixel format %s is invalid or not supported\n", + (char *)av_x_if_null(av_get_pix_fmt_name(avctx->pix_fmt), buf)); + ret = AVERROR(EINVAL); + goto free_and_end; + } + if (avctx->codec->pix_fmts[i] == AV_PIX_FMT_YUVJ420P || + avctx->codec->pix_fmts[i] == AV_PIX_FMT_YUVJ411P || + avctx->codec->pix_fmts[i] == AV_PIX_FMT_YUVJ422P || + avctx->codec->pix_fmts[i] == AV_PIX_FMT_YUVJ440P || + avctx->codec->pix_fmts[i] == AV_PIX_FMT_YUVJ444P) + avctx->color_range = AVCOL_RANGE_JPEG; + } + if (avctx->codec->supported_samplerates) { + for (i = 0; avctx->codec->supported_samplerates[i] != 0; i++) + if (avctx->sample_rate == avctx->codec->supported_samplerates[i]) + break; + if (avctx->codec->supported_samplerates[i] == 0) { + av_log(avctx, AV_LOG_ERROR, "Specified sample rate %d is not supported\n", + avctx->sample_rate); + ret = AVERROR(EINVAL); + goto free_and_end; + } + } + if (avctx->sample_rate < 0) { + av_log(avctx, AV_LOG_ERROR, "Specified sample rate %d is not supported\n", + avctx->sample_rate); + ret = AVERROR(EINVAL); + goto free_and_end; + } + if (avctx->codec->channel_layouts) { + if (!avctx->channel_layout) { + av_log(avctx, AV_LOG_WARNING, "Channel layout not specified\n"); + } else { + for (i = 0; avctx->codec->channel_layouts[i] != 0; i++) + if (avctx->channel_layout == avctx->codec->channel_layouts[i]) + break; + if (avctx->codec->channel_layouts[i] == 0) { + char buf[512]; + av_get_channel_layout_string(buf, sizeof(buf), -1, avctx->channel_layout); + av_log(avctx, AV_LOG_ERROR, "Specified channel layout '%s' is not supported\n", buf); + ret = AVERROR(EINVAL); + goto free_and_end; + } + } + } + if (avctx->channel_layout && avctx->channels) { + int channels = av_get_channel_layout_nb_channels(avctx->channel_layout); + if (channels != avctx->channels) { + char buf[512]; + av_get_channel_layout_string(buf, sizeof(buf), -1, avctx->channel_layout); + av_log(avctx, AV_LOG_ERROR, + "Channel layout '%s' with %d channels does not match number of specified channels %d\n", + buf, channels, avctx->channels); + ret = AVERROR(EINVAL); + goto free_and_end; + } + } else if (avctx->channel_layout) { + avctx->channels = av_get_channel_layout_nb_channels(avctx->channel_layout); + } + if (avctx->channels < 0) { + av_log(avctx, AV_LOG_ERROR, "Specified number of channels %d is not supported\n", + avctx->channels); + ret = AVERROR(EINVAL); + goto free_and_end; + } + if(avctx->codec_type == AVMEDIA_TYPE_VIDEO) { + pixdesc = av_pix_fmt_desc_get(avctx->pix_fmt); + if ( avctx->bits_per_raw_sample < 0 + || (avctx->bits_per_raw_sample > 8 && pixdesc->comp[0].depth <= 8)) { + av_log(avctx, AV_LOG_WARNING, "Specified bit depth %d not possible with the specified pixel formats depth %d\n", + avctx->bits_per_raw_sample, pixdesc->comp[0].depth); + avctx->bits_per_raw_sample = pixdesc->comp[0].depth; + } + if (avctx->width <= 0 || avctx->height <= 0) { + av_log(avctx, AV_LOG_ERROR, "dimensions not set\n"); + ret = AVERROR(EINVAL); + goto free_and_end; + } + } + if ( (avctx->codec_type == AVMEDIA_TYPE_VIDEO || avctx->codec_type == AVMEDIA_TYPE_AUDIO) + && avctx->bit_rate>0 && avctx->bit_rate<1000) { + av_log(avctx, AV_LOG_WARNING, "Bitrate %"PRId64" is extremely low, maybe you mean %"PRId64"k\n", avctx->bit_rate, avctx->bit_rate); + } + + if (!avctx->rc_initial_buffer_occupancy) + avctx->rc_initial_buffer_occupancy = avctx->rc_buffer_size * 3LL / 4; + + if (avctx->ticks_per_frame && avctx->time_base.num && + avctx->ticks_per_frame > INT_MAX / avctx->time_base.num) { + av_log(avctx, AV_LOG_ERROR, + "ticks_per_frame %d too large for the timebase %d/%d.", + avctx->ticks_per_frame, + avctx->time_base.num, + avctx->time_base.den); + goto free_and_end; + } + + if (avctx->hw_frames_ctx) { + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data; + if (frames_ctx->format != avctx->pix_fmt) { + av_log(avctx, AV_LOG_ERROR, + "Mismatching AVCodecContext.pix_fmt and AVHWFramesContext.format\n"); + ret = AVERROR(EINVAL); + goto free_and_end; + } + if (avctx->sw_pix_fmt != AV_PIX_FMT_NONE && + avctx->sw_pix_fmt != frames_ctx->sw_format) { + av_log(avctx, AV_LOG_ERROR, + "Mismatching AVCodecContext.sw_pix_fmt (%s) " + "and AVHWFramesContext.sw_format (%s)\n", + av_get_pix_fmt_name(avctx->sw_pix_fmt), + av_get_pix_fmt_name(frames_ctx->sw_format)); + ret = AVERROR(EINVAL); + goto free_and_end; + } + avctx->sw_pix_fmt = frames_ctx->sw_format; + } + } + + avctx->pts_correction_num_faulty_pts = + avctx->pts_correction_num_faulty_dts = 0; + avctx->pts_correction_last_pts = + avctx->pts_correction_last_dts = INT64_MIN; + + if ( !CONFIG_GRAY && avctx->flags & AV_CODEC_FLAG_GRAY + && avctx->codec_descriptor->type == AVMEDIA_TYPE_VIDEO) + av_log(avctx, AV_LOG_WARNING, + "gray decoding requested but not enabled at configuration time\n"); + + if ( avctx->codec->init && (!(avctx->active_thread_type&FF_THREAD_FRAME) + || avctx->internal->frame_thread_encoder)) { + ret = avctx->codec->init(avctx); + if (ret < 0) { + goto free_and_end; + } + } + + ret=0; + + if (av_codec_is_decoder(avctx->codec)) { + if (!avctx->bit_rate) + avctx->bit_rate = get_bit_rate(avctx); + /* validate channel layout from the decoder */ + if (avctx->channel_layout) { + int channels = av_get_channel_layout_nb_channels(avctx->channel_layout); + if (!avctx->channels) + avctx->channels = channels; + else if (channels != avctx->channels) { + char buf[512]; + av_get_channel_layout_string(buf, sizeof(buf), -1, avctx->channel_layout); + av_log(avctx, AV_LOG_WARNING, + "Channel layout '%s' with %d channels does not match specified number of channels %d: " + "ignoring specified channel layout\n", + buf, channels, avctx->channels); + avctx->channel_layout = 0; + } + } + if (avctx->channels && avctx->channels < 0 || + avctx->channels > FF_SANE_NB_CHANNELS) { + ret = AVERROR(EINVAL); + goto free_and_end; + } + if (avctx->sub_charenc) { + if (avctx->codec_type != AVMEDIA_TYPE_SUBTITLE) { + av_log(avctx, AV_LOG_ERROR, "Character encoding is only " + "supported with subtitles codecs\n"); + ret = AVERROR(EINVAL); + goto free_and_end; + } else if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB) { + av_log(avctx, AV_LOG_WARNING, "Codec '%s' is bitmap-based, " + "subtitles character encoding will be ignored\n", + avctx->codec_descriptor->name); + avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_DO_NOTHING; + } else { + /* input character encoding is set for a text based subtitle + * codec at this point */ + if (avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_AUTOMATIC) + avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_PRE_DECODER; + + if (avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_PRE_DECODER) { +#if CONFIG_ICONV + iconv_t cd = iconv_open("UTF-8", avctx->sub_charenc); + if (cd == (iconv_t)-1) { + ret = AVERROR(errno); + av_log(avctx, AV_LOG_ERROR, "Unable to open iconv context " + "with input character encoding \"%s\"\n", avctx->sub_charenc); + goto free_and_end; + } + iconv_close(cd); +#else + av_log(avctx, AV_LOG_ERROR, "Character encoding subtitles " + "conversion needs a libavcodec built with iconv support " + "for this codec\n"); + ret = AVERROR(ENOSYS); + goto free_and_end; +#endif + } + } + } + +#if FF_API_AVCTX_TIMEBASE + if (avctx->framerate.num > 0 && avctx->framerate.den > 0) + avctx->time_base = av_inv_q(av_mul_q(avctx->framerate, (AVRational){avctx->ticks_per_frame, 1})); +#endif + } + if (codec->priv_data_size > 0 && avctx->priv_data && codec->priv_class) { + av_assert0(*(const AVClass **)avctx->priv_data == codec->priv_class); + } + +end: + ff_unlock_avcodec(codec); + if (options) { + av_dict_free(options); + *options = tmp; + } + + return ret; +free_and_end: + if (avctx->codec && + (avctx->codec->caps_internal & FF_CODEC_CAP_INIT_CLEANUP)) + avctx->codec->close(avctx); + + if (codec->priv_class && codec->priv_data_size) + av_opt_free(avctx->priv_data); + av_opt_free(avctx); + +#if FF_API_CODED_FRAME +FF_DISABLE_DEPRECATION_WARNINGS + av_frame_free(&avctx->coded_frame); +FF_ENABLE_DEPRECATION_WARNINGS +#endif + + av_dict_free(&tmp); + av_freep(&avctx->priv_data); + if (avctx->internal) { + av_frame_free(&avctx->internal->to_free); + av_frame_free(&avctx->internal->compat_decode_frame); + av_frame_free(&avctx->internal->buffer_frame); + av_packet_free(&avctx->internal->buffer_pkt); + av_packet_free(&avctx->internal->last_pkt_props); + + av_packet_free(&avctx->internal->ds.in_pkt); + + av_freep(&avctx->internal->pool); + } + av_freep(&avctx->internal); + avctx->codec = NULL; + goto end; +} + +void avsubtitle_free(AVSubtitle *sub) +{ + int i; + + for (i = 0; i < sub->num_rects; i++) { + av_freep(&sub->rects[i]->data[0]); + av_freep(&sub->rects[i]->data[1]); + av_freep(&sub->rects[i]->data[2]); + av_freep(&sub->rects[i]->data[3]); + av_freep(&sub->rects[i]->text); + av_freep(&sub->rects[i]->ass); + av_freep(&sub->rects[i]); + } + + av_freep(&sub->rects); + + memset(sub, 0, sizeof(*sub)); +} + +av_cold int avcodec_close(AVCodecContext *avctx) +{ + int i; + + if (!avctx) + return 0; + + if (avcodec_is_open(avctx)) { + FramePool *pool = avctx->internal->pool; + if (CONFIG_FRAME_THREAD_ENCODER && + avctx->internal->frame_thread_encoder && avctx->thread_count > 1) { + ff_frame_thread_encoder_free(avctx); + } + if (HAVE_THREADS && avctx->internal->thread_ctx) + ff_thread_free(avctx); + if (avctx->codec && avctx->codec->close) + avctx->codec->close(avctx); + avctx->internal->byte_buffer_size = 0; + av_freep(&avctx->internal->byte_buffer); + av_frame_free(&avctx->internal->to_free); + av_frame_free(&avctx->internal->compat_decode_frame); + av_frame_free(&avctx->internal->buffer_frame); + av_packet_free(&avctx->internal->buffer_pkt); + av_packet_free(&avctx->internal->last_pkt_props); + + av_packet_free(&avctx->internal->ds.in_pkt); + + for (i = 0; i < FF_ARRAY_ELEMS(pool->pools); i++) + av_buffer_pool_uninit(&pool->pools[i]); + av_freep(&avctx->internal->pool); + + if (avctx->hwaccel && avctx->hwaccel->uninit) + avctx->hwaccel->uninit(avctx); + av_freep(&avctx->internal->hwaccel_priv_data); + + ff_decode_bsfs_uninit(avctx); + + av_freep(&avctx->internal); + } + + for (i = 0; i < avctx->nb_coded_side_data; i++) + av_freep(&avctx->coded_side_data[i].data); + av_freep(&avctx->coded_side_data); + avctx->nb_coded_side_data = 0; + + av_buffer_unref(&avctx->hw_frames_ctx); + av_buffer_unref(&avctx->hw_device_ctx); + + if (avctx->priv_data && avctx->codec && avctx->codec->priv_class) + av_opt_free(avctx->priv_data); + av_opt_free(avctx); + av_freep(&avctx->priv_data); + if (av_codec_is_encoder(avctx->codec)) { + av_freep(&avctx->extradata); +#if FF_API_CODED_FRAME +FF_DISABLE_DEPRECATION_WARNINGS + av_frame_free(&avctx->coded_frame); +FF_ENABLE_DEPRECATION_WARNINGS +#endif + } + avctx->codec = NULL; + avctx->active_thread_type = 0; + + return 0; +} + +const char *avcodec_get_name(enum AVCodecID id) +{ + const AVCodecDescriptor *cd; + AVCodec *codec; + + if (id == AV_CODEC_ID_NONE) + return "none"; + cd = avcodec_descriptor_get(id); + if (cd) + return cd->name; + av_log(NULL, AV_LOG_WARNING, "Codec 0x%x is not in the full list.\n", id); + codec = avcodec_find_decoder(id); + if (codec) + return codec->name; + codec = avcodec_find_encoder(id); + if (codec) + return codec->name; + return "unknown_codec"; +} + +size_t av_get_codec_tag_string(char *buf, size_t buf_size, unsigned int codec_tag) +{ + int i, len, ret = 0; + +#define TAG_PRINT(x) \ + (((x) >= '0' && (x) <= '9') || \ + ((x) >= 'a' && (x) <= 'z') || ((x) >= 'A' && (x) <= 'Z') || \ + ((x) == '.' || (x) == ' ' || (x) == '-' || (x) == '_')) + + for (i = 0; i < 4; i++) { + len = snprintf(buf, buf_size, + TAG_PRINT(codec_tag & 0xFF) ? "%c" : "[%d]", codec_tag & 0xFF); + buf += len; + buf_size = buf_size > len ? buf_size - len : 0; + ret += len; + codec_tag >>= 8; + } + return ret; +} + +void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode) +{ + const char *codec_type; + const char *codec_name; + const char *profile = NULL; + int64_t bitrate; + int new_line = 0; + AVRational display_aspect_ratio; + const char *separator = enc->dump_separator ? (const char *)enc->dump_separator : ", "; + + if (!buf || buf_size <= 0) + return; + codec_type = av_get_media_type_string(enc->codec_type); + codec_name = avcodec_get_name(enc->codec_id); + profile = avcodec_profile_name(enc->codec_id, enc->profile); + + snprintf(buf, buf_size, "%s: %s", codec_type ? codec_type : "unknown", + codec_name); + buf[0] ^= 'a' ^ 'A'; /* first letter in uppercase */ + + if (enc->codec && strcmp(enc->codec->name, codec_name)) + snprintf(buf + strlen(buf), buf_size - strlen(buf), " (%s)", enc->codec->name); + + if (profile) + snprintf(buf + strlen(buf), buf_size - strlen(buf), " (%s)", profile); + if ( enc->codec_type == AVMEDIA_TYPE_VIDEO + && av_log_get_level() >= AV_LOG_VERBOSE + && enc->refs) + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", %d reference frame%s", + enc->refs, enc->refs > 1 ? "s" : ""); + + if (enc->codec_tag) + snprintf(buf + strlen(buf), buf_size - strlen(buf), " (%s / 0x%04X)", + av_fourcc2str(enc->codec_tag), enc->codec_tag); + + switch (enc->codec_type) { + case AVMEDIA_TYPE_VIDEO: + { + char detail[256] = "("; + + av_strlcat(buf, separator, buf_size); + + snprintf(buf + strlen(buf), buf_size - strlen(buf), + "%s", enc->pix_fmt == AV_PIX_FMT_NONE ? "none" : + av_get_pix_fmt_name(enc->pix_fmt)); + if (enc->bits_per_raw_sample && enc->pix_fmt != AV_PIX_FMT_NONE && + enc->bits_per_raw_sample < av_pix_fmt_desc_get(enc->pix_fmt)->comp[0].depth) + av_strlcatf(detail, sizeof(detail), "%d bpc, ", enc->bits_per_raw_sample); + if (enc->color_range != AVCOL_RANGE_UNSPECIFIED) + av_strlcatf(detail, sizeof(detail), "%s, ", + av_color_range_name(enc->color_range)); + + if (enc->colorspace != AVCOL_SPC_UNSPECIFIED || + enc->color_primaries != AVCOL_PRI_UNSPECIFIED || + enc->color_trc != AVCOL_TRC_UNSPECIFIED) { + if (enc->colorspace != (int)enc->color_primaries || + enc->colorspace != (int)enc->color_trc) { + new_line = 1; + av_strlcatf(detail, sizeof(detail), "%s/%s/%s, ", + av_color_space_name(enc->colorspace), + av_color_primaries_name(enc->color_primaries), + av_color_transfer_name(enc->color_trc)); + } else + av_strlcatf(detail, sizeof(detail), "%s, ", + av_get_colorspace_name(enc->colorspace)); + } + + if (enc->field_order != AV_FIELD_UNKNOWN) { + const char *field_order = "progressive"; + if (enc->field_order == AV_FIELD_TT) + field_order = "top first"; + else if (enc->field_order == AV_FIELD_BB) + field_order = "bottom first"; + else if (enc->field_order == AV_FIELD_TB) + field_order = "top coded first (swapped)"; + else if (enc->field_order == AV_FIELD_BT) + field_order = "bottom coded first (swapped)"; + + av_strlcatf(detail, sizeof(detail), "%s, ", field_order); + } + + if (av_log_get_level() >= AV_LOG_VERBOSE && + enc->chroma_sample_location != AVCHROMA_LOC_UNSPECIFIED) + av_strlcatf(detail, sizeof(detail), "%s, ", + av_chroma_location_name(enc->chroma_sample_location)); + + if (strlen(detail) > 1) { + detail[strlen(detail) - 2] = 0; + av_strlcatf(buf, buf_size, "%s)", detail); + } + } + + if (enc->width) { + av_strlcat(buf, new_line ? separator : ", ", buf_size); + + snprintf(buf + strlen(buf), buf_size - strlen(buf), + "%dx%d", + enc->width, enc->height); + + if (av_log_get_level() >= AV_LOG_VERBOSE && + (enc->width != enc->coded_width || + enc->height != enc->coded_height)) + snprintf(buf + strlen(buf), buf_size - strlen(buf), + " (%dx%d)", enc->coded_width, enc->coded_height); + + if (enc->sample_aspect_ratio.num) { + av_reduce(&display_aspect_ratio.num, &display_aspect_ratio.den, + enc->width * (int64_t)enc->sample_aspect_ratio.num, + enc->height * (int64_t)enc->sample_aspect_ratio.den, + 1024 * 1024); + snprintf(buf + strlen(buf), buf_size - strlen(buf), + " [SAR %d:%d DAR %d:%d]", + enc->sample_aspect_ratio.num, enc->sample_aspect_ratio.den, + display_aspect_ratio.num, display_aspect_ratio.den); + } + if (av_log_get_level() >= AV_LOG_DEBUG) { + int g = av_gcd(enc->time_base.num, enc->time_base.den); + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", %d/%d", + enc->time_base.num / g, enc->time_base.den / g); + } + } + if (encode) { + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", q=%d-%d", enc->qmin, enc->qmax); + } else { + if (enc->properties & FF_CODEC_PROPERTY_CLOSED_CAPTIONS) + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", Closed Captions"); + if (enc->properties & FF_CODEC_PROPERTY_LOSSLESS) + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", lossless"); + } + break; + case AVMEDIA_TYPE_AUDIO: + av_strlcat(buf, separator, buf_size); + + if (enc->sample_rate) { + snprintf(buf + strlen(buf), buf_size - strlen(buf), + "%d Hz, ", enc->sample_rate); + } + av_get_channel_layout_string(buf + strlen(buf), buf_size - strlen(buf), enc->channels, enc->channel_layout); + if (enc->sample_fmt != AV_SAMPLE_FMT_NONE) { + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", %s", av_get_sample_fmt_name(enc->sample_fmt)); + } + if ( enc->bits_per_raw_sample > 0 + && enc->bits_per_raw_sample != av_get_bytes_per_sample(enc->sample_fmt) * 8) + snprintf(buf + strlen(buf), buf_size - strlen(buf), + " (%d bit)", enc->bits_per_raw_sample); + if (av_log_get_level() >= AV_LOG_VERBOSE) { + if (enc->initial_padding) + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", delay %d", enc->initial_padding); + if (enc->trailing_padding) + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", padding %d", enc->trailing_padding); + } + break; + case AVMEDIA_TYPE_DATA: + if (av_log_get_level() >= AV_LOG_DEBUG) { + int g = av_gcd(enc->time_base.num, enc->time_base.den); + if (g) + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", %d/%d", + enc->time_base.num / g, enc->time_base.den / g); + } + break; + case AVMEDIA_TYPE_SUBTITLE: + if (enc->width) + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", %dx%d", enc->width, enc->height); + break; + default: + return; + } + if (encode) { + if (enc->flags & AV_CODEC_FLAG_PASS1) + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", pass 1"); + if (enc->flags & AV_CODEC_FLAG_PASS2) + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", pass 2"); + } + bitrate = get_bit_rate(enc); + if (bitrate != 0) { + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", %"PRId64" kb/s", bitrate / 1000); + } else if (enc->rc_max_rate > 0) { + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", max. %"PRId64" kb/s", enc->rc_max_rate / 1000); + } +} + +const char *av_get_profile_name(const AVCodec *codec, int profile) +{ + const AVProfile *p; + if (profile == FF_PROFILE_UNKNOWN || !codec->profiles) + return NULL; + + for (p = codec->profiles; p->profile != FF_PROFILE_UNKNOWN; p++) + if (p->profile == profile) + return p->name; + + return NULL; +} + +const char *avcodec_profile_name(enum AVCodecID codec_id, int profile) +{ + const AVCodecDescriptor *desc = avcodec_descriptor_get(codec_id); + const AVProfile *p; + + if (profile == FF_PROFILE_UNKNOWN || !desc || !desc->profiles) + return NULL; + + for (p = desc->profiles; p->profile != FF_PROFILE_UNKNOWN; p++) + if (p->profile == profile) + return p->name; + + return NULL; +} + +unsigned avcodec_version(void) +{ +// av_assert0(AV_CODEC_ID_V410==164); + av_assert0(AV_CODEC_ID_PCM_S8_PLANAR==65563); + av_assert0(AV_CODEC_ID_ADPCM_G722==69660); +// av_assert0(AV_CODEC_ID_BMV_AUDIO==86071); + av_assert0(AV_CODEC_ID_SRT==94216); + av_assert0(LIBAVCODEC_VERSION_MICRO >= 100); + + return LIBAVCODEC_VERSION_INT; +} + +const char *avcodec_configuration(void) +{ + return FFMPEG_CONFIGURATION; +} + +const char *avcodec_license(void) +{ +#define LICENSE_PREFIX "libavcodec license: " + return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1; +} + +int av_get_exact_bits_per_sample(enum AVCodecID codec_id) +{ + switch (codec_id) { + case AV_CODEC_ID_8SVX_EXP: + case AV_CODEC_ID_8SVX_FIB: + case AV_CODEC_ID_ADPCM_CT: + case AV_CODEC_ID_ADPCM_IMA_APC: + case AV_CODEC_ID_ADPCM_IMA_EA_SEAD: + case AV_CODEC_ID_ADPCM_IMA_OKI: + case AV_CODEC_ID_ADPCM_IMA_WS: + case AV_CODEC_ID_ADPCM_G722: + case AV_CODEC_ID_ADPCM_YAMAHA: + case AV_CODEC_ID_ADPCM_AICA: + return 4; + case AV_CODEC_ID_DSD_LSBF: + case AV_CODEC_ID_DSD_MSBF: + case AV_CODEC_ID_DSD_LSBF_PLANAR: + case AV_CODEC_ID_DSD_MSBF_PLANAR: + case AV_CODEC_ID_PCM_ALAW: + case AV_CODEC_ID_PCM_MULAW: + case AV_CODEC_ID_PCM_S8: + case AV_CODEC_ID_PCM_S8_PLANAR: + case AV_CODEC_ID_PCM_U8: + case AV_CODEC_ID_PCM_ZORK: + case AV_CODEC_ID_SDX2_DPCM: + return 8; + case AV_CODEC_ID_PCM_S16BE: + case AV_CODEC_ID_PCM_S16BE_PLANAR: + case AV_CODEC_ID_PCM_S16LE: + case AV_CODEC_ID_PCM_S16LE_PLANAR: + case AV_CODEC_ID_PCM_U16BE: + case AV_CODEC_ID_PCM_U16LE: + return 16; + case AV_CODEC_ID_PCM_S24DAUD: + case AV_CODEC_ID_PCM_S24BE: + case AV_CODEC_ID_PCM_S24LE: + case AV_CODEC_ID_PCM_S24LE_PLANAR: + case AV_CODEC_ID_PCM_U24BE: + case AV_CODEC_ID_PCM_U24LE: + return 24; + case AV_CODEC_ID_PCM_S32BE: + case AV_CODEC_ID_PCM_S32LE: + case AV_CODEC_ID_PCM_S32LE_PLANAR: + case AV_CODEC_ID_PCM_U32BE: + case AV_CODEC_ID_PCM_U32LE: + case AV_CODEC_ID_PCM_F32BE: + case AV_CODEC_ID_PCM_F32LE: + case AV_CODEC_ID_PCM_F24LE: + case AV_CODEC_ID_PCM_F16LE: + return 32; + case AV_CODEC_ID_PCM_F64BE: + case AV_CODEC_ID_PCM_F64LE: + case AV_CODEC_ID_PCM_S64BE: + case AV_CODEC_ID_PCM_S64LE: + return 64; + default: + return 0; + } +} + +enum AVCodecID av_get_pcm_codec(enum AVSampleFormat fmt, int be) +{ + static const enum AVCodecID map[AV_SAMPLE_FMT_NB][2] = { + [AV_SAMPLE_FMT_U8 ] = { AV_CODEC_ID_PCM_U8, AV_CODEC_ID_PCM_U8 }, + [AV_SAMPLE_FMT_S16 ] = { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S16BE }, + [AV_SAMPLE_FMT_S32 ] = { AV_CODEC_ID_PCM_S32LE, AV_CODEC_ID_PCM_S32BE }, + [AV_SAMPLE_FMT_FLT ] = { AV_CODEC_ID_PCM_F32LE, AV_CODEC_ID_PCM_F32BE }, + [AV_SAMPLE_FMT_DBL ] = { AV_CODEC_ID_PCM_F64LE, AV_CODEC_ID_PCM_F64BE }, + [AV_SAMPLE_FMT_U8P ] = { AV_CODEC_ID_PCM_U8, AV_CODEC_ID_PCM_U8 }, + [AV_SAMPLE_FMT_S16P] = { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S16BE }, + [AV_SAMPLE_FMT_S32P] = { AV_CODEC_ID_PCM_S32LE, AV_CODEC_ID_PCM_S32BE }, + [AV_SAMPLE_FMT_S64P] = { AV_CODEC_ID_PCM_S64LE, AV_CODEC_ID_PCM_S64BE }, + [AV_SAMPLE_FMT_FLTP] = { AV_CODEC_ID_PCM_F32LE, AV_CODEC_ID_PCM_F32BE }, + [AV_SAMPLE_FMT_DBLP] = { AV_CODEC_ID_PCM_F64LE, AV_CODEC_ID_PCM_F64BE }, + }; + if (fmt < 0 || fmt >= AV_SAMPLE_FMT_NB) + return AV_CODEC_ID_NONE; + if (be < 0 || be > 1) + be = AV_NE(1, 0); + return map[fmt][be]; +} + +int av_get_bits_per_sample(enum AVCodecID codec_id) +{ + switch (codec_id) { + case AV_CODEC_ID_ADPCM_SBPRO_2: + return 2; + case AV_CODEC_ID_ADPCM_SBPRO_3: + return 3; + case AV_CODEC_ID_ADPCM_SBPRO_4: + case AV_CODEC_ID_ADPCM_IMA_WAV: + case AV_CODEC_ID_ADPCM_IMA_QT: + case AV_CODEC_ID_ADPCM_SWF: + case AV_CODEC_ID_ADPCM_MS: + return 4; + default: + return av_get_exact_bits_per_sample(codec_id); + } +} + +static int get_audio_frame_duration(enum AVCodecID id, int sr, int ch, int ba, + uint32_t tag, int bits_per_coded_sample, int64_t bitrate, + uint8_t * extradata, int frame_size, int frame_bytes) +{ + int bps = av_get_exact_bits_per_sample(id); + int framecount = (ba > 0 && frame_bytes / ba > 0) ? frame_bytes / ba : 1; + + /* codecs with an exact constant bits per sample */ + if (bps > 0 && ch > 0 && frame_bytes > 0 && ch < 32768 && bps < 32768) + return (frame_bytes * 8LL) / (bps * ch); + bps = bits_per_coded_sample; + + /* codecs with a fixed packet duration */ + switch (id) { + case AV_CODEC_ID_ADPCM_ADX: return 32; + case AV_CODEC_ID_ADPCM_IMA_QT: return 64; + case AV_CODEC_ID_ADPCM_EA_XAS: return 128; + case AV_CODEC_ID_AMR_NB: + case AV_CODEC_ID_EVRC: + case AV_CODEC_ID_GSM: + case AV_CODEC_ID_QCELP: + case AV_CODEC_ID_RA_288: return 160; + case AV_CODEC_ID_AMR_WB: + case AV_CODEC_ID_GSM_MS: return 320; + case AV_CODEC_ID_MP1: return 384; + case AV_CODEC_ID_ATRAC1: return 512; + case AV_CODEC_ID_ATRAC3: return 1024 * framecount; + case AV_CODEC_ID_ATRAC3P: return 2048; + case AV_CODEC_ID_MP2: + case AV_CODEC_ID_MUSEPACK7: return 1152; + case AV_CODEC_ID_AC3: return 1536; + } + + if (sr > 0) { + /* calc from sample rate */ + if (id == AV_CODEC_ID_TTA) + return 256 * sr / 245; + else if (id == AV_CODEC_ID_DST) + return 588 * sr / 44100; + + if (ch > 0) { + /* calc from sample rate and channels */ + if (id == AV_CODEC_ID_BINKAUDIO_DCT) + return (480 << (sr / 22050)) / ch; + } + + if (id == AV_CODEC_ID_MP3) + return sr <= 24000 ? 576 : 1152; + } + + if (ba > 0) { + /* calc from block_align */ + if (id == AV_CODEC_ID_SIPR) { + switch (ba) { + case 20: return 160; + case 19: return 144; + case 29: return 288; + case 37: return 480; + } + } else if (id == AV_CODEC_ID_ILBC) { + switch (ba) { + case 38: return 160; + case 50: return 240; + } + } + } + + if (frame_bytes > 0) { + /* calc from frame_bytes only */ + if (id == AV_CODEC_ID_TRUESPEECH) + return 240 * (frame_bytes / 32); + if (id == AV_CODEC_ID_NELLYMOSER) + return 256 * (frame_bytes / 64); + if (id == AV_CODEC_ID_RA_144) + return 160 * (frame_bytes / 20); + if (id == AV_CODEC_ID_G723_1) + return 240 * (frame_bytes / 24); + + if (bps > 0) { + /* calc from frame_bytes and bits_per_coded_sample */ + if (id == AV_CODEC_ID_ADPCM_G726 || id == AV_CODEC_ID_ADPCM_G726LE) + return frame_bytes * 8 / bps; + } + + if (ch > 0 && ch < INT_MAX/16) { + /* calc from frame_bytes and channels */ + switch (id) { + case AV_CODEC_ID_ADPCM_AFC: + return frame_bytes / (9 * ch) * 16; + case AV_CODEC_ID_ADPCM_PSX: + case AV_CODEC_ID_ADPCM_DTK: + return frame_bytes / (16 * ch) * 28; + case AV_CODEC_ID_ADPCM_4XM: + case AV_CODEC_ID_ADPCM_IMA_DAT4: + case AV_CODEC_ID_ADPCM_IMA_ISS: + return (frame_bytes - 4 * ch) * 2 / ch; + case AV_CODEC_ID_ADPCM_IMA_SMJPEG: + return (frame_bytes - 4) * 2 / ch; + case AV_CODEC_ID_ADPCM_IMA_AMV: + return (frame_bytes - 8) * 2 / ch; + case AV_CODEC_ID_ADPCM_THP: + case AV_CODEC_ID_ADPCM_THP_LE: + if (extradata) + return frame_bytes * 14 / (8 * ch); + break; + case AV_CODEC_ID_ADPCM_XA: + return (frame_bytes / 128) * 224 / ch; + case AV_CODEC_ID_INTERPLAY_DPCM: + return (frame_bytes - 6 - ch) / ch; + case AV_CODEC_ID_ROQ_DPCM: + return (frame_bytes - 8) / ch; + case AV_CODEC_ID_XAN_DPCM: + return (frame_bytes - 2 * ch) / ch; + case AV_CODEC_ID_MACE3: + return 3 * frame_bytes / ch; + case AV_CODEC_ID_MACE6: + return 6 * frame_bytes / ch; + case AV_CODEC_ID_PCM_LXF: + return 2 * (frame_bytes / (5 * ch)); + case AV_CODEC_ID_IAC: + case AV_CODEC_ID_IMC: + return 4 * frame_bytes / ch; + } + + if (tag) { + /* calc from frame_bytes, channels, and codec_tag */ + if (id == AV_CODEC_ID_SOL_DPCM) { + if (tag == 3) + return frame_bytes / ch; + else + return frame_bytes * 2 / ch; + } + } + + if (ba > 0) { + /* calc from frame_bytes, channels, and block_align */ + int blocks = frame_bytes / ba; + switch (id) { + case AV_CODEC_ID_ADPCM_IMA_WAV: + if (bps < 2 || bps > 5) + return 0; + return blocks * (1 + (ba - 4 * ch) / (bps * ch) * 8); + case AV_CODEC_ID_ADPCM_IMA_DK3: + return blocks * (((ba - 16) * 2 / 3 * 4) / ch); + case AV_CODEC_ID_ADPCM_IMA_DK4: + return blocks * (1 + (ba - 4 * ch) * 2 / ch); + case AV_CODEC_ID_ADPCM_IMA_RAD: + return blocks * ((ba - 4 * ch) * 2 / ch); + case AV_CODEC_ID_ADPCM_MS: + return blocks * (2 + (ba - 7 * ch) * 2 / ch); + case AV_CODEC_ID_ADPCM_MTAF: + return blocks * (ba - 16) * 2 / ch; + } + } + + if (bps > 0) { + /* calc from frame_bytes, channels, and bits_per_coded_sample */ + switch (id) { + case AV_CODEC_ID_PCM_DVD: + if(bps<4 || frame_bytes<3) + return 0; + return 2 * ((frame_bytes - 3) / ((bps * 2 / 8) * ch)); + case AV_CODEC_ID_PCM_BLURAY: + if(bps<4 || frame_bytes<4) + return 0; + return (frame_bytes - 4) / ((FFALIGN(ch, 2) * bps) / 8); + case AV_CODEC_ID_S302M: + return 2 * (frame_bytes / ((bps + 4) / 4)) / ch; + } + } + } + } + + /* Fall back on using frame_size */ + if (frame_size > 1 && frame_bytes) + return frame_size; + + //For WMA we currently have no other means to calculate duration thus we + //do it here by assuming CBR, which is true for all known cases. + if (bitrate > 0 && frame_bytes > 0 && sr > 0 && ba > 1) { + if (id == AV_CODEC_ID_WMAV1 || id == AV_CODEC_ID_WMAV2) + return (frame_bytes * 8LL * sr) / bitrate; + } + + return 0; +} + +int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes) +{ + return get_audio_frame_duration(avctx->codec_id, avctx->sample_rate, + avctx->channels, avctx->block_align, + avctx->codec_tag, avctx->bits_per_coded_sample, + avctx->bit_rate, avctx->extradata, avctx->frame_size, + frame_bytes); +} + +int av_get_audio_frame_duration2(AVCodecParameters *par, int frame_bytes) +{ + return get_audio_frame_duration(par->codec_id, par->sample_rate, + par->channels, par->block_align, + par->codec_tag, par->bits_per_coded_sample, + par->bit_rate, par->extradata, par->frame_size, + frame_bytes); +} + +#if !HAVE_THREADS +int ff_thread_init(AVCodecContext *s) +{ + return -1; +} + +#endif + +unsigned int av_xiphlacing(unsigned char *s, unsigned int v) +{ + unsigned int n = 0; + + while (v >= 0xff) { + *s++ = 0xff; + v -= 0xff; + n++; + } + *s = v; + n++; + return n; +} + +int ff_match_2uint16(const uint16_t(*tab)[2], int size, int a, int b) +{ + int i; + for (i = 0; i < size && !(tab[i][0] == a && tab[i][1] == b); i++) ; + return i; +} + +const AVCodecHWConfig *avcodec_get_hw_config(const AVCodec *codec, int index) +{ + int i; + if (!codec->hw_configs || index < 0) + return NULL; + for (i = 0; i <= index; i++) + if (!codec->hw_configs[i]) + return NULL; + return &codec->hw_configs[index]->public; +} + +#if FF_API_USER_VISIBLE_AVHWACCEL +AVHWAccel *av_hwaccel_next(const AVHWAccel *hwaccel) +{ + return NULL; +} + +void av_register_hwaccel(AVHWAccel *hwaccel) +{ +} +#endif + +#if FF_API_LOCKMGR +int av_lockmgr_register(int (*cb)(void **mutex, enum AVLockOp op)) +{ + return 0; +} +#endif + +unsigned int avpriv_toupper4(unsigned int x) +{ + return av_toupper(x & 0xFF) + + (av_toupper((x >> 8) & 0xFF) << 8) + + (av_toupper((x >> 16) & 0xFF) << 16) + +((unsigned)av_toupper((x >> 24) & 0xFF) << 24); +} + +int ff_thread_ref_frame(ThreadFrame *dst, ThreadFrame *src) +{ + int ret; + + dst->owner[0] = src->owner[0]; + dst->owner[1] = src->owner[1]; + + ret = av_frame_ref(dst->f, src->f); + if (ret < 0) + return ret; + + av_assert0(!dst->progress); + + if (src->progress && + !(dst->progress = av_buffer_ref(src->progress))) { + ff_thread_release_buffer(dst->owner[0], dst); + return AVERROR(ENOMEM); + } + + return 0; +} + +#if !HAVE_THREADS + +enum AVPixelFormat ff_thread_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt) +{ + return ff_get_format(avctx, fmt); +} + +int ff_thread_get_buffer(AVCodecContext *avctx, ThreadFrame *f, int flags) +{ + f->owner[0] = f->owner[1] = avctx; + return ff_get_buffer(avctx, f->f, flags); +} + +void ff_thread_release_buffer(AVCodecContext *avctx, ThreadFrame *f) +{ + if (f->f) + av_frame_unref(f->f); +} + +void ff_thread_finish_setup(AVCodecContext *avctx) +{ +} + +void ff_thread_report_progress(ThreadFrame *f, int progress, int field) +{ +} + +void ff_thread_await_progress(ThreadFrame *f, int progress, int field) +{ +} + +int ff_thread_can_start_frame(AVCodecContext *avctx) +{ + return 1; +} + +int ff_alloc_entries(AVCodecContext *avctx, int count) +{ + return 0; +} + +void ff_reset_entries(AVCodecContext *avctx) +{ +} + +void ff_thread_await_progress2(AVCodecContext *avctx, int field, int thread, int shift) +{ +} + +void ff_thread_report_progress2(AVCodecContext *avctx, int field, int thread, int n) +{ +} + +#endif + +int avcodec_is_open(AVCodecContext *s) +{ + return !!s->internal; +} + +int avpriv_bprint_to_extradata(AVCodecContext *avctx, struct AVBPrint *buf) +{ + int ret; + char *str; + + ret = av_bprint_finalize(buf, &str); + if (ret < 0) + return ret; + if (!av_bprint_is_complete(buf)) { + av_free(str); + return AVERROR(ENOMEM); + } + + avctx->extradata = str; + /* Note: the string is NUL terminated (so extradata can be read as a + * string), but the ending character is not accounted in the size (in + * binary formats you are likely not supposed to mux that character). When + * extradata is copied, it is also padded with AV_INPUT_BUFFER_PADDING_SIZE + * zeros. */ + avctx->extradata_size = buf->len; + return 0; +} + +const uint8_t *avpriv_find_start_code(const uint8_t *av_restrict p, + const uint8_t *end, + uint32_t *av_restrict state) +{ + int i; + + av_assert0(p <= end); + if (p >= end) + return end; + + for (i = 0; i < 3; i++) { + uint32_t tmp = *state << 8; + *state = tmp + *(p++); + if (tmp == 0x100 || p == end) + return p; + } + + while (p < end) { + if (p[-1] > 1 ) p += 3; + else if (p[-2] ) p += 2; + else if (p[-3]|(p[-1]-1)) p++; + else { + p++; + break; + } + } + + p = FFMIN(p, end) - 4; + *state = AV_RB32(p); + + return p + 4; +} + +AVCPBProperties *av_cpb_properties_alloc(size_t *size) +{ + AVCPBProperties *props = av_mallocz(sizeof(AVCPBProperties)); + if (!props) + return NULL; + + if (size) + *size = sizeof(*props); + + props->vbv_delay = UINT64_MAX; + + return props; +} + +AVCPBProperties *ff_add_cpb_side_data(AVCodecContext *avctx) +{ + AVPacketSideData *tmp; + AVCPBProperties *props; + size_t size; + + props = av_cpb_properties_alloc(&size); + if (!props) + return NULL; + + tmp = av_realloc_array(avctx->coded_side_data, avctx->nb_coded_side_data + 1, sizeof(*tmp)); + if (!tmp) { + av_freep(&props); + return NULL; + } + + avctx->coded_side_data = tmp; + avctx->nb_coded_side_data++; + + avctx->coded_side_data[avctx->nb_coded_side_data - 1].type = AV_PKT_DATA_CPB_PROPERTIES; + avctx->coded_side_data[avctx->nb_coded_side_data - 1].data = (uint8_t*)props; + avctx->coded_side_data[avctx->nb_coded_side_data - 1].size = size; + + return props; +} + +static void codec_parameters_reset(AVCodecParameters *par) +{ + av_freep(&par->extradata); + + memset(par, 0, sizeof(*par)); + + par->codec_type = AVMEDIA_TYPE_UNKNOWN; + par->codec_id = AV_CODEC_ID_NONE; + par->format = -1; + par->field_order = AV_FIELD_UNKNOWN; + par->color_range = AVCOL_RANGE_UNSPECIFIED; + par->color_primaries = AVCOL_PRI_UNSPECIFIED; + par->color_trc = AVCOL_TRC_UNSPECIFIED; + par->color_space = AVCOL_SPC_UNSPECIFIED; + par->chroma_location = AVCHROMA_LOC_UNSPECIFIED; + par->sample_aspect_ratio = (AVRational){ 0, 1 }; + par->profile = FF_PROFILE_UNKNOWN; + par->level = FF_LEVEL_UNKNOWN; +} + +AVCodecParameters *avcodec_parameters_alloc(void) +{ + AVCodecParameters *par = av_mallocz(sizeof(*par)); + + if (!par) + return NULL; + codec_parameters_reset(par); + return par; +} + +void avcodec_parameters_free(AVCodecParameters **ppar) +{ + AVCodecParameters *par = *ppar; + + if (!par) + return; + codec_parameters_reset(par); + + av_freep(ppar); +} + +int avcodec_parameters_copy(AVCodecParameters *dst, const AVCodecParameters *src) +{ + codec_parameters_reset(dst); + memcpy(dst, src, sizeof(*dst)); + + dst->extradata = NULL; + dst->extradata_size = 0; + if (src->extradata) { + dst->extradata = av_mallocz(src->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE); + if (!dst->extradata) + return AVERROR(ENOMEM); + memcpy(dst->extradata, src->extradata, src->extradata_size); + dst->extradata_size = src->extradata_size; + } + + return 0; +} + +int avcodec_parameters_from_context(AVCodecParameters *par, + const AVCodecContext *codec) +{ + codec_parameters_reset(par); + + par->codec_type = codec->codec_type; + par->codec_id = codec->codec_id; + par->codec_tag = codec->codec_tag; + + par->bit_rate = codec->bit_rate; + par->bits_per_coded_sample = codec->bits_per_coded_sample; + par->bits_per_raw_sample = codec->bits_per_raw_sample; + par->profile = codec->profile; + par->level = codec->level; + + switch (par->codec_type) { + case AVMEDIA_TYPE_VIDEO: + par->format = codec->pix_fmt; + par->width = codec->width; + par->height = codec->height; + par->field_order = codec->field_order; + par->color_range = codec->color_range; + par->color_primaries = codec->color_primaries; + par->color_trc = codec->color_trc; + par->color_space = codec->colorspace; + par->chroma_location = codec->chroma_sample_location; + par->sample_aspect_ratio = codec->sample_aspect_ratio; + par->video_delay = codec->has_b_frames; + break; + case AVMEDIA_TYPE_AUDIO: + par->format = codec->sample_fmt; + par->channel_layout = codec->channel_layout; + par->channels = codec->channels; + par->sample_rate = codec->sample_rate; + par->block_align = codec->block_align; + par->frame_size = codec->frame_size; + par->initial_padding = codec->initial_padding; + par->trailing_padding = codec->trailing_padding; + par->seek_preroll = codec->seek_preroll; + break; + case AVMEDIA_TYPE_SUBTITLE: + par->width = codec->width; + par->height = codec->height; + break; + } + + if (codec->extradata) { + par->extradata = av_mallocz(codec->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE); + if (!par->extradata) + return AVERROR(ENOMEM); + memcpy(par->extradata, codec->extradata, codec->extradata_size); + par->extradata_size = codec->extradata_size; + } + + return 0; +} + +int avcodec_parameters_to_context(AVCodecContext *codec, + const AVCodecParameters *par) +{ + codec->codec_type = par->codec_type; + codec->codec_id = par->codec_id; + codec->codec_tag = par->codec_tag; + + codec->bit_rate = par->bit_rate; + codec->bits_per_coded_sample = par->bits_per_coded_sample; + codec->bits_per_raw_sample = par->bits_per_raw_sample; + codec->profile = par->profile; + codec->level = par->level; + + switch (par->codec_type) { + case AVMEDIA_TYPE_VIDEO: + codec->pix_fmt = par->format; + codec->width = par->width; + codec->height = par->height; + codec->field_order = par->field_order; + codec->color_range = par->color_range; + codec->color_primaries = par->color_primaries; + codec->color_trc = par->color_trc; + codec->colorspace = par->color_space; + codec->chroma_sample_location = par->chroma_location; + codec->sample_aspect_ratio = par->sample_aspect_ratio; + codec->has_b_frames = par->video_delay; + break; + case AVMEDIA_TYPE_AUDIO: + codec->sample_fmt = par->format; + codec->channel_layout = par->channel_layout; + codec->channels = par->channels; + codec->sample_rate = par->sample_rate; + codec->block_align = par->block_align; + codec->frame_size = par->frame_size; + codec->delay = + codec->initial_padding = par->initial_padding; + codec->trailing_padding = par->trailing_padding; + codec->seek_preroll = par->seek_preroll; + break; + case AVMEDIA_TYPE_SUBTITLE: + codec->width = par->width; + codec->height = par->height; + break; + } + + if (par->extradata) { + av_freep(&codec->extradata); + codec->extradata = av_mallocz(par->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE); + if (!codec->extradata) + return AVERROR(ENOMEM); + memcpy(codec->extradata, par->extradata, par->extradata_size); + codec->extradata_size = par->extradata_size; + } + + return 0; +} + +int ff_alloc_a53_sei(const AVFrame *frame, size_t prefix_len, + void **data, size_t *sei_size) +{ + AVFrameSideData *side_data = NULL; + uint8_t *sei_data; + + if (frame) + side_data = av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC); + + if (!side_data) { + *data = NULL; + return 0; + } + + *sei_size = side_data->size + 11; + *data = av_mallocz(*sei_size + prefix_len); + if (!*data) + return AVERROR(ENOMEM); + sei_data = (uint8_t*)*data + prefix_len; + + // country code + sei_data[0] = 181; + sei_data[1] = 0; + sei_data[2] = 49; + + /** + * 'GA94' is standard in North America for ATSC, but hard coding + * this style may not be the right thing to do -- other formats + * do exist. This information is not available in the side_data + * so we are going with this right now. + */ + AV_WL32(sei_data + 3, MKTAG('G', 'A', '9', '4')); + sei_data[7] = 3; + sei_data[8] = ((side_data->size/3) & 0x1f) | 0x40; + sei_data[9] = 0; + + memcpy(sei_data + 10, side_data->data, side_data->size); + + sei_data[side_data->size+10] = 255; + + return 0; +} + +int64_t ff_guess_coded_bitrate(AVCodecContext *avctx) +{ + AVRational framerate = avctx->framerate; + int bits_per_coded_sample = avctx->bits_per_coded_sample; + int64_t bitrate; + + if (!(framerate.num && framerate.den)) + framerate = av_inv_q(avctx->time_base); + if (!(framerate.num && framerate.den)) + return 0; + + if (!bits_per_coded_sample) { + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt); + bits_per_coded_sample = av_get_bits_per_pixel(desc); + } + bitrate = (int64_t)bits_per_coded_sample * avctx->width * avctx->height * + framerate.num / framerate.den; + + return bitrate; +} diff --git a/libs/ffvpx/libavcodec/version.h b/libs/ffvpx/libavcodec/version.h new file mode 100644 index 000000000..6895f1a46 --- /dev/null +++ b/libs/ffvpx/libavcodec/version.h @@ -0,0 +1,137 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VERSION_H +#define AVCODEC_VERSION_H + +/** + * @file + * @ingroup libavc + * Libavcodec version macros. + */ + +#include "libavutil/version.h" + +#define LIBAVCODEC_VERSION_MAJOR 58 +#define LIBAVCODEC_VERSION_MINOR 18 +#define LIBAVCODEC_VERSION_MICRO 100 + +#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ + LIBAVCODEC_VERSION_MINOR, \ + LIBAVCODEC_VERSION_MICRO) +#define LIBAVCODEC_VERSION AV_VERSION(LIBAVCODEC_VERSION_MAJOR, \ + LIBAVCODEC_VERSION_MINOR, \ + LIBAVCODEC_VERSION_MICRO) +#define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT + +#define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION) + +/** + * FF_API_* defines may be placed below to indicate public API that will be + * dropped at a future version bump. The defines themselves are not part of + * the public API and may change, break or disappear at any time. + * + * @note, when bumping the major version it is recommended to manually + * disable each FF_API_* in its own commit instead of disabling them all + * at once through the bump. This improves the git bisect-ability of the change. + */ + +#ifndef FF_API_LOWRES +#define FF_API_LOWRES (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_DEBUG_MV +#define FF_API_DEBUG_MV (LIBAVCODEC_VERSION_MAJOR < 58) +#endif +#ifndef FF_API_AVCTX_TIMEBASE +#define FF_API_AVCTX_TIMEBASE (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_CODED_FRAME +#define FF_API_CODED_FRAME (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_SIDEDATA_ONLY_PKT +#define FF_API_SIDEDATA_ONLY_PKT (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_VDPAU_PROFILE +#define FF_API_VDPAU_PROFILE (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_CONVERGENCE_DURATION +#define FF_API_CONVERGENCE_DURATION (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_AVPICTURE +#define FF_API_AVPICTURE (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_AVPACKET_OLD_API +#define FF_API_AVPACKET_OLD_API (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_RTP_CALLBACK +#define FF_API_RTP_CALLBACK (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_VBV_DELAY +#define FF_API_VBV_DELAY (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_CODER_TYPE +#define FF_API_CODER_TYPE (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_STAT_BITS +#define FF_API_STAT_BITS (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_PRIVATE_OPT +#define FF_API_PRIVATE_OPT (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_ASS_TIMING +#define FF_API_ASS_TIMING (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_OLD_BSF +#define FF_API_OLD_BSF (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_COPY_CONTEXT +#define FF_API_COPY_CONTEXT (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_GET_CONTEXT_DEFAULTS +#define FF_API_GET_CONTEXT_DEFAULTS (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_NVENC_OLD_NAME +#define FF_API_NVENC_OLD_NAME (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_STRUCT_VAAPI_CONTEXT +#define FF_API_STRUCT_VAAPI_CONTEXT (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_MERGE_SD_API +#define FF_API_MERGE_SD_API (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_TAG_STRING +#define FF_API_TAG_STRING (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_GETCHROMA +#define FF_API_GETCHROMA (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_CODEC_GET_SET +#define FF_API_CODEC_GET_SET (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_USER_VISIBLE_AVHWACCEL +#define FF_API_USER_VISIBLE_AVHWACCEL (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_LOCKMGR +#define FF_API_LOCKMGR (LIBAVCODEC_VERSION_MAJOR < 59) +#endif +#ifndef FF_API_NEXT +#define FF_API_NEXT (LIBAVCODEC_VERSION_MAJOR < 59) +#endif + + +#endif /* AVCODEC_VERSION_H */ diff --git a/libs/ffvpx/libavcodec/videodsp.c b/libs/ffvpx/libavcodec/videodsp.c new file mode 100644 index 000000000..ce9e9eb14 --- /dev/null +++ b/libs/ffvpx/libavcodec/videodsp.c @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2012 Ronald S. Bultje + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/avassert.h" +#include "libavutil/common.h" +#include "videodsp.h" + +#define BIT_DEPTH 8 +#include "videodsp_template.c" +#undef BIT_DEPTH + +#define BIT_DEPTH 16 +#include "videodsp_template.c" +#undef BIT_DEPTH + +static void just_return(uint8_t *buf, ptrdiff_t stride, int h) +{ +} + +av_cold void ff_videodsp_init(VideoDSPContext *ctx, int bpc) +{ + ctx->prefetch = just_return; + if (bpc <= 8) { + ctx->emulated_edge_mc = ff_emulated_edge_mc_8; + } else { + ctx->emulated_edge_mc = ff_emulated_edge_mc_16; + } + + if (ARCH_AARCH64) + ff_videodsp_init_aarch64(ctx, bpc); + if (ARCH_ARM) + ff_videodsp_init_arm(ctx, bpc); + if (ARCH_PPC) + ff_videodsp_init_ppc(ctx, bpc); + if (ARCH_X86) + ff_videodsp_init_x86(ctx, bpc); + if (ARCH_MIPS) + ff_videodsp_init_mips(ctx, bpc); +} diff --git a/libs/ffvpx/libavcodec/videodsp.h b/libs/ffvpx/libavcodec/videodsp.h new file mode 100644 index 000000000..c0545f22b --- /dev/null +++ b/libs/ffvpx/libavcodec/videodsp.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2012 Ronald S. Bultje + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Core video DSP helper functions + */ + +#ifndef AVCODEC_VIDEODSP_H +#define AVCODEC_VIDEODSP_H + +#include <stddef.h> +#include <stdint.h> + +#define EMULATED_EDGE(depth) \ +void ff_emulated_edge_mc_ ## depth(uint8_t *dst, const uint8_t *src, \ + ptrdiff_t dst_stride, ptrdiff_t src_stride, \ + int block_w, int block_h,\ + int src_x, int src_y, int w, int h); + +EMULATED_EDGE(8) +EMULATED_EDGE(16) + +typedef struct VideoDSPContext { + /** + * Copy a rectangular area of samples to a temporary buffer and replicate + * the border samples. + * + * @param dst destination buffer + * @param dst_stride number of bytes between 2 vertically adjacent samples + * in destination buffer + * @param src source buffer + * @param dst_linesize number of bytes between 2 vertically adjacent + * samples in the destination buffer + * @param src_linesize number of bytes between 2 vertically adjacent + * samples in both the source buffer + * @param block_w width of block + * @param block_h height of block + * @param src_x x coordinate of the top left sample of the block in the + * source buffer + * @param src_y y coordinate of the top left sample of the block in the + * source buffer + * @param w width of the source buffer + * @param h height of the source buffer + */ + void (*emulated_edge_mc)(uint8_t *dst, const uint8_t *src, + ptrdiff_t dst_linesize, + ptrdiff_t src_linesize, + int block_w, int block_h, + int src_x, int src_y, int w, int h); + + /** + * Prefetch memory into cache (if supported by hardware). + * + * @param buf pointer to buffer to prefetch memory from + * @param stride distance between two lines of buf (in bytes) + * @param h number of lines to prefetch + */ + void (*prefetch)(uint8_t *buf, ptrdiff_t stride, int h); +} VideoDSPContext; + +void ff_videodsp_init(VideoDSPContext *ctx, int bpc); + +/* for internal use only (i.e. called by ff_videodsp_init() */ +void ff_videodsp_init_aarch64(VideoDSPContext *ctx, int bpc); +void ff_videodsp_init_arm(VideoDSPContext *ctx, int bpc); +void ff_videodsp_init_ppc(VideoDSPContext *ctx, int bpc); +void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc); +void ff_videodsp_init_mips(VideoDSPContext *ctx, int bpc); + +#endif /* AVCODEC_VIDEODSP_H */ diff --git a/libs/ffvpx/libavcodec/videodsp_template.c b/libs/ffvpx/libavcodec/videodsp_template.c new file mode 100644 index 000000000..94c1b7188 --- /dev/null +++ b/libs/ffvpx/libavcodec/videodsp_template.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2002-2012 Michael Niedermayer + * Copyright (C) 2012 Ronald S. Bultje + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "bit_depth_template.c" +void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, + ptrdiff_t buf_linesize, + ptrdiff_t src_linesize, + int block_w, int block_h, + int src_x, int src_y, int w, int h) +{ + int x, y; + int start_y, start_x, end_y, end_x; + + if (!w || !h) + return; + + av_assert2(block_w * sizeof(pixel) <= FFABS(buf_linesize)); + + if (src_y >= h) { + src -= src_y * src_linesize; + src += (h - 1) * src_linesize; + src_y = h - 1; + } else if (src_y <= -block_h) { + src -= src_y * src_linesize; + src += (1 - block_h) * src_linesize; + src_y = 1 - block_h; + } + if (src_x >= w) { + src += (w - 1 - src_x) * sizeof(pixel); + src_x = w - 1; + } else if (src_x <= -block_w) { + src += (1 - block_w - src_x) * sizeof(pixel); + src_x = 1 - block_w; + } + + start_y = FFMAX(0, -src_y); + start_x = FFMAX(0, -src_x); + end_y = FFMIN(block_h, h-src_y); + end_x = FFMIN(block_w, w-src_x); + av_assert2(start_y < end_y && block_h); + av_assert2(start_x < end_x && block_w); + + w = end_x - start_x; + src += start_y * src_linesize + start_x * sizeof(pixel); + buf += start_x * sizeof(pixel); + + // top + for (y = 0; y < start_y; y++) { + memcpy(buf, src, w * sizeof(pixel)); + buf += buf_linesize; + } + + // copy existing part + for (; y < end_y; y++) { + memcpy(buf, src, w * sizeof(pixel)); + src += src_linesize; + buf += buf_linesize; + } + + // bottom + src -= src_linesize; + for (; y < block_h; y++) { + memcpy(buf, src, w * sizeof(pixel)); + buf += buf_linesize; + } + + buf -= block_h * buf_linesize + start_x * sizeof(pixel); + while (block_h--) { + pixel *bufp = (pixel *) buf; + + // left + for(x = 0; x < start_x; x++) { + bufp[x] = bufp[start_x]; + } + + // right + for (x = end_x; x < block_w; x++) { + bufp[x] = bufp[end_x - 1]; + } + buf += buf_linesize; + } +} diff --git a/libs/ffvpx/libavcodec/vlc.h b/libs/ffvpx/libavcodec/vlc.h new file mode 100644 index 000000000..42ccddf3f --- /dev/null +++ b/libs/ffvpx/libavcodec/vlc.h @@ -0,0 +1,81 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VLC_H +#define AVCODEC_VLC_H + +#include <stdint.h> + +#define VLC_TYPE int16_t + +typedef struct VLC { + int bits; + VLC_TYPE (*table)[2]; ///< code, bits + int table_size, table_allocated; +} VLC; + +typedef struct RL_VLC_ELEM { + int16_t level; + int8_t len; + uint8_t run; +} RL_VLC_ELEM; + +#define init_vlc(vlc, nb_bits, nb_codes, \ + bits, bits_wrap, bits_size, \ + codes, codes_wrap, codes_size, \ + flags) \ + ff_init_vlc_sparse(vlc, nb_bits, nb_codes, \ + bits, bits_wrap, bits_size, \ + codes, codes_wrap, codes_size, \ + NULL, 0, 0, flags) + +int ff_init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes, + const void *bits, int bits_wrap, int bits_size, + const void *codes, int codes_wrap, int codes_size, + const void *symbols, int symbols_wrap, int symbols_size, + int flags); +void ff_free_vlc(VLC *vlc); + +#define INIT_VLC_LE 2 +#define INIT_VLC_USE_NEW_STATIC 4 + +#define INIT_VLC_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g, h, i, j, static_size) \ + do { \ + static VLC_TYPE table[static_size][2]; \ + (vlc)->table = table; \ + (vlc)->table_allocated = static_size; \ + ff_init_vlc_sparse(vlc, bits, a, b, c, d, e, f, g, h, i, j, \ + INIT_VLC_USE_NEW_STATIC); \ + } while (0) + +#define INIT_LE_VLC_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g, h, i, j, static_size) \ + do { \ + static VLC_TYPE table[static_size][2]; \ + (vlc)->table = table; \ + (vlc)->table_allocated = static_size; \ + ff_init_vlc_sparse(vlc, bits, a, b, c, d, e, f, g, h, i, j, \ + INIT_VLC_USE_NEW_STATIC | INIT_VLC_LE); \ + } while (0) + +#define INIT_VLC_STATIC(vlc, bits, a, b, c, d, e, f, g, static_size) \ + INIT_VLC_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g, NULL, 0, 0, static_size) + +#define INIT_LE_VLC_STATIC(vlc, bits, a, b, c, d, e, f, g, static_size) \ + INIT_LE_VLC_SPARSE_STATIC(vlc, bits, a, b, c, d, e, f, g, NULL, 0, 0, static_size) + +#endif /* AVCODEC_VLC_H */ diff --git a/libs/ffvpx/libavcodec/vorbis_parser.c b/libs/ffvpx/libavcodec/vorbis_parser.c new file mode 100644 index 000000000..0b2c97cde --- /dev/null +++ b/libs/ffvpx/libavcodec/vorbis_parser.c @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2012 Justin Ruggles + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Vorbis audio parser + * + * Determines the duration for each packet. + */ + +#include "libavutil/log.h" + +#include "get_bits.h" +#include "parser.h" +#include "xiph.h" +#include "vorbis_parser_internal.h" + +static const AVClass vorbis_parser_class = { + .class_name = "Vorbis parser", + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT, +}; + +static int parse_id_header(AVVorbisParseContext *s, + const uint8_t *buf, int buf_size) +{ + /* Id header should be 30 bytes */ + if (buf_size < 30) { + av_log(s, AV_LOG_ERROR, "Id header is too short\n"); + return AVERROR_INVALIDDATA; + } + + /* make sure this is the Id header */ + if (buf[0] != 1) { + av_log(s, AV_LOG_ERROR, "Wrong packet type in Id header\n"); + return AVERROR_INVALIDDATA; + } + + /* check for header signature */ + if (memcmp(&buf[1], "vorbis", 6)) { + av_log(s, AV_LOG_ERROR, "Invalid packet signature in Id header\n"); + return AVERROR_INVALIDDATA; + } + + if (!(buf[29] & 0x1)) { + av_log(s, AV_LOG_ERROR, "Invalid framing bit in Id header\n"); + return AVERROR_INVALIDDATA; + } + + s->blocksize[0] = 1 << (buf[28] & 0xF); + s->blocksize[1] = 1 << (buf[28] >> 4); + + return 0; +} + +static int parse_setup_header(AVVorbisParseContext *s, + const uint8_t *buf, int buf_size) +{ + GetBitContext gb, gb0; + uint8_t *rev_buf; + int i, ret = 0; + int got_framing_bit, mode_count, got_mode_header, last_mode_count = 0; + + /* avoid overread */ + if (buf_size < 7) { + av_log(s, AV_LOG_ERROR, "Setup header is too short\n"); + return AVERROR_INVALIDDATA; + } + + /* make sure this is the Setup header */ + if (buf[0] != 5) { + av_log(s, AV_LOG_ERROR, "Wrong packet type in Setup header\n"); + return AVERROR_INVALIDDATA; + } + + /* check for header signature */ + if (memcmp(&buf[1], "vorbis", 6)) { + av_log(s, AV_LOG_ERROR, "Invalid packet signature in Setup header\n"); + return AVERROR_INVALIDDATA; + } + + /* reverse bytes so we can easily read backwards with get_bits() */ + if (!(rev_buf = av_malloc(buf_size))) { + av_log(s, AV_LOG_ERROR, "Out of memory\n"); + return AVERROR(ENOMEM); + } + for (i = 0; i < buf_size; i++) + rev_buf[i] = buf[buf_size - 1 - i]; + init_get_bits(&gb, rev_buf, buf_size * 8); + + got_framing_bit = 0; + while (get_bits_left(&gb) > 97) { + if (get_bits1(&gb)) { + got_framing_bit = get_bits_count(&gb); + break; + } + } + if (!got_framing_bit) { + av_log(s, AV_LOG_ERROR, "Invalid Setup header\n"); + ret = AVERROR_INVALIDDATA; + goto bad_header; + } + + /* Now we search backwards to find possible valid mode counts. This is not + * fool-proof because we could have false positive matches and read too + * far, but there isn't really any way to be sure without parsing through + * all the many variable-sized fields before the modes. This approach seems + * to work well in testing, and it is similar to how it is handled in + * liboggz. */ + mode_count = 0; + got_mode_header = 0; + while (get_bits_left(&gb) >= 97) { + if (get_bits(&gb, 8) > 63 || get_bits(&gb, 16) || get_bits(&gb, 16)) + break; + skip_bits(&gb, 1); + mode_count++; + if (mode_count > 64) + break; + gb0 = gb; + if (get_bits(&gb0, 6) + 1 == mode_count) { + got_mode_header = 1; + last_mode_count = mode_count; + } + } + if (!got_mode_header) { + av_log(s, AV_LOG_ERROR, "Invalid Setup header\n"); + ret = AVERROR_INVALIDDATA; + goto bad_header; + } + /* All samples I've seen use <= 2 modes, so ask for a sample if we find + * more than that, as it is most likely a false positive. If we get any + * we may need to approach this the long way and parse the whole Setup + * header, but I hope very much that it never comes to that. */ + if (last_mode_count > 2) { + avpriv_request_sample(s, + "%d modes (either a false positive or a " + "sample from an unknown encoder)", + last_mode_count); + } + /* We're limiting the mode count to 63 so that we know that the previous + * block flag will be in the first packet byte. */ + if (last_mode_count > 63) { + av_log(s, AV_LOG_ERROR, "Unsupported mode count: %d\n", + last_mode_count); + ret = AVERROR_INVALIDDATA; + goto bad_header; + } + s->mode_count = mode_count = last_mode_count; + /* Determine the number of bits required to code the mode and turn that + * into a bitmask to directly access the mode from the first frame byte. */ + s->mode_mask = ((1 << (av_log2(mode_count - 1) + 1)) - 1) << 1; + /* The previous window flag is the next bit after the mode */ + s->prev_mask = (s->mode_mask | 0x1) + 1; + + init_get_bits(&gb, rev_buf, buf_size * 8); + skip_bits_long(&gb, got_framing_bit); + for (i = mode_count - 1; i >= 0; i--) { + skip_bits_long(&gb, 40); + s->mode_blocksize[i] = get_bits1(&gb); + } + +bad_header: + av_free(rev_buf); + return ret; +} + +static int vorbis_parse_init(AVVorbisParseContext *s, + const uint8_t *extradata, int extradata_size) +{ + const uint8_t *header_start[3]; + int header_len[3]; + int ret; + + s->class = &vorbis_parser_class; + s->extradata_parsed = 1; + + if ((ret = avpriv_split_xiph_headers(extradata, + extradata_size, 30, + header_start, header_len)) < 0) { + av_log(s, AV_LOG_ERROR, "Extradata corrupt.\n"); + return ret; + } + + if ((ret = parse_id_header(s, header_start[0], header_len[0])) < 0) + return ret; + + if ((ret = parse_setup_header(s, header_start[2], header_len[2])) < 0) + return ret; + + s->valid_extradata = 1; + s->previous_blocksize = s->blocksize[s->mode_blocksize[0]]; + + return 0; +} + +int av_vorbis_parse_frame_flags(AVVorbisParseContext *s, const uint8_t *buf, + int buf_size, int *flags) +{ + int duration = 0; + + if (s->valid_extradata && buf_size > 0) { + int mode, current_blocksize; + int previous_blocksize = s->previous_blocksize; + + if (buf[0] & 1) { + /* If the user doesn't care about special packets, it's a bad one. */ + if (!flags) + goto bad_packet; + + /* Set the flag for which kind of special packet it is. */ + if (buf[0] == 1) + *flags |= VORBIS_FLAG_HEADER; + else if (buf[0] == 3) + *flags |= VORBIS_FLAG_COMMENT; + else if (buf[0] == 5) + *flags |= VORBIS_FLAG_SETUP; + else + goto bad_packet; + + /* Special packets have no duration. */ + return 0; + +bad_packet: + av_log(s, AV_LOG_ERROR, "Invalid packet\n"); + return AVERROR_INVALIDDATA; + } + if (s->mode_count == 1) + mode = 0; + else + mode = (buf[0] & s->mode_mask) >> 1; + if (mode >= s->mode_count) { + av_log(s, AV_LOG_ERROR, "Invalid mode in packet\n"); + return AVERROR_INVALIDDATA; + } + if(s->mode_blocksize[mode]){ + int flag = !!(buf[0] & s->prev_mask); + previous_blocksize = s->blocksize[flag]; + } + current_blocksize = s->blocksize[s->mode_blocksize[mode]]; + duration = (previous_blocksize + current_blocksize) >> 2; + s->previous_blocksize = current_blocksize; + } + + return duration; +} + +int av_vorbis_parse_frame(AVVorbisParseContext *s, const uint8_t *buf, + int buf_size) +{ + return av_vorbis_parse_frame_flags(s, buf, buf_size, NULL); +} + +void av_vorbis_parse_reset(AVVorbisParseContext *s) +{ + if (s->valid_extradata) + s->previous_blocksize = s->blocksize[0]; +} + +void av_vorbis_parse_free(AVVorbisParseContext **s) +{ + av_freep(s); +} + +AVVorbisParseContext *av_vorbis_parse_init(const uint8_t *extradata, + int extradata_size) +{ + AVVorbisParseContext *s = av_mallocz(sizeof(*s)); + int ret; + + if (!s) + return NULL; + + ret = vorbis_parse_init(s, extradata, extradata_size); + if (ret < 0) { + av_vorbis_parse_free(&s); + return NULL; + } + + return s; +} + +#if CONFIG_VORBIS_PARSER + +typedef struct VorbisParseContext { + AVVorbisParseContext *vp; +} VorbisParseContext; + +static int vorbis_parse(AVCodecParserContext *s1, AVCodecContext *avctx, + const uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + VorbisParseContext *s = s1->priv_data; + int duration; + + if (!s->vp && avctx->extradata && avctx->extradata_size) { + s->vp = av_vorbis_parse_init(avctx->extradata, avctx->extradata_size); + } + if (!s->vp) + goto end; + + if ((duration = av_vorbis_parse_frame(s->vp, buf, buf_size)) >= 0) + s1->duration = duration; + +end: + /* always return the full packet. this parser isn't doing any splitting or + combining, only packet analysis */ + *poutbuf = buf; + *poutbuf_size = buf_size; + return buf_size; +} + +static void vorbis_parser_close(AVCodecParserContext *ctx) +{ + VorbisParseContext *s = ctx->priv_data; + av_vorbis_parse_free(&s->vp); +} + +AVCodecParser ff_vorbis_parser = { + .codec_ids = { AV_CODEC_ID_VORBIS }, + .priv_data_size = sizeof(VorbisParseContext), + .parser_parse = vorbis_parse, + .parser_close = vorbis_parser_close, +}; +#endif /* CONFIG_VORBIS_PARSER */ diff --git a/libs/ffvpx/libavcodec/vorbis_parser.h b/libs/ffvpx/libavcodec/vorbis_parser.h new file mode 100644 index 000000000..789932ac4 --- /dev/null +++ b/libs/ffvpx/libavcodec/vorbis_parser.h @@ -0,0 +1,74 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * A public API for Vorbis parsing + * + * Determines the duration for each packet. + */ + +#ifndef AVCODEC_VORBIS_PARSER_H +#define AVCODEC_VORBIS_PARSER_H + +#include <stdint.h> + +typedef struct AVVorbisParseContext AVVorbisParseContext; + +/** + * Allocate and initialize the Vorbis parser using headers in the extradata. + */ +AVVorbisParseContext *av_vorbis_parse_init(const uint8_t *extradata, + int extradata_size); + +/** + * Free the parser and everything associated with it. + */ +void av_vorbis_parse_free(AVVorbisParseContext **s); + +#define VORBIS_FLAG_HEADER 0x00000001 +#define VORBIS_FLAG_COMMENT 0x00000002 +#define VORBIS_FLAG_SETUP 0x00000004 + +/** + * Get the duration for a Vorbis packet. + * + * If @p flags is @c NULL, + * special frames are considered invalid. + * + * @param s Vorbis parser context + * @param buf buffer containing a Vorbis frame + * @param buf_size size of the buffer + * @param flags flags for special frames + */ +int av_vorbis_parse_frame_flags(AVVorbisParseContext *s, const uint8_t *buf, + int buf_size, int *flags); + +/** + * Get the duration for a Vorbis packet. + * + * @param s Vorbis parser context + * @param buf buffer containing a Vorbis frame + * @param buf_size size of the buffer + */ +int av_vorbis_parse_frame(AVVorbisParseContext *s, const uint8_t *buf, + int buf_size); + +void av_vorbis_parse_reset(AVVorbisParseContext *s); + +#endif /* AVCODEC_VORBIS_PARSER_H */ diff --git a/libs/ffvpx/libavcodec/vorbis_parser_internal.h b/libs/ffvpx/libavcodec/vorbis_parser_internal.h new file mode 100644 index 000000000..691a84238 --- /dev/null +++ b/libs/ffvpx/libavcodec/vorbis_parser_internal.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2012 Justin Ruggles + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Vorbis audio parser + * + * Determines the duration for each packet. + */ + +#ifndef AVCODEC_VORBIS_PARSER_INTERNAL_H +#define AVCODEC_VORBIS_PARSER_INTERNAL_H + +#include "avcodec.h" +#include "vorbis_parser.h" + +struct AVVorbisParseContext { + const AVClass *class; + int extradata_parsed; ///< we have attempted to parse extradata + int valid_extradata; ///< extradata is valid, so we can calculate duration + int blocksize[2]; ///< short and long window sizes + int previous_blocksize; ///< previous window size + int mode_blocksize[64]; ///< window size mapping for each mode + int mode_count; ///< number of modes + int mode_mask; ///< bitmask used to get the mode in each packet + int prev_mask; ///< bitmask used to get the previous mode flag in each packet +}; + +#endif /* AVCODEC_VORBIS_PARSER_INTERNAL_H */ diff --git a/libs/ffvpx/libavcodec/vp3dsp.h b/libs/ffvpx/libavcodec/vp3dsp.h new file mode 100644 index 000000000..2fdad162c --- /dev/null +++ b/libs/ffvpx/libavcodec/vp3dsp.h @@ -0,0 +1,53 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VP3DSP_H +#define AVCODEC_VP3DSP_H + +#include <stddef.h> +#include <stdint.h> + +typedef struct VP3DSPContext { + /** + * Copy 8xH pixels from source to destination buffer using a bilinear + * filter with no rounding (i.e. *dst = (*a + *b) >> 1). + * + * @param dst destination buffer, aligned by 8 + * @param a first source buffer, no alignment + * @param b second source buffer, no alignment + * @param stride distance between two lines in source/dest buffers + * @param h height + */ + void (*put_no_rnd_pixels_l2)(uint8_t *dst, + const uint8_t *a, + const uint8_t *b, + ptrdiff_t stride, int h); + + void (*idct_put)(uint8_t *dest, ptrdiff_t stride, int16_t *block); + void (*idct_add)(uint8_t *dest, ptrdiff_t stride, int16_t *block); + void (*idct_dc_add)(uint8_t *dest, ptrdiff_t stride, int16_t *block); + void (*v_loop_filter)(uint8_t *src, ptrdiff_t stride, int *bounding_values); + void (*h_loop_filter)(uint8_t *src, ptrdiff_t stride, int *bounding_values); +} VP3DSPContext; + +void ff_vp3dsp_init(VP3DSPContext *c, int flags); +void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags); +void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags); +void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags); + +#endif /* AVCODEC_VP3DSP_H */ diff --git a/libs/ffvpx/libavcodec/vp56.h b/libs/ffvpx/libavcodec/vp56.h new file mode 100644 index 000000000..b8dda9e73 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp56.h @@ -0,0 +1,404 @@ +/* + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * VP5 and VP6 compatible video decoder (common features) + */ + +#ifndef AVCODEC_VP56_H +#define AVCODEC_VP56_H + +#include "avcodec.h" +#include "get_bits.h" +#include "hpeldsp.h" +#include "bytestream.h" +#include "h264chroma.h" +#include "videodsp.h" +#include "vp3dsp.h" +#include "vp56dsp.h" + +typedef struct vp56_context VP56Context; + +typedef enum { + VP56_FRAME_NONE =-1, + VP56_FRAME_CURRENT = 0, + VP56_FRAME_PREVIOUS = 1, + VP56_FRAME_GOLDEN = 2, + VP56_FRAME_GOLDEN2 = 3, +} VP56Frame; + +typedef enum { + VP56_MB_INTER_NOVEC_PF = 0, /**< Inter MB, no vector, from previous frame */ + VP56_MB_INTRA = 1, /**< Intra MB */ + VP56_MB_INTER_DELTA_PF = 2, /**< Inter MB, above/left vector + delta, from previous frame */ + VP56_MB_INTER_V1_PF = 3, /**< Inter MB, first vector, from previous frame */ + VP56_MB_INTER_V2_PF = 4, /**< Inter MB, second vector, from previous frame */ + VP56_MB_INTER_NOVEC_GF = 5, /**< Inter MB, no vector, from golden frame */ + VP56_MB_INTER_DELTA_GF = 6, /**< Inter MB, above/left vector + delta, from golden frame */ + VP56_MB_INTER_4V = 7, /**< Inter MB, 4 vectors, from previous frame */ + VP56_MB_INTER_V1_GF = 8, /**< Inter MB, first vector, from golden frame */ + VP56_MB_INTER_V2_GF = 9, /**< Inter MB, second vector, from golden frame */ +} VP56mb; + +typedef struct VP56Tree { + int8_t val; + int8_t prob_idx; +} VP56Tree; + +typedef struct VP56mv { + DECLARE_ALIGNED(4, int16_t, x); + int16_t y; +} VP56mv; + +#define VP56_SIZE_CHANGE 1 + +typedef void (*VP56ParseVectorAdjustment)(VP56Context *s, + VP56mv *vect); +typedef void (*VP56Filter)(VP56Context *s, uint8_t *dst, uint8_t *src, + int offset1, int offset2, ptrdiff_t stride, + VP56mv mv, int mask, int select, int luma); +typedef int (*VP56ParseCoeff)(VP56Context *s); +typedef void (*VP56DefaultModelsInit)(VP56Context *s); +typedef void (*VP56ParseVectorModels)(VP56Context *s); +typedef int (*VP56ParseCoeffModels)(VP56Context *s); +typedef int (*VP56ParseHeader)(VP56Context *s, const uint8_t *buf, + int buf_size); + +typedef struct VP56RangeCoder { + int high; + int bits; /* stored negated (i.e. negative "bits" is a positive number of + bits left) in order to eliminate a negate in cache refilling */ + const uint8_t *buffer; + const uint8_t *end; + unsigned int code_word; +} VP56RangeCoder; + +typedef struct VP56RefDc { + uint8_t not_null_dc; + VP56Frame ref_frame; + int16_t dc_coeff; +} VP56RefDc; + +typedef struct VP56Macroblock { + uint8_t type; + VP56mv mv; +} VP56Macroblock; + +typedef struct VP56Model { + uint8_t coeff_reorder[64]; /* used in vp6 only */ + uint8_t coeff_index_to_pos[64]; /* used in vp6 only */ + uint8_t vector_sig[2]; /* delta sign */ + uint8_t vector_dct[2]; /* delta coding types */ + uint8_t vector_pdi[2][2]; /* predefined delta init */ + uint8_t vector_pdv[2][7]; /* predefined delta values */ + uint8_t vector_fdv[2][8]; /* 8 bit delta value definition */ + uint8_t coeff_dccv[2][11]; /* DC coeff value */ + uint8_t coeff_ract[2][3][6][11]; /* Run/AC coding type and AC coeff value */ + uint8_t coeff_acct[2][3][3][6][5];/* vp5 only AC coding type for coding group < 3 */ + uint8_t coeff_dcct[2][36][5]; /* DC coeff coding type */ + uint8_t coeff_runv[2][14]; /* run value (vp6 only) */ + uint8_t mb_type[3][10][10]; /* model for decoding MB type */ + uint8_t mb_types_stats[3][10][2];/* contextual, next MB type stats */ +} VP56Model; + +struct vp56_context { + AVCodecContext *avctx; + H264ChromaContext h264chroma; + HpelDSPContext hdsp; + VideoDSPContext vdsp; + VP3DSPContext vp3dsp; + VP56DSPContext vp56dsp; + uint8_t idct_scantable[64]; + AVFrame *frames[4]; + uint8_t *edge_emu_buffer_alloc; + uint8_t *edge_emu_buffer; + VP56RangeCoder c; + VP56RangeCoder cc; + VP56RangeCoder *ccp; + int sub_version; + + /* frame info */ + int golden_frame; + int plane_width[4]; + int plane_height[4]; + int mb_width; /* number of horizontal MB */ + int mb_height; /* number of vertical MB */ + int block_offset[6]; + + int quantizer; + uint16_t dequant_dc; + uint16_t dequant_ac; + + /* DC predictors management */ + VP56RefDc *above_blocks; + VP56RefDc left_block[4]; + int above_block_idx[6]; + int16_t prev_dc[3][3]; /* [plan][ref_frame] */ + + /* blocks / macroblock */ + VP56mb mb_type; + VP56Macroblock *macroblocks; + DECLARE_ALIGNED(16, int16_t, block_coeff)[6][64]; + + /* motion vectors */ + VP56mv mv[6]; /* vectors for each block in MB */ + VP56mv vector_candidate[2]; + int vector_candidate_pos; + + /* filtering hints */ + int filter_header; /* used in vp6 only */ + int deblock_filtering; + int filter_selection; + int filter_mode; + int max_vector_length; + int sample_variance_threshold; + + uint8_t coeff_ctx[4][64]; /* used in vp5 only */ + uint8_t coeff_ctx_last[4]; /* used in vp5 only */ + + int has_alpha; + + /* upside-down flipping hints */ + int flip; /* are we flipping ? */ + int frbi; /* first row block index in MB */ + int srbi; /* second row block index in MB */ + ptrdiff_t stride[4]; /* stride for each plan */ + + const uint8_t *vp56_coord_div; + VP56ParseVectorAdjustment parse_vector_adjustment; + VP56Filter filter; + VP56ParseCoeff parse_coeff; + VP56DefaultModelsInit default_models_init; + VP56ParseVectorModels parse_vector_models; + VP56ParseCoeffModels parse_coeff_models; + VP56ParseHeader parse_header; + + /* for "slice" parallelism between YUV and A */ + VP56Context *alpha_context; + + VP56Model *modelp; + VP56Model model; + + /* huffman decoding */ + int use_huffman; + GetBitContext gb; + VLC dccv_vlc[2]; + VLC runv_vlc[2]; + VLC ract_vlc[2][3][6]; + unsigned int nb_null[2][2]; /* number of consecutive NULL DC/AC */ + + int have_undamaged_frame; + int discard_frame; +}; + + +int ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha); +int ff_vp56_init_context(AVCodecContext *avctx, VP56Context *s, + int flip, int has_alpha); +int ff_vp56_free(AVCodecContext *avctx); +int ff_vp56_free_context(VP56Context *s); +void ff_vp56_init_dequant(VP56Context *s, int quantizer); +int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, + AVPacket *avpkt); + + +/** + * vp56 specific range coder implementation + */ + +extern const uint8_t ff_vp56_norm_shift[256]; +int ff_vp56_init_range_decoder(VP56RangeCoder *c, const uint8_t *buf, int buf_size); + +static av_always_inline unsigned int vp56_rac_renorm(VP56RangeCoder *c) +{ + int shift = ff_vp56_norm_shift[c->high]; + int bits = c->bits; + unsigned int code_word = c->code_word; + + c->high <<= shift; + code_word <<= shift; + bits += shift; + if(bits >= 0 && c->buffer < c->end) { + code_word |= bytestream_get_be16(&c->buffer) << bits; + bits -= 16; + } + c->bits = bits; + return code_word; +} + +#if ARCH_ARM +#include "arm/vp56_arith.h" +#elif ARCH_X86 +#include "x86/vp56_arith.h" +#endif + +#ifndef vp56_rac_get_prob +#define vp56_rac_get_prob vp56_rac_get_prob +static av_always_inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob) +{ + unsigned int code_word = vp56_rac_renorm(c); + unsigned int low = 1 + (((c->high - 1) * prob) >> 8); + unsigned int low_shift = low << 16; + int bit = code_word >= low_shift; + + c->high = bit ? c->high - low : low; + c->code_word = bit ? code_word - low_shift : code_word; + + return bit; +} +#endif + +#ifndef vp56_rac_get_prob_branchy +// branchy variant, to be used where there's a branch based on the bit decoded +static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob) +{ + unsigned long code_word = vp56_rac_renorm(c); + unsigned low = 1 + (((c->high - 1) * prob) >> 8); + unsigned low_shift = low << 16; + + if (code_word >= low_shift) { + c->high -= low; + c->code_word = code_word - low_shift; + return 1; + } + + c->high = low; + c->code_word = code_word; + return 0; +} +#endif + +static av_always_inline int vp56_rac_get(VP56RangeCoder *c) +{ + unsigned int code_word = vp56_rac_renorm(c); + /* equiprobable */ + int low = (c->high + 1) >> 1; + unsigned int low_shift = low << 16; + int bit = code_word >= low_shift; + if (bit) { + c->high -= low; + code_word -= low_shift; + } else { + c->high = low; + } + + c->code_word = code_word; + return bit; +} + +// rounding is different than vp56_rac_get, is vp56_rac_get wrong? +static av_always_inline int vp8_rac_get(VP56RangeCoder *c) +{ + return vp56_rac_get_prob(c, 128); +} + +static int vp56_rac_gets(VP56RangeCoder *c, int bits) +{ + int value = 0; + + while (bits--) { + value = (value << 1) | vp56_rac_get(c); + } + + return value; +} + +static int vp8_rac_get_uint(VP56RangeCoder *c, int bits) +{ + int value = 0; + + while (bits--) { + value = (value << 1) | vp8_rac_get(c); + } + + return value; +} + +// fixme: add 1 bit to all the calls to this? +static av_unused int vp8_rac_get_sint(VP56RangeCoder *c, int bits) +{ + int v; + + if (!vp8_rac_get(c)) + return 0; + + v = vp8_rac_get_uint(c, bits); + + if (vp8_rac_get(c)) + v = -v; + + return v; +} + +// P(7) +static av_unused int vp56_rac_gets_nn(VP56RangeCoder *c, int bits) +{ + int v = vp56_rac_gets(c, 7) << 1; + return v + !v; +} + +static av_unused int vp8_rac_get_nn(VP56RangeCoder *c) +{ + int v = vp8_rac_get_uint(c, 7) << 1; + return v + !v; +} + +static av_always_inline +int vp56_rac_get_tree(VP56RangeCoder *c, + const VP56Tree *tree, + const uint8_t *probs) +{ + while (tree->val > 0) { + if (vp56_rac_get_prob_branchy(c, probs[tree->prob_idx])) + tree += tree->val; + else + tree++; + } + return -tree->val; +} + +// how probabilities are associated with decisions is different I think +// well, the new scheme fits in the old but this way has one fewer branches per decision +static av_always_inline int vp8_rac_get_tree(VP56RangeCoder *c, const int8_t (*tree)[2], + const uint8_t *probs) +{ + int i = 0; + + do { + i = tree[i][vp56_rac_get_prob(c, probs[i])]; + } while (i > 0); + + return -i; +} + +// DCTextra +static av_always_inline int vp8_rac_get_coeff(VP56RangeCoder *c, const uint8_t *prob) +{ + int v = 0; + + do { + v = (v<<1) + vp56_rac_get_prob(c, *prob++); + } while (*prob); + + return v; +} + +#endif /* AVCODEC_VP56_H */ diff --git a/libs/ffvpx/libavcodec/vp56dsp.h b/libs/ffvpx/libavcodec/vp56dsp.h new file mode 100644 index 000000000..e35e232ea --- /dev/null +++ b/libs/ffvpx/libavcodec/vp56dsp.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2010 Mans Rullgard <mans@mansr.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VP56DSP_H +#define AVCODEC_VP56DSP_H + +#include <stddef.h> +#include <stdint.h> + +typedef struct VP56DSPContext { + void (*edge_filter_hor)(uint8_t *yuv, ptrdiff_t stride, int t); + void (*edge_filter_ver)(uint8_t *yuv, ptrdiff_t stride, int t); + + void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, ptrdiff_t stride, + const int16_t *h_weights,const int16_t *v_weights); +} VP56DSPContext; + +void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride, + const int16_t *h_weights, const int16_t *v_weights); + +void ff_vp5dsp_init(VP56DSPContext *s); +void ff_vp6dsp_init(VP56DSPContext *s); + +void ff_vp6dsp_init_arm(VP56DSPContext *s); +void ff_vp6dsp_init_x86(VP56DSPContext *s); + +#endif /* AVCODEC_VP56DSP_H */ diff --git a/libs/ffvpx/libavcodec/vp56rac.c b/libs/ffvpx/libavcodec/vp56rac.c new file mode 100644 index 000000000..e70302bf8 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp56rac.c @@ -0,0 +1,50 @@ +/* + * VP5/6/8 decoder + * Copyright (c) 2010 Fiona Glaser <fiona@x264.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/common.h" +#include "vp56.h" + +const uint8_t ff_vp56_norm_shift[256]= { + 8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +}; + +int ff_vp56_init_range_decoder(VP56RangeCoder *c, const uint8_t *buf, int buf_size) +{ + c->high = 255; + c->bits = -16; + c->buffer = buf; + c->end = buf + buf_size; + if (buf_size < 1) + return AVERROR_INVALIDDATA; + c->code_word = bytestream_get_be24(&c->buffer); + return 0; +} diff --git a/libs/ffvpx/libavcodec/vp8.c b/libs/ffvpx/libavcodec/vp8.c new file mode 100644 index 000000000..62b9f8bc2 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp8.c @@ -0,0 +1,2976 @@ +/* + * VP7/VP8 compatible video decoder + * + * Copyright (C) 2010 David Conrad + * Copyright (C) 2010 Ronald S. Bultje + * Copyright (C) 2010 Fiona Glaser + * Copyright (C) 2012 Daniel Kang + * Copyright (C) 2014 Peter Ross + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/imgutils.h" + +#include "avcodec.h" +#include "hwaccel.h" +#include "internal.h" +#include "mathops.h" +#include "rectangle.h" +#include "thread.h" +#include "vp8.h" +#include "vp8data.h" + +#if ARCH_ARM +# include "arm/vp8.h" +#endif + +#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER +#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f) +#elif CONFIG_VP7_DECODER +#define VPX(vp7, f) vp7_ ## f +#else // CONFIG_VP8_DECODER +#define VPX(vp7, f) vp8_ ## f +#endif + +static void free_buffers(VP8Context *s) +{ + int i; + if (s->thread_data) + for (i = 0; i < MAX_THREADS; i++) { +#if HAVE_THREADS + pthread_cond_destroy(&s->thread_data[i].cond); + pthread_mutex_destroy(&s->thread_data[i].lock); +#endif + av_freep(&s->thread_data[i].filter_strength); + } + av_freep(&s->thread_data); + av_freep(&s->macroblocks_base); + av_freep(&s->intra4x4_pred_mode_top); + av_freep(&s->top_nnz); + av_freep(&s->top_border); + + s->macroblocks = NULL; +} + +static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref) +{ + int ret; + if ((ret = ff_thread_get_buffer(s->avctx, &f->tf, + ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0) + return ret; + if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) + goto fail; + if (s->avctx->hwaccel) { + const AVHWAccel *hwaccel = s->avctx->hwaccel; + if (hwaccel->frame_priv_data_size) { + f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); + if (!f->hwaccel_priv_buf) + goto fail; + f->hwaccel_picture_private = f->hwaccel_priv_buf->data; + } + } + return 0; + +fail: + av_buffer_unref(&f->seg_map); + ff_thread_release_buffer(s->avctx, &f->tf); + return AVERROR(ENOMEM); +} + +static void vp8_release_frame(VP8Context *s, VP8Frame *f) +{ + av_buffer_unref(&f->seg_map); + av_buffer_unref(&f->hwaccel_priv_buf); + f->hwaccel_picture_private = NULL; + ff_thread_release_buffer(s->avctx, &f->tf); +} + +#if CONFIG_VP8_DECODER +static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src) +{ + int ret; + + vp8_release_frame(s, dst); + + if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) + return ret; + if (src->seg_map && + !(dst->seg_map = av_buffer_ref(src->seg_map))) { + vp8_release_frame(s, dst); + return AVERROR(ENOMEM); + } + if (src->hwaccel_picture_private) { + dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf); + if (!dst->hwaccel_priv_buf) + return AVERROR(ENOMEM); + dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data; + } + + return 0; +} +#endif /* CONFIG_VP8_DECODER */ + +static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem) +{ + VP8Context *s = avctx->priv_data; + int i; + + for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) + vp8_release_frame(s, &s->frames[i]); + memset(s->framep, 0, sizeof(s->framep)); + + if (free_mem) + free_buffers(s); +} + +static void vp8_decode_flush(AVCodecContext *avctx) +{ + vp8_decode_flush_impl(avctx, 0); +} + +static VP8Frame *vp8_find_free_buffer(VP8Context *s) +{ + VP8Frame *frame = NULL; + int i; + + // find a free buffer + for (i = 0; i < 5; i++) + if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] && + &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && + &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && + &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) { + frame = &s->frames[i]; + break; + } + if (i == 5) { + av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n"); + abort(); + } + if (frame->tf.f->buf[0]) + vp8_release_frame(s, frame); + + return frame; +} + +static enum AVPixelFormat get_pixel_format(VP8Context *s) +{ + enum AVPixelFormat pix_fmts[] = { +#if CONFIG_VP8_VAAPI_HWACCEL + AV_PIX_FMT_VAAPI, +#endif +#if CONFIG_VP8_NVDEC_HWACCEL + AV_PIX_FMT_CUDA, +#endif + AV_PIX_FMT_YUV420P, + AV_PIX_FMT_NONE, + }; + + return ff_get_format(s->avctx, pix_fmts); +} + +static av_always_inline +int update_dimensions(VP8Context *s, int width, int height, int is_vp7) +{ + AVCodecContext *avctx = s->avctx; + int i, ret; + + if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base || + height != s->avctx->height) { + vp8_decode_flush_impl(s->avctx, 1); + + ret = ff_set_dimensions(s->avctx, width, height); + if (ret < 0) + return ret; + } + + if (!s->actually_webp && !is_vp7) { + s->pix_fmt = get_pixel_format(s); + if (s->pix_fmt < 0) + return AVERROR(EINVAL); + avctx->pix_fmt = s->pix_fmt; + } + + s->mb_width = (s->avctx->coded_width + 15) / 16; + s->mb_height = (s->avctx->coded_height + 15) / 16; + + s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE && + avctx->thread_count > 1; + if (!s->mb_layout) { // Frame threading and one thread + s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) * + sizeof(*s->macroblocks)); + s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4); + } else // Sliced threading + s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) * + sizeof(*s->macroblocks)); + s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz)); + s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border)); + s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData)); + + if (!s->macroblocks_base || !s->top_nnz || !s->top_border || + !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) { + free_buffers(s); + return AVERROR(ENOMEM); + } + + for (i = 0; i < MAX_THREADS; i++) { + s->thread_data[i].filter_strength = + av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength)); + if (!s->thread_data[i].filter_strength) { + free_buffers(s); + return AVERROR(ENOMEM); + } +#if HAVE_THREADS + pthread_mutex_init(&s->thread_data[i].lock, NULL); + pthread_cond_init(&s->thread_data[i].cond, NULL); +#endif + } + + s->macroblocks = s->macroblocks_base + 1; + + return 0; +} + +static int vp7_update_dimensions(VP8Context *s, int width, int height) +{ + return update_dimensions(s, width, height, IS_VP7); +} + +static int vp8_update_dimensions(VP8Context *s, int width, int height) +{ + return update_dimensions(s, width, height, IS_VP8); +} + + +static void parse_segment_info(VP8Context *s) +{ + VP56RangeCoder *c = &s->c; + int i; + + s->segmentation.update_map = vp8_rac_get(c); + s->segmentation.update_feature_data = vp8_rac_get(c); + + if (s->segmentation.update_feature_data) { + s->segmentation.absolute_vals = vp8_rac_get(c); + + for (i = 0; i < 4; i++) + s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7); + + for (i = 0; i < 4; i++) + s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6); + } + if (s->segmentation.update_map) + for (i = 0; i < 3; i++) + s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255; +} + +static void update_lf_deltas(VP8Context *s) +{ + VP56RangeCoder *c = &s->c; + int i; + + for (i = 0; i < 4; i++) { + if (vp8_rac_get(c)) { + s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6); + + if (vp8_rac_get(c)) + s->lf_delta.ref[i] = -s->lf_delta.ref[i]; + } + } + + for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) { + if (vp8_rac_get(c)) { + s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6); + + if (vp8_rac_get(c)) + s->lf_delta.mode[i] = -s->lf_delta.mode[i]; + } + } +} + +static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) +{ + const uint8_t *sizes = buf; + int i; + int ret; + + s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2); + + buf += 3 * (s->num_coeff_partitions - 1); + buf_size -= 3 * (s->num_coeff_partitions - 1); + if (buf_size < 0) + return -1; + + for (i = 0; i < s->num_coeff_partitions - 1; i++) { + int size = AV_RL24(sizes + 3 * i); + if (buf_size - size < 0) + return -1; + s->coeff_partition_size[i] = size; + + ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size); + if (ret < 0) + return ret; + buf += size; + buf_size -= size; + } + + s->coeff_partition_size[i] = buf_size; + ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size); + + return 0; +} + +static void vp7_get_quants(VP8Context *s) +{ + VP56RangeCoder *c = &s->c; + + int yac_qi = vp8_rac_get_uint(c, 7); + int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi; + int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi; + int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi; + int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi; + int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi; + + s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi]; + s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi]; + s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi]; + s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi]; + s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132); + s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi]; +} + +static void vp8_get_quants(VP8Context *s) +{ + VP56RangeCoder *c = &s->c; + int i, base_qi; + + s->quant.yac_qi = vp8_rac_get_uint(c, 7); + s->quant.ydc_delta = vp8_rac_get_sint(c, 4); + s->quant.y2dc_delta = vp8_rac_get_sint(c, 4); + s->quant.y2ac_delta = vp8_rac_get_sint(c, 4); + s->quant.uvdc_delta = vp8_rac_get_sint(c, 4); + s->quant.uvac_delta = vp8_rac_get_sint(c, 4); + + for (i = 0; i < 4; i++) { + if (s->segmentation.enabled) { + base_qi = s->segmentation.base_quant[i]; + if (!s->segmentation.absolute_vals) + base_qi += s->quant.yac_qi; + } else + base_qi = s->quant.yac_qi; + + s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta, 7)]; + s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)]; + s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2; + /* 101581>>16 is equivalent to 155/100 */ + s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16; + s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)]; + s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)]; + + s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); + s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); + } +} + +/** + * Determine which buffers golden and altref should be updated with after this frame. + * The spec isn't clear here, so I'm going by my understanding of what libvpx does + * + * Intra frames update all 3 references + * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set + * If the update (golden|altref) flag is set, it's updated with the current frame + * if update_last is set, and VP56_FRAME_PREVIOUS otherwise. + * If the flag is not set, the number read means: + * 0: no update + * 1: VP56_FRAME_PREVIOUS + * 2: update golden with altref, or update altref with golden + */ +static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref) +{ + VP56RangeCoder *c = &s->c; + + if (update) + return VP56_FRAME_CURRENT; + + switch (vp8_rac_get_uint(c, 2)) { + case 1: + return VP56_FRAME_PREVIOUS; + case 2: + return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN; + } + return VP56_FRAME_NONE; +} + +static void vp78_reset_probability_tables(VP8Context *s) +{ + int i, j; + for (i = 0; i < 4; i++) + for (j = 0; j < 16; j++) + memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]], + sizeof(s->prob->token[i][j])); +} + +static void vp78_update_probability_tables(VP8Context *s) +{ + VP56RangeCoder *c = &s->c; + int i, j, k, l, m; + + for (i = 0; i < 4; i++) + for (j = 0; j < 8; j++) + for (k = 0; k < 3; k++) + for (l = 0; l < NUM_DCT_TOKENS-1; l++) + if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) { + int prob = vp8_rac_get_uint(c, 8); + for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++) + s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob; + } +} + +#define VP7_MVC_SIZE 17 +#define VP8_MVC_SIZE 19 + +static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s, + int mvc_size) +{ + VP56RangeCoder *c = &s->c; + int i, j; + + if (vp8_rac_get(c)) + for (i = 0; i < 4; i++) + s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8); + if (vp8_rac_get(c)) + for (i = 0; i < 3; i++) + s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8); + + // 17.2 MV probability update + for (i = 0; i < 2; i++) + for (j = 0; j < mvc_size; j++) + if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j])) + s->prob->mvc[i][j] = vp8_rac_get_nn(c); +} + +static void update_refs(VP8Context *s) +{ + VP56RangeCoder *c = &s->c; + + int update_golden = vp8_rac_get(c); + int update_altref = vp8_rac_get(c); + + s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN); + s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2); +} + +static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height) +{ + int i, j; + + for (j = 1; j < 3; j++) { + for (i = 0; i < height / 2; i++) + memcpy(dst->data[j] + i * dst->linesize[j], + src->data[j] + i * src->linesize[j], width / 2); + } +} + +static void fade(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + int width, int height, + int alpha, int beta) +{ + int i, j; + for (j = 0; j < height; j++) { + for (i = 0; i < width; i++) { + uint8_t y = src[j * src_linesize + i]; + dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha); + } + } +} + +static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c) +{ + int alpha = (int8_t) vp8_rac_get_uint(c, 8); + int beta = (int8_t) vp8_rac_get_uint(c, 8); + int ret; + + if (!s->keyframe && (alpha || beta)) { + int width = s->mb_width * 16; + int height = s->mb_height * 16; + AVFrame *src, *dst; + + if (!s->framep[VP56_FRAME_PREVIOUS] || + !s->framep[VP56_FRAME_GOLDEN]) { + av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n"); + return AVERROR_INVALIDDATA; + } + + dst = + src = s->framep[VP56_FRAME_PREVIOUS]->tf.f; + + /* preserve the golden frame, write a new previous frame */ + if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) { + s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s); + if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0) + return ret; + + dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f; + + copy_chroma(dst, src, width, height); + } + + fade(dst->data[0], dst->linesize[0], + src->data[0], src->linesize[0], + width, height, alpha, beta); + } + + return 0; +} + +static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) +{ + VP56RangeCoder *c = &s->c; + int part1_size, hscale, vscale, i, j, ret; + int width = s->avctx->width; + int height = s->avctx->height; + + if (buf_size < 4) { + return AVERROR_INVALIDDATA; + } + + s->profile = (buf[0] >> 1) & 7; + if (s->profile > 1) { + avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile); + return AVERROR_INVALIDDATA; + } + + s->keyframe = !(buf[0] & 1); + s->invisible = 0; + part1_size = AV_RL24(buf) >> 4; + + if (buf_size < 4 - s->profile + part1_size) { + av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size); + return AVERROR_INVALIDDATA; + } + + buf += 4 - s->profile; + buf_size -= 4 - s->profile; + + memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab)); + + ret = ff_vp56_init_range_decoder(c, buf, part1_size); + if (ret < 0) + return ret; + buf += part1_size; + buf_size -= part1_size; + + /* A. Dimension information (keyframes only) */ + if (s->keyframe) { + width = vp8_rac_get_uint(c, 12); + height = vp8_rac_get_uint(c, 12); + hscale = vp8_rac_get_uint(c, 2); + vscale = vp8_rac_get_uint(c, 2); + if (hscale || vscale) + avpriv_request_sample(s->avctx, "Upscaling"); + + s->update_golden = s->update_altref = VP56_FRAME_CURRENT; + vp78_reset_probability_tables(s); + memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, + sizeof(s->prob->pred16x16)); + memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter, + sizeof(s->prob->pred8x8c)); + for (i = 0; i < 2; i++) + memcpy(s->prob->mvc[i], vp7_mv_default_prob[i], + sizeof(vp7_mv_default_prob[i])); + memset(&s->segmentation, 0, sizeof(s->segmentation)); + memset(&s->lf_delta, 0, sizeof(s->lf_delta)); + memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan)); + } + + if (s->keyframe || s->profile > 0) + memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred)); + + /* B. Decoding information for all four macroblock-level features */ + for (i = 0; i < 4; i++) { + s->feature_enabled[i] = vp8_rac_get(c); + if (s->feature_enabled[i]) { + s->feature_present_prob[i] = vp8_rac_get_uint(c, 8); + + for (j = 0; j < 3; j++) + s->feature_index_prob[i][j] = + vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255; + + if (vp7_feature_value_size[s->profile][i]) + for (j = 0; j < 4; j++) + s->feature_value[i][j] = + vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0; + } + } + + s->segmentation.enabled = 0; + s->segmentation.update_map = 0; + s->lf_delta.enabled = 0; + + s->num_coeff_partitions = 1; + ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size); + if (ret < 0) + return ret; + + if (!s->macroblocks_base || /* first frame */ + width != s->avctx->width || height != s->avctx->height || + (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) { + if ((ret = vp7_update_dimensions(s, width, height)) < 0) + return ret; + } + + /* C. Dequantization indices */ + vp7_get_quants(s); + + /* D. Golden frame update flag (a Flag) for interframes only */ + if (!s->keyframe) { + s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE; + s->sign_bias[VP56_FRAME_GOLDEN] = 0; + } + + s->update_last = 1; + s->update_probabilities = 1; + s->fade_present = 1; + + if (s->profile > 0) { + s->update_probabilities = vp8_rac_get(c); + if (!s->update_probabilities) + s->prob[1] = s->prob[0]; + + if (!s->keyframe) + s->fade_present = vp8_rac_get(c); + } + + if (c->end <= c->buffer && c->bits >= 0) + return AVERROR_INVALIDDATA; + /* E. Fading information for previous frame */ + if (s->fade_present && vp8_rac_get(c)) { + if ((ret = vp7_fade_frame(s ,c)) < 0) + return ret; + } + + /* F. Loop filter type */ + if (!s->profile) + s->filter.simple = vp8_rac_get(c); + + /* G. DCT coefficient ordering specification */ + if (vp8_rac_get(c)) + for (i = 1; i < 16; i++) + s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)]; + + /* H. Loop filter levels */ + if (s->profile > 0) + s->filter.simple = vp8_rac_get(c); + s->filter.level = vp8_rac_get_uint(c, 6); + s->filter.sharpness = vp8_rac_get_uint(c, 3); + + /* I. DCT coefficient probability update; 13.3 Token Probability Updates */ + vp78_update_probability_tables(s); + + s->mbskip_enabled = 0; + + /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */ + if (!s->keyframe) { + s->prob->intra = vp8_rac_get_uint(c, 8); + s->prob->last = vp8_rac_get_uint(c, 8); + vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE); + } + + return 0; +} + +static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) +{ + VP56RangeCoder *c = &s->c; + int header_size, hscale, vscale, ret; + int width = s->avctx->width; + int height = s->avctx->height; + + if (buf_size < 3) { + av_log(s->avctx, AV_LOG_ERROR, "Insufficent data (%d) for header\n", buf_size); + return AVERROR_INVALIDDATA; + } + + s->keyframe = !(buf[0] & 1); + s->profile = (buf[0]>>1) & 7; + s->invisible = !(buf[0] & 0x10); + header_size = AV_RL24(buf) >> 5; + buf += 3; + buf_size -= 3; + + s->header_partition_size = header_size; + + if (s->profile > 3) + av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile); + + if (!s->profile) + memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, + sizeof(s->put_pixels_tab)); + else // profile 1-3 use bilinear, 4+ aren't defined so whatever + memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, + sizeof(s->put_pixels_tab)); + + if (header_size > buf_size - 7 * s->keyframe) { + av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n"); + return AVERROR_INVALIDDATA; + } + + if (s->keyframe) { + if (AV_RL24(buf) != 0x2a019d) { + av_log(s->avctx, AV_LOG_ERROR, + "Invalid start code 0x%x\n", AV_RL24(buf)); + return AVERROR_INVALIDDATA; + } + width = AV_RL16(buf + 3) & 0x3fff; + height = AV_RL16(buf + 5) & 0x3fff; + hscale = buf[4] >> 6; + vscale = buf[6] >> 6; + buf += 7; + buf_size -= 7; + + if (hscale || vscale) + avpriv_request_sample(s->avctx, "Upscaling"); + + s->update_golden = s->update_altref = VP56_FRAME_CURRENT; + vp78_reset_probability_tables(s); + memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, + sizeof(s->prob->pred16x16)); + memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter, + sizeof(s->prob->pred8x8c)); + memcpy(s->prob->mvc, vp8_mv_default_prob, + sizeof(s->prob->mvc)); + memset(&s->segmentation, 0, sizeof(s->segmentation)); + memset(&s->lf_delta, 0, sizeof(s->lf_delta)); + } + + ret = ff_vp56_init_range_decoder(c, buf, header_size); + if (ret < 0) + return ret; + buf += header_size; + buf_size -= header_size; + + if (s->keyframe) { + s->colorspace = vp8_rac_get(c); + if (s->colorspace) + av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n"); + s->fullrange = vp8_rac_get(c); + } + + if ((s->segmentation.enabled = vp8_rac_get(c))) + parse_segment_info(s); + else + s->segmentation.update_map = 0; // FIXME: move this to some init function? + + s->filter.simple = vp8_rac_get(c); + s->filter.level = vp8_rac_get_uint(c, 6); + s->filter.sharpness = vp8_rac_get_uint(c, 3); + + if ((s->lf_delta.enabled = vp8_rac_get(c))) { + s->lf_delta.update = vp8_rac_get(c); + if (s->lf_delta.update) + update_lf_deltas(s); + } + + if (setup_partitions(s, buf, buf_size)) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n"); + return AVERROR_INVALIDDATA; + } + + if (!s->macroblocks_base || /* first frame */ + width != s->avctx->width || height != s->avctx->height || + (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) + if ((ret = vp8_update_dimensions(s, width, height)) < 0) + return ret; + + vp8_get_quants(s); + + if (!s->keyframe) { + update_refs(s); + s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c); + s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c); + } + + // if we aren't saving this frame's probabilities for future frames, + // make a copy of the current probabilities + if (!(s->update_probabilities = vp8_rac_get(c))) + s->prob[1] = s->prob[0]; + + s->update_last = s->keyframe || vp8_rac_get(c); + + vp78_update_probability_tables(s); + + if ((s->mbskip_enabled = vp8_rac_get(c))) + s->prob->mbskip = vp8_rac_get_uint(c, 8); + + if (!s->keyframe) { + s->prob->intra = vp8_rac_get_uint(c, 8); + s->prob->last = vp8_rac_get_uint(c, 8); + s->prob->golden = vp8_rac_get_uint(c, 8); + vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE); + } + + // Record the entropy coder state here so that hwaccels can use it. + s->c.code_word = vp56_rac_renorm(&s->c); + s->coder_state_at_header_end.input = s->c.buffer - (-s->c.bits / 8); + s->coder_state_at_header_end.range = s->c.high; + s->coder_state_at_header_end.value = s->c.code_word >> 16; + s->coder_state_at_header_end.bit_count = -s->c.bits % 8; + + return 0; +} + +static av_always_inline +void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src) +{ + dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX), + av_clip(s->mv_max.x, INT16_MIN, INT16_MAX)); + dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX), + av_clip(s->mv_max.y, INT16_MIN, INT16_MAX)); +} + +/** + * Motion vector coding, 17.1. + */ +static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7) +{ + int bit, x = 0; + + if (vp56_rac_get_prob_branchy(c, p[0])) { + int i; + + for (i = 0; i < 3; i++) + x += vp56_rac_get_prob(c, p[9 + i]) << i; + for (i = (vp7 ? 7 : 9); i > 3; i--) + x += vp56_rac_get_prob(c, p[9 + i]) << i; + if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12])) + x += 8; + } else { + // small_mvtree + const uint8_t *ps = p + 2; + bit = vp56_rac_get_prob(c, *ps); + ps += 1 + 3 * bit; + x += 4 * bit; + bit = vp56_rac_get_prob(c, *ps); + ps += 1 + bit; + x += 2 * bit; + x += vp56_rac_get_prob(c, *ps); + } + + return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; +} + +static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p) +{ + return read_mv_component(c, p, 1); +} + +static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p) +{ + return read_mv_component(c, p, 0); +} + +static av_always_inline +const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7) +{ + if (is_vp7) + return vp7_submv_prob; + + if (left == top) + return vp8_submv_prob[4 - !!left]; + if (!top) + return vp8_submv_prob[2]; + return vp8_submv_prob[1 - !!left]; +} + +/** + * Split motion vector prediction, 16.4. + * @returns the number of motion vectors parsed (2, 4 or 16) + */ +static av_always_inline +int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, + int layout, int is_vp7) +{ + int part_idx; + int n, num; + VP8Macroblock *top_mb; + VP8Macroblock *left_mb = &mb[-1]; + const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning]; + const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx; + VP56mv *top_mv; + VP56mv *left_mv = left_mb->bmv; + VP56mv *cur_mv = mb->bmv; + + if (!layout) // layout is inlined, s->mb_layout is not + top_mb = &mb[2]; + else + top_mb = &mb[-s->mb_width - 1]; + mbsplits_top = vp8_mbsplits[top_mb->partitioning]; + top_mv = top_mb->bmv; + + if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) { + if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) + part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]); + else + part_idx = VP8_SPLITMVMODE_8x8; + } else { + part_idx = VP8_SPLITMVMODE_4x4; + } + + num = vp8_mbsplit_count[part_idx]; + mbsplits_cur = vp8_mbsplits[part_idx], + firstidx = vp8_mbfirstidx[part_idx]; + mb->partitioning = part_idx; + + for (n = 0; n < num; n++) { + int k = firstidx[n]; + uint32_t left, above; + const uint8_t *submv_prob; + + if (!(k & 3)) + left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]); + else + left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]); + if (k <= 3) + above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]); + else + above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]); + + submv_prob = get_submv_prob(left, above, is_vp7); + + if (vp56_rac_get_prob_branchy(c, submv_prob[0])) { + if (vp56_rac_get_prob_branchy(c, submv_prob[1])) { + if (vp56_rac_get_prob_branchy(c, submv_prob[2])) { + mb->bmv[n].y = mb->mv.y + + read_mv_component(c, s->prob->mvc[0], is_vp7); + mb->bmv[n].x = mb->mv.x + + read_mv_component(c, s->prob->mvc[1], is_vp7); + } else { + AV_ZERO32(&mb->bmv[n]); + } + } else { + AV_WN32A(&mb->bmv[n], above); + } + } else { + AV_WN32A(&mb->bmv[n], left); + } + } + + return num; +} + +/** + * The vp7 reference decoder uses a padding macroblock column (added to right + * edge of the frame) to guard against illegal macroblock offsets. The + * algorithm has bugs that permit offsets to straddle the padding column. + * This function replicates those bugs. + * + * @param[out] edge_x macroblock x address + * @param[out] edge_y macroblock y address + * + * @return macroblock offset legal (boolean) + */ +static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width, + int xoffset, int yoffset, int boundary, + int *edge_x, int *edge_y) +{ + int vwidth = mb_width + 1; + int new = (mb_y + yoffset) * vwidth + mb_x + xoffset; + if (new < boundary || new % vwidth == vwidth - 1) + return 0; + *edge_y = new / vwidth; + *edge_x = new % vwidth; + return 1; +} + +static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock) +{ + return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0]; +} + +static av_always_inline +void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb, + int mb_x, int mb_y, int layout) +{ + VP8Macroblock *mb_edge[12]; + enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR }; + enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT }; + int idx = CNT_ZERO; + VP56mv near_mv[3]; + uint8_t cnt[3] = { 0 }; + VP56RangeCoder *c = &s->c; + int i; + + AV_ZERO32(&near_mv[0]); + AV_ZERO32(&near_mv[1]); + AV_ZERO32(&near_mv[2]); + + for (i = 0; i < VP7_MV_PRED_COUNT; i++) { + const VP7MVPred * pred = &vp7_mv_pred[i]; + int edge_x, edge_y; + + if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset, + pred->yoffset, !s->profile, &edge_x, &edge_y)) { + VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1) + ? s->macroblocks_base + 1 + edge_x + + (s->mb_width + 1) * (edge_y + 1) + : s->macroblocks + edge_x + + (s->mb_height - edge_y - 1) * 2; + uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock)); + if (mv) { + if (AV_RN32A(&near_mv[CNT_NEAREST])) { + if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) { + idx = CNT_NEAREST; + } else if (AV_RN32A(&near_mv[CNT_NEAR])) { + if (mv != AV_RN32A(&near_mv[CNT_NEAR])) + continue; + idx = CNT_NEAR; + } else { + AV_WN32A(&near_mv[CNT_NEAR], mv); + idx = CNT_NEAR; + } + } else { + AV_WN32A(&near_mv[CNT_NEAREST], mv); + idx = CNT_NEAREST; + } + } else { + idx = CNT_ZERO; + } + } else { + idx = CNT_ZERO; + } + cnt[idx] += vp7_mv_pred[i].score; + } + + mb->partitioning = VP8_SPLITMVMODE_NONE; + + if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) { + mb->mode = VP8_MVMODE_MV; + + if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) { + + if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) { + + if (cnt[CNT_NEAREST] > cnt[CNT_NEAR]) + AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST])); + else + AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR])); + + if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) { + mb->mode = VP8_MVMODE_SPLIT; + mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1]; + } else { + mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]); + mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]); + mb->bmv[0] = mb->mv; + } + } else { + mb->mv = near_mv[CNT_NEAR]; + mb->bmv[0] = mb->mv; + } + } else { + mb->mv = near_mv[CNT_NEAREST]; + mb->bmv[0] = mb->mv; + } + } else { + mb->mode = VP8_MVMODE_ZERO; + AV_ZERO32(&mb->mv); + mb->bmv[0] = mb->mv; + } +} + +static av_always_inline +void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb, + int mb_x, int mb_y, int layout) +{ + VP8Macroblock *mb_edge[3] = { 0 /* top */, + mb - 1 /* left */, + 0 /* top-left */ }; + enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; + enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT }; + int idx = CNT_ZERO; + int cur_sign_bias = s->sign_bias[mb->ref_frame]; + int8_t *sign_bias = s->sign_bias; + VP56mv near_mv[4]; + uint8_t cnt[4] = { 0 }; + VP56RangeCoder *c = &s->c; + + if (!layout) { // layout is inlined (s->mb_layout is not) + mb_edge[0] = mb + 2; + mb_edge[2] = mb + 1; + } else { + mb_edge[0] = mb - s->mb_width - 1; + mb_edge[2] = mb - s->mb_width - 2; + } + + AV_ZERO32(&near_mv[0]); + AV_ZERO32(&near_mv[1]); + AV_ZERO32(&near_mv[2]); + + /* Process MB on top, left and top-left */ +#define MV_EDGE_CHECK(n) \ + { \ + VP8Macroblock *edge = mb_edge[n]; \ + int edge_ref = edge->ref_frame; \ + if (edge_ref != VP56_FRAME_CURRENT) { \ + uint32_t mv = AV_RN32A(&edge->mv); \ + if (mv) { \ + if (cur_sign_bias != sign_bias[edge_ref]) { \ + /* SWAR negate of the values in mv. */ \ + mv = ~mv; \ + mv = ((mv & 0x7fff7fff) + \ + 0x00010001) ^ (mv & 0x80008000); \ + } \ + if (!n || mv != AV_RN32A(&near_mv[idx])) \ + AV_WN32A(&near_mv[++idx], mv); \ + cnt[idx] += 1 + (n != 2); \ + } else \ + cnt[CNT_ZERO] += 1 + (n != 2); \ + } \ + } + + MV_EDGE_CHECK(0) + MV_EDGE_CHECK(1) + MV_EDGE_CHECK(2) + + mb->partitioning = VP8_SPLITMVMODE_NONE; + if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) { + mb->mode = VP8_MVMODE_MV; + + /* If we have three distinct MVs, merge first and last if they're the same */ + if (cnt[CNT_SPLITMV] && + AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT])) + cnt[CNT_NEAREST] += 1; + + /* Swap near and nearest if necessary */ + if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) { + FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]); + FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); + } + + if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) { + if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) { + /* Choose the best mv out of 0,0 and the nearest mv */ + clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]); + cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + + (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 + + (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); + + if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) { + mb->mode = VP8_MVMODE_SPLIT; + mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1]; + } else { + mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]); + mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]); + mb->bmv[0] = mb->mv; + } + } else { + clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]); + mb->bmv[0] = mb->mv; + } + } else { + clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]); + mb->bmv[0] = mb->mv; + } + } else { + mb->mode = VP8_MVMODE_ZERO; + AV_ZERO32(&mb->mv); + mb->bmv[0] = mb->mv; + } +} + +static av_always_inline +void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, + int mb_x, int keyframe, int layout) +{ + uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb; + + if (layout) { + VP8Macroblock *mb_top = mb - s->mb_width - 1; + memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4); + } + if (keyframe) { + int x, y; + uint8_t *top; + uint8_t *const left = s->intra4x4_pred_mode_left; + if (layout) + top = mb->intra4x4_pred_mode_top; + else + top = s->intra4x4_pred_mode_top + 4 * mb_x; + for (y = 0; y < 4; y++) { + for (x = 0; x < 4; x++) { + const uint8_t *ctx; + ctx = vp8_pred4x4_prob_intra[top[x]][left[y]]; + *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); + left[y] = top[x] = *intra4x4; + intra4x4++; + } + } + } else { + int i; + for (i = 0; i < 16; i++) + intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, + vp8_pred4x4_prob_inter); + } +} + +static av_always_inline +void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds, + VP8Macroblock *mb, int mb_x, int mb_y, + uint8_t *segment, uint8_t *ref, int layout, int is_vp7) +{ + VP56RangeCoder *c = &s->c; + static const char * const vp7_feature_name[] = { "q-index", + "lf-delta", + "partial-golden-update", + "blit-pitch" }; + if (is_vp7) { + int i; + *segment = 0; + for (i = 0; i < 4; i++) { + if (s->feature_enabled[i]) { + if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) { + int index = vp8_rac_get_tree(c, vp7_feature_index_tree, + s->feature_index_prob[i]); + av_log(s->avctx, AV_LOG_WARNING, + "Feature %s present in macroblock (value 0x%x)\n", + vp7_feature_name[i], s->feature_value[i][index]); + } + } + } + } else if (s->segmentation.update_map) { + int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]); + *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit; + } else if (s->segmentation.enabled) + *segment = ref ? *ref : *segment; + mb->segment = *segment; + + mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0; + + if (s->keyframe) { + mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, + vp8_pred16x16_prob_intra); + + if (mb->mode == MODE_I4x4) { + decode_intra4x4_modes(s, c, mb, mb_x, 1, layout); + } else { + const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode + : vp8_pred4x4_mode)[mb->mode] * 0x01010101u; + if (s->mb_layout) + AV_WN32A(mb->intra4x4_pred_mode_top, modes); + else + AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes); + AV_WN32A(s->intra4x4_pred_mode_left, modes); + } + + mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, + vp8_pred8x8c_prob_intra); + mb->ref_frame = VP56_FRAME_CURRENT; + } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) { + // inter MB, 16.2 + if (vp56_rac_get_prob_branchy(c, s->prob->last)) + mb->ref_frame = + (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */ + : VP56_FRAME_GOLDEN; + else + mb->ref_frame = VP56_FRAME_PREVIOUS; + s->ref_count[mb->ref_frame - 1]++; + + // motion vectors, 16.3 + if (is_vp7) + vp7_decode_mvs(s, mb, mb_x, mb_y, layout); + else + vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout); + } else { + // intra MB, 16.1 + mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); + + if (mb->mode == MODE_I4x4) + decode_intra4x4_modes(s, c, mb, mb_x, 0, layout); + + mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, + s->prob->pred8x8c); + mb->ref_frame = VP56_FRAME_CURRENT; + mb->partitioning = VP8_SPLITMVMODE_NONE; + AV_ZERO32(&mb->bmv[0]); + } +} + +/** + * @param r arithmetic bitstream reader context + * @param block destination for block coefficients + * @param probs probabilities to use when reading trees from the bitstream + * @param i initial coeff index, 0 unless a separate DC block is coded + * @param qmul array holding the dc/ac dequant factor at position 0/1 + * + * @return 0 if no coeffs were decoded + * otherwise, the index of the last coeff decoded plus one + */ +static av_always_inline +int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16], + uint8_t probs[16][3][NUM_DCT_TOKENS - 1], + int i, uint8_t *token_prob, int16_t qmul[2], + const uint8_t scan[16], int vp7) +{ + VP56RangeCoder c = *r; + goto skip_eob; + do { + int coeff; +restart: + if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB + break; + +skip_eob: + if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0 + if (++i == 16) + break; // invalid input; blocks should end with EOB + token_prob = probs[i][0]; + if (vp7) + goto restart; + goto skip_eob; + } + + if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1 + coeff = 1; + token_prob = probs[i + 1][1]; + } else { + if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4 + coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]); + if (coeff) + coeff += vp56_rac_get_prob(&c, token_prob[5]); + coeff += 2; + } else { + // DCT_CAT* + if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) { + if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1 + coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]); + } else { // DCT_CAT2 + coeff = 7; + coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1; + coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]); + } + } else { // DCT_CAT3 and up + int a = vp56_rac_get_prob(&c, token_prob[8]); + int b = vp56_rac_get_prob(&c, token_prob[9 + a]); + int cat = (a << 1) + b; + coeff = 3 + (8 << cat); + coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]); + } + } + token_prob = probs[i + 1][2]; + } + block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i]; + } while (++i < 16); + + *r = c; + return i; +} + +static av_always_inline +int inter_predict_dc(int16_t block[16], int16_t pred[2]) +{ + int16_t dc = block[0]; + int ret = 0; + + if (pred[1] > 3) { + dc += pred[0]; + ret = 1; + } + + if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) { + block[0] = pred[0] = dc; + pred[1] = 0; + } else { + if (pred[0] == dc) + pred[1]++; + block[0] = pred[0] = dc; + } + + return ret; +} + +static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r, + int16_t block[16], + uint8_t probs[16][3][NUM_DCT_TOKENS - 1], + int i, uint8_t *token_prob, + int16_t qmul[2], + const uint8_t scan[16]) +{ + return decode_block_coeffs_internal(r, block, probs, i, + token_prob, qmul, scan, IS_VP7); +} + +#ifndef vp8_decode_block_coeffs_internal +static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r, + int16_t block[16], + uint8_t probs[16][3][NUM_DCT_TOKENS - 1], + int i, uint8_t *token_prob, + int16_t qmul[2]) +{ + return decode_block_coeffs_internal(r, block, probs, i, + token_prob, qmul, ff_zigzag_scan, IS_VP8); +} +#endif + +/** + * @param c arithmetic bitstream reader context + * @param block destination for block coefficients + * @param probs probabilities to use when reading trees from the bitstream + * @param i initial coeff index, 0 unless a separate DC block is coded + * @param zero_nhood the initial prediction context for number of surrounding + * all-zero blocks (only left/top, so 0-2) + * @param qmul array holding the dc/ac dequant factor at position 0/1 + * @param scan scan pattern (VP7 only) + * + * @return 0 if no coeffs were decoded + * otherwise, the index of the last coeff decoded plus one + */ +static av_always_inline +int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16], + uint8_t probs[16][3][NUM_DCT_TOKENS - 1], + int i, int zero_nhood, int16_t qmul[2], + const uint8_t scan[16], int vp7) +{ + uint8_t *token_prob = probs[i][zero_nhood]; + if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB + return 0; + return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i, + token_prob, qmul, scan) + : vp8_decode_block_coeffs_internal(c, block, probs, i, + token_prob, qmul); +} + +static av_always_inline +void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, + VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9], + int is_vp7) +{ + int i, x, y, luma_start = 0, luma_ctx = 3; + int nnz_pred, nnz, nnz_total = 0; + int segment = mb->segment; + int block_dc = 0; + + if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) { + nnz_pred = t_nnz[8] + l_nnz[8]; + + // decode DC values and do hadamard + nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, + nnz_pred, s->qmat[segment].luma_dc_qmul, + ff_zigzag_scan, is_vp7); + l_nnz[8] = t_nnz[8] = !!nnz; + + if (is_vp7 && mb->mode > MODE_I4x4) { + nnz |= inter_predict_dc(td->block_dc, + s->inter_dc_pred[mb->ref_frame - 1]); + } + + if (nnz) { + nnz_total += nnz; + block_dc = 1; + if (nnz == 1) + s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc); + else + s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc); + } + luma_start = 1; + luma_ctx = 0; + } + + // luma blocks + for (y = 0; y < 4; y++) + for (x = 0; x < 4; x++) { + nnz_pred = l_nnz[y] + t_nnz[x]; + nnz = decode_block_coeffs(c, td->block[y][x], + s->prob->token[luma_ctx], + luma_start, nnz_pred, + s->qmat[segment].luma_qmul, + s->prob[0].scan, is_vp7); + /* nnz+block_dc may be one more than the actual last index, + * but we don't care */ + td->non_zero_count_cache[y][x] = nnz + block_dc; + t_nnz[x] = l_nnz[y] = !!nnz; + nnz_total += nnz; + } + + // chroma blocks + // TODO: what to do about dimensions? 2nd dim for luma is x, + // but for chroma it's (y<<1)|x + for (i = 4; i < 6; i++) + for (y = 0; y < 2; y++) + for (x = 0; x < 2; x++) { + nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x]; + nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x], + s->prob->token[2], 0, nnz_pred, + s->qmat[segment].chroma_qmul, + s->prob[0].scan, is_vp7); + td->non_zero_count_cache[i][(y << 1) + x] = nnz; + t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz; + nnz_total += nnz; + } + + // if there were no coded coeffs despite the macroblock not being marked skip, + // we MUST not do the inner loop filter and should not do IDCT + // Since skip isn't used for bitstream prediction, just manually set it. + if (!nnz_total) + mb->skip = 1; +} + +static av_always_inline +void backup_mb_border(uint8_t *top_border, uint8_t *src_y, + uint8_t *src_cb, uint8_t *src_cr, + ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple) +{ + AV_COPY128(top_border, src_y + 15 * linesize); + if (!simple) { + AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize); + AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize); + } +} + +static av_always_inline +void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, + uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x, + int mb_y, int mb_width, int simple, int xchg) +{ + uint8_t *top_border_m1 = top_border - 32; // for TL prediction + src_y -= linesize; + src_cb -= uvlinesize; + src_cr -= uvlinesize; + +#define XCHG(a, b, xchg) \ + do { \ + if (xchg) \ + AV_SWAP64(b, a); \ + else \ + AV_COPY64(b, a); \ + } while (0) + + XCHG(top_border_m1 + 8, src_y - 8, xchg); + XCHG(top_border, src_y, xchg); + XCHG(top_border + 8, src_y + 8, 1); + if (mb_x < mb_width - 1) + XCHG(top_border + 32, src_y + 16, 1); + + // only copy chroma for normal loop filter + // or to initialize the top row to 127 + if (!simple || !mb_y) { + XCHG(top_border_m1 + 16, src_cb - 8, xchg); + XCHG(top_border_m1 + 24, src_cr - 8, xchg); + XCHG(top_border + 16, src_cb, 1); + XCHG(top_border + 24, src_cr, 1); + } +} + +static av_always_inline +int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y) +{ + if (!mb_x) + return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; + else + return mb_y ? mode : LEFT_DC_PRED8x8; +} + +static av_always_inline +int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7) +{ + if (!mb_x) + return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8); + else + return mb_y ? mode : HOR_PRED8x8; +} + +static av_always_inline +int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7) +{ + switch (mode) { + case DC_PRED8x8: + return check_dc_pred8x8_mode(mode, mb_x, mb_y); + case VERT_PRED8x8: + return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode; + case HOR_PRED8x8: + return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode; + case PLANE_PRED8x8: /* TM */ + return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7); + } + return mode; +} + +static av_always_inline +int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7) +{ + if (!mb_x) { + return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED); + } else { + return mb_y ? mode : HOR_VP8_PRED; + } +} + +static av_always_inline +int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, + int *copy_buf, int vp7) +{ + switch (mode) { + case VERT_PRED: + if (!mb_x && mb_y) { + *copy_buf = 1; + return mode; + } + /* fall-through */ + case DIAG_DOWN_LEFT_PRED: + case VERT_LEFT_PRED: + return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode; + case HOR_PRED: + if (!mb_y) { + *copy_buf = 1; + return mode; + } + /* fall-through */ + case HOR_UP_PRED: + return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode; + case TM_VP8_PRED: + return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7); + case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions + * as 16x16/8x8 DC */ + case DIAG_DOWN_RIGHT_PRED: + case VERT_RIGHT_PRED: + case HOR_DOWN_PRED: + if (!mb_y || !mb_x) + *copy_buf = 1; + return mode; + } + return mode; +} + +static av_always_inline +void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], + VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7) +{ + int x, y, mode, nnz; + uint32_t tr; + + /* for the first row, we need to run xchg_mb_border to init the top edge + * to 127 otherwise, skip it if we aren't going to deblock */ + if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0) + xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2], + s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, + s->filter.simple, 1); + + if (mb->mode < MODE_I4x4) { + mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7); + s->hpc.pred16x16[mode](dst[0], s->linesize); + } else { + uint8_t *ptr = dst[0]; + uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb; + const uint8_t lo = is_vp7 ? 128 : 127; + const uint8_t hi = is_vp7 ? 128 : 129; + uint8_t tr_top[4] = { lo, lo, lo, lo }; + + // all blocks on the right edge of the macroblock use bottom edge + // the top macroblock for their topright edge + uint8_t *tr_right = ptr - s->linesize + 16; + + // if we're on the right edge of the frame, said edge is extended + // from the top macroblock + if (mb_y && mb_x == s->mb_width - 1) { + tr = tr_right[-1] * 0x01010101u; + tr_right = (uint8_t *) &tr; + } + + if (mb->skip) + AV_ZERO128(td->non_zero_count_cache); + + for (y = 0; y < 4; y++) { + uint8_t *topright = ptr + 4 - s->linesize; + for (x = 0; x < 4; x++) { + int copy = 0; + ptrdiff_t linesize = s->linesize; + uint8_t *dst = ptr + 4 * x; + LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]); + + if ((y == 0 || x == 3) && mb_y == 0) { + topright = tr_top; + } else if (x == 3) + topright = tr_right; + + mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, + mb_y + y, ©, is_vp7); + if (copy) { + dst = copy_dst + 12; + linesize = 8; + if (!(mb_y + y)) { + copy_dst[3] = lo; + AV_WN32A(copy_dst + 4, lo * 0x01010101U); + } else { + AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize); + if (!(mb_x + x)) { + copy_dst[3] = hi; + } else { + copy_dst[3] = ptr[4 * x - s->linesize - 1]; + } + } + if (!(mb_x + x)) { + copy_dst[11] = + copy_dst[19] = + copy_dst[27] = + copy_dst[35] = hi; + } else { + copy_dst[11] = ptr[4 * x - 1]; + copy_dst[19] = ptr[4 * x + s->linesize - 1]; + copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1]; + copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1]; + } + } + s->hpc.pred4x4[mode](dst, topright, linesize); + if (copy) { + AV_COPY32(ptr + 4 * x, copy_dst + 12); + AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20); + AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28); + AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36); + } + + nnz = td->non_zero_count_cache[y][x]; + if (nnz) { + if (nnz == 1) + s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x, + td->block[y][x], s->linesize); + else + s->vp8dsp.vp8_idct_add(ptr + 4 * x, + td->block[y][x], s->linesize); + } + topright += 4; + } + + ptr += 4 * s->linesize; + intra4x4 += 4; + } + } + + mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, + mb_x, mb_y, is_vp7); + s->hpc.pred8x8[mode](dst[1], s->uvlinesize); + s->hpc.pred8x8[mode](dst[2], s->uvlinesize); + + if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0) + xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2], + s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, + s->filter.simple, 0); +} + +static const uint8_t subpel_idx[3][8] = { + { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels, + // also function pointer index + { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required + { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels +}; + +/** + * luma MC function + * + * @param s VP8 decoding context + * @param dst target buffer for block data at block position + * @param ref reference picture buffer at origin (0, 0) + * @param mv motion vector (relative to block position) to get pixel data from + * @param x_off horizontal position of block from origin (0, 0) + * @param y_off vertical position of block from origin (0, 0) + * @param block_w width of block (16, 8 or 4) + * @param block_h height of block (always same as block_w) + * @param width width of src/dst plane data + * @param height height of src/dst plane data + * @param linesize size of a single line of plane data, including padding + * @param mc_func motion compensation function pointers (bilinear or sixtap MC) + */ +static av_always_inline +void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst, + ThreadFrame *ref, const VP56mv *mv, + int x_off, int y_off, int block_w, int block_h, + int width, int height, ptrdiff_t linesize, + vp8_mc_func mc_func[3][3]) +{ + uint8_t *src = ref->f->data[0]; + + if (AV_RN32A(mv)) { + ptrdiff_t src_linesize = linesize; + + int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx]; + int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my]; + + x_off += mv->x >> 2; + y_off += mv->y >> 2; + + // edge emulation + ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0); + src += y_off * linesize + x_off; + if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || + y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { + s->vdsp.emulated_edge_mc(td->edge_emu_buffer, + src - my_idx * linesize - mx_idx, + EDGE_EMU_LINESIZE, linesize, + block_w + subpel_idx[1][mx], + block_h + subpel_idx[1][my], + x_off - mx_idx, y_off - my_idx, + width, height); + src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; + src_linesize = EDGE_EMU_LINESIZE; + } + mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my); + } else { + ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0); + mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, + linesize, block_h, 0, 0); + } +} + +/** + * chroma MC function + * + * @param s VP8 decoding context + * @param dst1 target buffer for block data at block position (U plane) + * @param dst2 target buffer for block data at block position (V plane) + * @param ref reference picture buffer at origin (0, 0) + * @param mv motion vector (relative to block position) to get pixel data from + * @param x_off horizontal position of block from origin (0, 0) + * @param y_off vertical position of block from origin (0, 0) + * @param block_w width of block (16, 8 or 4) + * @param block_h height of block (always same as block_w) + * @param width width of src/dst plane data + * @param height height of src/dst plane data + * @param linesize size of a single line of plane data, including padding + * @param mc_func motion compensation function pointers (bilinear or sixtap MC) + */ +static av_always_inline +void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, + uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv, + int x_off, int y_off, int block_w, int block_h, + int width, int height, ptrdiff_t linesize, + vp8_mc_func mc_func[3][3]) +{ + uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2]; + + if (AV_RN32A(mv)) { + int mx = mv->x & 7, mx_idx = subpel_idx[0][mx]; + int my = mv->y & 7, my_idx = subpel_idx[0][my]; + + x_off += mv->x >> 3; + y_off += mv->y >> 3; + + // edge emulation + src1 += y_off * linesize + x_off; + src2 += y_off * linesize + x_off; + ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0); + if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || + y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { + s->vdsp.emulated_edge_mc(td->edge_emu_buffer, + src1 - my_idx * linesize - mx_idx, + EDGE_EMU_LINESIZE, linesize, + block_w + subpel_idx[1][mx], + block_h + subpel_idx[1][my], + x_off - mx_idx, y_off - my_idx, width, height); + src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; + mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my); + + s->vdsp.emulated_edge_mc(td->edge_emu_buffer, + src2 - my_idx * linesize - mx_idx, + EDGE_EMU_LINESIZE, linesize, + block_w + subpel_idx[1][mx], + block_h + subpel_idx[1][my], + x_off - mx_idx, y_off - my_idx, width, height); + src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; + mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my); + } else { + mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); + mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my); + } + } else { + ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0); + mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0); + mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0); + } +} + +static av_always_inline +void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], + ThreadFrame *ref_frame, int x_off, int y_off, + int bx_off, int by_off, int block_w, int block_h, + int width, int height, VP56mv *mv) +{ + VP56mv uvmv = *mv; + + /* Y */ + vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off, + ref_frame, mv, x_off + bx_off, y_off + by_off, + block_w, block_h, width, height, s->linesize, + s->put_pixels_tab[block_w == 8]); + + /* U/V */ + if (s->profile == 3) { + /* this block only applies VP8; it is safe to check + * only the profile, as VP7 profile <= 1 */ + uvmv.x &= ~7; + uvmv.y &= ~7; + } + x_off >>= 1; + y_off >>= 1; + bx_off >>= 1; + by_off >>= 1; + width >>= 1; + height >>= 1; + block_w >>= 1; + block_h >>= 1; + vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off, + dst[2] + by_off * s->uvlinesize + bx_off, ref_frame, + &uvmv, x_off + bx_off, y_off + by_off, + block_w, block_h, width, height, s->uvlinesize, + s->put_pixels_tab[1 + (block_w == 4)]); +} + +/* Fetch pixels for estimated mv 4 macroblocks ahead. + * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ +static av_always_inline +void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, + int mb_xy, int ref) +{ + /* Don't prefetch refs that haven't been used very often this frame. */ + if (s->ref_count[ref - 1] > (mb_xy >> 5)) { + int x_off = mb_x << 4, y_off = mb_y << 4; + int mx = (mb->mv.x >> 2) + x_off + 8; + int my = (mb->mv.y >> 2) + y_off; + uint8_t **src = s->framep[ref]->tf.f->data; + int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64; + /* For threading, a ff_thread_await_progress here might be useful, but + * it actually slows down the decoder. Since a bad prefetch doesn't + * generate bad decoder output, we don't run it here. */ + s->vdsp.prefetch(src[0] + off, s->linesize, 4); + off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64; + s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2); + } +} + +/** + * Apply motion vectors to prediction buffer, chapter 18. + */ +static av_always_inline +void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], + VP8Macroblock *mb, int mb_x, int mb_y) +{ + int x_off = mb_x << 4, y_off = mb_y << 4; + int width = 16 * s->mb_width, height = 16 * s->mb_height; + ThreadFrame *ref = &s->framep[mb->ref_frame]->tf; + VP56mv *bmv = mb->bmv; + + switch (mb->partitioning) { + case VP8_SPLITMVMODE_NONE: + vp8_mc_part(s, td, dst, ref, x_off, y_off, + 0, 0, 16, 16, width, height, &mb->mv); + break; + case VP8_SPLITMVMODE_4x4: { + int x, y; + VP56mv uvmv; + + /* Y */ + for (y = 0; y < 4; y++) { + for (x = 0; x < 4; x++) { + vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4, + ref, &bmv[4 * y + x], + 4 * x + x_off, 4 * y + y_off, 4, 4, + width, height, s->linesize, + s->put_pixels_tab[2]); + } + } + + /* U/V */ + x_off >>= 1; + y_off >>= 1; + width >>= 1; + height >>= 1; + for (y = 0; y < 2; y++) { + for (x = 0; x < 2; x++) { + uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x + + mb->bmv[2 * y * 4 + 2 * x + 1].x + + mb->bmv[(2 * y + 1) * 4 + 2 * x ].x + + mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x; + uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y + + mb->bmv[2 * y * 4 + 2 * x + 1].y + + mb->bmv[(2 * y + 1) * 4 + 2 * x ].y + + mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y; + uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2; + uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2; + if (s->profile == 3) { + uvmv.x &= ~7; + uvmv.y &= ~7; + } + vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4, + dst[2] + 4 * y * s->uvlinesize + x * 4, ref, + &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4, + width, height, s->uvlinesize, + s->put_pixels_tab[2]); + } + } + break; + } + case VP8_SPLITMVMODE_16x8: + vp8_mc_part(s, td, dst, ref, x_off, y_off, + 0, 0, 16, 8, width, height, &bmv[0]); + vp8_mc_part(s, td, dst, ref, x_off, y_off, + 0, 8, 16, 8, width, height, &bmv[1]); + break; + case VP8_SPLITMVMODE_8x16: + vp8_mc_part(s, td, dst, ref, x_off, y_off, + 0, 0, 8, 16, width, height, &bmv[0]); + vp8_mc_part(s, td, dst, ref, x_off, y_off, + 8, 0, 8, 16, width, height, &bmv[1]); + break; + case VP8_SPLITMVMODE_8x8: + vp8_mc_part(s, td, dst, ref, x_off, y_off, + 0, 0, 8, 8, width, height, &bmv[0]); + vp8_mc_part(s, td, dst, ref, x_off, y_off, + 8, 0, 8, 8, width, height, &bmv[1]); + vp8_mc_part(s, td, dst, ref, x_off, y_off, + 0, 8, 8, 8, width, height, &bmv[2]); + vp8_mc_part(s, td, dst, ref, x_off, y_off, + 8, 8, 8, 8, width, height, &bmv[3]); + break; + } +} + +static av_always_inline +void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb) +{ + int x, y, ch; + + if (mb->mode != MODE_I4x4) { + uint8_t *y_dst = dst[0]; + for (y = 0; y < 4; y++) { + uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]); + if (nnz4) { + if (nnz4 & ~0x01010101) { + for (x = 0; x < 4; x++) { + if ((uint8_t) nnz4 == 1) + s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x, + td->block[y][x], + s->linesize); + else if ((uint8_t) nnz4 > 1) + s->vp8dsp.vp8_idct_add(y_dst + 4 * x, + td->block[y][x], + s->linesize); + nnz4 >>= 8; + if (!nnz4) + break; + } + } else { + s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize); + } + } + y_dst += 4 * s->linesize; + } + } + + for (ch = 0; ch < 2; ch++) { + uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]); + if (nnz4) { + uint8_t *ch_dst = dst[1 + ch]; + if (nnz4 & ~0x01010101) { + for (y = 0; y < 2; y++) { + for (x = 0; x < 2; x++) { + if ((uint8_t) nnz4 == 1) + s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x, + td->block[4 + ch][(y << 1) + x], + s->uvlinesize); + else if ((uint8_t) nnz4 > 1) + s->vp8dsp.vp8_idct_add(ch_dst + 4 * x, + td->block[4 + ch][(y << 1) + x], + s->uvlinesize); + nnz4 >>= 8; + if (!nnz4) + goto chroma_idct_end; + } + ch_dst += 4 * s->uvlinesize; + } + } else { + s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize); + } + } +chroma_idct_end: + ; + } +} + +static av_always_inline +void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, + VP8FilterStrength *f, int is_vp7) +{ + int interior_limit, filter_level; + + if (s->segmentation.enabled) { + filter_level = s->segmentation.filter_level[mb->segment]; + if (!s->segmentation.absolute_vals) + filter_level += s->filter.level; + } else + filter_level = s->filter.level; + + if (s->lf_delta.enabled) { + filter_level += s->lf_delta.ref[mb->ref_frame]; + filter_level += s->lf_delta.mode[mb->mode]; + } + + filter_level = av_clip_uintp2(filter_level, 6); + + interior_limit = filter_level; + if (s->filter.sharpness) { + interior_limit >>= (s->filter.sharpness + 3) >> 2; + interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); + } + interior_limit = FFMAX(interior_limit, 1); + + f->filter_level = filter_level; + f->inner_limit = interior_limit; + f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 || + mb->mode == VP8_MVMODE_SPLIT; +} + +static av_always_inline +void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, + int mb_x, int mb_y, int is_vp7) +{ + int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh; + int filter_level = f->filter_level; + int inner_limit = f->inner_limit; + int inner_filter = f->inner_filter; + ptrdiff_t linesize = s->linesize; + ptrdiff_t uvlinesize = s->uvlinesize; + static const uint8_t hev_thresh_lut[2][64] = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2 } + }; + + if (!filter_level) + return; + + if (is_vp7) { + bedge_lim_y = filter_level; + bedge_lim_uv = filter_level * 2; + mbedge_lim = filter_level + 2; + } else { + bedge_lim_y = + bedge_lim_uv = filter_level * 2 + inner_limit; + mbedge_lim = bedge_lim_y + 4; + } + + hev_thresh = hev_thresh_lut[s->keyframe][filter_level]; + + if (mb_x) { + s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize, + mbedge_lim, inner_limit, hev_thresh); + s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize, + mbedge_lim, inner_limit, hev_thresh); + } + +#define H_LOOP_FILTER_16Y_INNER(cond) \ + if (cond && inner_filter) { \ + s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \ + bedge_lim_y, inner_limit, \ + hev_thresh); \ + s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \ + bedge_lim_y, inner_limit, \ + hev_thresh); \ + s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \ + bedge_lim_y, inner_limit, \ + hev_thresh); \ + s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \ + uvlinesize, bedge_lim_uv, \ + inner_limit, hev_thresh); \ + } + + H_LOOP_FILTER_16Y_INNER(!is_vp7) + + if (mb_y) { + s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize, + mbedge_lim, inner_limit, hev_thresh); + s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize, + mbedge_lim, inner_limit, hev_thresh); + } + + if (inner_filter) { + s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize, + linesize, bedge_lim_y, + inner_limit, hev_thresh); + s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize, + linesize, bedge_lim_y, + inner_limit, hev_thresh); + s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize, + linesize, bedge_lim_y, + inner_limit, hev_thresh); + s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize, + dst[2] + 4 * uvlinesize, + uvlinesize, bedge_lim_uv, + inner_limit, hev_thresh); + } + + H_LOOP_FILTER_16Y_INNER(is_vp7) +} + +static av_always_inline +void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, + int mb_x, int mb_y) +{ + int mbedge_lim, bedge_lim; + int filter_level = f->filter_level; + int inner_limit = f->inner_limit; + int inner_filter = f->inner_filter; + ptrdiff_t linesize = s->linesize; + + if (!filter_level) + return; + + bedge_lim = 2 * filter_level + inner_limit; + mbedge_lim = bedge_lim + 4; + + if (mb_x) + s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim); + if (inner_filter) { + s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim); + s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim); + s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim); + } + + if (mb_y) + s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim); + if (inner_filter) { + s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim); + s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim); + s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim); + } +} + +#define MARGIN (16 << 2) +static av_always_inline +void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe, + VP8Frame *prev_frame, int is_vp7) +{ + VP8Context *s = avctx->priv_data; + int mb_x, mb_y; + + s->mv_bounds.mv_min.y = -MARGIN; + s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; + for (mb_y = 0; mb_y < s->mb_height; mb_y++) { + VP8Macroblock *mb = s->macroblocks_base + + ((s->mb_width + 1) * (mb_y + 1) + 1); + int mb_xy = mb_y * s->mb_width; + + AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101); + + s->mv_bounds.mv_min.x = -MARGIN; + s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; + for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { + if (mb_y == 0) + AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top, + DC_PRED * 0x01010101); + decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy, + prev_frame && prev_frame->seg_map ? + prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7); + s->mv_bounds.mv_min.x -= 64; + s->mv_bounds.mv_max.x -= 64; + } + s->mv_bounds.mv_min.y -= 64; + s->mv_bounds.mv_max.y -= 64; + } +} + +static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame, + VP8Frame *prev_frame) +{ + vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7); +} + +static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame, + VP8Frame *prev_frame) +{ + vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8); +} + +#if HAVE_THREADS +#define check_thread_pos(td, otd, mb_x_check, mb_y_check) \ + do { \ + int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \ + if (atomic_load(&otd->thread_mb_pos) < tmp) { \ + pthread_mutex_lock(&otd->lock); \ + atomic_store(&td->wait_mb_pos, tmp); \ + do { \ + if (atomic_load(&otd->thread_mb_pos) >= tmp) \ + break; \ + pthread_cond_wait(&otd->cond, &otd->lock); \ + } while (1); \ + atomic_store(&td->wait_mb_pos, INT_MAX); \ + pthread_mutex_unlock(&otd->lock); \ + } \ + } while (0) + +#define update_pos(td, mb_y, mb_x) \ + do { \ + int pos = (mb_y << 16) | (mb_x & 0xFFFF); \ + int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \ + (num_jobs > 1); \ + int is_null = !next_td || !prev_td; \ + int pos_check = (is_null) ? 1 : \ + (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) || \ + (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos)); \ + atomic_store(&td->thread_mb_pos, pos); \ + if (sliced_threading && pos_check) { \ + pthread_mutex_lock(&td->lock); \ + pthread_cond_broadcast(&td->cond); \ + pthread_mutex_unlock(&td->lock); \ + } \ + } while (0) +#else +#define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0) +#define update_pos(td, mb_y, mb_x) while(0) +#endif + +static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, + int jobnr, int threadnr, int is_vp7) +{ + VP8Context *s = avctx->priv_data; + VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr]; + int mb_y = atomic_load(&td->thread_mb_pos) >> 16; + int mb_x, mb_xy = mb_y * s->mb_width; + int num_jobs = s->num_jobs; + VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame; + VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)]; + VP8Macroblock *mb; + uint8_t *dst[3] = { + curframe->tf.f->data[0] + 16 * mb_y * s->linesize, + curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize, + curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize + }; + + if (c->end <= c->buffer && c->bits >= 0) + return AVERROR_INVALIDDATA; + + if (mb_y == 0) + prev_td = td; + else + prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs]; + if (mb_y == s->mb_height - 1) + next_td = td; + else + next_td = &s->thread_data[(jobnr + 1) % num_jobs]; + if (s->mb_layout == 1) + mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1); + else { + // Make sure the previous frame has read its segmentation map, + // if we re-use the same map. + if (prev_frame && s->segmentation.enabled && + !s->segmentation.update_map) + ff_thread_await_progress(&prev_frame->tf, mb_y, 0); + mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2; + memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock + AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101); + } + + if (!is_vp7 || mb_y == 0) + memset(td->left_nnz, 0, sizeof(td->left_nnz)); + + td->mv_bounds.mv_min.x = -MARGIN; + td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; + + for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { + if (c->end <= c->buffer && c->bits >= 0) + return AVERROR_INVALIDDATA; + // Wait for previous thread to read mb_x+2, and reach mb_y-1. + if (prev_td != td) { + if (threadnr != 0) { + check_thread_pos(td, prev_td, + mb_x + (is_vp7 ? 2 : 1), + mb_y - (is_vp7 ? 2 : 1)); + } else { + check_thread_pos(td, prev_td, + mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3, + mb_y - (is_vp7 ? 2 : 1)); + } + } + + s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64, + s->linesize, 4); + s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64, + dst[2] - dst[1], 2); + + if (!s->mb_layout) + decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy, + prev_frame && prev_frame->seg_map ? + prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7); + + prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); + + if (!mb->skip) + decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7); + + if (mb->mode <= MODE_I4x4) + intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7); + else + inter_predict(s, td, dst, mb, mb_x, mb_y); + + prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); + + if (!mb->skip) { + idct_mb(s, td, dst, mb); + } else { + AV_ZERO64(td->left_nnz); + AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned + + /* Reset DC block predictors if they would exist + * if the mb had coefficients */ + if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { + td->left_nnz[8] = 0; + s->top_nnz[mb_x][8] = 0; + } + } + + if (s->deblock_filter) + filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7); + + if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) { + if (s->filter.simple) + backup_mb_border(s->top_border[mb_x + 1], dst[0], + NULL, NULL, s->linesize, 0, 1); + else + backup_mb_border(s->top_border[mb_x + 1], dst[0], + dst[1], dst[2], s->linesize, s->uvlinesize, 0); + } + + prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2); + + dst[0] += 16; + dst[1] += 8; + dst[2] += 8; + td->mv_bounds.mv_min.x -= 64; + td->mv_bounds.mv_max.x -= 64; + + if (mb_x == s->mb_width + 1) { + update_pos(td, mb_y, s->mb_width + 3); + } else { + update_pos(td, mb_y, mb_x); + } + } + return 0; +} + +static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, + int jobnr, int threadnr) +{ + return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1); +} + +static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, + int jobnr, int threadnr) +{ + return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0); +} + +static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata, + int jobnr, int threadnr, int is_vp7) +{ + VP8Context *s = avctx->priv_data; + VP8ThreadData *td = &s->thread_data[threadnr]; + int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs; + AVFrame *curframe = s->curframe->tf.f; + VP8Macroblock *mb; + VP8ThreadData *prev_td, *next_td; + uint8_t *dst[3] = { + curframe->data[0] + 16 * mb_y * s->linesize, + curframe->data[1] + 8 * mb_y * s->uvlinesize, + curframe->data[2] + 8 * mb_y * s->uvlinesize + }; + + if (s->mb_layout == 1) + mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1); + else + mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2; + + if (mb_y == 0) + prev_td = td; + else + prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs]; + if (mb_y == s->mb_height - 1) + next_td = td; + else + next_td = &s->thread_data[(jobnr + 1) % num_jobs]; + + for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) { + VP8FilterStrength *f = &td->filter_strength[mb_x]; + if (prev_td != td) + check_thread_pos(td, prev_td, + (mb_x + 1) + (s->mb_width + 3), mb_y - 1); + if (next_td != td) + if (next_td != &s->thread_data[0]) + check_thread_pos(td, next_td, mb_x + 1, mb_y + 1); + + if (num_jobs == 1) { + if (s->filter.simple) + backup_mb_border(s->top_border[mb_x + 1], dst[0], + NULL, NULL, s->linesize, 0, 1); + else + backup_mb_border(s->top_border[mb_x + 1], dst[0], + dst[1], dst[2], s->linesize, s->uvlinesize, 0); + } + + if (s->filter.simple) + filter_mb_simple(s, dst[0], f, mb_x, mb_y); + else + filter_mb(s, dst, f, mb_x, mb_y, is_vp7); + dst[0] += 16; + dst[1] += 8; + dst[2] += 8; + + update_pos(td, mb_y, (s->mb_width + 3) + mb_x); + } +} + +static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata, + int jobnr, int threadnr) +{ + filter_mb_row(avctx, tdata, jobnr, threadnr, 1); +} + +static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata, + int jobnr, int threadnr) +{ + filter_mb_row(avctx, tdata, jobnr, threadnr, 0); +} + +static av_always_inline +int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr, + int threadnr, int is_vp7) +{ + VP8Context *s = avctx->priv_data; + VP8ThreadData *td = &s->thread_data[jobnr]; + VP8ThreadData *next_td = NULL, *prev_td = NULL; + VP8Frame *curframe = s->curframe; + int mb_y, num_jobs = s->num_jobs; + int ret; + + td->thread_nr = threadnr; + td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr; + td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr; + for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) { + atomic_store(&td->thread_mb_pos, mb_y << 16); + ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr); + if (ret < 0) { + update_pos(td, s->mb_height, INT_MAX & 0xFFFF); + return ret; + } + if (s->deblock_filter) + s->filter_mb_row(avctx, tdata, jobnr, threadnr); + update_pos(td, mb_y, INT_MAX & 0xFFFF); + + td->mv_bounds.mv_min.y -= 64 * num_jobs; + td->mv_bounds.mv_max.y -= 64 * num_jobs; + + if (avctx->active_thread_type == FF_THREAD_FRAME) + ff_thread_report_progress(&curframe->tf, mb_y, 0); + } + + return 0; +} + +static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, + int jobnr, int threadnr) +{ + return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7); +} + +static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, + int jobnr, int threadnr) +{ + return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8); +} + +static av_always_inline +int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, + AVPacket *avpkt, int is_vp7) +{ + VP8Context *s = avctx->priv_data; + int ret, i, referenced, num_jobs; + enum AVDiscard skip_thresh; + VP8Frame *av_uninit(curframe), *prev_frame; + + if (is_vp7) + ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size); + else + ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size); + + if (ret < 0) + goto err; + + if (s->actually_webp) { + // avctx->pix_fmt already set in caller. + } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) { + s->pix_fmt = get_pixel_format(s); + if (s->pix_fmt < 0) { + ret = AVERROR(EINVAL); + goto err; + } + avctx->pix_fmt = s->pix_fmt; + } + + prev_frame = s->framep[VP56_FRAME_CURRENT]; + + referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT || + s->update_altref == VP56_FRAME_CURRENT; + + skip_thresh = !referenced ? AVDISCARD_NONREF + : !s->keyframe ? AVDISCARD_NONKEY + : AVDISCARD_ALL; + + if (avctx->skip_frame >= skip_thresh) { + s->invisible = 1; + memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4); + goto skip_decode; + } + s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh; + + // release no longer referenced frames + for (i = 0; i < 5; i++) + if (s->frames[i].tf.f->buf[0] && + &s->frames[i] != prev_frame && + &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && + &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && + &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) + vp8_release_frame(s, &s->frames[i]); + + curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s); + + if (!s->colorspace) + avctx->colorspace = AVCOL_SPC_BT470BG; + if (s->fullrange) + avctx->color_range = AVCOL_RANGE_JPEG; + else + avctx->color_range = AVCOL_RANGE_MPEG; + + /* Given that arithmetic probabilities are updated every frame, it's quite + * likely that the values we have on a random interframe are complete + * junk if we didn't start decode on a keyframe. So just don't display + * anything rather than junk. */ + if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] || + !s->framep[VP56_FRAME_GOLDEN] || + !s->framep[VP56_FRAME_GOLDEN2])) { + av_log(avctx, AV_LOG_WARNING, + "Discarding interframe without a prior keyframe!\n"); + ret = AVERROR_INVALIDDATA; + goto err; + } + + curframe->tf.f->key_frame = s->keyframe; + curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I + : AV_PICTURE_TYPE_P; + if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0) + goto err; + + // check if golden and altref are swapped + if (s->update_altref != VP56_FRAME_NONE) + s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref]; + else + s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2]; + + if (s->update_golden != VP56_FRAME_NONE) + s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden]; + else + s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN]; + + if (s->update_last) + s->next_framep[VP56_FRAME_PREVIOUS] = curframe; + else + s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS]; + + s->next_framep[VP56_FRAME_CURRENT] = curframe; + + ff_thread_finish_setup(avctx); + + if (avctx->hwaccel) { + ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size); + if (ret < 0) + goto err; + + ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size); + if (ret < 0) + goto err; + + ret = avctx->hwaccel->end_frame(avctx); + if (ret < 0) + goto err; + + } else { + s->linesize = curframe->tf.f->linesize[0]; + s->uvlinesize = curframe->tf.f->linesize[1]; + + memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz)); + /* Zero macroblock structures for top/top-left prediction + * from outside the frame. */ + if (!s->mb_layout) + memset(s->macroblocks + s->mb_height * 2 - 1, 0, + (s->mb_width + 1) * sizeof(*s->macroblocks)); + if (!s->mb_layout && s->keyframe) + memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4); + + memset(s->ref_count, 0, sizeof(s->ref_count)); + + if (s->mb_layout == 1) { + // Make sure the previous frame has read its segmentation map, + // if we re-use the same map. + if (prev_frame && s->segmentation.enabled && + !s->segmentation.update_map) + ff_thread_await_progress(&prev_frame->tf, 1, 0); + if (is_vp7) + vp7_decode_mv_mb_modes(avctx, curframe, prev_frame); + else + vp8_decode_mv_mb_modes(avctx, curframe, prev_frame); + } + + if (avctx->active_thread_type == FF_THREAD_FRAME) + num_jobs = 1; + else + num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count); + s->num_jobs = num_jobs; + s->curframe = curframe; + s->prev_frame = prev_frame; + s->mv_bounds.mv_min.y = -MARGIN; + s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; + for (i = 0; i < MAX_THREADS; i++) { + VP8ThreadData *td = &s->thread_data[i]; + atomic_init(&td->thread_mb_pos, 0); + atomic_init(&td->wait_mb_pos, INT_MAX); + } + if (is_vp7) + avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL, + num_jobs); + else + avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, + num_jobs); + } + + ff_thread_report_progress(&curframe->tf, INT_MAX, 0); + memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4); + +skip_decode: + // if future frames don't use the updated probabilities, + // reset them to the values we saved + if (!s->update_probabilities) + s->prob[0] = s->prob[1]; + + if (!s->invisible) { + if ((ret = av_frame_ref(data, curframe->tf.f)) < 0) + return ret; + *got_frame = 1; + } + + return avpkt->size; +err: + memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4); + return ret; +} + +int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, + AVPacket *avpkt) +{ + return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8); +} + +#if CONFIG_VP7_DECODER +static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, + AVPacket *avpkt) +{ + return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7); +} +#endif /* CONFIG_VP7_DECODER */ + +av_cold int ff_vp8_decode_free(AVCodecContext *avctx) +{ + VP8Context *s = avctx->priv_data; + int i; + + if (!s) + return 0; + + vp8_decode_flush_impl(avctx, 1); + for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) + av_frame_free(&s->frames[i].tf.f); + + return 0; +} + +static av_cold int vp8_init_frames(VP8Context *s) +{ + int i; + for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) { + s->frames[i].tf.f = av_frame_alloc(); + if (!s->frames[i].tf.f) + return AVERROR(ENOMEM); + } + return 0; +} + +static av_always_inline +int vp78_decode_init(AVCodecContext *avctx, int is_vp7) +{ + VP8Context *s = avctx->priv_data; + int ret; + + s->avctx = avctx; + s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7; + s->pix_fmt = AV_PIX_FMT_NONE; + avctx->pix_fmt = AV_PIX_FMT_YUV420P; + avctx->internal->allocate_progress = 1; + + ff_videodsp_init(&s->vdsp, 8); + + ff_vp78dsp_init(&s->vp8dsp); + if (CONFIG_VP7_DECODER && is_vp7) { + ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1); + ff_vp7dsp_init(&s->vp8dsp); + s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter; + s->filter_mb_row = vp7_filter_mb_row; + } else if (CONFIG_VP8_DECODER && !is_vp7) { + ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1); + ff_vp8dsp_init(&s->vp8dsp); + s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter; + s->filter_mb_row = vp8_filter_mb_row; + } + + /* does not change for VP8 */ + memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan)); + + if ((ret = vp8_init_frames(s)) < 0) { + ff_vp8_decode_free(avctx); + return ret; + } + + return 0; +} + +#if CONFIG_VP7_DECODER +static int vp7_decode_init(AVCodecContext *avctx) +{ + return vp78_decode_init(avctx, IS_VP7); +} +#endif /* CONFIG_VP7_DECODER */ + +av_cold int ff_vp8_decode_init(AVCodecContext *avctx) +{ + return vp78_decode_init(avctx, IS_VP8); +} + +#if CONFIG_VP8_DECODER +#if HAVE_THREADS +static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx) +{ + VP8Context *s = avctx->priv_data; + int ret; + + s->avctx = avctx; + + if ((ret = vp8_init_frames(s)) < 0) { + ff_vp8_decode_free(avctx); + return ret; + } + + return 0; +} + +#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL) + +static int vp8_decode_update_thread_context(AVCodecContext *dst, + const AVCodecContext *src) +{ + VP8Context *s = dst->priv_data, *s_src = src->priv_data; + int i; + + if (s->macroblocks_base && + (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) { + free_buffers(s); + s->mb_width = s_src->mb_width; + s->mb_height = s_src->mb_height; + } + + s->pix_fmt = s_src->pix_fmt; + s->prob[0] = s_src->prob[!s_src->update_probabilities]; + s->segmentation = s_src->segmentation; + s->lf_delta = s_src->lf_delta; + memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias)); + + for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) { + if (s_src->frames[i].tf.f->buf[0]) { + int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]); + if (ret < 0) + return ret; + } + } + + s->framep[0] = REBASE(s_src->next_framep[0]); + s->framep[1] = REBASE(s_src->next_framep[1]); + s->framep[2] = REBASE(s_src->next_framep[2]); + s->framep[3] = REBASE(s_src->next_framep[3]); + + return 0; +} +#endif /* HAVE_THREADS */ +#endif /* CONFIG_VP8_DECODER */ + +#if CONFIG_VP7_DECODER +AVCodec ff_vp7_decoder = { + .name = "vp7", + .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"), + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_VP7, + .priv_data_size = sizeof(VP8Context), + .init = vp7_decode_init, + .close = ff_vp8_decode_free, + .decode = vp7_decode_frame, + .capabilities = AV_CODEC_CAP_DR1, + .flush = vp8_decode_flush, +}; +#endif /* CONFIG_VP7_DECODER */ + +#if CONFIG_VP8_DECODER +AVCodec ff_vp8_decoder = { + .name = "vp8", + .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"), + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_VP8, + .priv_data_size = sizeof(VP8Context), + .init = ff_vp8_decode_init, + .close = ff_vp8_decode_free, + .decode = ff_vp8_decode_frame, + .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | + AV_CODEC_CAP_SLICE_THREADS, + .flush = vp8_decode_flush, + .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy), + .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context), + .hw_configs = (const AVCodecHWConfigInternal*[]) { +#if CONFIG_VP8_VAAPI_HWACCEL + HWACCEL_VAAPI(vp8), +#endif +#if CONFIG_VP8_NVDEC_HWACCEL + HWACCEL_NVDEC(vp8), +#endif + NULL + }, +}; +#endif /* CONFIG_VP7_DECODER */ diff --git a/libs/ffvpx/libavcodec/vp8.h b/libs/ffvpx/libavcodec/vp8.h new file mode 100644 index 000000000..70d21e3c6 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp8.h @@ -0,0 +1,351 @@ +/* + * VP8 compatible video decoder + * + * Copyright (C) 2010 David Conrad + * Copyright (C) 2010 Ronald S. Bultje + * Copyright (C) 2010 Fiona Glaser + * Copyright (C) 2012 Daniel Kang + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VP8_H +#define AVCODEC_VP8_H + +#include <stdatomic.h> + +#include "libavutil/buffer.h" +#include "libavutil/thread.h" + +#include "h264pred.h" +#include "thread.h" +#include "vp56.h" +#include "vp8dsp.h" + +#define VP8_MAX_QUANT 127 + +enum dct_token { + DCT_0, + DCT_1, + DCT_2, + DCT_3, + DCT_4, + DCT_CAT1, + DCT_CAT2, + DCT_CAT3, + DCT_CAT4, + DCT_CAT5, + DCT_CAT6, + DCT_EOB, + + NUM_DCT_TOKENS +}; + +// used to signal 4x4 intra pred in luma MBs +#define MODE_I4x4 4 + +enum inter_mvmode { + VP8_MVMODE_ZERO = MODE_I4x4 + 1, + VP8_MVMODE_MV, + VP8_MVMODE_SPLIT +}; + +enum inter_splitmvmode { + VP8_SPLITMVMODE_16x8 = 0, ///< 2 16x8 blocks (vertical) + VP8_SPLITMVMODE_8x16, ///< 2 8x16 blocks (horizontal) + VP8_SPLITMVMODE_8x8, ///< 2x2 blocks of 8x8px each + VP8_SPLITMVMODE_4x4, ///< 4x4 blocks of 4x4px each + VP8_SPLITMVMODE_NONE, ///< (only used in prediction) no split MVs +}; + +typedef struct VP8FilterStrength { + uint8_t filter_level; + uint8_t inner_limit; + uint8_t inner_filter; +} VP8FilterStrength; + +typedef struct VP8Macroblock { + uint8_t skip; + // TODO: make it possible to check for at least (i4x4 or split_mv) + // in one op. are others needed? + uint8_t mode; + uint8_t ref_frame; + uint8_t partitioning; + uint8_t chroma_pred_mode; + uint8_t segment; + uint8_t intra4x4_pred_mode_mb[16]; + DECLARE_ALIGNED(4, uint8_t, intra4x4_pred_mode_top)[4]; + VP56mv mv; + VP56mv bmv[16]; +} VP8Macroblock; + +typedef struct VP8intmv { + int x; + int y; +} VP8intmv; + +typedef struct VP8mvbounds { + VP8intmv mv_min; + VP8intmv mv_max; +} VP8mvbounds; + +typedef struct VP8ThreadData { + DECLARE_ALIGNED(16, int16_t, block)[6][4][16]; + DECLARE_ALIGNED(16, int16_t, block_dc)[16]; + /** + * This is the index plus one of the last non-zero coeff + * for each of the blocks in the current macroblock. + * So, 0 -> no coeffs + * 1 -> dc-only (special transform) + * 2+-> full transform + */ + DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; + /** + * For coeff decode, we need to know whether the above block had non-zero + * coefficients. This means for each macroblock, we need data for 4 luma + * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9 + * per macroblock. We keep the last row in top_nnz. + */ + DECLARE_ALIGNED(8, uint8_t, left_nnz)[9]; + int thread_nr; +#if HAVE_THREADS + pthread_mutex_t lock; + pthread_cond_t cond; +#endif + atomic_int thread_mb_pos; // (mb_y << 16) | (mb_x & 0xFFFF) + atomic_int wait_mb_pos; // What the current thread is waiting on. + +#define EDGE_EMU_LINESIZE 32 + DECLARE_ALIGNED(16, uint8_t, edge_emu_buffer)[21 * EDGE_EMU_LINESIZE]; + VP8FilterStrength *filter_strength; + VP8mvbounds mv_bounds; +} VP8ThreadData; + +typedef struct VP8Frame { + ThreadFrame tf; + AVBufferRef *seg_map; + + AVBufferRef *hwaccel_priv_buf; + void *hwaccel_picture_private; +} VP8Frame; + +#define MAX_THREADS 8 +typedef struct VP8Context { + VP8ThreadData *thread_data; + AVCodecContext *avctx; + enum AVPixelFormat pix_fmt; + int actually_webp; + + VP8Frame *framep[4]; + VP8Frame *next_framep[4]; + VP8Frame *curframe; + VP8Frame *prev_frame; + + uint16_t mb_width; /* number of horizontal MB */ + uint16_t mb_height; /* number of vertical MB */ + ptrdiff_t linesize; + ptrdiff_t uvlinesize; + + uint8_t keyframe; + uint8_t deblock_filter; + uint8_t mbskip_enabled; + uint8_t profile; + VP8mvbounds mv_bounds; + + int8_t sign_bias[4]; ///< one state [0, 1] per ref frame type + int ref_count[3]; + + /** + * Base parameters for segmentation, i.e. per-macroblock parameters. + * These must be kept unchanged even if segmentation is not used for + * a frame, since the values persist between interframes. + */ + struct { + uint8_t enabled; + uint8_t absolute_vals; + uint8_t update_map; + uint8_t update_feature_data; + int8_t base_quant[4]; + int8_t filter_level[4]; ///< base loop filter level + } segmentation; + + struct { + uint8_t simple; + uint8_t level; + uint8_t sharpness; + } filter; + + VP8Macroblock *macroblocks; + + uint8_t *intra4x4_pred_mode_top; + uint8_t intra4x4_pred_mode_left[4]; + + /** + * Macroblocks can have one of 4 different quants in a frame when + * segmentation is enabled. + * If segmentation is disabled, only the first segment's values are used. + */ + struct { + // [0] - DC qmul [1] - AC qmul + int16_t luma_qmul[2]; + int16_t luma_dc_qmul[2]; ///< luma dc-only block quant + int16_t chroma_qmul[2]; + } qmat[4]; + + // Raw quantisation values, which may be needed by hwaccel decode. + struct { + int yac_qi; + int ydc_delta; + int y2dc_delta; + int y2ac_delta; + int uvdc_delta; + int uvac_delta; + } quant; + + struct { + uint8_t enabled; ///< whether each mb can have a different strength based on mode/ref + uint8_t update; + + /** + * filter strength adjustment for the following macroblock modes: + * [0-3] - i16x16 (always zero) + * [4] - i4x4 + * [5] - zero mv + * [6] - inter modes except for zero or split mv + * [7] - split mv + * i16x16 modes never have any adjustment + */ + int8_t mode[VP8_MVMODE_SPLIT + 1]; + + /** + * filter strength adjustment for macroblocks that reference: + * [0] - intra / VP56_FRAME_CURRENT + * [1] - VP56_FRAME_PREVIOUS + * [2] - VP56_FRAME_GOLDEN + * [3] - altref / VP56_FRAME_GOLDEN2 + */ + int8_t ref[4]; + } lf_delta; + + uint8_t (*top_border)[16 + 8 + 8]; + uint8_t (*top_nnz)[9]; + + VP56RangeCoder c; ///< header context, includes mb modes and motion vectors + + /* This contains the entropy coder state at the end of the header + * block, in the form specified by the standard. For use by + * hwaccels, so that a hardware decoder has the information to + * start decoding at the macroblock layer. + */ + struct { + const uint8_t *input; + uint32_t range; + uint32_t value; + int bit_count; + } coder_state_at_header_end; + + int header_partition_size; + + /** + * These are all of the updatable probabilities for binary decisions. + * They are only implicitly reset on keyframes, making it quite likely + * for an interframe to desync if a prior frame's header was corrupt + * or missing outright! + */ + struct { + uint8_t segmentid[3]; + uint8_t mbskip; + uint8_t intra; + uint8_t last; + uint8_t golden; + uint8_t pred16x16[4]; + uint8_t pred8x8c[3]; + uint8_t token[4][16][3][NUM_DCT_TOKENS - 1]; + uint8_t mvc[2][19]; + uint8_t scan[16]; + } prob[2]; + + VP8Macroblock *macroblocks_base; + int invisible; + int update_last; ///< update VP56_FRAME_PREVIOUS with the current one + int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so + int update_altref; + + /** + * If this flag is not set, all the probability updates + * are discarded after this frame is decoded. + */ + int update_probabilities; + + /** + * All coefficients are contained in separate arith coding contexts. + * There can be 1, 2, 4, or 8 of these after the header context. + */ + int num_coeff_partitions; + VP56RangeCoder coeff_partition[8]; + int coeff_partition_size[8]; + VideoDSPContext vdsp; + VP8DSPContext vp8dsp; + H264PredContext hpc; + vp8_mc_func put_pixels_tab[3][3][3]; + VP8Frame frames[5]; + + uint8_t colorspace; ///< 0 is the only value allowed (meaning bt601) + uint8_t fullrange; ///< whether we can skip clamping in dsp functions + + int num_jobs; + /** + * This describes the macroblock memory layout. + * 0 -> Only width+height*2+1 macroblocks allocated (frame/single thread). + * 1 -> Macroblocks for entire frame allocated (sliced thread). + */ + int mb_layout; + + int (*decode_mb_row_no_filter)(AVCodecContext *avctx, void *tdata, int jobnr, int threadnr); + void (*filter_mb_row)(AVCodecContext *avctx, void *tdata, int jobnr, int threadnr); + + int vp7; + + /** + * Fade bit present in bitstream (VP7) + */ + int fade_present; + + /** + * Interframe DC prediction (VP7) + * [0] VP56_FRAME_PREVIOUS + * [1] VP56_FRAME_GOLDEN + */ + uint16_t inter_dc_pred[2][2]; + + /** + * Macroblock features (VP7) + */ + uint8_t feature_enabled[4]; + uint8_t feature_present_prob[4]; + uint8_t feature_index_prob[4][3]; + uint8_t feature_value[4][4]; +} VP8Context; + +int ff_vp8_decode_init(AVCodecContext *avctx); + +int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, + AVPacket *avpkt); + +int ff_vp8_decode_free(AVCodecContext *avctx); + +#endif /* AVCODEC_VP8_H */ diff --git a/libs/ffvpx/libavcodec/vp8_parser.c b/libs/ffvpx/libavcodec/vp8_parser.c new file mode 100644 index 000000000..e2d91b271 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp8_parser.c @@ -0,0 +1,81 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/intreadwrite.h" + +#include "avcodec.h" + +static int parse(AVCodecParserContext *s, + AVCodecContext *avctx, + const uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + unsigned int frame_type; + unsigned int profile; + + *poutbuf = buf; + *poutbuf_size = buf_size; + + if (buf_size < 3) + return buf_size; + + frame_type = buf[0] & 1; + profile = (buf[0] >> 1) & 7; + if (profile > 3) { + av_log(avctx, AV_LOG_ERROR, "Invalid profile %u.\n", profile); + return buf_size; + } + + avctx->profile = profile; + s->key_frame = frame_type == 0; + s->pict_type = frame_type ? AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_I; + s->format = AV_PIX_FMT_YUV420P; + s->field_order = AV_FIELD_PROGRESSIVE; + s->picture_structure = AV_PICTURE_STRUCTURE_FRAME; + + if (frame_type == 0) { + unsigned int sync_code; + unsigned int width, height; + + if (buf_size < 10) + return buf_size; + + sync_code = AV_RL24(buf + 3); + if (sync_code != 0x2a019d) { + av_log(avctx, AV_LOG_ERROR, "Invalid sync code %06x.\n", sync_code); + return buf_size; + } + + width = AV_RL16(buf + 6) & 0x3fff; + height = AV_RL16(buf + 8) & 0x3fff; + + s->width = width; + s->height = height; + s->coded_width = FFALIGN(width, 16); + s->coded_height = FFALIGN(height, 16); + } + + *poutbuf = buf; + *poutbuf_size = buf_size; + return buf_size; +} + +AVCodecParser ff_vp8_parser = { + .codec_ids = { AV_CODEC_ID_VP8 }, + .parser_parse = parse, +}; diff --git a/libs/ffvpx/libavcodec/vp8data.h b/libs/ffvpx/libavcodec/vp8data.h new file mode 100644 index 000000000..5e6dea761 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp8data.h @@ -0,0 +1,830 @@ +/* + * Copyright (C) 2010 David Conrad + * Copyright (C) 2010 Ronald S. Bultje + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * VP8 compatible video decoder + */ + +#ifndef AVCODEC_VP8DATA_H +#define AVCODEC_VP8DATA_H + +#include "vp8.h" +#include "h264pred.h" + +static const uint8_t vp7_pred4x4_mode[] = { + [DC_PRED8x8] = DC_PRED, + [VERT_PRED8x8] = TM_VP8_PRED, + [HOR_PRED8x8] = TM_VP8_PRED, + [PLANE_PRED8x8] = TM_VP8_PRED, +}; + +static const uint8_t vp8_pred4x4_mode[] = { + [DC_PRED8x8] = DC_PRED, + [VERT_PRED8x8] = VERT_PRED, + [HOR_PRED8x8] = HOR_PRED, + [PLANE_PRED8x8] = TM_VP8_PRED, +}; + +static const int8_t vp8_pred16x16_tree_intra[4][2] = { + { -MODE_I4x4, 1 }, // '0' + { 2, 3 }, + { -DC_PRED8x8, -VERT_PRED8x8 }, // '100', '101' + { -HOR_PRED8x8, -PLANE_PRED8x8 }, // '110', '111' +}; + +static const int8_t vp8_pred16x16_tree_inter[4][2] = { + { -DC_PRED8x8, 1 }, // '0' + { 2, 3 }, + { -VERT_PRED8x8, -HOR_PRED8x8 }, // '100', '101' + { -PLANE_PRED8x8, -MODE_I4x4 }, // '110', '111' +}; + +typedef struct VP7MVPred { + int8_t yoffset; + int8_t xoffset; + uint8_t subblock; + uint8_t score; +} VP7MVPred; + +#define VP7_MV_PRED_COUNT 12 +static const VP7MVPred vp7_mv_pred[VP7_MV_PRED_COUNT] = { + { -1, 0, 12, 8 }, + { 0, -1, 3, 8 }, + { -1, -1, 15, 2 }, + { -1, 1, 12, 2 }, + { -2, 0, 12, 2 }, + { 0, -2, 3, 2 }, + { -1, -2, 15, 1 }, + { -2, -1, 15, 1 }, + { -2, 1, 12, 1 }, + { -1, 2, 12, 1 }, + { -2, -2, 15, 1 }, + { -2, 2, 12, 1 }, +}; + +static const int vp7_mode_contexts[31][4] = { + { 3, 3, 1, 246 }, + { 7, 89, 66, 239 }, + { 10, 90, 78, 238 }, + { 14, 118, 95, 241 }, + { 14, 123, 106, 238 }, + { 20, 140, 109, 240 }, + { 13, 155, 103, 238 }, + { 21, 158, 99, 240 }, + { 27, 82, 108, 232 }, + { 19, 99, 123, 217 }, + { 45, 139, 148, 236 }, + { 50, 117, 144, 235 }, + { 57, 128, 164, 238 }, + { 69, 139, 171, 239 }, + { 74, 154, 179, 238 }, + { 112, 165, 186, 242 }, + { 98, 143, 185, 245 }, + { 105, 153, 190, 250 }, + { 124, 167, 192, 245 }, + { 131, 186, 203, 246 }, + { 59, 184, 222, 224 }, + { 148, 215, 214, 213 }, + { 137, 211, 210, 219 }, + { 190, 227, 128, 228 }, + { 183, 228, 128, 228 }, + { 194, 234, 128, 228 }, + { 202, 236, 128, 228 }, + { 205, 240, 128, 228 }, + { 205, 244, 128, 228 }, + { 225, 246, 128, 228 }, + { 233, 251, 128, 228 }, +}; + +static const int vp8_mode_contexts[6][4] = { + { 7, 1, 1, 143 }, + { 14, 18, 14, 107 }, + { 135, 64, 57, 68 }, + { 60, 56, 128, 65 }, + { 159, 134, 128, 34 }, + { 234, 188, 128, 28 }, +}; + +static const uint8_t vp8_mbsplits[5][16] = { + { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, + { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +}; + +static const uint8_t vp8_mbfirstidx[4][16] = { + { 0, 8 }, + { 0, 2 }, + { 0, 2, 8, 10 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } +}; + +static const uint8_t vp8_mbsplit_count[4] = { + 2, 2, 4, 16 +}; +static const uint8_t vp8_mbsplit_prob[3] = { + 110, 111, 150 +}; + +static const uint8_t vp7_submv_prob[3] = { + 180, 162, 25 +}; + +static const uint8_t vp8_submv_prob[5][3] = { + { 147, 136, 18 }, + { 106, 145, 1 }, + { 179, 121, 1 }, + { 223, 1, 34 }, + { 208, 1, 1 } +}; + +static const uint8_t vp8_pred16x16_prob_intra[4] = { + 145, 156, 163, 128 +}; +static const uint8_t vp8_pred16x16_prob_inter[4] = { + 112, 86, 140, 37 +}; + +static const int8_t vp8_pred4x4_tree[9][2] = { + { -DC_PRED, 1 }, // '0' + { -TM_VP8_PRED, 2 }, // '10' + { -VERT_PRED, 3 }, // '110' + { 4, 6 }, + { -HOR_PRED, 5 }, // '11100' + { -DIAG_DOWN_RIGHT_PRED, -VERT_RIGHT_PRED }, // '111010', '111011' + { -DIAG_DOWN_LEFT_PRED, 7 }, // '11110' + { -VERT_LEFT_PRED, 8 }, // '111110' + { -HOR_DOWN_PRED, -HOR_UP_PRED }, // '1111110', '1111111' +}; + +static const int8_t vp8_pred8x8c_tree[3][2] = { + { -DC_PRED8x8, 1 }, // '0' + { -VERT_PRED8x8, 2 }, // '10 + { -HOR_PRED8x8, -PLANE_PRED8x8 }, // '110', '111' +}; + +static const uint8_t vp8_pred8x8c_prob_intra[3] = { + 142, 114, 183 +}; +static const uint8_t vp8_pred8x8c_prob_inter[3] = { + 162, 101, 204 +}; +static const uint8_t vp8_pred4x4_prob_inter[9] = { + 120, 90, 79, 133, 87, 85, 80, 111, 151 +}; + +static const uint8_t vp8_pred4x4_prob_intra[10][10][9] = { + { + { 39, 53, 200, 87, 26, 21, 43, 232, 171 }, + { 56, 34, 51, 104, 114, 102, 29, 93, 77 }, + { 88, 88, 147, 150, 42, 46, 45, 196, 205 }, + { 107, 54, 32, 26, 51, 1, 81, 43, 31 }, + { 39, 28, 85, 171, 58, 165, 90, 98, 64 }, + { 34, 22, 116, 206, 23, 34, 43, 166, 73 }, + { 34, 19, 21, 102, 132, 188, 16, 76, 124 }, + { 68, 25, 106, 22, 64, 171, 36, 225, 114 }, + { 62, 18, 78, 95, 85, 57, 50, 48, 51 }, + { 43, 97, 183, 117, 85, 38, 35, 179, 61 }, + }, + { + { 112, 113, 77, 85, 179, 255, 38, 120, 114 }, + { 40, 42, 1, 196, 245, 209, 10, 25, 109 }, + { 193, 101, 35, 159, 215, 111, 89, 46, 111 }, + { 100, 80, 8, 43, 154, 1, 51, 26, 71 }, + { 88, 43, 29, 140, 166, 213, 37, 43, 154 }, + { 61, 63, 30, 155, 67, 45, 68, 1, 209 }, + { 41, 40, 5, 102, 211, 183, 4, 1, 221 }, + { 142, 78, 78, 16, 255, 128, 34, 197, 171 }, + { 51, 50, 17, 168, 209, 192, 23, 25, 82 }, + { 60, 148, 31, 172, 219, 228, 21, 18, 111 }, + }, + { + { 175, 69, 143, 80, 85, 82, 72, 155, 103 }, + { 56, 58, 10, 171, 218, 189, 17, 13, 152 }, + { 231, 120, 48, 89, 115, 113, 120, 152, 112 }, + { 144, 71, 10, 38, 171, 213, 144, 34, 26 }, + { 114, 26, 17, 163, 44, 195, 21, 10, 173 }, + { 121, 24, 80, 195, 26, 62, 44, 64, 85 }, + { 63, 20, 8, 114, 114, 208, 12, 9, 226 }, + { 170, 46, 55, 19, 136, 160, 33, 206, 71 }, + { 81, 40, 11, 96, 182, 84, 29, 16, 36 }, + { 152, 179, 64, 126, 170, 118, 46, 70, 95 }, + }, + { + { 75, 79, 123, 47, 51, 128, 81, 171, 1 }, + { 57, 17, 5, 71, 102, 57, 53, 41, 49 }, + { 125, 98, 42, 88, 104, 85, 117, 175, 82 }, + { 115, 21, 2, 10, 102, 255, 166, 23, 6 }, + { 38, 33, 13, 121, 57, 73, 26, 1, 85 }, + { 41, 10, 67, 138, 77, 110, 90, 47, 114 }, + { 57, 18, 10, 102, 102, 213, 34, 20, 43 }, + { 101, 29, 16, 10, 85, 128, 101, 196, 26 }, + { 117, 20, 15, 36, 163, 128, 68, 1, 26 }, + { 95, 84, 53, 89, 128, 100, 113, 101, 45 }, + }, + { + { 63, 59, 90, 180, 59, 166, 93, 73, 154 }, + { 40, 40, 21, 116, 143, 209, 34, 39, 175 }, + { 138, 31, 36, 171, 27, 166, 38, 44, 229 }, + { 57, 46, 22, 24, 128, 1, 54, 17, 37 }, + { 47, 15, 16, 183, 34, 223, 49, 45, 183 }, + { 46, 17, 33, 183, 6, 98, 15, 32, 183 }, + { 40, 3, 9, 115, 51, 192, 18, 6, 223 }, + { 65, 32, 73, 115, 28, 128, 23, 128, 205 }, + { 87, 37, 9, 115, 59, 77, 64, 21, 47 }, + { 67, 87, 58, 169, 82, 115, 26, 59, 179 }, + }, + { + { 54, 57, 112, 184, 5, 41, 38, 166, 213 }, + { 30, 34, 26, 133, 152, 116, 10, 32, 134 }, + { 104, 55, 44, 218, 9, 54, 53, 130, 226 }, + { 75, 32, 12, 51, 192, 255, 160, 43, 51 }, + { 39, 19, 53, 221, 26, 114, 32, 73, 255 }, + { 31, 9, 65, 234, 2, 15, 1, 118, 73 }, + { 56, 21, 23, 111, 59, 205, 45, 37, 192 }, + { 88, 31, 35, 67, 102, 85, 55, 186, 85 }, + { 55, 38, 70, 124, 73, 102, 1, 34, 98 }, + { 64, 90, 70, 205, 40, 41, 23, 26, 57 }, + }, + { + { 86, 40, 64, 135, 148, 224, 45, 183, 128 }, + { 22, 26, 17, 131, 240, 154, 14, 1, 209 }, + { 164, 50, 31, 137, 154, 133, 25, 35, 218 }, + { 83, 12, 13, 54, 192, 255, 68, 47, 28 }, + { 45, 16, 21, 91, 64, 222, 7, 1, 197 }, + { 56, 21, 39, 155, 60, 138, 23, 102, 213 }, + { 18, 11, 7, 63, 144, 171, 4, 4, 246 }, + { 85, 26, 85, 85, 128, 128, 32, 146, 171 }, + { 35, 27, 10, 146, 174, 171, 12, 26, 128 }, + { 51, 103, 44, 131, 131, 123, 31, 6, 158 }, + }, + { + { 68, 45, 128, 34, 1, 47, 11, 245, 171 }, + { 62, 17, 19, 70, 146, 85, 55, 62, 70 }, + { 102, 61, 71, 37, 34, 53, 31, 243, 192 }, + { 75, 15, 9, 9, 64, 255, 184, 119, 16 }, + { 37, 43, 37, 154, 100, 163, 85, 160, 1 }, + { 63, 9, 92, 136, 28, 64, 32, 201, 85 }, + { 56, 8, 17, 132, 137, 255, 55, 116, 128 }, + { 86, 6, 28, 5, 64, 255, 25, 248, 1 }, + { 58, 15, 20, 82, 135, 57, 26, 121, 40 }, + { 69, 60, 71, 38, 73, 119, 28, 222, 37 }, + }, + { + { 101, 75, 128, 139, 118, 146, 116, 128, 85 }, + { 56, 41, 15, 176, 236, 85, 37, 9, 62 }, + { 190, 80, 35, 99, 180, 80, 126, 54, 45 }, + { 146, 36, 19, 30, 171, 255, 97, 27, 20 }, + { 71, 30, 17, 119, 118, 255, 17, 18, 138 }, + { 101, 38, 60, 138, 55, 70, 43, 26, 142 }, + { 32, 41, 20, 117, 151, 142, 20, 21, 163 }, + { 138, 45, 61, 62, 219, 1, 81, 188, 64 }, + { 112, 19, 12, 61, 195, 128, 48, 4, 24 }, + { 85, 126, 47, 87, 176, 51, 41, 20, 32 }, + }, + { + { 66, 102, 167, 99, 74, 62, 40, 234, 128 }, + { 41, 53, 9, 178, 241, 141, 26, 8, 107 }, + { 134, 183, 89, 137, 98, 101, 106, 165, 148 }, + { 104, 79, 12, 27, 217, 255, 87, 17, 7 }, + { 74, 43, 26, 146, 73, 166, 49, 23, 157 }, + { 65, 38, 105, 160, 51, 52, 31, 115, 128 }, + { 47, 41, 14, 110, 182, 183, 21, 17, 194 }, + { 87, 68, 71, 44, 114, 51, 15, 186, 23 }, + { 66, 45, 25, 102, 197, 189, 23, 18, 22 }, + { 72, 187, 100, 130, 157, 111, 32, 75, 80 }, + }, +}; + +static const int8_t vp8_segmentid_tree[][2] = { + { 1, 2 }, + { -0, -1 }, // '00', '01' + { -2, -3 }, // '10', '11' +}; + +static const uint8_t vp8_coeff_band[16] = { + 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7 +}; + +/* Inverse of vp8_coeff_band: mappings of bands to coefficient indexes. + * Each list is -1-terminated. */ +static const int8_t vp8_coeff_band_indexes[8][10] = { + { 0, -1 }, + { 1, -1 }, + { 2, -1 }, + { 3, -1 }, + { 5, -1 }, + { 6, -1 }, + { 4, 7, 8, 9, 10, 11, 12, 13, 14, -1 }, + { 15, -1 } +}; + +static const uint8_t vp8_dct_cat1_prob[] = { + 159, 0 +}; +static const uint8_t vp8_dct_cat2_prob[] = { + 165, 145, 0 +}; +static const uint8_t vp8_dct_cat3_prob[] = { + 173, 148, 140, 0 +}; +static const uint8_t vp8_dct_cat4_prob[] = { + 176, 155, 140, 135, 0 +}; +static const uint8_t vp8_dct_cat5_prob[] = { + 180, 157, 141, 134, 130, 0 +}; +static const uint8_t vp8_dct_cat6_prob[] = { + 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 +}; + +// only used for cat3 and above; cat 1 and 2 are referenced directly +const uint8_t *const ff_vp8_dct_cat_prob[] = { + vp8_dct_cat3_prob, + vp8_dct_cat4_prob, + vp8_dct_cat5_prob, + vp8_dct_cat6_prob, +}; + +static const uint8_t vp8_token_default_probs[4][8][3][NUM_DCT_TOKENS - 1] = { + { + { + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 }, + { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 }, + { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }, + }, + { + { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 }, + { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 }, + { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }, + }, + { + { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 }, + { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 }, + { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }, + }, + { + { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 }, + { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 }, + { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }, + }, + { + { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 }, + { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 }, + { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }, + }, + { + { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 }, + { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 }, + { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }, + }, + { + { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + }, + { + { + { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 }, + { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 }, + { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }, + }, + { + { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 }, + { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 }, + { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }, + }, + { + { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 }, + { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 }, + { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }, + }, + { + { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 }, + { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 }, + { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }, + }, + { + { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 }, + { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 }, + { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }, + }, + { + { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 }, + { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 }, + { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }, + }, + { + { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 }, + { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 }, + { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }, + }, + { + { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }, + }, + }, + { + { + { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 }, + { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 }, + { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }, + }, + { + { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 }, + { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 }, + { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }, + }, + { + { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 }, + { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 }, + { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }, + }, + { + { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + }, + { + { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 }, + { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + }, + { + { + { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 }, + { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 }, + { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }, + }, + { + { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 }, + { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 }, + { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }, + }, + { + { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 }, + { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 }, + { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }, + }, + { + { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 }, + { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 }, + { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }, + }, + { + { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 }, + { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 }, + { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }, + }, + { + { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 }, + { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 }, + { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }, + }, + { + { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 }, + { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 }, + { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }, + }, + { + { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + }, +}; + +static const uint8_t vp8_token_update_probs[4][8][3][NUM_DCT_TOKENS - 1] = { + { + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 }, + { 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + }, + { + { + { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 }, + { 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 }, + }, + { + { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + }, + { + { + { 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 }, + }, + { + { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + }, + { + { + { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 }, + { 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + }, +}; + +static const uint8_t vp8_dc_qlookup[VP8_MAX_QUANT + 1] = { + 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17, + 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, + 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157, +}; + +static const uint16_t vp8_ac_qlookup[VP8_MAX_QUANT + 1] = { + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, + 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, + 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152, + 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209, + 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284, +}; + +static const uint8_t vp8_mv_update_prob[2][19] = { + { 237, + 246, + 253, 253, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 250, 250, 252, /* VP8 only: */ 254, 254 }, + { 231, + 243, + 245, 253, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 251, 251, 254, /* VP8 only: */ 254, 254 } +}; + +static const uint8_t vp7_mv_default_prob[2][17] = { + { 162, + 128, + 225, 146, 172, 147, 214, 39, 156, + 247, 210, 135, 68, 138, 220, 239, 246 }, + { 164, + 128, + 204, 170, 119, 235, 140, 230, 228, + 244, 184, 201, 44, 173, 221, 239, 253 } +}; + +static const uint8_t vp8_mv_default_prob[2][19] = { + { 162, + 128, + 225, 146, 172, 147, 214, 39, 156, + 128, 129, 132, 75, 145, 178, 206, 239, 254, 254 }, + { 164, + 128, + 204, 170, 119, 235, 140, 230, 228, + 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 } +}; + +static const uint8_t vp7_feature_value_size[2][4] = { + { 7, 6, 0, 8 }, + { 7, 6, 0, 5 }, +}; + +static const int8_t vp7_feature_index_tree[4][2] = +{ + { 1, 2 }, + { -0, -1 }, // '00', '01' + { -2, -3 }, // '10', '11' +}; + +static const uint16_t vp7_ydc_qlookup[] = { + 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 33, 34, 35, 36, 36, 37, 38, 39, 39, 40, 41, + 41, 42, 43, 43, 44, 45, 45, 46, 47, 48, 48, 49, 50, 51, 52, + 53, 53, 54, 56, 57, 58, 59, 60, 62, 63, 65, 66, 68, 70, 72, + 74, 76, 79, 81, 84, 87, 90, 93, 96, 100, 104, 108, 112, 116, 121, + 126, 131, 136, 142, 148, 154, 160, 167, 174, 182, 189, 198, 206, 215, 224, + 234, 244, 254, 265, 277, 288, 301, 313, 327, 340, 355, 370, 385, 401, 417, + 434, 452, 470, 489, 509, 529, 550, 572, +}; + +static const uint16_t vp7_yac_qlookup[] = { + 4, 4, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 15, + 16, 17, 19, 20, 22, 23, 25, 26, 28, 29, 31, 32, 34, 35, + 37, 38, 40, 41, 42, 44, 45, 46, 48, 49, 50, 51, 53, 54, + 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 67, 68, 69, 70, + 72, 73, 75, 76, 78, 80, 82, 84, 86, 88, 91, 93, 96, 99, + 102, 105, 109, 112, 116, 121, 125, 130, 135, 140, 146, 152, 158, 165, + 172, 180, 188, 196, 205, 214, 224, 234, 245, 256, 268, 281, 294, 308, + 322, 337, 353, 369, 386, 404, 423, 443, 463, 484, 506, 529, 553, 578, + 604, 631, 659, 688, 718, 749, 781, 814, 849, 885, 922, 960, 1000, 1041, + 1083, 1127, +}; + +static const uint16_t vp7_y2dc_qlookup[] = { + 7, 9, 11, 13, 15, 17, 19, 21, 23, 26, 28, 30, 33, 35, + 37, 39, 42, 44, 46, 48, 51, 53, 55, 57, 59, 61, 63, 65, + 67, 69, 70, 72, 74, 75, 77, 78, 80, 81, 83, 84, 85, 87, + 88, 89, 90, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 104, + 105, 106, 108, 109, 111, 113, 114, 116, 118, 120, 123, 125, 128, 131, + 134, 137, 140, 144, 148, 152, 156, 161, 166, 171, 176, 182, 188, 195, + 202, 209, 217, 225, 234, 243, 253, 263, 274, 285, 297, 309, 322, 336, + 350, 365, 381, 397, 414, 432, 450, 470, 490, 511, 533, 556, 579, 604, + 630, 656, 684, 713, 742, 773, 805, 838, 873, 908, 945, 983, 1022, 1063, + 1105, 1148, +}; + +static const uint16_t vp7_y2ac_qlookup[] = { + 7, 9, 11, 13, 16, 18, 21, 24, 26, 29, 32, 35, + 38, 41, 43, 46, 49, 52, 55, 58, 61, 64, 66, 69, + 72, 74, 77, 79, 82, 84, 86, 88, 91, 93, 95, 97, + 98, 100, 102, 104, 105, 107, 109, 110, 112, 113, 115, 116, + 117, 119, 120, 122, 123, 125, 127, 128, 130, 132, 134, 136, + 138, 141, 143, 146, 149, 152, 155, 158, 162, 166, 171, 175, + 180, 185, 191, 197, 204, 210, 218, 226, 234, 243, 252, 262, + 273, 284, 295, 308, 321, 335, 350, 365, 381, 398, 416, 435, + 455, 476, 497, 520, 544, 569, 595, 622, 650, 680, 711, 743, + 776, 811, 848, 885, 925, 965, 1008, 1052, 1097, 1144, 1193, 1244, + 1297, 1351, 1407, 1466, 1526, 1588, 1652, 1719, +}; + +#endif /* AVCODEC_VP8DATA_H */ diff --git a/libs/ffvpx/libavcodec/vp8dsp.c b/libs/ffvpx/libavcodec/vp8dsp.c new file mode 100644 index 000000000..fed5c67a9 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp8dsp.c @@ -0,0 +1,743 @@ +/* + * Copyright (C) 2010 David Conrad + * Copyright (C) 2010 Ronald S. Bultje + * Copyright (C) 2014 Peter Ross + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * VP8 compatible video decoder + */ + +#include "libavutil/common.h" +#include "libavutil/intreadwrite.h" + +#include "mathops.h" +#include "vp8dsp.h" + +#define MK_IDCT_DC_ADD4_C(name) \ +static void name ## _idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16], \ + ptrdiff_t stride) \ +{ \ + name ## _idct_dc_add_c(dst + stride * 0 + 0, block[0], stride); \ + name ## _idct_dc_add_c(dst + stride * 0 + 4, block[1], stride); \ + name ## _idct_dc_add_c(dst + stride * 4 + 0, block[2], stride); \ + name ## _idct_dc_add_c(dst + stride * 4 + 4, block[3], stride); \ +} \ + \ +static void name ## _idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16], \ + ptrdiff_t stride) \ +{ \ + name ## _idct_dc_add_c(dst + 0, block[0], stride); \ + name ## _idct_dc_add_c(dst + 4, block[1], stride); \ + name ## _idct_dc_add_c(dst + 8, block[2], stride); \ + name ## _idct_dc_add_c(dst + 12, block[3], stride); \ +} + +#if CONFIG_VP7_DECODER +static void vp7_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16]) +{ + int i; + unsigned a1, b1, c1, d1; + int16_t tmp[16]; + + for (i = 0; i < 4; i++) { + a1 = (dc[i * 4 + 0] + dc[i * 4 + 2]) * 23170; + b1 = (dc[i * 4 + 0] - dc[i * 4 + 2]) * 23170; + c1 = dc[i * 4 + 1] * 12540 - dc[i * 4 + 3] * 30274; + d1 = dc[i * 4 + 1] * 30274 + dc[i * 4 + 3] * 12540; + tmp[i * 4 + 0] = (int)(a1 + d1) >> 14; + tmp[i * 4 + 3] = (int)(a1 - d1) >> 14; + tmp[i * 4 + 1] = (int)(b1 + c1) >> 14; + tmp[i * 4 + 2] = (int)(b1 - c1) >> 14; + } + + for (i = 0; i < 4; i++) { + a1 = (tmp[i + 0] + tmp[i + 8]) * 23170; + b1 = (tmp[i + 0] - tmp[i + 8]) * 23170; + c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274; + d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540; + AV_ZERO64(dc + i * 4); + block[0][i][0] = (int)(a1 + d1 + 0x20000) >> 18; + block[3][i][0] = (int)(a1 - d1 + 0x20000) >> 18; + block[1][i][0] = (int)(b1 + c1 + 0x20000) >> 18; + block[2][i][0] = (int)(b1 - c1 + 0x20000) >> 18; + } +} + +static void vp7_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16]) +{ + int i, val = (23170 * (23170 * dc[0] >> 14) + 0x20000) >> 18; + dc[0] = 0; + + for (i = 0; i < 4; i++) { + block[i][0][0] = val; + block[i][1][0] = val; + block[i][2][0] = val; + block[i][3][0] = val; + } +} + +static void vp7_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) +{ + int i; + unsigned a1, b1, c1, d1; + int16_t tmp[16]; + + for (i = 0; i < 4; i++) { + a1 = (block[i * 4 + 0] + block[i * 4 + 2]) * 23170; + b1 = (block[i * 4 + 0] - block[i * 4 + 2]) * 23170; + c1 = block[i * 4 + 1] * 12540 - block[i * 4 + 3] * 30274; + d1 = block[i * 4 + 1] * 30274 + block[i * 4 + 3] * 12540; + AV_ZERO64(block + i * 4); + tmp[i * 4 + 0] = (int)(a1 + d1) >> 14; + tmp[i * 4 + 3] = (int)(a1 - d1) >> 14; + tmp[i * 4 + 1] = (int)(b1 + c1) >> 14; + tmp[i * 4 + 2] = (int)(b1 - c1) >> 14; + } + + for (i = 0; i < 4; i++) { + a1 = (tmp[i + 0] + tmp[i + 8]) * 23170; + b1 = (tmp[i + 0] - tmp[i + 8]) * 23170; + c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274; + d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540; + dst[0 * stride + i] = av_clip_uint8(dst[0 * stride + i] + + ((int)(a1 + d1 + 0x20000) >> 18)); + dst[3 * stride + i] = av_clip_uint8(dst[3 * stride + i] + + ((int)(a1 - d1 + 0x20000) >> 18)); + dst[1 * stride + i] = av_clip_uint8(dst[1 * stride + i] + + ((int)(b1 + c1 + 0x20000) >> 18)); + dst[2 * stride + i] = av_clip_uint8(dst[2 * stride + i] + + ((int)(b1 - c1 + 0x20000) >> 18)); + } +} + +static void vp7_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) +{ + int i, dc = (23170 * (23170 * block[0] >> 14) + 0x20000) >> 18; + block[0] = 0; + + for (i = 0; i < 4; i++) { + dst[0] = av_clip_uint8(dst[0] + dc); + dst[1] = av_clip_uint8(dst[1] + dc); + dst[2] = av_clip_uint8(dst[2] + dc); + dst[3] = av_clip_uint8(dst[3] + dc); + dst += stride; + } +} + +MK_IDCT_DC_ADD4_C(vp7) +#endif /* CONFIG_VP7_DECODER */ + +// TODO: Maybe add dequant +#if CONFIG_VP8_DECODER +static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16]) +{ + int i, t0, t1, t2, t3; + + for (i = 0; i < 4; i++) { + t0 = dc[0 * 4 + i] + dc[3 * 4 + i]; + t1 = dc[1 * 4 + i] + dc[2 * 4 + i]; + t2 = dc[1 * 4 + i] - dc[2 * 4 + i]; + t3 = dc[0 * 4 + i] - dc[3 * 4 + i]; + + dc[0 * 4 + i] = t0 + t1; + dc[1 * 4 + i] = t3 + t2; + dc[2 * 4 + i] = t0 - t1; + dc[3 * 4 + i] = t3 - t2; + } + + for (i = 0; i < 4; i++) { + t0 = dc[i * 4 + 0] + dc[i * 4 + 3] + 3; // rounding + t1 = dc[i * 4 + 1] + dc[i * 4 + 2]; + t2 = dc[i * 4 + 1] - dc[i * 4 + 2]; + t3 = dc[i * 4 + 0] - dc[i * 4 + 3] + 3; // rounding + AV_ZERO64(dc + i * 4); + + block[i][0][0] = (t0 + t1) >> 3; + block[i][1][0] = (t3 + t2) >> 3; + block[i][2][0] = (t0 - t1) >> 3; + block[i][3][0] = (t3 - t2) >> 3; + } +} + +static void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16]) +{ + int i, val = (dc[0] + 3) >> 3; + dc[0] = 0; + + for (i = 0; i < 4; i++) { + block[i][0][0] = val; + block[i][1][0] = val; + block[i][2][0] = val; + block[i][3][0] = val; + } +} + +#define MUL_20091(a) ((((a) * 20091) >> 16) + (a)) +#define MUL_35468(a) (((a) * 35468) >> 16) + +static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) +{ + int i, t0, t1, t2, t3; + int16_t tmp[16]; + + for (i = 0; i < 4; i++) { + t0 = block[0 * 4 + i] + block[2 * 4 + i]; + t1 = block[0 * 4 + i] - block[2 * 4 + i]; + t2 = MUL_35468(block[1 * 4 + i]) - MUL_20091(block[3 * 4 + i]); + t3 = MUL_20091(block[1 * 4 + i]) + MUL_35468(block[3 * 4 + i]); + block[0 * 4 + i] = 0; + block[1 * 4 + i] = 0; + block[2 * 4 + i] = 0; + block[3 * 4 + i] = 0; + + tmp[i * 4 + 0] = t0 + t3; + tmp[i * 4 + 1] = t1 + t2; + tmp[i * 4 + 2] = t1 - t2; + tmp[i * 4 + 3] = t0 - t3; + } + + for (i = 0; i < 4; i++) { + t0 = tmp[0 * 4 + i] + tmp[2 * 4 + i]; + t1 = tmp[0 * 4 + i] - tmp[2 * 4 + i]; + t2 = MUL_35468(tmp[1 * 4 + i]) - MUL_20091(tmp[3 * 4 + i]); + t3 = MUL_20091(tmp[1 * 4 + i]) + MUL_35468(tmp[3 * 4 + i]); + + dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3)); + dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3)); + dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3)); + dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3)); + dst += stride; + } +} + +static void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) +{ + int i, dc = (block[0] + 4) >> 3; + block[0] = 0; + + for (i = 0; i < 4; i++) { + dst[0] = av_clip_uint8(dst[0] + dc); + dst[1] = av_clip_uint8(dst[1] + dc); + dst[2] = av_clip_uint8(dst[2] + dc); + dst[3] = av_clip_uint8(dst[3] + dc); + dst += stride; + } +} + +MK_IDCT_DC_ADD4_C(vp8) +#endif /* CONFIG_VP8_DECODER */ + +// because I like only having two parameters to pass functions... +#define LOAD_PIXELS \ + int av_unused p3 = p[-4 * stride]; \ + int av_unused p2 = p[-3 * stride]; \ + int av_unused p1 = p[-2 * stride]; \ + int av_unused p0 = p[-1 * stride]; \ + int av_unused q0 = p[ 0 * stride]; \ + int av_unused q1 = p[ 1 * stride]; \ + int av_unused q2 = p[ 2 * stride]; \ + int av_unused q3 = p[ 3 * stride]; + +#define clip_int8(n) (cm[(n) + 0x80] - 0x80) + +static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, + int is4tap, int is_vp7) +{ + LOAD_PIXELS + int a, f1, f2; + const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; + + a = 3 * (q0 - p0); + + if (is4tap) + a += clip_int8(p1 - q1); + + a = clip_int8(a); + + // We deviate from the spec here with c(a+3) >> 3 + // since that's what libvpx does. + f1 = FFMIN(a + 4, 127) >> 3; + + if (is_vp7) + f2 = f1 - ((a & 7) == 4); + else + f2 = FFMIN(a + 3, 127) >> 3; + + // Despite what the spec says, we do need to clamp here to + // be bitexact with libvpx. + p[-1 * stride] = cm[p0 + f2]; + p[ 0 * stride] = cm[q0 - f1]; + + // only used for _inner on blocks without high edge variance + if (!is4tap) { + a = (f1 + 1) >> 1; + p[-2 * stride] = cm[p1 + a]; + p[ 1 * stride] = cm[q1 - a]; + } +} + +static av_always_inline void vp7_filter_common(uint8_t *p, ptrdiff_t stride, + int is4tap) +{ + filter_common(p, stride, is4tap, IS_VP7); +} + +static av_always_inline void vp8_filter_common(uint8_t *p, ptrdiff_t stride, + int is4tap) +{ + filter_common(p, stride, is4tap, IS_VP8); +} + +static av_always_inline int vp7_simple_limit(uint8_t *p, ptrdiff_t stride, + int flim) +{ + LOAD_PIXELS + return FFABS(p0 - q0) <= flim; +} + +static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride, + int flim) +{ + LOAD_PIXELS + return 2 * FFABS(p0 - q0) + (FFABS(p1 - q1) >> 1) <= flim; +} + +/** + * E - limit at the macroblock edge + * I - limit for interior difference + */ +#define NORMAL_LIMIT(vpn) \ +static av_always_inline int vp ## vpn ## _normal_limit(uint8_t *p, \ + ptrdiff_t stride, \ + int E, int I) \ +{ \ + LOAD_PIXELS \ + return vp ## vpn ## _simple_limit(p, stride, E) && \ + FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && \ + FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I && \ + FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I; \ +} + +NORMAL_LIMIT(7) +NORMAL_LIMIT(8) + +// high edge variance +static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh) +{ + LOAD_PIXELS + return FFABS(p1 - p0) > thresh || FFABS(q1 - q0) > thresh; +} + +static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride) +{ + int a0, a1, a2, w; + const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; + + LOAD_PIXELS + + w = clip_int8(p1 - q1); + w = clip_int8(w + 3 * (q0 - p0)); + + a0 = (27 * w + 63) >> 7; + a1 = (18 * w + 63) >> 7; + a2 = (9 * w + 63) >> 7; + + p[-3 * stride] = cm[p2 + a2]; + p[-2 * stride] = cm[p1 + a1]; + p[-1 * stride] = cm[p0 + a0]; + p[ 0 * stride] = cm[q0 - a0]; + p[ 1 * stride] = cm[q1 - a1]; + p[ 2 * stride] = cm[q2 - a2]; +} + +#define LOOP_FILTER(vpn, dir, size, stridea, strideb, maybe_inline) \ +static maybe_inline \ +void vpn ## _ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, \ + ptrdiff_t stride, \ + int flim_E, int flim_I, \ + int hev_thresh) \ +{ \ + int i; \ + for (i = 0; i < size; i++) \ + if (vpn ## _normal_limit(dst + i * stridea, strideb, \ + flim_E, flim_I)) { \ + if (hev(dst + i * stridea, strideb, hev_thresh)) \ + vpn ## _filter_common(dst + i * stridea, strideb, 1); \ + else \ + filter_mbedge(dst + i * stridea, strideb); \ + } \ +} \ + \ +static maybe_inline \ +void vpn ## _ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, \ + ptrdiff_t stride, \ + int flim_E, \ + int flim_I, \ + int hev_thresh) \ +{ \ + int i; \ + for (i = 0; i < size; i++) \ + if (vpn ## _normal_limit(dst + i * stridea, strideb, \ + flim_E, flim_I)) { \ + int hv = hev(dst + i * stridea, strideb, hev_thresh); \ + if (hv) \ + vpn ## _filter_common(dst + i * stridea, strideb, 1); \ + else \ + vpn ## _filter_common(dst + i * stridea, strideb, 0); \ + } \ +} + +#define UV_LOOP_FILTER(vpn, dir, stridea, strideb) \ +LOOP_FILTER(vpn, dir, 8, stridea, strideb, av_always_inline) \ +static void vpn ## _ ## dir ## _loop_filter8uv_c(uint8_t *dstU, \ + uint8_t *dstV, \ + ptrdiff_t stride, int fE, \ + int fI, int hev_thresh) \ +{ \ + vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh); \ + vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh); \ +} \ + \ +static void vpn ## _ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, \ + uint8_t *dstV, \ + ptrdiff_t stride, \ + int fE, int fI, \ + int hev_thresh) \ +{ \ + vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, \ + hev_thresh); \ + vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, \ + hev_thresh); \ +} + +#define LOOP_FILTER_SIMPLE(vpn) \ +static void vpn ## _v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, \ + int flim) \ +{ \ + int i; \ + for (i = 0; i < 16; i++) \ + if (vpn ## _simple_limit(dst + i, stride, flim)) \ + vpn ## _filter_common(dst + i, stride, 1); \ +} \ + \ +static void vpn ## _h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, \ + int flim) \ +{ \ + int i; \ + for (i = 0; i < 16; i++) \ + if (vpn ## _simple_limit(dst + i * stride, 1, flim)) \ + vpn ## _filter_common(dst + i * stride, 1, 1); \ +} + +#define LOOP_FILTERS(vpn) \ + LOOP_FILTER(vpn, v, 16, 1, stride, ) \ + LOOP_FILTER(vpn, h, 16, stride, 1, ) \ + UV_LOOP_FILTER(vpn, v, 1, stride) \ + UV_LOOP_FILTER(vpn, h, stride, 1) \ + LOOP_FILTER_SIMPLE(vpn) \ + +static const uint8_t subpel_filters[7][6] = { + { 0, 6, 123, 12, 1, 0 }, + { 2, 11, 108, 36, 8, 1 }, + { 0, 9, 93, 50, 6, 0 }, + { 3, 16, 77, 77, 16, 3 }, + { 0, 6, 50, 93, 9, 0 }, + { 1, 8, 36, 108, 11, 2 }, + { 0, 1, 12, 123, 6, 0 }, +}; + +#define PUT_PIXELS(WIDTH) \ +static void put_vp8_pixels ## WIDTH ## _c(uint8_t *dst, ptrdiff_t dststride, \ + uint8_t *src, ptrdiff_t srcstride, \ + int h, int x, int y) \ +{ \ + int i; \ + for (i = 0; i < h; i++, dst += dststride, src += srcstride) \ + memcpy(dst, src, WIDTH); \ +} + +PUT_PIXELS(16) +PUT_PIXELS(8) +PUT_PIXELS(4) + +#define FILTER_6TAP(src, F, stride) \ + cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \ + F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] - \ + F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7] + +#define FILTER_4TAP(src, F, stride) \ + cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \ + F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7] + +#define VP8_EPEL_H(SIZE, TAPS) \ +static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, \ + ptrdiff_t dststride, \ + uint8_t *src, \ + ptrdiff_t srcstride, \ + int h, int mx, int my) \ +{ \ + const uint8_t *filter = subpel_filters[mx - 1]; \ + const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ + int x, y; \ + for (y = 0; y < h; y++) { \ + for (x = 0; x < SIZE; x++) \ + dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ + dst += dststride; \ + src += srcstride; \ + } \ +} + +#define VP8_EPEL_V(SIZE, TAPS) \ +static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, \ + ptrdiff_t dststride, \ + uint8_t *src, \ + ptrdiff_t srcstride, \ + int h, int mx, int my) \ +{ \ + const uint8_t *filter = subpel_filters[my - 1]; \ + const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ + int x, y; \ + for (y = 0; y < h; y++) { \ + for (x = 0; x < SIZE; x++) \ + dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ + dst += dststride; \ + src += srcstride; \ + } \ +} + +#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS) \ +static void \ +put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst, \ + ptrdiff_t dststride, \ + uint8_t *src, \ + ptrdiff_t srcstride, \ + int h, int mx, \ + int my) \ +{ \ + const uint8_t *filter = subpel_filters[mx - 1]; \ + const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ + int x, y; \ + uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ + uint8_t *tmp = tmp_array; \ + src -= (2 - (VTAPS == 4)) * srcstride; \ + \ + for (y = 0; y < h + VTAPS - 1; y++) { \ + for (x = 0; x < SIZE; x++) \ + tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ + tmp += SIZE; \ + src += srcstride; \ + } \ + tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ + filter = subpel_filters[my - 1]; \ + \ + for (y = 0; y < h; y++) { \ + for (x = 0; x < SIZE; x++) \ + dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ + dst += dststride; \ + tmp += SIZE; \ + } \ +} + +VP8_EPEL_H(16, 4) +VP8_EPEL_H(8, 4) +VP8_EPEL_H(4, 4) +VP8_EPEL_H(16, 6) +VP8_EPEL_H(8, 6) +VP8_EPEL_H(4, 6) +VP8_EPEL_V(16, 4) +VP8_EPEL_V(8, 4) +VP8_EPEL_V(4, 4) +VP8_EPEL_V(16, 6) +VP8_EPEL_V(8, 6) +VP8_EPEL_V(4, 6) + +VP8_EPEL_HV(16, 4, 4) +VP8_EPEL_HV(8, 4, 4) +VP8_EPEL_HV(4, 4, 4) +VP8_EPEL_HV(16, 4, 6) +VP8_EPEL_HV(8, 4, 6) +VP8_EPEL_HV(4, 4, 6) +VP8_EPEL_HV(16, 6, 4) +VP8_EPEL_HV(8, 6, 4) +VP8_EPEL_HV(4, 6, 4) +VP8_EPEL_HV(16, 6, 6) +VP8_EPEL_HV(8, 6, 6) +VP8_EPEL_HV(4, 6, 6) + +#define VP8_BILINEAR(SIZE) \ +static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, \ + uint8_t *src, ptrdiff_t sstride, \ + int h, int mx, int my) \ +{ \ + int a = 8 - mx, b = mx; \ + int x, y; \ + for (y = 0; y < h; y++) { \ + for (x = 0; x < SIZE; x++) \ + dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ + dst += dstride; \ + src += sstride; \ + } \ +} \ + \ +static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, \ + uint8_t *src, ptrdiff_t sstride, \ + int h, int mx, int my) \ +{ \ + int c = 8 - my, d = my; \ + int x, y; \ + for (y = 0; y < h; y++) { \ + for (x = 0; x < SIZE; x++) \ + dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; \ + dst += dstride; \ + src += sstride; \ + } \ +} \ + \ +static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, \ + ptrdiff_t dstride, \ + uint8_t *src, \ + ptrdiff_t sstride, \ + int h, int mx, int my) \ +{ \ + int a = 8 - mx, b = mx; \ + int c = 8 - my, d = my; \ + int x, y; \ + uint8_t tmp_array[(2 * SIZE + 1) * SIZE]; \ + uint8_t *tmp = tmp_array; \ + for (y = 0; y < h + 1; y++) { \ + for (x = 0; x < SIZE; x++) \ + tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ + tmp += SIZE; \ + src += sstride; \ + } \ + tmp = tmp_array; \ + for (y = 0; y < h; y++) { \ + for (x = 0; x < SIZE; x++) \ + dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3; \ + dst += dstride; \ + tmp += SIZE; \ + } \ +} + +VP8_BILINEAR(16) +VP8_BILINEAR(8) +VP8_BILINEAR(4) + +#define VP78_MC_FUNC(IDX, SIZE) \ + dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \ + dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c + +#define VP78_BILINEAR_MC_FUNC(IDX, SIZE) \ + dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \ + dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \ + dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \ + dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \ + dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \ + dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \ + dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \ + dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \ + dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c + +av_cold void ff_vp78dsp_init(VP8DSPContext *dsp) +{ + VP78_MC_FUNC(0, 16); + VP78_MC_FUNC(1, 8); + VP78_MC_FUNC(2, 4); + + VP78_BILINEAR_MC_FUNC(0, 16); + VP78_BILINEAR_MC_FUNC(1, 8); + VP78_BILINEAR_MC_FUNC(2, 4); + + if (ARCH_ARM) + ff_vp78dsp_init_arm(dsp); + if (ARCH_PPC) + ff_vp78dsp_init_ppc(dsp); + if (ARCH_X86) + ff_vp78dsp_init_x86(dsp); +} + +#if CONFIG_VP7_DECODER +LOOP_FILTERS(vp7) + +av_cold void ff_vp7dsp_init(VP8DSPContext *dsp) +{ + dsp->vp8_luma_dc_wht = vp7_luma_dc_wht_c; + dsp->vp8_luma_dc_wht_dc = vp7_luma_dc_wht_dc_c; + dsp->vp8_idct_add = vp7_idct_add_c; + dsp->vp8_idct_dc_add = vp7_idct_dc_add_c; + dsp->vp8_idct_dc_add4y = vp7_idct_dc_add4y_c; + dsp->vp8_idct_dc_add4uv = vp7_idct_dc_add4uv_c; + + dsp->vp8_v_loop_filter16y = vp7_v_loop_filter16_c; + dsp->vp8_h_loop_filter16y = vp7_h_loop_filter16_c; + dsp->vp8_v_loop_filter8uv = vp7_v_loop_filter8uv_c; + dsp->vp8_h_loop_filter8uv = vp7_h_loop_filter8uv_c; + + dsp->vp8_v_loop_filter16y_inner = vp7_v_loop_filter16_inner_c; + dsp->vp8_h_loop_filter16y_inner = vp7_h_loop_filter16_inner_c; + dsp->vp8_v_loop_filter8uv_inner = vp7_v_loop_filter8uv_inner_c; + dsp->vp8_h_loop_filter8uv_inner = vp7_h_loop_filter8uv_inner_c; + + dsp->vp8_v_loop_filter_simple = vp7_v_loop_filter_simple_c; + dsp->vp8_h_loop_filter_simple = vp7_h_loop_filter_simple_c; +} +#endif /* CONFIG_VP7_DECODER */ + +#if CONFIG_VP8_DECODER +LOOP_FILTERS(vp8) + +av_cold void ff_vp8dsp_init(VP8DSPContext *dsp) +{ + dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c; + dsp->vp8_luma_dc_wht_dc = vp8_luma_dc_wht_dc_c; + dsp->vp8_idct_add = vp8_idct_add_c; + dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; + dsp->vp8_idct_dc_add4y = vp8_idct_dc_add4y_c; + dsp->vp8_idct_dc_add4uv = vp8_idct_dc_add4uv_c; + + dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c; + dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c; + dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c; + dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c; + + dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c; + dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c; + dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c; + dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c; + + dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c; + dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c; + + if (ARCH_ARM) + ff_vp8dsp_init_arm(dsp); + if (ARCH_X86) + ff_vp8dsp_init_x86(dsp); + if (ARCH_MIPS) + ff_vp8dsp_init_mips(dsp); +} +#endif /* CONFIG_VP8_DECODER */ diff --git a/libs/ffvpx/libavcodec/vp8dsp.h b/libs/ffvpx/libavcodec/vp8dsp.h new file mode 100644 index 000000000..eaae4aed6 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp8dsp.h @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2010 David Conrad + * Copyright (C) 2010 Ronald S. Bultje + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * VP8 compatible video decoder + */ + +#ifndef AVCODEC_VP8DSP_H +#define AVCODEC_VP8DSP_H + +#include <stddef.h> +#include <stdint.h> + +typedef void (*vp8_mc_func)(uint8_t *dst /* align 8 */, ptrdiff_t dstStride, + uint8_t *src /* align 1 */, ptrdiff_t srcStride, + int h, int x, int y); + +typedef struct VP8DSPContext { + void (*vp8_luma_dc_wht)(int16_t block[4][4][16], int16_t dc[16]); + void (*vp8_luma_dc_wht_dc)(int16_t block[4][4][16], int16_t dc[16]); + void (*vp8_idct_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride); + void (*vp8_idct_dc_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride); + void (*vp8_idct_dc_add4y)(uint8_t *dst, int16_t block[4][16], + ptrdiff_t stride); + void (*vp8_idct_dc_add4uv)(uint8_t *dst, int16_t block[4][16], + ptrdiff_t stride); + + // loop filter applied to edges between macroblocks + void (*vp8_v_loop_filter16y)(uint8_t *dst, ptrdiff_t stride, + int flim_E, int flim_I, int hev_thresh); + void (*vp8_h_loop_filter16y)(uint8_t *dst, ptrdiff_t stride, + int flim_E, int flim_I, int hev_thresh); + void (*vp8_v_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, + int flim_E, int flim_I, int hev_thresh); + void (*vp8_h_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, + int flim_E, int flim_I, int hev_thresh); + + // loop filter applied to inner macroblock edges + void (*vp8_v_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride, + int flim_E, int flim_I, int hev_thresh); + void (*vp8_h_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride, + int flim_E, int flim_I, int hev_thresh); + void (*vp8_v_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV, + ptrdiff_t stride, + int flim_E, int flim_I, int hev_thresh); + void (*vp8_h_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV, + ptrdiff_t stride, + int flim_E, int flim_I, int hev_thresh); + + void (*vp8_v_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim); + void (*vp8_h_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim); + + /** + * first dimension: 4-log2(width) + * second dimension: 0 if no vertical interpolation is needed; + * 1 4-tap vertical interpolation filter (my & 1) + * 2 6-tap vertical interpolation filter (!(my & 1)) + * third dimension: same as second dimension, for horizontal interpolation + * so something like put_vp8_epel_pixels_tab[4-log2(width)][2*!!my-(my&1)][2*!!mx-(mx&1)](..., mx, my) + */ + vp8_mc_func put_vp8_epel_pixels_tab[3][3][3]; + vp8_mc_func put_vp8_bilinear_pixels_tab[3][3][3]; +} VP8DSPContext; + +void ff_put_vp8_pixels16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride, + int h, int x, int y); +void ff_put_vp8_pixels8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride, + int h, int x, int y); +void ff_put_vp8_pixels4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride, + int h, int x, int y); + +void ff_vp7dsp_init(VP8DSPContext *c); + +void ff_vp78dsp_init(VP8DSPContext *c); +void ff_vp78dsp_init_arm(VP8DSPContext *c); +void ff_vp78dsp_init_ppc(VP8DSPContext *c); +void ff_vp78dsp_init_x86(VP8DSPContext *c); + +void ff_vp8dsp_init(VP8DSPContext *c); +void ff_vp8dsp_init_arm(VP8DSPContext *c); +void ff_vp8dsp_init_x86(VP8DSPContext *c); +void ff_vp8dsp_init_mips(VP8DSPContext *c); + +#define IS_VP7 1 +#define IS_VP8 0 + +#endif /* AVCODEC_VP8DSP_H */ diff --git a/libs/ffvpx/libavcodec/vp9.c b/libs/ffvpx/libavcodec/vp9.c new file mode 100644 index 000000000..b1178c9c0 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9.c @@ -0,0 +1,1819 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "avcodec.h" +#include "get_bits.h" +#include "hwaccel.h" +#include "internal.h" +#include "profiles.h" +#include "thread.h" +#include "videodsp.h" +#include "vp56.h" +#include "vp9.h" +#include "vp9data.h" +#include "vp9dec.h" +#include "libavutil/avassert.h" +#include "libavutil/pixdesc.h" + +#define VP9_SYNCCODE 0x498342 + +#if HAVE_THREADS +static void vp9_free_entries(AVCodecContext *avctx) { + VP9Context *s = avctx->priv_data; + + if (avctx->active_thread_type & FF_THREAD_SLICE) { + pthread_mutex_destroy(&s->progress_mutex); + pthread_cond_destroy(&s->progress_cond); + av_freep(&s->entries); + } +} + +static int vp9_alloc_entries(AVCodecContext *avctx, int n) { + VP9Context *s = avctx->priv_data; + int i; + + if (avctx->active_thread_type & FF_THREAD_SLICE) { + if (s->entries) + av_freep(&s->entries); + + s->entries = av_malloc_array(n, sizeof(atomic_int)); + + if (!s->entries) { + av_freep(&s->entries); + return AVERROR(ENOMEM); + } + + for (i = 0; i < n; i++) + atomic_init(&s->entries[i], 0); + + pthread_mutex_init(&s->progress_mutex, NULL); + pthread_cond_init(&s->progress_cond, NULL); + } + return 0; +} + +static void vp9_report_tile_progress(VP9Context *s, int field, int n) { + pthread_mutex_lock(&s->progress_mutex); + atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release); + pthread_cond_signal(&s->progress_cond); + pthread_mutex_unlock(&s->progress_mutex); +} + +static void vp9_await_tile_progress(VP9Context *s, int field, int n) { + if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n) + return; + + pthread_mutex_lock(&s->progress_mutex); + while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n) + pthread_cond_wait(&s->progress_cond, &s->progress_mutex); + pthread_mutex_unlock(&s->progress_mutex); +} +#else +static void vp9_free_entries(AVCodecContext *avctx) {} +static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; } +#endif + +static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f) +{ + ff_thread_release_buffer(avctx, &f->tf); + av_buffer_unref(&f->extradata); + av_buffer_unref(&f->hwaccel_priv_buf); + f->segmentation_map = NULL; + f->hwaccel_picture_private = NULL; +} + +static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f) +{ + VP9Context *s = avctx->priv_data; + int ret, sz; + + ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF); + if (ret < 0) + return ret; + + sz = 64 * s->sb_cols * s->sb_rows; + f->extradata = av_buffer_allocz(sz * (1 + sizeof(VP9mvrefPair))); + if (!f->extradata) { + goto fail; + } + + f->segmentation_map = f->extradata->data; + f->mv = (VP9mvrefPair *) (f->extradata->data + sz); + + if (avctx->hwaccel) { + const AVHWAccel *hwaccel = avctx->hwaccel; + av_assert0(!f->hwaccel_picture_private); + if (hwaccel->frame_priv_data_size) { + f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); + if (!f->hwaccel_priv_buf) + goto fail; + f->hwaccel_picture_private = f->hwaccel_priv_buf->data; + } + } + + return 0; + +fail: + vp9_frame_unref(avctx, f); + return AVERROR(ENOMEM); +} + +static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src) +{ + int ret; + + ret = ff_thread_ref_frame(&dst->tf, &src->tf); + if (ret < 0) + return ret; + + dst->extradata = av_buffer_ref(src->extradata); + if (!dst->extradata) + goto fail; + + dst->segmentation_map = src->segmentation_map; + dst->mv = src->mv; + dst->uses_2pass = src->uses_2pass; + + if (src->hwaccel_picture_private) { + dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf); + if (!dst->hwaccel_priv_buf) + goto fail; + dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data; + } + + return 0; + +fail: + vp9_frame_unref(avctx, dst); + return AVERROR(ENOMEM); +} + +static int update_size(AVCodecContext *avctx, int w, int h) +{ +#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \ + CONFIG_VP9_D3D11VA_HWACCEL * 2 + \ + CONFIG_VP9_NVDEC_HWACCEL + \ + CONFIG_VP9_VAAPI_HWACCEL) + enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts; + VP9Context *s = avctx->priv_data; + uint8_t *p; + int bytesperpixel = s->bytesperpixel, ret, cols, rows; + int lflvl_len, i; + + av_assert0(w > 0 && h > 0); + + if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) { + if ((ret = ff_set_dimensions(avctx, w, h)) < 0) + return ret; + + switch (s->pix_fmt) { + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUV420P10: +#if CONFIG_VP9_DXVA2_HWACCEL + *fmtp++ = AV_PIX_FMT_DXVA2_VLD; +#endif +#if CONFIG_VP9_D3D11VA_HWACCEL + *fmtp++ = AV_PIX_FMT_D3D11VA_VLD; + *fmtp++ = AV_PIX_FMT_D3D11; +#endif +#if CONFIG_VP9_NVDEC_HWACCEL + *fmtp++ = AV_PIX_FMT_CUDA; +#endif +#if CONFIG_VP9_VAAPI_HWACCEL + *fmtp++ = AV_PIX_FMT_VAAPI; +#endif + break; + case AV_PIX_FMT_YUV420P12: +#if CONFIG_VP9_NVDEC_HWACCEL + *fmtp++ = AV_PIX_FMT_CUDA; +#endif +#if CONFIG_VP9_VAAPI_HWACCEL + *fmtp++ = AV_PIX_FMT_VAAPI; +#endif + break; + } + + *fmtp++ = s->pix_fmt; + *fmtp = AV_PIX_FMT_NONE; + + ret = ff_thread_get_format(avctx, pix_fmts); + if (ret < 0) + return ret; + + avctx->pix_fmt = ret; + s->gf_fmt = s->pix_fmt; + s->w = w; + s->h = h; + } + + cols = (w + 7) >> 3; + rows = (h + 7) >> 3; + + if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt) + return 0; + + s->last_fmt = s->pix_fmt; + s->sb_cols = (w + 63) >> 6; + s->sb_rows = (h + 63) >> 6; + s->cols = (w + 7) >> 3; + s->rows = (h + 7) >> 3; + lflvl_len = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1; + +#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var) + av_freep(&s->intra_pred_data[0]); + // FIXME we slightly over-allocate here for subsampled chroma, but a little + // bit of padding shouldn't affect performance... + p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel + + lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx))); + if (!p) + return AVERROR(ENOMEM); + assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel); + assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel); + assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel); + assign(s->above_y_nnz_ctx, uint8_t *, 16); + assign(s->above_mode_ctx, uint8_t *, 16); + assign(s->above_mv_ctx, VP56mv(*)[2], 16); + assign(s->above_uv_nnz_ctx[0], uint8_t *, 16); + assign(s->above_uv_nnz_ctx[1], uint8_t *, 16); + assign(s->above_partition_ctx, uint8_t *, 8); + assign(s->above_skip_ctx, uint8_t *, 8); + assign(s->above_txfm_ctx, uint8_t *, 8); + assign(s->above_segpred_ctx, uint8_t *, 8); + assign(s->above_intra_ctx, uint8_t *, 8); + assign(s->above_comp_ctx, uint8_t *, 8); + assign(s->above_ref_ctx, uint8_t *, 8); + assign(s->above_filter_ctx, uint8_t *, 8); + assign(s->lflvl, VP9Filter *, lflvl_len); +#undef assign + + if (s->td) { + for (i = 0; i < s->active_tile_cols; i++) { + av_freep(&s->td[i].b_base); + av_freep(&s->td[i].block_base); + } + } + + if (s->s.h.bpp != s->last_bpp) { + ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT); + ff_videodsp_init(&s->vdsp, s->s.h.bpp); + s->last_bpp = s->s.h.bpp; + } + + return 0; +} + +static int update_block_buffers(AVCodecContext *avctx) +{ + int i; + VP9Context *s = avctx->priv_data; + int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel; + VP9TileData *td = &s->td[0]; + + if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass) + return 0; + + av_free(td->b_base); + av_free(td->block_base); + chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v); + chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v); + if (s->s.frames[CUR_FRAME].uses_2pass) { + int sbs = s->sb_cols * s->sb_rows; + + td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block)); + td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) + + 16 * 16 + 2 * chroma_eobs) * sbs); + if (!td->b_base || !td->block_base) + return AVERROR(ENOMEM); + td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel; + td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel; + td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel); + td->uveob_base[0] = td->eob_base + 16 * 16 * sbs; + td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs; + } else { + for (i = 1; i < s->active_tile_cols; i++) { + if (s->td[i].b_base && s->td[i].block_base) { + av_free(s->td[i].b_base); + av_free(s->td[i].block_base); + } + } + for (i = 0; i < s->active_tile_cols; i++) { + s->td[i].b_base = av_malloc(sizeof(VP9Block)); + s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) + + 16 * 16 + 2 * chroma_eobs); + if (!s->td[i].b_base || !s->td[i].block_base) + return AVERROR(ENOMEM); + s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel; + s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel; + s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel); + s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16; + s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs; + } + } + s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass; + + return 0; +} + +// The sign bit is at the end, not the start, of a bit sequence +static av_always_inline int get_sbits_inv(GetBitContext *gb, int n) +{ + int v = get_bits(gb, n); + return get_bits1(gb) ? -v : v; +} + +static av_always_inline int inv_recenter_nonneg(int v, int m) +{ + if (v > 2 * m) + return v; + if (v & 1) + return m - ((v + 1) >> 1); + return m + (v >> 1); +} + +// differential forward probability updates +static int update_prob(VP56RangeCoder *c, int p) +{ + static const int inv_map_table[255] = { + 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176, + 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, + 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, + 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115, + 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130, + 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145, + 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, + 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, + 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, + 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206, + 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221, + 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236, + 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, + 252, 253, 253, + }; + int d; + + /* This code is trying to do a differential probability update. For a + * current probability A in the range [1, 255], the difference to a new + * probability of any value can be expressed differentially as 1-A, 255-A + * where some part of this (absolute range) exists both in positive as + * well as the negative part, whereas another part only exists in one + * half. We're trying to code this shared part differentially, i.e. + * times two where the value of the lowest bit specifies the sign, and + * the single part is then coded on top of this. This absolute difference + * then again has a value of [0, 254], but a bigger value in this range + * indicates that we're further away from the original value A, so we + * can code this as a VLC code, since higher values are increasingly + * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough' + * updates vs. the 'fine, exact' updates further down the range, which + * adds one extra dimension to this differential update model. */ + + if (!vp8_rac_get(c)) { + d = vp8_rac_get_uint(c, 4) + 0; + } else if (!vp8_rac_get(c)) { + d = vp8_rac_get_uint(c, 4) + 16; + } else if (!vp8_rac_get(c)) { + d = vp8_rac_get_uint(c, 5) + 32; + } else { + d = vp8_rac_get_uint(c, 7); + if (d >= 65) + d = (d << 1) - 65 + vp8_rac_get(c); + d += 64; + av_assert2(d < FF_ARRAY_ELEMS(inv_map_table)); + } + + return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) : + 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p); +} + +static int read_colorspace_details(AVCodecContext *avctx) +{ + static const enum AVColorSpace colorspaces[8] = { + AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M, + AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB, + }; + VP9Context *s = avctx->priv_data; + int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12 + + s->bpp_index = bits; + s->s.h.bpp = 8 + bits * 2; + s->bytesperpixel = (7 + s->s.h.bpp) >> 3; + avctx->colorspace = colorspaces[get_bits(&s->gb, 3)]; + if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1 + static const enum AVPixelFormat pix_fmt_rgb[3] = { + AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12 + }; + s->ss_h = s->ss_v = 0; + avctx->color_range = AVCOL_RANGE_JPEG; + s->pix_fmt = pix_fmt_rgb[bits]; + if (avctx->profile & 1) { + if (get_bits1(&s->gb)) { + av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n"); + return AVERROR_INVALIDDATA; + } + } else { + av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n", + avctx->profile); + return AVERROR_INVALIDDATA; + } + } else { + static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = { + { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P }, + { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } }, + { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 }, + { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } }, + { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 }, + { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } } + }; + avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG; + if (avctx->profile & 1) { + s->ss_h = get_bits1(&s->gb); + s->ss_v = get_bits1(&s->gb); + s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]; + if (s->pix_fmt == AV_PIX_FMT_YUV420P) { + av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n", + avctx->profile); + return AVERROR_INVALIDDATA; + } else if (get_bits1(&s->gb)) { + av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n", + avctx->profile); + return AVERROR_INVALIDDATA; + } + } else { + s->ss_h = s->ss_v = 1; + s->pix_fmt = pix_fmt_for_ss[bits][1][1]; + } + } + + return 0; +} + +static int decode_frame_header(AVCodecContext *avctx, + const uint8_t *data, int size, int *ref) +{ + VP9Context *s = avctx->priv_data; + int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp; + int last_invisible; + const uint8_t *data2; + + /* general header */ + if ((ret = init_get_bits8(&s->gb, data, size)) < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n"); + return ret; + } + if (get_bits(&s->gb, 2) != 0x2) { // frame marker + av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n"); + return AVERROR_INVALIDDATA; + } + avctx->profile = get_bits1(&s->gb); + avctx->profile |= get_bits1(&s->gb) << 1; + if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb); + if (avctx->profile > 3) { + av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile); + return AVERROR_INVALIDDATA; + } + s->s.h.profile = avctx->profile; + if (get_bits1(&s->gb)) { + *ref = get_bits(&s->gb, 3); + return 0; + } + + s->last_keyframe = s->s.h.keyframe; + s->s.h.keyframe = !get_bits1(&s->gb); + + last_invisible = s->s.h.invisible; + s->s.h.invisible = !get_bits1(&s->gb); + s->s.h.errorres = get_bits1(&s->gb); + s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible; + + if (s->s.h.keyframe) { + if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode + av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n"); + return AVERROR_INVALIDDATA; + } + if ((ret = read_colorspace_details(avctx)) < 0) + return ret; + // for profile 1, here follows the subsampling bits + s->s.h.refreshrefmask = 0xff; + w = get_bits(&s->gb, 16) + 1; + h = get_bits(&s->gb, 16) + 1; + if (get_bits1(&s->gb)) // display size + skip_bits(&s->gb, 32); + } else { + s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0; + s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2); + if (s->s.h.intraonly) { + if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode + av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n"); + return AVERROR_INVALIDDATA; + } + if (avctx->profile >= 1) { + if ((ret = read_colorspace_details(avctx)) < 0) + return ret; + } else { + s->ss_h = s->ss_v = 1; + s->s.h.bpp = 8; + s->bpp_index = 0; + s->bytesperpixel = 1; + s->pix_fmt = AV_PIX_FMT_YUV420P; + avctx->colorspace = AVCOL_SPC_BT470BG; + avctx->color_range = AVCOL_RANGE_MPEG; + } + s->s.h.refreshrefmask = get_bits(&s->gb, 8); + w = get_bits(&s->gb, 16) + 1; + h = get_bits(&s->gb, 16) + 1; + if (get_bits1(&s->gb)) // display size + skip_bits(&s->gb, 32); + } else { + s->s.h.refreshrefmask = get_bits(&s->gb, 8); + s->s.h.refidx[0] = get_bits(&s->gb, 3); + s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres; + s->s.h.refidx[1] = get_bits(&s->gb, 3); + s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres; + s->s.h.refidx[2] = get_bits(&s->gb, 3); + s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres; + if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] || + !s->s.refs[s->s.h.refidx[1]].f->buf[0] || + !s->s.refs[s->s.h.refidx[2]].f->buf[0]) { + av_log(avctx, AV_LOG_ERROR, "Not all references are available\n"); + return AVERROR_INVALIDDATA; + } + if (get_bits1(&s->gb)) { + w = s->s.refs[s->s.h.refidx[0]].f->width; + h = s->s.refs[s->s.h.refidx[0]].f->height; + } else if (get_bits1(&s->gb)) { + w = s->s.refs[s->s.h.refidx[1]].f->width; + h = s->s.refs[s->s.h.refidx[1]].f->height; + } else if (get_bits1(&s->gb)) { + w = s->s.refs[s->s.h.refidx[2]].f->width; + h = s->s.refs[s->s.h.refidx[2]].f->height; + } else { + w = get_bits(&s->gb, 16) + 1; + h = get_bits(&s->gb, 16) + 1; + } + // Note that in this code, "CUR_FRAME" is actually before we + // have formally allocated a frame, and thus actually represents + // the _last_ frame + s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w && + s->s.frames[CUR_FRAME].tf.f->height == h; + if (get_bits1(&s->gb)) // display size + skip_bits(&s->gb, 32); + s->s.h.highprecisionmvs = get_bits1(&s->gb); + s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE : + get_bits(&s->gb, 2); + s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] || + s->s.h.signbias[0] != s->s.h.signbias[2]; + if (s->s.h.allowcompinter) { + if (s->s.h.signbias[0] == s->s.h.signbias[1]) { + s->s.h.fixcompref = 2; + s->s.h.varcompref[0] = 0; + s->s.h.varcompref[1] = 1; + } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) { + s->s.h.fixcompref = 1; + s->s.h.varcompref[0] = 0; + s->s.h.varcompref[1] = 2; + } else { + s->s.h.fixcompref = 0; + s->s.h.varcompref[0] = 1; + s->s.h.varcompref[1] = 2; + } + } + } + } + s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb); + s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb); + s->s.h.framectxid = c = get_bits(&s->gb, 2); + if (s->s.h.keyframe || s->s.h.intraonly) + s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes + + /* loopfilter header data */ + if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) { + // reset loopfilter defaults + s->s.h.lf_delta.ref[0] = 1; + s->s.h.lf_delta.ref[1] = 0; + s->s.h.lf_delta.ref[2] = -1; + s->s.h.lf_delta.ref[3] = -1; + s->s.h.lf_delta.mode[0] = 0; + s->s.h.lf_delta.mode[1] = 0; + memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat)); + } + s->s.h.filter.level = get_bits(&s->gb, 6); + sharp = get_bits(&s->gb, 3); + // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep + // the old cache values since they are still valid + if (s->s.h.filter.sharpness != sharp) { + for (i = 1; i <= 63; i++) { + int limit = i; + + if (sharp > 0) { + limit >>= (sharp + 3) >> 2; + limit = FFMIN(limit, 9 - sharp); + } + limit = FFMAX(limit, 1); + + s->filter_lut.lim_lut[i] = limit; + s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit; + } + } + s->s.h.filter.sharpness = sharp; + if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) { + if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) { + for (i = 0; i < 4; i++) + if (get_bits1(&s->gb)) + s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6); + for (i = 0; i < 2; i++) + if (get_bits1(&s->gb)) + s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6); + } + } + + /* quantization header data */ + s->s.h.yac_qi = get_bits(&s->gb, 8); + s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0; + s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0; + s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0; + s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 && + s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0; + if (s->s.h.lossless) + avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS; + + /* segmentation header info */ + if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) { + if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) { + for (i = 0; i < 7; i++) + s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ? + get_bits(&s->gb, 8) : 255; + if ((s->s.h.segmentation.temporal = get_bits1(&s->gb))) + for (i = 0; i < 3; i++) + s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ? + get_bits(&s->gb, 8) : 255; + } + + if (get_bits1(&s->gb)) { + s->s.h.segmentation.absolute_vals = get_bits1(&s->gb); + for (i = 0; i < 8; i++) { + if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb))) + s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8); + if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb))) + s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6); + if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb))) + s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2); + s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb); + } + } + } + + // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas + for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) { + int qyac, qydc, quvac, quvdc, lflvl, sh; + + if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) { + if (s->s.h.segmentation.absolute_vals) + qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8); + else + qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8); + } else { + qyac = s->s.h.yac_qi; + } + qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8); + quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8); + quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8); + qyac = av_clip_uintp2(qyac, 8); + + s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc]; + s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac]; + s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc]; + s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac]; + + sh = s->s.h.filter.level >= 32; + if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) { + if (s->s.h.segmentation.absolute_vals) + lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6); + else + lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6); + } else { + lflvl = s->s.h.filter.level; + } + if (s->s.h.lf_delta.enabled) { + s->s.h.segmentation.feat[i].lflvl[0][0] = + s->s.h.segmentation.feat[i].lflvl[0][1] = + av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6); + for (j = 1; j < 4; j++) { + s->s.h.segmentation.feat[i].lflvl[j][0] = + av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] + + s->s.h.lf_delta.mode[0]) * (1 << sh)), 6); + s->s.h.segmentation.feat[i].lflvl[j][1] = + av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] + + s->s.h.lf_delta.mode[1]) * (1 << sh)), 6); + } + } else { + memset(s->s.h.segmentation.feat[i].lflvl, lflvl, + sizeof(s->s.h.segmentation.feat[i].lflvl)); + } + } + + /* tiling info */ + if ((ret = update_size(avctx, w, h)) < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", + w, h, s->pix_fmt); + return ret; + } + for (s->s.h.tiling.log2_tile_cols = 0; + s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols); + s->s.h.tiling.log2_tile_cols++) ; + for (max = 0; (s->sb_cols >> max) >= 4; max++) ; + max = FFMAX(0, max - 1); + while (max > s->s.h.tiling.log2_tile_cols) { + if (get_bits1(&s->gb)) + s->s.h.tiling.log2_tile_cols++; + else + break; + } + s->s.h.tiling.log2_tile_rows = decode012(&s->gb); + s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows; + if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) { + int n_range_coders; + VP56RangeCoder *rc; + + if (s->td) { + for (i = 0; i < s->active_tile_cols; i++) { + av_free(s->td[i].b_base); + av_free(s->td[i].block_base); + } + av_free(s->td); + } + + s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols; + vp9_free_entries(avctx); + s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ? + s->s.h.tiling.tile_cols : 1; + vp9_alloc_entries(avctx, s->sb_rows); + if (avctx->active_thread_type == FF_THREAD_SLICE) { + n_range_coders = 4; // max_tile_rows + } else { + n_range_coders = s->s.h.tiling.tile_cols; + } + s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) + + n_range_coders * sizeof(VP56RangeCoder)); + if (!s->td) + return AVERROR(ENOMEM); + rc = (VP56RangeCoder *) &s->td[s->active_tile_cols]; + for (i = 0; i < s->active_tile_cols; i++) { + s->td[i].s = s; + s->td[i].c_b = rc; + rc += n_range_coders; + } + } + + /* check reference frames */ + if (!s->s.h.keyframe && !s->s.h.intraonly) { + for (i = 0; i < 3; i++) { + AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f; + int refw = ref->width, refh = ref->height; + + if (ref->format != avctx->pix_fmt) { + av_log(avctx, AV_LOG_ERROR, + "Ref pixfmt (%s) did not match current frame (%s)", + av_get_pix_fmt_name(ref->format), + av_get_pix_fmt_name(avctx->pix_fmt)); + return AVERROR_INVALIDDATA; + } else if (refw == w && refh == h) { + s->mvscale[i][0] = s->mvscale[i][1] = 0; + } else { + if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) { + av_log(avctx, AV_LOG_ERROR, + "Invalid ref frame dimensions %dx%d for frame size %dx%d\n", + refw, refh, w, h); + return AVERROR_INVALIDDATA; + } + s->mvscale[i][0] = (refw << 14) / w; + s->mvscale[i][1] = (refh << 14) / h; + s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14; + s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14; + } + } + } + + if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) { + s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p = + s->prob_ctx[3].p = ff_vp9_default_probs; + memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs, + sizeof(ff_vp9_default_coef_probs)); + memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs, + sizeof(ff_vp9_default_coef_probs)); + memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs, + sizeof(ff_vp9_default_coef_probs)); + memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs, + sizeof(ff_vp9_default_coef_probs)); + } else if (s->s.h.intraonly && s->s.h.resetctx == 2) { + s->prob_ctx[c].p = ff_vp9_default_probs; + memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs, + sizeof(ff_vp9_default_coef_probs)); + } + + // next 16 bits is size of the rest of the header (arith-coded) + s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16); + s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8; + + data2 = align_get_bits(&s->gb); + if (size2 > size - (data2 - data)) { + av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n"); + return AVERROR_INVALIDDATA; + } + ret = ff_vp56_init_range_decoder(&s->c, data2, size2); + if (ret < 0) + return ret; + + if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit + av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n"); + return AVERROR_INVALIDDATA; + } + + for (i = 0; i < s->active_tile_cols; i++) { + if (s->s.h.keyframe || s->s.h.intraonly) { + memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef)); + memset(s->td[i].counts.eob, 0, sizeof(s->td[0].counts.eob)); + } else { + memset(&s->td[i].counts, 0, sizeof(s->td[0].counts)); + } + } + + /* FIXME is it faster to not copy here, but do it down in the fw updates + * as explicit copies if the fw update is missing (and skip the copy upon + * fw update)? */ + s->prob.p = s->prob_ctx[c].p; + + // txfm updates + if (s->s.h.lossless) { + s->s.h.txfmmode = TX_4X4; + } else { + s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2); + if (s->s.h.txfmmode == 3) + s->s.h.txfmmode += vp8_rac_get(&s->c); + + if (s->s.h.txfmmode == TX_SWITCHABLE) { + for (i = 0; i < 2; i++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]); + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.tx16p[i][j] = + update_prob(&s->c, s->prob.p.tx16p[i][j]); + for (i = 0; i < 2; i++) + for (j = 0; j < 3; j++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.tx32p[i][j] = + update_prob(&s->c, s->prob.p.tx32p[i][j]); + } + } + + // coef updates + for (i = 0; i < 4; i++) { + uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i]; + if (vp8_rac_get(&s->c)) { + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + for (l = 0; l < 6; l++) + for (m = 0; m < 6; m++) { + uint8_t *p = s->prob.coef[i][j][k][l][m]; + uint8_t *r = ref[j][k][l][m]; + if (m >= 3 && l == 0) // dc only has 3 pt + break; + for (n = 0; n < 3; n++) { + if (vp56_rac_get_prob_branchy(&s->c, 252)) + p[n] = update_prob(&s->c, r[n]); + else + p[n] = r[n]; + } + memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8); + } + } else { + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + for (l = 0; l < 6; l++) + for (m = 0; m < 6; m++) { + uint8_t *p = s->prob.coef[i][j][k][l][m]; + uint8_t *r = ref[j][k][l][m]; + if (m > 3 && l == 0) // dc only has 3 pt + break; + memcpy(p, r, 3); + memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8); + } + } + if (s->s.h.txfmmode == i) + break; + } + + // mode updates + for (i = 0; i < 3; i++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]); + if (!s->s.h.keyframe && !s->s.h.intraonly) { + for (i = 0; i < 7; i++) + for (j = 0; j < 3; j++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.mv_mode[i][j] = + update_prob(&s->c, s->prob.p.mv_mode[i][j]); + + if (s->s.h.filtermode == FILTER_SWITCHABLE) + for (i = 0; i < 4; i++) + for (j = 0; j < 2; j++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.filter[i][j] = + update_prob(&s->c, s->prob.p.filter[i][j]); + + for (i = 0; i < 4; i++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]); + + if (s->s.h.allowcompinter) { + s->s.h.comppredmode = vp8_rac_get(&s->c); + if (s->s.h.comppredmode) + s->s.h.comppredmode += vp8_rac_get(&s->c); + if (s->s.h.comppredmode == PRED_SWITCHABLE) + for (i = 0; i < 5; i++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.comp[i] = + update_prob(&s->c, s->prob.p.comp[i]); + } else { + s->s.h.comppredmode = PRED_SINGLEREF; + } + + if (s->s.h.comppredmode != PRED_COMPREF) { + for (i = 0; i < 5; i++) { + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.single_ref[i][0] = + update_prob(&s->c, s->prob.p.single_ref[i][0]); + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.single_ref[i][1] = + update_prob(&s->c, s->prob.p.single_ref[i][1]); + } + } + + if (s->s.h.comppredmode != PRED_SINGLEREF) { + for (i = 0; i < 5; i++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.comp_ref[i] = + update_prob(&s->c, s->prob.p.comp_ref[i]); + } + + for (i = 0; i < 4; i++) + for (j = 0; j < 9; j++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.y_mode[i][j] = + update_prob(&s->c, s->prob.p.y_mode[i][j]); + + for (i = 0; i < 4; i++) + for (j = 0; j < 4; j++) + for (k = 0; k < 3; k++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.partition[3 - i][j][k] = + update_prob(&s->c, + s->prob.p.partition[3 - i][j][k]); + + // mv fields don't use the update_prob subexp model for some reason + for (i = 0; i < 3; i++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1; + + for (i = 0; i < 2; i++) { + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.mv_comp[i].sign = + (vp8_rac_get_uint(&s->c, 7) << 1) | 1; + + for (j = 0; j < 10; j++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.mv_comp[i].classes[j] = + (vp8_rac_get_uint(&s->c, 7) << 1) | 1; + + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.mv_comp[i].class0 = + (vp8_rac_get_uint(&s->c, 7) << 1) | 1; + + for (j = 0; j < 10; j++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.mv_comp[i].bits[j] = + (vp8_rac_get_uint(&s->c, 7) << 1) | 1; + } + + for (i = 0; i < 2; i++) { + for (j = 0; j < 2; j++) + for (k = 0; k < 3; k++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.mv_comp[i].class0_fp[j][k] = + (vp8_rac_get_uint(&s->c, 7) << 1) | 1; + + for (j = 0; j < 3; j++) + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.mv_comp[i].fp[j] = + (vp8_rac_get_uint(&s->c, 7) << 1) | 1; + } + + if (s->s.h.highprecisionmvs) { + for (i = 0; i < 2; i++) { + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.mv_comp[i].class0_hp = + (vp8_rac_get_uint(&s->c, 7) << 1) | 1; + + if (vp56_rac_get_prob_branchy(&s->c, 252)) + s->prob.p.mv_comp[i].hp = + (vp8_rac_get_uint(&s->c, 7) << 1) | 1; + } + } + } + + return (data2 - data) + size2; +} + +static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl, + ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl) +{ + const VP9Context *s = td->s; + int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) | + (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1); + const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] : + s->prob.p.partition[bl][c]; + enum BlockPartition bp; + ptrdiff_t hbs = 4 >> bl; + AVFrame *f = s->s.frames[CUR_FRAME].tf.f; + ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1]; + int bytesperpixel = s->bytesperpixel; + + if (bl == BL_8X8) { + bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p); + ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp); + } else if (col + hbs < s->cols) { // FIXME why not <=? + if (row + hbs < s->rows) { // FIXME why not <=? + bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p); + switch (bp) { + case PARTITION_NONE: + ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp); + break; + case PARTITION_H: + ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp); + yoff += hbs * 8 * y_stride; + uvoff += hbs * 8 * uv_stride >> s->ss_v; + ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp); + break; + case PARTITION_V: + ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp); + yoff += hbs * 8 * bytesperpixel; + uvoff += hbs * 8 * bytesperpixel >> s->ss_h; + ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp); + break; + case PARTITION_SPLIT: + decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1); + decode_sb(td, row, col + hbs, lflvl, + yoff + 8 * hbs * bytesperpixel, + uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1); + yoff += hbs * 8 * y_stride; + uvoff += hbs * 8 * uv_stride >> s->ss_v; + decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1); + decode_sb(td, row + hbs, col + hbs, lflvl, + yoff + 8 * hbs * bytesperpixel, + uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1); + break; + default: + av_assert0(0); + } + } else if (vp56_rac_get_prob_branchy(td->c, p[1])) { + bp = PARTITION_SPLIT; + decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1); + decode_sb(td, row, col + hbs, lflvl, + yoff + 8 * hbs * bytesperpixel, + uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1); + } else { + bp = PARTITION_H; + ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp); + } + } else if (row + hbs < s->rows) { // FIXME why not <=? + if (vp56_rac_get_prob_branchy(td->c, p[2])) { + bp = PARTITION_SPLIT; + decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1); + yoff += hbs * 8 * y_stride; + uvoff += hbs * 8 * uv_stride >> s->ss_v; + decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1); + } else { + bp = PARTITION_V; + ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp); + } + } else { + bp = PARTITION_SPLIT; + decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1); + } + td->counts.partition[bl][c][bp]++; +} + +static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl, + ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl) +{ + const VP9Context *s = td->s; + VP9Block *b = td->b; + ptrdiff_t hbs = 4 >> bl; + AVFrame *f = s->s.frames[CUR_FRAME].tf.f; + ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1]; + int bytesperpixel = s->bytesperpixel; + + if (bl == BL_8X8) { + av_assert2(b->bl == BL_8X8); + ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp); + } else if (td->b->bl == bl) { + ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp); + if (b->bp == PARTITION_H && row + hbs < s->rows) { + yoff += hbs * 8 * y_stride; + uvoff += hbs * 8 * uv_stride >> s->ss_v; + ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp); + } else if (b->bp == PARTITION_V && col + hbs < s->cols) { + yoff += hbs * 8 * bytesperpixel; + uvoff += hbs * 8 * bytesperpixel >> s->ss_h; + ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp); + } + } else { + decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1); + if (col + hbs < s->cols) { // FIXME why not <=? + if (row + hbs < s->rows) { + decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel, + uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1); + yoff += hbs * 8 * y_stride; + uvoff += hbs * 8 * uv_stride >> s->ss_v; + decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1); + decode_sb_mem(td, row + hbs, col + hbs, lflvl, + yoff + 8 * hbs * bytesperpixel, + uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1); + } else { + yoff += hbs * 8 * bytesperpixel; + uvoff += hbs * 8 * bytesperpixel >> s->ss_h; + decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1); + } + } else if (row + hbs < s->rows) { + yoff += hbs * 8 * y_stride; + uvoff += hbs * 8 * uv_stride >> s->ss_v; + decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1); + } + } +} + +static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n) +{ + int sb_start = ( idx * n) >> log2_n; + int sb_end = ((idx + 1) * n) >> log2_n; + *start = FFMIN(sb_start, n) << 3; + *end = FFMIN(sb_end, n) << 3; +} + +static void free_buffers(VP9Context *s) +{ + int i; + + av_freep(&s->intra_pred_data[0]); + for (i = 0; i < s->active_tile_cols; i++) { + av_freep(&s->td[i].b_base); + av_freep(&s->td[i].block_base); + } +} + +static av_cold int vp9_decode_free(AVCodecContext *avctx) +{ + VP9Context *s = avctx->priv_data; + int i; + + for (i = 0; i < 3; i++) { + if (s->s.frames[i].tf.f->buf[0]) + vp9_frame_unref(avctx, &s->s.frames[i]); + av_frame_free(&s->s.frames[i].tf.f); + } + for (i = 0; i < 8; i++) { + if (s->s.refs[i].f->buf[0]) + ff_thread_release_buffer(avctx, &s->s.refs[i]); + av_frame_free(&s->s.refs[i].f); + if (s->next_refs[i].f->buf[0]) + ff_thread_release_buffer(avctx, &s->next_refs[i]); + av_frame_free(&s->next_refs[i].f); + } + + free_buffers(s); + vp9_free_entries(avctx); + av_freep(&s->td); + return 0; +} + +static int decode_tiles(AVCodecContext *avctx, + const uint8_t *data, int size) +{ + VP9Context *s = avctx->priv_data; + VP9TileData *td = &s->td[0]; + int row, col, tile_row, tile_col, ret; + int bytesperpixel; + int tile_row_start, tile_row_end, tile_col_start, tile_col_end; + AVFrame *f; + ptrdiff_t yoff, uvoff, ls_y, ls_uv; + + f = s->s.frames[CUR_FRAME].tf.f; + ls_y = f->linesize[0]; + ls_uv =f->linesize[1]; + bytesperpixel = s->bytesperpixel; + + yoff = uvoff = 0; + for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) { + set_tile_offset(&tile_row_start, &tile_row_end, + tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows); + + for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) { + int64_t tile_size; + + if (tile_col == s->s.h.tiling.tile_cols - 1 && + tile_row == s->s.h.tiling.tile_rows - 1) { + tile_size = size; + } else { + tile_size = AV_RB32(data); + data += 4; + size -= 4; + } + if (tile_size > size) { + ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0); + return AVERROR_INVALIDDATA; + } + ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size); + if (ret < 0) + return ret; + if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit + ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0); + return AVERROR_INVALIDDATA; + } + data += tile_size; + size -= tile_size; + } + + for (row = tile_row_start; row < tile_row_end; + row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) { + VP9Filter *lflvl_ptr = s->lflvl; + ptrdiff_t yoff2 = yoff, uvoff2 = uvoff; + + for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) { + set_tile_offset(&tile_col_start, &tile_col_end, + tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols); + td->tile_col_start = tile_col_start; + if (s->pass != 2) { + memset(td->left_partition_ctx, 0, 8); + memset(td->left_skip_ctx, 0, 8); + if (s->s.h.keyframe || s->s.h.intraonly) { + memset(td->left_mode_ctx, DC_PRED, 16); + } else { + memset(td->left_mode_ctx, NEARESTMV, 8); + } + memset(td->left_y_nnz_ctx, 0, 16); + memset(td->left_uv_nnz_ctx, 0, 32); + memset(td->left_segpred_ctx, 0, 8); + + td->c = &td->c_b[tile_col]; + } + + for (col = tile_col_start; + col < tile_col_end; + col += 8, yoff2 += 64 * bytesperpixel, + uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) { + // FIXME integrate with lf code (i.e. zero after each + // use, similar to invtxfm coefficients, or similar) + if (s->pass != 1) { + memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask)); + } + + if (s->pass == 2) { + decode_sb_mem(td, row, col, lflvl_ptr, + yoff2, uvoff2, BL_64X64); + } else { + decode_sb(td, row, col, lflvl_ptr, + yoff2, uvoff2, BL_64X64); + } + } + } + + if (s->pass == 1) + continue; + + // backup pre-loopfilter reconstruction data for intra + // prediction of next row of sb64s + if (row + 8 < s->rows) { + memcpy(s->intra_pred_data[0], + f->data[0] + yoff + 63 * ls_y, + 8 * s->cols * bytesperpixel); + memcpy(s->intra_pred_data[1], + f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv, + 8 * s->cols * bytesperpixel >> s->ss_h); + memcpy(s->intra_pred_data[2], + f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv, + 8 * s->cols * bytesperpixel >> s->ss_h); + } + + // loopfilter one row + if (s->s.h.filter.level) { + yoff2 = yoff; + uvoff2 = uvoff; + lflvl_ptr = s->lflvl; + for (col = 0; col < s->cols; + col += 8, yoff2 += 64 * bytesperpixel, + uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) { + ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col, + yoff2, uvoff2); + } + } + + // FIXME maybe we can make this more finegrained by running the + // loopfilter per-block instead of after each sbrow + // In fact that would also make intra pred left preparation easier? + ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0); + } + } + return 0; +} + +#if HAVE_THREADS +static av_always_inline +int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr, + int threadnr) +{ + VP9Context *s = avctx->priv_data; + VP9TileData *td = &s->td[jobnr]; + ptrdiff_t uvoff, yoff, ls_y, ls_uv; + int bytesperpixel = s->bytesperpixel, row, col, tile_row; + unsigned tile_cols_len; + int tile_row_start, tile_row_end, tile_col_start, tile_col_end; + VP9Filter *lflvl_ptr_base; + AVFrame *f; + + f = s->s.frames[CUR_FRAME].tf.f; + ls_y = f->linesize[0]; + ls_uv =f->linesize[1]; + + set_tile_offset(&tile_col_start, &tile_col_end, + jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols); + td->tile_col_start = tile_col_start; + uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3); + yoff = (64 * bytesperpixel)*(tile_col_start >> 3); + lflvl_ptr_base = s->lflvl+(tile_col_start >> 3); + + for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) { + set_tile_offset(&tile_row_start, &tile_row_end, + tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows); + + td->c = &td->c_b[tile_row]; + for (row = tile_row_start; row < tile_row_end; + row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) { + ptrdiff_t yoff2 = yoff, uvoff2 = uvoff; + VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3); + + memset(td->left_partition_ctx, 0, 8); + memset(td->left_skip_ctx, 0, 8); + if (s->s.h.keyframe || s->s.h.intraonly) { + memset(td->left_mode_ctx, DC_PRED, 16); + } else { + memset(td->left_mode_ctx, NEARESTMV, 8); + } + memset(td->left_y_nnz_ctx, 0, 16); + memset(td->left_uv_nnz_ctx, 0, 32); + memset(td->left_segpred_ctx, 0, 8); + + for (col = tile_col_start; + col < tile_col_end; + col += 8, yoff2 += 64 * bytesperpixel, + uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) { + // FIXME integrate with lf code (i.e. zero after each + // use, similar to invtxfm coefficients, or similar) + memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask)); + decode_sb(td, row, col, lflvl_ptr, + yoff2, uvoff2, BL_64X64); + } + + // backup pre-loopfilter reconstruction data for intra + // prediction of next row of sb64s + tile_cols_len = tile_col_end - tile_col_start; + if (row + 8 < s->rows) { + memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel), + f->data[0] + yoff + 63 * ls_y, + 8 * tile_cols_len * bytesperpixel); + memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h), + f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv, + 8 * tile_cols_len * bytesperpixel >> s->ss_h); + memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h), + f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv, + 8 * tile_cols_len * bytesperpixel >> s->ss_h); + } + + vp9_report_tile_progress(s, row >> 3, 1); + } + } + return 0; +} + +static av_always_inline +int loopfilter_proc(AVCodecContext *avctx) +{ + VP9Context *s = avctx->priv_data; + ptrdiff_t uvoff, yoff, ls_y, ls_uv; + VP9Filter *lflvl_ptr; + int bytesperpixel = s->bytesperpixel, col, i; + AVFrame *f; + + f = s->s.frames[CUR_FRAME].tf.f; + ls_y = f->linesize[0]; + ls_uv =f->linesize[1]; + + for (i = 0; i < s->sb_rows; i++) { + vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols); + + if (s->s.h.filter.level) { + yoff = (ls_y * 64)*i; + uvoff = (ls_uv * 64 >> s->ss_v)*i; + lflvl_ptr = s->lflvl+s->sb_cols*i; + for (col = 0; col < s->cols; + col += 8, yoff += 64 * bytesperpixel, + uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) { + ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col, + yoff, uvoff); + } + } + } + return 0; +} +#endif + +static int vp9_decode_frame(AVCodecContext *avctx, void *frame, + int *got_frame, AVPacket *pkt) +{ + const uint8_t *data = pkt->data; + int size = pkt->size; + VP9Context *s = avctx->priv_data; + int ret, i, j, ref; + int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map && + (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map); + AVFrame *f; + + if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) { + return ret; + } else if (ret == 0) { + if (!s->s.refs[ref].f->buf[0]) { + av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref); + return AVERROR_INVALIDDATA; + } + if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0) + return ret; + ((AVFrame *)frame)->pts = pkt->pts; +#if FF_API_PKT_PTS +FF_DISABLE_DEPRECATION_WARNINGS + ((AVFrame *)frame)->pkt_pts = pkt->pts; +FF_ENABLE_DEPRECATION_WARNINGS +#endif + ((AVFrame *)frame)->pkt_dts = pkt->dts; + for (i = 0; i < 8; i++) { + if (s->next_refs[i].f->buf[0]) + ff_thread_release_buffer(avctx, &s->next_refs[i]); + if (s->s.refs[i].f->buf[0] && + (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0) + return ret; + } + *got_frame = 1; + return pkt->size; + } + data += ret; + size -= ret; + + if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) { + if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0]) + vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]); + if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] && + (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0) + return ret; + } + if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0]) + vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]); + if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] && + (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0) + return ret; + if (s->s.frames[CUR_FRAME].tf.f->buf[0]) + vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]); + if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0) + return ret; + f = s->s.frames[CUR_FRAME].tf.f; + f->key_frame = s->s.h.keyframe; + f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P; + + if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] && + (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width || + s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) { + vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]); + } + + // ref frame setup + for (i = 0; i < 8; i++) { + if (s->next_refs[i].f->buf[0]) + ff_thread_release_buffer(avctx, &s->next_refs[i]); + if (s->s.h.refreshrefmask & (1 << i)) { + ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf); + } else if (s->s.refs[i].f->buf[0]) { + ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]); + } + if (ret < 0) + return ret; + } + + if (avctx->hwaccel) { + ret = avctx->hwaccel->start_frame(avctx, NULL, 0); + if (ret < 0) + return ret; + ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size); + if (ret < 0) + return ret; + ret = avctx->hwaccel->end_frame(avctx); + if (ret < 0) + return ret; + goto finish; + } + + // main tile decode loop + memset(s->above_partition_ctx, 0, s->cols); + memset(s->above_skip_ctx, 0, s->cols); + if (s->s.h.keyframe || s->s.h.intraonly) { + memset(s->above_mode_ctx, DC_PRED, s->cols * 2); + } else { + memset(s->above_mode_ctx, NEARESTMV, s->cols); + } + memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16); + memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h); + memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h); + memset(s->above_segpred_ctx, 0, s->cols); + s->pass = s->s.frames[CUR_FRAME].uses_2pass = + avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode; + if ((ret = update_block_buffers(avctx)) < 0) { + av_log(avctx, AV_LOG_ERROR, + "Failed to allocate block buffers\n"); + return ret; + } + if (s->s.h.refreshctx && s->s.h.parallelmode) { + int j, k, l, m; + + for (i = 0; i < 4; i++) { + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + for (l = 0; l < 6; l++) + for (m = 0; m < 6; m++) + memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m], + s->prob.coef[i][j][k][l][m], 3); + if (s->s.h.txfmmode == i) + break; + } + s->prob_ctx[s->s.h.framectxid].p = s->prob.p; + ff_thread_finish_setup(avctx); + } else if (!s->s.h.refreshctx) { + ff_thread_finish_setup(avctx); + } + +#if HAVE_THREADS + if (avctx->active_thread_type & FF_THREAD_SLICE) { + for (i = 0; i < s->sb_rows; i++) + atomic_store(&s->entries[i], 0); + } +#endif + + do { + for (i = 0; i < s->active_tile_cols; i++) { + s->td[i].b = s->td[i].b_base; + s->td[i].block = s->td[i].block_base; + s->td[i].uvblock[0] = s->td[i].uvblock_base[0]; + s->td[i].uvblock[1] = s->td[i].uvblock_base[1]; + s->td[i].eob = s->td[i].eob_base; + s->td[i].uveob[0] = s->td[i].uveob_base[0]; + s->td[i].uveob[1] = s->td[i].uveob_base[1]; + } + +#if HAVE_THREADS + if (avctx->active_thread_type == FF_THREAD_SLICE) { + int tile_row, tile_col; + + av_assert1(!s->pass); + + for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) { + for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) { + int64_t tile_size; + + if (tile_col == s->s.h.tiling.tile_cols - 1 && + tile_row == s->s.h.tiling.tile_rows - 1) { + tile_size = size; + } else { + tile_size = AV_RB32(data); + data += 4; + size -= 4; + } + if (tile_size > size) + return AVERROR_INVALIDDATA; + ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size); + if (ret < 0) + return ret; + if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit + return AVERROR_INVALIDDATA; + data += tile_size; + size -= tile_size; + } + } + + ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols); + } else +#endif + { + ret = decode_tiles(avctx, data, size); + if (ret < 0) { + ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0); + return ret; + } + } + + // Sum all counts fields into td[0].counts for tile threading + if (avctx->active_thread_type == FF_THREAD_SLICE) + for (i = 1; i < s->s.h.tiling.tile_cols; i++) + for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++) + ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j]; + + if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) { + ff_vp9_adapt_probs(s); + ff_thread_finish_setup(avctx); + } + } while (s->pass++ == 1); + ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0); + +finish: + // ref frame setup + for (i = 0; i < 8; i++) { + if (s->s.refs[i].f->buf[0]) + ff_thread_release_buffer(avctx, &s->s.refs[i]); + if (s->next_refs[i].f->buf[0] && + (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0) + return ret; + } + + if (!s->s.h.invisible) { + if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0) + return ret; + *got_frame = 1; + } + + return pkt->size; +} + +static void vp9_decode_flush(AVCodecContext *avctx) +{ + VP9Context *s = avctx->priv_data; + int i; + + for (i = 0; i < 3; i++) + vp9_frame_unref(avctx, &s->s.frames[i]); + for (i = 0; i < 8; i++) + ff_thread_release_buffer(avctx, &s->s.refs[i]); +} + +static int init_frames(AVCodecContext *avctx) +{ + VP9Context *s = avctx->priv_data; + int i; + + for (i = 0; i < 3; i++) { + s->s.frames[i].tf.f = av_frame_alloc(); + if (!s->s.frames[i].tf.f) { + vp9_decode_free(avctx); + av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i); + return AVERROR(ENOMEM); + } + } + for (i = 0; i < 8; i++) { + s->s.refs[i].f = av_frame_alloc(); + s->next_refs[i].f = av_frame_alloc(); + if (!s->s.refs[i].f || !s->next_refs[i].f) { + vp9_decode_free(avctx); + av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i); + return AVERROR(ENOMEM); + } + } + + return 0; +} + +static av_cold int vp9_decode_init(AVCodecContext *avctx) +{ + VP9Context *s = avctx->priv_data; + + avctx->internal->allocate_progress = 1; + s->last_bpp = 0; + s->s.h.filter.sharpness = -1; + + return init_frames(avctx); +} + +#if HAVE_THREADS +static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx) +{ + return init_frames(avctx); +} + +static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src) +{ + int i, ret; + VP9Context *s = dst->priv_data, *ssrc = src->priv_data; + + for (i = 0; i < 3; i++) { + if (s->s.frames[i].tf.f->buf[0]) + vp9_frame_unref(dst, &s->s.frames[i]); + if (ssrc->s.frames[i].tf.f->buf[0]) { + if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0) + return ret; + } + } + for (i = 0; i < 8; i++) { + if (s->s.refs[i].f->buf[0]) + ff_thread_release_buffer(dst, &s->s.refs[i]); + if (ssrc->next_refs[i].f->buf[0]) { + if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0) + return ret; + } + } + + s->s.h.invisible = ssrc->s.h.invisible; + s->s.h.keyframe = ssrc->s.h.keyframe; + s->s.h.intraonly = ssrc->s.h.intraonly; + s->ss_v = ssrc->ss_v; + s->ss_h = ssrc->ss_h; + s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled; + s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map; + s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals; + s->bytesperpixel = ssrc->bytesperpixel; + s->gf_fmt = ssrc->gf_fmt; + s->w = ssrc->w; + s->h = ssrc->h; + s->s.h.bpp = ssrc->s.h.bpp; + s->bpp_index = ssrc->bpp_index; + s->pix_fmt = ssrc->pix_fmt; + memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx)); + memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta)); + memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat, + sizeof(s->s.h.segmentation.feat)); + + return 0; +} +#endif + +AVCodec ff_vp9_decoder = { + .name = "vp9", + .long_name = NULL_IF_CONFIG_SMALL("Google VP9"), + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_VP9, + .priv_data_size = sizeof(VP9Context), + .init = vp9_decode_init, + .close = vp9_decode_free, + .decode = vp9_decode_frame, + .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS, + .caps_internal = FF_CODEC_CAP_SLICE_THREAD_HAS_MF, + .flush = vp9_decode_flush, + .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy), + .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context), + .profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles), + .bsfs = "vp9_superframe_split", + .hw_configs = (const AVCodecHWConfigInternal*[]) { +#if CONFIG_VP9_DXVA2_HWACCEL + HWACCEL_DXVA2(vp9), +#endif +#if CONFIG_VP9_D3D11VA_HWACCEL + HWACCEL_D3D11VA(vp9), +#endif +#if CONFIG_VP9_D3D11VA2_HWACCEL + HWACCEL_D3D11VA2(vp9), +#endif +#if CONFIG_VP9_NVDEC_HWACCEL + HWACCEL_NVDEC(vp9), +#endif +#if CONFIG_VP9_VAAPI_HWACCEL + HWACCEL_VAAPI(vp9), +#endif + NULL + }, +}; diff --git a/libs/ffvpx/libavcodec/vp9.h b/libs/ffvpx/libavcodec/vp9.h new file mode 100644 index 000000000..c8d07ad98 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9.h @@ -0,0 +1,73 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VP9_H +#define AVCODEC_VP9_H + +enum TxfmMode { + TX_4X4, + TX_8X8, + TX_16X16, + TX_32X32, + N_TXFM_SIZES, + TX_SWITCHABLE = N_TXFM_SIZES, + N_TXFM_MODES +}; + +enum TxfmType { + DCT_DCT, + DCT_ADST, + ADST_DCT, + ADST_ADST, + N_TXFM_TYPES +}; + +enum IntraPredMode { + VERT_PRED, + HOR_PRED, + DC_PRED, + DIAG_DOWN_LEFT_PRED, + DIAG_DOWN_RIGHT_PRED, + VERT_RIGHT_PRED, + HOR_DOWN_PRED, + VERT_LEFT_PRED, + HOR_UP_PRED, + TM_VP8_PRED, + LEFT_DC_PRED, + TOP_DC_PRED, + DC_128_PRED, + DC_127_PRED, + DC_129_PRED, + N_INTRA_PRED_MODES +}; + +enum FilterMode { + FILTER_8TAP_SMOOTH, + FILTER_8TAP_REGULAR, + FILTER_8TAP_SHARP, + FILTER_BILINEAR, + N_FILTERS, + FILTER_SWITCHABLE = N_FILTERS, +}; + +#endif /* AVCODEC_VP9_H */ diff --git a/libs/ffvpx/libavcodec/vp9_mc_template.c b/libs/ffvpx/libavcodec/vp9_mc_template.c new file mode 100644 index 000000000..31e692f36 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9_mc_template.c @@ -0,0 +1,439 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define ROUNDED_DIV_MVx2(a, b) \ + (VP56mv) { .x = ROUNDED_DIV(a.x + b.x, 2), .y = ROUNDED_DIV(a.y + b.y, 2) } +#define ROUNDED_DIV_MVx4(a, b, c, d) \ + (VP56mv) { .x = ROUNDED_DIV(a.x + b.x + c.x + d.x, 4), \ + .y = ROUNDED_DIV(a.y + b.y + c.y + d.y, 4) } + +static void FN(inter_pred)(VP9TileData *td) +{ + static const uint8_t bwlog_tab[2][N_BS_SIZES] = { + { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 }, + { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 }, + }; + VP9Context *s = td->s; + VP9Block *b = td->b; + int row = td->row, col = td->col; + ThreadFrame *tref1 = &s->s.refs[s->s.h.refidx[b->ref[0]]], *tref2; + AVFrame *ref1 = tref1->f, *ref2; + int w1 = ref1->width, h1 = ref1->height, w2, h2; + ptrdiff_t ls_y = td->y_stride, ls_uv = td->uv_stride; + int bytesperpixel = BYTES_PER_PIXEL; + + if (b->comp) { + tref2 = &s->s.refs[s->s.h.refidx[b->ref[1]]]; + ref2 = tref2->f; + w2 = ref2->width; + h2 = ref2->height; + } + + // y inter pred + if (b->bs > BS_8x8) { + VP56mv uvmv; + +#if SCALED == 0 + if (b->bs == BS_8x4) { + mc_luma_dir(td, mc[3][b->filter][0], td->dst[0], ls_y, + ref1->data[0], ref1->linesize[0], tref1, + row << 3, col << 3, &b->mv[0][0],,,,, 8, 4, w1, h1, 0); + mc_luma_dir(td, mc[3][b->filter][0], + td->dst[0] + 4 * ls_y, ls_y, + ref1->data[0], ref1->linesize[0], tref1, + (row << 3) + 4, col << 3, &b->mv[2][0],,,,, 8, 4, w1, h1, 0); + w1 = (w1 + s->ss_h) >> s->ss_h; + if (s->ss_v) { + h1 = (h1 + 1) >> 1; + uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]); + mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][0], + td->dst[1], td->dst[2], ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + row << 2, col << (3 - s->ss_h), + &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0); + } else { + mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][0], + td->dst[1], td->dst[2], ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + row << 3, col << (3 - s->ss_h), + &b->mv[0][0],,,,, 8 >> s->ss_h, 4, w1, h1, 0); + // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index + // to get the motion vector for the bottom 4x4 block + // https://code.google.com/p/webm/issues/detail?id=993 + if (s->ss_h == 0) { + uvmv = b->mv[2][0]; + } else { + uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]); + } + mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][0], + td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + (row << 3) + 4, col << (3 - s->ss_h), + &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0); + } + + if (b->comp) { + mc_luma_dir(td, mc[3][b->filter][1], td->dst[0], ls_y, + ref2->data[0], ref2->linesize[0], tref2, + row << 3, col << 3, &b->mv[0][1],,,,, 8, 4, w2, h2, 1); + mc_luma_dir(td, mc[3][b->filter][1], + td->dst[0] + 4 * ls_y, ls_y, + ref2->data[0], ref2->linesize[0], tref2, + (row << 3) + 4, col << 3, &b->mv[2][1],,,,, 8, 4, w2, h2, 1); + w2 = (w2 + s->ss_h) >> s->ss_h; + if (s->ss_v) { + h2 = (h2 + 1) >> 1; + uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]); + mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][1], + td->dst[1], td->dst[2], ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + row << 2, col << (3 - s->ss_h), + &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1); + } else { + mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][1], + td->dst[1], td->dst[2], ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + row << 3, col << (3 - s->ss_h), + &b->mv[0][1],,,,, 8 >> s->ss_h, 4, w2, h2, 1); + // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index + // to get the motion vector for the bottom 4x4 block + // https://code.google.com/p/webm/issues/detail?id=993 + if (s->ss_h == 0) { + uvmv = b->mv[2][1]; + } else { + uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]); + } + mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][1], + td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + (row << 3) + 4, col << (3 - s->ss_h), + &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1); + } + } + } else if (b->bs == BS_4x8) { + mc_luma_dir(td, mc[4][b->filter][0], td->dst[0], ls_y, + ref1->data[0], ref1->linesize[0], tref1, + row << 3, col << 3, &b->mv[0][0],,,,, 4, 8, w1, h1, 0); + mc_luma_dir(td, mc[4][b->filter][0], td->dst[0] + 4 * bytesperpixel, ls_y, + ref1->data[0], ref1->linesize[0], tref1, + row << 3, (col << 3) + 4, &b->mv[1][0],,,,, 4, 8, w1, h1, 0); + h1 = (h1 + s->ss_v) >> s->ss_v; + if (s->ss_h) { + w1 = (w1 + 1) >> 1; + uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]); + mc_chroma_dir(td, mc[4][b->filter][0], + td->dst[1], td->dst[2], ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + row << (3 - s->ss_v), col << 2, + &uvmv,,,,, 4, 8 >> s->ss_v, w1, h1, 0); + } else { + mc_chroma_dir(td, mc[4][b->filter][0], + td->dst[1], td->dst[2], ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + row << (3 - s->ss_v), col << 3, + &b->mv[0][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0); + mc_chroma_dir(td, mc[4][b->filter][0], + td->dst[1] + 4 * bytesperpixel, + td->dst[2] + 4 * bytesperpixel, ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + row << (3 - s->ss_v), (col << 3) + 4, + &b->mv[1][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0); + } + + if (b->comp) { + mc_luma_dir(td, mc[4][b->filter][1], td->dst[0], ls_y, + ref2->data[0], ref2->linesize[0], tref2, + row << 3, col << 3, &b->mv[0][1],,,,, 4, 8, w2, h2, 1); + mc_luma_dir(td, mc[4][b->filter][1], td->dst[0] + 4 * bytesperpixel, ls_y, + ref2->data[0], ref2->linesize[0], tref2, + row << 3, (col << 3) + 4, &b->mv[1][1],,,,, 4, 8, w2, h2, 1); + h2 = (h2 + s->ss_v) >> s->ss_v; + if (s->ss_h) { + w2 = (w2 + 1) >> 1; + uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]); + mc_chroma_dir(td, mc[4][b->filter][1], + td->dst[1], td->dst[2], ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + row << (3 - s->ss_v), col << 2, + &uvmv,,,,, 4, 8 >> s->ss_v, w2, h2, 1); + } else { + mc_chroma_dir(td, mc[4][b->filter][1], + td->dst[1], td->dst[2], ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + row << (3 - s->ss_v), col << 3, + &b->mv[0][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1); + mc_chroma_dir(td, mc[4][b->filter][1], + td->dst[1] + 4 * bytesperpixel, + td->dst[2] + 4 * bytesperpixel, ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + row << (3 - s->ss_v), (col << 3) + 4, + &b->mv[1][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1); + } + } + } else +#endif + { +#if SCALED == 0 + av_assert2(b->bs == BS_4x4); +#endif + + // FIXME if two horizontally adjacent blocks have the same MV, + // do a w8 instead of a w4 call + mc_luma_dir(td, mc[4][b->filter][0], td->dst[0], ls_y, + ref1->data[0], ref1->linesize[0], tref1, + row << 3, col << 3, &b->mv[0][0], + 0, 0, 8, 8, 4, 4, w1, h1, 0); + mc_luma_dir(td, mc[4][b->filter][0], td->dst[0] + 4 * bytesperpixel, ls_y, + ref1->data[0], ref1->linesize[0], tref1, + row << 3, (col << 3) + 4, &b->mv[1][0], + 4, 0, 8, 8, 4, 4, w1, h1, 0); + mc_luma_dir(td, mc[4][b->filter][0], + td->dst[0] + 4 * ls_y, ls_y, + ref1->data[0], ref1->linesize[0], tref1, + (row << 3) + 4, col << 3, &b->mv[2][0], + 0, 4, 8, 8, 4, 4, w1, h1, 0); + mc_luma_dir(td, mc[4][b->filter][0], + td->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y, + ref1->data[0], ref1->linesize[0], tref1, + (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], + 4, 4, 8, 8, 4, 4, w1, h1, 0); + if (s->ss_v) { + h1 = (h1 + 1) >> 1; + if (s->ss_h) { + w1 = (w1 + 1) >> 1; + uvmv = ROUNDED_DIV_MVx4(b->mv[0][0], b->mv[1][0], + b->mv[2][0], b->mv[3][0]); + mc_chroma_dir(td, mc[4][b->filter][0], + td->dst[1], td->dst[2], ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + row << 2, col << 2, + &uvmv, 0, 0, 4, 4, 4, 4, w1, h1, 0); + } else { + uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]); + mc_chroma_dir(td, mc[4][b->filter][0], + td->dst[1], td->dst[2], ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + row << 2, col << 3, + &uvmv, 0, 0, 8, 4, 4, 4, w1, h1, 0); + uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]); + mc_chroma_dir(td, mc[4][b->filter][0], + td->dst[1] + 4 * bytesperpixel, + td->dst[2] + 4 * bytesperpixel, ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + row << 2, (col << 3) + 4, + &uvmv, 4, 0, 8, 4, 4, 4, w1, h1, 0); + } + } else { + if (s->ss_h) { + w1 = (w1 + 1) >> 1; + uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]); + mc_chroma_dir(td, mc[4][b->filter][0], + td->dst[1], td->dst[2], ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + row << 3, col << 2, + &uvmv, 0, 0, 4, 8, 4, 4, w1, h1, 0); + // BUG libvpx uses wrong block index for 4:2:2 bs=4x4 + // bottom block + // https://code.google.com/p/webm/issues/detail?id=993 + uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[2][0]); + mc_chroma_dir(td, mc[4][b->filter][0], + td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + (row << 3) + 4, col << 2, + &uvmv, 0, 4, 4, 8, 4, 4, w1, h1, 0); + } else { + mc_chroma_dir(td, mc[4][b->filter][0], + td->dst[1], td->dst[2], ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + row << 3, col << 3, + &b->mv[0][0], 0, 0, 8, 8, 4, 4, w1, h1, 0); + mc_chroma_dir(td, mc[4][b->filter][0], + td->dst[1] + 4 * bytesperpixel, + td->dst[2] + 4 * bytesperpixel, ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + row << 3, (col << 3) + 4, + &b->mv[1][0], 4, 0, 8, 8, 4, 4, w1, h1, 0); + mc_chroma_dir(td, mc[4][b->filter][0], + td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + (row << 3) + 4, col << 3, + &b->mv[2][0], 0, 4, 8, 8, 4, 4, w1, h1, 0); + mc_chroma_dir(td, mc[4][b->filter][0], + td->dst[1] + 4 * ls_uv + 4 * bytesperpixel, + td->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + (row << 3) + 4, (col << 3) + 4, + &b->mv[3][0], 4, 4, 8, 8, 4, 4, w1, h1, 0); + } + } + + if (b->comp) { + mc_luma_dir(td, mc[4][b->filter][1], td->dst[0], ls_y, + ref2->data[0], ref2->linesize[0], tref2, + row << 3, col << 3, &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1); + mc_luma_dir(td, mc[4][b->filter][1], td->dst[0] + 4 * bytesperpixel, ls_y, + ref2->data[0], ref2->linesize[0], tref2, + row << 3, (col << 3) + 4, &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1); + mc_luma_dir(td, mc[4][b->filter][1], + td->dst[0] + 4 * ls_y, ls_y, + ref2->data[0], ref2->linesize[0], tref2, + (row << 3) + 4, col << 3, &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1); + mc_luma_dir(td, mc[4][b->filter][1], + td->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y, + ref2->data[0], ref2->linesize[0], tref2, + (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1); + if (s->ss_v) { + h2 = (h2 + 1) >> 1; + if (s->ss_h) { + w2 = (w2 + 1) >> 1; + uvmv = ROUNDED_DIV_MVx4(b->mv[0][1], b->mv[1][1], + b->mv[2][1], b->mv[3][1]); + mc_chroma_dir(td, mc[4][b->filter][1], + td->dst[1], td->dst[2], ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + row << 2, col << 2, + &uvmv, 0, 0, 4, 4, 4, 4, w2, h2, 1); + } else { + uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]); + mc_chroma_dir(td, mc[4][b->filter][1], + td->dst[1], td->dst[2], ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + row << 2, col << 3, + &uvmv, 0, 0, 8, 4, 4, 4, w2, h2, 1); + uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]); + mc_chroma_dir(td, mc[4][b->filter][1], + td->dst[1] + 4 * bytesperpixel, + td->dst[2] + 4 * bytesperpixel, ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + row << 2, (col << 3) + 4, + &uvmv, 4, 0, 8, 4, 4, 4, w2, h2, 1); + } + } else { + if (s->ss_h) { + w2 = (w2 + 1) >> 1; + uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]); + mc_chroma_dir(td, mc[4][b->filter][1], + td->dst[1], td->dst[2], ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + row << 3, col << 2, + &uvmv, 0, 0, 4, 8, 4, 4, w2, h2, 1); + // BUG libvpx uses wrong block index for 4:2:2 bs=4x4 + // bottom block + // https://code.google.com/p/webm/issues/detail?id=993 + uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[2][1]); + mc_chroma_dir(td, mc[4][b->filter][1], + td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + (row << 3) + 4, col << 2, + &uvmv, 0, 4, 4, 8, 4, 4, w2, h2, 1); + } else { + mc_chroma_dir(td, mc[4][b->filter][1], + td->dst[1], td->dst[2], ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + row << 3, col << 3, + &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1); + mc_chroma_dir(td, mc[4][b->filter][1], + td->dst[1] + 4 * bytesperpixel, + td->dst[2] + 4 * bytesperpixel, ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + row << 3, (col << 3) + 4, + &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1); + mc_chroma_dir(td, mc[4][b->filter][1], + td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + (row << 3) + 4, col << 3, + &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1); + mc_chroma_dir(td, mc[4][b->filter][1], + td->dst[1] + 4 * ls_uv + 4 * bytesperpixel, + td->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + (row << 3) + 4, (col << 3) + 4, + &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1); + } + } + } + } + } else { + int bwl = bwlog_tab[0][b->bs]; + int bw = ff_vp9_bwh_tab[0][b->bs][0] * 4; + int bh = ff_vp9_bwh_tab[0][b->bs][1] * 4; + int uvbw = ff_vp9_bwh_tab[s->ss_h][b->bs][0] * 4; + int uvbh = ff_vp9_bwh_tab[s->ss_v][b->bs][1] * 4; + + mc_luma_dir(td, mc[bwl][b->filter][0], td->dst[0], ls_y, + ref1->data[0], ref1->linesize[0], tref1, + row << 3, col << 3, &b->mv[0][0], 0, 0, bw, bh, bw, bh, w1, h1, 0); + w1 = (w1 + s->ss_h) >> s->ss_h; + h1 = (h1 + s->ss_v) >> s->ss_v; + mc_chroma_dir(td, mc[bwl + s->ss_h][b->filter][0], + td->dst[1], td->dst[2], ls_uv, + ref1->data[1], ref1->linesize[1], + ref1->data[2], ref1->linesize[2], tref1, + row << (3 - s->ss_v), col << (3 - s->ss_h), + &b->mv[0][0], 0, 0, uvbw, uvbh, uvbw, uvbh, w1, h1, 0); + + if (b->comp) { + mc_luma_dir(td, mc[bwl][b->filter][1], td->dst[0], ls_y, + ref2->data[0], ref2->linesize[0], tref2, + row << 3, col << 3, &b->mv[0][1], 0, 0, bw, bh, bw, bh, w2, h2, 1); + w2 = (w2 + s->ss_h) >> s->ss_h; + h2 = (h2 + s->ss_v) >> s->ss_v; + mc_chroma_dir(td, mc[bwl + s->ss_h][b->filter][1], + td->dst[1], td->dst[2], ls_uv, + ref2->data[1], ref2->linesize[1], + ref2->data[2], ref2->linesize[2], tref2, + row << (3 - s->ss_v), col << (3 - s->ss_h), + &b->mv[0][1], 0, 0, uvbw, uvbh, uvbw, uvbh, w2, h2, 1); + } + } +} diff --git a/libs/ffvpx/libavcodec/vp9_parser.c b/libs/ffvpx/libavcodec/vp9_parser.c new file mode 100644 index 000000000..9531f34a3 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9_parser.c @@ -0,0 +1,66 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/intreadwrite.h" +#include "libavcodec/get_bits.h" +#include "parser.h" + +static int parse(AVCodecParserContext *ctx, + AVCodecContext *avctx, + const uint8_t **out_data, int *out_size, + const uint8_t *data, int size) +{ + GetBitContext gb; + int res, profile, keyframe; + + *out_data = data; + *out_size = size; + + if ((res = init_get_bits8(&gb, data, size)) < 0) + return size; // parsers can't return errors + get_bits(&gb, 2); // frame marker + profile = get_bits1(&gb); + profile |= get_bits1(&gb) << 1; + if (profile == 3) profile += get_bits1(&gb); + + if (get_bits1(&gb)) { + keyframe = 0; + } else { + keyframe = !get_bits1(&gb); + } + + if (!keyframe) { + ctx->pict_type = AV_PICTURE_TYPE_P; + ctx->key_frame = 0; + } else { + ctx->pict_type = AV_PICTURE_TYPE_I; + ctx->key_frame = 1; + } + + return size; +} + +AVCodecParser ff_vp9_parser = { + .codec_ids = { AV_CODEC_ID_VP9 }, + .parser_parse = parse, +}; diff --git a/libs/ffvpx/libavcodec/vp9_superframe_split_bsf.c b/libs/ffvpx/libavcodec/vp9_superframe_split_bsf.c new file mode 100644 index 000000000..9c4aa33dc --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9_superframe_split_bsf.c @@ -0,0 +1,147 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * This bitstream filter splits VP9 superframes into packets containing + * just one frame. + */ + +#include <stddef.h> + +#include "avcodec.h" +#include "bsf.h" +#include "bytestream.h" +#include "get_bits.h" + +typedef struct VP9SFSplitContext { + AVPacket buffer_pkt; + + int nb_frames; + int next_frame; + size_t next_frame_offset; + int sizes[8]; +} VP9SFSplitContext; + +static int vp9_superframe_split_filter(AVBSFContext *ctx, AVPacket *out) +{ + VP9SFSplitContext *s = ctx->priv_data; + AVPacket *in; + int i, j, ret, marker; + int is_superframe = !!s->buffer_pkt.data; + + if (!s->buffer_pkt.data) { + ret = ff_bsf_get_packet_ref(ctx, &s->buffer_pkt); + if (ret < 0) + return ret; + in = &s->buffer_pkt; + + marker = in->data[in->size - 1]; + if ((marker & 0xe0) == 0xc0) { + int length_size = 1 + ((marker >> 3) & 0x3); + int nb_frames = 1 + (marker & 0x7); + int idx_size = 2 + nb_frames * length_size; + + if (in->size >= idx_size && in->data[in->size - idx_size] == marker) { + GetByteContext bc; + int64_t total_size = 0; + + bytestream2_init(&bc, in->data + in->size + 1 - idx_size, + nb_frames * length_size); + + for (i = 0; i < nb_frames; i++) { + int frame_size = 0; + for (j = 0; j < length_size; j++) + frame_size |= bytestream2_get_byte(&bc) << (j * 8); + + total_size += frame_size; + if (frame_size < 0 || total_size > in->size - idx_size) { + av_log(ctx, AV_LOG_ERROR, + "Invalid frame size in a superframe: %d\n", frame_size); + ret = AVERROR(EINVAL); + goto fail; + } + s->sizes[i] = frame_size; + } + s->nb_frames = nb_frames; + s->next_frame = 0; + s->next_frame_offset = 0; + is_superframe = 1; + } + } + } + + if (is_superframe) { + GetBitContext gb; + int profile, invisible = 0; + + ret = av_packet_ref(out, &s->buffer_pkt); + if (ret < 0) + goto fail; + + out->data += s->next_frame_offset; + out->size = s->sizes[s->next_frame]; + + s->next_frame_offset += out->size; + s->next_frame++; + + if (s->next_frame >= s->nb_frames) + av_packet_unref(&s->buffer_pkt); + + ret = init_get_bits8(&gb, out->data, out->size); + if (ret < 0) + goto fail; + + get_bits(&gb, 2); // frame_marker + profile = get_bits1(&gb); + profile |= get_bits1(&gb) << 1; + if (profile == 3) + get_bits1(&gb); + if (!get_bits1(&gb)) { + get_bits1(&gb); + invisible = !get_bits1(&gb); + } + + if (invisible) + out->pts = AV_NOPTS_VALUE; + + } else { + av_packet_move_ref(out, &s->buffer_pkt); + } + + return 0; +fail: + if (ret < 0) + av_packet_unref(out); + av_packet_unref(&s->buffer_pkt); + return ret; +} + +static void vp9_superframe_split_uninit(AVBSFContext *ctx) +{ + VP9SFSplitContext *s = ctx->priv_data; + av_packet_unref(&s->buffer_pkt); +} + +const AVBitStreamFilter ff_vp9_superframe_split_bsf = { + .name = "vp9_superframe_split", + .priv_data_size = sizeof(VP9SFSplitContext), + .close = vp9_superframe_split_uninit, + .filter = vp9_superframe_split_filter, + .codec_ids = (const enum AVCodecID []){ AV_CODEC_ID_VP9, AV_CODEC_ID_NONE }, +}; diff --git a/libs/ffvpx/libavcodec/vp9block.c b/libs/ffvpx/libavcodec/vp9block.c new file mode 100644 index 000000000..1c3f7a722 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9block.c @@ -0,0 +1,1449 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" + +#include "avcodec.h" +#include "internal.h" +#include "videodsp.h" +#include "vp56.h" +#include "vp9.h" +#include "vp9data.h" +#include "vp9dec.h" + +static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h, + ptrdiff_t stride, int v) +{ + switch (w) { + case 1: + do { + *ptr = v; + ptr += stride; + } while (--h); + break; + case 2: { + int v16 = v * 0x0101; + do { + AV_WN16A(ptr, v16); + ptr += stride; + } while (--h); + break; + } + case 4: { + uint32_t v32 = v * 0x01010101; + do { + AV_WN32A(ptr, v32); + ptr += stride; + } while (--h); + break; + } + case 8: { +#if HAVE_FAST_64BIT + uint64_t v64 = v * 0x0101010101010101ULL; + do { + AV_WN64A(ptr, v64); + ptr += stride; + } while (--h); +#else + uint32_t v32 = v * 0x01010101; + do { + AV_WN32A(ptr, v32); + AV_WN32A(ptr + 4, v32); + ptr += stride; + } while (--h); +#endif + break; + } + } +} + +static void decode_mode(VP9TileData *td) +{ + static const uint8_t left_ctx[N_BS_SIZES] = { + 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf + }; + static const uint8_t above_ctx[N_BS_SIZES] = { + 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf + }; + static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = { + TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16, + TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4 + }; + VP9Context *s = td->s; + VP9Block *b = td->b; + int row = td->row, col = td->col, row7 = td->row7; + enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs]; + int bw4 = ff_vp9_bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4); + int bh4 = ff_vp9_bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y; + int have_a = row > 0, have_l = col > td->tile_col_start; + int vref, filter_id; + + if (!s->s.h.segmentation.enabled) { + b->seg_id = 0; + } else if (s->s.h.keyframe || s->s.h.intraonly) { + b->seg_id = !s->s.h.segmentation.update_map ? 0 : + vp8_rac_get_tree(td->c, ff_vp9_segmentation_tree, s->s.h.segmentation.prob); + } else if (!s->s.h.segmentation.update_map || + (s->s.h.segmentation.temporal && + vp56_rac_get_prob_branchy(td->c, + s->s.h.segmentation.pred_prob[s->above_segpred_ctx[col] + + td->left_segpred_ctx[row7]]))) { + if (!s->s.h.errorres && s->s.frames[REF_FRAME_SEGMAP].segmentation_map) { + int pred = 8, x; + uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map; + + if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass) + ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3, 0); + for (y = 0; y < h4; y++) { + int idx_base = (y + row) * 8 * s->sb_cols + col; + for (x = 0; x < w4; x++) + pred = FFMIN(pred, refsegmap[idx_base + x]); + } + av_assert1(pred < 8); + b->seg_id = pred; + } else { + b->seg_id = 0; + } + + memset(&s->above_segpred_ctx[col], 1, w4); + memset(&td->left_segpred_ctx[row7], 1, h4); + } else { + b->seg_id = vp8_rac_get_tree(td->c, ff_vp9_segmentation_tree, + s->s.h.segmentation.prob); + + memset(&s->above_segpred_ctx[col], 0, w4); + memset(&td->left_segpred_ctx[row7], 0, h4); + } + if (s->s.h.segmentation.enabled && + (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) { + setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col], + bw4, bh4, 8 * s->sb_cols, b->seg_id); + } + + b->skip = s->s.h.segmentation.enabled && + s->s.h.segmentation.feat[b->seg_id].skip_enabled; + if (!b->skip) { + int c = td->left_skip_ctx[row7] + s->above_skip_ctx[col]; + b->skip = vp56_rac_get_prob(td->c, s->prob.p.skip[c]); + td->counts.skip[c][b->skip]++; + } + + if (s->s.h.keyframe || s->s.h.intraonly) { + b->intra = 1; + } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) { + b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val; + } else { + int c, bit; + + if (have_a && have_l) { + c = s->above_intra_ctx[col] + td->left_intra_ctx[row7]; + c += (c == 2); + } else { + c = have_a ? 2 * s->above_intra_ctx[col] : + have_l ? 2 * td->left_intra_ctx[row7] : 0; + } + bit = vp56_rac_get_prob(td->c, s->prob.p.intra[c]); + td->counts.intra[c][bit]++; + b->intra = !bit; + } + + if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) { + int c; + if (have_a) { + if (have_l) { + c = (s->above_skip_ctx[col] ? max_tx : + s->above_txfm_ctx[col]) + + (td->left_skip_ctx[row7] ? max_tx : + td->left_txfm_ctx[row7]) > max_tx; + } else { + c = s->above_skip_ctx[col] ? 1 : + (s->above_txfm_ctx[col] * 2 > max_tx); + } + } else if (have_l) { + c = td->left_skip_ctx[row7] ? 1 : + (td->left_txfm_ctx[row7] * 2 > max_tx); + } else { + c = 1; + } + switch (max_tx) { + case TX_32X32: + b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][0]); + if (b->tx) { + b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][1]); + if (b->tx == 2) + b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][2]); + } + td->counts.tx32p[c][b->tx]++; + break; + case TX_16X16: + b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx16p[c][0]); + if (b->tx) + b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx16p[c][1]); + td->counts.tx16p[c][b->tx]++; + break; + case TX_8X8: + b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx8p[c]); + td->counts.tx8p[c][b->tx]++; + break; + case TX_4X4: + b->tx = TX_4X4; + break; + } + } else { + b->tx = FFMIN(max_tx, s->s.h.txfmmode); + } + + if (s->s.h.keyframe || s->s.h.intraonly) { + uint8_t *a = &s->above_mode_ctx[col * 2]; + uint8_t *l = &td->left_mode_ctx[(row7) << 1]; + + b->comp = 0; + if (b->bs > BS_8x8) { + // FIXME the memory storage intermediates here aren't really + // necessary, they're just there to make the code slightly + // simpler for now + b->mode[0] = + a[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, + ff_vp9_default_kf_ymode_probs[a[0]][l[0]]); + if (b->bs != BS_8x4) { + b->mode[1] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, + ff_vp9_default_kf_ymode_probs[a[1]][b->mode[0]]); + l[0] = + a[1] = b->mode[1]; + } else { + l[0] = + a[1] = + b->mode[1] = b->mode[0]; + } + if (b->bs != BS_4x8) { + b->mode[2] = + a[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, + ff_vp9_default_kf_ymode_probs[a[0]][l[1]]); + if (b->bs != BS_8x4) { + b->mode[3] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, + ff_vp9_default_kf_ymode_probs[a[1]][b->mode[2]]); + l[1] = + a[1] = b->mode[3]; + } else { + l[1] = + a[1] = + b->mode[3] = b->mode[2]; + } + } else { + b->mode[2] = b->mode[0]; + l[1] = + a[1] = + b->mode[3] = b->mode[1]; + } + } else { + b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, + ff_vp9_default_kf_ymode_probs[*a][*l]); + b->mode[3] = + b->mode[2] = + b->mode[1] = b->mode[0]; + // FIXME this can probably be optimized + memset(a, b->mode[0], ff_vp9_bwh_tab[0][b->bs][0]); + memset(l, b->mode[0], ff_vp9_bwh_tab[0][b->bs][1]); + } + b->uvmode = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, + ff_vp9_default_kf_uvmode_probs[b->mode[3]]); + } else if (b->intra) { + b->comp = 0; + if (b->bs > BS_8x8) { + b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, + s->prob.p.y_mode[0]); + td->counts.y_mode[0][b->mode[0]]++; + if (b->bs != BS_8x4) { + b->mode[1] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, + s->prob.p.y_mode[0]); + td->counts.y_mode[0][b->mode[1]]++; + } else { + b->mode[1] = b->mode[0]; + } + if (b->bs != BS_4x8) { + b->mode[2] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, + s->prob.p.y_mode[0]); + td->counts.y_mode[0][b->mode[2]]++; + if (b->bs != BS_8x4) { + b->mode[3] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, + s->prob.p.y_mode[0]); + td->counts.y_mode[0][b->mode[3]]++; + } else { + b->mode[3] = b->mode[2]; + } + } else { + b->mode[2] = b->mode[0]; + b->mode[3] = b->mode[1]; + } + } else { + static const uint8_t size_group[10] = { + 3, 3, 3, 3, 2, 2, 2, 1, 1, 1 + }; + int sz = size_group[b->bs]; + + b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, + s->prob.p.y_mode[sz]); + b->mode[1] = + b->mode[2] = + b->mode[3] = b->mode[0]; + td->counts.y_mode[sz][b->mode[3]]++; + } + b->uvmode = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, + s->prob.p.uv_mode[b->mode[3]]); + td->counts.uv_mode[b->mode[3]][b->uvmode]++; + } else { + static const uint8_t inter_mode_ctx_lut[14][14] = { + { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, + { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, + { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, + { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, + { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, + { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, + { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, + { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, + { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, + { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, + { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 }, + { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 }, + { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 }, + { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 }, + }; + + if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) { + av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0); + b->comp = 0; + b->ref[0] = s->s.h.segmentation.feat[b->seg_id].ref_val - 1; + } else { + // read comp_pred flag + if (s->s.h.comppredmode != PRED_SWITCHABLE) { + b->comp = s->s.h.comppredmode == PRED_COMPREF; + } else { + int c; + + // FIXME add intra as ref=0xff (or -1) to make these easier? + if (have_a) { + if (have_l) { + if (s->above_comp_ctx[col] && td->left_comp_ctx[row7]) { + c = 4; + } else if (s->above_comp_ctx[col]) { + c = 2 + (td->left_intra_ctx[row7] || + td->left_ref_ctx[row7] == s->s.h.fixcompref); + } else if (td->left_comp_ctx[row7]) { + c = 2 + (s->above_intra_ctx[col] || + s->above_ref_ctx[col] == s->s.h.fixcompref); + } else { + c = (!s->above_intra_ctx[col] && + s->above_ref_ctx[col] == s->s.h.fixcompref) ^ + (!td->left_intra_ctx[row7] && + td->left_ref_ctx[row & 7] == s->s.h.fixcompref); + } + } else { + c = s->above_comp_ctx[col] ? 3 : + (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref); + } + } else if (have_l) { + c = td->left_comp_ctx[row7] ? 3 : + (!td->left_intra_ctx[row7] && td->left_ref_ctx[row7] == s->s.h.fixcompref); + } else { + c = 1; + } + b->comp = vp56_rac_get_prob(td->c, s->prob.p.comp[c]); + td->counts.comp[c][b->comp]++; + } + + // read actual references + // FIXME probably cache a few variables here to prevent repetitive + // memory accesses below + if (b->comp) { /* two references */ + int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit; + + b->ref[fix_idx] = s->s.h.fixcompref; + // FIXME can this codeblob be replaced by some sort of LUT? + if (have_a) { + if (have_l) { + if (s->above_intra_ctx[col]) { + if (td->left_intra_ctx[row7]) { + c = 2; + } else { + c = 1 + 2 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]); + } + } else if (td->left_intra_ctx[row7]) { + c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]); + } else { + int refl = td->left_ref_ctx[row7], refa = s->above_ref_ctx[col]; + + if (refl == refa && refa == s->s.h.varcompref[1]) { + c = 0; + } else if (!td->left_comp_ctx[row7] && !s->above_comp_ctx[col]) { + if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) || + (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) { + c = 4; + } else { + c = (refa == refl) ? 3 : 1; + } + } else if (!td->left_comp_ctx[row7]) { + if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) { + c = 1; + } else { + c = (refl == s->s.h.varcompref[1] && + refa != s->s.h.varcompref[1]) ? 2 : 4; + } + } else if (!s->above_comp_ctx[col]) { + if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) { + c = 1; + } else { + c = (refa == s->s.h.varcompref[1] && + refl != s->s.h.varcompref[1]) ? 2 : 4; + } + } else { + c = (refl == refa) ? 4 : 2; + } + } + } else { + if (s->above_intra_ctx[col]) { + c = 2; + } else if (s->above_comp_ctx[col]) { + c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]); + } else { + c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]); + } + } + } else if (have_l) { + if (td->left_intra_ctx[row7]) { + c = 2; + } else if (td->left_comp_ctx[row7]) { + c = 4 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]); + } else { + c = 3 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]); + } + } else { + c = 2; + } + bit = vp56_rac_get_prob(td->c, s->prob.p.comp_ref[c]); + b->ref[var_idx] = s->s.h.varcompref[bit]; + td->counts.comp_ref[c][bit]++; + } else /* single reference */ { + int bit, c; + + if (have_a && !s->above_intra_ctx[col]) { + if (have_l && !td->left_intra_ctx[row7]) { + if (td->left_comp_ctx[row7]) { + if (s->above_comp_ctx[col]) { + c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7] || + !s->above_ref_ctx[col]); + } else { + c = (3 * !s->above_ref_ctx[col]) + + (!s->s.h.fixcompref || !td->left_ref_ctx[row7]); + } + } else if (s->above_comp_ctx[col]) { + c = (3 * !td->left_ref_ctx[row7]) + + (!s->s.h.fixcompref || !s->above_ref_ctx[col]); + } else { + c = 2 * !td->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col]; + } + } else if (s->above_intra_ctx[col]) { + c = 2; + } else if (s->above_comp_ctx[col]) { + c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]); + } else { + c = 4 * (!s->above_ref_ctx[col]); + } + } else if (have_l && !td->left_intra_ctx[row7]) { + if (td->left_intra_ctx[row7]) { + c = 2; + } else if (td->left_comp_ctx[row7]) { + c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7]); + } else { + c = 4 * (!td->left_ref_ctx[row7]); + } + } else { + c = 2; + } + bit = vp56_rac_get_prob(td->c, s->prob.p.single_ref[c][0]); + td->counts.single_ref[c][0][bit]++; + if (!bit) { + b->ref[0] = 0; + } else { + // FIXME can this codeblob be replaced by some sort of LUT? + if (have_a) { + if (have_l) { + if (td->left_intra_ctx[row7]) { + if (s->above_intra_ctx[col]) { + c = 2; + } else if (s->above_comp_ctx[col]) { + c = 1 + 2 * (s->s.h.fixcompref == 1 || + s->above_ref_ctx[col] == 1); + } else if (!s->above_ref_ctx[col]) { + c = 3; + } else { + c = 4 * (s->above_ref_ctx[col] == 1); + } + } else if (s->above_intra_ctx[col]) { + if (td->left_intra_ctx[row7]) { + c = 2; + } else if (td->left_comp_ctx[row7]) { + c = 1 + 2 * (s->s.h.fixcompref == 1 || + td->left_ref_ctx[row7] == 1); + } else if (!td->left_ref_ctx[row7]) { + c = 3; + } else { + c = 4 * (td->left_ref_ctx[row7] == 1); + } + } else if (s->above_comp_ctx[col]) { + if (td->left_comp_ctx[row7]) { + if (td->left_ref_ctx[row7] == s->above_ref_ctx[col]) { + c = 3 * (s->s.h.fixcompref == 1 || + td->left_ref_ctx[row7] == 1); + } else { + c = 2; + } + } else if (!td->left_ref_ctx[row7]) { + c = 1 + 2 * (s->s.h.fixcompref == 1 || + s->above_ref_ctx[col] == 1); + } else { + c = 3 * (td->left_ref_ctx[row7] == 1) + + (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1); + } + } else if (td->left_comp_ctx[row7]) { + if (!s->above_ref_ctx[col]) { + c = 1 + 2 * (s->s.h.fixcompref == 1 || + td->left_ref_ctx[row7] == 1); + } else { + c = 3 * (s->above_ref_ctx[col] == 1) + + (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1); + } + } else if (!s->above_ref_ctx[col]) { + if (!td->left_ref_ctx[row7]) { + c = 3; + } else { + c = 4 * (td->left_ref_ctx[row7] == 1); + } + } else if (!td->left_ref_ctx[row7]) { + c = 4 * (s->above_ref_ctx[col] == 1); + } else { + c = 2 * (td->left_ref_ctx[row7] == 1) + + 2 * (s->above_ref_ctx[col] == 1); + } + } else { + if (s->above_intra_ctx[col] || + (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) { + c = 2; + } else if (s->above_comp_ctx[col]) { + c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1); + } else { + c = 4 * (s->above_ref_ctx[col] == 1); + } + } + } else if (have_l) { + if (td->left_intra_ctx[row7] || + (!td->left_comp_ctx[row7] && !td->left_ref_ctx[row7])) { + c = 2; + } else if (td->left_comp_ctx[row7]) { + c = 3 * (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1); + } else { + c = 4 * (td->left_ref_ctx[row7] == 1); + } + } else { + c = 2; + } + bit = vp56_rac_get_prob(td->c, s->prob.p.single_ref[c][1]); + td->counts.single_ref[c][1][bit]++; + b->ref[0] = 1 + bit; + } + } + } + + if (b->bs <= BS_8x8) { + if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) { + b->mode[0] = + b->mode[1] = + b->mode[2] = + b->mode[3] = ZEROMV; + } else { + static const uint8_t off[10] = { + 3, 0, 0, 1, 0, 0, 0, 0, 0, 0 + }; + + // FIXME this needs to use the LUT tables from find_ref_mvs + // because not all are -1,0/0,-1 + int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]] + [td->left_mode_ctx[row7 + off[b->bs]]]; + + b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree, + s->prob.p.mv_mode[c]); + b->mode[1] = + b->mode[2] = + b->mode[3] = b->mode[0]; + td->counts.mv_mode[c][b->mode[0] - 10]++; + } + } + + if (s->s.h.filtermode == FILTER_SWITCHABLE) { + int c; + + if (have_a && s->above_mode_ctx[col] >= NEARESTMV) { + if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) { + c = s->above_filter_ctx[col] == td->left_filter_ctx[row7] ? + td->left_filter_ctx[row7] : 3; + } else { + c = s->above_filter_ctx[col]; + } + } else if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) { + c = td->left_filter_ctx[row7]; + } else { + c = 3; + } + + filter_id = vp8_rac_get_tree(td->c, ff_vp9_filter_tree, + s->prob.p.filter[c]); + td->counts.filter[c][filter_id]++; + b->filter = ff_vp9_filter_lut[filter_id]; + } else { + b->filter = s->s.h.filtermode; + } + + if (b->bs > BS_8x8) { + int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][td->left_mode_ctx[row7]]; + + b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree, + s->prob.p.mv_mode[c]); + td->counts.mv_mode[c][b->mode[0] - 10]++; + ff_vp9_fill_mv(td, b->mv[0], b->mode[0], 0); + + if (b->bs != BS_8x4) { + b->mode[1] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree, + s->prob.p.mv_mode[c]); + td->counts.mv_mode[c][b->mode[1] - 10]++; + ff_vp9_fill_mv(td, b->mv[1], b->mode[1], 1); + } else { + b->mode[1] = b->mode[0]; + AV_COPY32(&b->mv[1][0], &b->mv[0][0]); + AV_COPY32(&b->mv[1][1], &b->mv[0][1]); + } + + if (b->bs != BS_4x8) { + b->mode[2] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree, + s->prob.p.mv_mode[c]); + td->counts.mv_mode[c][b->mode[2] - 10]++; + ff_vp9_fill_mv(td, b->mv[2], b->mode[2], 2); + + if (b->bs != BS_8x4) { + b->mode[3] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree, + s->prob.p.mv_mode[c]); + td->counts.mv_mode[c][b->mode[3] - 10]++; + ff_vp9_fill_mv(td, b->mv[3], b->mode[3], 3); + } else { + b->mode[3] = b->mode[2]; + AV_COPY32(&b->mv[3][0], &b->mv[2][0]); + AV_COPY32(&b->mv[3][1], &b->mv[2][1]); + } + } else { + b->mode[2] = b->mode[0]; + AV_COPY32(&b->mv[2][0], &b->mv[0][0]); + AV_COPY32(&b->mv[2][1], &b->mv[0][1]); + b->mode[3] = b->mode[1]; + AV_COPY32(&b->mv[3][0], &b->mv[1][0]); + AV_COPY32(&b->mv[3][1], &b->mv[1][1]); + } + } else { + ff_vp9_fill_mv(td, b->mv[0], b->mode[0], -1); + AV_COPY32(&b->mv[1][0], &b->mv[0][0]); + AV_COPY32(&b->mv[2][0], &b->mv[0][0]); + AV_COPY32(&b->mv[3][0], &b->mv[0][0]); + AV_COPY32(&b->mv[1][1], &b->mv[0][1]); + AV_COPY32(&b->mv[2][1], &b->mv[0][1]); + AV_COPY32(&b->mv[3][1], &b->mv[0][1]); + } + + vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0]; + } + +#if HAVE_FAST_64BIT +#define SPLAT_CTX(var, val, n) \ + switch (n) { \ + case 1: var = val; break; \ + case 2: AV_WN16A(&var, val * 0x0101); break; \ + case 4: AV_WN32A(&var, val * 0x01010101); break; \ + case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \ + case 16: { \ + uint64_t v64 = val * 0x0101010101010101ULL; \ + AV_WN64A( &var, v64); \ + AV_WN64A(&((uint8_t *) &var)[8], v64); \ + break; \ + } \ + } +#else +#define SPLAT_CTX(var, val, n) \ + switch (n) { \ + case 1: var = val; break; \ + case 2: AV_WN16A(&var, val * 0x0101); break; \ + case 4: AV_WN32A(&var, val * 0x01010101); break; \ + case 8: { \ + uint32_t v32 = val * 0x01010101; \ + AV_WN32A( &var, v32); \ + AV_WN32A(&((uint8_t *) &var)[4], v32); \ + break; \ + } \ + case 16: { \ + uint32_t v32 = val * 0x01010101; \ + AV_WN32A( &var, v32); \ + AV_WN32A(&((uint8_t *) &var)[4], v32); \ + AV_WN32A(&((uint8_t *) &var)[8], v32); \ + AV_WN32A(&((uint8_t *) &var)[12], v32); \ + break; \ + } \ + } +#endif + + switch (ff_vp9_bwh_tab[1][b->bs][0]) { +#define SET_CTXS(perf, dir, off, n) \ + do { \ + SPLAT_CTX(perf->dir##_skip_ctx[off], b->skip, n); \ + SPLAT_CTX(perf->dir##_txfm_ctx[off], b->tx, n); \ + SPLAT_CTX(perf->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \ + if (!s->s.h.keyframe && !s->s.h.intraonly) { \ + SPLAT_CTX(perf->dir##_intra_ctx[off], b->intra, n); \ + SPLAT_CTX(perf->dir##_comp_ctx[off], b->comp, n); \ + SPLAT_CTX(perf->dir##_mode_ctx[off], b->mode[3], n); \ + if (!b->intra) { \ + SPLAT_CTX(perf->dir##_ref_ctx[off], vref, n); \ + if (s->s.h.filtermode == FILTER_SWITCHABLE) { \ + SPLAT_CTX(perf->dir##_filter_ctx[off], filter_id, n); \ + } \ + } \ + } \ + } while (0) + case 1: SET_CTXS(s, above, col, 1); break; + case 2: SET_CTXS(s, above, col, 2); break; + case 4: SET_CTXS(s, above, col, 4); break; + case 8: SET_CTXS(s, above, col, 8); break; + } + switch (ff_vp9_bwh_tab[1][b->bs][1]) { + case 1: SET_CTXS(td, left, row7, 1); break; + case 2: SET_CTXS(td, left, row7, 2); break; + case 4: SET_CTXS(td, left, row7, 4); break; + case 8: SET_CTXS(td, left, row7, 8); break; + } +#undef SPLAT_CTX +#undef SET_CTXS + + if (!s->s.h.keyframe && !s->s.h.intraonly) { + if (b->bs > BS_8x8) { + int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]); + + AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]); + AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]); + AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][0], mv0); + AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][1], mv1); + AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]); + AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]); + AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0); + AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1); + } else { + int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]); + + for (n = 0; n < w4 * 2; n++) { + AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0); + AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1); + } + for (n = 0; n < h4 * 2; n++) { + AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][0], mv0); + AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][1], mv1); + } + } + } + + // FIXME kinda ugly + for (y = 0; y < h4; y++) { + int x, o = (row + y) * s->sb_cols * 8 + col; + VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o]; + + if (b->intra) { + for (x = 0; x < w4; x++) { + mv[x].ref[0] = + mv[x].ref[1] = -1; + } + } else if (b->comp) { + for (x = 0; x < w4; x++) { + mv[x].ref[0] = b->ref[0]; + mv[x].ref[1] = b->ref[1]; + AV_COPY32(&mv[x].mv[0], &b->mv[3][0]); + AV_COPY32(&mv[x].mv[1], &b->mv[3][1]); + } + } else { + for (x = 0; x < w4; x++) { + mv[x].ref[0] = b->ref[0]; + mv[x].ref[1] = -1; + AV_COPY32(&mv[x].mv[0], &b->mv[3][0]); + } + } + } +} + +// FIXME merge cnt/eob arguments? +static av_always_inline int +decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs, + int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3], + unsigned (*eob)[6][2], uint8_t (*p)[6][11], + int nnz, const int16_t *scan, const int16_t (*nb)[2], + const int16_t *band_counts, int16_t *qmul) +{ + int i = 0, band = 0, band_left = band_counts[band]; + const uint8_t *tp = p[0][nnz]; + uint8_t cache[1024]; + + do { + int val, rc; + + val = vp56_rac_get_prob_branchy(c, tp[0]); // eob + eob[band][nnz][val]++; + if (!val) + break; + +skip_eob: + if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero + cnt[band][nnz][0]++; + if (!--band_left) + band_left = band_counts[++band]; + cache[scan[i]] = 0; + nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1; + tp = p[band][nnz]; + if (++i == n_coeffs) + break; //invalid input; blocks should end with EOB + goto skip_eob; + } + + rc = scan[i]; + if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one + cnt[band][nnz][1]++; + val = 1; + cache[rc] = 1; + } else { + cnt[band][nnz][2]++; + if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4 + if (!vp56_rac_get_prob_branchy(c, tp[4])) { + cache[rc] = val = 2; + } else { + val = 3 + vp56_rac_get_prob(c, tp[5]); + cache[rc] = 3; + } + } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2 + cache[rc] = 4; + if (!vp56_rac_get_prob_branchy(c, tp[7])) { + val = vp56_rac_get_prob(c, 159) + 5; + } else { + val = (vp56_rac_get_prob(c, 165) << 1) + 7; + val += vp56_rac_get_prob(c, 145); + } + } else { // cat 3-6 + cache[rc] = 5; + if (!vp56_rac_get_prob_branchy(c, tp[8])) { + if (!vp56_rac_get_prob_branchy(c, tp[9])) { + val = 11 + (vp56_rac_get_prob(c, 173) << 2); + val += (vp56_rac_get_prob(c, 148) << 1); + val += vp56_rac_get_prob(c, 140); + } else { + val = 19 + (vp56_rac_get_prob(c, 176) << 3); + val += (vp56_rac_get_prob(c, 155) << 2); + val += (vp56_rac_get_prob(c, 140) << 1); + val += vp56_rac_get_prob(c, 135); + } + } else if (!vp56_rac_get_prob_branchy(c, tp[10])) { + val = (vp56_rac_get_prob(c, 180) << 4) + 35; + val += (vp56_rac_get_prob(c, 157) << 3); + val += (vp56_rac_get_prob(c, 141) << 2); + val += (vp56_rac_get_prob(c, 134) << 1); + val += vp56_rac_get_prob(c, 130); + } else { + val = 67; + if (!is8bitsperpixel) { + if (bpp == 12) { + val += vp56_rac_get_prob(c, 255) << 17; + val += vp56_rac_get_prob(c, 255) << 16; + } + val += (vp56_rac_get_prob(c, 255) << 15); + val += (vp56_rac_get_prob(c, 255) << 14); + } + val += (vp56_rac_get_prob(c, 254) << 13); + val += (vp56_rac_get_prob(c, 254) << 12); + val += (vp56_rac_get_prob(c, 254) << 11); + val += (vp56_rac_get_prob(c, 252) << 10); + val += (vp56_rac_get_prob(c, 249) << 9); + val += (vp56_rac_get_prob(c, 243) << 8); + val += (vp56_rac_get_prob(c, 230) << 7); + val += (vp56_rac_get_prob(c, 196) << 6); + val += (vp56_rac_get_prob(c, 177) << 5); + val += (vp56_rac_get_prob(c, 153) << 4); + val += (vp56_rac_get_prob(c, 140) << 3); + val += (vp56_rac_get_prob(c, 133) << 2); + val += (vp56_rac_get_prob(c, 130) << 1); + val += vp56_rac_get_prob(c, 129); + } + } + } +#define STORE_COEF(c, i, v) do { \ + if (is8bitsperpixel) { \ + c[i] = v; \ + } else { \ + AV_WN32A(&c[i * 2], v); \ + } \ +} while (0) + if (!--band_left) + band_left = band_counts[++band]; + if (is_tx32x32) + STORE_COEF(coef, rc, (int)((vp8_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]) / 2); + else + STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]); + nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1; + tp = p[band][nnz]; + } while (++i < n_coeffs); + + return i; +} + +static int decode_coeffs_b_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs, + unsigned (*cnt)[6][3], unsigned (*eob)[6][2], + uint8_t (*p)[6][11], int nnz, const int16_t *scan, + const int16_t (*nb)[2], const int16_t *band_counts, + int16_t *qmul) +{ + return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p, + nnz, scan, nb, band_counts, qmul); +} + +static int decode_coeffs_b32_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs, + unsigned (*cnt)[6][3], unsigned (*eob)[6][2], + uint8_t (*p)[6][11], int nnz, const int16_t *scan, + const int16_t (*nb)[2], const int16_t *band_counts, + int16_t *qmul) +{ + return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p, + nnz, scan, nb, band_counts, qmul); +} + +static int decode_coeffs_b_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs, + unsigned (*cnt)[6][3], unsigned (*eob)[6][2], + uint8_t (*p)[6][11], int nnz, const int16_t *scan, + const int16_t (*nb)[2], const int16_t *band_counts, + int16_t *qmul) +{ + return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 0, td->s->s.h.bpp, cnt, eob, p, + nnz, scan, nb, band_counts, qmul); +} + +static int decode_coeffs_b32_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs, + unsigned (*cnt)[6][3], unsigned (*eob)[6][2], + uint8_t (*p)[6][11], int nnz, const int16_t *scan, + const int16_t (*nb)[2], const int16_t *band_counts, + int16_t *qmul) +{ + return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 0, td->s->s.h.bpp, cnt, eob, p, + nnz, scan, nb, band_counts, qmul); +} + +static av_always_inline int decode_coeffs(VP9TileData *td, int is8bitsperpixel) +{ + VP9Context *s = td->s; + VP9Block *b = td->b; + int row = td->row, col = td->col; + uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra]; + unsigned (*c)[6][3] = td->counts.coef[b->tx][0 /* y */][!b->intra]; + unsigned (*e)[6][2] = td->counts.eob[b->tx][0 /* y */][!b->intra]; + int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1; + int end_x = FFMIN(2 * (s->cols - col), w4); + int end_y = FFMIN(2 * (s->rows - row), h4); + int n, pl, x, y, ret; + int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul; + int tx = 4 * s->s.h.lossless + b->tx; + const int16_t * const *yscans = ff_vp9_scans[tx]; + const int16_t (* const * ynbs)[2] = ff_vp9_scans_nb[tx]; + const int16_t *uvscan = ff_vp9_scans[b->uvtx][DCT_DCT]; + const int16_t (*uvnb)[2] = ff_vp9_scans_nb[b->uvtx][DCT_DCT]; + uint8_t *a = &s->above_y_nnz_ctx[col * 2]; + uint8_t *l = &td->left_y_nnz_ctx[(row & 7) << 1]; + static const int16_t band_counts[4][8] = { + { 1, 2, 3, 4, 3, 16 - 13 }, + { 1, 2, 3, 4, 11, 64 - 21 }, + { 1, 2, 3, 4, 11, 256 - 21 }, + { 1, 2, 3, 4, 11, 1024 - 21 }, + }; + const int16_t *y_band_counts = band_counts[b->tx]; + const int16_t *uv_band_counts = band_counts[b->uvtx]; + int bytesperpixel = is8bitsperpixel ? 1 : 2; + int total_coeff = 0; + +#define MERGE(la, end, step, rd) \ + for (n = 0; n < end; n += step) \ + la[n] = !!rd(&la[n]) +#define MERGE_CTX(step, rd) \ + do { \ + MERGE(l, end_y, step, rd); \ + MERGE(a, end_x, step, rd); \ + } while (0) + +#define DECODE_Y_COEF_LOOP(step, mode_index, v) \ + for (n = 0, y = 0; y < end_y; y += step) { \ + for (x = 0; x < end_x; x += step, n += step * step) { \ + enum TxfmType txtp = ff_vp9_intra_txfm_type[b->mode[mode_index]]; \ + ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \ + (td, td->block + 16 * n * bytesperpixel, 16 * step * step, \ + c, e, p, a[x] + l[y], yscans[txtp], \ + ynbs[txtp], y_band_counts, qmul[0]); \ + a[x] = l[y] = !!ret; \ + total_coeff |= !!ret; \ + if (step >= 4) { \ + AV_WN16A(&td->eob[n], ret); \ + } else { \ + td->eob[n] = ret; \ + } \ + } \ + } + +#define SPLAT(la, end, step, cond) \ + if (step == 2) { \ + for (n = 1; n < end; n += step) \ + la[n] = la[n - 1]; \ + } else if (step == 4) { \ + if (cond) { \ + for (n = 0; n < end; n += step) \ + AV_WN32A(&la[n], la[n] * 0x01010101); \ + } else { \ + for (n = 0; n < end; n += step) \ + memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \ + } \ + } else /* step == 8 */ { \ + if (cond) { \ + if (HAVE_FAST_64BIT) { \ + for (n = 0; n < end; n += step) \ + AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \ + } else { \ + for (n = 0; n < end; n += step) { \ + uint32_t v32 = la[n] * 0x01010101; \ + AV_WN32A(&la[n], v32); \ + AV_WN32A(&la[n + 4], v32); \ + } \ + } \ + } else { \ + for (n = 0; n < end; n += step) \ + memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \ + } \ + } +#define SPLAT_CTX(step) \ + do { \ + SPLAT(a, end_x, step, end_x == w4); \ + SPLAT(l, end_y, step, end_y == h4); \ + } while (0) + + /* y tokens */ + switch (b->tx) { + case TX_4X4: + DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,); + break; + case TX_8X8: + MERGE_CTX(2, AV_RN16A); + DECODE_Y_COEF_LOOP(2, 0,); + SPLAT_CTX(2); + break; + case TX_16X16: + MERGE_CTX(4, AV_RN32A); + DECODE_Y_COEF_LOOP(4, 0,); + SPLAT_CTX(4); + break; + case TX_32X32: + MERGE_CTX(8, AV_RN64A); + DECODE_Y_COEF_LOOP(8, 0, 32); + SPLAT_CTX(8); + break; + } + +#define DECODE_UV_COEF_LOOP(step, v) \ + for (n = 0, y = 0; y < end_y; y += step) { \ + for (x = 0; x < end_x; x += step, n += step * step) { \ + ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \ + (td, td->uvblock[pl] + 16 * n * bytesperpixel, \ + 16 * step * step, c, e, p, a[x] + l[y], \ + uvscan, uvnb, uv_band_counts, qmul[1]); \ + a[x] = l[y] = !!ret; \ + total_coeff |= !!ret; \ + if (step >= 4) { \ + AV_WN16A(&td->uveob[pl][n], ret); \ + } else { \ + td->uveob[pl][n] = ret; \ + } \ + } \ + } + + p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra]; + c = td->counts.coef[b->uvtx][1 /* uv */][!b->intra]; + e = td->counts.eob[b->uvtx][1 /* uv */][!b->intra]; + w4 >>= s->ss_h; + end_x >>= s->ss_h; + h4 >>= s->ss_v; + end_y >>= s->ss_v; + for (pl = 0; pl < 2; pl++) { + a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h]; + l = &td->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v]; + switch (b->uvtx) { + case TX_4X4: + DECODE_UV_COEF_LOOP(1,); + break; + case TX_8X8: + MERGE_CTX(2, AV_RN16A); + DECODE_UV_COEF_LOOP(2,); + SPLAT_CTX(2); + break; + case TX_16X16: + MERGE_CTX(4, AV_RN32A); + DECODE_UV_COEF_LOOP(4,); + SPLAT_CTX(4); + break; + case TX_32X32: + MERGE_CTX(8, AV_RN64A); + DECODE_UV_COEF_LOOP(8, 32); + SPLAT_CTX(8); + break; + } + } + + return total_coeff; +} + +static int decode_coeffs_8bpp(VP9TileData *td) +{ + return decode_coeffs(td, 1); +} + +static int decode_coeffs_16bpp(VP9TileData *td) +{ + return decode_coeffs(td, 0); +} + +static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v, + int row_and_7, int col_and_7, + int w, int h, int col_end, int row_end, + enum TxfmMode tx, int skip_inter) +{ + static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 }; + static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 }; + + // FIXME I'm pretty sure all loops can be replaced by a single LUT if + // we make VP9Filter.mask uint64_t (i.e. row/col all single variable) + // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then + // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7) + + // the intended behaviour of the vp9 loopfilter is to work on 8-pixel + // edges. This means that for UV, we work on two subsampled blocks at + // a time, and we only use the topleft block's mode information to set + // things like block strength. Thus, for any block size smaller than + // 16x16, ignore the odd portion of the block. + if (tx == TX_4X4 && (ss_v | ss_h)) { + if (h == ss_v) { + if (row_and_7 & 1) + return; + if (!row_end) + h += 1; + } + if (w == ss_h) { + if (col_and_7 & 1) + return; + if (!col_end) + w += 1; + } + } + + if (tx == TX_4X4 && !skip_inter) { + int t = 1 << col_and_7, m_col = (t << w) - t, y; + // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide + int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8; + + for (y = row_and_7; y < h + row_and_7; y++) { + int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]); + + mask[0][y][1] |= m_row_8; + mask[0][y][2] |= m_row_4; + // for odd lines, if the odd col is not being filtered, + // skip odd row also: + // .---. <-- a + // | | + // |___| <-- b + // ^ ^ + // c d + // + // if a/c are even row/col and b/d are odd, and d is skipped, + // e.g. right edge of size-66x66.webm, then skip b also (bug) + if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) { + mask[1][y][col_mask_id] |= (t << (w - 1)) - t; + } else { + mask[1][y][col_mask_id] |= m_col; + } + if (!ss_h) + mask[0][y][3] |= m_col; + if (!ss_v) { + if (ss_h && (col_end & 1)) + mask[1][y][3] |= (t << (w - 1)) - t; + else + mask[1][y][3] |= m_col; + } + } + } else { + int y, t = 1 << col_and_7, m_col = (t << w) - t; + + if (!skip_inter) { + int mask_id = (tx == TX_8X8); + int l2 = tx + ss_h - 1, step1d; + static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 }; + int m_row = m_col & masks[l2]; + + // at odd UV col/row edges tx16/tx32 loopfilter edges, force + // 8wd loopfilter to prevent going off the visible edge. + if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) { + int m_row_16 = ((t << (w - 1)) - t) & masks[l2]; + int m_row_8 = m_row - m_row_16; + + for (y = row_and_7; y < h + row_and_7; y++) { + mask[0][y][0] |= m_row_16; + mask[0][y][1] |= m_row_8; + } + } else { + for (y = row_and_7; y < h + row_and_7; y++) + mask[0][y][mask_id] |= m_row; + } + + l2 = tx + ss_v - 1; + step1d = 1 << l2; + if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) { + for (y = row_and_7; y < h + row_and_7 - 1; y += step1d) + mask[1][y][0] |= m_col; + if (y - row_and_7 == h - 1) + mask[1][y][1] |= m_col; + } else { + for (y = row_and_7; y < h + row_and_7; y += step1d) + mask[1][y][mask_id] |= m_col; + } + } else if (tx != TX_4X4) { + int mask_id; + + mask_id = (tx == TX_8X8) || (h == ss_v); + mask[1][row_and_7][mask_id] |= m_col; + mask_id = (tx == TX_8X8) || (w == ss_h); + for (y = row_and_7; y < h + row_and_7; y++) + mask[0][y][mask_id] |= t; + } else { + int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8; + + for (y = row_and_7; y < h + row_and_7; y++) { + mask[0][y][2] |= t4; + mask[0][y][1] |= t8; + } + mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col; + } + } +} + +void ff_vp9_decode_block(VP9TileData *td, int row, int col, + VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff, + enum BlockLevel bl, enum BlockPartition bp) +{ + VP9Context *s = td->s; + VP9Block *b = td->b; + enum BlockSize bs = bl * 3 + bp; + int bytesperpixel = s->bytesperpixel; + int w4 = ff_vp9_bwh_tab[1][bs][0], h4 = ff_vp9_bwh_tab[1][bs][1], lvl; + int emu[2]; + AVFrame *f = s->s.frames[CUR_FRAME].tf.f; + + td->row = row; + td->row7 = row & 7; + td->col = col; + td->col7 = col & 7; + + td->min_mv.x = -(128 + col * 64); + td->min_mv.y = -(128 + row * 64); + td->max_mv.x = 128 + (s->cols - col - w4) * 64; + td->max_mv.y = 128 + (s->rows - row - h4) * 64; + + if (s->pass < 2) { + b->bs = bs; + b->bl = bl; + b->bp = bp; + decode_mode(td); + b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) || + (s->ss_v && h4 * 2 == (1 << b->tx))); + + if (!b->skip) { + int has_coeffs; + + if (bytesperpixel == 1) { + has_coeffs = decode_coeffs_8bpp(td); + } else { + has_coeffs = decode_coeffs_16bpp(td); + } + if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) { + b->skip = 1; + memset(&s->above_skip_ctx[col], 1, w4); + memset(&td->left_skip_ctx[td->row7], 1, h4); + } + } else { + int row7 = td->row7; + +#define SPLAT_ZERO_CTX(v, n) \ + switch (n) { \ + case 1: v = 0; break; \ + case 2: AV_ZERO16(&v); break; \ + case 4: AV_ZERO32(&v); break; \ + case 8: AV_ZERO64(&v); break; \ + case 16: AV_ZERO128(&v); break; \ + } +#define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \ + do { \ + SPLAT_ZERO_CTX(dir##_y_##var[off * 2], n * 2); \ + if (s->ss_##dir2) { \ + SPLAT_ZERO_CTX(dir##_uv_##var[0][off], n); \ + SPLAT_ZERO_CTX(dir##_uv_##var[1][off], n); \ + } else { \ + SPLAT_ZERO_CTX(dir##_uv_##var[0][off * 2], n * 2); \ + SPLAT_ZERO_CTX(dir##_uv_##var[1][off * 2], n * 2); \ + } \ + } while (0) + + switch (w4) { + case 1: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 1, h); break; + case 2: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 2, h); break; + case 4: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 4, h); break; + case 8: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 8, h); break; + } + switch (h4) { + case 1: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 1, v); break; + case 2: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 2, v); break; + case 4: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 4, v); break; + case 8: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 8, v); break; + } + } + + if (s->pass == 1) { + s->td[0].b++; + s->td[0].block += w4 * h4 * 64 * bytesperpixel; + s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v); + s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v); + s->td[0].eob += 4 * w4 * h4; + s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v); + s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v); + + return; + } + } + + // emulated overhangs if the stride of the target buffer can't hold. This + // makes it possible to support emu-edge and so on even if we have large block + // overhangs + emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] || + (row + h4) > s->rows; + emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] || + (row + h4) > s->rows; + if (emu[0]) { + td->dst[0] = td->tmp_y; + td->y_stride = 128; + } else { + td->dst[0] = f->data[0] + yoff; + td->y_stride = f->linesize[0]; + } + if (emu[1]) { + td->dst[1] = td->tmp_uv[0]; + td->dst[2] = td->tmp_uv[1]; + td->uv_stride = 128; + } else { + td->dst[1] = f->data[1] + uvoff; + td->dst[2] = f->data[2] + uvoff; + td->uv_stride = f->linesize[1]; + } + if (b->intra) { + if (s->s.h.bpp > 8) { + ff_vp9_intra_recon_16bpp(td, yoff, uvoff); + } else { + ff_vp9_intra_recon_8bpp(td, yoff, uvoff); + } + } else { + if (s->s.h.bpp > 8) { + ff_vp9_inter_recon_16bpp(td); + } else { + ff_vp9_inter_recon_8bpp(td); + } + } + if (emu[0]) { + int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0; + + for (n = 0; o < w; n++) { + int bw = 64 >> n; + + av_assert2(n <= 4); + if (w & bw) { + s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0], + td->tmp_y + o * bytesperpixel, 128, h, 0, 0); + o += bw; + } + } + } + if (emu[1]) { + int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h; + int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0; + + for (n = s->ss_h; o < w; n++) { + int bw = 64 >> n; + + av_assert2(n <= 4); + if (w & bw) { + s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1], + td->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0); + s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2], + td->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0); + o += bw; + } + } + } + + // pick filter level and find edges to apply filter to + if (s->s.h.filter.level && + (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1] + [b->mode[3] != ZEROMV]) > 0) { + int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4); + int skip_inter = !b->intra && b->skip, col7 = td->col7, row7 = td->row7; + + setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl); + mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter); + if (s->ss_h || s->ss_v) + mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end, + s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0, + s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0, + b->uvtx, skip_inter); + } + + if (s->pass == 2) { + s->td[0].b++; + s->td[0].block += w4 * h4 * 64 * bytesperpixel; + s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h); + s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h); + s->td[0].eob += 4 * w4 * h4; + s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h); + s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h); + } +} diff --git a/libs/ffvpx/libavcodec/vp9data.c b/libs/ffvpx/libavcodec/vp9data.c new file mode 100644 index 000000000..7af8a97b1 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9data.c @@ -0,0 +1,2247 @@ +/* + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "vp9.h" +#include "vp9data.h" + +const uint8_t ff_vp9_bwh_tab[2][N_BS_SIZES][2] = { + { + { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 }, + { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, + }, { + { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 }, + { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, + } +}; + +const int8_t ff_vp9_partition_tree[3][2] = { + { -PARTITION_NONE, 1 }, // '0' + { -PARTITION_H, 2 }, // '10' + { -PARTITION_V, -PARTITION_SPLIT }, // '110', '111' +}; + +const uint8_t ff_vp9_default_kf_partition_probs[4][4][3] = { + { /* 64x64 -> 32x32 */ + { 174, 35, 49 } /* a/l both not split */, + { 68, 11, 27 } /* a split, l not split */, + { 57, 15, 9 } /* l split, a not split */, + { 12, 3, 3 } /* a/l both split */ + }, { /* 32x32 -> 16x16 */ + { 150, 40, 39 } /* a/l both not split */, + { 78, 12, 26 } /* a split, l not split */, + { 67, 33, 11 } /* l split, a not split */, + { 24, 7, 5 } /* a/l both split */, + }, { /* 16x16 -> 8x8 */ + { 149, 53, 53 } /* a/l both not split */, + { 94, 20, 48 } /* a split, l not split */, + { 83, 53, 24 } /* l split, a not split */, + { 52, 18, 18 } /* a/l both split */, + }, { /* 8x8 -> 4x4 */ + { 158, 97, 94 } /* a/l both not split */, + { 93, 24, 99 } /* a split, l not split */, + { 85, 119, 44 } /* l split, a not split */, + { 62, 59, 67 } /* a/l both split */, + }, +}; + +const int8_t ff_vp9_segmentation_tree[7][2] = { + { 1, 2 }, + { 3, 4 }, + { 5, 6 }, + { -0, -1 }, // '00x' + { -2, -3 }, // '01x' + { -4, -5 }, // '10x' + { -6, -7 }, // '11x' +}; + +const int8_t ff_vp9_intramode_tree[9][2] = { + { -DC_PRED, 1 }, // '0' + { -TM_VP8_PRED, 2 }, // '10' + { -VERT_PRED, 3 }, // '110' + { 4, 6 }, + { -HOR_PRED, 5 }, // '11100' + { -DIAG_DOWN_RIGHT_PRED, -VERT_RIGHT_PRED }, // '11101x' + { -DIAG_DOWN_LEFT_PRED, 7 }, // '11110' + { -VERT_LEFT_PRED, 8 }, // '111110' + { -HOR_DOWN_PRED, -HOR_UP_PRED }, // '111111x' +}; + +const uint8_t ff_vp9_default_kf_ymode_probs[10][10][9] = { + { /* above = v */ + { 43, 46, 168, 134, 107, 128, 69, 142, 92 } /* left = v */, + { 44, 29, 68, 159, 201, 177, 50, 57, 77 } /* left = h */, + { 63, 36, 126, 146, 123, 158, 60, 90, 96 } /* left = dc */, + { 58, 38, 76, 114, 97, 172, 78, 133, 92 } /* left = d45 */, + { 46, 41, 76, 140, 63, 184, 69, 112, 57 } /* left = d135 */, + { 38, 32, 85, 140, 46, 112, 54, 151, 133 } /* left = d117 */, + { 39, 27, 61, 131, 110, 175, 44, 75, 136 } /* left = d153 */, + { 47, 35, 80, 100, 74, 143, 64, 163, 74 } /* left = d63 */, + { 52, 30, 74, 113, 130, 175, 51, 64, 58 } /* left = d27 */, + { 36, 61, 116, 114, 128, 162, 80, 125, 82 } /* left = tm */ + }, { /* above = h */ + { 55, 44, 68, 166, 179, 192, 57, 57, 108 } /* left = v */, + { 42, 26, 11, 199, 241, 228, 23, 15, 85 } /* left = h */, + { 82, 26, 26, 171, 208, 204, 44, 32, 105 } /* left = dc */, + { 68, 42, 19, 131, 160, 199, 55, 52, 83 } /* left = d45 */, + { 58, 50, 25, 139, 115, 232, 39, 52, 118 } /* left = d135 */, + { 50, 35, 33, 153, 104, 162, 64, 59, 131 } /* left = d117 */, + { 44, 24, 16, 150, 177, 202, 33, 19, 156 } /* left = d153 */, + { 53, 49, 21, 110, 116, 168, 59, 80, 76 } /* left = d63 */, + { 55, 27, 12, 153, 203, 218, 26, 27, 49 } /* left = d27 */, + { 38, 72, 19, 168, 203, 212, 50, 50, 107 } /* left = tm */ + }, { /* above = dc */ + { 92, 45, 102, 136, 116, 180, 74, 90, 100 } /* left = v */, + { 73, 32, 19, 187, 222, 215, 46, 34, 100 } /* left = h */, + { 137, 30, 42, 148, 151, 207, 70, 52, 91 } /* left = dc */, + { 91, 30, 32, 116, 121, 186, 93, 86, 94 } /* left = d45 */, + { 72, 35, 36, 149, 68, 206, 68, 63, 105 } /* left = d135 */, + { 73, 31, 28, 138, 57, 124, 55, 122, 151 } /* left = d117 */, + { 67, 23, 21, 140, 126, 197, 40, 37, 171 } /* left = d153 */, + { 74, 32, 27, 107, 86, 160, 63, 134, 102 } /* left = d63 */, + { 86, 27, 28, 128, 154, 212, 45, 43, 53 } /* left = d27 */, + { 59, 67, 44, 140, 161, 202, 78, 67, 119 } /* left = tm */ + }, { /* above = d45 */ + { 59, 38, 83, 112, 103, 162, 98, 136, 90 } /* left = v */, + { 62, 30, 23, 158, 200, 207, 59, 57, 50 } /* left = h */, + { 103, 26, 36, 129, 132, 201, 83, 80, 93 } /* left = dc */, + { 67, 30, 29, 84, 86, 191, 102, 91, 59 } /* left = d45 */, + { 60, 32, 33, 112, 71, 220, 64, 89, 104 } /* left = d135 */, + { 53, 26, 34, 130, 56, 149, 84, 120, 103 } /* left = d117 */, + { 53, 21, 23, 133, 109, 210, 56, 77, 172 } /* left = d153 */, + { 61, 29, 29, 93, 97, 165, 83, 175, 162 } /* left = d63 */, + { 77, 19, 29, 112, 142, 228, 55, 66, 36 } /* left = d27 */, + { 47, 47, 43, 114, 137, 181, 100, 99, 95 } /* left = tm */ + }, { /* above = d135 */ + { 53, 40, 55, 139, 69, 183, 61, 80, 110 } /* left = v */, + { 40, 29, 19, 161, 180, 207, 43, 24, 91 } /* left = h */, + { 69, 23, 29, 128, 83, 199, 46, 44, 101 } /* left = dc */, + { 60, 34, 19, 105, 61, 198, 53, 64, 89 } /* left = d45 */, + { 52, 31, 22, 158, 40, 209, 58, 62, 89 } /* left = d135 */, + { 44, 31, 29, 147, 46, 158, 56, 102, 198 } /* left = d117 */, + { 35, 19, 12, 135, 87, 209, 41, 45, 167 } /* left = d153 */, + { 51, 38, 25, 113, 58, 164, 70, 93, 97 } /* left = d63 */, + { 55, 25, 21, 118, 95, 215, 38, 39, 66 } /* left = d27 */, + { 47, 54, 34, 146, 108, 203, 72, 103, 151 } /* left = tm */ + }, { /* above = d117 */ + { 46, 27, 80, 150, 55, 124, 55, 121, 135 } /* left = v */, + { 36, 23, 27, 165, 149, 166, 54, 64, 118 } /* left = h */, + { 64, 19, 37, 156, 66, 138, 49, 95, 133 } /* left = dc */, + { 53, 21, 36, 131, 63, 163, 60, 109, 81 } /* left = d45 */, + { 40, 26, 35, 154, 40, 185, 51, 97, 123 } /* left = d135 */, + { 35, 19, 34, 179, 19, 97, 48, 129, 124 } /* left = d117 */, + { 36, 20, 26, 136, 62, 164, 33, 77, 154 } /* left = d153 */, + { 45, 26, 28, 129, 45, 129, 49, 147, 123 } /* left = d63 */, + { 45, 18, 32, 130, 90, 157, 40, 79, 91 } /* left = d27 */, + { 38, 44, 51, 136, 74, 162, 57, 97, 121 } /* left = tm */ + }, { /* above = d153 */ + { 56, 39, 58, 133, 117, 173, 48, 53, 187 } /* left = v */, + { 35, 21, 12, 161, 212, 207, 20, 23, 145 } /* left = h */, + { 75, 17, 22, 136, 138, 185, 32, 34, 166 } /* left = dc */, + { 56, 29, 19, 117, 109, 181, 55, 68, 112 } /* left = d45 */, + { 47, 29, 17, 153, 64, 220, 59, 51, 114 } /* left = d135 */, + { 46, 16, 24, 136, 76, 147, 41, 64, 172 } /* left = d117 */, + { 34, 17, 11, 108, 152, 187, 13, 15, 209 } /* left = d153 */, + { 55, 30, 18, 122, 79, 179, 44, 88, 116 } /* left = d63 */, + { 51, 24, 14, 115, 133, 209, 32, 26, 104 } /* left = d27 */, + { 37, 49, 25, 129, 168, 164, 41, 54, 148 } /* left = tm */ + }, { /* above = d63 */ + { 48, 34, 86, 101, 92, 146, 78, 179, 134 } /* left = v */, + { 47, 22, 24, 138, 187, 178, 68, 69, 59 } /* left = h */, + { 78, 23, 39, 111, 117, 170, 74, 124, 94 } /* left = dc */, + { 56, 25, 33, 105, 112, 187, 95, 177, 129 } /* left = d45 */, + { 48, 31, 27, 114, 63, 183, 82, 116, 56 } /* left = d135 */, + { 43, 28, 37, 121, 63, 123, 61, 192, 169 } /* left = d117 */, + { 42, 17, 24, 109, 97, 177, 56, 76, 122 } /* left = d153 */, + { 46, 23, 32, 74, 86, 150, 67, 183, 88 } /* left = d63 */, + { 58, 18, 28, 105, 139, 182, 70, 92, 63 } /* left = d27 */, + { 36, 38, 48, 92, 122, 165, 88, 137, 91 } /* left = tm */ + }, { /* above = d27 */ + { 62, 44, 61, 123, 105, 189, 48, 57, 64 } /* left = v */, + { 47, 25, 17, 175, 222, 220, 24, 30, 86 } /* left = h */, + { 82, 22, 32, 127, 143, 213, 39, 41, 70 } /* left = dc */, + { 68, 36, 17, 106, 102, 206, 59, 74, 74 } /* left = d45 */, + { 57, 39, 23, 151, 68, 216, 55, 63, 58 } /* left = d135 */, + { 49, 30, 35, 141, 70, 168, 82, 40, 115 } /* left = d117 */, + { 51, 25, 15, 136, 129, 202, 38, 35, 139 } /* left = d153 */, + { 59, 39, 19, 114, 75, 180, 77, 104, 42 } /* left = d63 */, + { 68, 26, 16, 111, 141, 215, 29, 28, 28 } /* left = d27 */, + { 40, 61, 26, 126, 152, 206, 61, 59, 93 } /* left = tm */ + }, { /* above = tm */ + { 44, 78, 115, 132, 119, 173, 71, 112, 93 } /* left = v */, + { 39, 38, 21, 184, 227, 206, 42, 32, 64 } /* left = h */, + { 65, 70, 60, 155, 159, 199, 61, 60, 81 } /* left = dc */, + { 58, 47, 36, 124, 137, 193, 80, 82, 78 } /* left = d45 */, + { 49, 50, 35, 144, 95, 205, 63, 78, 59 } /* left = d135 */, + { 41, 53, 52, 148, 71, 142, 65, 128, 51 } /* left = d117 */, + { 40, 36, 28, 143, 143, 202, 40, 55, 137 } /* left = d153 */, + { 42, 44, 44, 104, 105, 164, 64, 130, 80 } /* left = d63 */, + { 52, 34, 29, 129, 183, 227, 42, 35, 43 } /* left = d27 */, + { 43, 81, 53, 140, 169, 204, 68, 84, 72 } /* left = tm */ + } +}; + +const uint8_t ff_vp9_default_kf_uvmode_probs[10][9] = { + { 118, 15, 123, 148, 131, 101, 44, 93, 131 } /* y = v */, + { 113, 12, 23, 188, 226, 142, 26, 32, 125 } /* y = h */, + { 144, 11, 54, 157, 195, 130, 46, 58, 108 } /* y = dc */, + { 120, 11, 50, 123, 163, 135, 64, 77, 103 } /* y = d45 */, + { 113, 9, 36, 155, 111, 157, 32, 44, 161 } /* y = d135 */, + { 116, 9, 55, 176, 76, 96, 37, 61, 149 } /* y = d117 */, + { 115, 9, 28, 141, 161, 167, 21, 25, 193 } /* y = d153 */, + { 116, 12, 64, 120, 140, 125, 49, 115, 121 } /* y = d63 */, + { 120, 12, 32, 145, 195, 142, 32, 38, 86 } /* y = d27 */, + { 102, 19, 66, 162, 182, 122, 35, 59, 128 } /* y = tm */ +}; + +const int8_t ff_vp9_inter_mode_tree[3][2] = { + { -ZEROMV, 1 }, // '0' + { -NEARESTMV, 2 }, // '10' + { -NEARMV, -NEWMV }, // '11x' +}; + +const int8_t ff_vp9_filter_tree[2][2] = { + { -0, 1 }, // '0' + { -1, -2 }, // '1x' +}; + +const enum FilterMode ff_vp9_filter_lut[3] = { + FILTER_8TAP_REGULAR, + FILTER_8TAP_SMOOTH, + FILTER_8TAP_SHARP, +}; + +const int16_t ff_vp9_dc_qlookup[3][256] = { + { + 4, 8, 8, 9, 10, 11, 12, 12, + 13, 14, 15, 16, 17, 18, 19, 19, + 20, 21, 22, 23, 24, 25, 26, 26, + 27, 28, 29, 30, 31, 32, 32, 33, + 34, 35, 36, 37, 38, 38, 39, 40, + 41, 42, 43, 43, 44, 45, 46, 47, + 48, 48, 49, 50, 51, 52, 53, 53, + 54, 55, 56, 57, 57, 58, 59, 60, + 61, 62, 62, 63, 64, 65, 66, 66, + 67, 68, 69, 70, 70, 71, 72, 73, + 74, 74, 75, 76, 77, 78, 78, 79, + 80, 81, 81, 82, 83, 84, 85, 85, + 87, 88, 90, 92, 93, 95, 96, 98, + 99, 101, 102, 104, 105, 107, 108, 110, + 111, 113, 114, 116, 117, 118, 120, 121, + 123, 125, 127, 129, 131, 134, 136, 138, + 140, 142, 144, 146, 148, 150, 152, 154, + 156, 158, 161, 164, 166, 169, 172, 174, + 177, 180, 182, 185, 187, 190, 192, 195, + 199, 202, 205, 208, 211, 214, 217, 220, + 223, 226, 230, 233, 237, 240, 243, 247, + 250, 253, 257, 261, 265, 269, 272, 276, + 280, 284, 288, 292, 296, 300, 304, 309, + 313, 317, 322, 326, 330, 335, 340, 344, + 349, 354, 359, 364, 369, 374, 379, 384, + 389, 395, 400, 406, 411, 417, 423, 429, + 435, 441, 447, 454, 461, 467, 475, 482, + 489, 497, 505, 513, 522, 530, 539, 549, + 559, 569, 579, 590, 602, 614, 626, 640, + 654, 668, 684, 700, 717, 736, 755, 775, + 796, 819, 843, 869, 896, 925, 955, 988, + 1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336, + }, { + 4, 9, 10, 13, 15, 17, 20, 22, + 25, 28, 31, 34, 37, 40, 43, 47, + 50, 53, 57, 60, 64, 68, 71, 75, + 78, 82, 86, 90, 93, 97, 101, 105, + 109, 113, 116, 120, 124, 128, 132, 136, + 140, 143, 147, 151, 155, 159, 163, 166, + 170, 174, 178, 182, 185, 189, 193, 197, + 200, 204, 208, 212, 215, 219, 223, 226, + 230, 233, 237, 241, 244, 248, 251, 255, + 259, 262, 266, 269, 273, 276, 280, 283, + 287, 290, 293, 297, 300, 304, 307, 310, + 314, 317, 321, 324, 327, 331, 334, 337, + 343, 350, 356, 362, 369, 375, 381, 387, + 394, 400, 406, 412, 418, 424, 430, 436, + 442, 448, 454, 460, 466, 472, 478, 484, + 490, 499, 507, 516, 525, 533, 542, 550, + 559, 567, 576, 584, 592, 601, 609, 617, + 625, 634, 644, 655, 666, 676, 687, 698, + 708, 718, 729, 739, 749, 759, 770, 782, + 795, 807, 819, 831, 844, 856, 868, 880, + 891, 906, 920, 933, 947, 961, 975, 988, + 1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105, + 1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236, + 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, + 1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537, + 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717, + 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, + 1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197, + 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, + 2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102, + 3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953, + 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347, + }, { + 4, 12, 18, 25, 33, 41, 50, 60, + 70, 80, 91, 103, 115, 127, 140, 153, + 166, 180, 194, 208, 222, 237, 251, 266, + 281, 296, 312, 327, 343, 358, 374, 390, + 405, 421, 437, 453, 469, 484, 500, 516, + 532, 548, 564, 580, 596, 611, 627, 643, + 659, 674, 690, 706, 721, 737, 752, 768, + 783, 798, 814, 829, 844, 859, 874, 889, + 904, 919, 934, 949, 964, 978, 993, 1008, + 1022, 1037, 1051, 1065, 1080, 1094, 1108, 1122, + 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, + 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, + 1368, 1393, 1419, 1444, 1469, 1494, 1519, 1544, + 1569, 1594, 1618, 1643, 1668, 1692, 1717, 1741, + 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, + 1957, 1992, 2027, 2061, 2096, 2130, 2165, 2199, + 2233, 2267, 2300, 2334, 2367, 2400, 2434, 2467, + 2499, 2532, 2575, 2618, 2661, 2704, 2746, 2788, + 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, + 3177, 3226, 3275, 3324, 3373, 3421, 3469, 3517, + 3565, 3621, 3677, 3733, 3788, 3843, 3897, 3951, + 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420, + 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, + 5013, 5083, 5153, 5222, 5291, 5367, 5442, 5517, + 5591, 5665, 5745, 5825, 5905, 5984, 6063, 6149, + 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, + 6966, 7064, 7163, 7269, 7376, 7483, 7599, 7715, + 7832, 7958, 8085, 8214, 8352, 8492, 8635, 8788, + 8945, 9104, 9275, 9450, 9639, 9832, 10031, 10245, + 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, + 12750, 13118, 13501, 13913, 14343, 14807, 15290, 15812, + 16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387, + } +}; + +const int16_t ff_vp9_ac_qlookup[3][256] = { + { + 4, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, + 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, + 79, 80, 81, 82, 83, 84, 85, 86, + 87, 88, 89, 90, 91, 92, 93, 94, + 95, 96, 97, 98, 99, 100, 101, 102, + 104, 106, 108, 110, 112, 114, 116, 118, + 120, 122, 124, 126, 128, 130, 132, 134, + 136, 138, 140, 142, 144, 146, 148, 150, + 152, 155, 158, 161, 164, 167, 170, 173, + 176, 179, 182, 185, 188, 191, 194, 197, + 200, 203, 207, 211, 215, 219, 223, 227, + 231, 235, 239, 243, 247, 251, 255, 260, + 265, 270, 275, 280, 285, 290, 295, 300, + 305, 311, 317, 323, 329, 335, 341, 347, + 353, 359, 366, 373, 380, 387, 394, 401, + 408, 416, 424, 432, 440, 448, 456, 465, + 474, 483, 492, 501, 510, 520, 530, 540, + 550, 560, 571, 582, 593, 604, 615, 627, + 639, 651, 663, 676, 689, 702, 715, 729, + 743, 757, 771, 786, 801, 816, 832, 848, + 864, 881, 898, 915, 933, 951, 969, 988, + 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, + 1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343, + 1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567, + 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828, + }, { + 4, 9, 11, 13, 16, 18, 21, 24, + 27, 30, 33, 37, 40, 44, 48, 51, + 55, 59, 63, 67, 71, 75, 79, 83, + 88, 92, 96, 100, 105, 109, 114, 118, + 122, 127, 131, 136, 140, 145, 149, 154, + 158, 163, 168, 172, 177, 181, 186, 190, + 195, 199, 204, 208, 213, 217, 222, 226, + 231, 235, 240, 244, 249, 253, 258, 262, + 267, 271, 275, 280, 284, 289, 293, 297, + 302, 306, 311, 315, 319, 324, 328, 332, + 337, 341, 345, 349, 354, 358, 362, 367, + 371, 375, 379, 384, 388, 392, 396, 401, + 409, 417, 425, 433, 441, 449, 458, 466, + 474, 482, 490, 498, 506, 514, 523, 531, + 539, 547, 555, 563, 571, 579, 588, 596, + 604, 616, 628, 640, 652, 664, 676, 688, + 700, 713, 725, 737, 749, 761, 773, 785, + 797, 809, 825, 841, 857, 873, 889, 905, + 922, 938, 954, 970, 986, 1002, 1018, 1038, + 1058, 1078, 1098, 1118, 1138, 1158, 1178, 1198, + 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, + 1411, 1435, 1463, 1491, 1519, 1547, 1575, 1603, + 1631, 1663, 1695, 1727, 1759, 1791, 1823, 1859, + 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, + 2199, 2239, 2283, 2327, 2371, 2415, 2459, 2507, + 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915, + 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, + 3455, 3523, 3591, 3659, 3731, 3803, 3876, 3952, + 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, + 4692, 4784, 4876, 4972, 5068, 5168, 5268, 5372, + 5476, 5584, 5692, 5804, 5916, 6032, 6148, 6268, + 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312, + }, { + 4, 13, 19, 27, 35, 44, 54, 64, + 75, 87, 99, 112, 126, 139, 154, 168, + 183, 199, 214, 230, 247, 263, 280, 297, + 314, 331, 349, 366, 384, 402, 420, 438, + 456, 475, 493, 511, 530, 548, 567, 586, + 604, 623, 642, 660, 679, 698, 716, 735, + 753, 772, 791, 809, 828, 846, 865, 884, + 902, 920, 939, 957, 976, 994, 1012, 1030, + 1049, 1067, 1085, 1103, 1121, 1139, 1157, 1175, + 1193, 1211, 1229, 1246, 1264, 1282, 1299, 1317, + 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457, + 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, + 1627, 1660, 1693, 1725, 1758, 1791, 1824, 1856, + 1889, 1922, 1954, 1987, 2020, 2052, 2085, 2118, + 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, + 2411, 2459, 2508, 2556, 2605, 2653, 2701, 2750, + 2798, 2847, 2895, 2943, 2992, 3040, 3088, 3137, + 3185, 3234, 3298, 3362, 3426, 3491, 3555, 3619, + 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, + 4230, 4310, 4390, 4470, 4550, 4631, 4711, 4791, + 4871, 4967, 5064, 5160, 5256, 5352, 5448, 5544, + 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410, + 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, + 7579, 7723, 7867, 8011, 8155, 8315, 8475, 8635, + 8795, 8956, 9132, 9308, 9484, 9660, 9836, 10028, + 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, + 11885, 12109, 12333, 12573, 12813, 13053, 13309, 13565, + 13821, 14093, 14365, 14637, 14925, 15213, 15502, 15806, + 16110, 16414, 16734, 17054, 17390, 17726, 18062, 18414, + 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, + 21902, 22334, 22766, 23214, 23662, 24126, 24590, 25070, + 25551, 26047, 26559, 27071, 27599, 28143, 28687, 29247, + } +}; + +const enum TxfmType ff_vp9_intra_txfm_type[14] = { + [VERT_PRED] = ADST_DCT, + [HOR_PRED] = DCT_ADST, + [DC_PRED] = DCT_DCT, + [DIAG_DOWN_LEFT_PRED] = DCT_DCT, + [DIAG_DOWN_RIGHT_PRED] = ADST_ADST, + [VERT_RIGHT_PRED] = ADST_DCT, + [HOR_DOWN_PRED] = DCT_ADST, + [VERT_LEFT_PRED] = ADST_DCT, + [HOR_UP_PRED] = DCT_ADST, + [TM_VP8_PRED] = ADST_ADST, + [NEARESTMV] = DCT_DCT, + [NEARMV] = DCT_DCT, + [ZEROMV] = DCT_DCT, + [NEWMV] = DCT_DCT, +}; + +const int16_t ff_vp9_default_scan_4x4[16] = { + 0, 1, 4, 5, + 2, 8, 3, 6, + 12, 9, 7, 10, + 13, 11, 14, 15, +}; + +const int16_t ff_vp9_col_scan_4x4[16] = { + 0, 1, 2, 4, + 3, 5, 6, 8, + 7, 9, 10, 12, + 13, 11, 14, 15, +}; + +const int16_t ff_vp9_row_scan_4x4[16] = { + 0, 4, 1, 8, + 5, 12, 9, 2, + 6, 13, 3, 10, + 7, 14, 11, 15, +}; + +const int16_t ff_vp9_default_scan_8x8[64] = { + 0, 1, 8, 2, 9, 16, 10, 3, + 17, 24, 18, 11, 4, 25, 32, 19, + 12, 26, 5, 33, 20, 27, 40, 13, + 34, 6, 41, 28, 21, 35, 42, 48, + 14, 7, 36, 29, 43, 56, 49, 22, + 15, 37, 50, 44, 57, 30, 23, 51, + 45, 58, 38, 31, 52, 59, 39, 46, + 53, 60, 47, 54, 61, 55, 62, 63, +}; + +const int16_t ff_vp9_col_scan_8x8[64] = { + 0, 1, 2, 8, 3, 9, 4, 10, + 16, 5, 11, 17, 12, 18, 6, 24, + 19, 13, 25, 7, 26, 20, 32, 14, + 27, 21, 33, 28, 34, 15, 22, 35, + 40, 29, 41, 36, 23, 30, 42, 37, + 48, 43, 31, 44, 49, 38, 50, 56, + 45, 39, 51, 57, 52, 46, 58, 53, + 59, 47, 60, 54, 61, 55, 62, 63, +}; + +const int16_t ff_vp9_row_scan_8x8[64] = { + 0, 8, 16, 1, 9, 24, 2, 17, + 32, 10, 25, 3, 40, 18, 11, 33, + 26, 19, 4, 48, 41, 34, 12, 27, + 56, 20, 5, 42, 35, 13, 49, 28, + 6, 21, 43, 36, 14, 50, 29, 57, + 7, 44, 22, 37, 51, 15, 58, 30, + 23, 45, 52, 38, 59, 31, 46, 53, + 39, 60, 47, 61, 54, 62, 55, 63, +}; + +const int16_t ff_vp9_default_scan_16x16[256] = { + 0, 1, 16, 2, 17, 32, 3, 18, 33, 48, 4, 34, 19, 49, 20, 5, + 35, 64, 50, 36, 65, 21, 6, 51, 80, 66, 37, 22, 52, 7, 81, 67, + 38, 82, 53, 23, 96, 68, 8, 83, 97, 54, 39, 69, 112, 24, 98, 84, + 70, 55, 9, 40, 85, 99, 113, 128, 25, 114, 100, 71, 86, 56, 10, 41, + 115, 101, 129, 116, 72, 87, 26, 130, 144, 102, 57, 11, 42, 117, 131, 145, + 88, 103, 27, 73, 132, 118, 146, 58, 160, 12, 43, 133, 147, 104, 89, 119, + 161, 74, 148, 134, 28, 162, 59, 13, 176, 120, 149, 90, 135, 105, 163, 44, + 75, 177, 164, 29, 150, 121, 136, 178, 165, 14, 106, 60, 91, 151, 45, 179, + 192, 137, 166, 122, 76, 180, 152, 30, 61, 15, 107, 167, 181, 193, 92, 208, + 46, 138, 123, 153, 194, 77, 168, 182, 31, 195, 209, 183, 108, 139, 62, 154, + 47, 196, 93, 169, 210, 197, 224, 124, 184, 211, 78, 109, 170, 155, 63, 198, + 212, 185, 225, 240, 140, 94, 199, 125, 79, 213, 226, 171, 186, 156, 214, 200, + 110, 227, 141, 95, 241, 215, 228, 201, 126, 242, 187, 172, 157, 229, 111, 216, + 243, 142, 202, 230, 127, 217, 244, 173, 188, 231, 158, 203, 143, 245, 218, 232, + 189, 246, 159, 174, 233, 247, 219, 204, 175, 190, 248, 234, 205, 220, 249, 191, + 235, 221, 250, 206, 222, 251, 236, 207, 237, 223, 252, 238, 253, 239, 254, 255, +}; + +const int16_t ff_vp9_col_scan_16x16[256] = { + 0, 1, 2, 3, 16, 4, 17, 5, 18, 6, 19, 32, 20, 7, 33, 21, + 34, 8, 35, 22, 48, 36, 9, 49, 23, 50, 37, 10, 38, 51, 24, 64, + 52, 11, 65, 39, 25, 53, 66, 54, 40, 67, 12, 80, 26, 68, 55, 81, + 41, 69, 13, 27, 82, 56, 70, 83, 42, 14, 84, 96, 71, 28, 57, 85, + 97, 15, 72, 98, 43, 86, 58, 99, 29, 87, 100, 112, 73, 44, 101, 59, + 30, 113, 88, 114, 74, 128, 102, 45, 31, 115, 60, 103, 89, 116, 75, 129, + 117, 46, 104, 90, 61, 130, 118, 131, 132, 105, 76, 47, 119, 144, 91, 62, + 133, 106, 145, 120, 146, 134, 77, 147, 121, 92, 135, 148, 63, 107, 136, 122, + 93, 149, 160, 78, 150, 137, 108, 161, 162, 151, 123, 79, 138, 163, 152, 94, + 164, 109, 165, 153, 124, 139, 176, 166, 95, 177, 167, 110, 154, 178, 125, 179, + 140, 168, 155, 111, 180, 192, 181, 169, 141, 126, 182, 193, 194, 156, 183, 170, + 195, 127, 142, 196, 184, 208, 197, 157, 171, 143, 185, 198, 209, 199, 210, 172, + 158, 186, 211, 224, 212, 200, 240, 159, 213, 225, 187, 201, 173, 226, 214, 215, + 227, 202, 228, 188, 241, 216, 174, 229, 242, 203, 243, 217, 230, 175, 189, 244, + 231, 204, 218, 232, 245, 219, 246, 190, 233, 205, 191, 247, 234, 248, 220, 206, + 249, 235, 221, 207, 250, 236, 222, 251, 223, 237, 238, 252, 239, 253, 254, 255, +}; + +const int16_t ff_vp9_row_scan_16x16[256] = { + 0, 16, 32, 1, 48, 17, 64, 33, 2, 80, 18, 49, 96, 34, 3, 65, + 19, 112, 50, 81, 35, 4, 128, 66, 20, 97, 51, 82, 5, 144, 36, 67, + 113, 98, 21, 52, 160, 83, 129, 37, 68, 6, 114, 176, 99, 53, 22, 84, + 145, 38, 69, 130, 7, 115, 192, 100, 54, 23, 85, 161, 146, 131, 39, 70, + 208, 116, 8, 101, 177, 55, 86, 24, 162, 147, 132, 71, 224, 117, 40, 102, + 9, 148, 56, 87, 193, 163, 240, 133, 178, 25, 118, 72, 41, 103, 164, 10, + 149, 88, 134, 209, 179, 57, 119, 194, 26, 73, 165, 150, 104, 42, 135, 11, + 180, 120, 89, 225, 195, 58, 27, 210, 151, 181, 166, 74, 43, 105, 12, 136, + 90, 59, 241, 121, 28, 196, 167, 211, 152, 44, 182, 137, 75, 13, 226, 106, + 122, 60, 197, 91, 168, 29, 183, 153, 14, 76, 212, 138, 45, 107, 15, 198, + 92, 227, 169, 30, 123, 154, 61, 242, 184, 213, 139, 46, 77, 31, 108, 170, + 199, 185, 124, 228, 93, 155, 214, 62, 140, 243, 78, 47, 200, 109, 186, 171, + 201, 94, 63, 215, 229, 156, 79, 125, 141, 110, 216, 187, 172, 244, 202, 230, + 217, 95, 157, 126, 245, 111, 142, 231, 188, 127, 158, 218, 173, 232, 246, 233, + 203, 143, 247, 174, 189, 159, 219, 204, 248, 234, 249, 175, 190, 220, 205, 250, + 235, 191, 221, 251, 236, 206, 252, 222, 207, 237, 223, 253, 238, 254, 239, 255, +}; + +const int16_t ff_vp9_default_scan_32x32[1024] = { + 0, 1, 32, 2, 33, 64, 3, 34, 65, 4, 96, 35, 66, 5, 36, 97, 67, 128, 98, 68, 37, 6, 129, 99, 7, 160, 69, 38, 130, 100, 161, 131, + 39, 70, 8, 101, 162, 132, 192, 71, 40, 9, 102, 163, 133, 193, 72, 224, 103, 41, 164, 10, 194, 134, 165, 73, 104, 135, 225, 42, 195, 11, 256, 166, + 226, 196, 74, 105, 136, 43, 12, 167, 197, 227, 257, 75, 106, 137, 228, 44, 198, 168, 258, 288, 13, 229, 76, 107, 199, 138, 259, 169, 289, 45, 230, 260, + 200, 108, 14, 170, 139, 320, 290, 77, 231, 261, 46, 201, 140, 291, 109, 232, 321, 262, 171, 78, 292, 15, 322, 202, 263, 352, 172, 293, 233, 141, 323, 110, + 47, 203, 264, 234, 294, 353, 324, 16, 79, 204, 265, 295, 325, 173, 354, 142, 235, 384, 48, 296, 111, 266, 355, 326, 80, 17, 205, 236, 174, 356, 385, 327, + 143, 297, 267, 357, 386, 112, 49, 328, 298, 206, 416, 237, 358, 387, 81, 175, 18, 329, 359, 388, 299, 330, 389, 113, 417, 238, 360, 50, 207, 418, 390, 331, + 19, 448, 361, 82, 419, 391, 239, 51, 362, 420, 114, 449, 480, 421, 83, 363, 450, 422, 512, 451, 423, 115, 452, 481, 453, 482, 454, 544, 483, 455, 513, 484, + 514, 485, 515, 486, 545, 576, 487, 546, 547, 608, 577, 578, 579, 609, 610, 611, 20, 144, 268, 392, 516, 640, 21, 52, 145, 176, 269, 300, 393, 424, 517, 548, + 641, 672, 22, 53, 84, 146, 177, 208, 270, 301, 332, 394, 425, 456, 518, 549, 580, 642, 673, 704, 23, 54, 85, 116, 147, 178, 209, 240, 271, 302, 333, 364, + 395, 426, 457, 488, 519, 550, 581, 612, 643, 674, 705, 736, 55, 86, 117, 179, 210, 241, 303, 334, 365, 427, 458, 489, 551, 582, 613, 675, 706, 737, 87, 118, + 211, 242, 335, 366, 459, 490, 583, 614, 707, 738, 119, 243, 367, 491, 615, 739, 24, 148, 272, 396, 520, 644, 768, 25, 56, 149, 180, 273, 304, 397, 428, 521, + 552, 645, 676, 769, 800, 26, 57, 88, 150, 181, 212, 274, 305, 336, 398, 429, 460, 522, 553, 584, 646, 677, 708, 770, 801, 832, 27, 58, 89, 120, 151, 182, + 213, 244, 275, 306, 337, 368, 399, 430, 461, 492, 523, 554, 585, 616, 647, 678, 709, 740, 771, 802, 833, 864, 59, 90, 121, 183, 214, 245, 307, 338, 369, 431, + 462, 493, 555, 586, 617, 679, 710, 741, 803, 834, 865, 91, 122, 215, 246, 339, 370, 463, 494, 587, 618, 711, 742, 835, 866, 123, 247, 371, 495, 619, 743, 867, + 28, 152, 276, 400, 524, 648, 772, 896, 29, 60, 153, 184, 277, 308, 401, 432, 525, 556, 649, 680, 773, 804, 897, 928, 30, 61, 92, 154, 185, 216, 278, 309, + 340, 402, 433, 464, 526, 557, 588, 650, 681, 712, 774, 805, 836, 898, 929, 960, 31, 62, 93, 124, 155, 186, 217, 248, 279, 310, 341, 372, 403, 434, 465, 496, + 527, 558, 589, 620, 651, 682, 713, 744, 775, 806, 837, 868, 899, 930, 961, 992, 63, 94, 125, 187, 218, 249, 311, 342, 373, 435, 466, 497, 559, 590, 621, 683, + 714, 745, 807, 838, 869, 931, 962, 993, 95, 126, 219, 250, 343, 374, 467, 498, 591, 622, 715, 746, 839, 870, 963, 994, 127, 251, 375, 499, 623, 747, 871, 995, + 156, 280, 404, 528, 652, 776, 900, 157, 188, 281, 312, 405, 436, 529, 560, 653, 684, 777, 808, 901, 932, 158, 189, 220, 282, 313, 344, 406, 437, 468, 530, 561, + 592, 654, 685, 716, 778, 809, 840, 902, 933, 964, 159, 190, 221, 252, 283, 314, 345, 376, 407, 438, 469, 500, 531, 562, 593, 624, 655, 686, 717, 748, 779, 810, + 841, 872, 903, 934, 965, 996, 191, 222, 253, 315, 346, 377, 439, 470, 501, 563, 594, 625, 687, 718, 749, 811, 842, 873, 935, 966, 997, 223, 254, 347, 378, 471, + 502, 595, 626, 719, 750, 843, 874, 967, 998, 255, 379, 503, 627, 751, 875, 999, 284, 408, 532, 656, 780, 904, 285, 316, 409, 440, 533, 564, 657, 688, 781, 812, + 905, 936, 286, 317, 348, 410, 441, 472, 534, 565, 596, 658, 689, 720, 782, 813, 844, 906, 937, 968, 287, 318, 349, 380, 411, 442, 473, 504, 535, 566, 597, 628, + 659, 690, 721, 752, 783, 814, 845, 876, 907, 938, 969, 1000, 319, 350, 381, 443, 474, 505, 567, 598, 629, 691, 722, 753, 815, 846, 877, 939, 970, 1001, 351, 382, + 475, 506, 599, 630, 723, 754, 847, 878, 971, 1002, 383, 507, 631, 755, 879, 1003, 412, 536, 660, 784, 908, 413, 444, 537, 568, 661, 692, 785, 816, 909, 940, 414, + 445, 476, 538, 569, 600, 662, 693, 724, 786, 817, 848, 910, 941, 972, 415, 446, 477, 508, 539, 570, 601, 632, 663, 694, 725, 756, 787, 818, 849, 880, 911, 942, + 973, 1004, 447, 478, 509, 571, 602, 633, 695, 726, 757, 819, 850, 881, 943, 974, 1005, 479, 510, 603, 634, 727, 758, 851, 882, 975, 1006, 511, 635, 759, 883, 1007, + 540, 664, 788, 912, 541, 572, 665, 696, 789, 820, 913, 944, 542, 573, 604, 666, 697, 728, 790, 821, 852, 914, 945, 976, 543, 574, 605, 636, 667, 698, 729, 760, + 791, 822, 853, 884, 915, 946, 977, 1008, 575, 606, 637, 699, 730, 761, 823, 854, 885, 947, 978, 1009, 607, 638, 731, 762, 855, 886, 979, 1010, 639, 763, 887, 1011, + 668, 792, 916, 669, 700, 793, 824, 917, 948, 670, 701, 732, 794, 825, 856, 918, 949, 980, 671, 702, 733, 764, 795, 826, 857, 888, 919, 950, 981, 1012, 703, 734, + 765, 827, 858, 889, 951, 982, 1013, 735, 766, 859, 890, 983, 1014, 767, 891, 1015, 796, 920, 797, 828, 921, 952, 798, 829, 860, 922, 953, 984, 799, 830, 861, 892, + 923, 954, 985, 1016, 831, 862, 893, 955, 986, 1017, 863, 894, 987, 1018, 895, 1019, 924, 925, 956, 926, 957, 988, 927, 958, 989, 1020, 959, 990, 1021, 991, 1022, 1023, +}; + +const int16_t * const ff_vp9_scans[5][4] = { + { + ff_vp9_default_scan_4x4, ff_vp9_col_scan_4x4, + ff_vp9_row_scan_4x4, ff_vp9_default_scan_4x4 + }, { + ff_vp9_default_scan_8x8, ff_vp9_col_scan_8x8, + ff_vp9_row_scan_8x8, ff_vp9_default_scan_8x8 + }, { + ff_vp9_default_scan_16x16, ff_vp9_col_scan_16x16, + ff_vp9_row_scan_16x16, ff_vp9_default_scan_16x16 + }, { + ff_vp9_default_scan_32x32, ff_vp9_default_scan_32x32, + ff_vp9_default_scan_32x32, ff_vp9_default_scan_32x32 + }, { // lossless + ff_vp9_default_scan_4x4, ff_vp9_default_scan_4x4, + ff_vp9_default_scan_4x4, ff_vp9_default_scan_4x4 + } +}; + +const int16_t ff_vp9_default_scan_4x4_nb[16][2] = { + { 0, 0 }, { 0, 0 }, { 4, 1 }, { 1, 1 }, + { 4, 4 }, { 2, 2 }, { 5, 2 }, { 8, 8 }, + { 8, 5 }, { 6, 3 }, { 9, 6 }, { 12, 9 }, + { 10, 7 }, { 13, 10 }, { 14, 11 }, { 0, 0 }, +}; + +const int16_t ff_vp9_col_scan_4x4_nb[16][2] = { + { 0, 0 }, { 1, 1 }, { 0, 0 }, { 2, 2 }, + { 4, 4 }, { 5, 5 }, { 4, 4 }, { 6, 6 }, + { 8, 8 }, { 9, 9 }, { 8, 8 }, { 12, 12 }, + { 10, 10 }, { 13, 13 }, { 14, 14 }, { 0, 0 }, +}; + +const int16_t ff_vp9_row_scan_4x4_nb[16][2] = { + { 0, 0 }, { 0, 0 }, { 4, 4 }, { 1, 1 }, + { 8, 8 }, { 5, 5 }, { 1, 1 }, { 2, 2 }, + { 9, 9 }, { 2, 2 }, { 6, 6 }, { 3, 3 }, + { 10, 10 }, { 7, 7 }, { 11, 11 }, { 0, 0 }, +}; + +const int16_t ff_vp9_default_scan_8x8_nb[64][2] = { + { 0, 0 }, { 0, 0 }, { 1, 1 }, { 8, 1 }, + { 8, 8 }, { 9, 2 }, { 2, 2 }, { 16, 9 }, + { 16, 16 }, { 17, 10 }, { 10, 3 }, { 3, 3 }, + { 24, 17 }, { 24, 24 }, { 18, 11 }, { 11, 4 }, + { 25, 18 }, { 4, 4 }, { 32, 25 }, { 19, 12 }, + { 26, 19 }, { 32, 32 }, { 12, 5 }, { 33, 26 }, + { 5, 5 }, { 40, 33 }, { 27, 20 }, { 20, 13 }, + { 34, 27 }, { 41, 34 }, { 40, 40 }, { 13, 6 }, + { 6, 6 }, { 35, 28 }, { 28, 21 }, { 42, 35 }, + { 48, 48 }, { 48, 41 }, { 21, 14 }, { 14, 7 }, + { 36, 29 }, { 49, 42 }, { 43, 36 }, { 56, 49 }, + { 29, 22 }, { 22, 15 }, { 50, 43 }, { 44, 37 }, + { 57, 50 }, { 37, 30 }, { 30, 23 }, { 51, 44 }, + { 58, 51 }, { 38, 31 }, { 45, 38 }, { 52, 45 }, + { 59, 52 }, { 46, 39 }, { 53, 46 }, { 60, 53 }, + { 54, 47 }, { 61, 54 }, { 62, 55 }, { 0, 0 }, +}; + +const int16_t ff_vp9_col_scan_8x8_nb[64][2] = { + { 0, 0 }, { 1, 1 }, { 0, 0 }, { 2, 2 }, + { 8, 8 }, { 3, 3 }, { 9, 9 }, { 8, 8 }, + { 4, 4 }, { 10, 10 }, { 16, 16 }, { 11, 11 }, + { 17, 17 }, { 5, 5 }, { 16, 16 }, { 18, 18 }, + { 12, 12 }, { 24, 24 }, { 6, 6 }, { 25, 25 }, + { 19, 19 }, { 24, 24 }, { 13, 13 }, { 26, 26 }, + { 20, 20 }, { 32, 32 }, { 27, 27 }, { 33, 33 }, + { 14, 14 }, { 21, 21 }, { 34, 34 }, { 32, 32 }, + { 28, 28 }, { 40, 40 }, { 35, 35 }, { 22, 22 }, + { 29, 29 }, { 41, 41 }, { 36, 36 }, { 40, 40 }, + { 42, 42 }, { 30, 30 }, { 43, 43 }, { 48, 48 }, + { 37, 37 }, { 49, 49 }, { 48, 48 }, { 44, 44 }, + { 38, 38 }, { 50, 50 }, { 56, 56 }, { 51, 51 }, + { 45, 45 }, { 57, 57 }, { 52, 52 }, { 58, 58 }, + { 46, 46 }, { 59, 59 }, { 53, 53 }, { 60, 60 }, + { 54, 54 }, { 61, 61 }, { 62, 62 }, { 0, 0 }, +}; + +const int16_t ff_vp9_row_scan_8x8_nb[64][2] = { + { 0, 0 }, { 8, 8 }, { 0, 0 }, { 1, 1 }, + { 16, 16 }, { 1, 1 }, { 9, 9 }, { 24, 24 }, + { 2, 2 }, { 17, 17 }, { 2, 2 }, { 32, 32 }, + { 10, 10 }, { 3, 3 }, { 25, 25 }, { 18, 18 }, + { 11, 11 }, { 3, 3 }, { 40, 40 }, { 33, 33 }, + { 26, 26 }, { 4, 4 }, { 19, 19 }, { 48, 48 }, + { 12, 12 }, { 4, 4 }, { 34, 34 }, { 27, 27 }, + { 5, 5 }, { 41, 41 }, { 20, 20 }, { 5, 5 }, + { 13, 13 }, { 35, 35 }, { 28, 28 }, { 6, 6 }, + { 42, 42 }, { 21, 21 }, { 49, 49 }, { 6, 6 }, + { 36, 36 }, { 14, 14 }, { 29, 29 }, { 43, 43 }, + { 7, 7 }, { 50, 50 }, { 22, 22 }, { 15, 15 }, + { 37, 37 }, { 44, 44 }, { 30, 30 }, { 51, 51 }, + { 23, 23 }, { 38, 38 }, { 45, 45 }, { 31, 31 }, + { 52, 52 }, { 39, 39 }, { 53, 53 }, { 46, 46 }, + { 54, 54 }, { 47, 47 }, { 55, 55 }, { 0, 0 }, +}; + +const int16_t ff_vp9_default_scan_16x16_nb[256][2] = { + { 0, 0 }, { 0, 0 }, { 1, 1 }, { 16, 1 }, + { 16, 16 }, { 2, 2 }, { 17, 2 }, { 32, 17 }, + { 32, 32 }, { 3, 3 }, { 33, 18 }, { 18, 3 }, + { 48, 33 }, { 19, 4 }, { 4, 4 }, { 34, 19 }, + { 48, 48 }, { 49, 34 }, { 35, 20 }, { 64, 49 }, + { 20, 5 }, { 5, 5 }, { 50, 35 }, { 64, 64 }, + { 65, 50 }, { 36, 21 }, { 21, 6 }, { 51, 36 }, + { 6, 6 }, { 80, 65 }, { 66, 51 }, { 37, 22 }, + { 81, 66 }, { 52, 37 }, { 22, 7 }, { 80, 80 }, + { 67, 52 }, { 7, 7 }, { 82, 67 }, { 96, 81 }, + { 53, 38 }, { 38, 23 }, { 68, 53 }, { 96, 96 }, + { 23, 8 }, { 97, 82 }, { 83, 68 }, { 69, 54 }, + { 54, 39 }, { 8, 8 }, { 39, 24 }, { 84, 69 }, + { 98, 83 }, { 112, 97 }, { 112, 112 }, { 24, 9 }, + { 113, 98 }, { 99, 84 }, { 70, 55 }, { 85, 70 }, + { 55, 40 }, { 9, 9 }, { 40, 25 }, { 114, 99 }, + { 100, 85 }, { 128, 113 }, { 115, 100 }, { 71, 56 }, + { 86, 71 }, { 25, 10 }, { 129, 114 }, { 128, 128 }, + { 101, 86 }, { 56, 41 }, { 10, 10 }, { 41, 26 }, + { 116, 101 }, { 130, 115 }, { 144, 129 }, { 87, 72 }, + { 102, 87 }, { 26, 11 }, { 72, 57 }, { 131, 116 }, + { 117, 102 }, { 145, 130 }, { 57, 42 }, { 144, 144 }, + { 11, 11 }, { 42, 27 }, { 132, 117 }, { 146, 131 }, + { 103, 88 }, { 88, 73 }, { 118, 103 }, { 160, 145 }, + { 73, 58 }, { 147, 132 }, { 133, 118 }, { 27, 12 }, + { 161, 146 }, { 58, 43 }, { 12, 12 }, { 160, 160 }, + { 119, 104 }, { 148, 133 }, { 89, 74 }, { 134, 119 }, + { 104, 89 }, { 162, 147 }, { 43, 28 }, { 74, 59 }, + { 176, 161 }, { 163, 148 }, { 28, 13 }, { 149, 134 }, + { 120, 105 }, { 135, 120 }, { 177, 162 }, { 164, 149 }, + { 13, 13 }, { 105, 90 }, { 59, 44 }, { 90, 75 }, + { 150, 135 }, { 44, 29 }, { 178, 163 }, { 176, 176 }, + { 136, 121 }, { 165, 150 }, { 121, 106 }, { 75, 60 }, + { 179, 164 }, { 151, 136 }, { 29, 14 }, { 60, 45 }, + { 14, 14 }, { 106, 91 }, { 166, 151 }, { 180, 165 }, + { 192, 177 }, { 91, 76 }, { 192, 192 }, { 45, 30 }, + { 137, 122 }, { 122, 107 }, { 152, 137 }, { 193, 178 }, + { 76, 61 }, { 167, 152 }, { 181, 166 }, { 30, 15 }, + { 194, 179 }, { 208, 193 }, { 182, 167 }, { 107, 92 }, + { 138, 123 }, { 61, 46 }, { 153, 138 }, { 46, 31 }, + { 195, 180 }, { 92, 77 }, { 168, 153 }, { 209, 194 }, + { 196, 181 }, { 208, 208 }, { 123, 108 }, { 183, 168 }, + { 210, 195 }, { 77, 62 }, { 108, 93 }, { 169, 154 }, + { 154, 139 }, { 62, 47 }, { 197, 182 }, { 211, 196 }, + { 184, 169 }, { 224, 209 }, { 224, 224 }, { 139, 124 }, + { 93, 78 }, { 198, 183 }, { 124, 109 }, { 78, 63 }, + { 212, 197 }, { 225, 210 }, { 170, 155 }, { 185, 170 }, + { 155, 140 }, { 213, 198 }, { 199, 184 }, { 109, 94 }, + { 226, 211 }, { 140, 125 }, { 94, 79 }, { 240, 225 }, + { 214, 199 }, { 227, 212 }, { 200, 185 }, { 125, 110 }, + { 241, 226 }, { 186, 171 }, { 171, 156 }, { 156, 141 }, + { 228, 213 }, { 110, 95 }, { 215, 200 }, { 242, 227 }, + { 141, 126 }, { 201, 186 }, { 229, 214 }, { 126, 111 }, + { 216, 201 }, { 243, 228 }, { 172, 157 }, { 187, 172 }, + { 230, 215 }, { 157, 142 }, { 202, 187 }, { 142, 127 }, + { 244, 229 }, { 217, 202 }, { 231, 216 }, { 188, 173 }, + { 245, 230 }, { 158, 143 }, { 173, 158 }, { 232, 217 }, + { 246, 231 }, { 218, 203 }, { 203, 188 }, { 174, 159 }, + { 189, 174 }, { 247, 232 }, { 233, 218 }, { 204, 189 }, + { 219, 204 }, { 248, 233 }, { 190, 175 }, { 234, 219 }, + { 220, 205 }, { 249, 234 }, { 205, 190 }, { 221, 206 }, + { 250, 235 }, { 235, 220 }, { 206, 191 }, { 236, 221 }, + { 222, 207 }, { 251, 236 }, { 237, 222 }, { 252, 237 }, + { 238, 223 }, { 253, 238 }, { 254, 239 }, { 0, 0 }, +}; + +const int16_t ff_vp9_col_scan_16x16_nb[256][2] = { + { 0, 0 }, { 1, 1 }, { 2, 2 }, { 0, 0 }, + { 3, 3 }, { 16, 16 }, { 4, 4 }, { 17, 17 }, + { 5, 5 }, { 18, 18 }, { 16, 16 }, { 19, 19 }, + { 6, 6 }, { 32, 32 }, { 20, 20 }, { 33, 33 }, + { 7, 7 }, { 34, 34 }, { 21, 21 }, { 32, 32 }, + { 35, 35 }, { 8, 8 }, { 48, 48 }, { 22, 22 }, + { 49, 49 }, { 36, 36 }, { 9, 9 }, { 37, 37 }, + { 50, 50 }, { 23, 23 }, { 48, 48 }, { 51, 51 }, + { 10, 10 }, { 64, 64 }, { 38, 38 }, { 24, 24 }, + { 52, 52 }, { 65, 65 }, { 53, 53 }, { 39, 39 }, + { 66, 66 }, { 11, 11 }, { 64, 64 }, { 25, 25 }, + { 67, 67 }, { 54, 54 }, { 80, 80 }, { 40, 40 }, + { 68, 68 }, { 12, 12 }, { 26, 26 }, { 81, 81 }, + { 55, 55 }, { 69, 69 }, { 82, 82 }, { 41, 41 }, + { 13, 13 }, { 83, 83 }, { 80, 80 }, { 70, 70 }, + { 27, 27 }, { 56, 56 }, { 84, 84 }, { 96, 96 }, + { 14, 14 }, { 71, 71 }, { 97, 97 }, { 42, 42 }, + { 85, 85 }, { 57, 57 }, { 98, 98 }, { 28, 28 }, + { 86, 86 }, { 99, 99 }, { 96, 96 }, { 72, 72 }, + { 43, 43 }, { 100, 100 }, { 58, 58 }, { 29, 29 }, + { 112, 112 }, { 87, 87 }, { 113, 113 }, { 73, 73 }, + { 112, 112 }, { 101, 101 }, { 44, 44 }, { 30, 30 }, + { 114, 114 }, { 59, 59 }, { 102, 102 }, { 88, 88 }, + { 115, 115 }, { 74, 74 }, { 128, 128 }, { 116, 116 }, + { 45, 45 }, { 103, 103 }, { 89, 89 }, { 60, 60 }, + { 129, 129 }, { 117, 117 }, { 130, 130 }, { 131, 131 }, + { 104, 104 }, { 75, 75 }, { 46, 46 }, { 118, 118 }, + { 128, 128 }, { 90, 90 }, { 61, 61 }, { 132, 132 }, + { 105, 105 }, { 144, 144 }, { 119, 119 }, { 145, 145 }, + { 133, 133 }, { 76, 76 }, { 146, 146 }, { 120, 120 }, + { 91, 91 }, { 134, 134 }, { 147, 147 }, { 62, 62 }, + { 106, 106 }, { 135, 135 }, { 121, 121 }, { 92, 92 }, + { 148, 148 }, { 144, 144 }, { 77, 77 }, { 149, 149 }, + { 136, 136 }, { 107, 107 }, { 160, 160 }, { 161, 161 }, + { 150, 150 }, { 122, 122 }, { 78, 78 }, { 137, 137 }, + { 162, 162 }, { 151, 151 }, { 93, 93 }, { 163, 163 }, + { 108, 108 }, { 164, 164 }, { 152, 152 }, { 123, 123 }, + { 138, 138 }, { 160, 160 }, { 165, 165 }, { 94, 94 }, + { 176, 176 }, { 166, 166 }, { 109, 109 }, { 153, 153 }, + { 177, 177 }, { 124, 124 }, { 178, 178 }, { 139, 139 }, + { 167, 167 }, { 154, 154 }, { 110, 110 }, { 179, 179 }, + { 176, 176 }, { 180, 180 }, { 168, 168 }, { 140, 140 }, + { 125, 125 }, { 181, 181 }, { 192, 192 }, { 193, 193 }, + { 155, 155 }, { 182, 182 }, { 169, 169 }, { 194, 194 }, + { 126, 126 }, { 141, 141 }, { 195, 195 }, { 183, 183 }, + { 192, 192 }, { 196, 196 }, { 156, 156 }, { 170, 170 }, + { 142, 142 }, { 184, 184 }, { 197, 197 }, { 208, 208 }, + { 198, 198 }, { 209, 209 }, { 171, 171 }, { 157, 157 }, + { 185, 185 }, { 210, 210 }, { 208, 208 }, { 211, 211 }, + { 199, 199 }, { 224, 224 }, { 158, 158 }, { 212, 212 }, + { 224, 224 }, { 186, 186 }, { 200, 200 }, { 172, 172 }, + { 225, 225 }, { 213, 213 }, { 214, 214 }, { 226, 226 }, + { 201, 201 }, { 227, 227 }, { 187, 187 }, { 240, 240 }, + { 215, 215 }, { 173, 173 }, { 228, 228 }, { 241, 241 }, + { 202, 202 }, { 242, 242 }, { 216, 216 }, { 229, 229 }, + { 174, 174 }, { 188, 188 }, { 243, 243 }, { 230, 230 }, + { 203, 203 }, { 217, 217 }, { 231, 231 }, { 244, 244 }, + { 218, 218 }, { 245, 245 }, { 189, 189 }, { 232, 232 }, + { 204, 204 }, { 190, 190 }, { 246, 246 }, { 233, 233 }, + { 247, 247 }, { 219, 219 }, { 205, 205 }, { 248, 248 }, + { 234, 234 }, { 220, 220 }, { 206, 206 }, { 249, 249 }, + { 235, 235 }, { 221, 221 }, { 250, 250 }, { 222, 222 }, + { 236, 236 }, { 237, 237 }, { 251, 251 }, { 238, 238 }, + { 252, 252 }, { 253, 253 }, { 254, 254 }, { 0, 0 }, +}; + +const int16_t ff_vp9_row_scan_16x16_nb[256][2] = { + { 0, 0 }, { 16, 16 }, { 0, 0 }, { 32, 32 }, + { 1, 1 }, { 48, 48 }, { 17, 17 }, { 1, 1 }, + { 64, 64 }, { 2, 2 }, { 33, 33 }, { 80, 80 }, + { 18, 18 }, { 2, 2 }, { 49, 49 }, { 3, 3 }, + { 96, 96 }, { 34, 34 }, { 65, 65 }, { 19, 19 }, + { 3, 3 }, { 112, 112 }, { 50, 50 }, { 4, 4 }, + { 81, 81 }, { 35, 35 }, { 66, 66 }, { 4, 4 }, + { 128, 128 }, { 20, 20 }, { 51, 51 }, { 97, 97 }, + { 82, 82 }, { 5, 5 }, { 36, 36 }, { 144, 144 }, + { 67, 67 }, { 113, 113 }, { 21, 21 }, { 52, 52 }, + { 5, 5 }, { 98, 98 }, { 160, 160 }, { 83, 83 }, + { 37, 37 }, { 6, 6 }, { 68, 68 }, { 129, 129 }, + { 22, 22 }, { 53, 53 }, { 114, 114 }, { 6, 6 }, + { 99, 99 }, { 176, 176 }, { 84, 84 }, { 38, 38 }, + { 7, 7 }, { 69, 69 }, { 145, 145 }, { 130, 130 }, + { 115, 115 }, { 23, 23 }, { 54, 54 }, { 192, 192 }, + { 100, 100 }, { 7, 7 }, { 85, 85 }, { 161, 161 }, + { 39, 39 }, { 70, 70 }, { 8, 8 }, { 146, 146 }, + { 131, 131 }, { 116, 116 }, { 55, 55 }, { 208, 208 }, + { 101, 101 }, { 24, 24 }, { 86, 86 }, { 8, 8 }, + { 132, 132 }, { 40, 40 }, { 71, 71 }, { 177, 177 }, + { 147, 147 }, { 224, 224 }, { 117, 117 }, { 162, 162 }, + { 9, 9 }, { 102, 102 }, { 56, 56 }, { 25, 25 }, + { 87, 87 }, { 148, 148 }, { 9, 9 }, { 133, 133 }, + { 72, 72 }, { 118, 118 }, { 193, 193 }, { 163, 163 }, + { 41, 41 }, { 103, 103 }, { 178, 178 }, { 10, 10 }, + { 57, 57 }, { 149, 149 }, { 134, 134 }, { 88, 88 }, + { 26, 26 }, { 119, 119 }, { 10, 10 }, { 164, 164 }, + { 104, 104 }, { 73, 73 }, { 209, 209 }, { 179, 179 }, + { 42, 42 }, { 11, 11 }, { 194, 194 }, { 135, 135 }, + { 165, 165 }, { 150, 150 }, { 58, 58 }, { 27, 27 }, + { 89, 89 }, { 11, 11 }, { 120, 120 }, { 74, 74 }, + { 43, 43 }, { 225, 225 }, { 105, 105 }, { 12, 12 }, + { 180, 180 }, { 151, 151 }, { 195, 195 }, { 136, 136 }, + { 28, 28 }, { 166, 166 }, { 121, 121 }, { 59, 59 }, + { 12, 12 }, { 210, 210 }, { 90, 90 }, { 106, 106 }, + { 44, 44 }, { 181, 181 }, { 75, 75 }, { 152, 152 }, + { 13, 13 }, { 167, 167 }, { 137, 137 }, { 13, 13 }, + { 60, 60 }, { 196, 196 }, { 122, 122 }, { 29, 29 }, + { 91, 91 }, { 14, 14 }, { 182, 182 }, { 76, 76 }, + { 211, 211 }, { 153, 153 }, { 14, 14 }, { 107, 107 }, + { 138, 138 }, { 45, 45 }, { 226, 226 }, { 168, 168 }, + { 197, 197 }, { 123, 123 }, { 30, 30 }, { 61, 61 }, + { 15, 15 }, { 92, 92 }, { 154, 154 }, { 183, 183 }, + { 169, 169 }, { 108, 108 }, { 212, 212 }, { 77, 77 }, + { 139, 139 }, { 198, 198 }, { 46, 46 }, { 124, 124 }, + { 227, 227 }, { 62, 62 }, { 31, 31 }, { 184, 184 }, + { 93, 93 }, { 170, 170 }, { 155, 155 }, { 185, 185 }, + { 78, 78 }, { 47, 47 }, { 199, 199 }, { 213, 213 }, + { 140, 140 }, { 63, 63 }, { 109, 109 }, { 125, 125 }, + { 94, 94 }, { 200, 200 }, { 171, 171 }, { 156, 156 }, + { 228, 228 }, { 186, 186 }, { 214, 214 }, { 201, 201 }, + { 79, 79 }, { 141, 141 }, { 110, 110 }, { 229, 229 }, + { 95, 95 }, { 126, 126 }, { 215, 215 }, { 172, 172 }, + { 111, 111 }, { 142, 142 }, { 202, 202 }, { 157, 157 }, + { 216, 216 }, { 230, 230 }, { 217, 217 }, { 187, 187 }, + { 127, 127 }, { 231, 231 }, { 158, 158 }, { 173, 173 }, + { 143, 143 }, { 203, 203 }, { 188, 188 }, { 232, 232 }, + { 218, 218 }, { 233, 233 }, { 159, 159 }, { 174, 174 }, + { 204, 204 }, { 189, 189 }, { 234, 234 }, { 219, 219 }, + { 175, 175 }, { 205, 205 }, { 235, 235 }, { 220, 220 }, + { 190, 190 }, { 236, 236 }, { 206, 206 }, { 191, 191 }, + { 221, 221 }, { 207, 207 }, { 237, 237 }, { 222, 222 }, + { 238, 238 }, { 223, 223 }, { 239, 239 }, { 0, 0 }, +}; + +const int16_t ff_vp9_default_scan_32x32_nb[1024][2] = { + { 0, 0 }, { 0, 0 }, { 1, 1 }, { 32, 1 }, + { 32, 32 }, { 2, 2 }, { 33, 2 }, { 64, 33 }, + { 3, 3 }, { 64, 64 }, { 34, 3 }, { 65, 34 }, + { 4, 4 }, { 35, 4 }, { 96, 65 }, { 66, 35 }, + { 96, 96 }, { 97, 66 }, { 67, 36 }, { 36, 5 }, + { 5, 5 }, { 128, 97 }, { 98, 67 }, { 6, 6 }, + { 128, 128 }, { 68, 37 }, { 37, 6 }, { 129, 98 }, + { 99, 68 }, { 160, 129 }, { 130, 99 }, { 38, 7 }, + { 69, 38 }, { 7, 7 }, { 100, 69 }, { 161, 130 }, + { 131, 100 }, { 160, 160 }, { 70, 39 }, { 39, 8 }, + { 8, 8 }, { 101, 70 }, { 162, 131 }, { 132, 101 }, + { 192, 161 }, { 71, 40 }, { 192, 192 }, { 102, 71 }, + { 40, 9 }, { 163, 132 }, { 9, 9 }, { 193, 162 }, + { 133, 102 }, { 164, 133 }, { 72, 41 }, { 103, 72 }, + { 134, 103 }, { 224, 193 }, { 41, 10 }, { 194, 163 }, + { 10, 10 }, { 224, 224 }, { 165, 134 }, { 225, 194 }, + { 195, 164 }, { 73, 42 }, { 104, 73 }, { 135, 104 }, + { 42, 11 }, { 11, 11 }, { 166, 135 }, { 196, 165 }, + { 226, 195 }, { 256, 225 }, { 74, 43 }, { 105, 74 }, + { 136, 105 }, { 227, 196 }, { 43, 12 }, { 197, 166 }, + { 167, 136 }, { 257, 226 }, { 256, 256 }, { 12, 12 }, + { 228, 197 }, { 75, 44 }, { 106, 75 }, { 198, 167 }, + { 137, 106 }, { 258, 227 }, { 168, 137 }, { 288, 257 }, + { 44, 13 }, { 229, 198 }, { 259, 228 }, { 199, 168 }, + { 107, 76 }, { 13, 13 }, { 169, 138 }, { 138, 107 }, + { 288, 288 }, { 289, 258 }, { 76, 45 }, { 230, 199 }, + { 260, 229 }, { 45, 14 }, { 200, 169 }, { 139, 108 }, + { 290, 259 }, { 108, 77 }, { 231, 200 }, { 320, 289 }, + { 261, 230 }, { 170, 139 }, { 77, 46 }, { 291, 260 }, + { 14, 14 }, { 321, 290 }, { 201, 170 }, { 262, 231 }, + { 320, 320 }, { 171, 140 }, { 292, 261 }, { 232, 201 }, + { 140, 109 }, { 322, 291 }, { 109, 78 }, { 46, 15 }, + { 202, 171 }, { 263, 232 }, { 233, 202 }, { 293, 262 }, + { 352, 321 }, { 323, 292 }, { 15, 15 }, { 78, 47 }, + { 203, 172 }, { 264, 233 }, { 294, 263 }, { 324, 293 }, + { 172, 141 }, { 353, 322 }, { 141, 110 }, { 234, 203 }, + { 352, 352 }, { 47, 16 }, { 295, 264 }, { 110, 79 }, + { 265, 234 }, { 354, 323 }, { 325, 294 }, { 79, 48 }, + { 16, 16 }, { 204, 173 }, { 235, 204 }, { 173, 142 }, + { 355, 324 }, { 384, 353 }, { 326, 295 }, { 142, 111 }, + { 296, 265 }, { 266, 235 }, { 356, 325 }, { 385, 354 }, + { 111, 80 }, { 48, 17 }, { 327, 296 }, { 297, 266 }, + { 205, 174 }, { 384, 384 }, { 236, 205 }, { 357, 326 }, + { 386, 355 }, { 80, 49 }, { 174, 143 }, { 17, 17 }, + { 328, 297 }, { 358, 327 }, { 387, 356 }, { 298, 267 }, + { 329, 298 }, { 388, 357 }, { 112, 81 }, { 416, 385 }, + { 237, 206 }, { 359, 328 }, { 49, 18 }, { 206, 175 }, + { 417, 386 }, { 389, 358 }, { 330, 299 }, { 18, 18 }, + { 416, 416 }, { 360, 329 }, { 81, 50 }, { 418, 387 }, + { 390, 359 }, { 238, 207 }, { 50, 19 }, { 361, 330 }, + { 419, 388 }, { 113, 82 }, { 448, 417 }, { 448, 448 }, + { 420, 389 }, { 82, 51 }, { 362, 331 }, { 449, 418 }, + { 421, 390 }, { 480, 480 }, { 450, 419 }, { 422, 391 }, + { 114, 83 }, { 451, 420 }, { 480, 449 }, { 452, 421 }, + { 481, 450 }, { 453, 422 }, { 512, 512 }, { 482, 451 }, + { 454, 423 }, { 512, 481 }, { 483, 452 }, { 513, 482 }, + { 484, 453 }, { 514, 483 }, { 485, 454 }, { 544, 513 }, + { 544, 544 }, { 486, 455 }, { 545, 514 }, { 546, 515 }, + { 576, 576 }, { 576, 545 }, { 577, 546 }, { 578, 547 }, + { 608, 577 }, { 609, 578 }, { 610, 579 }, { 19, 19 }, + { 143, 112 }, { 267, 236 }, { 391, 360 }, { 515, 484 }, + { 608, 608 }, { 20, 20 }, { 51, 20 }, { 144, 113 }, + { 175, 144 }, { 268, 237 }, { 299, 268 }, { 392, 361 }, + { 423, 392 }, { 516, 485 }, { 547, 516 }, { 640, 609 }, + { 640, 640 }, { 21, 21 }, { 52, 21 }, { 83, 52 }, + { 145, 114 }, { 176, 145 }, { 207, 176 }, { 269, 238 }, + { 300, 269 }, { 331, 300 }, { 393, 362 }, { 424, 393 }, + { 455, 424 }, { 517, 486 }, { 548, 517 }, { 579, 548 }, + { 641, 610 }, { 672, 641 }, { 672, 672 }, { 22, 22 }, + { 53, 22 }, { 84, 53 }, { 115, 84 }, { 146, 115 }, + { 177, 146 }, { 208, 177 }, { 239, 208 }, { 270, 239 }, + { 301, 270 }, { 332, 301 }, { 363, 332 }, { 394, 363 }, + { 425, 394 }, { 456, 425 }, { 487, 456 }, { 518, 487 }, + { 549, 518 }, { 580, 549 }, { 611, 580 }, { 642, 611 }, + { 673, 642 }, { 704, 673 }, { 704, 704 }, { 54, 23 }, + { 85, 54 }, { 116, 85 }, { 178, 147 }, { 209, 178 }, + { 240, 209 }, { 302, 271 }, { 333, 302 }, { 364, 333 }, + { 426, 395 }, { 457, 426 }, { 488, 457 }, { 550, 519 }, + { 581, 550 }, { 612, 581 }, { 674, 643 }, { 705, 674 }, + { 736, 705 }, { 86, 55 }, { 117, 86 }, { 210, 179 }, + { 241, 210 }, { 334, 303 }, { 365, 334 }, { 458, 427 }, + { 489, 458 }, { 582, 551 }, { 613, 582 }, { 706, 675 }, + { 737, 706 }, { 118, 87 }, { 242, 211 }, { 366, 335 }, + { 490, 459 }, { 614, 583 }, { 738, 707 }, { 23, 23 }, + { 147, 116 }, { 271, 240 }, { 395, 364 }, { 519, 488 }, + { 643, 612 }, { 736, 736 }, { 24, 24 }, { 55, 24 }, + { 148, 117 }, { 179, 148 }, { 272, 241 }, { 303, 272 }, + { 396, 365 }, { 427, 396 }, { 520, 489 }, { 551, 520 }, + { 644, 613 }, { 675, 644 }, { 768, 737 }, { 768, 768 }, + { 25, 25 }, { 56, 25 }, { 87, 56 }, { 149, 118 }, + { 180, 149 }, { 211, 180 }, { 273, 242 }, { 304, 273 }, + { 335, 304 }, { 397, 366 }, { 428, 397 }, { 459, 428 }, + { 521, 490 }, { 552, 521 }, { 583, 552 }, { 645, 614 }, + { 676, 645 }, { 707, 676 }, { 769, 738 }, { 800, 769 }, + { 800, 800 }, { 26, 26 }, { 57, 26 }, { 88, 57 }, + { 119, 88 }, { 150, 119 }, { 181, 150 }, { 212, 181 }, + { 243, 212 }, { 274, 243 }, { 305, 274 }, { 336, 305 }, + { 367, 336 }, { 398, 367 }, { 429, 398 }, { 460, 429 }, + { 491, 460 }, { 522, 491 }, { 553, 522 }, { 584, 553 }, + { 615, 584 }, { 646, 615 }, { 677, 646 }, { 708, 677 }, + { 739, 708 }, { 770, 739 }, { 801, 770 }, { 832, 801 }, + { 832, 832 }, { 58, 27 }, { 89, 58 }, { 120, 89 }, + { 182, 151 }, { 213, 182 }, { 244, 213 }, { 306, 275 }, + { 337, 306 }, { 368, 337 }, { 430, 399 }, { 461, 430 }, + { 492, 461 }, { 554, 523 }, { 585, 554 }, { 616, 585 }, + { 678, 647 }, { 709, 678 }, { 740, 709 }, { 802, 771 }, + { 833, 802 }, { 864, 833 }, { 90, 59 }, { 121, 90 }, + { 214, 183 }, { 245, 214 }, { 338, 307 }, { 369, 338 }, + { 462, 431 }, { 493, 462 }, { 586, 555 }, { 617, 586 }, + { 710, 679 }, { 741, 710 }, { 834, 803 }, { 865, 834 }, + { 122, 91 }, { 246, 215 }, { 370, 339 }, { 494, 463 }, + { 618, 587 }, { 742, 711 }, { 866, 835 }, { 27, 27 }, + { 151, 120 }, { 275, 244 }, { 399, 368 }, { 523, 492 }, + { 647, 616 }, { 771, 740 }, { 864, 864 }, { 28, 28 }, + { 59, 28 }, { 152, 121 }, { 183, 152 }, { 276, 245 }, + { 307, 276 }, { 400, 369 }, { 431, 400 }, { 524, 493 }, + { 555, 524 }, { 648, 617 }, { 679, 648 }, { 772, 741 }, + { 803, 772 }, { 896, 865 }, { 896, 896 }, { 29, 29 }, + { 60, 29 }, { 91, 60 }, { 153, 122 }, { 184, 153 }, + { 215, 184 }, { 277, 246 }, { 308, 277 }, { 339, 308 }, + { 401, 370 }, { 432, 401 }, { 463, 432 }, { 525, 494 }, + { 556, 525 }, { 587, 556 }, { 649, 618 }, { 680, 649 }, + { 711, 680 }, { 773, 742 }, { 804, 773 }, { 835, 804 }, + { 897, 866 }, { 928, 897 }, { 928, 928 }, { 30, 30 }, + { 61, 30 }, { 92, 61 }, { 123, 92 }, { 154, 123 }, + { 185, 154 }, { 216, 185 }, { 247, 216 }, { 278, 247 }, + { 309, 278 }, { 340, 309 }, { 371, 340 }, { 402, 371 }, + { 433, 402 }, { 464, 433 }, { 495, 464 }, { 526, 495 }, + { 557, 526 }, { 588, 557 }, { 619, 588 }, { 650, 619 }, + { 681, 650 }, { 712, 681 }, { 743, 712 }, { 774, 743 }, + { 805, 774 }, { 836, 805 }, { 867, 836 }, { 898, 867 }, + { 929, 898 }, { 960, 929 }, { 960, 960 }, { 62, 31 }, + { 93, 62 }, { 124, 93 }, { 186, 155 }, { 217, 186 }, + { 248, 217 }, { 310, 279 }, { 341, 310 }, { 372, 341 }, + { 434, 403 }, { 465, 434 }, { 496, 465 }, { 558, 527 }, + { 589, 558 }, { 620, 589 }, { 682, 651 }, { 713, 682 }, + { 744, 713 }, { 806, 775 }, { 837, 806 }, { 868, 837 }, + { 930, 899 }, { 961, 930 }, { 992, 961 }, { 94, 63 }, + { 125, 94 }, { 218, 187 }, { 249, 218 }, { 342, 311 }, + { 373, 342 }, { 466, 435 }, { 497, 466 }, { 590, 559 }, + { 621, 590 }, { 714, 683 }, { 745, 714 }, { 838, 807 }, + { 869, 838 }, { 962, 931 }, { 993, 962 }, { 126, 95 }, + { 250, 219 }, { 374, 343 }, { 498, 467 }, { 622, 591 }, + { 746, 715 }, { 870, 839 }, { 994, 963 }, { 155, 124 }, + { 279, 248 }, { 403, 372 }, { 527, 496 }, { 651, 620 }, + { 775, 744 }, { 899, 868 }, { 156, 125 }, { 187, 156 }, + { 280, 249 }, { 311, 280 }, { 404, 373 }, { 435, 404 }, + { 528, 497 }, { 559, 528 }, { 652, 621 }, { 683, 652 }, + { 776, 745 }, { 807, 776 }, { 900, 869 }, { 931, 900 }, + { 157, 126 }, { 188, 157 }, { 219, 188 }, { 281, 250 }, + { 312, 281 }, { 343, 312 }, { 405, 374 }, { 436, 405 }, + { 467, 436 }, { 529, 498 }, { 560, 529 }, { 591, 560 }, + { 653, 622 }, { 684, 653 }, { 715, 684 }, { 777, 746 }, + { 808, 777 }, { 839, 808 }, { 901, 870 }, { 932, 901 }, + { 963, 932 }, { 158, 127 }, { 189, 158 }, { 220, 189 }, + { 251, 220 }, { 282, 251 }, { 313, 282 }, { 344, 313 }, + { 375, 344 }, { 406, 375 }, { 437, 406 }, { 468, 437 }, + { 499, 468 }, { 530, 499 }, { 561, 530 }, { 592, 561 }, + { 623, 592 }, { 654, 623 }, { 685, 654 }, { 716, 685 }, + { 747, 716 }, { 778, 747 }, { 809, 778 }, { 840, 809 }, + { 871, 840 }, { 902, 871 }, { 933, 902 }, { 964, 933 }, + { 995, 964 }, { 190, 159 }, { 221, 190 }, { 252, 221 }, + { 314, 283 }, { 345, 314 }, { 376, 345 }, { 438, 407 }, + { 469, 438 }, { 500, 469 }, { 562, 531 }, { 593, 562 }, + { 624, 593 }, { 686, 655 }, { 717, 686 }, { 748, 717 }, + { 810, 779 }, { 841, 810 }, { 872, 841 }, { 934, 903 }, + { 965, 934 }, { 996, 965 }, { 222, 191 }, { 253, 222 }, + { 346, 315 }, { 377, 346 }, { 470, 439 }, { 501, 470 }, + { 594, 563 }, { 625, 594 }, { 718, 687 }, { 749, 718 }, + { 842, 811 }, { 873, 842 }, { 966, 935 }, { 997, 966 }, + { 254, 223 }, { 378, 347 }, { 502, 471 }, { 626, 595 }, + { 750, 719 }, { 874, 843 }, { 998, 967 }, { 283, 252 }, + { 407, 376 }, { 531, 500 }, { 655, 624 }, { 779, 748 }, + { 903, 872 }, { 284, 253 }, { 315, 284 }, { 408, 377 }, + { 439, 408 }, { 532, 501 }, { 563, 532 }, { 656, 625 }, + { 687, 656 }, { 780, 749 }, { 811, 780 }, { 904, 873 }, + { 935, 904 }, { 285, 254 }, { 316, 285 }, { 347, 316 }, + { 409, 378 }, { 440, 409 }, { 471, 440 }, { 533, 502 }, + { 564, 533 }, { 595, 564 }, { 657, 626 }, { 688, 657 }, + { 719, 688 }, { 781, 750 }, { 812, 781 }, { 843, 812 }, + { 905, 874 }, { 936, 905 }, { 967, 936 }, { 286, 255 }, + { 317, 286 }, { 348, 317 }, { 379, 348 }, { 410, 379 }, + { 441, 410 }, { 472, 441 }, { 503, 472 }, { 534, 503 }, + { 565, 534 }, { 596, 565 }, { 627, 596 }, { 658, 627 }, + { 689, 658 }, { 720, 689 }, { 751, 720 }, { 782, 751 }, + { 813, 782 }, { 844, 813 }, { 875, 844 }, { 906, 875 }, + { 937, 906 }, { 968, 937 }, { 999, 968 }, { 318, 287 }, + { 349, 318 }, { 380, 349 }, { 442, 411 }, { 473, 442 }, + { 504, 473 }, { 566, 535 }, { 597, 566 }, { 628, 597 }, + { 690, 659 }, { 721, 690 }, { 752, 721 }, { 814, 783 }, + { 845, 814 }, { 876, 845 }, { 938, 907 }, { 969, 938 }, + { 1000, 969 }, { 350, 319 }, { 381, 350 }, { 474, 443 }, + { 505, 474 }, { 598, 567 }, { 629, 598 }, { 722, 691 }, + { 753, 722 }, { 846, 815 }, { 877, 846 }, { 970, 939 }, + { 1001, 970 }, { 382, 351 }, { 506, 475 }, { 630, 599 }, + { 754, 723 }, { 878, 847 }, { 1002, 971 }, { 411, 380 }, + { 535, 504 }, { 659, 628 }, { 783, 752 }, { 907, 876 }, + { 412, 381 }, { 443, 412 }, { 536, 505 }, { 567, 536 }, + { 660, 629 }, { 691, 660 }, { 784, 753 }, { 815, 784 }, + { 908, 877 }, { 939, 908 }, { 413, 382 }, { 444, 413 }, + { 475, 444 }, { 537, 506 }, { 568, 537 }, { 599, 568 }, + { 661, 630 }, { 692, 661 }, { 723, 692 }, { 785, 754 }, + { 816, 785 }, { 847, 816 }, { 909, 878 }, { 940, 909 }, + { 971, 940 }, { 414, 383 }, { 445, 414 }, { 476, 445 }, + { 507, 476 }, { 538, 507 }, { 569, 538 }, { 600, 569 }, + { 631, 600 }, { 662, 631 }, { 693, 662 }, { 724, 693 }, + { 755, 724 }, { 786, 755 }, { 817, 786 }, { 848, 817 }, + { 879, 848 }, { 910, 879 }, { 941, 910 }, { 972, 941 }, + { 1003, 972 }, { 446, 415 }, { 477, 446 }, { 508, 477 }, + { 570, 539 }, { 601, 570 }, { 632, 601 }, { 694, 663 }, + { 725, 694 }, { 756, 725 }, { 818, 787 }, { 849, 818 }, + { 880, 849 }, { 942, 911 }, { 973, 942 }, { 1004, 973 }, + { 478, 447 }, { 509, 478 }, { 602, 571 }, { 633, 602 }, + { 726, 695 }, { 757, 726 }, { 850, 819 }, { 881, 850 }, + { 974, 943 }, { 1005, 974 }, { 510, 479 }, { 634, 603 }, + { 758, 727 }, { 882, 851 }, { 1006, 975 }, { 539, 508 }, + { 663, 632 }, { 787, 756 }, { 911, 880 }, { 540, 509 }, + { 571, 540 }, { 664, 633 }, { 695, 664 }, { 788, 757 }, + { 819, 788 }, { 912, 881 }, { 943, 912 }, { 541, 510 }, + { 572, 541 }, { 603, 572 }, { 665, 634 }, { 696, 665 }, + { 727, 696 }, { 789, 758 }, { 820, 789 }, { 851, 820 }, + { 913, 882 }, { 944, 913 }, { 975, 944 }, { 542, 511 }, + { 573, 542 }, { 604, 573 }, { 635, 604 }, { 666, 635 }, + { 697, 666 }, { 728, 697 }, { 759, 728 }, { 790, 759 }, + { 821, 790 }, { 852, 821 }, { 883, 852 }, { 914, 883 }, + { 945, 914 }, { 976, 945 }, { 1007, 976 }, { 574, 543 }, + { 605, 574 }, { 636, 605 }, { 698, 667 }, { 729, 698 }, + { 760, 729 }, { 822, 791 }, { 853, 822 }, { 884, 853 }, + { 946, 915 }, { 977, 946 }, { 1008, 977 }, { 606, 575 }, + { 637, 606 }, { 730, 699 }, { 761, 730 }, { 854, 823 }, + { 885, 854 }, { 978, 947 }, { 1009, 978 }, { 638, 607 }, + { 762, 731 }, { 886, 855 }, { 1010, 979 }, { 667, 636 }, + { 791, 760 }, { 915, 884 }, { 668, 637 }, { 699, 668 }, + { 792, 761 }, { 823, 792 }, { 916, 885 }, { 947, 916 }, + { 669, 638 }, { 700, 669 }, { 731, 700 }, { 793, 762 }, + { 824, 793 }, { 855, 824 }, { 917, 886 }, { 948, 917 }, + { 979, 948 }, { 670, 639 }, { 701, 670 }, { 732, 701 }, + { 763, 732 }, { 794, 763 }, { 825, 794 }, { 856, 825 }, + { 887, 856 }, { 918, 887 }, { 949, 918 }, { 980, 949 }, + { 1011, 980 }, { 702, 671 }, { 733, 702 }, { 764, 733 }, + { 826, 795 }, { 857, 826 }, { 888, 857 }, { 950, 919 }, + { 981, 950 }, { 1012, 981 }, { 734, 703 }, { 765, 734 }, + { 858, 827 }, { 889, 858 }, { 982, 951 }, { 1013, 982 }, + { 766, 735 }, { 890, 859 }, { 1014, 983 }, { 795, 764 }, + { 919, 888 }, { 796, 765 }, { 827, 796 }, { 920, 889 }, + { 951, 920 }, { 797, 766 }, { 828, 797 }, { 859, 828 }, + { 921, 890 }, { 952, 921 }, { 983, 952 }, { 798, 767 }, + { 829, 798 }, { 860, 829 }, { 891, 860 }, { 922, 891 }, + { 953, 922 }, { 984, 953 }, { 1015, 984 }, { 830, 799 }, + { 861, 830 }, { 892, 861 }, { 954, 923 }, { 985, 954 }, + { 1016, 985 }, { 862, 831 }, { 893, 862 }, { 986, 955 }, + { 1017, 986 }, { 894, 863 }, { 1018, 987 }, { 923, 892 }, + { 924, 893 }, { 955, 924 }, { 925, 894 }, { 956, 925 }, + { 987, 956 }, { 926, 895 }, { 957, 926 }, { 988, 957 }, + { 1019, 988 }, { 958, 927 }, { 989, 958 }, { 1020, 989 }, + { 990, 959 }, { 1021, 990 }, { 1022, 991 }, { 0, 0 }, +}; + +const int16_t (* const ff_vp9_scans_nb[5][4])[2] = { + { + ff_vp9_default_scan_4x4_nb, ff_vp9_col_scan_4x4_nb, + ff_vp9_row_scan_4x4_nb, ff_vp9_default_scan_4x4_nb + }, { + ff_vp9_default_scan_8x8_nb, ff_vp9_col_scan_8x8_nb, + ff_vp9_row_scan_8x8_nb, ff_vp9_default_scan_8x8_nb + }, { + ff_vp9_default_scan_16x16_nb, ff_vp9_col_scan_16x16_nb, + ff_vp9_row_scan_16x16_nb, ff_vp9_default_scan_16x16_nb + }, { + ff_vp9_default_scan_32x32_nb, ff_vp9_default_scan_32x32_nb, + ff_vp9_default_scan_32x32_nb, ff_vp9_default_scan_32x32_nb + }, { // lossless + ff_vp9_default_scan_4x4_nb, ff_vp9_default_scan_4x4_nb, + ff_vp9_default_scan_4x4_nb, ff_vp9_default_scan_4x4_nb + } +}; + +const uint8_t ff_vp9_model_pareto8[256][8] = { + { 6, 86, 128, 11, 87, 42, 91, 52 }, + { 3, 86, 128, 6, 86, 23, 88, 29 }, + { 6, 86, 128, 11, 87, 42, 91, 52 }, + { 9, 86, 129, 17, 88, 61, 94, 76 }, + { 12, 86, 129, 22, 88, 77, 97, 93 }, + { 15, 87, 129, 28, 89, 93, 100, 110 }, + { 17, 87, 129, 33, 90, 105, 103, 123 }, + { 20, 88, 130, 38, 91, 118, 106, 136 }, + { 23, 88, 130, 43, 91, 128, 108, 146 }, + { 26, 89, 131, 48, 92, 139, 111, 156 }, + { 28, 89, 131, 53, 93, 147, 114, 163 }, + { 31, 90, 131, 58, 94, 156, 117, 171 }, + { 34, 90, 131, 62, 94, 163, 119, 177 }, + { 37, 90, 132, 66, 95, 171, 122, 184 }, + { 39, 90, 132, 70, 96, 177, 124, 189 }, + { 42, 91, 132, 75, 97, 183, 127, 194 }, + { 44, 91, 132, 79, 97, 188, 129, 198 }, + { 47, 92, 133, 83, 98, 193, 132, 202 }, + { 49, 92, 133, 86, 99, 197, 134, 205 }, + { 52, 93, 133, 90, 100, 201, 137, 208 }, + { 54, 93, 133, 94, 100, 204, 139, 211 }, + { 57, 94, 134, 98, 101, 208, 142, 214 }, + { 59, 94, 134, 101, 102, 211, 144, 216 }, + { 62, 94, 135, 105, 103, 214, 146, 218 }, + { 64, 94, 135, 108, 103, 216, 148, 220 }, + { 66, 95, 135, 111, 104, 219, 151, 222 }, + { 68, 95, 135, 114, 105, 221, 153, 223 }, + { 71, 96, 136, 117, 106, 224, 155, 225 }, + { 73, 96, 136, 120, 106, 225, 157, 226 }, + { 76, 97, 136, 123, 107, 227, 159, 228 }, + { 78, 97, 136, 126, 108, 229, 160, 229 }, + { 80, 98, 137, 129, 109, 231, 162, 231 }, + { 82, 98, 137, 131, 109, 232, 164, 232 }, + { 84, 98, 138, 134, 110, 234, 166, 233 }, + { 86, 98, 138, 137, 111, 235, 168, 234 }, + { 89, 99, 138, 140, 112, 236, 170, 235 }, + { 91, 99, 138, 142, 112, 237, 171, 235 }, + { 93, 100, 139, 145, 113, 238, 173, 236 }, + { 95, 100, 139, 147, 114, 239, 174, 237 }, + { 97, 101, 140, 149, 115, 240, 176, 238 }, + { 99, 101, 140, 151, 115, 241, 177, 238 }, + { 101, 102, 140, 154, 116, 242, 179, 239 }, + { 103, 102, 140, 156, 117, 242, 180, 239 }, + { 105, 103, 141, 158, 118, 243, 182, 240 }, + { 107, 103, 141, 160, 118, 243, 183, 240 }, + { 109, 104, 141, 162, 119, 244, 185, 241 }, + { 111, 104, 141, 164, 119, 244, 186, 241 }, + { 113, 104, 142, 166, 120, 245, 187, 242 }, + { 114, 104, 142, 168, 121, 245, 188, 242 }, + { 116, 105, 143, 170, 122, 246, 190, 243 }, + { 118, 105, 143, 171, 122, 246, 191, 243 }, + { 120, 106, 143, 173, 123, 247, 192, 244 }, + { 121, 106, 143, 175, 124, 247, 193, 244 }, + { 123, 107, 144, 177, 125, 248, 195, 244 }, + { 125, 107, 144, 178, 125, 248, 196, 244 }, + { 127, 108, 145, 180, 126, 249, 197, 245 }, + { 128, 108, 145, 181, 127, 249, 198, 245 }, + { 130, 109, 145, 183, 128, 249, 199, 245 }, + { 132, 109, 145, 184, 128, 249, 200, 245 }, + { 134, 110, 146, 186, 129, 250, 201, 246 }, + { 135, 110, 146, 187, 130, 250, 202, 246 }, + { 137, 111, 147, 189, 131, 251, 203, 246 }, + { 138, 111, 147, 190, 131, 251, 204, 246 }, + { 140, 112, 147, 192, 132, 251, 205, 247 }, + { 141, 112, 147, 193, 132, 251, 206, 247 }, + { 143, 113, 148, 194, 133, 251, 207, 247 }, + { 144, 113, 148, 195, 134, 251, 207, 247 }, + { 146, 114, 149, 197, 135, 252, 208, 248 }, + { 147, 114, 149, 198, 135, 252, 209, 248 }, + { 149, 115, 149, 199, 136, 252, 210, 248 }, + { 150, 115, 149, 200, 137, 252, 210, 248 }, + { 152, 115, 150, 201, 138, 252, 211, 248 }, + { 153, 115, 150, 202, 138, 252, 212, 248 }, + { 155, 116, 151, 204, 139, 253, 213, 249 }, + { 156, 116, 151, 205, 139, 253, 213, 249 }, + { 158, 117, 151, 206, 140, 253, 214, 249 }, + { 159, 117, 151, 207, 141, 253, 215, 249 }, + { 161, 118, 152, 208, 142, 253, 216, 249 }, + { 162, 118, 152, 209, 142, 253, 216, 249 }, + { 163, 119, 153, 210, 143, 253, 217, 249 }, + { 164, 119, 153, 211, 143, 253, 217, 249 }, + { 166, 120, 153, 212, 144, 254, 218, 250 }, + { 167, 120, 153, 212, 145, 254, 219, 250 }, + { 168, 121, 154, 213, 146, 254, 220, 250 }, + { 169, 121, 154, 214, 146, 254, 220, 250 }, + { 171, 122, 155, 215, 147, 254, 221, 250 }, + { 172, 122, 155, 216, 147, 254, 221, 250 }, + { 173, 123, 155, 217, 148, 254, 222, 250 }, + { 174, 123, 155, 217, 149, 254, 222, 250 }, + { 176, 124, 156, 218, 150, 254, 223, 250 }, + { 177, 124, 156, 219, 150, 254, 223, 250 }, + { 178, 125, 157, 220, 151, 254, 224, 251 }, + { 179, 125, 157, 220, 151, 254, 224, 251 }, + { 180, 126, 157, 221, 152, 254, 225, 251 }, + { 181, 126, 157, 221, 152, 254, 225, 251 }, + { 183, 127, 158, 222, 153, 254, 226, 251 }, + { 184, 127, 158, 223, 154, 254, 226, 251 }, + { 185, 128, 159, 224, 155, 255, 227, 251 }, + { 186, 128, 159, 224, 155, 255, 227, 251 }, + { 187, 129, 160, 225, 156, 255, 228, 251 }, + { 188, 130, 160, 225, 156, 255, 228, 251 }, + { 189, 131, 160, 226, 157, 255, 228, 251 }, + { 190, 131, 160, 226, 158, 255, 228, 251 }, + { 191, 132, 161, 227, 159, 255, 229, 251 }, + { 192, 132, 161, 227, 159, 255, 229, 251 }, + { 193, 133, 162, 228, 160, 255, 230, 252 }, + { 194, 133, 162, 229, 160, 255, 230, 252 }, + { 195, 134, 163, 230, 161, 255, 231, 252 }, + { 196, 134, 163, 230, 161, 255, 231, 252 }, + { 197, 135, 163, 231, 162, 255, 231, 252 }, + { 198, 135, 163, 231, 162, 255, 231, 252 }, + { 199, 136, 164, 232, 163, 255, 232, 252 }, + { 200, 136, 164, 232, 164, 255, 232, 252 }, + { 201, 137, 165, 233, 165, 255, 233, 252 }, + { 201, 137, 165, 233, 165, 255, 233, 252 }, + { 202, 138, 166, 233, 166, 255, 233, 252 }, + { 203, 138, 166, 233, 166, 255, 233, 252 }, + { 204, 139, 166, 234, 167, 255, 234, 252 }, + { 205, 139, 166, 234, 167, 255, 234, 252 }, + { 206, 140, 167, 235, 168, 255, 235, 252 }, + { 206, 140, 167, 235, 168, 255, 235, 252 }, + { 207, 141, 168, 236, 169, 255, 235, 252 }, + { 208, 141, 168, 236, 170, 255, 235, 252 }, + { 209, 142, 169, 237, 171, 255, 236, 252 }, + { 209, 143, 169, 237, 171, 255, 236, 252 }, + { 210, 144, 169, 237, 172, 255, 236, 252 }, + { 211, 144, 169, 237, 172, 255, 236, 252 }, + { 212, 145, 170, 238, 173, 255, 237, 252 }, + { 213, 145, 170, 238, 173, 255, 237, 252 }, + { 214, 146, 171, 239, 174, 255, 237, 253 }, + { 214, 146, 171, 239, 174, 255, 237, 253 }, + { 215, 147, 172, 240, 175, 255, 238, 253 }, + { 215, 147, 172, 240, 175, 255, 238, 253 }, + { 216, 148, 173, 240, 176, 255, 238, 253 }, + { 217, 148, 173, 240, 176, 255, 238, 253 }, + { 218, 149, 173, 241, 177, 255, 239, 253 }, + { 218, 149, 173, 241, 178, 255, 239, 253 }, + { 219, 150, 174, 241, 179, 255, 239, 253 }, + { 219, 151, 174, 241, 179, 255, 239, 253 }, + { 220, 152, 175, 242, 180, 255, 240, 253 }, + { 221, 152, 175, 242, 180, 255, 240, 253 }, + { 222, 153, 176, 242, 181, 255, 240, 253 }, + { 222, 153, 176, 242, 181, 255, 240, 253 }, + { 223, 154, 177, 243, 182, 255, 240, 253 }, + { 223, 154, 177, 243, 182, 255, 240, 253 }, + { 224, 155, 178, 244, 183, 255, 241, 253 }, + { 224, 155, 178, 244, 183, 255, 241, 253 }, + { 225, 156, 178, 244, 184, 255, 241, 253 }, + { 225, 157, 178, 244, 184, 255, 241, 253 }, + { 226, 158, 179, 244, 185, 255, 242, 253 }, + { 227, 158, 179, 244, 185, 255, 242, 253 }, + { 228, 159, 180, 245, 186, 255, 242, 253 }, + { 228, 159, 180, 245, 186, 255, 242, 253 }, + { 229, 160, 181, 245, 187, 255, 242, 253 }, + { 229, 160, 181, 245, 187, 255, 242, 253 }, + { 230, 161, 182, 246, 188, 255, 243, 253 }, + { 230, 162, 182, 246, 188, 255, 243, 253 }, + { 231, 163, 183, 246, 189, 255, 243, 253 }, + { 231, 163, 183, 246, 189, 255, 243, 253 }, + { 232, 164, 184, 247, 190, 255, 243, 253 }, + { 232, 164, 184, 247, 190, 255, 243, 253 }, + { 233, 165, 185, 247, 191, 255, 244, 253 }, + { 233, 165, 185, 247, 191, 255, 244, 253 }, + { 234, 166, 185, 247, 192, 255, 244, 253 }, + { 234, 167, 185, 247, 192, 255, 244, 253 }, + { 235, 168, 186, 248, 193, 255, 244, 253 }, + { 235, 168, 186, 248, 193, 255, 244, 253 }, + { 236, 169, 187, 248, 194, 255, 244, 253 }, + { 236, 169, 187, 248, 194, 255, 244, 253 }, + { 236, 170, 188, 248, 195, 255, 245, 253 }, + { 236, 170, 188, 248, 195, 255, 245, 253 }, + { 237, 171, 189, 249, 196, 255, 245, 254 }, + { 237, 172, 189, 249, 196, 255, 245, 254 }, + { 238, 173, 190, 249, 197, 255, 245, 254 }, + { 238, 173, 190, 249, 197, 255, 245, 254 }, + { 239, 174, 191, 249, 198, 255, 245, 254 }, + { 239, 174, 191, 249, 198, 255, 245, 254 }, + { 240, 175, 192, 249, 199, 255, 246, 254 }, + { 240, 176, 192, 249, 199, 255, 246, 254 }, + { 240, 177, 193, 250, 200, 255, 246, 254 }, + { 240, 177, 193, 250, 200, 255, 246, 254 }, + { 241, 178, 194, 250, 201, 255, 246, 254 }, + { 241, 178, 194, 250, 201, 255, 246, 254 }, + { 242, 179, 195, 250, 202, 255, 246, 254 }, + { 242, 180, 195, 250, 202, 255, 246, 254 }, + { 242, 181, 196, 250, 203, 255, 247, 254 }, + { 242, 181, 196, 250, 203, 255, 247, 254 }, + { 243, 182, 197, 251, 204, 255, 247, 254 }, + { 243, 183, 197, 251, 204, 255, 247, 254 }, + { 244, 184, 198, 251, 205, 255, 247, 254 }, + { 244, 184, 198, 251, 205, 255, 247, 254 }, + { 244, 185, 199, 251, 206, 255, 247, 254 }, + { 244, 185, 199, 251, 206, 255, 247, 254 }, + { 245, 186, 200, 251, 207, 255, 247, 254 }, + { 245, 187, 200, 251, 207, 255, 247, 254 }, + { 246, 188, 201, 252, 207, 255, 248, 254 }, + { 246, 188, 201, 252, 207, 255, 248, 254 }, + { 246, 189, 202, 252, 208, 255, 248, 254 }, + { 246, 190, 202, 252, 208, 255, 248, 254 }, + { 247, 191, 203, 252, 209, 255, 248, 254 }, + { 247, 191, 203, 252, 209, 255, 248, 254 }, + { 247, 192, 204, 252, 210, 255, 248, 254 }, + { 247, 193, 204, 252, 210, 255, 248, 254 }, + { 248, 194, 205, 252, 211, 255, 248, 254 }, + { 248, 194, 205, 252, 211, 255, 248, 254 }, + { 248, 195, 206, 252, 212, 255, 249, 254 }, + { 248, 196, 206, 252, 212, 255, 249, 254 }, + { 249, 197, 207, 253, 213, 255, 249, 254 }, + { 249, 197, 207, 253, 213, 255, 249, 254 }, + { 249, 198, 208, 253, 214, 255, 249, 254 }, + { 249, 199, 209, 253, 214, 255, 249, 254 }, + { 250, 200, 210, 253, 215, 255, 249, 254 }, + { 250, 200, 210, 253, 215, 255, 249, 254 }, + { 250, 201, 211, 253, 215, 255, 249, 254 }, + { 250, 202, 211, 253, 215, 255, 249, 254 }, + { 250, 203, 212, 253, 216, 255, 249, 254 }, + { 250, 203, 212, 253, 216, 255, 249, 254 }, + { 251, 204, 213, 253, 217, 255, 250, 254 }, + { 251, 205, 213, 253, 217, 255, 250, 254 }, + { 251, 206, 214, 254, 218, 255, 250, 254 }, + { 251, 206, 215, 254, 218, 255, 250, 254 }, + { 252, 207, 216, 254, 219, 255, 250, 254 }, + { 252, 208, 216, 254, 219, 255, 250, 254 }, + { 252, 209, 217, 254, 220, 255, 250, 254 }, + { 252, 210, 217, 254, 220, 255, 250, 254 }, + { 252, 211, 218, 254, 221, 255, 250, 254 }, + { 252, 212, 218, 254, 221, 255, 250, 254 }, + { 253, 213, 219, 254, 222, 255, 250, 254 }, + { 253, 213, 220, 254, 222, 255, 250, 254 }, + { 253, 214, 221, 254, 223, 255, 250, 254 }, + { 253, 215, 221, 254, 223, 255, 250, 254 }, + { 253, 216, 222, 254, 224, 255, 251, 254 }, + { 253, 217, 223, 254, 224, 255, 251, 254 }, + { 253, 218, 224, 254, 225, 255, 251, 254 }, + { 253, 219, 224, 254, 225, 255, 251, 254 }, + { 254, 220, 225, 254, 225, 255, 251, 254 }, + { 254, 221, 226, 254, 225, 255, 251, 254 }, + { 254, 222, 227, 255, 226, 255, 251, 254 }, + { 254, 223, 227, 255, 226, 255, 251, 254 }, + { 254, 224, 228, 255, 227, 255, 251, 254 }, + { 254, 225, 229, 255, 227, 255, 251, 254 }, + { 254, 226, 230, 255, 228, 255, 251, 254 }, + { 254, 227, 230, 255, 229, 255, 251, 254 }, + { 255, 228, 231, 255, 230, 255, 251, 254 }, + { 255, 229, 232, 255, 230, 255, 251, 254 }, + { 255, 230, 233, 255, 231, 255, 252, 254 }, + { 255, 231, 234, 255, 231, 255, 252, 254 }, + { 255, 232, 235, 255, 232, 255, 252, 254 }, + { 255, 233, 236, 255, 232, 255, 252, 254 }, + { 255, 235, 237, 255, 233, 255, 252, 254 }, + { 255, 236, 238, 255, 234, 255, 252, 254 }, + { 255, 238, 240, 255, 235, 255, 252, 255 }, + { 255, 239, 241, 255, 235, 255, 252, 254 }, + { 255, 241, 243, 255, 236, 255, 252, 254 }, + { 255, 243, 245, 255, 237, 255, 252, 254 }, + { 255, 246, 247, 255, 239, 255, 253, 255 }, +}; + +const ProbContext ff_vp9_default_probs = { + { /* y_mode */ + { 65, 32, 18, 144, 162, 194, 41, 51, 98 } /* bsize < 8x8 */, + { 132, 68, 18, 165, 217, 196, 45, 40, 78 } /* bsize < 16x16 */, + { 173, 80, 19, 176, 240, 193, 64, 35, 46 } /* bsize < 32x32 */, + { 221, 135, 38, 194, 248, 121, 96, 85, 29 } /* bsize >= 32x32 */ + }, { /* uv_mode */ + { 48, 12, 154, 155, 139, 90, 34, 117, 119 } /* y = v */, + { 67, 6, 25, 204, 243, 158, 13, 21, 96 } /* y = h */, + { 120, 7, 76, 176, 208, 126, 28, 54, 103 } /* y = dc */, + { 97, 5, 44, 131, 176, 139, 48, 68, 97 } /* y = d45 */, + { 83, 5, 42, 156, 111, 152, 26, 49, 152 } /* y = d135 */, + { 80, 5, 58, 178, 74, 83, 33, 62, 145 } /* y = d117 */, + { 86, 5, 32, 154, 192, 168, 14, 22, 163 } /* y = d153 */, + { 77, 7, 64, 116, 132, 122, 37, 126, 120 } /* y = d63 */, + { 85, 5, 32, 156, 216, 148, 19, 29, 73 } /* y = d27 */, + { 101, 21, 107, 181, 192, 103, 19, 67, 125 } /* y = tm */ + }, { /* filter */ + { 235, 162, }, + { 36, 255, }, + { 34, 3, }, + { 149, 144, }, + }, { /* mv_mode */ + { 2, 173, 34 }, // 0 = both zero mv + { 7, 145, 85 }, // 1 = one zero mv + one a predicted mv + { 7, 166, 63 }, // 2 = two predicted mvs + { 7, 94, 66 }, // 3 = one predicted/zero and one new mv + { 8, 64, 46 }, // 4 = two new mvs + { 17, 81, 31 }, // 5 = one intra neighbor + x + { 25, 29, 30 }, // 6 = two intra neighbors + }, { /* intra */ + 9, 102, 187, 225 + }, { /* comp */ + 239, 183, 119, 96, 41 + }, { /* single_ref */ + { 33, 16 }, + { 77, 74 }, + { 142, 142 }, + { 172, 170 }, + { 238, 247 } + }, { /* comp_ref */ + 50, 126, 123, 221, 226 + }, { /* tx32p */ + { 3, 136, 37, }, + { 5, 52, 13, }, + }, { /* tx16p */ + { 20, 152, }, + { 15, 101, }, + }, { /* tx8p */ + 100, 66 + }, { /* skip */ + 192, 128, 64 + }, { /* mv_joint */ + 32, 64, 96 + }, { + { /* mv vertical component */ + 128, /* sign */ + { 224, 144, 192, 168, 192, 176, 192, 198, 198, 245 }, /* class */ + 216, /* class0 */ + { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, /* bits */ + { /* class0_fp */ + { 128, 128, 64 }, + { 96, 112, 64 } + }, + { 64, 96, 64 }, /* fp */ + 160, /* class0_hp bit */ + 128, /* hp */ + }, { /* mv horizontal component */ + 128, /* sign */ + { 216, 128, 176, 160, 176, 176, 192, 198, 198, 208 }, /* class */ + 208, /* class0 */ + { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, /* bits */ + { /* class0_fp */ + { 128, 128, 64 }, + { 96, 112, 64 } + }, + { 64, 96, 64 }, /* fp */ + 160, /* class0_hp bit */ + 128, /* hp */ + } + }, { /* partition */ + { /* 64x64 -> 32x32 */ + { 222, 34, 30 } /* a/l both not split */, + { 72, 16, 44 } /* a split, l not split */, + { 58, 32, 12 } /* l split, a not split */, + { 10, 7, 6 } /* a/l both split */, + }, { /* 32x32 -> 16x16 */ + { 177, 58, 59 } /* a/l both not split */, + { 68, 26, 63 } /* a split, l not split */, + { 52, 79, 25 } /* l split, a not split */, + { 17, 14, 12 } /* a/l both split */, + }, { /* 16x16 -> 8x8 */ + { 174, 73, 87 } /* a/l both not split */, + { 92, 41, 83 } /* a split, l not split */, + { 82, 99, 50 } /* l split, a not split */, + { 53, 39, 39 } /* a/l both split */, + }, { /* 8x8 -> 4x4 */ + { 199, 122, 141 } /* a/l both not split */, + { 147, 63, 159 } /* a split, l not split */, + { 148, 133, 118 } /* l split, a not split */, + { 121, 104, 114 } /* a/l both split */, + } + }, +}; + +const uint8_t ff_vp9_default_coef_probs[4][2][2][6][6][3] = { + { /* tx = 4x4 */ + { /* block Type 0 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 195, 29, 183 }, + { 84, 49, 136 }, + { 8, 42, 71 } + }, { /* Coeff Band 1 */ + { 31, 107, 169 }, + { 35, 99, 159 }, + { 17, 82, 140 }, + { 8, 66, 114 }, + { 2, 44, 76 }, + { 1, 19, 32 } + }, { /* Coeff Band 2 */ + { 40, 132, 201 }, + { 29, 114, 187 }, + { 13, 91, 157 }, + { 7, 75, 127 }, + { 3, 58, 95 }, + { 1, 28, 47 } + }, { /* Coeff Band 3 */ + { 69, 142, 221 }, + { 42, 122, 201 }, + { 15, 91, 159 }, + { 6, 67, 121 }, + { 1, 42, 77 }, + { 1, 17, 31 } + }, { /* Coeff Band 4 */ + { 102, 148, 228 }, + { 67, 117, 204 }, + { 17, 82, 154 }, + { 6, 59, 114 }, + { 2, 39, 75 }, + { 1, 15, 29 } + }, { /* Coeff Band 5 */ + { 156, 57, 233 }, + { 119, 57, 212 }, + { 58, 48, 163 }, + { 29, 40, 124 }, + { 12, 30, 81 }, + { 3, 12, 31 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 191, 107, 226 }, + { 124, 117, 204 }, + { 25, 99, 155 } + }, { /* Coeff Band 1 */ + { 29, 148, 210 }, + { 37, 126, 194 }, + { 8, 93, 157 }, + { 2, 68, 118 }, + { 1, 39, 69 }, + { 1, 17, 33 } + }, { /* Coeff Band 2 */ + { 41, 151, 213 }, + { 27, 123, 193 }, + { 3, 82, 144 }, + { 1, 58, 105 }, + { 1, 32, 60 }, + { 1, 13, 26 } + }, { /* Coeff Band 3 */ + { 59, 159, 220 }, + { 23, 126, 198 }, + { 4, 88, 151 }, + { 1, 66, 114 }, + { 1, 38, 71 }, + { 1, 18, 34 } + }, { /* Coeff Band 4 */ + { 114, 136, 232 }, + { 51, 114, 207 }, + { 11, 83, 155 }, + { 3, 56, 105 }, + { 1, 33, 65 }, + { 1, 17, 34 } + }, { /* Coeff Band 5 */ + { 149, 65, 234 }, + { 121, 57, 215 }, + { 61, 49, 166 }, + { 28, 36, 114 }, + { 12, 25, 76 }, + { 3, 16, 42 } + } + } + }, { /* block Type 1 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 214, 49, 220 }, + { 132, 63, 188 }, + { 42, 65, 137 } + }, { /* Coeff Band 1 */ + { 85, 137, 221 }, + { 104, 131, 216 }, + { 49, 111, 192 }, + { 21, 87, 155 }, + { 2, 49, 87 }, + { 1, 16, 28 } + }, { /* Coeff Band 2 */ + { 89, 163, 230 }, + { 90, 137, 220 }, + { 29, 100, 183 }, + { 10, 70, 135 }, + { 2, 42, 81 }, + { 1, 17, 33 } + }, { /* Coeff Band 3 */ + { 108, 167, 237 }, + { 55, 133, 222 }, + { 15, 97, 179 }, + { 4, 72, 135 }, + { 1, 45, 85 }, + { 1, 19, 38 } + }, { /* Coeff Band 4 */ + { 124, 146, 240 }, + { 66, 124, 224 }, + { 17, 88, 175 }, + { 4, 58, 122 }, + { 1, 36, 75 }, + { 1, 18, 37 } + }, { /* Coeff Band 5 */ + { 141, 79, 241 }, + { 126, 70, 227 }, + { 66, 58, 182 }, + { 30, 44, 136 }, + { 12, 34, 96 }, + { 2, 20, 47 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 229, 99, 249 }, + { 143, 111, 235 }, + { 46, 109, 192 } + }, { /* Coeff Band 1 */ + { 82, 158, 236 }, + { 94, 146, 224 }, + { 25, 117, 191 }, + { 9, 87, 149 }, + { 3, 56, 99 }, + { 1, 33, 57 } + }, { /* Coeff Band 2 */ + { 83, 167, 237 }, + { 68, 145, 222 }, + { 10, 103, 177 }, + { 2, 72, 131 }, + { 1, 41, 79 }, + { 1, 20, 39 } + }, { /* Coeff Band 3 */ + { 99, 167, 239 }, + { 47, 141, 224 }, + { 10, 104, 178 }, + { 2, 73, 133 }, + { 1, 44, 85 }, + { 1, 22, 47 } + }, { /* Coeff Band 4 */ + { 127, 145, 243 }, + { 71, 129, 228 }, + { 17, 93, 177 }, + { 3, 61, 124 }, + { 1, 41, 84 }, + { 1, 21, 52 } + }, { /* Coeff Band 5 */ + { 157, 78, 244 }, + { 140, 72, 231 }, + { 69, 58, 184 }, + { 31, 44, 137 }, + { 14, 38, 105 }, + { 8, 23, 61 } + } + } + } + }, { /* tx = 8x8 */ + { /* block Type 0 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 125, 34, 187 }, + { 52, 41, 133 }, + { 6, 31, 56 } + }, { /* Coeff Band 1 */ + { 37, 109, 153 }, + { 51, 102, 147 }, + { 23, 87, 128 }, + { 8, 67, 101 }, + { 1, 41, 63 }, + { 1, 19, 29 } + }, { /* Coeff Band 2 */ + { 31, 154, 185 }, + { 17, 127, 175 }, + { 6, 96, 145 }, + { 2, 73, 114 }, + { 1, 51, 82 }, + { 1, 28, 45 } + }, { /* Coeff Band 3 */ + { 23, 163, 200 }, + { 10, 131, 185 }, + { 2, 93, 148 }, + { 1, 67, 111 }, + { 1, 41, 69 }, + { 1, 14, 24 } + }, { /* Coeff Band 4 */ + { 29, 176, 217 }, + { 12, 145, 201 }, + { 3, 101, 156 }, + { 1, 69, 111 }, + { 1, 39, 63 }, + { 1, 14, 23 } + }, { /* Coeff Band 5 */ + { 57, 192, 233 }, + { 25, 154, 215 }, + { 6, 109, 167 }, + { 3, 78, 118 }, + { 1, 48, 69 }, + { 1, 21, 29 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 202, 105, 245 }, + { 108, 106, 216 }, + { 18, 90, 144 } + }, { /* Coeff Band 1 */ + { 33, 172, 219 }, + { 64, 149, 206 }, + { 14, 117, 177 }, + { 5, 90, 141 }, + { 2, 61, 95 }, + { 1, 37, 57 } + }, { /* Coeff Band 2 */ + { 33, 179, 220 }, + { 11, 140, 198 }, + { 1, 89, 148 }, + { 1, 60, 104 }, + { 1, 33, 57 }, + { 1, 12, 21 } + }, { /* Coeff Band 3 */ + { 30, 181, 221 }, + { 8, 141, 198 }, + { 1, 87, 145 }, + { 1, 58, 100 }, + { 1, 31, 55 }, + { 1, 12, 20 } + }, { /* Coeff Band 4 */ + { 32, 186, 224 }, + { 7, 142, 198 }, + { 1, 86, 143 }, + { 1, 58, 100 }, + { 1, 31, 55 }, + { 1, 12, 22 } + }, { /* Coeff Band 5 */ + { 57, 192, 227 }, + { 20, 143, 204 }, + { 3, 96, 154 }, + { 1, 68, 112 }, + { 1, 42, 69 }, + { 1, 19, 32 } + } + } + }, { /* block Type 1 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 212, 35, 215 }, + { 113, 47, 169 }, + { 29, 48, 105 } + }, { /* Coeff Band 1 */ + { 74, 129, 203 }, + { 106, 120, 203 }, + { 49, 107, 178 }, + { 19, 84, 144 }, + { 4, 50, 84 }, + { 1, 15, 25 } + }, { /* Coeff Band 2 */ + { 71, 172, 217 }, + { 44, 141, 209 }, + { 15, 102, 173 }, + { 6, 76, 133 }, + { 2, 51, 89 }, + { 1, 24, 42 } + }, { /* Coeff Band 3 */ + { 64, 185, 231 }, + { 31, 148, 216 }, + { 8, 103, 175 }, + { 3, 74, 131 }, + { 1, 46, 81 }, + { 1, 18, 30 } + }, { /* Coeff Band 4 */ + { 65, 196, 235 }, + { 25, 157, 221 }, + { 5, 105, 174 }, + { 1, 67, 120 }, + { 1, 38, 69 }, + { 1, 15, 30 } + }, { /* Coeff Band 5 */ + { 65, 204, 238 }, + { 30, 156, 224 }, + { 7, 107, 177 }, + { 2, 70, 124 }, + { 1, 42, 73 }, + { 1, 18, 34 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 225, 86, 251 }, + { 144, 104, 235 }, + { 42, 99, 181 } + }, { /* Coeff Band 1 */ + { 85, 175, 239 }, + { 112, 165, 229 }, + { 29, 136, 200 }, + { 12, 103, 162 }, + { 6, 77, 123 }, + { 2, 53, 84 } + }, { /* Coeff Band 2 */ + { 75, 183, 239 }, + { 30, 155, 221 }, + { 3, 106, 171 }, + { 1, 74, 128 }, + { 1, 44, 76 }, + { 1, 17, 28 } + }, { /* Coeff Band 3 */ + { 73, 185, 240 }, + { 27, 159, 222 }, + { 2, 107, 172 }, + { 1, 75, 127 }, + { 1, 42, 73 }, + { 1, 17, 29 } + }, { /* Coeff Band 4 */ + { 62, 190, 238 }, + { 21, 159, 222 }, + { 2, 107, 172 }, + { 1, 72, 122 }, + { 1, 40, 71 }, + { 1, 18, 32 } + }, { /* Coeff Band 5 */ + { 61, 199, 240 }, + { 27, 161, 226 }, + { 4, 113, 180 }, + { 1, 76, 129 }, + { 1, 46, 80 }, + { 1, 23, 41 } + } + } + } + }, { /* tx = 16x16 */ + { /* block Type 0 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 7, 27, 153 }, + { 5, 30, 95 }, + { 1, 16, 30 } + }, { /* Coeff Band 1 */ + { 50, 75, 127 }, + { 57, 75, 124 }, + { 27, 67, 108 }, + { 10, 54, 86 }, + { 1, 33, 52 }, + { 1, 12, 18 } + }, { /* Coeff Band 2 */ + { 43, 125, 151 }, + { 26, 108, 148 }, + { 7, 83, 122 }, + { 2, 59, 89 }, + { 1, 38, 60 }, + { 1, 17, 27 } + }, { /* Coeff Band 3 */ + { 23, 144, 163 }, + { 13, 112, 154 }, + { 2, 75, 117 }, + { 1, 50, 81 }, + { 1, 31, 51 }, + { 1, 14, 23 } + }, { /* Coeff Band 4 */ + { 18, 162, 185 }, + { 6, 123, 171 }, + { 1, 78, 125 }, + { 1, 51, 86 }, + { 1, 31, 54 }, + { 1, 14, 23 } + }, { /* Coeff Band 5 */ + { 15, 199, 227 }, + { 3, 150, 204 }, + { 1, 91, 146 }, + { 1, 55, 95 }, + { 1, 30, 53 }, + { 1, 11, 20 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 19, 55, 240 }, + { 19, 59, 196 }, + { 3, 52, 105 } + }, { /* Coeff Band 1 */ + { 41, 166, 207 }, + { 104, 153, 199 }, + { 31, 123, 181 }, + { 14, 101, 152 }, + { 5, 72, 106 }, + { 1, 36, 52 } + }, { /* Coeff Band 2 */ + { 35, 176, 211 }, + { 12, 131, 190 }, + { 2, 88, 144 }, + { 1, 60, 101 }, + { 1, 36, 60 }, + { 1, 16, 28 } + }, { /* Coeff Band 3 */ + { 28, 183, 213 }, + { 8, 134, 191 }, + { 1, 86, 142 }, + { 1, 56, 96 }, + { 1, 30, 53 }, + { 1, 12, 20 } + }, { /* Coeff Band 4 */ + { 20, 190, 215 }, + { 4, 135, 192 }, + { 1, 84, 139 }, + { 1, 53, 91 }, + { 1, 28, 49 }, + { 1, 11, 20 } + }, { /* Coeff Band 5 */ + { 13, 196, 216 }, + { 2, 137, 192 }, + { 1, 86, 143 }, + { 1, 57, 99 }, + { 1, 32, 56 }, + { 1, 13, 24 } + } + } + }, { /* block Type 1 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 211, 29, 217 }, + { 96, 47, 156 }, + { 22, 43, 87 } + }, { /* Coeff Band 1 */ + { 78, 120, 193 }, + { 111, 116, 186 }, + { 46, 102, 164 }, + { 15, 80, 128 }, + { 2, 49, 76 }, + { 1, 18, 28 } + }, { /* Coeff Band 2 */ + { 71, 161, 203 }, + { 42, 132, 192 }, + { 10, 98, 150 }, + { 3, 69, 109 }, + { 1, 44, 70 }, + { 1, 18, 29 } + }, { /* Coeff Band 3 */ + { 57, 186, 211 }, + { 30, 140, 196 }, + { 4, 93, 146 }, + { 1, 62, 102 }, + { 1, 38, 65 }, + { 1, 16, 27 } + }, { /* Coeff Band 4 */ + { 47, 199, 217 }, + { 14, 145, 196 }, + { 1, 88, 142 }, + { 1, 57, 98 }, + { 1, 36, 62 }, + { 1, 15, 26 } + }, { /* Coeff Band 5 */ + { 26, 219, 229 }, + { 5, 155, 207 }, + { 1, 94, 151 }, + { 1, 60, 104 }, + { 1, 36, 62 }, + { 1, 16, 28 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 233, 29, 248 }, + { 146, 47, 220 }, + { 43, 52, 140 } + }, { /* Coeff Band 1 */ + { 100, 163, 232 }, + { 179, 161, 222 }, + { 63, 142, 204 }, + { 37, 113, 174 }, + { 26, 89, 137 }, + { 18, 68, 97 } + }, { /* Coeff Band 2 */ + { 85, 181, 230 }, + { 32, 146, 209 }, + { 7, 100, 164 }, + { 3, 71, 121 }, + { 1, 45, 77 }, + { 1, 18, 30 } + }, { /* Coeff Band 3 */ + { 65, 187, 230 }, + { 20, 148, 207 }, + { 2, 97, 159 }, + { 1, 68, 116 }, + { 1, 40, 70 }, + { 1, 14, 29 } + }, { /* Coeff Band 4 */ + { 40, 194, 227 }, + { 8, 147, 204 }, + { 1, 94, 155 }, + { 1, 65, 112 }, + { 1, 39, 66 }, + { 1, 14, 26 } + }, { /* Coeff Band 5 */ + { 16, 208, 228 }, + { 3, 151, 207 }, + { 1, 98, 160 }, + { 1, 67, 117 }, + { 1, 41, 74 }, + { 1, 17, 31 } + } + } + } + }, { /* tx = 32x32 */ + { /* block Type 0 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 17, 38, 140 }, + { 7, 34, 80 }, + { 1, 17, 29 } + }, { /* Coeff Band 1 */ + { 37, 75, 128 }, + { 41, 76, 128 }, + { 26, 66, 116 }, + { 12, 52, 94 }, + { 2, 32, 55 }, + { 1, 10, 16 } + }, { /* Coeff Band 2 */ + { 50, 127, 154 }, + { 37, 109, 152 }, + { 16, 82, 121 }, + { 5, 59, 85 }, + { 1, 35, 54 }, + { 1, 13, 20 } + }, { /* Coeff Band 3 */ + { 40, 142, 167 }, + { 17, 110, 157 }, + { 2, 71, 112 }, + { 1, 44, 72 }, + { 1, 27, 45 }, + { 1, 11, 17 } + }, { /* Coeff Band 4 */ + { 30, 175, 188 }, + { 9, 124, 169 }, + { 1, 74, 116 }, + { 1, 48, 78 }, + { 1, 30, 49 }, + { 1, 11, 18 } + }, { /* Coeff Band 5 */ + { 10, 222, 223 }, + { 2, 150, 194 }, + { 1, 83, 128 }, + { 1, 48, 79 }, + { 1, 27, 45 }, + { 1, 11, 17 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 36, 41, 235 }, + { 29, 36, 193 }, + { 10, 27, 111 } + }, { /* Coeff Band 1 */ + { 85, 165, 222 }, + { 177, 162, 215 }, + { 110, 135, 195 }, + { 57, 113, 168 }, + { 23, 83, 120 }, + { 10, 49, 61 } + }, { /* Coeff Band 2 */ + { 85, 190, 223 }, + { 36, 139, 200 }, + { 5, 90, 146 }, + { 1, 60, 103 }, + { 1, 38, 65 }, + { 1, 18, 30 } + }, { /* Coeff Band 3 */ + { 72, 202, 223 }, + { 23, 141, 199 }, + { 2, 86, 140 }, + { 1, 56, 97 }, + { 1, 36, 61 }, + { 1, 16, 27 } + }, { /* Coeff Band 4 */ + { 55, 218, 225 }, + { 13, 145, 200 }, + { 1, 86, 141 }, + { 1, 57, 99 }, + { 1, 35, 61 }, + { 1, 13, 22 } + }, { /* Coeff Band 5 */ + { 15, 235, 212 }, + { 1, 132, 184 }, + { 1, 84, 139 }, + { 1, 57, 97 }, + { 1, 34, 56 }, + { 1, 14, 23 } + } + } + }, { /* block Type 1 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 181, 21, 201 }, + { 61, 37, 123 }, + { 10, 38, 71 } + }, { /* Coeff Band 1 */ + { 47, 106, 172 }, + { 95, 104, 173 }, + { 42, 93, 159 }, + { 18, 77, 131 }, + { 4, 50, 81 }, + { 1, 17, 23 } + }, { /* Coeff Band 2 */ + { 62, 147, 199 }, + { 44, 130, 189 }, + { 28, 102, 154 }, + { 18, 75, 115 }, + { 2, 44, 65 }, + { 1, 12, 19 } + }, { /* Coeff Band 3 */ + { 55, 153, 210 }, + { 24, 130, 194 }, + { 3, 93, 146 }, + { 1, 61, 97 }, + { 1, 31, 50 }, + { 1, 10, 16 } + }, { /* Coeff Band 4 */ + { 49, 186, 223 }, + { 17, 148, 204 }, + { 1, 96, 142 }, + { 1, 53, 83 }, + { 1, 26, 44 }, + { 1, 11, 17 } + }, { /* Coeff Band 5 */ + { 13, 217, 212 }, + { 2, 136, 180 }, + { 1, 78, 124 }, + { 1, 50, 83 }, + { 1, 29, 49 }, + { 1, 14, 23 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 197, 13, 247 }, + { 82, 17, 222 }, + { 25, 17, 162 } + }, { /* Coeff Band 1 */ + { 126, 186, 247 }, + { 234, 191, 243 }, + { 176, 177, 234 }, + { 104, 158, 220 }, + { 66, 128, 186 }, + { 55, 90, 137 } + }, { /* Coeff Band 2 */ + { 111, 197, 242 }, + { 46, 158, 219 }, + { 9, 104, 171 }, + { 2, 65, 125 }, + { 1, 44, 80 }, + { 1, 17, 91 } + }, { /* Coeff Band 3 */ + { 104, 208, 245 }, + { 39, 168, 224 }, + { 3, 109, 162 }, + { 1, 79, 124 }, + { 1, 50, 102 }, + { 1, 43, 102 } + }, { /* Coeff Band 4 */ + { 84, 220, 246 }, + { 31, 177, 231 }, + { 2, 115, 180 }, + { 1, 79, 134 }, + { 1, 55, 77 }, + { 1, 60, 79 } + }, { /* Coeff Band 5 */ + { 43, 243, 240 }, + { 8, 180, 217 }, + { 1, 115, 166 }, + { 1, 84, 121 }, + { 1, 51, 67 }, + { 1, 16, 6 } + } + } + } + } +}; + +const int8_t ff_vp9_mv_joint_tree[3][2] = { + { -MV_JOINT_ZERO, 1 }, // '0' + { -MV_JOINT_H, 2 }, // '10' + { -MV_JOINT_V, -MV_JOINT_HV }, // '11x' +}; + +const int8_t ff_vp9_mv_class_tree[10][2] = { + { -0, 1 }, // '0' + { -1, 2 }, // '10' + { 3, 4 }, + { -2, -3 }, // '110x' + { 5, 6 }, + { -4, -5 }, // '1110x' + { -6, 7 }, // '11110' + { 8, 9 }, + { -7, -8 }, // '111110x' + { -9, -10 }, // '111111x' +}; + +const int8_t ff_vp9_mv_fp_tree[3][2] = { + { -0, 1 }, // '0' + { -1, 2 }, // '10' + { -2, -3 }, // '11x' +}; diff --git a/libs/ffvpx/libavcodec/vp9data.h b/libs/ffvpx/libavcodec/vp9data.h new file mode 100644 index 000000000..086dbdec0 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9data.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VP9DATA_H +#define AVCODEC_VP9DATA_H + +#include <stdint.h> + +#include "vp9dec.h" + +extern const uint8_t ff_vp9_bwh_tab[2][N_BS_SIZES][2]; +extern const int8_t ff_vp9_partition_tree[3][2]; +extern const uint8_t ff_vp9_default_kf_partition_probs[4][4][3]; +extern const int8_t ff_vp9_segmentation_tree[7][2]; +extern const int8_t ff_vp9_intramode_tree[9][2]; +extern const uint8_t ff_vp9_default_kf_ymode_probs[10][10][9]; +extern const uint8_t ff_vp9_default_kf_uvmode_probs[10][9]; +extern const int8_t ff_vp9_inter_mode_tree[3][2]; +extern const int8_t ff_vp9_filter_tree[2][2]; +extern const enum FilterMode ff_vp9_filter_lut[3]; +extern const int16_t ff_vp9_dc_qlookup[3][256]; +extern const int16_t ff_vp9_ac_qlookup[3][256]; +extern const enum TxfmType ff_vp9_intra_txfm_type[14]; +extern const int16_t ff_vp9_default_scan_4x4[16]; +extern const int16_t ff_vp9_col_scan_4x4[16]; +extern const int16_t ff_vp9_row_scan_4x4[16]; +extern const int16_t ff_vp9_default_scan_8x8[64]; +extern const int16_t ff_vp9_col_scan_8x8[64]; +extern const int16_t ff_vp9_row_scan_8x8[64]; +extern const int16_t ff_vp9_default_scan_16x16[256]; +extern const int16_t ff_vp9_col_scan_16x16[256]; +extern const int16_t ff_vp9_row_scan_16x16[256]; +extern const int16_t ff_vp9_default_scan_32x32[1024]; +extern const int16_t * const ff_vp9_scans[5][4]; +extern const int16_t ff_vp9_default_scan_4x4_nb[16][2]; +extern const int16_t ff_vp9_col_scan_4x4_nb[16][2]; +extern const int16_t ff_vp9_row_scan_4x4_nb[16][2]; +extern const int16_t ff_vp9_default_scan_8x8_nb[64][2]; +extern const int16_t ff_vp9_col_scan_8x8_nb[64][2]; +extern const int16_t ff_vp9_row_scan_8x8_nb[64][2]; +extern const int16_t ff_vp9_default_scan_16x16_nb[256][2]; +extern const int16_t ff_vp9_col_scan_16x16_nb[256][2]; +extern const int16_t ff_vp9_row_scan_16x16_nb[256][2]; +extern const int16_t ff_vp9_default_scan_32x32_nb[1024][2]; +extern const int16_t (* const ff_vp9_scans_nb[5][4])[2]; +extern const uint8_t ff_vp9_model_pareto8[256][8]; +extern const ProbContext ff_vp9_default_probs; +extern const uint8_t ff_vp9_default_coef_probs[4][2][2][6][6][3]; +extern const int8_t ff_vp9_mv_joint_tree[3][2]; +extern const int8_t ff_vp9_mv_class_tree[10][2]; +extern const int8_t ff_vp9_mv_fp_tree[3][2]; + +#endif /* AVCODEC_VP9DATA_H */ diff --git a/libs/ffvpx/libavcodec/vp9dec.h b/libs/ffvpx/libavcodec/vp9dec.h new file mode 100644 index 000000000..66573edc7 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9dec.h @@ -0,0 +1,240 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VP9DEC_H +#define AVCODEC_VP9DEC_H + +#include <stddef.h> +#include <stdint.h> +#include <stdatomic.h> + +#include "libavutil/buffer.h" +#include "libavutil/thread.h" +#include "libavutil/internal.h" + +#include "vp9.h" +#include "vp9dsp.h" +#include "vp9shared.h" + +enum MVJoint { + MV_JOINT_ZERO, + MV_JOINT_H, + MV_JOINT_V, + MV_JOINT_HV, +}; + +typedef struct ProbContext { + uint8_t y_mode[4][9]; + uint8_t uv_mode[10][9]; + uint8_t filter[4][2]; + uint8_t mv_mode[7][3]; + uint8_t intra[4]; + uint8_t comp[5]; + uint8_t single_ref[5][2]; + uint8_t comp_ref[5]; + uint8_t tx32p[2][3]; + uint8_t tx16p[2][2]; + uint8_t tx8p[2]; + uint8_t skip[3]; + uint8_t mv_joint[3]; + struct { + uint8_t sign; + uint8_t classes[10]; + uint8_t class0; + uint8_t bits[10]; + uint8_t class0_fp[2][3]; + uint8_t fp[3]; + uint8_t class0_hp; + uint8_t hp; + } mv_comp[2]; + uint8_t partition[4][4][3]; +} ProbContext; + +typedef struct VP9Filter { + uint8_t level[8 * 8]; + uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */] + [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */]; +} VP9Filter; + +typedef struct VP9Block { + uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip; + enum FilterMode filter; + VP56mv mv[4 /* b_idx */][2 /* ref */]; + enum BlockSize bs; + enum TxfmMode tx, uvtx; + enum BlockLevel bl; + enum BlockPartition bp; +} VP9Block; + +typedef struct VP9TileData VP9TileData; + +typedef struct VP9Context { + VP9SharedContext s; + VP9TileData *td; + + VP9DSPContext dsp; + VideoDSPContext vdsp; + GetBitContext gb; + VP56RangeCoder c; + int pass, active_tile_cols; + +#if HAVE_THREADS + pthread_mutex_t progress_mutex; + pthread_cond_t progress_cond; + atomic_int *entries; +#endif + + uint8_t ss_h, ss_v; + uint8_t last_bpp, bpp_index, bytesperpixel; + uint8_t last_keyframe; + // sb_cols/rows, rows/cols and last_fmt are used for allocating all internal + // arrays, and are thus per-thread. w/h and gf_fmt are synced between threads + // and are therefore per-stream. pix_fmt represents the value in the header + // of the currently processed frame. + int w, h; + enum AVPixelFormat pix_fmt, last_fmt, gf_fmt; + unsigned sb_cols, sb_rows, rows, cols; + ThreadFrame next_refs[8]; + + struct { + uint8_t lim_lut[64]; + uint8_t mblim_lut[64]; + } filter_lut; + struct { + ProbContext p; + uint8_t coef[4][2][2][6][6][3]; + } prob_ctx[4]; + struct { + ProbContext p; + uint8_t coef[4][2][2][6][6][11]; + } prob; + + // contextual (above) cache + uint8_t *above_partition_ctx; + uint8_t *above_mode_ctx; + // FIXME maybe merge some of the below in a flags field? + uint8_t *above_y_nnz_ctx; + uint8_t *above_uv_nnz_ctx[2]; + uint8_t *above_skip_ctx; // 1bit + uint8_t *above_txfm_ctx; // 2bit + uint8_t *above_segpred_ctx; // 1bit + uint8_t *above_intra_ctx; // 1bit + uint8_t *above_comp_ctx; // 1bit + uint8_t *above_ref_ctx; // 2bit + uint8_t *above_filter_ctx; + VP56mv (*above_mv_ctx)[2]; + + // whole-frame cache + uint8_t *intra_pred_data[3]; + VP9Filter *lflvl; + + // block reconstruction intermediates + int block_alloc_using_2pass; + uint16_t mvscale[3][2]; + uint8_t mvstep[3][2]; +} VP9Context; + +struct VP9TileData { + //VP9Context should be const, but because of the threading API(generates + //a lot of warnings) it's not. + VP9Context *s; + VP56RangeCoder *c_b; + VP56RangeCoder *c; + int row, row7, col, col7; + uint8_t *dst[3]; + ptrdiff_t y_stride, uv_stride; + VP9Block *b_base, *b; + unsigned tile_col_start; + + struct { + unsigned y_mode[4][10]; + unsigned uv_mode[10][10]; + unsigned filter[4][3]; + unsigned mv_mode[7][4]; + unsigned intra[4][2]; + unsigned comp[5][2]; + unsigned single_ref[5][2][2]; + unsigned comp_ref[5][2]; + unsigned tx32p[2][4]; + unsigned tx16p[2][3]; + unsigned tx8p[2][2]; + unsigned skip[3][2]; + unsigned mv_joint[4]; + struct { + unsigned sign[2]; + unsigned classes[11]; + unsigned class0[2]; + unsigned bits[10][2]; + unsigned class0_fp[2][4]; + unsigned fp[4]; + unsigned class0_hp[2]; + unsigned hp[2]; + } mv_comp[2]; + unsigned partition[4][4][4]; + unsigned coef[4][2][2][6][6][3]; + unsigned eob[4][2][2][6][6][2]; + } counts; + + // whole-frame cache + DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2]; + + // contextual (left) cache + DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16]; + DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16]; + DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2]; + DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16]; + DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8]; + DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8]; + DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8]; + DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8]; + DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8]; + DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8]; + DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8]; + DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8]; + // block reconstruction intermediates + DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2]; + DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2]; + struct { int x, y; } min_mv, max_mv; + int16_t *block_base, *block, *uvblock_base[2], *uvblock[2]; + uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2]; +}; + +void ff_vp9_fill_mv(VP9TileData *td, VP56mv *mv, int mode, int sb); + +void ff_vp9_adapt_probs(VP9Context *s); + +void ff_vp9_decode_block(VP9TileData *td, int row, int col, + VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff, + enum BlockLevel bl, enum BlockPartition bp); + +void ff_vp9_loopfilter_sb(AVCodecContext *avctx, VP9Filter *lflvl, + int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff); + +void ff_vp9_intra_recon_8bpp(VP9TileData *td, + ptrdiff_t y_off, ptrdiff_t uv_off); +void ff_vp9_intra_recon_16bpp(VP9TileData *td, + ptrdiff_t y_off, ptrdiff_t uv_off); +void ff_vp9_inter_recon_8bpp(VP9TileData *td); +void ff_vp9_inter_recon_16bpp(VP9TileData *td); + +#endif /* AVCODEC_VP9DEC_H */ diff --git a/libs/ffvpx/libavcodec/vp9dsp.c b/libs/ffvpx/libavcodec/vp9dsp.c new file mode 100644 index 000000000..f6d73f73c --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9dsp.c @@ -0,0 +1,99 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" +#include "libavutil/common.h" +#include "vp9dsp.h" + +const DECLARE_ALIGNED(16, int16_t, ff_vp9_subpel_filters)[3][16][8] = { + [FILTER_8TAP_REGULAR] = { + { 0, 0, 0, 128, 0, 0, 0, 0 }, + { 0, 1, -5, 126, 8, -3, 1, 0 }, + { -1, 3, -10, 122, 18, -6, 2, 0 }, + { -1, 4, -13, 118, 27, -9, 3, -1 }, + { -1, 4, -16, 112, 37, -11, 4, -1 }, + { -1, 5, -18, 105, 48, -14, 4, -1 }, + { -1, 5, -19, 97, 58, -16, 5, -1 }, + { -1, 6, -19, 88, 68, -18, 5, -1 }, + { -1, 6, -19, 78, 78, -19, 6, -1 }, + { -1, 5, -18, 68, 88, -19, 6, -1 }, + { -1, 5, -16, 58, 97, -19, 5, -1 }, + { -1, 4, -14, 48, 105, -18, 5, -1 }, + { -1, 4, -11, 37, 112, -16, 4, -1 }, + { -1, 3, -9, 27, 118, -13, 4, -1 }, + { 0, 2, -6, 18, 122, -10, 3, -1 }, + { 0, 1, -3, 8, 126, -5, 1, 0 }, + }, [FILTER_8TAP_SHARP] = { + { 0, 0, 0, 128, 0, 0, 0, 0 }, + { -1, 3, -7, 127, 8, -3, 1, 0 }, + { -2, 5, -13, 125, 17, -6, 3, -1 }, + { -3, 7, -17, 121, 27, -10, 5, -2 }, + { -4, 9, -20, 115, 37, -13, 6, -2 }, + { -4, 10, -23, 108, 48, -16, 8, -3 }, + { -4, 10, -24, 100, 59, -19, 9, -3 }, + { -4, 11, -24, 90, 70, -21, 10, -4 }, + { -4, 11, -23, 80, 80, -23, 11, -4 }, + { -4, 10, -21, 70, 90, -24, 11, -4 }, + { -3, 9, -19, 59, 100, -24, 10, -4 }, + { -3, 8, -16, 48, 108, -23, 10, -4 }, + { -2, 6, -13, 37, 115, -20, 9, -4 }, + { -2, 5, -10, 27, 121, -17, 7, -3 }, + { -1, 3, -6, 17, 125, -13, 5, -2 }, + { 0, 1, -3, 8, 127, -7, 3, -1 }, + }, [FILTER_8TAP_SMOOTH] = { + { 0, 0, 0, 128, 0, 0, 0, 0 }, + { -3, -1, 32, 64, 38, 1, -3, 0 }, + { -2, -2, 29, 63, 41, 2, -3, 0 }, + { -2, -2, 26, 63, 43, 4, -4, 0 }, + { -2, -3, 24, 62, 46, 5, -4, 0 }, + { -2, -3, 21, 60, 49, 7, -4, 0 }, + { -1, -4, 18, 59, 51, 9, -4, 0 }, + { -1, -4, 16, 57, 53, 12, -4, -1 }, + { -1, -4, 14, 55, 55, 14, -4, -1 }, + { -1, -4, 12, 53, 57, 16, -4, -1 }, + { 0, -4, 9, 51, 59, 18, -4, -1 }, + { 0, -4, 7, 49, 60, 21, -3, -2 }, + { 0, -4, 5, 46, 62, 24, -3, -2 }, + { 0, -4, 4, 43, 63, 26, -2, -2 }, + { 0, -3, 2, 41, 63, 29, -2, -2 }, + { 0, -3, 1, 38, 64, 32, -1, -3 }, + } +}; + + +av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact) +{ + if (bpp == 8) { + ff_vp9dsp_init_8(dsp); + } else if (bpp == 10) { + ff_vp9dsp_init_10(dsp); + } else { + av_assert0(bpp == 12); + ff_vp9dsp_init_12(dsp); + } + + if (ARCH_AARCH64) ff_vp9dsp_init_aarch64(dsp, bpp); + if (ARCH_ARM) ff_vp9dsp_init_arm(dsp, bpp); + if (ARCH_X86) ff_vp9dsp_init_x86(dsp, bpp, bitexact); + if (ARCH_MIPS) ff_vp9dsp_init_mips(dsp, bpp); +} diff --git a/libs/ffvpx/libavcodec/vp9dsp.h b/libs/ffvpx/libavcodec/vp9dsp.h new file mode 100644 index 000000000..e2256316a --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9dsp.h @@ -0,0 +1,136 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VP9DSP_H +#define AVCODEC_VP9DSP_H + +#include <stddef.h> +#include <stdint.h> + +#include "libavcodec/vp9.h" + +typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, + const uint8_t *ref, ptrdiff_t ref_stride, + int h, int mx, int my); +typedef void (*vp9_scaled_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, + const uint8_t *ref, ptrdiff_t ref_stride, + int h, int mx, int my, int dx, int dy); + +typedef struct VP9DSPContext { + /* + * dimension 1: 0=4x4, 1=8x8, 2=16x16, 3=32x32 + * dimension 2: intra prediction modes + * + * dst/left/top is aligned by transform-size (i.e. 4, 8, 16 or 32 pixels) + * stride is aligned by 16 pixels + * top[-1] is top/left; top[4,7] is top-right for 4x4 + */ + // FIXME(rbultje) maybe replace left/top pointers with HAVE_TOP/ + // HAVE_LEFT/HAVE_TOPRIGHT flags instead, and then handle it in-place? + // also needs to fit in with what H.264/VP8/etc do + void (*intra_pred[N_TXFM_SIZES][N_INTRA_PRED_MODES])(uint8_t *dst, + ptrdiff_t stride, + const uint8_t *left, + const uint8_t *top); + + /* + * dimension 1: 0=4x4, 1=8x8, 2=16x16, 3=32x32, 4=lossless (3-4=dct only) + * dimension 2: 0=dct/dct, 1=dct/adst, 2=adst/dct, 3=adst/adst + * + * dst is aligned by transform-size (i.e. 4, 8, 16 or 32 pixels) + * stride is aligned by 16 pixels + * block is 16-byte aligned + * eob indicates the position (+1) of the last non-zero coefficient, + * in scan-order. This can be used to write faster versions, e.g. a + * dc-only 4x4/8x8/16x16/32x32, or a 4x4-only (eob<10) 8x8/16x16/32x32, + * etc. + */ + // FIXME also write idct_add_block() versions for whole (inter) pred + // blocks, so we can do 2 4x4s at once + void (*itxfm_add[N_TXFM_SIZES + 1][N_TXFM_TYPES])(uint8_t *dst, + ptrdiff_t stride, + int16_t *block, int eob); + + /* + * dimension 1: width of filter (0=4, 1=8, 2=16) + * dimension 2: 0=col-edge filter (h), 1=row-edge filter (v) + * + * dst/stride are aligned by 8 + */ + void (*loop_filter_8[3][2])(uint8_t *dst, ptrdiff_t stride, + int mb_lim, int lim, int hev_thr); + + /* + * dimension 1: 0=col-edge filter (h), 1=row-edge filter (v) + * + * The width of filter is assumed to be 16; dst/stride are aligned by 16 + */ + void (*loop_filter_16[2])(uint8_t *dst, ptrdiff_t stride, + int mb_lim, int lim, int hev_thr); + + /* + * dimension 1/2: width of filter (0=4, 1=8) for each filter half + * dimension 3: 0=col-edge filter (h), 1=row-edge filter (v) + * + * dst/stride are aligned by operation size + * this basically calls loop_filter[d1][d3][0](), followed by + * loop_filter[d2][d3][0]() on the next 8 pixels + * mb_lim/lim/hev_thr contain two values in the lowest two bytes of the + * integer. + */ + // FIXME perhaps a mix4 that operates on 32px (for AVX2) + void (*loop_filter_mix2[2][2][2])(uint8_t *dst, ptrdiff_t stride, + int mb_lim, int lim, int hev_thr); + + /* + * dimension 1: hsize (0: 64, 1: 32, 2: 16, 3: 8, 4: 4) + * dimension 2: filter type (0: smooth, 1: regular, 2: sharp, 3: bilin) + * dimension 3: averaging type (0: put, 1: avg) + * dimension 4: x subpel interpolation (0: none, 1: 8tap/bilin) + * dimension 5: y subpel interpolation (0: none, 1: 8tap/bilin) + * + * dst/stride are aligned by hsize + */ + vp9_mc_func mc[5][N_FILTERS][2][2][2]; + + /* + * for scalable MC, first 3 dimensions identical to above, the other two + * don't exist since it changes per stepsize. + */ + vp9_scaled_mc_func smc[5][N_FILTERS][2]; +} VP9DSPContext; + +extern const int16_t ff_vp9_subpel_filters[3][16][8]; + +void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact); + +void ff_vp9dsp_init_8(VP9DSPContext *dsp); +void ff_vp9dsp_init_10(VP9DSPContext *dsp); +void ff_vp9dsp_init_12(VP9DSPContext *dsp); + +void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp); +void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp); +void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact); +void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp); + +#endif /* AVCODEC_VP9DSP_H */ diff --git a/libs/ffvpx/libavcodec/vp9dsp_10bpp.c b/libs/ffvpx/libavcodec/vp9dsp_10bpp.c new file mode 100644 index 000000000..62ce18207 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9dsp_10bpp.c @@ -0,0 +1,26 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define BIT_DEPTH 10 +#define dctint int64_t +#include "vp9dsp_template.c" diff --git a/libs/ffvpx/libavcodec/vp9dsp_12bpp.c b/libs/ffvpx/libavcodec/vp9dsp_12bpp.c new file mode 100644 index 000000000..2f36471c5 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9dsp_12bpp.c @@ -0,0 +1,26 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define BIT_DEPTH 12 +#define dctint int64_t +#include "vp9dsp_template.c" diff --git a/libs/ffvpx/libavcodec/vp9dsp_8bpp.c b/libs/ffvpx/libavcodec/vp9dsp_8bpp.c new file mode 100644 index 000000000..4b219b06b --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9dsp_8bpp.c @@ -0,0 +1,26 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define BIT_DEPTH 8 +#define dctint int +#include "vp9dsp_template.c" diff --git a/libs/ffvpx/libavcodec/vp9dsp_template.c b/libs/ffvpx/libavcodec/vp9dsp_template.c new file mode 100644 index 000000000..bb54561a6 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9dsp_template.c @@ -0,0 +1,2546 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/common.h" +#include "bit_depth_template.c" +#include "vp9dsp.h" + +#if BIT_DEPTH != 12 + +// FIXME see whether we can merge parts of this (perhaps at least 4x4 and 8x8) +// back with h264pred.[ch] + +static void vert_4x4_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *top = (const pixel *) _top; + pixel4 p4 = AV_RN4PA(top); + + stride /= sizeof(pixel); + AV_WN4PA(dst + stride * 0, p4); + AV_WN4PA(dst + stride * 1, p4); + AV_WN4PA(dst + stride * 2, p4); + AV_WN4PA(dst + stride * 3, p4); +} + +static void vert_8x8_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *top = (const pixel *) _top; + pixel4 p4a = AV_RN4PA(top + 0); + pixel4 p4b = AV_RN4PA(top + 4); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 8; y++) { + AV_WN4PA(dst + 0, p4a); + AV_WN4PA(dst + 4, p4b); + dst += stride; + } +} + +static void vert_16x16_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *top = (const pixel *) _top; + pixel4 p4a = AV_RN4PA(top + 0); + pixel4 p4b = AV_RN4PA(top + 4); + pixel4 p4c = AV_RN4PA(top + 8); + pixel4 p4d = AV_RN4PA(top + 12); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 16; y++) { + AV_WN4PA(dst + 0, p4a); + AV_WN4PA(dst + 4, p4b); + AV_WN4PA(dst + 8, p4c); + AV_WN4PA(dst + 12, p4d); + dst += stride; + } +} + +static void vert_32x32_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *top = (const pixel *) _top; + pixel4 p4a = AV_RN4PA(top + 0); + pixel4 p4b = AV_RN4PA(top + 4); + pixel4 p4c = AV_RN4PA(top + 8); + pixel4 p4d = AV_RN4PA(top + 12); + pixel4 p4e = AV_RN4PA(top + 16); + pixel4 p4f = AV_RN4PA(top + 20); + pixel4 p4g = AV_RN4PA(top + 24); + pixel4 p4h = AV_RN4PA(top + 28); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 32; y++) { + AV_WN4PA(dst + 0, p4a); + AV_WN4PA(dst + 4, p4b); + AV_WN4PA(dst + 8, p4c); + AV_WN4PA(dst + 12, p4d); + AV_WN4PA(dst + 16, p4e); + AV_WN4PA(dst + 20, p4f); + AV_WN4PA(dst + 24, p4g); + AV_WN4PA(dst + 28, p4h); + dst += stride; + } +} + +static void hor_4x4_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + + stride /= sizeof(pixel); + AV_WN4PA(dst + stride * 0, PIXEL_SPLAT_X4(left[3])); + AV_WN4PA(dst + stride * 1, PIXEL_SPLAT_X4(left[2])); + AV_WN4PA(dst + stride * 2, PIXEL_SPLAT_X4(left[1])); + AV_WN4PA(dst + stride * 3, PIXEL_SPLAT_X4(left[0])); +} + +static void hor_8x8_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 8; y++) { + pixel4 p4 = PIXEL_SPLAT_X4(left[7 - y]); + + AV_WN4PA(dst + 0, p4); + AV_WN4PA(dst + 4, p4); + dst += stride; + } +} + +static void hor_16x16_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 16; y++) { + pixel4 p4 = PIXEL_SPLAT_X4(left[15 - y]); + + AV_WN4PA(dst + 0, p4); + AV_WN4PA(dst + 4, p4); + AV_WN4PA(dst + 8, p4); + AV_WN4PA(dst + 12, p4); + dst += stride; + } +} + +static void hor_32x32_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 32; y++) { + pixel4 p4 = PIXEL_SPLAT_X4(left[31 - y]); + + AV_WN4PA(dst + 0, p4); + AV_WN4PA(dst + 4, p4); + AV_WN4PA(dst + 8, p4); + AV_WN4PA(dst + 12, p4); + AV_WN4PA(dst + 16, p4); + AV_WN4PA(dst + 20, p4); + AV_WN4PA(dst + 24, p4); + AV_WN4PA(dst + 28, p4); + dst += stride; + } +} + +#endif /* BIT_DEPTH != 12 */ + +static void tm_4x4_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + const pixel *top = (const pixel *) _top; + int y, tl = top[-1]; + + stride /= sizeof(pixel); + for (y = 0; y < 4; y++) { + int l_m_tl = left[3 - y] - tl; + + dst[0] = av_clip_pixel(top[0] + l_m_tl); + dst[1] = av_clip_pixel(top[1] + l_m_tl); + dst[2] = av_clip_pixel(top[2] + l_m_tl); + dst[3] = av_clip_pixel(top[3] + l_m_tl); + dst += stride; + } +} + +static void tm_8x8_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + const pixel *top = (const pixel *) _top; + int y, tl = top[-1]; + + stride /= sizeof(pixel); + for (y = 0; y < 8; y++) { + int l_m_tl = left[7 - y] - tl; + + dst[0] = av_clip_pixel(top[0] + l_m_tl); + dst[1] = av_clip_pixel(top[1] + l_m_tl); + dst[2] = av_clip_pixel(top[2] + l_m_tl); + dst[3] = av_clip_pixel(top[3] + l_m_tl); + dst[4] = av_clip_pixel(top[4] + l_m_tl); + dst[5] = av_clip_pixel(top[5] + l_m_tl); + dst[6] = av_clip_pixel(top[6] + l_m_tl); + dst[7] = av_clip_pixel(top[7] + l_m_tl); + dst += stride; + } +} + +static void tm_16x16_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + const pixel *top = (const pixel *) _top; + int y, tl = top[-1]; + + stride /= sizeof(pixel); + for (y = 0; y < 16; y++) { + int l_m_tl = left[15 - y] - tl; + + dst[ 0] = av_clip_pixel(top[ 0] + l_m_tl); + dst[ 1] = av_clip_pixel(top[ 1] + l_m_tl); + dst[ 2] = av_clip_pixel(top[ 2] + l_m_tl); + dst[ 3] = av_clip_pixel(top[ 3] + l_m_tl); + dst[ 4] = av_clip_pixel(top[ 4] + l_m_tl); + dst[ 5] = av_clip_pixel(top[ 5] + l_m_tl); + dst[ 6] = av_clip_pixel(top[ 6] + l_m_tl); + dst[ 7] = av_clip_pixel(top[ 7] + l_m_tl); + dst[ 8] = av_clip_pixel(top[ 8] + l_m_tl); + dst[ 9] = av_clip_pixel(top[ 9] + l_m_tl); + dst[10] = av_clip_pixel(top[10] + l_m_tl); + dst[11] = av_clip_pixel(top[11] + l_m_tl); + dst[12] = av_clip_pixel(top[12] + l_m_tl); + dst[13] = av_clip_pixel(top[13] + l_m_tl); + dst[14] = av_clip_pixel(top[14] + l_m_tl); + dst[15] = av_clip_pixel(top[15] + l_m_tl); + dst += stride; + } +} + +static void tm_32x32_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + const pixel *top = (const pixel *) _top; + int y, tl = top[-1]; + + stride /= sizeof(pixel); + for (y = 0; y < 32; y++) { + int l_m_tl = left[31 - y] - tl; + + dst[ 0] = av_clip_pixel(top[ 0] + l_m_tl); + dst[ 1] = av_clip_pixel(top[ 1] + l_m_tl); + dst[ 2] = av_clip_pixel(top[ 2] + l_m_tl); + dst[ 3] = av_clip_pixel(top[ 3] + l_m_tl); + dst[ 4] = av_clip_pixel(top[ 4] + l_m_tl); + dst[ 5] = av_clip_pixel(top[ 5] + l_m_tl); + dst[ 6] = av_clip_pixel(top[ 6] + l_m_tl); + dst[ 7] = av_clip_pixel(top[ 7] + l_m_tl); + dst[ 8] = av_clip_pixel(top[ 8] + l_m_tl); + dst[ 9] = av_clip_pixel(top[ 9] + l_m_tl); + dst[10] = av_clip_pixel(top[10] + l_m_tl); + dst[11] = av_clip_pixel(top[11] + l_m_tl); + dst[12] = av_clip_pixel(top[12] + l_m_tl); + dst[13] = av_clip_pixel(top[13] + l_m_tl); + dst[14] = av_clip_pixel(top[14] + l_m_tl); + dst[15] = av_clip_pixel(top[15] + l_m_tl); + dst[16] = av_clip_pixel(top[16] + l_m_tl); + dst[17] = av_clip_pixel(top[17] + l_m_tl); + dst[18] = av_clip_pixel(top[18] + l_m_tl); + dst[19] = av_clip_pixel(top[19] + l_m_tl); + dst[20] = av_clip_pixel(top[20] + l_m_tl); + dst[21] = av_clip_pixel(top[21] + l_m_tl); + dst[22] = av_clip_pixel(top[22] + l_m_tl); + dst[23] = av_clip_pixel(top[23] + l_m_tl); + dst[24] = av_clip_pixel(top[24] + l_m_tl); + dst[25] = av_clip_pixel(top[25] + l_m_tl); + dst[26] = av_clip_pixel(top[26] + l_m_tl); + dst[27] = av_clip_pixel(top[27] + l_m_tl); + dst[28] = av_clip_pixel(top[28] + l_m_tl); + dst[29] = av_clip_pixel(top[29] + l_m_tl); + dst[30] = av_clip_pixel(top[30] + l_m_tl); + dst[31] = av_clip_pixel(top[31] + l_m_tl); + dst += stride; + } +} + +#if BIT_DEPTH != 12 + +static void dc_4x4_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + const pixel *top = (const pixel *) _top; + pixel4 dc = PIXEL_SPLAT_X4((left[0] + left[1] + left[2] + left[3] + + top[0] + top[1] + top[2] + top[3] + 4) >> 3); + + stride /= sizeof(pixel); + AV_WN4PA(dst + stride * 0, dc); + AV_WN4PA(dst + stride * 1, dc); + AV_WN4PA(dst + stride * 2, dc); + AV_WN4PA(dst + stride * 3, dc); +} + +static void dc_8x8_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + const pixel *top = (const pixel *) _top; + pixel4 dc = PIXEL_SPLAT_X4 + ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + + left[6] + left[7] + top[0] + top[1] + top[2] + top[3] + + top[4] + top[5] + top[6] + top[7] + 8) >> 4); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 8; y++) { + AV_WN4PA(dst + 0, dc); + AV_WN4PA(dst + 4, dc); + dst += stride; + } +} + +static void dc_16x16_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + const pixel *top = (const pixel *) _top; + pixel4 dc = PIXEL_SPLAT_X4 + ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + left[6] + + left[7] + left[8] + left[9] + left[10] + left[11] + left[12] + + left[13] + left[14] + left[15] + top[0] + top[1] + top[2] + top[3] + + top[4] + top[5] + top[6] + top[7] + top[8] + top[9] + top[10] + + top[11] + top[12] + top[13] + top[14] + top[15] + 16) >> 5); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 16; y++) { + AV_WN4PA(dst + 0, dc); + AV_WN4PA(dst + 4, dc); + AV_WN4PA(dst + 8, dc); + AV_WN4PA(dst + 12, dc); + dst += stride; + } +} + +static void dc_32x32_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + const pixel *top = (const pixel *) _top; + pixel4 dc = PIXEL_SPLAT_X4 + ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + left[6] + + left[7] + left[8] + left[9] + left[10] + left[11] + left[12] + + left[13] + left[14] + left[15] + left[16] + left[17] + left[18] + + left[19] + left[20] + left[21] + left[22] + left[23] + left[24] + + left[25] + left[26] + left[27] + left[28] + left[29] + left[30] + + left[31] + top[0] + top[1] + top[2] + top[3] + top[4] + top[5] + + top[6] + top[7] + top[8] + top[9] + top[10] + top[11] + top[12] + + top[13] + top[14] + top[15] + top[16] + top[17] + top[18] + top[19] + + top[20] + top[21] + top[22] + top[23] + top[24] + top[25] + top[26] + + top[27] + top[28] + top[29] + top[30] + top[31] + 32) >> 6); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 32; y++) { + AV_WN4PA(dst + 0, dc); + AV_WN4PA(dst + 4, dc); + AV_WN4PA(dst + 8, dc); + AV_WN4PA(dst + 12, dc); + AV_WN4PA(dst + 16, dc); + AV_WN4PA(dst + 20, dc); + AV_WN4PA(dst + 24, dc); + AV_WN4PA(dst + 28, dc); + dst += stride; + } +} + +static void dc_left_4x4_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + pixel4 dc = PIXEL_SPLAT_X4((left[0] + left[1] + left[2] + left[3] + 2) >> 2); + + stride /= sizeof(pixel); + AV_WN4PA(dst + stride * 0, dc); + AV_WN4PA(dst + stride * 1, dc); + AV_WN4PA(dst + stride * 2, dc); + AV_WN4PA(dst + stride * 3, dc); +} + +static void dc_left_8x8_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + pixel4 dc = PIXEL_SPLAT_X4 + ((left[0] + left[1] + left[2] + left[3] + + left[4] + left[5] + left[6] + left[7] + 4) >> 3); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 8; y++) { + AV_WN4PA(dst + 0, dc); + AV_WN4PA(dst + 4, dc); + dst += stride; + } +} + +static void dc_left_16x16_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + pixel4 dc = PIXEL_SPLAT_X4 + ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + + left[6] + left[7] + left[8] + left[9] + left[10] + left[11] + + left[12] + left[13] + left[14] + left[15] + 8) >> 4); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 16; y++) { + AV_WN4PA(dst + 0, dc); + AV_WN4PA(dst + 4, dc); + AV_WN4PA(dst + 8, dc); + AV_WN4PA(dst + 12, dc); + dst += stride; + } +} + +static void dc_left_32x32_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + pixel4 dc = PIXEL_SPLAT_X4 + ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + + left[6] + left[7] + left[8] + left[9] + left[10] + left[11] + + left[12] + left[13] + left[14] + left[15] + left[16] + left[17] + + left[18] + left[19] + left[20] + left[21] + left[22] + left[23] + + left[24] + left[25] + left[26] + left[27] + left[28] + left[29] + + left[30] + left[31] + 16) >> 5); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 32; y++) { + AV_WN4PA(dst + 0, dc); + AV_WN4PA(dst + 4, dc); + AV_WN4PA(dst + 8, dc); + AV_WN4PA(dst + 12, dc); + AV_WN4PA(dst + 16, dc); + AV_WN4PA(dst + 20, dc); + AV_WN4PA(dst + 24, dc); + AV_WN4PA(dst + 28, dc); + dst += stride; + } +} + +static void dc_top_4x4_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *top = (const pixel *) _top; + pixel4 dc = PIXEL_SPLAT_X4((top[0] + top[1] + top[2] + top[3] + 2) >> 2); + + stride /= sizeof(pixel); + AV_WN4PA(dst + stride * 0, dc); + AV_WN4PA(dst + stride * 1, dc); + AV_WN4PA(dst + stride * 2, dc); + AV_WN4PA(dst + stride * 3, dc); +} + +static void dc_top_8x8_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *top = (const pixel *) _top; + pixel4 dc = PIXEL_SPLAT_X4 + ((top[0] + top[1] + top[2] + top[3] + + top[4] + top[5] + top[6] + top[7] + 4) >> 3); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 8; y++) { + AV_WN4PA(dst + 0, dc); + AV_WN4PA(dst + 4, dc); + dst += stride; + } +} + +static void dc_top_16x16_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *top = (const pixel *) _top; + pixel4 dc = PIXEL_SPLAT_X4 + ((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] + + top[6] + top[7] + top[8] + top[9] + top[10] + top[11] + + top[12] + top[13] + top[14] + top[15] + 8) >> 4); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 16; y++) { + AV_WN4PA(dst + 0, dc); + AV_WN4PA(dst + 4, dc); + AV_WN4PA(dst + 8, dc); + AV_WN4PA(dst + 12, dc); + dst += stride; + } +} + +static void dc_top_32x32_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *top = (const pixel *) _top; + pixel4 dc = PIXEL_SPLAT_X4 + ((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] + + top[6] + top[7] + top[8] + top[9] + top[10] + top[11] + + top[12] + top[13] + top[14] + top[15] + top[16] + top[17] + + top[18] + top[19] + top[20] + top[21] + top[22] + top[23] + + top[24] + top[25] + top[26] + top[27] + top[28] + top[29] + + top[30] + top[31] + 16) >> 5); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 32; y++) { + AV_WN4PA(dst + 0, dc); + AV_WN4PA(dst + 4, dc); + AV_WN4PA(dst + 8, dc); + AV_WN4PA(dst + 12, dc); + AV_WN4PA(dst + 16, dc); + AV_WN4PA(dst + 20, dc); + AV_WN4PA(dst + 24, dc); + AV_WN4PA(dst + 28, dc); + dst += stride; + } +} + +#endif /* BIT_DEPTH != 12 */ + +static void dc_128_4x4_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + pixel4 val = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8)); + + stride /= sizeof(pixel); + AV_WN4PA(dst + stride * 0, val); + AV_WN4PA(dst + stride * 1, val); + AV_WN4PA(dst + stride * 2, val); + AV_WN4PA(dst + stride * 3, val); +} + +static void dc_128_8x8_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + pixel4 val = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8)); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 8; y++) { + AV_WN4PA(dst + 0, val); + AV_WN4PA(dst + 4, val); + dst += stride; + } +} + +static void dc_128_16x16_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + pixel4 val = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8)); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 16; y++) { + AV_WN4PA(dst + 0, val); + AV_WN4PA(dst + 4, val); + AV_WN4PA(dst + 8, val); + AV_WN4PA(dst + 12, val); + dst += stride; + } +} + +static void dc_128_32x32_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + pixel4 val = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8)); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 32; y++) { + AV_WN4PA(dst + 0, val); + AV_WN4PA(dst + 4, val); + AV_WN4PA(dst + 8, val); + AV_WN4PA(dst + 12, val); + AV_WN4PA(dst + 16, val); + AV_WN4PA(dst + 20, val); + AV_WN4PA(dst + 24, val); + AV_WN4PA(dst + 28, val); + dst += stride; + } +} + +static void dc_127_4x4_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1); + + stride /= sizeof(pixel); + AV_WN4PA(dst + stride * 0, val); + AV_WN4PA(dst + stride * 1, val); + AV_WN4PA(dst + stride * 2, val); + AV_WN4PA(dst + stride * 3, val);} + +static void dc_127_8x8_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 8; y++) { + AV_WN4PA(dst + 0, val); + AV_WN4PA(dst + 4, val); + dst += stride; + } +} + +static void dc_127_16x16_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 16; y++) { + AV_WN4PA(dst + 0, val); + AV_WN4PA(dst + 4, val); + AV_WN4PA(dst + 8, val); + AV_WN4PA(dst + 12, val); + dst += stride; + } +} + +static void dc_127_32x32_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 32; y++) { + AV_WN4PA(dst + 0, val); + AV_WN4PA(dst + 4, val); + AV_WN4PA(dst + 8, val); + AV_WN4PA(dst + 12, val); + AV_WN4PA(dst + 16, val); + AV_WN4PA(dst + 20, val); + AV_WN4PA(dst + 24, val); + AV_WN4PA(dst + 28, val); + dst += stride; + } +} + +static void dc_129_4x4_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1); + + stride /= sizeof(pixel); + AV_WN4PA(dst + stride * 0, val); + AV_WN4PA(dst + stride * 1, val); + AV_WN4PA(dst + stride * 2, val); + AV_WN4PA(dst + stride * 3, val); +} + +static void dc_129_8x8_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 8; y++) { + AV_WN4PA(dst + 0, val); + AV_WN4PA(dst + 4, val); + dst += stride; + } +} + +static void dc_129_16x16_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 16; y++) { + AV_WN4PA(dst + 0, val); + AV_WN4PA(dst + 4, val); + AV_WN4PA(dst + 8, val); + AV_WN4PA(dst + 12, val); + dst += stride; + } +} + +static void dc_129_32x32_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1); + int y; + + stride /= sizeof(pixel); + for (y = 0; y < 32; y++) { + AV_WN4PA(dst + 0, val); + AV_WN4PA(dst + 4, val); + AV_WN4PA(dst + 8, val); + AV_WN4PA(dst + 12, val); + AV_WN4PA(dst + 16, val); + AV_WN4PA(dst + 20, val); + AV_WN4PA(dst + 24, val); + AV_WN4PA(dst + 28, val); + dst += stride; + } +} + +#if BIT_DEPTH != 12 + +#if BIT_DEPTH == 8 +#define memset_bpc memset +#else +static inline void memset_bpc(uint16_t *dst, int val, int len) { + int n; + for (n = 0; n < len; n++) { + dst[n] = val; + } +} +#endif + +#define DST(x, y) dst[(x) + (y) * stride] + +static void diag_downleft_4x4_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *top = (const pixel *) _top; + int a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3], + a4 = top[4], a5 = top[5], a6 = top[6], a7 = top[7]; + + stride /= sizeof(pixel); + DST(0,0) = (a0 + a1 * 2 + a2 + 2) >> 2; + DST(1,0) = DST(0,1) = (a1 + a2 * 2 + a3 + 2) >> 2; + DST(2,0) = DST(1,1) = DST(0,2) = (a2 + a3 * 2 + a4 + 2) >> 2; + DST(3,0) = DST(2,1) = DST(1,2) = DST(0,3) = (a3 + a4 * 2 + a5 + 2) >> 2; + DST(3,1) = DST(2,2) = DST(1,3) = (a4 + a5 * 2 + a6 + 2) >> 2; + DST(3,2) = DST(2,3) = (a5 + a6 * 2 + a7 + 2) >> 2; + DST(3,3) = a7; // note: this is different from vp8 and such +} + +#define def_diag_downleft(size) \ +static void diag_downleft_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ + const uint8_t *left, const uint8_t *_top) \ +{ \ + pixel *dst = (pixel *) _dst; \ + const pixel *top = (const pixel *) _top; \ + int i, j; \ + pixel v[size - 1]; \ +\ + stride /= sizeof(pixel); \ + for (i = 0; i < size - 2; i++) \ + v[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \ + v[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \ +\ + for (j = 0; j < size; j++) { \ + memcpy(dst + j*stride, v + j, (size - 1 - j) * sizeof(pixel)); \ + memset_bpc(dst + j*stride + size - 1 - j, top[size - 1], j + 1); \ + } \ +} + +def_diag_downleft(8) +def_diag_downleft(16) +def_diag_downleft(32) + +static void diag_downright_4x4_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *top = (const pixel *) _top; + const pixel *left = (const pixel *) _left; + int tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3], + l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0]; + + stride /= sizeof(pixel); + DST(0,3) = (l1 + l2 * 2 + l3 + 2) >> 2; + DST(0,2) = DST(1,3) = (l0 + l1 * 2 + l2 + 2) >> 2; + DST(0,1) = DST(1,2) = DST(2,3) = (tl + l0 * 2 + l1 + 2) >> 2; + DST(0,0) = DST(1,1) = DST(2,2) = DST(3,3) = (l0 + tl * 2 + a0 + 2) >> 2; + DST(1,0) = DST(2,1) = DST(3,2) = (tl + a0 * 2 + a1 + 2) >> 2; + DST(2,0) = DST(3,1) = (a0 + a1 * 2 + a2 + 2) >> 2; + DST(3,0) = (a1 + a2 * 2 + a3 + 2) >> 2; +} + +#define def_diag_downright(size) \ +static void diag_downright_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ + const uint8_t *_left, const uint8_t *_top) \ +{ \ + pixel *dst = (pixel *) _dst; \ + const pixel *top = (const pixel *) _top; \ + const pixel *left = (const pixel *) _left; \ + int i, j; \ + pixel v[size + size - 1]; \ +\ + stride /= sizeof(pixel); \ + for (i = 0; i < size - 2; i++) { \ + v[i ] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \ + v[size + 1 + i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \ + } \ + v[size - 2] = (left[size - 2] + left[size - 1] * 2 + top[-1] + 2) >> 2; \ + v[size - 1] = (left[size - 1] + top[-1] * 2 + top[ 0] + 2) >> 2; \ + v[size ] = (top[-1] + top[0] * 2 + top[ 1] + 2) >> 2; \ +\ + for (j = 0; j < size; j++) \ + memcpy(dst + j*stride, v + size - 1 - j, size * sizeof(pixel)); \ +} + +def_diag_downright(8) +def_diag_downright(16) +def_diag_downright(32) + +static void vert_right_4x4_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *top = (const pixel *) _top; + const pixel *left = (const pixel *) _left; + int tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3], + l0 = left[3], l1 = left[2], l2 = left[1]; + + stride /= sizeof(pixel); + DST(0,3) = (l0 + l1 * 2 + l2 + 2) >> 2; + DST(0,2) = (tl + l0 * 2 + l1 + 2) >> 2; + DST(0,0) = DST(1,2) = (tl + a0 + 1) >> 1; + DST(0,1) = DST(1,3) = (l0 + tl * 2 + a0 + 2) >> 2; + DST(1,0) = DST(2,2) = (a0 + a1 + 1) >> 1; + DST(1,1) = DST(2,3) = (tl + a0 * 2 + a1 + 2) >> 2; + DST(2,0) = DST(3,2) = (a1 + a2 + 1) >> 1; + DST(2,1) = DST(3,3) = (a0 + a1 * 2 + a2 + 2) >> 2; + DST(3,0) = (a2 + a3 + 1) >> 1; + DST(3,1) = (a1 + a2 * 2 + a3 + 2) >> 2; +} + +#define def_vert_right(size) \ +static void vert_right_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ + const uint8_t *_left, const uint8_t *_top) \ +{ \ + pixel *dst = (pixel *) _dst; \ + const pixel *top = (const pixel *) _top; \ + const pixel *left = (const pixel *) _left; \ + int i, j; \ + pixel ve[size + size/2 - 1], vo[size + size/2 - 1]; \ +\ + stride /= sizeof(pixel); \ + for (i = 0; i < size/2 - 2; i++) { \ + vo[i] = (left[i*2 + 3] + left[i*2 + 2] * 2 + left[i*2 + 1] + 2) >> 2; \ + ve[i] = (left[i*2 + 4] + left[i*2 + 3] * 2 + left[i*2 + 2] + 2) >> 2; \ + } \ + vo[size/2 - 2] = (left[size - 1] + left[size - 2] * 2 + left[size - 3] + 2) >> 2; \ + ve[size/2 - 2] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \ +\ + ve[size/2 - 1] = (top[-1] + top[0] + 1) >> 1; \ + vo[size/2 - 1] = (left[size - 1] + top[-1] * 2 + top[0] + 2) >> 2; \ + for (i = 0; i < size - 1; i++) { \ + ve[size/2 + i] = (top[i] + top[i + 1] + 1) >> 1; \ + vo[size/2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \ + } \ +\ + for (j = 0; j < size / 2; j++) { \ + memcpy(dst + j*2 *stride, ve + size/2 - 1 - j, size * sizeof(pixel)); \ + memcpy(dst + (j*2 + 1)*stride, vo + size/2 - 1 - j, size * sizeof(pixel)); \ + } \ +} + +def_vert_right(8) +def_vert_right(16) +def_vert_right(32) + +static void hor_down_4x4_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *top = (const pixel *) _top; + const pixel *left = (const pixel *) _left; + int l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0], + tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2]; + + stride /= sizeof(pixel); + DST(2,0) = (tl + a0 * 2 + a1 + 2) >> 2; + DST(3,0) = (a0 + a1 * 2 + a2 + 2) >> 2; + DST(0,0) = DST(2,1) = (tl + l0 + 1) >> 1; + DST(1,0) = DST(3,1) = (a0 + tl * 2 + l0 + 2) >> 2; + DST(0,1) = DST(2,2) = (l0 + l1 + 1) >> 1; + DST(1,1) = DST(3,2) = (tl + l0 * 2 + l1 + 2) >> 2; + DST(0,2) = DST(2,3) = (l1 + l2 + 1) >> 1; + DST(1,2) = DST(3,3) = (l0 + l1 * 2 + l2 + 2) >> 2; + DST(0,3) = (l2 + l3 + 1) >> 1; + DST(1,3) = (l1 + l2 * 2 + l3 + 2) >> 2; +} + +#define def_hor_down(size) \ +static void hor_down_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ + const uint8_t *_left, const uint8_t *_top) \ +{ \ + pixel *dst = (pixel *) _dst; \ + const pixel *top = (const pixel *) _top; \ + const pixel *left = (const pixel *) _left; \ + int i, j; \ + pixel v[size * 3 - 2]; \ +\ + stride /= sizeof(pixel); \ + for (i = 0; i < size - 2; i++) { \ + v[i*2 ] = (left[i + 1] + left[i + 0] + 1) >> 1; \ + v[i*2 + 1] = (left[i + 2] + left[i + 1] * 2 + left[i + 0] + 2) >> 2; \ + v[size*2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \ + } \ + v[size*2 - 2] = (top[-1] + left[size - 1] + 1) >> 1; \ + v[size*2 - 4] = (left[size - 1] + left[size - 2] + 1) >> 1; \ + v[size*2 - 1] = (top[0] + top[-1] * 2 + left[size - 1] + 2) >> 2; \ + v[size*2 - 3] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \ +\ + for (j = 0; j < size; j++) \ + memcpy(dst + j*stride, v + size*2 - 2 - j*2, size * sizeof(pixel)); \ +} + +def_hor_down(8) +def_hor_down(16) +def_hor_down(32) + +static void vert_left_4x4_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *left, const uint8_t *_top) +{ + pixel *dst = (pixel *) _dst; + const pixel *top = (const pixel *) _top; + int a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3], + a4 = top[4], a5 = top[5], a6 = top[6]; + + stride /= sizeof(pixel); + DST(0,0) = (a0 + a1 + 1) >> 1; + DST(0,1) = (a0 + a1 * 2 + a2 + 2) >> 2; + DST(1,0) = DST(0,2) = (a1 + a2 + 1) >> 1; + DST(1,1) = DST(0,3) = (a1 + a2 * 2 + a3 + 2) >> 2; + DST(2,0) = DST(1,2) = (a2 + a3 + 1) >> 1; + DST(2,1) = DST(1,3) = (a2 + a3 * 2 + a4 + 2) >> 2; + DST(3,0) = DST(2,2) = (a3 + a4 + 1) >> 1; + DST(3,1) = DST(2,3) = (a3 + a4 * 2 + a5 + 2) >> 2; + DST(3,2) = (a4 + a5 + 1) >> 1; + DST(3,3) = (a4 + a5 * 2 + a6 + 2) >> 2; +} + +#define def_vert_left(size) \ +static void vert_left_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ + const uint8_t *left, const uint8_t *_top) \ +{ \ + pixel *dst = (pixel *) _dst; \ + const pixel *top = (const pixel *) _top; \ + int i, j; \ + pixel ve[size - 1], vo[size - 1]; \ +\ + stride /= sizeof(pixel); \ + for (i = 0; i < size - 2; i++) { \ + ve[i] = (top[i] + top[i + 1] + 1) >> 1; \ + vo[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \ + } \ + ve[size - 2] = (top[size - 2] + top[size - 1] + 1) >> 1; \ + vo[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \ +\ + for (j = 0; j < size / 2; j++) { \ + memcpy(dst + j*2 * stride, ve + j, (size - j - 1) * sizeof(pixel)); \ + memset_bpc(dst + j*2 * stride + size - j - 1, top[size - 1], j + 1); \ + memcpy(dst + (j*2 + 1) * stride, vo + j, (size - j - 1) * sizeof(pixel)); \ + memset_bpc(dst + (j*2 + 1) * stride + size - j - 1, top[size - 1], j + 1); \ + } \ +} + +def_vert_left(8) +def_vert_left(16) +def_vert_left(32) + +static void hor_up_4x4_c(uint8_t *_dst, ptrdiff_t stride, + const uint8_t *_left, const uint8_t *top) +{ + pixel *dst = (pixel *) _dst; + const pixel *left = (const pixel *) _left; + int l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3]; + + stride /= sizeof(pixel); + DST(0,0) = (l0 + l1 + 1) >> 1; + DST(1,0) = (l0 + l1 * 2 + l2 + 2) >> 2; + DST(0,1) = DST(2,0) = (l1 + l2 + 1) >> 1; + DST(1,1) = DST(3,0) = (l1 + l2 * 2 + l3 + 2) >> 2; + DST(0,2) = DST(2,1) = (l2 + l3 + 1) >> 1; + DST(1,2) = DST(3,1) = (l2 + l3 * 3 + 2) >> 2; + DST(0,3) = DST(1,3) = DST(2,2) = DST(2,3) = DST(3,2) = DST(3,3) = l3; +} + +#define def_hor_up(size) \ +static void hor_up_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ + const uint8_t *_left, const uint8_t *top) \ +{ \ + pixel *dst = (pixel *) _dst; \ + const pixel *left = (const pixel *) _left; \ + int i, j; \ + pixel v[size*2 - 2]; \ +\ + stride /= sizeof(pixel); \ + for (i = 0; i < size - 2; i++) { \ + v[i*2 ] = (left[i] + left[i + 1] + 1) >> 1; \ + v[i*2 + 1] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \ + } \ + v[size*2 - 4] = (left[size - 2] + left[size - 1] + 1) >> 1; \ + v[size*2 - 3] = (left[size - 2] + left[size - 1] * 3 + 2) >> 2; \ +\ + for (j = 0; j < size / 2; j++) \ + memcpy(dst + j*stride, v + j*2, size * sizeof(pixel)); \ + for (j = size / 2; j < size; j++) { \ + memcpy(dst + j*stride, v + j*2, (size*2 - 2 - j*2) * sizeof(pixel)); \ + memset_bpc(dst + j*stride + size*2 - 2 - j*2, left[size - 1], \ + 2 + j*2 - size); \ + } \ +} + +def_hor_up(8) +def_hor_up(16) +def_hor_up(32) + +#undef DST + +#endif /* BIT_DEPTH != 12 */ + +#if BIT_DEPTH != 8 +void ff_vp9dsp_intrapred_init_10(VP9DSPContext *dsp); +#endif +#if BIT_DEPTH != 10 +static +#endif +av_cold void FUNC(ff_vp9dsp_intrapred_init)(VP9DSPContext *dsp) +{ +#define init_intra_pred_bd_aware(tx, sz) \ + dsp->intra_pred[tx][TM_VP8_PRED] = tm_##sz##_c; \ + dsp->intra_pred[tx][DC_128_PRED] = dc_128_##sz##_c; \ + dsp->intra_pred[tx][DC_127_PRED] = dc_127_##sz##_c; \ + dsp->intra_pred[tx][DC_129_PRED] = dc_129_##sz##_c + +#if BIT_DEPTH == 12 + ff_vp9dsp_intrapred_init_10(dsp); +#define init_intra_pred(tx, sz) \ + init_intra_pred_bd_aware(tx, sz) +#else + #define init_intra_pred(tx, sz) \ + dsp->intra_pred[tx][VERT_PRED] = vert_##sz##_c; \ + dsp->intra_pred[tx][HOR_PRED] = hor_##sz##_c; \ + dsp->intra_pred[tx][DC_PRED] = dc_##sz##_c; \ + dsp->intra_pred[tx][DIAG_DOWN_LEFT_PRED] = diag_downleft_##sz##_c; \ + dsp->intra_pred[tx][DIAG_DOWN_RIGHT_PRED] = diag_downright_##sz##_c; \ + dsp->intra_pred[tx][VERT_RIGHT_PRED] = vert_right_##sz##_c; \ + dsp->intra_pred[tx][HOR_DOWN_PRED] = hor_down_##sz##_c; \ + dsp->intra_pred[tx][VERT_LEFT_PRED] = vert_left_##sz##_c; \ + dsp->intra_pred[tx][HOR_UP_PRED] = hor_up_##sz##_c; \ + dsp->intra_pred[tx][LEFT_DC_PRED] = dc_left_##sz##_c; \ + dsp->intra_pred[tx][TOP_DC_PRED] = dc_top_##sz##_c; \ + init_intra_pred_bd_aware(tx, sz) +#endif + + init_intra_pred(TX_4X4, 4x4); + init_intra_pred(TX_8X8, 8x8); + init_intra_pred(TX_16X16, 16x16); + init_intra_pred(TX_32X32, 32x32); + +#undef init_intra_pred +#undef init_intra_pred_bd_aware +} + +#define itxfm_wrapper(type_a, type_b, sz, bits, has_dconly) \ +static void type_a##_##type_b##_##sz##x##sz##_add_c(uint8_t *_dst, \ + ptrdiff_t stride, \ + int16_t *_block, int eob) \ +{ \ + int i, j; \ + pixel *dst = (pixel *) _dst; \ + dctcoef *block = (dctcoef *) _block, tmp[sz * sz], out[sz]; \ +\ + stride /= sizeof(pixel); \ + if (has_dconly && eob == 1) { \ + const int t = ((((dctint) block[0] * 11585 + (1 << 13)) >> 14) \ + * 11585 + (1 << 13)) >> 14; \ + block[0] = 0; \ + for (i = 0; i < sz; i++) { \ + for (j = 0; j < sz; j++) \ + dst[j * stride] = av_clip_pixel(dst[j * stride] + \ + (bits ? \ + (t + (1 << (bits - 1))) >> bits : \ + t)); \ + dst++; \ + } \ + return; \ + } \ +\ + for (i = 0; i < sz; i++) \ + type_a##sz##_1d(block + i, sz, tmp + i * sz, 0); \ + memset(block, 0, sz * sz * sizeof(*block)); \ + for (i = 0; i < sz; i++) { \ + type_b##sz##_1d(tmp + i, sz, out, 1); \ + for (j = 0; j < sz; j++) \ + dst[j * stride] = av_clip_pixel(dst[j * stride] + \ + (bits ? \ + (out[j] + (1 << (bits - 1))) >> bits : \ + out[j])); \ + dst++; \ + } \ +} + +#define itxfm_wrap(sz, bits) \ +itxfm_wrapper(idct, idct, sz, bits, 1) \ +itxfm_wrapper(iadst, idct, sz, bits, 0) \ +itxfm_wrapper(idct, iadst, sz, bits, 0) \ +itxfm_wrapper(iadst, iadst, sz, bits, 0) + +#define IN(x) ((dctint) in[(x) * stride]) + +static av_always_inline void idct4_1d(const dctcoef *in, ptrdiff_t stride, + dctcoef *out, int pass) +{ + dctint t0, t1, t2, t3; + + t0 = ((IN(0) + IN(2)) * 11585 + (1 << 13)) >> 14; + t1 = ((IN(0) - IN(2)) * 11585 + (1 << 13)) >> 14; + t2 = (IN(1) * 6270 - IN(3) * 15137 + (1 << 13)) >> 14; + t3 = (IN(1) * 15137 + IN(3) * 6270 + (1 << 13)) >> 14; + + out[0] = t0 + t3; + out[1] = t1 + t2; + out[2] = t1 - t2; + out[3] = t0 - t3; +} + +static av_always_inline void iadst4_1d(const dctcoef *in, ptrdiff_t stride, + dctcoef *out, int pass) +{ + dctint t0, t1, t2, t3; + + t0 = 5283 * IN(0) + 15212 * IN(2) + 9929 * IN(3); + t1 = 9929 * IN(0) - 5283 * IN(2) - 15212 * IN(3); + t2 = 13377 * (IN(0) - IN(2) + IN(3)); + t3 = 13377 * IN(1); + + out[0] = (t0 + t3 + (1 << 13)) >> 14; + out[1] = (t1 + t3 + (1 << 13)) >> 14; + out[2] = (t2 + (1 << 13)) >> 14; + out[3] = (t0 + t1 - t3 + (1 << 13)) >> 14; +} + +itxfm_wrap(4, 4) + +static av_always_inline void idct8_1d(const dctcoef *in, ptrdiff_t stride, + dctcoef *out, int pass) +{ + dctint t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a; + + t0a = ((IN(0) + IN(4)) * 11585 + (1 << 13)) >> 14; + t1a = ((IN(0) - IN(4)) * 11585 + (1 << 13)) >> 14; + t2a = (IN(2) * 6270 - IN(6) * 15137 + (1 << 13)) >> 14; + t3a = (IN(2) * 15137 + IN(6) * 6270 + (1 << 13)) >> 14; + t4a = (IN(1) * 3196 - IN(7) * 16069 + (1 << 13)) >> 14; + t5a = (IN(5) * 13623 - IN(3) * 9102 + (1 << 13)) >> 14; + t6a = (IN(5) * 9102 + IN(3) * 13623 + (1 << 13)) >> 14; + t7a = (IN(1) * 16069 + IN(7) * 3196 + (1 << 13)) >> 14; + + t0 = t0a + t3a; + t1 = t1a + t2a; + t2 = t1a - t2a; + t3 = t0a - t3a; + t4 = t4a + t5a; + t5a = t4a - t5a; + t7 = t7a + t6a; + t6a = t7a - t6a; + + t5 = ((t6a - t5a) * 11585 + (1 << 13)) >> 14; + t6 = ((t6a + t5a) * 11585 + (1 << 13)) >> 14; + + out[0] = t0 + t7; + out[1] = t1 + t6; + out[2] = t2 + t5; + out[3] = t3 + t4; + out[4] = t3 - t4; + out[5] = t2 - t5; + out[6] = t1 - t6; + out[7] = t0 - t7; +} + +static av_always_inline void iadst8_1d(const dctcoef *in, ptrdiff_t stride, + dctcoef *out, int pass) +{ + dctint t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a; + + t0a = 16305 * IN(7) + 1606 * IN(0); + t1a = 1606 * IN(7) - 16305 * IN(0); + t2a = 14449 * IN(5) + 7723 * IN(2); + t3a = 7723 * IN(5) - 14449 * IN(2); + t4a = 10394 * IN(3) + 12665 * IN(4); + t5a = 12665 * IN(3) - 10394 * IN(4); + t6a = 4756 * IN(1) + 15679 * IN(6); + t7a = 15679 * IN(1) - 4756 * IN(6); + + t0 = (t0a + t4a + (1 << 13)) >> 14; + t1 = (t1a + t5a + (1 << 13)) >> 14; + t2 = (t2a + t6a + (1 << 13)) >> 14; + t3 = (t3a + t7a + (1 << 13)) >> 14; + t4 = (t0a - t4a + (1 << 13)) >> 14; + t5 = (t1a - t5a + (1 << 13)) >> 14; + t6 = (t2a - t6a + (1 << 13)) >> 14; + t7 = (t3a - t7a + (1 << 13)) >> 14; + + t4a = 15137 * t4 + 6270 * t5; + t5a = 6270 * t4 - 15137 * t5; + t6a = 15137 * t7 - 6270 * t6; + t7a = 6270 * t7 + 15137 * t6; + + out[0] = t0 + t2; + out[7] = -(t1 + t3); + t2 = t0 - t2; + t3 = t1 - t3; + + out[1] = -((t4a + t6a + (1 << 13)) >> 14); + out[6] = (t5a + t7a + (1 << 13)) >> 14; + t6 = (t4a - t6a + (1 << 13)) >> 14; + t7 = (t5a - t7a + (1 << 13)) >> 14; + + out[3] = -(((t2 + t3) * 11585 + (1 << 13)) >> 14); + out[4] = ((t2 - t3) * 11585 + (1 << 13)) >> 14; + out[2] = ((t6 + t7) * 11585 + (1 << 13)) >> 14; + out[5] = -(((t6 - t7) * 11585 + (1 << 13)) >> 14); +} + +itxfm_wrap(8, 5) + +static av_always_inline void idct16_1d(const dctcoef *in, ptrdiff_t stride, + dctcoef *out, int pass) +{ + dctint t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15; + dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a; + dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a; + + t0a = ((IN(0) + IN(8)) * 11585 + (1 << 13)) >> 14; + t1a = ((IN(0) - IN(8)) * 11585 + (1 << 13)) >> 14; + t2a = (IN(4) * 6270 - IN(12) * 15137 + (1 << 13)) >> 14; + t3a = (IN(4) * 15137 + IN(12) * 6270 + (1 << 13)) >> 14; + t4a = (IN(2) * 3196 - IN(14) * 16069 + (1 << 13)) >> 14; + t7a = (IN(2) * 16069 + IN(14) * 3196 + (1 << 13)) >> 14; + t5a = (IN(10) * 13623 - IN(6) * 9102 + (1 << 13)) >> 14; + t6a = (IN(10) * 9102 + IN(6) * 13623 + (1 << 13)) >> 14; + t8a = (IN(1) * 1606 - IN(15) * 16305 + (1 << 13)) >> 14; + t15a = (IN(1) * 16305 + IN(15) * 1606 + (1 << 13)) >> 14; + t9a = (IN(9) * 12665 - IN(7) * 10394 + (1 << 13)) >> 14; + t14a = (IN(9) * 10394 + IN(7) * 12665 + (1 << 13)) >> 14; + t10a = (IN(5) * 7723 - IN(11) * 14449 + (1 << 13)) >> 14; + t13a = (IN(5) * 14449 + IN(11) * 7723 + (1 << 13)) >> 14; + t11a = (IN(13) * 15679 - IN(3) * 4756 + (1 << 13)) >> 14; + t12a = (IN(13) * 4756 + IN(3) * 15679 + (1 << 13)) >> 14; + + t0 = t0a + t3a; + t1 = t1a + t2a; + t2 = t1a - t2a; + t3 = t0a - t3a; + t4 = t4a + t5a; + t5 = t4a - t5a; + t6 = t7a - t6a; + t7 = t7a + t6a; + t8 = t8a + t9a; + t9 = t8a - t9a; + t10 = t11a - t10a; + t11 = t11a + t10a; + t12 = t12a + t13a; + t13 = t12a - t13a; + t14 = t15a - t14a; + t15 = t15a + t14a; + + t5a = ((t6 - t5) * 11585 + (1 << 13)) >> 14; + t6a = ((t6 + t5) * 11585 + (1 << 13)) >> 14; + t9a = ( t14 * 6270 - t9 * 15137 + (1 << 13)) >> 14; + t14a = ( t14 * 15137 + t9 * 6270 + (1 << 13)) >> 14; + t10a = (-(t13 * 15137 + t10 * 6270) + (1 << 13)) >> 14; + t13a = ( t13 * 6270 - t10 * 15137 + (1 << 13)) >> 14; + + t0a = t0 + t7; + t1a = t1 + t6a; + t2a = t2 + t5a; + t3a = t3 + t4; + t4 = t3 - t4; + t5 = t2 - t5a; + t6 = t1 - t6a; + t7 = t0 - t7; + t8a = t8 + t11; + t9 = t9a + t10a; + t10 = t9a - t10a; + t11a = t8 - t11; + t12a = t15 - t12; + t13 = t14a - t13a; + t14 = t14a + t13a; + t15a = t15 + t12; + + t10a = ((t13 - t10) * 11585 + (1 << 13)) >> 14; + t13a = ((t13 + t10) * 11585 + (1 << 13)) >> 14; + t11 = ((t12a - t11a) * 11585 + (1 << 13)) >> 14; + t12 = ((t12a + t11a) * 11585 + (1 << 13)) >> 14; + + out[ 0] = t0a + t15a; + out[ 1] = t1a + t14; + out[ 2] = t2a + t13a; + out[ 3] = t3a + t12; + out[ 4] = t4 + t11; + out[ 5] = t5 + t10a; + out[ 6] = t6 + t9; + out[ 7] = t7 + t8a; + out[ 8] = t7 - t8a; + out[ 9] = t6 - t9; + out[10] = t5 - t10a; + out[11] = t4 - t11; + out[12] = t3a - t12; + out[13] = t2a - t13a; + out[14] = t1a - t14; + out[15] = t0a - t15a; +} + +static av_always_inline void iadst16_1d(const dctcoef *in, ptrdiff_t stride, + dctcoef *out, int pass) +{ + dctint t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15; + dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a; + dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a; + + t0 = IN(15) * 16364 + IN(0) * 804; + t1 = IN(15) * 804 - IN(0) * 16364; + t2 = IN(13) * 15893 + IN(2) * 3981; + t3 = IN(13) * 3981 - IN(2) * 15893; + t4 = IN(11) * 14811 + IN(4) * 7005; + t5 = IN(11) * 7005 - IN(4) * 14811; + t6 = IN(9) * 13160 + IN(6) * 9760; + t7 = IN(9) * 9760 - IN(6) * 13160; + t8 = IN(7) * 11003 + IN(8) * 12140; + t9 = IN(7) * 12140 - IN(8) * 11003; + t10 = IN(5) * 8423 + IN(10) * 14053; + t11 = IN(5) * 14053 - IN(10) * 8423; + t12 = IN(3) * 5520 + IN(12) * 15426; + t13 = IN(3) * 15426 - IN(12) * 5520; + t14 = IN(1) * 2404 + IN(14) * 16207; + t15 = IN(1) * 16207 - IN(14) * 2404; + + t0a = (t0 + t8 + (1 << 13)) >> 14; + t1a = (t1 + t9 + (1 << 13)) >> 14; + t2a = (t2 + t10 + (1 << 13)) >> 14; + t3a = (t3 + t11 + (1 << 13)) >> 14; + t4a = (t4 + t12 + (1 << 13)) >> 14; + t5a = (t5 + t13 + (1 << 13)) >> 14; + t6a = (t6 + t14 + (1 << 13)) >> 14; + t7a = (t7 + t15 + (1 << 13)) >> 14; + t8a = (t0 - t8 + (1 << 13)) >> 14; + t9a = (t1 - t9 + (1 << 13)) >> 14; + t10a = (t2 - t10 + (1 << 13)) >> 14; + t11a = (t3 - t11 + (1 << 13)) >> 14; + t12a = (t4 - t12 + (1 << 13)) >> 14; + t13a = (t5 - t13 + (1 << 13)) >> 14; + t14a = (t6 - t14 + (1 << 13)) >> 14; + t15a = (t7 - t15 + (1 << 13)) >> 14; + + t8 = t8a * 16069 + t9a * 3196; + t9 = t8a * 3196 - t9a * 16069; + t10 = t10a * 9102 + t11a * 13623; + t11 = t10a * 13623 - t11a * 9102; + t12 = t13a * 16069 - t12a * 3196; + t13 = t13a * 3196 + t12a * 16069; + t14 = t15a * 9102 - t14a * 13623; + t15 = t15a * 13623 + t14a * 9102; + + t0 = t0a + t4a; + t1 = t1a + t5a; + t2 = t2a + t6a; + t3 = t3a + t7a; + t4 = t0a - t4a; + t5 = t1a - t5a; + t6 = t2a - t6a; + t7 = t3a - t7a; + t8a = (t8 + t12 + (1 << 13)) >> 14; + t9a = (t9 + t13 + (1 << 13)) >> 14; + t10a = (t10 + t14 + (1 << 13)) >> 14; + t11a = (t11 + t15 + (1 << 13)) >> 14; + t12a = (t8 - t12 + (1 << 13)) >> 14; + t13a = (t9 - t13 + (1 << 13)) >> 14; + t14a = (t10 - t14 + (1 << 13)) >> 14; + t15a = (t11 - t15 + (1 << 13)) >> 14; + + t4a = t4 * 15137 + t5 * 6270; + t5a = t4 * 6270 - t5 * 15137; + t6a = t7 * 15137 - t6 * 6270; + t7a = t7 * 6270 + t6 * 15137; + t12 = t12a * 15137 + t13a * 6270; + t13 = t12a * 6270 - t13a * 15137; + t14 = t15a * 15137 - t14a * 6270; + t15 = t15a * 6270 + t14a * 15137; + + out[ 0] = t0 + t2; + out[15] = -(t1 + t3); + t2a = t0 - t2; + t3a = t1 - t3; + out[ 3] = -((t4a + t6a + (1 << 13)) >> 14); + out[12] = (t5a + t7a + (1 << 13)) >> 14; + t6 = (t4a - t6a + (1 << 13)) >> 14; + t7 = (t5a - t7a + (1 << 13)) >> 14; + out[ 1] = -(t8a + t10a); + out[14] = t9a + t11a; + t10 = t8a - t10a; + t11 = t9a - t11a; + out[ 2] = (t12 + t14 + (1 << 13)) >> 14; + out[13] = -((t13 + t15 + (1 << 13)) >> 14); + t14a = (t12 - t14 + (1 << 13)) >> 14; + t15a = (t13 - t15 + (1 << 13)) >> 14; + + out[ 7] = ((t2a + t3a) * -11585 + (1 << 13)) >> 14; + out[ 8] = ((t2a - t3a) * 11585 + (1 << 13)) >> 14; + out[ 4] = ((t7 + t6) * 11585 + (1 << 13)) >> 14; + out[11] = ((t7 - t6) * 11585 + (1 << 13)) >> 14; + out[ 6] = ((t11 + t10) * 11585 + (1 << 13)) >> 14; + out[ 9] = ((t11 - t10) * 11585 + (1 << 13)) >> 14; + out[ 5] = ((t14a + t15a) * -11585 + (1 << 13)) >> 14; + out[10] = ((t14a - t15a) * 11585 + (1 << 13)) >> 14; +} + +itxfm_wrap(16, 6) + +static av_always_inline void idct32_1d(const dctcoef *in, ptrdiff_t stride, + dctcoef *out, int pass) +{ + dctint t0a = ((IN(0) + IN(16)) * 11585 + (1 << 13)) >> 14; + dctint t1a = ((IN(0) - IN(16)) * 11585 + (1 << 13)) >> 14; + dctint t2a = (IN( 8) * 6270 - IN(24) * 15137 + (1 << 13)) >> 14; + dctint t3a = (IN( 8) * 15137 + IN(24) * 6270 + (1 << 13)) >> 14; + dctint t4a = (IN( 4) * 3196 - IN(28) * 16069 + (1 << 13)) >> 14; + dctint t7a = (IN( 4) * 16069 + IN(28) * 3196 + (1 << 13)) >> 14; + dctint t5a = (IN(20) * 13623 - IN(12) * 9102 + (1 << 13)) >> 14; + dctint t6a = (IN(20) * 9102 + IN(12) * 13623 + (1 << 13)) >> 14; + dctint t8a = (IN( 2) * 1606 - IN(30) * 16305 + (1 << 13)) >> 14; + dctint t15a = (IN( 2) * 16305 + IN(30) * 1606 + (1 << 13)) >> 14; + dctint t9a = (IN(18) * 12665 - IN(14) * 10394 + (1 << 13)) >> 14; + dctint t14a = (IN(18) * 10394 + IN(14) * 12665 + (1 << 13)) >> 14; + dctint t10a = (IN(10) * 7723 - IN(22) * 14449 + (1 << 13)) >> 14; + dctint t13a = (IN(10) * 14449 + IN(22) * 7723 + (1 << 13)) >> 14; + dctint t11a = (IN(26) * 15679 - IN( 6) * 4756 + (1 << 13)) >> 14; + dctint t12a = (IN(26) * 4756 + IN( 6) * 15679 + (1 << 13)) >> 14; + dctint t16a = (IN( 1) * 804 - IN(31) * 16364 + (1 << 13)) >> 14; + dctint t31a = (IN( 1) * 16364 + IN(31) * 804 + (1 << 13)) >> 14; + dctint t17a = (IN(17) * 12140 - IN(15) * 11003 + (1 << 13)) >> 14; + dctint t30a = (IN(17) * 11003 + IN(15) * 12140 + (1 << 13)) >> 14; + dctint t18a = (IN( 9) * 7005 - IN(23) * 14811 + (1 << 13)) >> 14; + dctint t29a = (IN( 9) * 14811 + IN(23) * 7005 + (1 << 13)) >> 14; + dctint t19a = (IN(25) * 15426 - IN( 7) * 5520 + (1 << 13)) >> 14; + dctint t28a = (IN(25) * 5520 + IN( 7) * 15426 + (1 << 13)) >> 14; + dctint t20a = (IN( 5) * 3981 - IN(27) * 15893 + (1 << 13)) >> 14; + dctint t27a = (IN( 5) * 15893 + IN(27) * 3981 + (1 << 13)) >> 14; + dctint t21a = (IN(21) * 14053 - IN(11) * 8423 + (1 << 13)) >> 14; + dctint t26a = (IN(21) * 8423 + IN(11) * 14053 + (1 << 13)) >> 14; + dctint t22a = (IN(13) * 9760 - IN(19) * 13160 + (1 << 13)) >> 14; + dctint t25a = (IN(13) * 13160 + IN(19) * 9760 + (1 << 13)) >> 14; + dctint t23a = (IN(29) * 16207 - IN( 3) * 2404 + (1 << 13)) >> 14; + dctint t24a = (IN(29) * 2404 + IN( 3) * 16207 + (1 << 13)) >> 14; + + dctint t0 = t0a + t3a; + dctint t1 = t1a + t2a; + dctint t2 = t1a - t2a; + dctint t3 = t0a - t3a; + dctint t4 = t4a + t5a; + dctint t5 = t4a - t5a; + dctint t6 = t7a - t6a; + dctint t7 = t7a + t6a; + dctint t8 = t8a + t9a; + dctint t9 = t8a - t9a; + dctint t10 = t11a - t10a; + dctint t11 = t11a + t10a; + dctint t12 = t12a + t13a; + dctint t13 = t12a - t13a; + dctint t14 = t15a - t14a; + dctint t15 = t15a + t14a; + dctint t16 = t16a + t17a; + dctint t17 = t16a - t17a; + dctint t18 = t19a - t18a; + dctint t19 = t19a + t18a; + dctint t20 = t20a + t21a; + dctint t21 = t20a - t21a; + dctint t22 = t23a - t22a; + dctint t23 = t23a + t22a; + dctint t24 = t24a + t25a; + dctint t25 = t24a - t25a; + dctint t26 = t27a - t26a; + dctint t27 = t27a + t26a; + dctint t28 = t28a + t29a; + dctint t29 = t28a - t29a; + dctint t30 = t31a - t30a; + dctint t31 = t31a + t30a; + + t5a = ((t6 - t5) * 11585 + (1 << 13)) >> 14; + t6a = ((t6 + t5) * 11585 + (1 << 13)) >> 14; + t9a = ( t14 * 6270 - t9 * 15137 + (1 << 13)) >> 14; + t14a = ( t14 * 15137 + t9 * 6270 + (1 << 13)) >> 14; + t10a = (-(t13 * 15137 + t10 * 6270) + (1 << 13)) >> 14; + t13a = ( t13 * 6270 - t10 * 15137 + (1 << 13)) >> 14; + t17a = ( t30 * 3196 - t17 * 16069 + (1 << 13)) >> 14; + t30a = ( t30 * 16069 + t17 * 3196 + (1 << 13)) >> 14; + t18a = (-(t29 * 16069 + t18 * 3196) + (1 << 13)) >> 14; + t29a = ( t29 * 3196 - t18 * 16069 + (1 << 13)) >> 14; + t21a = ( t26 * 13623 - t21 * 9102 + (1 << 13)) >> 14; + t26a = ( t26 * 9102 + t21 * 13623 + (1 << 13)) >> 14; + t22a = (-(t25 * 9102 + t22 * 13623) + (1 << 13)) >> 14; + t25a = ( t25 * 13623 - t22 * 9102 + (1 << 13)) >> 14; + + t0a = t0 + t7; + t1a = t1 + t6a; + t2a = t2 + t5a; + t3a = t3 + t4; + t4a = t3 - t4; + t5 = t2 - t5a; + t6 = t1 - t6a; + t7a = t0 - t7; + t8a = t8 + t11; + t9 = t9a + t10a; + t10 = t9a - t10a; + t11a = t8 - t11; + t12a = t15 - t12; + t13 = t14a - t13a; + t14 = t14a + t13a; + t15a = t15 + t12; + t16a = t16 + t19; + t17 = t17a + t18a; + t18 = t17a - t18a; + t19a = t16 - t19; + t20a = t23 - t20; + t21 = t22a - t21a; + t22 = t22a + t21a; + t23a = t23 + t20; + t24a = t24 + t27; + t25 = t25a + t26a; + t26 = t25a - t26a; + t27a = t24 - t27; + t28a = t31 - t28; + t29 = t30a - t29a; + t30 = t30a + t29a; + t31a = t31 + t28; + + t10a = ((t13 - t10) * 11585 + (1 << 13)) >> 14; + t13a = ((t13 + t10) * 11585 + (1 << 13)) >> 14; + t11 = ((t12a - t11a) * 11585 + (1 << 13)) >> 14; + t12 = ((t12a + t11a) * 11585 + (1 << 13)) >> 14; + t18a = ( t29 * 6270 - t18 * 15137 + (1 << 13)) >> 14; + t29a = ( t29 * 15137 + t18 * 6270 + (1 << 13)) >> 14; + t19 = ( t28a * 6270 - t19a * 15137 + (1 << 13)) >> 14; + t28 = ( t28a * 15137 + t19a * 6270 + (1 << 13)) >> 14; + t20 = (-(t27a * 15137 + t20a * 6270) + (1 << 13)) >> 14; + t27 = ( t27a * 6270 - t20a * 15137 + (1 << 13)) >> 14; + t21a = (-(t26 * 15137 + t21 * 6270) + (1 << 13)) >> 14; + t26a = ( t26 * 6270 - t21 * 15137 + (1 << 13)) >> 14; + + t0 = t0a + t15a; + t1 = t1a + t14; + t2 = t2a + t13a; + t3 = t3a + t12; + t4 = t4a + t11; + t5a = t5 + t10a; + t6a = t6 + t9; + t7 = t7a + t8a; + t8 = t7a - t8a; + t9a = t6 - t9; + t10 = t5 - t10a; + t11a = t4a - t11; + t12a = t3a - t12; + t13 = t2a - t13a; + t14a = t1a - t14; + t15 = t0a - t15a; + t16 = t16a + t23a; + t17a = t17 + t22; + t18 = t18a + t21a; + t19a = t19 + t20; + t20a = t19 - t20; + t21 = t18a - t21a; + t22a = t17 - t22; + t23 = t16a - t23a; + t24 = t31a - t24a; + t25a = t30 - t25; + t26 = t29a - t26a; + t27a = t28 - t27; + t28a = t28 + t27; + t29 = t29a + t26a; + t30a = t30 + t25; + t31 = t31a + t24a; + + t20 = ((t27a - t20a) * 11585 + (1 << 13)) >> 14; + t27 = ((t27a + t20a) * 11585 + (1 << 13)) >> 14; + t21a = ((t26 - t21 ) * 11585 + (1 << 13)) >> 14; + t26a = ((t26 + t21 ) * 11585 + (1 << 13)) >> 14; + t22 = ((t25a - t22a) * 11585 + (1 << 13)) >> 14; + t25 = ((t25a + t22a) * 11585 + (1 << 13)) >> 14; + t23a = ((t24 - t23 ) * 11585 + (1 << 13)) >> 14; + t24a = ((t24 + t23 ) * 11585 + (1 << 13)) >> 14; + + out[ 0] = t0 + t31; + out[ 1] = t1 + t30a; + out[ 2] = t2 + t29; + out[ 3] = t3 + t28a; + out[ 4] = t4 + t27; + out[ 5] = t5a + t26a; + out[ 6] = t6a + t25; + out[ 7] = t7 + t24a; + out[ 8] = t8 + t23a; + out[ 9] = t9a + t22; + out[10] = t10 + t21a; + out[11] = t11a + t20; + out[12] = t12a + t19a; + out[13] = t13 + t18; + out[14] = t14a + t17a; + out[15] = t15 + t16; + out[16] = t15 - t16; + out[17] = t14a - t17a; + out[18] = t13 - t18; + out[19] = t12a - t19a; + out[20] = t11a - t20; + out[21] = t10 - t21a; + out[22] = t9a - t22; + out[23] = t8 - t23a; + out[24] = t7 - t24a; + out[25] = t6a - t25; + out[26] = t5a - t26a; + out[27] = t4 - t27; + out[28] = t3 - t28a; + out[29] = t2 - t29; + out[30] = t1 - t30a; + out[31] = t0 - t31; +} + +itxfm_wrapper(idct, idct, 32, 6, 1) + +static av_always_inline void iwht4_1d(const dctcoef *in, ptrdiff_t stride, + dctcoef *out, int pass) +{ + int t0, t1, t2, t3, t4; + + if (pass == 0) { + t0 = IN(0) >> 2; + t1 = IN(3) >> 2; + t2 = IN(1) >> 2; + t3 = IN(2) >> 2; + } else { + t0 = IN(0); + t1 = IN(3); + t2 = IN(1); + t3 = IN(2); + } + + t0 += t2; + t3 -= t1; + t4 = (t0 - t3) >> 1; + t1 = t4 - t1; + t2 = t4 - t2; + t0 -= t1; + t3 += t2; + + out[0] = t0; + out[1] = t1; + out[2] = t2; + out[3] = t3; +} + +itxfm_wrapper(iwht, iwht, 4, 0, 0) + +#undef IN +#undef itxfm_wrapper +#undef itxfm_wrap + +static av_cold void vp9dsp_itxfm_init(VP9DSPContext *dsp) +{ +#define init_itxfm(tx, sz) \ + dsp->itxfm_add[tx][DCT_DCT] = idct_idct_##sz##_add_c; \ + dsp->itxfm_add[tx][DCT_ADST] = iadst_idct_##sz##_add_c; \ + dsp->itxfm_add[tx][ADST_DCT] = idct_iadst_##sz##_add_c; \ + dsp->itxfm_add[tx][ADST_ADST] = iadst_iadst_##sz##_add_c + +#define init_idct(tx, nm) \ + dsp->itxfm_add[tx][DCT_DCT] = \ + dsp->itxfm_add[tx][ADST_DCT] = \ + dsp->itxfm_add[tx][DCT_ADST] = \ + dsp->itxfm_add[tx][ADST_ADST] = nm##_add_c + + init_itxfm(TX_4X4, 4x4); + init_itxfm(TX_8X8, 8x8); + init_itxfm(TX_16X16, 16x16); + init_idct(TX_32X32, idct_idct_32x32); + init_idct(4 /* lossless */, iwht_iwht_4x4); + +#undef init_itxfm +#undef init_idct +} + +static av_always_inline void loop_filter(pixel *dst, int E, int I, int H, + ptrdiff_t stridea, ptrdiff_t strideb, + int wd) +{ + int i, F = 1 << (BIT_DEPTH - 8); + + E <<= (BIT_DEPTH - 8); + I <<= (BIT_DEPTH - 8); + H <<= (BIT_DEPTH - 8); + for (i = 0; i < 8; i++, dst += stridea) { + int p7, p6, p5, p4; + int p3 = dst[strideb * -4], p2 = dst[strideb * -3]; + int p1 = dst[strideb * -2], p0 = dst[strideb * -1]; + int q0 = dst[strideb * +0], q1 = dst[strideb * +1]; + int q2 = dst[strideb * +2], q3 = dst[strideb * +3]; + int q4, q5, q6, q7; + int fm = FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && + FFABS(p1 - p0) <= I && FFABS(q1 - q0) <= I && + FFABS(q2 - q1) <= I && FFABS(q3 - q2) <= I && + FFABS(p0 - q0) * 2 + (FFABS(p1 - q1) >> 1) <= E; + int flat8out, flat8in; + + if (!fm) + continue; + + if (wd >= 16) { + p7 = dst[strideb * -8]; + p6 = dst[strideb * -7]; + p5 = dst[strideb * -6]; + p4 = dst[strideb * -5]; + q4 = dst[strideb * +4]; + q5 = dst[strideb * +5]; + q6 = dst[strideb * +6]; + q7 = dst[strideb * +7]; + + flat8out = FFABS(p7 - p0) <= F && FFABS(p6 - p0) <= F && + FFABS(p5 - p0) <= F && FFABS(p4 - p0) <= F && + FFABS(q4 - q0) <= F && FFABS(q5 - q0) <= F && + FFABS(q6 - q0) <= F && FFABS(q7 - q0) <= F; + } + + if (wd >= 8) + flat8in = FFABS(p3 - p0) <= F && FFABS(p2 - p0) <= F && + FFABS(p1 - p0) <= F && FFABS(q1 - q0) <= F && + FFABS(q2 - q0) <= F && FFABS(q3 - q0) <= F; + + if (wd >= 16 && flat8out && flat8in) { + dst[strideb * -7] = (p7 + p7 + p7 + p7 + p7 + p7 + p7 + p6 * 2 + + p5 + p4 + p3 + p2 + p1 + p0 + q0 + 8) >> 4; + dst[strideb * -6] = (p7 + p7 + p7 + p7 + p7 + p7 + p6 + p5 * 2 + + p4 + p3 + p2 + p1 + p0 + q0 + q1 + 8) >> 4; + dst[strideb * -5] = (p7 + p7 + p7 + p7 + p7 + p6 + p5 + p4 * 2 + + p3 + p2 + p1 + p0 + q0 + q1 + q2 + 8) >> 4; + dst[strideb * -4] = (p7 + p7 + p7 + p7 + p6 + p5 + p4 + p3 * 2 + + p2 + p1 + p0 + q0 + q1 + q2 + q3 + 8) >> 4; + dst[strideb * -3] = (p7 + p7 + p7 + p6 + p5 + p4 + p3 + p2 * 2 + + p1 + p0 + q0 + q1 + q2 + q3 + q4 + 8) >> 4; + dst[strideb * -2] = (p7 + p7 + p6 + p5 + p4 + p3 + p2 + p1 * 2 + + p0 + q0 + q1 + q2 + q3 + q4 + q5 + 8) >> 4; + dst[strideb * -1] = (p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 + + q0 + q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4; + dst[strideb * +0] = (p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 + + q1 + q2 + q3 + q4 + q5 + q6 + q7 + 8) >> 4; + dst[strideb * +1] = (p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 + + q2 + q3 + q4 + q5 + q6 + q7 + q7 + 8) >> 4; + dst[strideb * +2] = (p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 + + q3 + q4 + q5 + q6 + q7 + q7 + q7 + 8) >> 4; + dst[strideb * +3] = (p3 + p2 + p1 + p0 + q0 + q1 + q2 + q3 * 2 + + q4 + q5 + q6 + q7 + q7 + q7 + q7 + 8) >> 4; + dst[strideb * +4] = (p2 + p1 + p0 + q0 + q1 + q2 + q3 + q4 * 2 + + q5 + q6 + q7 + q7 + q7 + q7 + q7 + 8) >> 4; + dst[strideb * +5] = (p1 + p0 + q0 + q1 + q2 + q3 + q4 + q5 * 2 + + q6 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4; + dst[strideb * +6] = (p0 + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + + q7 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4; + } else if (wd >= 8 && flat8in) { + dst[strideb * -3] = (p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0 + 4) >> 3; + dst[strideb * -2] = (p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1 + 4) >> 3; + dst[strideb * -1] = (p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3; + dst[strideb * +0] = (p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3; + dst[strideb * +1] = (p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3 + 4) >> 3; + dst[strideb * +2] = (p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3 + 4) >> 3; + } else { + int hev = FFABS(p1 - p0) > H || FFABS(q1 - q0) > H; + + if (hev) { + int f = av_clip_intp2(p1 - q1, BIT_DEPTH - 1), f1, f2; + f = av_clip_intp2(3 * (q0 - p0) + f, BIT_DEPTH - 1); + + f1 = FFMIN(f + 4, (1 << (BIT_DEPTH - 1)) - 1) >> 3; + f2 = FFMIN(f + 3, (1 << (BIT_DEPTH - 1)) - 1) >> 3; + + dst[strideb * -1] = av_clip_pixel(p0 + f2); + dst[strideb * +0] = av_clip_pixel(q0 - f1); + } else { + int f = av_clip_intp2(3 * (q0 - p0), BIT_DEPTH - 1), f1, f2; + + f1 = FFMIN(f + 4, (1 << (BIT_DEPTH - 1)) - 1) >> 3; + f2 = FFMIN(f + 3, (1 << (BIT_DEPTH - 1)) - 1) >> 3; + + dst[strideb * -1] = av_clip_pixel(p0 + f2); + dst[strideb * +0] = av_clip_pixel(q0 - f1); + + f = (f1 + 1) >> 1; + dst[strideb * -2] = av_clip_pixel(p1 + f); + dst[strideb * +1] = av_clip_pixel(q1 - f); + } + } + } +} + +#define lf_8_fn(dir, wd, stridea, strideb) \ +static void loop_filter_##dir##_##wd##_8_c(uint8_t *_dst, \ + ptrdiff_t stride, \ + int E, int I, int H) \ +{ \ + pixel *dst = (pixel *) _dst; \ + stride /= sizeof(pixel); \ + loop_filter(dst, E, I, H, stridea, strideb, wd); \ +} + +#define lf_8_fns(wd) \ +lf_8_fn(h, wd, stride, 1) \ +lf_8_fn(v, wd, 1, stride) + +lf_8_fns(4) +lf_8_fns(8) +lf_8_fns(16) + +#undef lf_8_fn +#undef lf_8_fns + +#define lf_16_fn(dir, stridea) \ +static void loop_filter_##dir##_16_16_c(uint8_t *dst, \ + ptrdiff_t stride, \ + int E, int I, int H) \ +{ \ + loop_filter_##dir##_16_8_c(dst, stride, E, I, H); \ + loop_filter_##dir##_16_8_c(dst + 8 * stridea, stride, E, I, H); \ +} + +lf_16_fn(h, stride) +lf_16_fn(v, sizeof(pixel)) + +#undef lf_16_fn + +#define lf_mix_fn(dir, wd1, wd2, stridea) \ +static void loop_filter_##dir##_##wd1##wd2##_16_c(uint8_t *dst, \ + ptrdiff_t stride, \ + int E, int I, int H) \ +{ \ + loop_filter_##dir##_##wd1##_8_c(dst, stride, E & 0xff, I & 0xff, H & 0xff); \ + loop_filter_##dir##_##wd2##_8_c(dst + 8 * stridea, stride, E >> 8, I >> 8, H >> 8); \ +} + +#define lf_mix_fns(wd1, wd2) \ +lf_mix_fn(h, wd1, wd2, stride) \ +lf_mix_fn(v, wd1, wd2, sizeof(pixel)) + +lf_mix_fns(4, 4) +lf_mix_fns(4, 8) +lf_mix_fns(8, 4) +lf_mix_fns(8, 8) + +#undef lf_mix_fn +#undef lf_mix_fns + +static av_cold void vp9dsp_loopfilter_init(VP9DSPContext *dsp) +{ + dsp->loop_filter_8[0][0] = loop_filter_h_4_8_c; + dsp->loop_filter_8[0][1] = loop_filter_v_4_8_c; + dsp->loop_filter_8[1][0] = loop_filter_h_8_8_c; + dsp->loop_filter_8[1][1] = loop_filter_v_8_8_c; + dsp->loop_filter_8[2][0] = loop_filter_h_16_8_c; + dsp->loop_filter_8[2][1] = loop_filter_v_16_8_c; + + dsp->loop_filter_16[0] = loop_filter_h_16_16_c; + dsp->loop_filter_16[1] = loop_filter_v_16_16_c; + + dsp->loop_filter_mix2[0][0][0] = loop_filter_h_44_16_c; + dsp->loop_filter_mix2[0][0][1] = loop_filter_v_44_16_c; + dsp->loop_filter_mix2[0][1][0] = loop_filter_h_48_16_c; + dsp->loop_filter_mix2[0][1][1] = loop_filter_v_48_16_c; + dsp->loop_filter_mix2[1][0][0] = loop_filter_h_84_16_c; + dsp->loop_filter_mix2[1][0][1] = loop_filter_v_84_16_c; + dsp->loop_filter_mix2[1][1][0] = loop_filter_h_88_16_c; + dsp->loop_filter_mix2[1][1][1] = loop_filter_v_88_16_c; +} + +#if BIT_DEPTH != 12 + +static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride, + const uint8_t *src, ptrdiff_t src_stride, + int w, int h) +{ + do { + memcpy(dst, src, w * sizeof(pixel)); + + dst += dst_stride; + src += src_stride; + } while (--h); +} + +static av_always_inline void avg_c(uint8_t *_dst, ptrdiff_t dst_stride, + const uint8_t *_src, ptrdiff_t src_stride, + int w, int h) +{ + pixel *dst = (pixel *) _dst; + const pixel *src = (const pixel *) _src; + + dst_stride /= sizeof(pixel); + src_stride /= sizeof(pixel); + do { + int x; + + for (x = 0; x < w; x += 4) + AV_WN4PA(&dst[x], rnd_avg_pixel4(AV_RN4PA(&dst[x]), AV_RN4P(&src[x]))); + + dst += dst_stride; + src += src_stride; + } while (--h); +} + +#define fpel_fn(type, sz) \ +static void type##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int h, int mx, int my) \ +{ \ + type##_c(dst, dst_stride, src, src_stride, sz, h); \ +} + +#define copy_avg_fn(sz) \ +fpel_fn(copy, sz) \ +fpel_fn(avg, sz) + +copy_avg_fn(64) +copy_avg_fn(32) +copy_avg_fn(16) +copy_avg_fn(8) +copy_avg_fn(4) + +#undef fpel_fn +#undef copy_avg_fn + +#endif /* BIT_DEPTH != 12 */ + +#define FILTER_8TAP(src, x, F, stride) \ + av_clip_pixel((F[0] * src[x + -3 * stride] + \ + F[1] * src[x + -2 * stride] + \ + F[2] * src[x + -1 * stride] + \ + F[3] * src[x + +0 * stride] + \ + F[4] * src[x + +1 * stride] + \ + F[5] * src[x + +2 * stride] + \ + F[6] * src[x + +3 * stride] + \ + F[7] * src[x + +4 * stride] + 64) >> 7) + +static av_always_inline void do_8tap_1d_c(uint8_t *_dst, ptrdiff_t dst_stride, + const uint8_t *_src, ptrdiff_t src_stride, + int w, int h, ptrdiff_t ds, + const int16_t *filter, int avg) +{ + pixel *dst = (pixel *) _dst; + const pixel *src = (const pixel *) _src; + + dst_stride /= sizeof(pixel); + src_stride /= sizeof(pixel); + do { + int x; + + for (x = 0; x < w; x++) + if (avg) { + dst[x] = (dst[x] + FILTER_8TAP(src, x, filter, ds) + 1) >> 1; + } else { + dst[x] = FILTER_8TAP(src, x, filter, ds); + } + + dst += dst_stride; + src += src_stride; + } while (--h); +} + +#define filter_8tap_1d_fn(opn, opa, dir, ds) \ +static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int w, int h, const int16_t *filter) \ +{ \ + do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \ +} + +filter_8tap_1d_fn(put, 0, v, src_stride / sizeof(pixel)) +filter_8tap_1d_fn(put, 0, h, 1) +filter_8tap_1d_fn(avg, 1, v, src_stride / sizeof(pixel)) +filter_8tap_1d_fn(avg, 1, h, 1) + +#undef filter_8tap_1d_fn + +static av_always_inline void do_8tap_2d_c(uint8_t *_dst, ptrdiff_t dst_stride, + const uint8_t *_src, ptrdiff_t src_stride, + int w, int h, const int16_t *filterx, + const int16_t *filtery, int avg) +{ + int tmp_h = h + 7; + pixel tmp[64 * 71], *tmp_ptr = tmp; + pixel *dst = (pixel *) _dst; + const pixel *src = (const pixel *) _src; + + dst_stride /= sizeof(pixel); + src_stride /= sizeof(pixel); + src -= src_stride * 3; + do { + int x; + + for (x = 0; x < w; x++) + tmp_ptr[x] = FILTER_8TAP(src, x, filterx, 1); + + tmp_ptr += 64; + src += src_stride; + } while (--tmp_h); + + tmp_ptr = tmp + 64 * 3; + do { + int x; + + for (x = 0; x < w; x++) + if (avg) { + dst[x] = (dst[x] + FILTER_8TAP(tmp_ptr, x, filtery, 64) + 1) >> 1; + } else { + dst[x] = FILTER_8TAP(tmp_ptr, x, filtery, 64); + } + + tmp_ptr += 64; + dst += dst_stride; + } while (--h); +} + +#define filter_8tap_2d_fn(opn, opa) \ +static av_noinline void opn##_8tap_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int w, int h, const int16_t *filterx, \ + const int16_t *filtery) \ +{ \ + do_8tap_2d_c(dst, dst_stride, src, src_stride, w, h, filterx, filtery, opa); \ +} + +filter_8tap_2d_fn(put, 0) +filter_8tap_2d_fn(avg, 1) + +#undef filter_8tap_2d_fn + +#define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \ +static void avg##_8tap_##type##_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int h, int mx, int my) \ +{ \ + avg##_8tap_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, \ + ff_vp9_subpel_filters[type_idx][dir_m]); \ +} + +#define filter_fn_2d(sz, type, type_idx, avg) \ +static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int h, int mx, int my) \ +{ \ + avg##_8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \ + ff_vp9_subpel_filters[type_idx][mx], \ + ff_vp9_subpel_filters[type_idx][my]); \ +} + +#if BIT_DEPTH != 12 + +#define FILTER_BILIN(src, x, mxy, stride) \ + (src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4)) + +static av_always_inline void do_bilin_1d_c(uint8_t *_dst, ptrdiff_t dst_stride, + const uint8_t *_src, ptrdiff_t src_stride, + int w, int h, ptrdiff_t ds, int mxy, int avg) +{ + pixel *dst = (pixel *) _dst; + const pixel *src = (const pixel *) _src; + + dst_stride /= sizeof(pixel); + src_stride /= sizeof(pixel); + do { + int x; + + for (x = 0; x < w; x++) + if (avg) { + dst[x] = (dst[x] + FILTER_BILIN(src, x, mxy, ds) + 1) >> 1; + } else { + dst[x] = FILTER_BILIN(src, x, mxy, ds); + } + + dst += dst_stride; + src += src_stride; + } while (--h); +} + +#define bilin_1d_fn(opn, opa, dir, ds) \ +static av_noinline void opn##_bilin_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int w, int h, int mxy) \ +{ \ + do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \ +} + +bilin_1d_fn(put, 0, v, src_stride / sizeof(pixel)) +bilin_1d_fn(put, 0, h, 1) +bilin_1d_fn(avg, 1, v, src_stride / sizeof(pixel)) +bilin_1d_fn(avg, 1, h, 1) + +#undef bilin_1d_fn + +static av_always_inline void do_bilin_2d_c(uint8_t *_dst, ptrdiff_t dst_stride, + const uint8_t *_src, ptrdiff_t src_stride, + int w, int h, int mx, int my, int avg) +{ + pixel tmp[64 * 65], *tmp_ptr = tmp; + int tmp_h = h + 1; + pixel *dst = (pixel *) _dst; + const pixel *src = (const pixel *) _src; + + dst_stride /= sizeof(pixel); + src_stride /= sizeof(pixel); + do { + int x; + + for (x = 0; x < w; x++) + tmp_ptr[x] = FILTER_BILIN(src, x, mx, 1); + + tmp_ptr += 64; + src += src_stride; + } while (--tmp_h); + + tmp_ptr = tmp; + do { + int x; + + for (x = 0; x < w; x++) + if (avg) { + dst[x] = (dst[x] + FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1; + } else { + dst[x] = FILTER_BILIN(tmp_ptr, x, my, 64); + } + + tmp_ptr += 64; + dst += dst_stride; + } while (--h); +} + +#define bilin_2d_fn(opn, opa) \ +static av_noinline void opn##_bilin_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int w, int h, int mx, int my) \ +{ \ + do_bilin_2d_c(dst, dst_stride, src, src_stride, w, h, mx, my, opa); \ +} + +bilin_2d_fn(put, 0) +bilin_2d_fn(avg, 1) + +#undef bilin_2d_fn + +#define bilinf_fn_1d(sz, dir, dir_m, avg) \ +static void avg##_bilin_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int h, int mx, int my) \ +{ \ + avg##_bilin_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, dir_m); \ +} + +#define bilinf_fn_2d(sz, avg) \ +static void avg##_bilin_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int h, int mx, int my) \ +{ \ + avg##_bilin_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, mx, my); \ +} + +#else + +#define bilinf_fn_1d(a, b, c, d) +#define bilinf_fn_2d(a, b) + +#endif + +#define filter_fn(sz, avg) \ +filter_fn_1d(sz, h, mx, regular, FILTER_8TAP_REGULAR, avg) \ +filter_fn_1d(sz, v, my, regular, FILTER_8TAP_REGULAR, avg) \ +filter_fn_2d(sz, regular, FILTER_8TAP_REGULAR, avg) \ +filter_fn_1d(sz, h, mx, smooth, FILTER_8TAP_SMOOTH, avg) \ +filter_fn_1d(sz, v, my, smooth, FILTER_8TAP_SMOOTH, avg) \ +filter_fn_2d(sz, smooth, FILTER_8TAP_SMOOTH, avg) \ +filter_fn_1d(sz, h, mx, sharp, FILTER_8TAP_SHARP, avg) \ +filter_fn_1d(sz, v, my, sharp, FILTER_8TAP_SHARP, avg) \ +filter_fn_2d(sz, sharp, FILTER_8TAP_SHARP, avg) \ +bilinf_fn_1d(sz, h, mx, avg) \ +bilinf_fn_1d(sz, v, my, avg) \ +bilinf_fn_2d(sz, avg) + +#define filter_fn_set(avg) \ +filter_fn(64, avg) \ +filter_fn(32, avg) \ +filter_fn(16, avg) \ +filter_fn(8, avg) \ +filter_fn(4, avg) + +filter_fn_set(put) +filter_fn_set(avg) + +#undef filter_fn +#undef filter_fn_set +#undef filter_fn_1d +#undef filter_fn_2d +#undef bilinf_fn_1d +#undef bilinf_fn_2d + +#if BIT_DEPTH != 8 +void ff_vp9dsp_mc_init_10(VP9DSPContext *dsp); +#endif +#if BIT_DEPTH != 10 +static +#endif +av_cold void FUNC(ff_vp9dsp_mc_init)(VP9DSPContext *dsp) +{ +#if BIT_DEPTH == 12 + ff_vp9dsp_mc_init_10(dsp); +#else /* BIT_DEPTH == 12 */ + +#define init_fpel(idx1, idx2, sz, type) \ + dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = type##sz##_c; \ + dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = type##sz##_c; \ + dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][0][0] = type##sz##_c; \ + dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = type##sz##_c + +#define init_copy_avg(idx, sz) \ + init_fpel(idx, 0, sz, copy); \ + init_fpel(idx, 1, sz, avg) + + init_copy_avg(0, 64); + init_copy_avg(1, 32); + init_copy_avg(2, 16); + init_copy_avg(3, 8); + init_copy_avg(4, 4); + +#undef init_copy_avg +#undef init_fpel + +#endif /* BIT_DEPTH == 12 */ + +#define init_subpel1_bd_aware(idx1, idx2, idxh, idxv, sz, dir, type) \ + dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_c; \ + dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_c; \ + dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_c + +#if BIT_DEPTH == 12 +#define init_subpel1 init_subpel1_bd_aware +#else +#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type) \ + init_subpel1_bd_aware(idx1, idx2, idxh, idxv, sz, dir, type); \ + dsp->mc[idx1][FILTER_BILINEAR ][idx2][idxh][idxv] = type##_bilin_##sz##dir##_c +#endif + +#define init_subpel2(idx, idxh, idxv, dir, type) \ + init_subpel1(0, idx, idxh, idxv, 64, dir, type); \ + init_subpel1(1, idx, idxh, idxv, 32, dir, type); \ + init_subpel1(2, idx, idxh, idxv, 16, dir, type); \ + init_subpel1(3, idx, idxh, idxv, 8, dir, type); \ + init_subpel1(4, idx, idxh, idxv, 4, dir, type) + +#define init_subpel3(idx, type) \ + init_subpel2(idx, 1, 1, hv, type); \ + init_subpel2(idx, 0, 1, v, type); \ + init_subpel2(idx, 1, 0, h, type) + + init_subpel3(0, put); + init_subpel3(1, avg); + +#undef init_subpel1 +#undef init_subpel2 +#undef init_subpel3 +#undef init_subpel1_bd_aware +} + +static av_always_inline void do_scaled_8tap_c(uint8_t *_dst, ptrdiff_t dst_stride, + const uint8_t *_src, ptrdiff_t src_stride, + int w, int h, int mx, int my, + int dx, int dy, int avg, + const int16_t (*filters)[8]) +{ + int tmp_h = (((h - 1) * dy + my) >> 4) + 8; + pixel tmp[64 * 135], *tmp_ptr = tmp; + pixel *dst = (pixel *) _dst; + const pixel *src = (const pixel *) _src; + + dst_stride /= sizeof(pixel); + src_stride /= sizeof(pixel); + src -= src_stride * 3; + do { + int x; + int imx = mx, ioff = 0; + + for (x = 0; x < w; x++) { + tmp_ptr[x] = FILTER_8TAP(src, ioff, filters[imx], 1); + imx += dx; + ioff += imx >> 4; + imx &= 0xf; + } + + tmp_ptr += 64; + src += src_stride; + } while (--tmp_h); + + tmp_ptr = tmp + 64 * 3; + do { + int x; + const int16_t *filter = filters[my]; + + for (x = 0; x < w; x++) + if (avg) { + dst[x] = (dst[x] + FILTER_8TAP(tmp_ptr, x, filter, 64) + 1) >> 1; + } else { + dst[x] = FILTER_8TAP(tmp_ptr, x, filter, 64); + } + + my += dy; + tmp_ptr += (my >> 4) * 64; + my &= 0xf; + dst += dst_stride; + } while (--h); +} + +#define scaled_filter_8tap_fn(opn, opa) \ +static av_noinline void opn##_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int w, int h, int mx, int my, int dx, int dy, \ + const int16_t (*filters)[8]) \ +{ \ + do_scaled_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ + opa, filters); \ +} + +scaled_filter_8tap_fn(put, 0) +scaled_filter_8tap_fn(avg, 1) + +#undef scaled_filter_8tap_fn + +#undef FILTER_8TAP + +#define scaled_filter_fn(sz, type, type_idx, avg) \ +static void avg##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int h, int mx, int my, int dx, int dy) \ +{ \ + avg##_scaled_8tap_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy, \ + ff_vp9_subpel_filters[type_idx]); \ +} + +#if BIT_DEPTH != 12 + +static av_always_inline void do_scaled_bilin_c(uint8_t *_dst, ptrdiff_t dst_stride, + const uint8_t *_src, ptrdiff_t src_stride, + int w, int h, int mx, int my, + int dx, int dy, int avg) +{ + pixel tmp[64 * 129], *tmp_ptr = tmp; + int tmp_h = (((h - 1) * dy + my) >> 4) + 2; + pixel *dst = (pixel *) _dst; + const pixel *src = (const pixel *) _src; + + dst_stride /= sizeof(pixel); + src_stride /= sizeof(pixel); + do { + int x; + int imx = mx, ioff = 0; + + for (x = 0; x < w; x++) { + tmp_ptr[x] = FILTER_BILIN(src, ioff, imx, 1); + imx += dx; + ioff += imx >> 4; + imx &= 0xf; + } + + tmp_ptr += 64; + src += src_stride; + } while (--tmp_h); + + tmp_ptr = tmp; + do { + int x; + + for (x = 0; x < w; x++) + if (avg) { + dst[x] = (dst[x] + FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1; + } else { + dst[x] = FILTER_BILIN(tmp_ptr, x, my, 64); + } + + my += dy; + tmp_ptr += (my >> 4) * 64; + my &= 0xf; + dst += dst_stride; + } while (--h); +} + +#define scaled_bilin_fn(opn, opa) \ +static av_noinline void opn##_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int w, int h, int mx, int my, int dx, int dy) \ +{ \ + do_scaled_bilin_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, opa); \ +} + +scaled_bilin_fn(put, 0) +scaled_bilin_fn(avg, 1) + +#undef scaled_bilin_fn + +#undef FILTER_BILIN + +#define scaled_bilinf_fn(sz, avg) \ +static void avg##_scaled_bilin_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int h, int mx, int my, int dx, int dy) \ +{ \ + avg##_scaled_bilin_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy); \ +} + +#else + +#define scaled_bilinf_fn(a, b) + +#endif + +#define scaled_filter_fns(sz, avg) \ +scaled_filter_fn(sz, regular, FILTER_8TAP_REGULAR, avg) \ +scaled_filter_fn(sz, smooth, FILTER_8TAP_SMOOTH, avg) \ +scaled_filter_fn(sz, sharp, FILTER_8TAP_SHARP, avg) \ +scaled_bilinf_fn(sz, avg) + +#define scaled_filter_fn_set(avg) \ +scaled_filter_fns(64, avg) \ +scaled_filter_fns(32, avg) \ +scaled_filter_fns(16, avg) \ +scaled_filter_fns(8, avg) \ +scaled_filter_fns(4, avg) + +scaled_filter_fn_set(put) +scaled_filter_fn_set(avg) + +#undef scaled_filter_fns +#undef scaled_filter_fn_set +#undef scaled_filter_fn +#undef scaled_bilinf_fn + +#if BIT_DEPTH != 8 +void ff_vp9dsp_scaled_mc_init_10(VP9DSPContext *dsp); +#endif +#if BIT_DEPTH != 10 +static +#endif +av_cold void FUNC(ff_vp9dsp_scaled_mc_init)(VP9DSPContext *dsp) +{ +#define init_scaled_bd_aware(idx1, idx2, sz, type) \ + dsp->smc[idx1][FILTER_8TAP_SMOOTH ][idx2] = type##_scaled_smooth_##sz##_c; \ + dsp->smc[idx1][FILTER_8TAP_REGULAR][idx2] = type##_scaled_regular_##sz##_c; \ + dsp->smc[idx1][FILTER_8TAP_SHARP ][idx2] = type##_scaled_sharp_##sz##_c + +#if BIT_DEPTH == 12 + ff_vp9dsp_scaled_mc_init_10(dsp); +#define init_scaled(a,b,c,d) init_scaled_bd_aware(a,b,c,d) +#else +#define init_scaled(idx1, idx2, sz, type) \ + init_scaled_bd_aware(idx1, idx2, sz, type); \ + dsp->smc[idx1][FILTER_BILINEAR ][idx2] = type##_scaled_bilin_##sz##_c +#endif + +#define init_scaled_put_avg(idx, sz) \ + init_scaled(idx, 0, sz, put); \ + init_scaled(idx, 1, sz, avg) + + init_scaled_put_avg(0, 64); + init_scaled_put_avg(1, 32); + init_scaled_put_avg(2, 16); + init_scaled_put_avg(3, 8); + init_scaled_put_avg(4, 4); + +#undef init_scaled_put_avg +#undef init_scaled +#undef init_scaled_bd_aware +} + +av_cold void FUNC(ff_vp9dsp_init)(VP9DSPContext *dsp) +{ + FUNC(ff_vp9dsp_intrapred_init)(dsp); + vp9dsp_itxfm_init(dsp); + vp9dsp_loopfilter_init(dsp); + FUNC(ff_vp9dsp_mc_init)(dsp); + FUNC(ff_vp9dsp_scaled_mc_init)(dsp); +} diff --git a/libs/ffvpx/libavcodec/vp9lpf.c b/libs/ffvpx/libavcodec/vp9lpf.c new file mode 100644 index 000000000..414cede85 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9lpf.c @@ -0,0 +1,202 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "vp9dec.h" + +static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v, + uint8_t *lvl, uint8_t (*mask)[4], + uint8_t *dst, ptrdiff_t ls) +{ + int y, x, bytesperpixel = s->bytesperpixel; + + // filter edges between columns (e.g. block1 | block2) + for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) { + uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v]; + unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3]; + unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3]; + unsigned hm = hm1 | hm2 | hm13 | hm23; + + for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) { + if (col || x > 1) { + if (hm1 & x) { + int L = *l, H = L >> 4; + int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L]; + + if (hmask1[0] & x) { + if (hmask2[0] & x) { + av_assert2(l[8 << ss_v] == L); + s->dsp.loop_filter_16[0](ptr, ls, E, I, H); + } else { + s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H); + } + } else if (hm2 & x) { + L = l[8 << ss_v]; + H |= (L >> 4) << 8; + E |= s->filter_lut.mblim_lut[L] << 8; + I |= s->filter_lut.lim_lut[L] << 8; + s->dsp.loop_filter_mix2[!!(hmask1[1] & x)] + [!!(hmask2[1] & x)] + [0](ptr, ls, E, I, H); + } else { + s->dsp.loop_filter_8[!!(hmask1[1] & x)] + [0](ptr, ls, E, I, H); + } + } else if (hm2 & x) { + int L = l[8 << ss_v], H = L >> 4; + int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L]; + + s->dsp.loop_filter_8[!!(hmask2[1] & x)] + [0](ptr + 8 * ls, ls, E, I, H); + } + } + if (ss_h) { + if (x & 0xAA) + l += 2; + } else { + if (hm13 & x) { + int L = *l, H = L >> 4; + int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L]; + + if (hm23 & x) { + L = l[8 << ss_v]; + H |= (L >> 4) << 8; + E |= s->filter_lut.mblim_lut[L] << 8; + I |= s->filter_lut.lim_lut[L] << 8; + s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H); + } else { + s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H); + } + } else if (hm23 & x) { + int L = l[8 << ss_v], H = L >> 4; + int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L]; + + s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H); + } + l++; + } + } + } +} + +static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v, + uint8_t *lvl, uint8_t (*mask)[4], + uint8_t *dst, ptrdiff_t ls) +{ + int y, x, bytesperpixel = s->bytesperpixel; + + // block1 + // filter edges between rows (e.g. ------) + // block2 + for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) { + uint8_t *ptr = dst, *l = lvl, *vmask = mask[y]; + unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3]; + + for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) { + if (row || y) { + if (vm & x) { + int L = *l, H = L >> 4; + int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L]; + + if (vmask[0] & x) { + if (vmask[0] & (x << (1 + ss_h))) { + av_assert2(l[1 + ss_h] == L); + s->dsp.loop_filter_16[1](ptr, ls, E, I, H); + } else { + s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H); + } + } else if (vm & (x << (1 + ss_h))) { + L = l[1 + ss_h]; + H |= (L >> 4) << 8; + E |= s->filter_lut.mblim_lut[L] << 8; + I |= s->filter_lut.lim_lut[L] << 8; + s->dsp.loop_filter_mix2[!!(vmask[1] & x)] + [!!(vmask[1] & (x << (1 + ss_h)))] + [1](ptr, ls, E, I, H); + } else { + s->dsp.loop_filter_8[!!(vmask[1] & x)] + [1](ptr, ls, E, I, H); + } + } else if (vm & (x << (1 + ss_h))) { + int L = l[1 + ss_h], H = L >> 4; + int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L]; + + s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))] + [1](ptr + 8 * bytesperpixel, ls, E, I, H); + } + } + if (!ss_v) { + if (vm3 & x) { + int L = *l, H = L >> 4; + int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L]; + + if (vm3 & (x << (1 + ss_h))) { + L = l[1 + ss_h]; + H |= (L >> 4) << 8; + E |= s->filter_lut.mblim_lut[L] << 8; + I |= s->filter_lut.lim_lut[L] << 8; + s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H); + } else { + s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H); + } + } else if (vm3 & (x << (1 + ss_h))) { + int L = l[1 + ss_h], H = L >> 4; + int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L]; + + s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H); + } + } + } + if (ss_v) { + if (y & 1) + lvl += 16; + } else { + lvl += 8; + } + } +} + +void ff_vp9_loopfilter_sb(AVCodecContext *avctx, VP9Filter *lflvl, + int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff) +{ + VP9Context *s = avctx->priv_data; + AVFrame *f = s->s.frames[CUR_FRAME].tf.f; + uint8_t *dst = f->data[0] + yoff; + ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1]; + uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v]; + int p; + + /* FIXME: In how far can we interleave the v/h loopfilter calls? E.g. + * if you think of them as acting on a 8x8 block max, we can interleave + * each v/h within the single x loop, but that only works if we work on + * 8 pixel blocks, and we won't always do that (we want at least 16px + * to use SSE2 optimizations, perhaps 32 for AVX2) */ + + filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y); + filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y); + + for (p = 0; p < 2; p++) { + dst = f->data[1 + p] + uvoff; + filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv); + filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv); + } +} diff --git a/libs/ffvpx/libavcodec/vp9mvs.c b/libs/ffvpx/libavcodec/vp9mvs.c new file mode 100644 index 000000000..88db1c341 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9mvs.c @@ -0,0 +1,364 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "internal.h" +#include "vp56.h" +#include "vp9.h" +#include "vp9data.h" +#include "vp9dec.h" + +static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src, + VP9TileData *td) +{ + dst->x = av_clip(src->x, td->min_mv.x, td->max_mv.x); + dst->y = av_clip(src->y, td->min_mv.y, td->max_mv.y); +} + +static void find_ref_mvs(VP9TileData *td, + VP56mv *pmv, int ref, int z, int idx, int sb) +{ + static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = { + [BS_64x64] = { { 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 }, + { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 } }, + [BS_64x32] = { { 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 }, + { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 } }, + [BS_32x64] = { { -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 }, + { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 } }, + [BS_32x32] = { { 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 }, + { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 } }, + [BS_32x16] = { { 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 }, + { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 } }, + [BS_16x32] = { { -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 }, + { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 } }, + [BS_16x16] = { { 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 }, + { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 } }, + [BS_16x8] = { { 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 }, + { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 } }, + [BS_8x16] = { { -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 }, + { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 } }, + [BS_8x8] = { { 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 }, + { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } }, + [BS_8x4] = { { 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 }, + { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } }, + [BS_4x8] = { { 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 }, + { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } }, + [BS_4x4] = { { 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 }, + { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } }, + }; + VP9Context *s = td->s; + VP9Block *b = td->b; + int row = td->row, col = td->col, row7 = td->row7; + const int8_t (*p)[2] = mv_ref_blk_off[b->bs]; +#define INVALID_MV 0x80008000U + uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV; + int i; + +#define RETURN_DIRECT_MV(mv) \ + do { \ + uint32_t m = AV_RN32A(&mv); \ + if (!idx) { \ + AV_WN32A(pmv, m); \ + return; \ + } else if (mem == INVALID_MV) { \ + mem = m; \ + } else if (m != mem) { \ + AV_WN32A(pmv, m); \ + return; \ + } \ + } while (0) + + if (sb >= 0) { + if (sb == 2 || sb == 1) { + RETURN_DIRECT_MV(b->mv[0][z]); + } else if (sb == 3) { + RETURN_DIRECT_MV(b->mv[2][z]); + RETURN_DIRECT_MV(b->mv[1][z]); + RETURN_DIRECT_MV(b->mv[0][z]); + } + +#define RETURN_MV(mv) \ + do { \ + if (sb > 0) { \ + VP56mv tmp; \ + uint32_t m; \ + av_assert2(idx == 1); \ + av_assert2(mem != INVALID_MV); \ + if (mem_sub8x8 == INVALID_MV) { \ + clamp_mv(&tmp, &mv, td); \ + m = AV_RN32A(&tmp); \ + if (m != mem) { \ + AV_WN32A(pmv, m); \ + return; \ + } \ + mem_sub8x8 = AV_RN32A(&mv); \ + } else if (mem_sub8x8 != AV_RN32A(&mv)) { \ + clamp_mv(&tmp, &mv, td); \ + m = AV_RN32A(&tmp); \ + if (m != mem) { \ + AV_WN32A(pmv, m); \ + } else { \ + /* BUG I'm pretty sure this isn't the intention */ \ + AV_WN32A(pmv, 0); \ + } \ + return; \ + } \ + } else { \ + uint32_t m = AV_RN32A(&mv); \ + if (!idx) { \ + clamp_mv(pmv, &mv, td); \ + return; \ + } else if (mem == INVALID_MV) { \ + mem = m; \ + } else if (m != mem) { \ + clamp_mv(pmv, &mv, td); \ + return; \ + } \ + } \ + } while (0) + + if (row > 0) { + VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col]; + if (mv->ref[0] == ref) + RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]); + else if (mv->ref[1] == ref) + RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]); + } + if (col > td->tile_col_start) { + VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1]; + if (mv->ref[0] == ref) + RETURN_MV(td->left_mv_ctx[2 * row7 + (sb >> 1)][0]); + else if (mv->ref[1] == ref) + RETURN_MV(td->left_mv_ctx[2 * row7 + (sb >> 1)][1]); + } + i = 2; + } else { + i = 0; + } + + // previously coded MVs in this neighborhood, using same reference frame + for (; i < 8; i++) { + int c = p[i][0] + col, r = p[i][1] + row; + + if (c >= td->tile_col_start && c < s->cols && + r >= 0 && r < s->rows) { + VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c]; + + if (mv->ref[0] == ref) + RETURN_MV(mv->mv[0]); + else if (mv->ref[1] == ref) + RETURN_MV(mv->mv[1]); + } + } + + // MV at this position in previous frame, using same reference frame + if (s->s.h.use_last_frame_mvs) { + VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col]; + + if (!s->s.frames[REF_FRAME_MVPAIR].uses_2pass) + ff_thread_await_progress(&s->s.frames[REF_FRAME_MVPAIR].tf, row >> 3, 0); + if (mv->ref[0] == ref) + RETURN_MV(mv->mv[0]); + else if (mv->ref[1] == ref) + RETURN_MV(mv->mv[1]); + } + +#define RETURN_SCALE_MV(mv, scale) \ + do { \ + if (scale) { \ + VP56mv mv_temp = { -mv.x, -mv.y }; \ + RETURN_MV(mv_temp); \ + } else { \ + RETURN_MV(mv); \ + } \ + } while (0) + + // previously coded MVs in this neighborhood, using different reference frame + for (i = 0; i < 8; i++) { + int c = p[i][0] + col, r = p[i][1] + row; + + if (c >= td->tile_col_start && c < s->cols && r >= 0 && r < s->rows) { + VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c]; + + if (mv->ref[0] != ref && mv->ref[0] >= 0) + RETURN_SCALE_MV(mv->mv[0], + s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]); + if (mv->ref[1] != ref && mv->ref[1] >= 0 && + // BUG - libvpx has this condition regardless of whether + // we used the first ref MV and pre-scaling + AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) { + RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]); + } + } + } + + // MV at this position in previous frame, using different reference frame + if (s->s.h.use_last_frame_mvs) { + VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col]; + + // no need to await_progress, because we already did that above + if (mv->ref[0] != ref && mv->ref[0] >= 0) + RETURN_SCALE_MV(mv->mv[0], s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]); + if (mv->ref[1] != ref && mv->ref[1] >= 0 && + // BUG - libvpx has this condition regardless of whether + // we used the first ref MV and pre-scaling + AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) { + RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]); + } + } + + AV_ZERO32(pmv); + clamp_mv(pmv, pmv, td); +#undef INVALID_MV +#undef RETURN_MV +#undef RETURN_SCALE_MV +} + +static av_always_inline int read_mv_component(VP9TileData *td, int idx, int hp) +{ + VP9Context *s = td->s; + int bit, sign = vp56_rac_get_prob(td->c, s->prob.p.mv_comp[idx].sign); + int n, c = vp8_rac_get_tree(td->c, ff_vp9_mv_class_tree, + s->prob.p.mv_comp[idx].classes); + + td->counts.mv_comp[idx].sign[sign]++; + td->counts.mv_comp[idx].classes[c]++; + if (c) { + int m; + + for (n = 0, m = 0; m < c; m++) { + bit = vp56_rac_get_prob(td->c, s->prob.p.mv_comp[idx].bits[m]); + n |= bit << m; + td->counts.mv_comp[idx].bits[m][bit]++; + } + n <<= 3; + bit = vp8_rac_get_tree(td->c, ff_vp9_mv_fp_tree, + s->prob.p.mv_comp[idx].fp); + n |= bit << 1; + td->counts.mv_comp[idx].fp[bit]++; + if (hp) { + bit = vp56_rac_get_prob(td->c, s->prob.p.mv_comp[idx].hp); + td->counts.mv_comp[idx].hp[bit]++; + n |= bit; + } else { + n |= 1; + // bug in libvpx - we count for bw entropy purposes even if the + // bit wasn't coded + td->counts.mv_comp[idx].hp[1]++; + } + n += 8 << c; + } else { + n = vp56_rac_get_prob(td->c, s->prob.p.mv_comp[idx].class0); + td->counts.mv_comp[idx].class0[n]++; + bit = vp8_rac_get_tree(td->c, ff_vp9_mv_fp_tree, + s->prob.p.mv_comp[idx].class0_fp[n]); + td->counts.mv_comp[idx].class0_fp[n][bit]++; + n = (n << 3) | (bit << 1); + if (hp) { + bit = vp56_rac_get_prob(td->c, s->prob.p.mv_comp[idx].class0_hp); + td->counts.mv_comp[idx].class0_hp[bit]++; + n |= bit; + } else { + n |= 1; + // bug in libvpx - we count for bw entropy purposes even if the + // bit wasn't coded + td->counts.mv_comp[idx].class0_hp[1]++; + } + } + + return sign ? -(n + 1) : (n + 1); +} + +void ff_vp9_fill_mv(VP9TileData *td, VP56mv *mv, int mode, int sb) +{ + VP9Context *s = td->s; + VP9Block *b = td->b; + + if (mode == ZEROMV) { + AV_ZERO64(mv); + } else { + int hp; + + // FIXME cache this value and reuse for other subblocks + find_ref_mvs(td, &mv[0], b->ref[0], 0, mode == NEARMV, + mode == NEWMV ? -1 : sb); + // FIXME maybe move this code into find_ref_mvs() + if ((mode == NEWMV || sb == -1) && + !(hp = s->s.h.highprecisionmvs && + abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) { + if (mv[0].y & 1) { + if (mv[0].y < 0) + mv[0].y++; + else + mv[0].y--; + } + if (mv[0].x & 1) { + if (mv[0].x < 0) + mv[0].x++; + else + mv[0].x--; + } + } + if (mode == NEWMV) { + enum MVJoint j = vp8_rac_get_tree(td->c, ff_vp9_mv_joint_tree, + s->prob.p.mv_joint); + + td->counts.mv_joint[j]++; + if (j >= MV_JOINT_V) + mv[0].y += read_mv_component(td, 0, hp); + if (j & 1) + mv[0].x += read_mv_component(td, 1, hp); + } + + if (b->comp) { + // FIXME cache this value and reuse for other subblocks + find_ref_mvs(td, &mv[1], b->ref[1], 1, mode == NEARMV, + mode == NEWMV ? -1 : sb); + if ((mode == NEWMV || sb == -1) && + !(hp = s->s.h.highprecisionmvs && + abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) { + if (mv[1].y & 1) { + if (mv[1].y < 0) + mv[1].y++; + else + mv[1].y--; + } + if (mv[1].x & 1) { + if (mv[1].x < 0) + mv[1].x++; + else + mv[1].x--; + } + } + if (mode == NEWMV) { + enum MVJoint j = vp8_rac_get_tree(td->c, ff_vp9_mv_joint_tree, + s->prob.p.mv_joint); + + td->counts.mv_joint[j]++; + if (j >= MV_JOINT_V) + mv[1].y += read_mv_component(td, 0, hp); + if (j & 1) + mv[1].x += read_mv_component(td, 1, hp); + } + } + } +} diff --git a/libs/ffvpx/libavcodec/vp9prob.c b/libs/ffvpx/libavcodec/vp9prob.c new file mode 100644 index 000000000..fb295b482 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9prob.c @@ -0,0 +1,274 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "vp56.h" +#include "vp9.h" +#include "vp9data.h" +#include "vp9dec.h" + +static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1, + int max_count, int update_factor) +{ + unsigned ct = ct0 + ct1, p2, p1; + + if (!ct) + return; + + update_factor = FASTDIV(update_factor * FFMIN(ct, max_count), max_count); + p1 = *p; + p2 = ((((int64_t) ct0) << 8) + (ct >> 1)) / ct; + p2 = av_clip(p2, 1, 255); + + // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8 + *p = p1 + (((p2 - p1) * update_factor + 128) >> 8); +} + +void ff_vp9_adapt_probs(VP9Context *s) +{ + int i, j, k, l, m; + ProbContext *p = &s->prob_ctx[s->s.h.framectxid].p; + int uf = (s->s.h.keyframe || s->s.h.intraonly || !s->last_keyframe) ? 112 : 128; + + // coefficients + for (i = 0; i < 4; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + for (l = 0; l < 6; l++) + for (m = 0; m < 6; m++) { + uint8_t *pp = s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m]; + unsigned *e = s->td[0].counts.eob[i][j][k][l][m]; + unsigned *c = s->td[0].counts.coef[i][j][k][l][m]; + + if (l == 0 && m >= 3) // dc only has 3 pt + break; + + adapt_prob(&pp[0], e[0], e[1], 24, uf); + adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf); + adapt_prob(&pp[2], c[1], c[2], 24, uf); + } + + if (s->s.h.keyframe || s->s.h.intraonly) { + memcpy(p->skip, s->prob.p.skip, sizeof(p->skip)); + memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p)); + memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p)); + memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p)); + return; + } + + // skip flag + for (i = 0; i < 3; i++) + adapt_prob(&p->skip[i], s->td[0].counts.skip[i][0], + s->td[0].counts.skip[i][1], 20, 128); + + // intra/inter flag + for (i = 0; i < 4; i++) + adapt_prob(&p->intra[i], s->td[0].counts.intra[i][0], + s->td[0].counts.intra[i][1], 20, 128); + + // comppred flag + if (s->s.h.comppredmode == PRED_SWITCHABLE) { + for (i = 0; i < 5; i++) + adapt_prob(&p->comp[i], s->td[0].counts.comp[i][0], + s->td[0].counts.comp[i][1], 20, 128); + } + + // reference frames + if (s->s.h.comppredmode != PRED_SINGLEREF) { + for (i = 0; i < 5; i++) + adapt_prob(&p->comp_ref[i], s->td[0].counts.comp_ref[i][0], + s->td[0].counts.comp_ref[i][1], 20, 128); + } + + if (s->s.h.comppredmode != PRED_COMPREF) { + for (i = 0; i < 5; i++) { + uint8_t *pp = p->single_ref[i]; + unsigned (*c)[2] = s->td[0].counts.single_ref[i]; + + adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128); + adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128); + } + } + + // block partitioning + for (i = 0; i < 4; i++) + for (j = 0; j < 4; j++) { + uint8_t *pp = p->partition[i][j]; + unsigned *c = s->td[0].counts.partition[i][j]; + + adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128); + adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128); + adapt_prob(&pp[2], c[2], c[3], 20, 128); + } + + // tx size + if (s->s.h.txfmmode == TX_SWITCHABLE) { + for (i = 0; i < 2; i++) { + unsigned *c16 = s->td[0].counts.tx16p[i], *c32 = s->td[0].counts.tx32p[i]; + + adapt_prob(&p->tx8p[i], s->td[0].counts.tx8p[i][0], + s->td[0].counts.tx8p[i][1], 20, 128); + adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128); + adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128); + adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128); + adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128); + adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128); + } + } + + // interpolation filter + if (s->s.h.filtermode == FILTER_SWITCHABLE) { + for (i = 0; i < 4; i++) { + uint8_t *pp = p->filter[i]; + unsigned *c = s->td[0].counts.filter[i]; + + adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128); + adapt_prob(&pp[1], c[1], c[2], 20, 128); + } + } + + // inter modes + for (i = 0; i < 7; i++) { + uint8_t *pp = p->mv_mode[i]; + unsigned *c = s->td[0].counts.mv_mode[i]; + + adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128); + adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128); + adapt_prob(&pp[2], c[1], c[3], 20, 128); + } + + // mv joints + { + uint8_t *pp = p->mv_joint; + unsigned *c = s->td[0].counts.mv_joint; + + adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128); + adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128); + adapt_prob(&pp[2], c[2], c[3], 20, 128); + } + + // mv components + for (i = 0; i < 2; i++) { + uint8_t *pp; + unsigned *c, (*c2)[2], sum; + + adapt_prob(&p->mv_comp[i].sign, s->td[0].counts.mv_comp[i].sign[0], + s->td[0].counts.mv_comp[i].sign[1], 20, 128); + + pp = p->mv_comp[i].classes; + c = s->td[0].counts.mv_comp[i].classes; + sum = c[1] + c[2] + c[3] + c[4] + c[5] + + c[6] + c[7] + c[8] + c[9] + c[10]; + adapt_prob(&pp[0], c[0], sum, 20, 128); + sum -= c[1]; + adapt_prob(&pp[1], c[1], sum, 20, 128); + sum -= c[2] + c[3]; + adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128); + adapt_prob(&pp[3], c[2], c[3], 20, 128); + sum -= c[4] + c[5]; + adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128); + adapt_prob(&pp[5], c[4], c[5], 20, 128); + sum -= c[6]; + adapt_prob(&pp[6], c[6], sum, 20, 128); + adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128); + adapt_prob(&pp[8], c[7], c[8], 20, 128); + adapt_prob(&pp[9], c[9], c[10], 20, 128); + + adapt_prob(&p->mv_comp[i].class0, s->td[0].counts.mv_comp[i].class0[0], + s->td[0].counts.mv_comp[i].class0[1], 20, 128); + pp = p->mv_comp[i].bits; + c2 = s->td[0].counts.mv_comp[i].bits; + for (j = 0; j < 10; j++) + adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128); + + for (j = 0; j < 2; j++) { + pp = p->mv_comp[i].class0_fp[j]; + c = s->td[0].counts.mv_comp[i].class0_fp[j]; + adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128); + adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128); + adapt_prob(&pp[2], c[2], c[3], 20, 128); + } + pp = p->mv_comp[i].fp; + c = s->td[0].counts.mv_comp[i].fp; + adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128); + adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128); + adapt_prob(&pp[2], c[2], c[3], 20, 128); + + if (s->s.h.highprecisionmvs) { + adapt_prob(&p->mv_comp[i].class0_hp, + s->td[0].counts.mv_comp[i].class0_hp[0], + s->td[0].counts.mv_comp[i].class0_hp[1], 20, 128); + adapt_prob(&p->mv_comp[i].hp, s->td[0].counts.mv_comp[i].hp[0], + s->td[0].counts.mv_comp[i].hp[1], 20, 128); + } + } + + // y intra modes + for (i = 0; i < 4; i++) { + uint8_t *pp = p->y_mode[i]; + unsigned *c = s->td[0].counts.y_mode[i], sum, s2; + + sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9]; + adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128); + sum -= c[TM_VP8_PRED]; + adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128); + sum -= c[VERT_PRED]; + adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128); + s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED]; + sum -= s2; + adapt_prob(&pp[3], s2, sum, 20, 128); + s2 -= c[HOR_PRED]; + adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128); + adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], + 20, 128); + sum -= c[DIAG_DOWN_LEFT_PRED]; + adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128); + sum -= c[VERT_LEFT_PRED]; + adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128); + adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128); + } + + // uv intra modes + for (i = 0; i < 10; i++) { + uint8_t *pp = p->uv_mode[i]; + unsigned *c = s->td[0].counts.uv_mode[i], sum, s2; + + sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9]; + adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128); + sum -= c[TM_VP8_PRED]; + adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128); + sum -= c[VERT_PRED]; + adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128); + s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED]; + sum -= s2; + adapt_prob(&pp[3], s2, sum, 20, 128); + s2 -= c[HOR_PRED]; + adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128); + adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], + 20, 128); + sum -= c[DIAG_DOWN_LEFT_PRED]; + adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128); + sum -= c[VERT_LEFT_PRED]; + adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128); + adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128); + } +} diff --git a/libs/ffvpx/libavcodec/vp9recon.c b/libs/ffvpx/libavcodec/vp9recon.c new file mode 100644 index 000000000..49bb04e1f --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9recon.c @@ -0,0 +1,644 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" + +#include "avcodec.h" +#include "internal.h" +#include "videodsp.h" +#include "vp9data.h" +#include "vp9dec.h" + +static av_always_inline int check_intra_mode(VP9TileData *td, int mode, uint8_t **a, + uint8_t *dst_edge, ptrdiff_t stride_edge, + uint8_t *dst_inner, ptrdiff_t stride_inner, + uint8_t *l, int col, int x, int w, + int row, int y, enum TxfmMode tx, + int p, int ss_h, int ss_v, int bytesperpixel) +{ + VP9Context *s = td->s; + int have_top = row > 0 || y > 0; + int have_left = col > td->tile_col_start || x > 0; + int have_right = x < w - 1; + int bpp = s->s.h.bpp; + static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = { + [VERT_PRED] = { { DC_127_PRED, VERT_PRED }, + { DC_127_PRED, VERT_PRED } }, + [HOR_PRED] = { { DC_129_PRED, DC_129_PRED }, + { HOR_PRED, HOR_PRED } }, + [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED }, + { LEFT_DC_PRED, DC_PRED } }, + [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED }, + { DC_127_PRED, DIAG_DOWN_LEFT_PRED } }, + [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED }, + { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } }, + [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED }, + { VERT_RIGHT_PRED, VERT_RIGHT_PRED } }, + [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED }, + { HOR_DOWN_PRED, HOR_DOWN_PRED } }, + [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED }, + { DC_127_PRED, VERT_LEFT_PRED } }, + [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED }, + { HOR_UP_PRED, HOR_UP_PRED } }, + [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED }, + { HOR_PRED, TM_VP8_PRED } }, + }; + static const struct { + uint8_t needs_left:1; + uint8_t needs_top:1; + uint8_t needs_topleft:1; + uint8_t needs_topright:1; + uint8_t invert_left:1; + } edges[N_INTRA_PRED_MODES] = { + [VERT_PRED] = { .needs_top = 1 }, + [HOR_PRED] = { .needs_left = 1 }, + [DC_PRED] = { .needs_top = 1, .needs_left = 1 }, + [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 }, + [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, + .needs_topleft = 1 }, + [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, + .needs_topleft = 1 }, + [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, + .needs_topleft = 1 }, + [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 }, + [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 }, + [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, + .needs_topleft = 1 }, + [LEFT_DC_PRED] = { .needs_left = 1 }, + [TOP_DC_PRED] = { .needs_top = 1 }, + [DC_128_PRED] = { 0 }, + [DC_127_PRED] = { 0 }, + [DC_129_PRED] = { 0 } + }; + + av_assert2(mode >= 0 && mode < 10); + mode = mode_conv[mode][have_left][have_top]; + if (edges[mode].needs_top) { + uint8_t *top, *topleft; + int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4; + int n_px_need_tr = 0; + + if (tx == TX_4X4 && edges[mode].needs_topright && have_right) + n_px_need_tr = 4; + + // if top of sb64-row, use s->intra_pred_data[] instead of + // dst[-stride] for intra prediction (it contains pre- instead of + // post-loopfilter data) + if (have_top) { + top = !(row & 7) && !y ? + s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel : + y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner]; + if (have_left) + topleft = !(row & 7) && !y ? + s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel : + y == 0 || x == 0 ? &dst_edge[-stride_edge] : + &dst_inner[-stride_inner]; + } + + if (have_top && + (!edges[mode].needs_topleft || (have_left && top == topleft)) && + (tx != TX_4X4 || !edges[mode].needs_topright || have_right) && + n_px_need + n_px_need_tr <= n_px_have) { + *a = top; + } else { + if (have_top) { + if (n_px_need <= n_px_have) { + memcpy(*a, top, n_px_need * bytesperpixel); + } else { +#define memset_bpp(c, i1, v, i2, num) do { \ + if (bytesperpixel == 1) { \ + memset(&(c)[(i1)], (v)[(i2)], (num)); \ + } else { \ + int n, val = AV_RN16A(&(v)[(i2) * 2]); \ + for (n = 0; n < (num); n++) { \ + AV_WN16A(&(c)[((i1) + n) * 2], val); \ + } \ + } \ +} while (0) + memcpy(*a, top, n_px_have * bytesperpixel); + memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have); + } + } else { +#define memset_val(c, val, num) do { \ + if (bytesperpixel == 1) { \ + memset((c), (val), (num)); \ + } else { \ + int n; \ + for (n = 0; n < (num); n++) { \ + AV_WN16A(&(c)[n * 2], (val)); \ + } \ + } \ +} while (0) + memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need); + } + if (edges[mode].needs_topleft) { + if (have_left && have_top) { +#define assign_bpp(c, i1, v, i2) do { \ + if (bytesperpixel == 1) { \ + (c)[(i1)] = (v)[(i2)]; \ + } else { \ + AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \ + } \ +} while (0) + assign_bpp(*a, -1, topleft, -1); + } else { +#define assign_val(c, i, v) do { \ + if (bytesperpixel == 1) { \ + (c)[(i)] = (v); \ + } else { \ + AV_WN16A(&(c)[(i) * 2], (v)); \ + } \ +} while (0) + assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1)); + } + } + if (tx == TX_4X4 && edges[mode].needs_topright) { + if (have_top && have_right && + n_px_need + n_px_need_tr <= n_px_have) { + memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel); + } else { + memset_bpp(*a, 4, *a, 3, 4); + } + } + } + } + if (edges[mode].needs_left) { + if (have_left) { + int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4; + uint8_t *dst = x == 0 ? dst_edge : dst_inner; + ptrdiff_t stride = x == 0 ? stride_edge : stride_inner; + + if (edges[mode].invert_left) { + if (n_px_need <= n_px_have) { + for (i = 0; i < n_px_need; i++) + assign_bpp(l, i, &dst[i * stride], -1); + } else { + for (i = 0; i < n_px_have; i++) + assign_bpp(l, i, &dst[i * stride], -1); + memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have); + } + } else { + if (n_px_need <= n_px_have) { + for (i = 0; i < n_px_need; i++) + assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1); + } else { + for (i = 0; i < n_px_have; i++) + assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1); + memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have); + } + } + } else { + memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx); + } + } + + return mode; +} + +static av_always_inline void intra_recon(VP9TileData *td, ptrdiff_t y_off, + ptrdiff_t uv_off, int bytesperpixel) +{ + VP9Context *s = td->s; + VP9Block *b = td->b; + int row = td->row, col = td->col; + int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n; + int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2); + int end_x = FFMIN(2 * (s->cols - col), w4); + int end_y = FFMIN(2 * (s->rows - row), h4); + int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless; + int uvstep1d = 1 << b->uvtx, p; + uint8_t *dst = td->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off; + LOCAL_ALIGNED_32(uint8_t, a_buf, [96]); + LOCAL_ALIGNED_32(uint8_t, l, [64]); + + for (n = 0, y = 0; y < end_y; y += step1d) { + uint8_t *ptr = dst, *ptr_r = dst_r; + for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel, + ptr_r += 4 * step1d * bytesperpixel, n += step) { + int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ? + y * 2 + x : 0]; + uint8_t *a = &a_buf[32]; + enum TxfmType txtp = ff_vp9_intra_txfm_type[mode]; + int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n]; + + mode = check_intra_mode(td, mode, &a, ptr_r, + s->s.frames[CUR_FRAME].tf.f->linesize[0], + ptr, td->y_stride, l, + col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel); + s->dsp.intra_pred[b->tx][mode](ptr, td->y_stride, l, a); + if (eob) + s->dsp.itxfm_add[tx][txtp](ptr, td->y_stride, + td->block + 16 * n * bytesperpixel, eob); + } + dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0]; + dst += 4 * step1d * td->y_stride; + } + + // U/V + w4 >>= s->ss_h; + end_x >>= s->ss_h; + end_y >>= s->ss_v; + step = 1 << (b->uvtx * 2); + for (p = 0; p < 2; p++) { + dst = td->dst[1 + p]; + dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off; + for (n = 0, y = 0; y < end_y; y += uvstep1d) { + uint8_t *ptr = dst, *ptr_r = dst_r; + for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel, + ptr_r += 4 * uvstep1d * bytesperpixel, n += step) { + int mode = b->uvmode; + uint8_t *a = &a_buf[32]; + int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n]; + + mode = check_intra_mode(td, mode, &a, ptr_r, + s->s.frames[CUR_FRAME].tf.f->linesize[1], + ptr, td->uv_stride, l, col, x, w4, row, y, + b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel); + s->dsp.intra_pred[b->uvtx][mode](ptr, td->uv_stride, l, a); + if (eob) + s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride, + td->uvblock[p] + 16 * n * bytesperpixel, eob); + } + dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1]; + dst += 4 * uvstep1d * td->uv_stride; + } + } +} + +void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off) +{ + intra_recon(td, y_off, uv_off, 1); +} + +void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off) +{ + intra_recon(td, y_off, uv_off, 2); +} + +static av_always_inline void mc_luma_unscaled(VP9TileData *td, vp9_mc_func (*mc)[2], + uint8_t *dst, ptrdiff_t dst_stride, + const uint8_t *ref, ptrdiff_t ref_stride, + ThreadFrame *ref_frame, + ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, + int bw, int bh, int w, int h, int bytesperpixel) +{ + VP9Context *s = td->s; + int mx = mv->x, my = mv->y, th; + + y += my >> 3; + x += mx >> 3; + ref += y * ref_stride + x * bytesperpixel; + mx &= 7; + my &= 7; + // FIXME bilinear filter only needs 0/1 pixels, not 3/4 + // we use +7 because the last 7 pixels of each sbrow can be changed in + // the longest loopfilter of the next sbrow + th = (y + bh + 4 * !!my + 7) >> 6; + ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); + // The arm/aarch64 _hv filters read one more row than what actually is + // needed, so switch to emulated edge one pixel sooner vertically + // (!!my * 5) than horizontally (!!mx * 4). + if (x < !!mx * 3 || y < !!my * 3 || + x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) { + s->vdsp.emulated_edge_mc(td->edge_emu_buffer, + ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel, + 160, ref_stride, + bw + !!mx * 7, bh + !!my * 7, + x - !!mx * 3, y - !!my * 3, w, h); + ref = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel; + ref_stride = 160; + } + mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1); +} + +static av_always_inline void mc_chroma_unscaled(VP9TileData *td, vp9_mc_func (*mc)[2], + uint8_t *dst_u, uint8_t *dst_v, + ptrdiff_t dst_stride, + const uint8_t *ref_u, ptrdiff_t src_stride_u, + const uint8_t *ref_v, ptrdiff_t src_stride_v, + ThreadFrame *ref_frame, + ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, + int bw, int bh, int w, int h, int bytesperpixel) +{ + VP9Context *s = td->s; + int mx = mv->x * (1 << !s->ss_h), my = mv->y * (1 << !s->ss_v), th; + + y += my >> 4; + x += mx >> 4; + ref_u += y * src_stride_u + x * bytesperpixel; + ref_v += y * src_stride_v + x * bytesperpixel; + mx &= 15; + my &= 15; + // FIXME bilinear filter only needs 0/1 pixels, not 3/4 + // we use +7 because the last 7 pixels of each sbrow can be changed in + // the longest loopfilter of the next sbrow + th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v); + ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); + // The arm/aarch64 _hv filters read one more row than what actually is + // needed, so switch to emulated edge one pixel sooner vertically + // (!!my * 5) than horizontally (!!mx * 4). + if (x < !!mx * 3 || y < !!my * 3 || + x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) { + s->vdsp.emulated_edge_mc(td->edge_emu_buffer, + ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel, + 160, src_stride_u, + bw + !!mx * 7, bh + !!my * 7, + x - !!mx * 3, y - !!my * 3, w, h); + ref_u = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel; + mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my); + + s->vdsp.emulated_edge_mc(td->edge_emu_buffer, + ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel, + 160, src_stride_v, + bw + !!mx * 7, bh + !!my * 7, + x - !!mx * 3, y - !!my * 3, w, h); + ref_v = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel; + mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my); + } else { + mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my); + mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my); + } +} + +#define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \ + px, py, pw, ph, bw, bh, w, h, i) \ + mc_luma_unscaled(td, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \ + mv, bw, bh, w, h, bytesperpixel) +#define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ + row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \ + mc_chroma_unscaled(td, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ + row, col, mv, bw, bh, w, h, bytesperpixel) +#define SCALED 0 +#define FN(x) x##_8bpp +#define BYTES_PER_PIXEL 1 +#include "vp9_mc_template.c" +#undef FN +#undef BYTES_PER_PIXEL +#define FN(x) x##_16bpp +#define BYTES_PER_PIXEL 2 +#include "vp9_mc_template.c" +#undef mc_luma_dir +#undef mc_chroma_dir +#undef FN +#undef BYTES_PER_PIXEL +#undef SCALED + +static av_always_inline void mc_luma_scaled(VP9TileData *td, vp9_scaled_mc_func smc, + vp9_mc_func (*mc)[2], + uint8_t *dst, ptrdiff_t dst_stride, + const uint8_t *ref, ptrdiff_t ref_stride, + ThreadFrame *ref_frame, + ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv, + int px, int py, int pw, int ph, + int bw, int bh, int w, int h, int bytesperpixel, + const uint16_t *scale, const uint8_t *step) +{ + VP9Context *s = td->s; + if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width && + s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) { + mc_luma_unscaled(td, mc, dst, dst_stride, ref, ref_stride, ref_frame, + y, x, in_mv, bw, bh, w, h, bytesperpixel); + } else { +#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14) + int mx, my; + int refbw_m1, refbh_m1; + int th; + VP56mv mv; + + mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8); + mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8); + // BUG libvpx seems to scale the two components separately. This introduces + // rounding errors but we have to reproduce them to be exactly compatible + // with the output from libvpx... + mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0); + my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1); + + y = my >> 4; + x = mx >> 4; + ref += y * ref_stride + x * bytesperpixel; + mx &= 15; + my &= 15; + refbw_m1 = ((bw - 1) * step[0] + mx) >> 4; + refbh_m1 = ((bh - 1) * step[1] + my) >> 4; + // FIXME bilinear filter only needs 0/1 pixels, not 3/4 + // we use +7 because the last 7 pixels of each sbrow can be changed in + // the longest loopfilter of the next sbrow + th = (y + refbh_m1 + 4 + 7) >> 6; + ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); + // The arm/aarch64 _hv filters read one more row than what actually is + // needed, so switch to emulated edge one pixel sooner vertically + // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1). + if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) { + s->vdsp.emulated_edge_mc(td->edge_emu_buffer, + ref - 3 * ref_stride - 3 * bytesperpixel, + 288, ref_stride, + refbw_m1 + 8, refbh_m1 + 8, + x - 3, y - 3, w, h); + ref = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel; + ref_stride = 288; + } + smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]); + } +} + +static av_always_inline void mc_chroma_scaled(VP9TileData *td, vp9_scaled_mc_func smc, + vp9_mc_func (*mc)[2], + uint8_t *dst_u, uint8_t *dst_v, + ptrdiff_t dst_stride, + const uint8_t *ref_u, ptrdiff_t src_stride_u, + const uint8_t *ref_v, ptrdiff_t src_stride_v, + ThreadFrame *ref_frame, + ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv, + int px, int py, int pw, int ph, + int bw, int bh, int w, int h, int bytesperpixel, + const uint16_t *scale, const uint8_t *step) +{ + VP9Context *s = td->s; + if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width && + s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) { + mc_chroma_unscaled(td, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u, + ref_v, src_stride_v, ref_frame, + y, x, in_mv, bw, bh, w, h, bytesperpixel); + } else { + int mx, my; + int refbw_m1, refbh_m1; + int th; + VP56mv mv; + + if (s->ss_h) { + // BUG https://code.google.com/p/webm/issues/detail?id=820 + mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 16, (s->cols * 4 - x + px + 3) * 16); + mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15); + } else { + mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8); + mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0); + } + if (s->ss_v) { + // BUG https://code.google.com/p/webm/issues/detail?id=820 + mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 16, (s->rows * 4 - y + py + 3) * 16); + my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15); + } else { + mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8); + my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1); + } +#undef scale_mv + y = my >> 4; + x = mx >> 4; + ref_u += y * src_stride_u + x * bytesperpixel; + ref_v += y * src_stride_v + x * bytesperpixel; + mx &= 15; + my &= 15; + refbw_m1 = ((bw - 1) * step[0] + mx) >> 4; + refbh_m1 = ((bh - 1) * step[1] + my) >> 4; + // FIXME bilinear filter only needs 0/1 pixels, not 3/4 + // we use +7 because the last 7 pixels of each sbrow can be changed in + // the longest loopfilter of the next sbrow + th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v); + ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); + // The arm/aarch64 _hv filters read one more row than what actually is + // needed, so switch to emulated edge one pixel sooner vertically + // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1). + if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) { + s->vdsp.emulated_edge_mc(td->edge_emu_buffer, + ref_u - 3 * src_stride_u - 3 * bytesperpixel, + 288, src_stride_u, + refbw_m1 + 8, refbh_m1 + 8, + x - 3, y - 3, w, h); + ref_u = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel; + smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]); + + s->vdsp.emulated_edge_mc(td->edge_emu_buffer, + ref_v - 3 * src_stride_v - 3 * bytesperpixel, + 288, src_stride_v, + refbw_m1 + 8, refbh_m1 + 8, + x - 3, y - 3, w, h); + ref_v = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel; + smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]); + } else { + smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]); + smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]); + } + } +} + +#define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \ + px, py, pw, ph, bw, bh, w, h, i) \ + mc_luma_scaled(td, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \ + mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \ + s->mvscale[b->ref[i]], s->mvstep[b->ref[i]]) +#define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ + row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \ + mc_chroma_scaled(td, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ + row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \ + s->mvscale[b->ref[i]], s->mvstep[b->ref[i]]) +#define SCALED 1 +#define FN(x) x##_scaled_8bpp +#define BYTES_PER_PIXEL 1 +#include "vp9_mc_template.c" +#undef FN +#undef BYTES_PER_PIXEL +#define FN(x) x##_scaled_16bpp +#define BYTES_PER_PIXEL 2 +#include "vp9_mc_template.c" +#undef mc_luma_dir +#undef mc_chroma_dir +#undef FN +#undef BYTES_PER_PIXEL +#undef SCALED + +static av_always_inline void inter_recon(VP9TileData *td, int bytesperpixel) +{ + VP9Context *s = td->s; + VP9Block *b = td->b; + int row = td->row, col = td->col; + + if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) { + if (bytesperpixel == 1) { + inter_pred_scaled_8bpp(td); + } else { + inter_pred_scaled_16bpp(td); + } + } else { + if (bytesperpixel == 1) { + inter_pred_8bpp(td); + } else { + inter_pred_16bpp(td); + } + } + + if (!b->skip) { + /* mostly copied intra_recon() */ + + int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n; + int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2); + int end_x = FFMIN(2 * (s->cols - col), w4); + int end_y = FFMIN(2 * (s->rows - row), h4); + int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless; + int uvstep1d = 1 << b->uvtx, p; + uint8_t *dst = td->dst[0]; + + // y itxfm add + for (n = 0, y = 0; y < end_y; y += step1d) { + uint8_t *ptr = dst; + for (x = 0; x < end_x; x += step1d, + ptr += 4 * step1d * bytesperpixel, n += step) { + int eob = b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n]; + + if (eob) + s->dsp.itxfm_add[tx][DCT_DCT](ptr, td->y_stride, + td->block + 16 * n * bytesperpixel, eob); + } + dst += 4 * td->y_stride * step1d; + } + + // uv itxfm add + end_x >>= s->ss_h; + end_y >>= s->ss_v; + step = 1 << (b->uvtx * 2); + for (p = 0; p < 2; p++) { + dst = td->dst[p + 1]; + for (n = 0, y = 0; y < end_y; y += uvstep1d) { + uint8_t *ptr = dst; + for (x = 0; x < end_x; x += uvstep1d, + ptr += 4 * uvstep1d * bytesperpixel, n += step) { + int eob = b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n]; + + if (eob) + s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride, + td->uvblock[p] + 16 * n * bytesperpixel, eob); + } + dst += 4 * uvstep1d * td->uv_stride; + } + } + } +} + +void ff_vp9_inter_recon_8bpp(VP9TileData *td) +{ + inter_recon(td, 1); +} + +void ff_vp9_inter_recon_16bpp(VP9TileData *td) +{ + inter_recon(td, 2); +} diff --git a/libs/ffvpx/libavcodec/vp9shared.h b/libs/ffvpx/libavcodec/vp9shared.h new file mode 100644 index 000000000..54726df74 --- /dev/null +++ b/libs/ffvpx/libavcodec/vp9shared.h @@ -0,0 +1,169 @@ +/* + * VP9 compatible video decoder + * + * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> + * Copyright (C) 2013 Clément BÅ“sch <u pkh me> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VP9SHARED_H +#define AVCODEC_VP9SHARED_H + +#include <stddef.h> +#include <stdint.h> + +#include "vp9.h" +#include "thread.h" +#include "vp56.h" + +enum BlockPartition { + PARTITION_NONE, // [ ] <-. + PARTITION_H, // [-] | + PARTITION_V, // [|] | + PARTITION_SPLIT, // [+] --' +}; + +enum InterPredMode { + NEARESTMV = 10, + NEARMV = 11, + ZEROMV = 12, + NEWMV = 13, +}; + +enum CompPredMode { + PRED_SINGLEREF, + PRED_COMPREF, + PRED_SWITCHABLE, +}; + +typedef struct VP9mvrefPair { + VP56mv mv[2]; + int8_t ref[2]; +} VP9mvrefPair; + +typedef struct VP9Frame { + ThreadFrame tf; + AVBufferRef *extradata; + uint8_t *segmentation_map; + VP9mvrefPair *mv; + int uses_2pass; + + AVBufferRef *hwaccel_priv_buf; + void *hwaccel_picture_private; +} VP9Frame; + +enum BlockLevel { + BL_64X64, + BL_32X32, + BL_16X16, + BL_8X8, +}; + +enum BlockSize { + BS_64x64, + BS_64x32, + BS_32x64, + BS_32x32, + BS_32x16, + BS_16x32, + BS_16x16, + BS_16x8, + BS_8x16, + BS_8x8, + BS_8x4, + BS_4x8, + BS_4x4, + N_BS_SIZES, +}; + +typedef struct VP9BitstreamHeader { + // bitstream header + uint8_t profile; + uint8_t bpp; + uint8_t keyframe; + uint8_t invisible; + uint8_t errorres; + uint8_t intraonly; + uint8_t resetctx; + uint8_t refreshrefmask; + uint8_t highprecisionmvs; + enum FilterMode filtermode; + uint8_t allowcompinter; + uint8_t refreshctx; + uint8_t parallelmode; + uint8_t framectxid; + uint8_t use_last_frame_mvs; + uint8_t refidx[3]; + uint8_t signbias[3]; + uint8_t fixcompref; + uint8_t varcompref[2]; + struct { + uint8_t level; + int8_t sharpness; + } filter; + struct { + uint8_t enabled; + uint8_t updated; + int8_t mode[2]; + int8_t ref[4]; + } lf_delta; + uint8_t yac_qi; + int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta; + uint8_t lossless; +#define MAX_SEGMENT 8 + struct { + uint8_t enabled; + uint8_t temporal; + uint8_t absolute_vals; + uint8_t update_map; + uint8_t prob[7]; + uint8_t pred_prob[3]; + struct { + uint8_t q_enabled; + uint8_t lf_enabled; + uint8_t ref_enabled; + uint8_t skip_enabled; + uint8_t ref_val; + int16_t q_val; + int8_t lf_val; + int16_t qmul[2][2]; + uint8_t lflvl[4][2]; + } feat[MAX_SEGMENT]; + } segmentation; + enum TxfmMode txfmmode; + enum CompPredMode comppredmode; + struct { + unsigned log2_tile_cols, log2_tile_rows; + unsigned tile_cols, tile_rows; + } tiling; + + int uncompressed_header_size; + int compressed_header_size; +} VP9BitstreamHeader; + +typedef struct VP9SharedContext { + VP9BitstreamHeader h; + + ThreadFrame refs[8]; +#define CUR_FRAME 0 +#define REF_FRAME_MVPAIR 1 +#define REF_FRAME_SEGMAP 2 + VP9Frame frames[3]; +} VP9SharedContext; + +#endif /* AVCODEC_VP9SHARED_H */ diff --git a/libs/ffvpx/libavcodec/x86/constants.c b/libs/ffvpx/libavcodec/x86/constants.c new file mode 100644 index 000000000..4bfb78cc3 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/constants.c @@ -0,0 +1,94 @@ +/* + * MMX/SSE/AVX constants used across x86 dsp optimizations. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/mem.h" +#include "libavutil/x86/asm.h" // for xmm_reg +#include "constants.h" + +DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1) = { 0x0001000100010001ULL, 0x0001000100010001ULL, + 0x0001000100010001ULL, 0x0001000100010001ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pw_2) = { 0x0002000200020002ULL, 0x0002000200020002ULL, + 0x0002000200020002ULL, 0x0002000200020002ULL }; +DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_3) = { 0x0003000300030003ULL, 0x0003000300030003ULL }; +DECLARE_ASM_ALIGNED(32, const ymm_reg, ff_pw_4) = { 0x0004000400040004ULL, 0x0004000400040004ULL, + 0x0004000400040004ULL, 0x0004000400040004ULL }; +DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_5) = { 0x0005000500050005ULL, 0x0005000500050005ULL }; +DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8) = { 0x0008000800080008ULL, 0x0008000800080008ULL }; +DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_9) = { 0x0009000900090009ULL, 0x0009000900090009ULL }; +DECLARE_ALIGNED(8, const uint64_t, ff_pw_15) = 0x000F000F000F000FULL; +DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16) = { 0x0010001000100010ULL, 0x0010001000100010ULL }; +DECLARE_ALIGNED(16, const xmm_reg, ff_pw_17) = { 0x0011001100110011ULL, 0x0011001100110011ULL }; +DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_18) = { 0x0012001200120012ULL, 0x0012001200120012ULL }; +DECLARE_ALIGNED(16, const xmm_reg, ff_pw_20) = { 0x0014001400140014ULL, 0x0014001400140014ULL }; +DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32) = { 0x0020002000200020ULL, 0x0020002000200020ULL }; +DECLARE_ASM_ALIGNED(8, const uint64_t, ff_pw_42) = 0x002A002A002A002AULL; +DECLARE_ASM_ALIGNED(8, const uint64_t, ff_pw_53) = 0x0035003500350035ULL; +DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_64) = { 0x0040004000400040ULL, 0x0040004000400040ULL }; +DECLARE_ASM_ALIGNED(8, const uint64_t, ff_pw_96) = 0x0060006000600060ULL; +DECLARE_ASM_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL; +DECLARE_ALIGNED(32, const ymm_reg, ff_pw_255) = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, + 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pw_256) = { 0x0100010001000100ULL, 0x0100010001000100ULL, + 0x0100010001000100ULL, 0x0100010001000100ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pw_512) = { 0x0200020002000200ULL, 0x0200020002000200ULL, + 0x0200020002000200ULL, 0x0200020002000200ULL }; +DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1023) = { 0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL, + 0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL}; +DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1024) = { 0x0400040004000400ULL, 0x0400040004000400ULL, + 0x0400040004000400ULL, 0x0400040004000400ULL}; +DECLARE_ALIGNED(32, const ymm_reg, ff_pw_2048) = { 0x0800080008000800ULL, 0x0800080008000800ULL, + 0x0800080008000800ULL, 0x0800080008000800ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pw_4095) = { 0x0fff0fff0fff0fffULL, 0x0fff0fff0fff0fffULL, + 0x0fff0fff0fff0fffULL, 0x0fff0fff0fff0fffULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pw_4096) = { 0x1000100010001000ULL, 0x1000100010001000ULL, + 0x1000100010001000ULL, 0x1000100010001000ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pw_8192) = { 0x2000200020002000ULL, 0x2000200020002000ULL, + 0x2000200020002000ULL, 0x2000200020002000ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pw_m1) = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, + 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL }; + +DECLARE_ALIGNED(32, const ymm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL, + 0x0000000000000000ULL, 0x0000000000000000ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pb_1) = { 0x0101010101010101ULL, 0x0101010101010101ULL, + 0x0101010101010101ULL, 0x0101010101010101ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pb_2) = { 0x0202020202020202ULL, 0x0202020202020202ULL, + 0x0202020202020202ULL, 0x0202020202020202ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x0303030303030303ULL, + 0x0303030303030303ULL, 0x0303030303030303ULL }; +DECLARE_ALIGNED(32, const xmm_reg, ff_pb_15) = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL, + 0x8080808080808080ULL, 0x8080808080808080ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pb_FE) = { 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL, + 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL }; +DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL; + +DECLARE_ALIGNED(16, const xmm_reg, ff_ps_neg) = { 0x8000000080000000ULL, 0x8000000080000000ULL }; + +DECLARE_ALIGNED(32, const ymm_reg, ff_pd_1) = { 0x0000000100000001ULL, 0x0000000100000001ULL, + 0x0000000100000001ULL, 0x0000000100000001ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pd_16) = { 0x0000001000000010ULL, 0x0000001000000010ULL, + 0x0000001000000010ULL, 0x0000001000000010ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pd_32) = { 0x0000002000000020ULL, 0x0000002000000020ULL, + 0x0000002000000020ULL, 0x0000002000000020ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pd_8192) = { 0x0000200000002000ULL, 0x0000200000002000ULL, + 0x0000200000002000ULL, 0x0000200000002000ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pd_65535)= { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, + 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL }; diff --git a/libs/ffvpx/libavcodec/x86/constants.h b/libs/ffvpx/libavcodec/x86/constants.h new file mode 100644 index 000000000..85da38b7b --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/constants.h @@ -0,0 +1,72 @@ +/* + * MMX/SSE constants used across x86 dsp optimizations. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_X86_CONSTANTS_H +#define AVCODEC_X86_CONSTANTS_H + +#include <stdint.h> + +#include "libavutil/x86/asm.h" + +extern const ymm_reg ff_pw_1; +extern const ymm_reg ff_pw_2; +extern const xmm_reg ff_pw_3; +extern const ymm_reg ff_pw_4; +extern const xmm_reg ff_pw_5; +extern const xmm_reg ff_pw_8; +extern const xmm_reg ff_pw_9; +extern const uint64_t ff_pw_15; +extern const xmm_reg ff_pw_16; +extern const xmm_reg ff_pw_18; +extern const xmm_reg ff_pw_20; +extern const xmm_reg ff_pw_32; +extern const uint64_t ff_pw_42; +extern const uint64_t ff_pw_53; +extern const xmm_reg ff_pw_64; +extern const uint64_t ff_pw_96; +extern const uint64_t ff_pw_128; +extern const ymm_reg ff_pw_255; +extern const ymm_reg ff_pw_256; +extern const ymm_reg ff_pw_512; +extern const ymm_reg ff_pw_1023; +extern const ymm_reg ff_pw_1024; +extern const ymm_reg ff_pw_2048; +extern const ymm_reg ff_pw_4095; +extern const ymm_reg ff_pw_4096; +extern const ymm_reg ff_pw_8192; +extern const ymm_reg ff_pw_m1; + +extern const ymm_reg ff_pb_0; +extern const ymm_reg ff_pb_1; +extern const ymm_reg ff_pb_2; +extern const ymm_reg ff_pb_3; +extern const ymm_reg ff_pb_80; +extern const ymm_reg ff_pb_FE; +extern const uint64_t ff_pb_FC; + +extern const xmm_reg ff_ps_neg; + +extern const ymm_reg ff_pd_1; +extern const ymm_reg ff_pd_16; +extern const ymm_reg ff_pd_32; +extern const ymm_reg ff_pd_8192; +extern const ymm_reg ff_pd_65535; + +#endif /* AVCODEC_X86_CONSTANTS_H */ diff --git a/libs/ffvpx/libavcodec/x86/flacdsp.asm b/libs/ffvpx/libavcodec/x86/flacdsp.asm new file mode 100644 index 000000000..713861152 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/flacdsp.asm @@ -0,0 +1,313 @@ +;****************************************************************************** +;* FLAC DSP SIMD optimizations +;* +;* Copyright (C) 2014 Loren Merritt +;* Copyright (C) 2014 James Almer +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION .text + +%macro PMACSDQL 5 +%if cpuflag(xop) + pmacsdql %1, %2, %3, %1 +%else + pmuldq %2, %3 + paddq %1, %2 +%endif +%endmacro + +%macro LPC_32 1 +INIT_XMM %1 +cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j + sub lend, pred_orderd + jle .ret + lea decodedq, [decodedq+pred_orderq*4-8] + lea coeffsq, [coeffsq+pred_orderq*4] + neg pred_orderq + movd m4, qlevelm +ALIGN 16 +.loop_sample: + movd m0, [decodedq+pred_orderq*4+8] + add decodedq, 8 + movd m1, [coeffsq+pred_orderq*4] + pxor m2, m2 + pxor m3, m3 + lea jq, [pred_orderq+1] + test jq, jq + jz .end_order +.loop_order: + PMACSDQL m2, m0, m1, m2, m0 + movd m0, [decodedq+jq*4] + PMACSDQL m3, m1, m0, m3, m1 + movd m1, [coeffsq+jq*4] + inc jq + jl .loop_order +.end_order: + PMACSDQL m2, m0, m1, m2, m0 + psrlq m2, m4 + movd m0, [decodedq] + paddd m0, m2 + movd [decodedq], m0 + sub lend, 2 + jl .ret + PMACSDQL m3, m1, m0, m3, m1 + psrlq m3, m4 + movd m1, [decodedq+4] + paddd m1, m3 + movd [decodedq+4], m1 + jg .loop_sample +.ret: + REP_RET +%endmacro + +%if HAVE_XOP_EXTERNAL +LPC_32 xop +%endif +LPC_32 sse4 + +;---------------------------------------------------------------------------------- +;void ff_flac_decorrelate_[lrm]s_16_sse2(uint8_t **out, int32_t **in, int channels, +; int len, int shift); +;---------------------------------------------------------------------------------- +%macro FLAC_DECORRELATE_16 3-4 +cglobal flac_decorrelate_%1_16, 2, 4, 4, out, in0, in1, len +%if ARCH_X86_32 + mov lend, lenm +%endif + movd m3, r4m + shl lend, 2 + mov in1q, [in0q + gprsize] + mov in0q, [in0q] + mov outq, [outq] + add in1q, lenq + add in0q, lenq + add outq, lenq + neg lenq + +align 16 +.loop: + mova m0, [in0q + lenq] + mova m1, [in1q + lenq] +%ifidn %1, ms + psrad m2, m1, 1 + psubd m0, m2 +%endif +%ifnidn %1, indep2 + p%4d m2, m0, m1 +%endif + packssdw m%2, m%2 + packssdw m%3, m%3 + punpcklwd m%2, m%3 + psllw m%2, m3 + mova [outq + lenq], m%2 + add lenq, 16 + jl .loop + REP_RET +%endmacro + +INIT_XMM sse2 +FLAC_DECORRELATE_16 ls, 0, 2, sub +FLAC_DECORRELATE_16 rs, 2, 1, add +FLAC_DECORRELATE_16 ms, 2, 0, add + +;---------------------------------------------------------------------------------- +;void ff_flac_decorrelate_[lrm]s_32_sse2(uint8_t **out, int32_t **in, int channels, +; int len, int shift); +;---------------------------------------------------------------------------------- +%macro FLAC_DECORRELATE_32 5 +cglobal flac_decorrelate_%1_32, 2, 4, 4, out, in0, in1, len +%if ARCH_X86_32 + mov lend, lenm +%endif + movd m3, r4m + mov in1q, [in0q + gprsize] + mov in0q, [in0q] + mov outq, [outq] + sub in1q, in0q + +align 16 +.loop: + mova m0, [in0q] + mova m1, [in0q + in1q] +%ifidn %1, ms + psrad m2, m1, 1 + psubd m0, m2 +%endif + p%5d m2, m0, m1 + pslld m%2, m3 + pslld m%3, m3 + + SBUTTERFLY dq, %2, %3, %4 + + mova [outq ], m%2 + mova [outq + mmsize], m%3 + + add in0q, mmsize + add outq, mmsize*2 + sub lend, mmsize/4 + jg .loop + REP_RET +%endmacro + +INIT_XMM sse2 +FLAC_DECORRELATE_32 ls, 0, 2, 1, sub +FLAC_DECORRELATE_32 rs, 2, 1, 0, add +FLAC_DECORRELATE_32 ms, 2, 0, 1, add + +;----------------------------------------------------------------------------------------- +;void ff_flac_decorrelate_indep<ch>_<bps>_<opt>(uint8_t **out, int32_t **in, int channels, +; int len, int shift); +;----------------------------------------------------------------------------------------- +;%1 = bps +;%2 = channels +;%3 = last xmm reg used +;%4 = word/dword (shift instruction) +%macro FLAC_DECORRELATE_INDEP 4 +%define REPCOUNT %2/(32/%1) ; 16bits = channels / 2; 32bits = channels +cglobal flac_decorrelate_indep%2_%1, 2, %2+2, %3+1, out, in0, in1, len, in2, in3, in4, in5, in6, in7 +%if ARCH_X86_32 +%if %2 == 6 + DEFINE_ARGS out, in0, in1, in2, in3, in4, in5 + %define lend dword r3m +%else + mov lend, lenm +%endif +%endif + movd m%3, r4m + +%assign %%i 1 +%rep %2-1 + mov in %+ %%i %+ q, [in0q+%%i*gprsize] +%assign %%i %%i+1 +%endrep + + mov in0q, [in0q] + mov outq, [outq] + +%assign %%i 1 +%rep %2-1 + sub in %+ %%i %+ q, in0q +%assign %%i %%i+1 +%endrep + +align 16 +.loop: + mova m0, [in0q] + +%assign %%i 1 +%rep REPCOUNT-1 + mova m %+ %%i, [in0q + in %+ %%i %+ q] +%assign %%i %%i+1 +%endrep + +%if %1 == 32 + +%if %2 == 8 + TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, 8 +%elif %2 == 6 + SBUTTERFLY dq, 0, 1, 6 + SBUTTERFLY dq, 2, 3, 6 + SBUTTERFLY dq, 4, 5, 6 + + punpcklqdq m6, m0, m2 + punpckhqdq m2, m4 + shufps m4, m0, 0xe4 + punpcklqdq m0, m1, m3 + punpckhqdq m3, m5 + shufps m5, m1, 0xe4 + SWAP 0,6,1,4,5,3 +%elif %2 == 4 + TRANSPOSE4x4D 0, 1, 2, 3, 4 +%else ; %2 == 2 + SBUTTERFLY dq, 0, 1, 2 +%endif + +%else ; %1 == 16 + +%if %2 == 8 + packssdw m0, [in0q + in4q] + packssdw m1, [in0q + in5q] + packssdw m2, [in0q + in6q] + packssdw m3, [in0q + in7q] + TRANSPOSE2x4x4W 0, 1, 2, 3, 4 +%elif %2 == 6 + packssdw m0, [in0q + in3q] + packssdw m1, [in0q + in4q] + packssdw m2, [in0q + in5q] + pshufd m3, m0, q1032 + punpcklwd m0, m1 + punpckhwd m1, m2 + punpcklwd m2, m3 + + shufps m3, m0, m2, q2020 + shufps m0, m1, q2031 + shufps m2, m1, q3131 + shufps m1, m2, m3, q3120 + shufps m3, m0, q0220 + shufps m0, m2, q3113 + SWAP 2, 0, 3 +%else ; %2 == 4 + packssdw m0, [in0q + in2q] + packssdw m1, [in0q + in3q] + SBUTTERFLY wd, 0, 1, 2 + SBUTTERFLY dq, 0, 1, 2 +%endif + +%endif + +%assign %%i 0 +%rep REPCOUNT + psll%4 m %+ %%i, m%3 +%assign %%i %%i+1 +%endrep + +%assign %%i 0 +%rep REPCOUNT + mova [outq + %%i*mmsize], m %+ %%i +%assign %%i %%i+1 +%endrep + + add in0q, mmsize + add outq, mmsize*REPCOUNT + sub lend, mmsize/4 + jg .loop + REP_RET +%endmacro + +INIT_XMM sse2 +FLAC_DECORRELATE_16 indep2, 0, 1 ; Reuse stereo 16bits macro +FLAC_DECORRELATE_INDEP 32, 2, 3, d +FLAC_DECORRELATE_INDEP 16, 4, 3, w +FLAC_DECORRELATE_INDEP 32, 4, 5, d +FLAC_DECORRELATE_INDEP 16, 6, 4, w +FLAC_DECORRELATE_INDEP 32, 6, 7, d +%if ARCH_X86_64 +FLAC_DECORRELATE_INDEP 16, 8, 5, w +FLAC_DECORRELATE_INDEP 32, 8, 9, d +%endif + +INIT_XMM avx +FLAC_DECORRELATE_INDEP 32, 4, 5, d +FLAC_DECORRELATE_INDEP 32, 6, 7, d +%if ARCH_X86_64 +FLAC_DECORRELATE_INDEP 16, 8, 5, w +FLAC_DECORRELATE_INDEP 32, 8, 9, d +%endif diff --git a/libs/ffvpx/libavcodec/x86/flacdsp_init.c b/libs/ffvpx/libavcodec/x86/flacdsp_init.c new file mode 100644 index 000000000..1971f81b8 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/flacdsp_init.c @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2014 James Almer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/flacdsp.h" +#include "libavutil/x86/cpu.h" +#include "config.h" + +void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order, + int qlevel, int len); +void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order, + int qlevel, int len); + +void ff_flac_enc_lpc_16_sse4(int32_t *, const int32_t *, int, int, const int32_t *,int); + +#define DECORRELATE_FUNCS(fmt, opt) \ +void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \ + int len, int shift); \ +void ff_flac_decorrelate_rs_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \ + int len, int shift); \ +void ff_flac_decorrelate_ms_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \ + int len, int shift); \ +void ff_flac_decorrelate_indep2_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \ + int len, int shift); \ +void ff_flac_decorrelate_indep4_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \ + int len, int shift); \ +void ff_flac_decorrelate_indep6_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \ + int len, int shift); \ +void ff_flac_decorrelate_indep8_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \ + int len, int shift) + +DECORRELATE_FUNCS(16, sse2); +DECORRELATE_FUNCS(16, avx); +DECORRELATE_FUNCS(32, sse2); +DECORRELATE_FUNCS(32, avx); + +av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, + int bps) +{ +#if HAVE_X86ASM + int cpu_flags = av_get_cpu_flags(); + +#if CONFIG_FLAC_DECODER + if (EXTERNAL_SSE2(cpu_flags)) { + if (fmt == AV_SAMPLE_FMT_S16) { + if (channels == 2) + c->decorrelate[0] = ff_flac_decorrelate_indep2_16_sse2; + else if (channels == 4) + c->decorrelate[0] = ff_flac_decorrelate_indep4_16_sse2; + else if (channels == 6) + c->decorrelate[0] = ff_flac_decorrelate_indep6_16_sse2; + else if (ARCH_X86_64 && channels == 8) + c->decorrelate[0] = ff_flac_decorrelate_indep8_16_sse2; + c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2; + c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2; + c->decorrelate[3] = ff_flac_decorrelate_ms_16_sse2; + } else if (fmt == AV_SAMPLE_FMT_S32) { + if (channels == 2) + c->decorrelate[0] = ff_flac_decorrelate_indep2_32_sse2; + else if (channels == 4) + c->decorrelate[0] = ff_flac_decorrelate_indep4_32_sse2; + else if (channels == 6) + c->decorrelate[0] = ff_flac_decorrelate_indep6_32_sse2; + else if (ARCH_X86_64 && channels == 8) + c->decorrelate[0] = ff_flac_decorrelate_indep8_32_sse2; + c->decorrelate[1] = ff_flac_decorrelate_ls_32_sse2; + c->decorrelate[2] = ff_flac_decorrelate_rs_32_sse2; + c->decorrelate[3] = ff_flac_decorrelate_ms_32_sse2; + } + } + if (EXTERNAL_SSE4(cpu_flags)) { + c->lpc32 = ff_flac_lpc_32_sse4; + } + if (EXTERNAL_AVX(cpu_flags)) { + if (fmt == AV_SAMPLE_FMT_S16) { + if (ARCH_X86_64 && channels == 8) + c->decorrelate[0] = ff_flac_decorrelate_indep8_16_avx; + } else if (fmt == AV_SAMPLE_FMT_S32) { + if (channels == 4) + c->decorrelate[0] = ff_flac_decorrelate_indep4_32_avx; + else if (channels == 6) + c->decorrelate[0] = ff_flac_decorrelate_indep6_32_avx; + else if (ARCH_X86_64 && channels == 8) + c->decorrelate[0] = ff_flac_decorrelate_indep8_32_avx; + } + } + if (EXTERNAL_XOP(cpu_flags)) { + c->lpc32 = ff_flac_lpc_32_xop; + } +#endif + +#if CONFIG_FLAC_ENCODER + if (EXTERNAL_SSE4(cpu_flags)) { + if (CONFIG_GPL) + c->lpc16_encode = ff_flac_enc_lpc_16_sse4; + } +#endif +#endif /* HAVE_X86ASM */ +} diff --git a/libs/ffvpx/libavcodec/x86/h264_intrapred.asm b/libs/ffvpx/libavcodec/x86/h264_intrapred.asm new file mode 100644 index 000000000..f3aa3172f --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/h264_intrapred.asm @@ -0,0 +1,2757 @@ +;****************************************************************************** +;* H.264 intra prediction asm optimizations +;* Copyright (c) 2010 Fiona Glaser +;* Copyright (c) 2010 Holger Lubitz +;* Copyright (c) 2010 Loren Merritt +;* Copyright (c) 2010 Ronald S. Bultje +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA + +tm_shuf: times 8 db 0x03, 0x80 +pw_ff00: times 8 dw 0xff00 +plane_shuf: db -8, -7, -6, -5, -4, -3, -2, -1 + db 1, 2, 3, 4, 5, 6, 7, 8 +plane8_shuf: db -4, -3, -2, -1, 0, 0, 0, 0 + db 1, 2, 3, 4, 0, 0, 0, 0 +pw_0to7: dw 0, 1, 2, 3, 4, 5, 6, 7 +pw_1to8: dw 1, 2, 3, 4, 5, 6, 7, 8 +pw_m8tom1: dw -8, -7, -6, -5, -4, -3, -2, -1 +pw_m4to4: dw -4, -3, -2, -1, 1, 2, 3, 4 + +SECTION .text + +cextern pb_1 +cextern pb_3 +cextern pw_4 +cextern pw_5 +cextern pw_8 +cextern pw_16 +cextern pw_17 +cextern pw_32 + +;----------------------------------------------------------------------------- +; void ff_pred16x16_vertical_8(uint8_t *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +INIT_MMX mmx +cglobal pred16x16_vertical_8, 2,3 + sub r0, r1 + mov r2, 8 + movq mm0, [r0+0] + movq mm1, [r0+8] +.loop: + movq [r0+r1*1+0], mm0 + movq [r0+r1*1+8], mm1 + movq [r0+r1*2+0], mm0 + movq [r0+r1*2+8], mm1 + lea r0, [r0+r1*2] + dec r2 + jg .loop + REP_RET + +INIT_XMM sse +cglobal pred16x16_vertical_8, 2,3 + sub r0, r1 + mov r2, 4 + movaps xmm0, [r0] +.loop: + movaps [r0+r1*1], xmm0 + movaps [r0+r1*2], xmm0 + lea r0, [r0+r1*2] + movaps [r0+r1*1], xmm0 + movaps [r0+r1*2], xmm0 + lea r0, [r0+r1*2] + dec r2 + jg .loop + REP_RET + +;----------------------------------------------------------------------------- +; void ff_pred16x16_horizontal_8(uint8_t *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +%macro PRED16x16_H 0 +cglobal pred16x16_horizontal_8, 2,3 + mov r2, 8 +%if cpuflag(ssse3) + mova m2, [pb_3] +%endif +.loop: + movd m0, [r0+r1*0-4] + movd m1, [r0+r1*1-4] + +%if cpuflag(ssse3) + pshufb m0, m2 + pshufb m1, m2 +%else + punpcklbw m0, m0 + punpcklbw m1, m1 + SPLATW m0, m0, 3 + SPLATW m1, m1, 3 + mova [r0+r1*0+8], m0 + mova [r0+r1*1+8], m1 +%endif + + mova [r0+r1*0], m0 + mova [r0+r1*1], m1 + lea r0, [r0+r1*2] + dec r2 + jg .loop + REP_RET +%endmacro + +INIT_MMX mmx +PRED16x16_H +INIT_MMX mmxext +PRED16x16_H +INIT_XMM ssse3 +PRED16x16_H + +;----------------------------------------------------------------------------- +; void ff_pred16x16_dc_8(uint8_t *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +%macro PRED16x16_DC 0 +cglobal pred16x16_dc_8, 2,7 + mov r4, r0 + sub r0, r1 + pxor mm0, mm0 + pxor mm1, mm1 + psadbw mm0, [r0+0] + psadbw mm1, [r0+8] + dec r0 + movzx r5d, byte [r0+r1*1] + paddw mm0, mm1 + movd r6d, mm0 + lea r0, [r0+r1*2] +%rep 7 + movzx r2d, byte [r0+r1*0] + movzx r3d, byte [r0+r1*1] + add r5d, r2d + add r6d, r3d + lea r0, [r0+r1*2] +%endrep + movzx r2d, byte [r0+r1*0] + add r5d, r6d + lea r2d, [r2+r5+16] + shr r2d, 5 +%if cpuflag(ssse3) + pxor m1, m1 +%endif + SPLATB_REG m0, r2, m1 + +%if mmsize==8 + mov r3d, 8 +.loop: + mova [r4+r1*0+0], m0 + mova [r4+r1*0+8], m0 + mova [r4+r1*1+0], m0 + mova [r4+r1*1+8], m0 +%else + mov r3d, 4 +.loop: + mova [r4+r1*0], m0 + mova [r4+r1*1], m0 + lea r4, [r4+r1*2] + mova [r4+r1*0], m0 + mova [r4+r1*1], m0 +%endif + lea r4, [r4+r1*2] + dec r3d + jg .loop + REP_RET +%endmacro + +INIT_MMX mmxext +PRED16x16_DC +INIT_XMM sse2 +PRED16x16_DC +INIT_XMM ssse3 +PRED16x16_DC + +;----------------------------------------------------------------------------- +; void ff_pred16x16_tm_vp8_8(uint8_t *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +%macro PRED16x16_TM 0 +cglobal pred16x16_tm_vp8_8, 2,5 + sub r0, r1 + pxor mm7, mm7 + movq mm0, [r0+0] + movq mm2, [r0+8] + movq mm1, mm0 + movq mm3, mm2 + punpcklbw mm0, mm7 + punpckhbw mm1, mm7 + punpcklbw mm2, mm7 + punpckhbw mm3, mm7 + movzx r3d, byte [r0-1] + mov r4d, 16 +.loop: + movzx r2d, byte [r0+r1-1] + sub r2d, r3d + movd mm4, r2d + SPLATW mm4, mm4, 0 + movq mm5, mm4 + movq mm6, mm4 + movq mm7, mm4 + paddw mm4, mm0 + paddw mm5, mm1 + paddw mm6, mm2 + paddw mm7, mm3 + packuswb mm4, mm5 + packuswb mm6, mm7 + movq [r0+r1+0], mm4 + movq [r0+r1+8], mm6 + add r0, r1 + dec r4d + jg .loop + REP_RET +%endmacro + +INIT_MMX mmx +PRED16x16_TM +INIT_MMX mmxext +PRED16x16_TM + +INIT_XMM sse2 +cglobal pred16x16_tm_vp8_8, 2,6,6 + sub r0, r1 + pxor xmm2, xmm2 + movdqa xmm0, [r0] + movdqa xmm1, xmm0 + punpcklbw xmm0, xmm2 + punpckhbw xmm1, xmm2 + movzx r4d, byte [r0-1] + mov r5d, 8 +.loop: + movzx r2d, byte [r0+r1*1-1] + movzx r3d, byte [r0+r1*2-1] + sub r2d, r4d + sub r3d, r4d + movd xmm2, r2d + movd xmm4, r3d + pshuflw xmm2, xmm2, 0 + pshuflw xmm4, xmm4, 0 + punpcklqdq xmm2, xmm2 + punpcklqdq xmm4, xmm4 + movdqa xmm3, xmm2 + movdqa xmm5, xmm4 + paddw xmm2, xmm0 + paddw xmm3, xmm1 + paddw xmm4, xmm0 + paddw xmm5, xmm1 + packuswb xmm2, xmm3 + packuswb xmm4, xmm5 + movdqa [r0+r1*1], xmm2 + movdqa [r0+r1*2], xmm4 + lea r0, [r0+r1*2] + dec r5d + jg .loop + REP_RET + +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +cglobal pred16x16_tm_vp8_8, 2, 4, 5, dst, stride, stride3, iteration + sub dstq, strideq + pmovzxbw m0, [dstq] + vpbroadcastb xm1, [r0-1] + pmovzxbw m1, xm1 + psubw m0, m1 + mov iterationd, 4 + lea stride3q, [strideq*3] +.loop: + vpbroadcastb xm1, [dstq+strideq*1-1] + vpbroadcastb xm2, [dstq+strideq*2-1] + vpbroadcastb xm3, [dstq+stride3q-1] + vpbroadcastb xm4, [dstq+strideq*4-1] + pmovzxbw m1, xm1 + pmovzxbw m2, xm2 + pmovzxbw m3, xm3 + pmovzxbw m4, xm4 + paddw m1, m0 + paddw m2, m0 + paddw m3, m0 + paddw m4, m0 + vpackuswb m1, m1, m2 + vpackuswb m3, m3, m4 + vpermq m1, m1, q3120 + vpermq m3, m3, q3120 + movdqa [dstq+strideq*1], xm1 + vextracti128 [dstq+strideq*2], m1, 1 + movdqa [dstq+stride3q*1], xm3 + vextracti128 [dstq+strideq*4], m3, 1 + lea dstq, [dstq+strideq*4] + dec iterationd + jg .loop + REP_RET +%endif + +;----------------------------------------------------------------------------- +; void ff_pred16x16_plane_*_8(uint8_t *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +%macro H264_PRED16x16_PLANE 1 +cglobal pred16x16_plane_%1_8, 2,9,7 + mov r2, r1 ; +stride + neg r1 ; -stride + + movh m0, [r0+r1 -1] +%if mmsize == 8 + pxor m4, m4 + movh m1, [r0+r1 +3 ] + movh m2, [r0+r1 +8 ] + movh m3, [r0+r1 +12] + punpcklbw m0, m4 + punpcklbw m1, m4 + punpcklbw m2, m4 + punpcklbw m3, m4 + pmullw m0, [pw_m8tom1 ] + pmullw m1, [pw_m8tom1+8] + pmullw m2, [pw_1to8 ] + pmullw m3, [pw_1to8 +8] + paddw m0, m2 + paddw m1, m3 +%else ; mmsize == 16 +%if cpuflag(ssse3) + movhps m0, [r0+r1 +8] + pmaddubsw m0, [plane_shuf] ; H coefficients +%else ; sse2 + pxor m2, m2 + movh m1, [r0+r1 +8] + punpcklbw m0, m2 + punpcklbw m1, m2 + pmullw m0, [pw_m8tom1] + pmullw m1, [pw_1to8] + paddw m0, m1 +%endif + movhlps m1, m0 +%endif + paddw m0, m1 +%if cpuflag(mmxext) + PSHUFLW m1, m0, 0xE +%elif cpuflag(mmx) + mova m1, m0 + psrlq m1, 32 +%endif + paddw m0, m1 +%if cpuflag(mmxext) + PSHUFLW m1, m0, 0x1 +%elif cpuflag(mmx) + mova m1, m0 + psrlq m1, 16 +%endif + paddw m0, m1 ; sum of H coefficients + + lea r4, [r0+r2*8-1] + lea r3, [r0+r2*4-1] + add r4, r2 + +%if ARCH_X86_64 +%define e_reg r8 +%else +%define e_reg r0 +%endif + + movzx e_reg, byte [r3+r2*2 ] + movzx r5, byte [r4+r1 ] + sub r5, e_reg + + movzx e_reg, byte [r3+r2 ] + movzx r6, byte [r4 ] + sub r6, e_reg + lea r5, [r5+r6*2] + + movzx e_reg, byte [r3+r1 ] + movzx r6, byte [r4+r2*2 ] + sub r6, e_reg + lea r5, [r5+r6*4] + + movzx e_reg, byte [r3 ] +%if ARCH_X86_64 + movzx r7, byte [r4+r2 ] + sub r7, e_reg +%else + movzx r6, byte [r4+r2 ] + sub r6, e_reg + lea r5, [r5+r6*4] + sub r5, r6 +%endif + + lea e_reg, [r3+r1*4] + lea r3, [r4+r2*4] + + movzx r4, byte [e_reg+r2 ] + movzx r6, byte [r3 ] + sub r6, r4 +%if ARCH_X86_64 + lea r6, [r7+r6*2] + lea r5, [r5+r6*2] + add r5, r6 +%else + lea r5, [r5+r6*4] + lea r5, [r5+r6*2] +%endif + + movzx r4, byte [e_reg ] +%if ARCH_X86_64 + movzx r7, byte [r3 +r2 ] + sub r7, r4 + sub r5, r7 +%else + movzx r6, byte [r3 +r2 ] + sub r6, r4 + lea r5, [r5+r6*8] + sub r5, r6 +%endif + + movzx r4, byte [e_reg+r1 ] + movzx r6, byte [r3 +r2*2] + sub r6, r4 +%if ARCH_X86_64 + add r6, r7 +%endif + lea r5, [r5+r6*8] + + movzx r4, byte [e_reg+r2*2] + movzx r6, byte [r3 +r1 ] + sub r6, r4 + lea r5, [r5+r6*4] + add r5, r6 ; sum of V coefficients + +%if ARCH_X86_64 == 0 + mov r0, r0m +%endif + +%ifidn %1, h264 + lea r5, [r5*5+32] + sar r5, 6 +%elifidn %1, rv40 + lea r5, [r5*5] + sar r5, 6 +%elifidn %1, svq3 + test r5, r5 + lea r6, [r5+3] + cmovs r5, r6 + sar r5, 2 ; V/4 + lea r5, [r5*5] ; 5*(V/4) + test r5, r5 + lea r6, [r5+15] + cmovs r5, r6 + sar r5, 4 ; (5*(V/4))/16 +%endif + + movzx r4, byte [r0+r1 +15] + movzx r3, byte [r3+r2*2 ] + lea r3, [r3+r4+1] + shl r3, 4 + + movd r1d, m0 + movsx r1d, r1w +%ifnidn %1, svq3 +%ifidn %1, h264 + lea r1d, [r1d*5+32] +%else ; rv40 + lea r1d, [r1d*5] +%endif + sar r1d, 6 +%else ; svq3 + test r1d, r1d + lea r4d, [r1d+3] + cmovs r1d, r4d + sar r1d, 2 ; H/4 + lea r1d, [r1d*5] ; 5*(H/4) + test r1d, r1d + lea r4d, [r1d+15] + cmovs r1d, r4d + sar r1d, 4 ; (5*(H/4))/16 +%endif + movd m0, r1d + + add r1d, r5d + add r3d, r1d + shl r1d, 3 + sub r3d, r1d ; a + + movd m1, r5d + movd m3, r3d + SPLATW m0, m0, 0 ; H + SPLATW m1, m1, 0 ; V + SPLATW m3, m3, 0 ; a +%ifidn %1, svq3 + SWAP 0, 1 +%endif + mova m2, m0 +%if mmsize == 8 + mova m5, m0 +%endif + pmullw m0, [pw_0to7] ; 0*H, 1*H, ..., 7*H (words) +%if mmsize == 16 + psllw m2, 3 +%else + psllw m5, 3 + psllw m2, 2 + mova m6, m5 + paddw m6, m2 +%endif + paddw m0, m3 ; a + {0,1,2,3,4,5,6,7}*H + paddw m2, m0 ; a + {8,9,10,11,12,13,14,15}*H +%if mmsize == 8 + paddw m5, m0 ; a + {8,9,10,11}*H + paddw m6, m0 ; a + {12,13,14,15}*H +%endif + + mov r4, 8 +.loop: + mova m3, m0 ; b[0..7] + mova m4, m2 ; b[8..15] + psraw m3, 5 + psraw m4, 5 + packuswb m3, m4 + mova [r0], m3 +%if mmsize == 8 + mova m3, m5 ; b[8..11] + mova m4, m6 ; b[12..15] + psraw m3, 5 + psraw m4, 5 + packuswb m3, m4 + mova [r0+8], m3 +%endif + paddw m0, m1 + paddw m2, m1 +%if mmsize == 8 + paddw m5, m1 + paddw m6, m1 +%endif + + mova m3, m0 ; b[0..7] + mova m4, m2 ; b[8..15] + psraw m3, 5 + psraw m4, 5 + packuswb m3, m4 + mova [r0+r2], m3 +%if mmsize == 8 + mova m3, m5 ; b[8..11] + mova m4, m6 ; b[12..15] + psraw m3, 5 + psraw m4, 5 + packuswb m3, m4 + mova [r0+r2+8], m3 +%endif + paddw m0, m1 + paddw m2, m1 +%if mmsize == 8 + paddw m5, m1 + paddw m6, m1 +%endif + + lea r0, [r0+r2*2] + dec r4 + jg .loop + REP_RET +%endmacro + +INIT_MMX mmx +H264_PRED16x16_PLANE h264 +H264_PRED16x16_PLANE rv40 +H264_PRED16x16_PLANE svq3 +INIT_MMX mmxext +H264_PRED16x16_PLANE h264 +H264_PRED16x16_PLANE rv40 +H264_PRED16x16_PLANE svq3 +INIT_XMM sse2 +H264_PRED16x16_PLANE h264 +H264_PRED16x16_PLANE rv40 +H264_PRED16x16_PLANE svq3 +INIT_XMM ssse3 +H264_PRED16x16_PLANE h264 +H264_PRED16x16_PLANE rv40 +H264_PRED16x16_PLANE svq3 + +;----------------------------------------------------------------------------- +; void ff_pred8x8_plane_8(uint8_t *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +%macro H264_PRED8x8_PLANE 0 +cglobal pred8x8_plane_8, 2,9,7 + mov r2, r1 ; +stride + neg r1 ; -stride + + movd m0, [r0+r1 -1] +%if mmsize == 8 + pxor m2, m2 + movh m1, [r0+r1 +4 ] + punpcklbw m0, m2 + punpcklbw m1, m2 + pmullw m0, [pw_m4to4] + pmullw m1, [pw_m4to4+8] +%else ; mmsize == 16 +%if cpuflag(ssse3) + movhps m0, [r0+r1 +4] ; this reads 4 bytes more than necessary + pmaddubsw m0, [plane8_shuf] ; H coefficients +%else ; sse2 + pxor m2, m2 + movd m1, [r0+r1 +4] + punpckldq m0, m1 + punpcklbw m0, m2 + pmullw m0, [pw_m4to4] +%endif + movhlps m1, m0 +%endif + paddw m0, m1 + +%if notcpuflag(ssse3) +%if cpuflag(mmxext) + PSHUFLW m1, m0, 0xE +%elif cpuflag(mmx) + mova m1, m0 + psrlq m1, 32 +%endif + paddw m0, m1 +%endif ; !ssse3 + +%if cpuflag(mmxext) + PSHUFLW m1, m0, 0x1 +%elif cpuflag(mmx) + mova m1, m0 + psrlq m1, 16 +%endif + paddw m0, m1 ; sum of H coefficients + + lea r4, [r0+r2*4-1] + lea r3, [r0 -1] + add r4, r2 + +%if ARCH_X86_64 +%define e_reg r8 +%else +%define e_reg r0 +%endif + + movzx e_reg, byte [r3+r2*2 ] + movzx r5, byte [r4+r1 ] + sub r5, e_reg + + movzx e_reg, byte [r3 ] +%if ARCH_X86_64 + movzx r7, byte [r4+r2 ] + sub r7, e_reg + sub r5, r7 +%else + movzx r6, byte [r4+r2 ] + sub r6, e_reg + lea r5, [r5+r6*4] + sub r5, r6 +%endif + + movzx e_reg, byte [r3+r1 ] + movzx r6, byte [r4+r2*2 ] + sub r6, e_reg +%if ARCH_X86_64 + add r6, r7 +%endif + lea r5, [r5+r6*4] + + movzx e_reg, byte [r3+r2 ] + movzx r6, byte [r4 ] + sub r6, e_reg + lea r6, [r5+r6*2] + + lea r5, [r6*9+16] + lea r5, [r5+r6*8] + sar r5, 5 + +%if ARCH_X86_64 == 0 + mov r0, r0m +%endif + + movzx r3, byte [r4+r2*2 ] + movzx r4, byte [r0+r1 +7] + lea r3, [r3+r4+1] + shl r3, 4 + movd r1d, m0 + movsx r1d, r1w + imul r1d, 17 + add r1d, 16 + sar r1d, 5 + movd m0, r1d + add r1d, r5d + sub r3d, r1d + add r1d, r1d + sub r3d, r1d ; a + + movd m1, r5d + movd m3, r3d + SPLATW m0, m0, 0 ; H + SPLATW m1, m1, 0 ; V + SPLATW m3, m3, 0 ; a +%if mmsize == 8 + mova m2, m0 +%endif + pmullw m0, [pw_0to7] ; 0*H, 1*H, ..., 7*H (words) + paddw m0, m3 ; a + {0,1,2,3,4,5,6,7}*H +%if mmsize == 8 + psllw m2, 2 + paddw m2, m0 ; a + {4,5,6,7}*H +%endif + + mov r4, 4 +ALIGN 16 +.loop: +%if mmsize == 16 + mova m3, m0 ; b[0..7] + paddw m0, m1 + psraw m3, 5 + mova m4, m0 ; V+b[0..7] + paddw m0, m1 + psraw m4, 5 + packuswb m3, m4 + movh [r0], m3 + movhps [r0+r2], m3 +%else ; mmsize == 8 + mova m3, m0 ; b[0..3] + mova m4, m2 ; b[4..7] + paddw m0, m1 + paddw m2, m1 + psraw m3, 5 + psraw m4, 5 + mova m5, m0 ; V+b[0..3] + mova m6, m2 ; V+b[4..7] + paddw m0, m1 + paddw m2, m1 + psraw m5, 5 + psraw m6, 5 + packuswb m3, m4 + packuswb m5, m6 + mova [r0], m3 + mova [r0+r2], m5 +%endif + + lea r0, [r0+r2*2] + dec r4 + jg .loop + REP_RET +%endmacro + +INIT_MMX mmx +H264_PRED8x8_PLANE +INIT_MMX mmxext +H264_PRED8x8_PLANE +INIT_XMM sse2 +H264_PRED8x8_PLANE +INIT_XMM ssse3 +H264_PRED8x8_PLANE + +;----------------------------------------------------------------------------- +; void ff_pred8x8_vertical_8(uint8_t *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +INIT_MMX mmx +cglobal pred8x8_vertical_8, 2,2 + sub r0, r1 + movq mm0, [r0] +%rep 3 + movq [r0+r1*1], mm0 + movq [r0+r1*2], mm0 + lea r0, [r0+r1*2] +%endrep + movq [r0+r1*1], mm0 + movq [r0+r1*2], mm0 + RET + +;----------------------------------------------------------------------------- +; void ff_pred8x8_horizontal_8(uint8_t *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +%macro PRED8x8_H 0 +cglobal pred8x8_horizontal_8, 2,3 + mov r2, 4 +%if cpuflag(ssse3) + mova m2, [pb_3] +%endif +.loop: + SPLATB_LOAD m0, r0+r1*0-1, m2 + SPLATB_LOAD m1, r0+r1*1-1, m2 + mova [r0+r1*0], m0 + mova [r0+r1*1], m1 + lea r0, [r0+r1*2] + dec r2 + jg .loop + REP_RET +%endmacro + +INIT_MMX mmx +PRED8x8_H +INIT_MMX mmxext +PRED8x8_H +INIT_MMX ssse3 +PRED8x8_H + +;----------------------------------------------------------------------------- +; void ff_pred8x8_top_dc_8_mmxext(uint8_t *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- +INIT_MMX mmxext +cglobal pred8x8_top_dc_8, 2,5 + sub r0, r1 + movq mm0, [r0] + pxor mm1, mm1 + pxor mm2, mm2 + lea r2, [r0+r1*2] + punpckhbw mm1, mm0 + punpcklbw mm0, mm2 + psadbw mm1, mm2 ; s1 + lea r3, [r2+r1*2] + psadbw mm0, mm2 ; s0 + psrlw mm1, 1 + psrlw mm0, 1 + pavgw mm1, mm2 + lea r4, [r3+r1*2] + pavgw mm0, mm2 + pshufw mm1, mm1, 0 + pshufw mm0, mm0, 0 ; dc0 (w) + packuswb mm0, mm1 ; dc0,dc1 (b) + movq [r0+r1*1], mm0 + movq [r0+r1*2], mm0 + lea r0, [r3+r1*2] + movq [r2+r1*1], mm0 + movq [r2+r1*2], mm0 + movq [r3+r1*1], mm0 + movq [r3+r1*2], mm0 + movq [r0+r1*1], mm0 + movq [r0+r1*2], mm0 + RET + +;----------------------------------------------------------------------------- +; void ff_pred8x8_dc_8_mmxext(uint8_t *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +INIT_MMX mmxext +cglobal pred8x8_dc_8, 2,5 + sub r0, r1 + pxor m7, m7 + movd m0, [r0+0] + movd m1, [r0+4] + psadbw m0, m7 ; s0 + mov r4, r0 + psadbw m1, m7 ; s1 + + movzx r2d, byte [r0+r1*1-1] + movzx r3d, byte [r0+r1*2-1] + lea r0, [r0+r1*2] + add r2d, r3d + movzx r3d, byte [r0+r1*1-1] + add r2d, r3d + movzx r3d, byte [r0+r1*2-1] + add r2d, r3d + lea r0, [r0+r1*2] + movd m2, r2d ; s2 + movzx r2d, byte [r0+r1*1-1] + movzx r3d, byte [r0+r1*2-1] + lea r0, [r0+r1*2] + add r2d, r3d + movzx r3d, byte [r0+r1*1-1] + add r2d, r3d + movzx r3d, byte [r0+r1*2-1] + add r2d, r3d + movd m3, r2d ; s3 + + punpcklwd m0, m1 + mov r0, r4 + punpcklwd m2, m3 + punpckldq m0, m2 ; s0, s1, s2, s3 + pshufw m3, m0, 11110110b ; s2, s1, s3, s3 + lea r2, [r0+r1*2] + pshufw m0, m0, 01110100b ; s0, s1, s3, s1 + paddw m0, m3 + lea r3, [r2+r1*2] + psrlw m0, 2 + pavgw m0, m7 ; s0+s2, s1, s3, s1+s3 + lea r4, [r3+r1*2] + packuswb m0, m0 + punpcklbw m0, m0 + movq m1, m0 + punpcklbw m0, m0 + punpckhbw m1, m1 + movq [r0+r1*1], m0 + movq [r0+r1*2], m0 + movq [r2+r1*1], m0 + movq [r2+r1*2], m0 + movq [r3+r1*1], m1 + movq [r3+r1*2], m1 + movq [r4+r1*1], m1 + movq [r4+r1*2], m1 + RET + +;----------------------------------------------------------------------------- +; void ff_pred8x8_dc_rv40_8(uint8_t *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +INIT_MMX mmxext +cglobal pred8x8_dc_rv40_8, 2,7 + mov r4, r0 + sub r0, r1 + pxor mm0, mm0 + psadbw mm0, [r0] + dec r0 + movzx r5d, byte [r0+r1*1] + movd r6d, mm0 + lea r0, [r0+r1*2] +%rep 3 + movzx r2d, byte [r0+r1*0] + movzx r3d, byte [r0+r1*1] + add r5d, r2d + add r6d, r3d + lea r0, [r0+r1*2] +%endrep + movzx r2d, byte [r0+r1*0] + add r5d, r6d + lea r2d, [r2+r5+8] + shr r2d, 4 + movd mm0, r2d + punpcklbw mm0, mm0 + pshufw mm0, mm0, 0 + mov r3d, 4 +.loop: + movq [r4+r1*0], mm0 + movq [r4+r1*1], mm0 + lea r4, [r4+r1*2] + dec r3d + jg .loop + REP_RET + +;----------------------------------------------------------------------------- +; void ff_pred8x8_tm_vp8_8(uint8_t *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +%macro PRED8x8_TM 0 +cglobal pred8x8_tm_vp8_8, 2,6 + sub r0, r1 + pxor mm7, mm7 + movq mm0, [r0] + movq mm1, mm0 + punpcklbw mm0, mm7 + punpckhbw mm1, mm7 + movzx r4d, byte [r0-1] + mov r5d, 4 +.loop: + movzx r2d, byte [r0+r1*1-1] + movzx r3d, byte [r0+r1*2-1] + sub r2d, r4d + sub r3d, r4d + movd mm2, r2d + movd mm4, r3d + SPLATW mm2, mm2, 0 + SPLATW mm4, mm4, 0 + movq mm3, mm2 + movq mm5, mm4 + paddw mm2, mm0 + paddw mm3, mm1 + paddw mm4, mm0 + paddw mm5, mm1 + packuswb mm2, mm3 + packuswb mm4, mm5 + movq [r0+r1*1], mm2 + movq [r0+r1*2], mm4 + lea r0, [r0+r1*2] + dec r5d + jg .loop + REP_RET +%endmacro + +INIT_MMX mmx +PRED8x8_TM +INIT_MMX mmxext +PRED8x8_TM + +INIT_XMM sse2 +cglobal pred8x8_tm_vp8_8, 2,6,4 + sub r0, r1 + pxor xmm1, xmm1 + movq xmm0, [r0] + punpcklbw xmm0, xmm1 + movzx r4d, byte [r0-1] + mov r5d, 4 +.loop: + movzx r2d, byte [r0+r1*1-1] + movzx r3d, byte [r0+r1*2-1] + sub r2d, r4d + sub r3d, r4d + movd xmm2, r2d + movd xmm3, r3d + pshuflw xmm2, xmm2, 0 + pshuflw xmm3, xmm3, 0 + punpcklqdq xmm2, xmm2 + punpcklqdq xmm3, xmm3 + paddw xmm2, xmm0 + paddw xmm3, xmm0 + packuswb xmm2, xmm3 + movq [r0+r1*1], xmm2 + movhps [r0+r1*2], xmm2 + lea r0, [r0+r1*2] + dec r5d + jg .loop + REP_RET + +INIT_XMM ssse3 +cglobal pred8x8_tm_vp8_8, 2,3,6 + sub r0, r1 + movdqa xmm4, [tm_shuf] + pxor xmm1, xmm1 + movq xmm0, [r0] + punpcklbw xmm0, xmm1 + movd xmm5, [r0-4] + pshufb xmm5, xmm4 + mov r2d, 4 +.loop: + movd xmm2, [r0+r1*1-4] + movd xmm3, [r0+r1*2-4] + pshufb xmm2, xmm4 + pshufb xmm3, xmm4 + psubw xmm2, xmm5 + psubw xmm3, xmm5 + paddw xmm2, xmm0 + paddw xmm3, xmm0 + packuswb xmm2, xmm3 + movq [r0+r1*1], xmm2 + movhps [r0+r1*2], xmm2 + lea r0, [r0+r1*2] + dec r2d + jg .loop + REP_RET + +; dest, left, right, src, tmp +; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2 +%macro PRED4x4_LOWPASS 5 + mova %5, %2 + pavgb %2, %3 + pxor %3, %5 + mova %1, %4 + pand %3, [pb_1] + psubusb %2, %3 + pavgb %1, %2 +%endmacro + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_top_dc_8(uint8_t *src, int has_topleft, int has_topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED8x8L_TOP_DC 0 +cglobal pred8x8l_top_dc_8, 4,4 + sub r0, r3 + pxor mm7, mm7 + movq mm0, [r0-8] + movq mm3, [r0] + movq mm1, [r0+8] + movq mm2, mm3 + movq mm4, mm3 + PALIGNR mm2, mm0, 7, mm0 + PALIGNR mm1, mm4, 1, mm4 + test r1d, r1d ; top_left + jz .fix_lt_2 + test r2d, r2d ; top_right + jz .fix_tr_1 + jmp .body +.fix_lt_2: + movq mm5, mm3 + pxor mm5, mm2 + psllq mm5, 56 + psrlq mm5, 56 + pxor mm2, mm5 + test r2d, r2d ; top_right + jnz .body +.fix_tr_1: + movq mm5, mm3 + pxor mm5, mm1 + psrlq mm5, 56 + psllq mm5, 56 + pxor mm1, mm5 +.body: + PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5 + psadbw mm7, mm0 + paddw mm7, [pw_4] + psrlw mm7, 3 + pshufw mm7, mm7, 0 + packuswb mm7, mm7 +%rep 3 + movq [r0+r3*1], mm7 + movq [r0+r3*2], mm7 + lea r0, [r0+r3*2] +%endrep + movq [r0+r3*1], mm7 + movq [r0+r3*2], mm7 + RET +%endmacro + +INIT_MMX mmxext +PRED8x8L_TOP_DC +INIT_MMX ssse3 +PRED8x8L_TOP_DC + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_dc_8(uint8_t *src, int has_topleft, int has_topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- + +%macro PRED8x8L_DC 0 +cglobal pred8x8l_dc_8, 4,5 + sub r0, r3 + lea r4, [r0+r3*2] + movq mm0, [r0+r3*1-8] + punpckhbw mm0, [r0+r3*0-8] + movq mm1, [r4+r3*1-8] + punpckhbw mm1, [r0+r3*2-8] + mov r4, r0 + punpckhwd mm1, mm0 + lea r0, [r0+r3*4] + movq mm2, [r0+r3*1-8] + punpckhbw mm2, [r0+r3*0-8] + lea r0, [r0+r3*2] + movq mm3, [r0+r3*1-8] + punpckhbw mm3, [r0+r3*0-8] + punpckhwd mm3, mm2 + punpckhdq mm3, mm1 + lea r0, [r0+r3*2] + movq mm0, [r0+r3*0-8] + movq mm1, [r4] + mov r0, r4 + movq mm4, mm3 + movq mm2, mm3 + PALIGNR mm4, mm0, 7, mm0 + PALIGNR mm1, mm2, 1, mm2 + test r1d, r1d + jnz .do_left +.fix_lt_1: + movq mm5, mm3 + pxor mm5, mm4 + psrlq mm5, 56 + psllq mm5, 48 + pxor mm1, mm5 + jmp .do_left +.fix_lt_2: + movq mm5, mm3 + pxor mm5, mm2 + psllq mm5, 56 + psrlq mm5, 56 + pxor mm2, mm5 + test r2d, r2d + jnz .body +.fix_tr_1: + movq mm5, mm3 + pxor mm5, mm1 + psrlq mm5, 56 + psllq mm5, 56 + pxor mm1, mm5 + jmp .body +.do_left: + movq mm0, mm4 + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 + movq mm4, mm0 + movq mm7, mm2 + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 + psllq mm1, 56 + PALIGNR mm7, mm1, 7, mm3 + movq mm0, [r0-8] + movq mm3, [r0] + movq mm1, [r0+8] + movq mm2, mm3 + movq mm4, mm3 + PALIGNR mm2, mm0, 7, mm0 + PALIGNR mm1, mm4, 1, mm4 + test r1d, r1d + jz .fix_lt_2 + test r2d, r2d + jz .fix_tr_1 +.body: + lea r1, [r0+r3*2] + PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 + pxor mm0, mm0 + pxor mm1, mm1 + lea r2, [r1+r3*2] + psadbw mm0, mm7 + psadbw mm1, mm6 + paddw mm0, [pw_8] + paddw mm0, mm1 + lea r4, [r2+r3*2] + psrlw mm0, 4 + pshufw mm0, mm0, 0 + packuswb mm0, mm0 + movq [r0+r3*1], mm0 + movq [r0+r3*2], mm0 + movq [r1+r3*1], mm0 + movq [r1+r3*2], mm0 + movq [r2+r3*1], mm0 + movq [r2+r3*2], mm0 + movq [r4+r3*1], mm0 + movq [r4+r3*2], mm0 + RET +%endmacro + +INIT_MMX mmxext +PRED8x8L_DC +INIT_MMX ssse3 +PRED8x8L_DC + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_horizontal_8(uint8_t *src, int has_topleft, +; int has_topright, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +%macro PRED8x8L_HORIZONTAL 0 +cglobal pred8x8l_horizontal_8, 4,4 + sub r0, r3 + lea r2, [r0+r3*2] + movq mm0, [r0+r3*1-8] + test r1d, r1d + lea r1, [r0+r3] + cmovnz r1, r0 + punpckhbw mm0, [r1+r3*0-8] + movq mm1, [r2+r3*1-8] + punpckhbw mm1, [r0+r3*2-8] + mov r2, r0 + punpckhwd mm1, mm0 + lea r0, [r0+r3*4] + movq mm2, [r0+r3*1-8] + punpckhbw mm2, [r0+r3*0-8] + lea r0, [r0+r3*2] + movq mm3, [r0+r3*1-8] + punpckhbw mm3, [r0+r3*0-8] + punpckhwd mm3, mm2 + punpckhdq mm3, mm1 + lea r0, [r0+r3*2] + movq mm0, [r0+r3*0-8] + movq mm1, [r1+r3*0-8] + mov r0, r2 + movq mm4, mm3 + movq mm2, mm3 + PALIGNR mm4, mm0, 7, mm0 + PALIGNR mm1, mm2, 1, mm2 + movq mm0, mm4 + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 + movq mm4, mm0 + movq mm7, mm2 + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 + psllq mm1, 56 + PALIGNR mm7, mm1, 7, mm3 + movq mm3, mm7 + lea r1, [r0+r3*2] + movq mm7, mm3 + punpckhbw mm3, mm3 + punpcklbw mm7, mm7 + pshufw mm0, mm3, 0xff + pshufw mm1, mm3, 0xaa + lea r2, [r1+r3*2] + pshufw mm2, mm3, 0x55 + pshufw mm3, mm3, 0x00 + pshufw mm4, mm7, 0xff + pshufw mm5, mm7, 0xaa + pshufw mm6, mm7, 0x55 + pshufw mm7, mm7, 0x00 + movq [r0+r3*1], mm0 + movq [r0+r3*2], mm1 + movq [r1+r3*1], mm2 + movq [r1+r3*2], mm3 + movq [r2+r3*1], mm4 + movq [r2+r3*2], mm5 + lea r0, [r2+r3*2] + movq [r0+r3*1], mm6 + movq [r0+r3*2], mm7 + RET +%endmacro + +INIT_MMX mmxext +PRED8x8L_HORIZONTAL +INIT_MMX ssse3 +PRED8x8L_HORIZONTAL + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_vertical_8(uint8_t *src, int has_topleft, int has_topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- + +%macro PRED8x8L_VERTICAL 0 +cglobal pred8x8l_vertical_8, 4,4 + sub r0, r3 + movq mm0, [r0-8] + movq mm3, [r0] + movq mm1, [r0+8] + movq mm2, mm3 + movq mm4, mm3 + PALIGNR mm2, mm0, 7, mm0 + PALIGNR mm1, mm4, 1, mm4 + test r1d, r1d ; top_left + jz .fix_lt_2 + test r2d, r2d ; top_right + jz .fix_tr_1 + jmp .body +.fix_lt_2: + movq mm5, mm3 + pxor mm5, mm2 + psllq mm5, 56 + psrlq mm5, 56 + pxor mm2, mm5 + test r2d, r2d ; top_right + jnz .body +.fix_tr_1: + movq mm5, mm3 + pxor mm5, mm1 + psrlq mm5, 56 + psllq mm5, 56 + pxor mm1, mm5 +.body: + PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5 +%rep 3 + movq [r0+r3*1], mm0 + movq [r0+r3*2], mm0 + lea r0, [r0+r3*2] +%endrep + movq [r0+r3*1], mm0 + movq [r0+r3*2], mm0 + RET +%endmacro + +INIT_MMX mmxext +PRED8x8L_VERTICAL +INIT_MMX ssse3 +PRED8x8L_VERTICAL + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_down_left_8(uint8_t *src, int has_topleft, +; int has_topright, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +INIT_MMX mmxext +cglobal pred8x8l_down_left_8, 4,5 + sub r0, r3 + movq mm0, [r0-8] + movq mm3, [r0] + movq mm1, [r0+8] + movq mm2, mm3 + movq mm4, mm3 + PALIGNR mm2, mm0, 7, mm0 + PALIGNR mm1, mm4, 1, mm4 + test r1d, r1d + jz .fix_lt_2 + test r2d, r2d + jz .fix_tr_1 + jmp .do_top +.fix_lt_2: + movq mm5, mm3 + pxor mm5, mm2 + psllq mm5, 56 + psrlq mm5, 56 + pxor mm2, mm5 + test r2d, r2d + jnz .do_top +.fix_tr_1: + movq mm5, mm3 + pxor mm5, mm1 + psrlq mm5, 56 + psllq mm5, 56 + pxor mm1, mm5 + jmp .do_top +.fix_tr_2: + punpckhbw mm3, mm3 + pshufw mm1, mm3, 0xFF + jmp .do_topright +.do_top: + PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 + movq mm7, mm4 + test r2d, r2d + jz .fix_tr_2 + movq mm0, [r0+8] + movq mm5, mm0 + movq mm2, mm0 + movq mm4, mm0 + psrlq mm5, 56 + PALIGNR mm2, mm3, 7, mm3 + PALIGNR mm5, mm4, 1, mm4 + PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4 +.do_topright: + lea r1, [r0+r3*2] + movq mm6, mm1 + psrlq mm1, 56 + movq mm4, mm1 + lea r2, [r1+r3*2] + movq mm2, mm6 + PALIGNR mm2, mm7, 1, mm0 + movq mm3, mm6 + PALIGNR mm3, mm7, 7, mm0 + PALIGNR mm4, mm6, 1, mm0 + movq mm5, mm7 + movq mm1, mm7 + movq mm7, mm6 + lea r4, [r2+r3*2] + psllq mm1, 8 + PRED4x4_LOWPASS mm0, mm1, mm2, mm5, mm6 + PRED4x4_LOWPASS mm1, mm3, mm4, mm7, mm6 + movq [r4+r3*2], mm1 + movq mm2, mm0 + psllq mm1, 8 + psrlq mm2, 56 + psllq mm0, 8 + por mm1, mm2 + movq [r4+r3*1], mm1 + movq mm2, mm0 + psllq mm1, 8 + psrlq mm2, 56 + psllq mm0, 8 + por mm1, mm2 + movq [r2+r3*2], mm1 + movq mm2, mm0 + psllq mm1, 8 + psrlq mm2, 56 + psllq mm0, 8 + por mm1, mm2 + movq [r2+r3*1], mm1 + movq mm2, mm0 + psllq mm1, 8 + psrlq mm2, 56 + psllq mm0, 8 + por mm1, mm2 + movq [r1+r3*2], mm1 + movq mm2, mm0 + psllq mm1, 8 + psrlq mm2, 56 + psllq mm0, 8 + por mm1, mm2 + movq [r1+r3*1], mm1 + movq mm2, mm0 + psllq mm1, 8 + psrlq mm2, 56 + psllq mm0, 8 + por mm1, mm2 + movq [r0+r3*2], mm1 + psllq mm1, 8 + psrlq mm0, 56 + por mm1, mm0 + movq [r0+r3*1], mm1 + RET + +%macro PRED8x8L_DOWN_LEFT 0 +cglobal pred8x8l_down_left_8, 4,4 + sub r0, r3 + movq mm0, [r0-8] + movq mm3, [r0] + movq mm1, [r0+8] + movq mm2, mm3 + movq mm4, mm3 + PALIGNR mm2, mm0, 7, mm0 + PALIGNR mm1, mm4, 1, mm4 + test r1d, r1d ; top_left + jz .fix_lt_2 + test r2d, r2d ; top_right + jz .fix_tr_1 + jmp .do_top +.fix_lt_2: + movq mm5, mm3 + pxor mm5, mm2 + psllq mm5, 56 + psrlq mm5, 56 + pxor mm2, mm5 + test r2d, r2d ; top_right + jnz .do_top +.fix_tr_1: + movq mm5, mm3 + pxor mm5, mm1 + psrlq mm5, 56 + psllq mm5, 56 + pxor mm1, mm5 + jmp .do_top +.fix_tr_2: + punpckhbw mm3, mm3 + pshufw mm1, mm3, 0xFF + jmp .do_topright +.do_top: + PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 + movq2dq xmm3, mm4 + test r2d, r2d ; top_right + jz .fix_tr_2 + movq mm0, [r0+8] + movq mm5, mm0 + movq mm2, mm0 + movq mm4, mm0 + psrlq mm5, 56 + PALIGNR mm2, mm3, 7, mm3 + PALIGNR mm5, mm4, 1, mm4 + PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4 +.do_topright: + movq2dq xmm4, mm1 + psrlq mm1, 56 + movq2dq xmm5, mm1 + lea r1, [r0+r3*2] + pslldq xmm4, 8 + por xmm3, xmm4 + movdqa xmm2, xmm3 + psrldq xmm2, 1 + pslldq xmm5, 15 + por xmm2, xmm5 + lea r2, [r1+r3*2] + movdqa xmm1, xmm3 + pslldq xmm1, 1 +INIT_XMM cpuname + PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm4 + psrldq xmm0, 1 + movq [r0+r3*1], xmm0 + psrldq xmm0, 1 + movq [r0+r3*2], xmm0 + psrldq xmm0, 1 + lea r0, [r2+r3*2] + movq [r1+r3*1], xmm0 + psrldq xmm0, 1 + movq [r1+r3*2], xmm0 + psrldq xmm0, 1 + movq [r2+r3*1], xmm0 + psrldq xmm0, 1 + movq [r2+r3*2], xmm0 + psrldq xmm0, 1 + movq [r0+r3*1], xmm0 + psrldq xmm0, 1 + movq [r0+r3*2], xmm0 + RET +%endmacro + +INIT_MMX sse2 +PRED8x8L_DOWN_LEFT +INIT_MMX ssse3 +PRED8x8L_DOWN_LEFT + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_down_right_8_mmxext(uint8_t *src, int has_topleft, +; int has_topright, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +INIT_MMX mmxext +cglobal pred8x8l_down_right_8, 4,5 + sub r0, r3 + lea r4, [r0+r3*2] + movq mm0, [r0+r3*1-8] + punpckhbw mm0, [r0+r3*0-8] + movq mm1, [r4+r3*1-8] + punpckhbw mm1, [r0+r3*2-8] + mov r4, r0 + punpckhwd mm1, mm0 + lea r0, [r0+r3*4] + movq mm2, [r0+r3*1-8] + punpckhbw mm2, [r0+r3*0-8] + lea r0, [r0+r3*2] + movq mm3, [r0+r3*1-8] + punpckhbw mm3, [r0+r3*0-8] + punpckhwd mm3, mm2 + punpckhdq mm3, mm1 + lea r0, [r0+r3*2] + movq mm0, [r0+r3*0-8] + movq mm1, [r4] + mov r0, r4 + movq mm4, mm3 + movq mm2, mm3 + PALIGNR mm4, mm0, 7, mm0 + PALIGNR mm1, mm2, 1, mm2 + test r1d, r1d ; top_left + jz .fix_lt_1 +.do_left: + movq mm0, mm4 + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 + movq mm4, mm0 + movq mm7, mm2 + movq mm6, mm2 + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 + psllq mm1, 56 + PALIGNR mm7, mm1, 7, mm3 + movq mm0, [r0-8] + movq mm3, [r0] + movq mm1, [r0+8] + movq mm2, mm3 + movq mm4, mm3 + PALIGNR mm2, mm0, 7, mm0 + PALIGNR mm1, mm4, 1, mm4 + test r1d, r1d ; top_left + jz .fix_lt_2 + test r2d, r2d ; top_right + jz .fix_tr_1 +.do_top: + PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 + movq mm5, mm4 + jmp .body +.fix_lt_1: + movq mm5, mm3 + pxor mm5, mm4 + psrlq mm5, 56 + psllq mm5, 48 + pxor mm1, mm5 + jmp .do_left +.fix_lt_2: + movq mm5, mm3 + pxor mm5, mm2 + psllq mm5, 56 + psrlq mm5, 56 + pxor mm2, mm5 + test r2d, r2d ; top_right + jnz .do_top +.fix_tr_1: + movq mm5, mm3 + pxor mm5, mm1 + psrlq mm5, 56 + psllq mm5, 56 + pxor mm1, mm5 + jmp .do_top +.body: + lea r1, [r0+r3*2] + movq mm1, mm7 + movq mm7, mm5 + movq mm5, mm6 + movq mm2, mm7 + lea r2, [r1+r3*2] + PALIGNR mm2, mm6, 1, mm0 + movq mm3, mm7 + PALIGNR mm3, mm6, 7, mm0 + movq mm4, mm7 + lea r4, [r2+r3*2] + psrlq mm4, 8 + PRED4x4_LOWPASS mm0, mm1, mm2, mm5, mm6 + PRED4x4_LOWPASS mm1, mm3, mm4, mm7, mm6 + movq [r4+r3*2], mm0 + movq mm2, mm1 + psrlq mm0, 8 + psllq mm2, 56 + psrlq mm1, 8 + por mm0, mm2 + movq [r4+r3*1], mm0 + movq mm2, mm1 + psrlq mm0, 8 + psllq mm2, 56 + psrlq mm1, 8 + por mm0, mm2 + movq [r2+r3*2], mm0 + movq mm2, mm1 + psrlq mm0, 8 + psllq mm2, 56 + psrlq mm1, 8 + por mm0, mm2 + movq [r2+r3*1], mm0 + movq mm2, mm1 + psrlq mm0, 8 + psllq mm2, 56 + psrlq mm1, 8 + por mm0, mm2 + movq [r1+r3*2], mm0 + movq mm2, mm1 + psrlq mm0, 8 + psllq mm2, 56 + psrlq mm1, 8 + por mm0, mm2 + movq [r1+r3*1], mm0 + movq mm2, mm1 + psrlq mm0, 8 + psllq mm2, 56 + psrlq mm1, 8 + por mm0, mm2 + movq [r0+r3*2], mm0 + psrlq mm0, 8 + psllq mm1, 56 + por mm0, mm1 + movq [r0+r3*1], mm0 + RET + +%macro PRED8x8L_DOWN_RIGHT 0 +cglobal pred8x8l_down_right_8, 4,5 + sub r0, r3 + lea r4, [r0+r3*2] + movq mm0, [r0+r3*1-8] + punpckhbw mm0, [r0+r3*0-8] + movq mm1, [r4+r3*1-8] + punpckhbw mm1, [r0+r3*2-8] + mov r4, r0 + punpckhwd mm1, mm0 + lea r0, [r0+r3*4] + movq mm2, [r0+r3*1-8] + punpckhbw mm2, [r0+r3*0-8] + lea r0, [r0+r3*2] + movq mm3, [r0+r3*1-8] + punpckhbw mm3, [r0+r3*0-8] + punpckhwd mm3, mm2 + punpckhdq mm3, mm1 + lea r0, [r0+r3*2] + movq mm0, [r0+r3*0-8] + movq mm1, [r4] + mov r0, r4 + movq mm4, mm3 + movq mm2, mm3 + PALIGNR mm4, mm0, 7, mm0 + PALIGNR mm1, mm2, 1, mm2 + test r1d, r1d + jz .fix_lt_1 + jmp .do_left +.fix_lt_1: + movq mm5, mm3 + pxor mm5, mm4 + psrlq mm5, 56 + psllq mm5, 48 + pxor mm1, mm5 + jmp .do_left +.fix_lt_2: + movq mm5, mm3 + pxor mm5, mm2 + psllq mm5, 56 + psrlq mm5, 56 + pxor mm2, mm5 + test r2d, r2d + jnz .do_top +.fix_tr_1: + movq mm5, mm3 + pxor mm5, mm1 + psrlq mm5, 56 + psllq mm5, 56 + pxor mm1, mm5 + jmp .do_top +.do_left: + movq mm0, mm4 + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 + movq mm4, mm0 + movq mm7, mm2 + movq2dq xmm3, mm2 + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 + psllq mm1, 56 + PALIGNR mm7, mm1, 7, mm3 + movq2dq xmm1, mm7 + movq mm0, [r0-8] + movq mm3, [r0] + movq mm1, [r0+8] + movq mm2, mm3 + movq mm4, mm3 + PALIGNR mm2, mm0, 7, mm0 + PALIGNR mm1, mm4, 1, mm4 + test r1d, r1d + jz .fix_lt_2 + test r2d, r2d + jz .fix_tr_1 +.do_top: + PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 + movq2dq xmm4, mm4 + lea r1, [r0+r3*2] + movdqa xmm0, xmm3 + pslldq xmm4, 8 + por xmm3, xmm4 + lea r2, [r1+r3*2] + pslldq xmm4, 1 + por xmm1, xmm4 + psrldq xmm0, 7 + pslldq xmm0, 15 + psrldq xmm0, 7 + por xmm1, xmm0 + lea r0, [r2+r3*2] + movdqa xmm2, xmm3 + psrldq xmm2, 1 +INIT_XMM cpuname + PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm4 + movdqa xmm1, xmm0 + psrldq xmm1, 1 + movq [r0+r3*2], xmm0 + movq [r0+r3*1], xmm1 + psrldq xmm0, 2 + psrldq xmm1, 2 + movq [r2+r3*2], xmm0 + movq [r2+r3*1], xmm1 + psrldq xmm0, 2 + psrldq xmm1, 2 + movq [r1+r3*2], xmm0 + movq [r1+r3*1], xmm1 + psrldq xmm0, 2 + psrldq xmm1, 2 + movq [r4+r3*2], xmm0 + movq [r4+r3*1], xmm1 + RET +%endmacro + +INIT_MMX sse2 +PRED8x8L_DOWN_RIGHT +INIT_MMX ssse3 +PRED8x8L_DOWN_RIGHT + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_vertical_right_8(uint8_t *src, int has_topleft, +; int has_topright, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +INIT_MMX mmxext +cglobal pred8x8l_vertical_right_8, 4,5 + sub r0, r3 + lea r4, [r0+r3*2] + movq mm0, [r0+r3*1-8] + punpckhbw mm0, [r0+r3*0-8] + movq mm1, [r4+r3*1-8] + punpckhbw mm1, [r0+r3*2-8] + mov r4, r0 + punpckhwd mm1, mm0 + lea r0, [r0+r3*4] + movq mm2, [r0+r3*1-8] + punpckhbw mm2, [r0+r3*0-8] + lea r0, [r0+r3*2] + movq mm3, [r0+r3*1-8] + punpckhbw mm3, [r0+r3*0-8] + punpckhwd mm3, mm2 + punpckhdq mm3, mm1 + lea r0, [r0+r3*2] + movq mm0, [r0+r3*0-8] + movq mm1, [r4] + mov r0, r4 + movq mm4, mm3 + movq mm2, mm3 + PALIGNR mm4, mm0, 7, mm0 + PALIGNR mm1, mm2, 1, mm2 + test r1d, r1d + jz .fix_lt_1 + jmp .do_left +.fix_lt_1: + movq mm5, mm3 + pxor mm5, mm4 + psrlq mm5, 56 + psllq mm5, 48 + pxor mm1, mm5 + jmp .do_left +.fix_lt_2: + movq mm5, mm3 + pxor mm5, mm2 + psllq mm5, 56 + psrlq mm5, 56 + pxor mm2, mm5 + test r2d, r2d + jnz .do_top +.fix_tr_1: + movq mm5, mm3 + pxor mm5, mm1 + psrlq mm5, 56 + psllq mm5, 56 + pxor mm1, mm5 + jmp .do_top +.do_left: + movq mm0, mm4 + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 + movq mm7, mm2 + movq mm0, [r0-8] + movq mm3, [r0] + movq mm1, [r0+8] + movq mm2, mm3 + movq mm4, mm3 + PALIGNR mm2, mm0, 7, mm0 + PALIGNR mm1, mm4, 1, mm4 + test r1d, r1d + jz .fix_lt_2 + test r2d, r2d + jz .fix_tr_1 +.do_top: + PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 + lea r1, [r0+r3*2] + movq mm2, mm6 + movq mm3, mm6 + PALIGNR mm3, mm7, 7, mm0 + PALIGNR mm6, mm7, 6, mm1 + movq mm4, mm3 + pavgb mm3, mm2 + lea r2, [r1+r3*2] + PRED4x4_LOWPASS mm0, mm6, mm2, mm4, mm5 + movq [r0+r3*1], mm3 + movq [r0+r3*2], mm0 + movq mm5, mm0 + movq mm6, mm3 + movq mm1, mm7 + movq mm2, mm1 + psllq mm2, 8 + movq mm3, mm1 + psllq mm3, 16 + lea r4, [r2+r3*2] + PRED4x4_LOWPASS mm0, mm1, mm3, mm2, mm4 + PALIGNR mm6, mm0, 7, mm2 + movq [r1+r3*1], mm6 + psllq mm0, 8 + PALIGNR mm5, mm0, 7, mm1 + movq [r1+r3*2], mm5 + psllq mm0, 8 + PALIGNR mm6, mm0, 7, mm2 + movq [r2+r3*1], mm6 + psllq mm0, 8 + PALIGNR mm5, mm0, 7, mm1 + movq [r2+r3*2], mm5 + psllq mm0, 8 + PALIGNR mm6, mm0, 7, mm2 + movq [r4+r3*1], mm6 + psllq mm0, 8 + PALIGNR mm5, mm0, 7, mm1 + movq [r4+r3*2], mm5 + RET + +%macro PRED8x8L_VERTICAL_RIGHT 0 +cglobal pred8x8l_vertical_right_8, 4,5,7 + ; manually spill XMM registers for Win64 because + ; the code here is initialized with INIT_MMX + WIN64_SPILL_XMM 7 + sub r0, r3 + lea r4, [r0+r3*2] + movq mm0, [r0+r3*1-8] + punpckhbw mm0, [r0+r3*0-8] + movq mm1, [r4+r3*1-8] + punpckhbw mm1, [r0+r3*2-8] + mov r4, r0 + punpckhwd mm1, mm0 + lea r0, [r0+r3*4] + movq mm2, [r0+r3*1-8] + punpckhbw mm2, [r0+r3*0-8] + lea r0, [r0+r3*2] + movq mm3, [r0+r3*1-8] + punpckhbw mm3, [r0+r3*0-8] + punpckhwd mm3, mm2 + punpckhdq mm3, mm1 + lea r0, [r0+r3*2] + movq mm0, [r0+r3*0-8] + movq mm1, [r4] + mov r0, r4 + movq mm4, mm3 + movq mm2, mm3 + PALIGNR mm4, mm0, 7, mm0 + PALIGNR mm1, mm2, 1, mm2 + test r1d, r1d + jnz .do_left +.fix_lt_1: + movq mm5, mm3 + pxor mm5, mm4 + psrlq mm5, 56 + psllq mm5, 48 + pxor mm1, mm5 + jmp .do_left +.fix_lt_2: + movq mm5, mm3 + pxor mm5, mm2 + psllq mm5, 56 + psrlq mm5, 56 + pxor mm2, mm5 + test r2d, r2d + jnz .do_top +.fix_tr_1: + movq mm5, mm3 + pxor mm5, mm1 + psrlq mm5, 56 + psllq mm5, 56 + pxor mm1, mm5 + jmp .do_top +.do_left: + movq mm0, mm4 + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 + movq2dq xmm0, mm2 + movq mm0, [r0-8] + movq mm3, [r0] + movq mm1, [r0+8] + movq mm2, mm3 + movq mm4, mm3 + PALIGNR mm2, mm0, 7, mm0 + PALIGNR mm1, mm4, 1, mm4 + test r1d, r1d + jz .fix_lt_2 + test r2d, r2d + jz .fix_tr_1 +.do_top: + PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 + lea r1, [r0+r3*2] + movq2dq xmm4, mm6 + pslldq xmm4, 8 + por xmm0, xmm4 + movdqa xmm6, [pw_ff00] + movdqa xmm1, xmm0 + lea r2, [r1+r3*2] + movdqa xmm2, xmm0 + movdqa xmm3, xmm0 + pslldq xmm0, 1 + pslldq xmm1, 2 + pavgb xmm2, xmm0 +INIT_XMM cpuname + PRED4x4_LOWPASS xmm4, xmm3, xmm1, xmm0, xmm5 + pandn xmm6, xmm4 + movdqa xmm5, xmm4 + psrlw xmm4, 8 + packuswb xmm6, xmm4 + movhlps xmm4, xmm6 + movhps [r0+r3*2], xmm5 + movhps [r0+r3*1], xmm2 + psrldq xmm5, 4 + movss xmm5, xmm6 + psrldq xmm2, 4 + movss xmm2, xmm4 + lea r0, [r2+r3*2] + psrldq xmm5, 1 + psrldq xmm2, 1 + movq [r0+r3*2], xmm5 + movq [r0+r3*1], xmm2 + psrldq xmm5, 1 + psrldq xmm2, 1 + movq [r2+r3*2], xmm5 + movq [r2+r3*1], xmm2 + psrldq xmm5, 1 + psrldq xmm2, 1 + movq [r1+r3*2], xmm5 + movq [r1+r3*1], xmm2 + RET +%endmacro + +INIT_MMX sse2 +PRED8x8L_VERTICAL_RIGHT +INIT_MMX ssse3 +PRED8x8L_VERTICAL_RIGHT + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_vertical_left_8(uint8_t *src, int has_topleft, +; int has_topright, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +%macro PRED8x8L_VERTICAL_LEFT 0 +cglobal pred8x8l_vertical_left_8, 4,4 + sub r0, r3 + movq mm0, [r0-8] + movq mm3, [r0] + movq mm1, [r0+8] + movq mm2, mm3 + movq mm4, mm3 + PALIGNR mm2, mm0, 7, mm0 + PALIGNR mm1, mm4, 1, mm4 + test r1d, r1d + jz .fix_lt_2 + test r2d, r2d + jz .fix_tr_1 + jmp .do_top +.fix_lt_2: + movq mm5, mm3 + pxor mm5, mm2 + psllq mm5, 56 + psrlq mm5, 56 + pxor mm2, mm5 + test r2d, r2d + jnz .do_top +.fix_tr_1: + movq mm5, mm3 + pxor mm5, mm1 + psrlq mm5, 56 + psllq mm5, 56 + pxor mm1, mm5 + jmp .do_top +.fix_tr_2: + punpckhbw mm3, mm3 + pshufw mm1, mm3, 0xFF + jmp .do_topright +.do_top: + PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 + movq2dq xmm4, mm4 + test r2d, r2d + jz .fix_tr_2 + movq mm0, [r0+8] + movq mm5, mm0 + movq mm2, mm0 + movq mm4, mm0 + psrlq mm5, 56 + PALIGNR mm2, mm3, 7, mm3 + PALIGNR mm5, mm4, 1, mm4 + PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4 +.do_topright: + movq2dq xmm3, mm1 + lea r1, [r0+r3*2] + pslldq xmm3, 8 + por xmm4, xmm3 + movdqa xmm2, xmm4 + movdqa xmm1, xmm4 + movdqa xmm3, xmm4 + psrldq xmm2, 1 + pslldq xmm1, 1 + pavgb xmm3, xmm2 + lea r2, [r1+r3*2] +INIT_XMM cpuname + PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm4, xmm5 + psrldq xmm0, 1 + movq [r0+r3*1], xmm3 + movq [r0+r3*2], xmm0 + lea r0, [r2+r3*2] + psrldq xmm3, 1 + psrldq xmm0, 1 + movq [r1+r3*1], xmm3 + movq [r1+r3*2], xmm0 + psrldq xmm3, 1 + psrldq xmm0, 1 + movq [r2+r3*1], xmm3 + movq [r2+r3*2], xmm0 + psrldq xmm3, 1 + psrldq xmm0, 1 + movq [r0+r3*1], xmm3 + movq [r0+r3*2], xmm0 + RET +%endmacro + +INIT_MMX sse2 +PRED8x8L_VERTICAL_LEFT +INIT_MMX ssse3 +PRED8x8L_VERTICAL_LEFT + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_horizontal_up_8(uint8_t *src, int has_topleft, +; int has_topright, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +%macro PRED8x8L_HORIZONTAL_UP 0 +cglobal pred8x8l_horizontal_up_8, 4,4 + sub r0, r3 + lea r2, [r0+r3*2] + movq mm0, [r0+r3*1-8] + test r1d, r1d + lea r1, [r0+r3] + cmovnz r1, r0 + punpckhbw mm0, [r1+r3*0-8] + movq mm1, [r2+r3*1-8] + punpckhbw mm1, [r0+r3*2-8] + mov r2, r0 + punpckhwd mm1, mm0 + lea r0, [r0+r3*4] + movq mm2, [r0+r3*1-8] + punpckhbw mm2, [r0+r3*0-8] + lea r0, [r0+r3*2] + movq mm3, [r0+r3*1-8] + punpckhbw mm3, [r0+r3*0-8] + punpckhwd mm3, mm2 + punpckhdq mm3, mm1 + lea r0, [r0+r3*2] + movq mm0, [r0+r3*0-8] + movq mm1, [r1+r3*0-8] + mov r0, r2 + movq mm4, mm3 + movq mm2, mm3 + PALIGNR mm4, mm0, 7, mm0 + PALIGNR mm1, mm2, 1, mm2 + movq mm0, mm4 + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 + movq mm4, mm0 + movq mm7, mm2 + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 + psllq mm1, 56 + PALIGNR mm7, mm1, 7, mm3 + lea r1, [r0+r3*2] + pshufw mm0, mm7, 00011011b ; l6 l7 l4 l5 l2 l3 l0 l1 + psllq mm7, 56 ; l7 .. .. .. .. .. .. .. + movq mm2, mm0 + psllw mm0, 8 + psrlw mm2, 8 + por mm2, mm0 ; l7 l6 l5 l4 l3 l2 l1 l0 + movq mm3, mm2 + movq mm4, mm2 + movq mm5, mm2 + psrlq mm2, 8 + psrlq mm3, 16 + lea r2, [r1+r3*2] + por mm2, mm7 ; l7 l7 l6 l5 l4 l3 l2 l1 + punpckhbw mm7, mm7 + por mm3, mm7 ; l7 l7 l7 l6 l5 l4 l3 l2 + pavgb mm4, mm2 + PRED4x4_LOWPASS mm1, mm3, mm5, mm2, mm6 + movq mm5, mm4 + punpcklbw mm4, mm1 ; p4 p3 p2 p1 + punpckhbw mm5, mm1 ; p8 p7 p6 p5 + movq mm6, mm5 + movq mm7, mm5 + movq mm0, mm5 + PALIGNR mm5, mm4, 2, mm1 + pshufw mm1, mm6, 11111001b + PALIGNR mm6, mm4, 4, mm2 + pshufw mm2, mm7, 11111110b + PALIGNR mm7, mm4, 6, mm3 + pshufw mm3, mm0, 11111111b + movq [r0+r3*1], mm4 + movq [r0+r3*2], mm5 + lea r0, [r2+r3*2] + movq [r1+r3*1], mm6 + movq [r1+r3*2], mm7 + movq [r2+r3*1], mm0 + movq [r2+r3*2], mm1 + movq [r0+r3*1], mm2 + movq [r0+r3*2], mm3 + RET +%endmacro + +INIT_MMX mmxext +PRED8x8L_HORIZONTAL_UP +INIT_MMX ssse3 +PRED8x8L_HORIZONTAL_UP + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_horizontal_down_8(uint8_t *src, int has_topleft, +; int has_topright, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +INIT_MMX mmxext +cglobal pred8x8l_horizontal_down_8, 4,5 + sub r0, r3 + lea r4, [r0+r3*2] + movq mm0, [r0+r3*1-8] + punpckhbw mm0, [r0+r3*0-8] + movq mm1, [r4+r3*1-8] + punpckhbw mm1, [r0+r3*2-8] + mov r4, r0 + punpckhwd mm1, mm0 + lea r0, [r0+r3*4] + movq mm2, [r0+r3*1-8] + punpckhbw mm2, [r0+r3*0-8] + lea r0, [r0+r3*2] + movq mm3, [r0+r3*1-8] + punpckhbw mm3, [r0+r3*0-8] + punpckhwd mm3, mm2 + punpckhdq mm3, mm1 + lea r0, [r0+r3*2] + movq mm0, [r0+r3*0-8] + movq mm1, [r4] + mov r0, r4 + movq mm4, mm3 + movq mm2, mm3 + PALIGNR mm4, mm0, 7, mm0 + PALIGNR mm1, mm2, 1, mm2 + test r1d, r1d + jnz .do_left +.fix_lt_1: + movq mm5, mm3 + pxor mm5, mm4 + psrlq mm5, 56 + psllq mm5, 48 + pxor mm1, mm5 + jmp .do_left +.fix_lt_2: + movq mm5, mm3 + pxor mm5, mm2 + psllq mm5, 56 + psrlq mm5, 56 + pxor mm2, mm5 + test r2d, r2d + jnz .do_top +.fix_tr_1: + movq mm5, mm3 + pxor mm5, mm1 + psrlq mm5, 56 + psllq mm5, 56 + pxor mm1, mm5 + jmp .do_top +.do_left: + movq mm0, mm4 + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 + movq mm4, mm0 + movq mm7, mm2 + movq mm6, mm2 + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 + psllq mm1, 56 + PALIGNR mm7, mm1, 7, mm3 + movq mm0, [r0-8] + movq mm3, [r0] + movq mm1, [r0+8] + movq mm2, mm3 + movq mm4, mm3 + PALIGNR mm2, mm0, 7, mm0 + PALIGNR mm1, mm4, 1, mm4 + test r1d, r1d + jz .fix_lt_2 + test r2d, r2d + jz .fix_tr_1 +.do_top: + PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 + movq mm5, mm4 + lea r1, [r0+r3*2] + psllq mm7, 56 + movq mm2, mm5 + movq mm3, mm6 + movq mm4, mm2 + PALIGNR mm2, mm6, 7, mm5 + PALIGNR mm6, mm7, 7, mm0 + lea r2, [r1+r3*2] + PALIGNR mm4, mm3, 1, mm7 + movq mm5, mm3 + pavgb mm3, mm6 + PRED4x4_LOWPASS mm0, mm4, mm6, mm5, mm7 + movq mm4, mm2 + movq mm1, mm2 + lea r4, [r2+r3*2] + psrlq mm4, 16 + psrlq mm1, 8 + PRED4x4_LOWPASS mm6, mm4, mm2, mm1, mm5 + movq mm7, mm3 + punpcklbw mm3, mm0 + punpckhbw mm7, mm0 + movq mm1, mm7 + movq mm0, mm7 + movq mm4, mm7 + movq [r4+r3*2], mm3 + PALIGNR mm7, mm3, 2, mm5 + movq [r4+r3*1], mm7 + PALIGNR mm1, mm3, 4, mm5 + movq [r2+r3*2], mm1 + PALIGNR mm0, mm3, 6, mm3 + movq [r2+r3*1], mm0 + movq mm2, mm6 + movq mm3, mm6 + movq [r1+r3*2], mm4 + PALIGNR mm6, mm4, 2, mm5 + movq [r1+r3*1], mm6 + PALIGNR mm2, mm4, 4, mm5 + movq [r0+r3*2], mm2 + PALIGNR mm3, mm4, 6, mm4 + movq [r0+r3*1], mm3 + RET + +%macro PRED8x8L_HORIZONTAL_DOWN 0 +cglobal pred8x8l_horizontal_down_8, 4,5 + sub r0, r3 + lea r4, [r0+r3*2] + movq mm0, [r0+r3*1-8] + punpckhbw mm0, [r0+r3*0-8] + movq mm1, [r4+r3*1-8] + punpckhbw mm1, [r0+r3*2-8] + mov r4, r0 + punpckhwd mm1, mm0 + lea r0, [r0+r3*4] + movq mm2, [r0+r3*1-8] + punpckhbw mm2, [r0+r3*0-8] + lea r0, [r0+r3*2] + movq mm3, [r0+r3*1-8] + punpckhbw mm3, [r0+r3*0-8] + punpckhwd mm3, mm2 + punpckhdq mm3, mm1 + lea r0, [r0+r3*2] + movq mm0, [r0+r3*0-8] + movq mm1, [r4] + mov r0, r4 + movq mm4, mm3 + movq mm2, mm3 + PALIGNR mm4, mm0, 7, mm0 + PALIGNR mm1, mm2, 1, mm2 + test r1d, r1d + jnz .do_left +.fix_lt_1: + movq mm5, mm3 + pxor mm5, mm4 + psrlq mm5, 56 + psllq mm5, 48 + pxor mm1, mm5 + jmp .do_left +.fix_lt_2: + movq mm5, mm3 + pxor mm5, mm2 + psllq mm5, 56 + psrlq mm5, 56 + pxor mm2, mm5 + test r2d, r2d + jnz .do_top +.fix_tr_1: + movq mm5, mm3 + pxor mm5, mm1 + psrlq mm5, 56 + psllq mm5, 56 + pxor mm1, mm5 + jmp .do_top +.fix_tr_2: + punpckhbw mm3, mm3 + pshufw mm1, mm3, 0xFF + jmp .do_topright +.do_left: + movq mm0, mm4 + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 + movq2dq xmm0, mm2 + pslldq xmm0, 8 + movq mm4, mm0 + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 + movq2dq xmm2, mm1 + pslldq xmm2, 15 + psrldq xmm2, 8 + por xmm0, xmm2 + movq mm0, [r0-8] + movq mm3, [r0] + movq mm1, [r0+8] + movq mm2, mm3 + movq mm4, mm3 + PALIGNR mm2, mm0, 7, mm0 + PALIGNR mm1, mm4, 1, mm4 + test r1d, r1d + jz .fix_lt_2 + test r2d, r2d + jz .fix_tr_1 +.do_top: + PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 + movq2dq xmm1, mm4 + test r2d, r2d + jz .fix_tr_2 + movq mm0, [r0+8] + movq mm5, mm0 + movq mm2, mm0 + movq mm4, mm0 + psrlq mm5, 56 + PALIGNR mm2, mm3, 7, mm3 + PALIGNR mm5, mm4, 1, mm4 + PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4 +.do_topright: + movq2dq xmm5, mm1 + pslldq xmm5, 8 + por xmm1, xmm5 +INIT_XMM cpuname + lea r2, [r4+r3*2] + movdqa xmm2, xmm1 + movdqa xmm3, xmm1 + PALIGNR xmm1, xmm0, 7, xmm4 + PALIGNR xmm2, xmm0, 9, xmm5 + lea r1, [r2+r3*2] + PALIGNR xmm3, xmm0, 8, xmm0 + movdqa xmm4, xmm1 + pavgb xmm4, xmm3 + lea r0, [r1+r3*2] + PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm5 + punpcklbw xmm4, xmm0 + movhlps xmm0, xmm4 + movq [r0+r3*2], xmm4 + movq [r2+r3*2], xmm0 + psrldq xmm4, 2 + psrldq xmm0, 2 + movq [r0+r3*1], xmm4 + movq [r2+r3*1], xmm0 + psrldq xmm4, 2 + psrldq xmm0, 2 + movq [r1+r3*2], xmm4 + movq [r4+r3*2], xmm0 + psrldq xmm4, 2 + psrldq xmm0, 2 + movq [r1+r3*1], xmm4 + movq [r4+r3*1], xmm0 + RET +%endmacro + +INIT_MMX sse2 +PRED8x8L_HORIZONTAL_DOWN +INIT_MMX ssse3 +PRED8x8L_HORIZONTAL_DOWN + +;------------------------------------------------------------------------------- +; void ff_pred4x4_dc_8_mmxext(uint8_t *src, const uint8_t *topright, +; ptrdiff_t stride) +;------------------------------------------------------------------------------- + +INIT_MMX mmxext +cglobal pred4x4_dc_8, 3,5 + pxor mm7, mm7 + mov r4, r0 + sub r0, r2 + movd mm0, [r0] + psadbw mm0, mm7 + movzx r1d, byte [r0+r2*1-1] + movd r3d, mm0 + add r3d, r1d + movzx r1d, byte [r0+r2*2-1] + lea r0, [r0+r2*2] + add r3d, r1d + movzx r1d, byte [r0+r2*1-1] + add r3d, r1d + movzx r1d, byte [r0+r2*2-1] + add r3d, r1d + add r3d, 4 + shr r3d, 3 + imul r3d, 0x01010101 + mov [r4+r2*0], r3d + mov [r0+r2*0], r3d + mov [r0+r2*1], r3d + mov [r0+r2*2], r3d + RET + +;----------------------------------------------------------------------------- +; void ff_pred4x4_tm_vp8_8_mmxext(uint8_t *src, const uint8_t *topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- + +%macro PRED4x4_TM 0 +cglobal pred4x4_tm_vp8_8, 3,6 + sub r0, r2 + pxor mm7, mm7 + movd mm0, [r0] + punpcklbw mm0, mm7 + movzx r4d, byte [r0-1] + mov r5d, 2 +.loop: + movzx r1d, byte [r0+r2*1-1] + movzx r3d, byte [r0+r2*2-1] + sub r1d, r4d + sub r3d, r4d + movd mm2, r1d + movd mm4, r3d +%if cpuflag(mmxext) + pshufw mm2, mm2, 0 + pshufw mm4, mm4, 0 +%else + punpcklwd mm2, mm2 + punpcklwd mm4, mm4 + punpckldq mm2, mm2 + punpckldq mm4, mm4 +%endif + paddw mm2, mm0 + paddw mm4, mm0 + packuswb mm2, mm2 + packuswb mm4, mm4 + movd [r0+r2*1], mm2 + movd [r0+r2*2], mm4 + lea r0, [r0+r2*2] + dec r5d + jg .loop + REP_RET +%endmacro + +INIT_MMX mmx +PRED4x4_TM +INIT_MMX mmxext +PRED4x4_TM + +INIT_XMM ssse3 +cglobal pred4x4_tm_vp8_8, 3,3 + sub r0, r2 + movq mm6, [tm_shuf] + pxor mm1, mm1 + movd mm0, [r0] + punpcklbw mm0, mm1 + movd mm7, [r0-4] + pshufb mm7, mm6 + lea r1, [r0+r2*2] + movd mm2, [r0+r2*1-4] + movd mm3, [r0+r2*2-4] + movd mm4, [r1+r2*1-4] + movd mm5, [r1+r2*2-4] + pshufb mm2, mm6 + pshufb mm3, mm6 + pshufb mm4, mm6 + pshufb mm5, mm6 + psubw mm0, mm7 + paddw mm2, mm0 + paddw mm3, mm0 + paddw mm4, mm0 + paddw mm5, mm0 + packuswb mm2, mm2 + packuswb mm3, mm3 + packuswb mm4, mm4 + packuswb mm5, mm5 + movd [r0+r2*1], mm2 + movd [r0+r2*2], mm3 + movd [r1+r2*1], mm4 + movd [r1+r2*2], mm5 + RET + +;----------------------------------------------------------------------------- +; void ff_pred4x4_vertical_vp8_8_mmxext(uint8_t *src, const uint8_t *topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- + +INIT_MMX mmxext +cglobal pred4x4_vertical_vp8_8, 3,3 + sub r0, r2 + movd m1, [r0-1] + movd m0, [r0] + mova m2, m0 ;t0 t1 t2 t3 + punpckldq m0, [r1] ;t0 t1 t2 t3 t4 t5 t6 t7 + lea r1, [r0+r2*2] + psrlq m0, 8 ;t1 t2 t3 t4 + PRED4x4_LOWPASS m3, m1, m0, m2, m4 + movd [r0+r2*1], m3 + movd [r0+r2*2], m3 + movd [r1+r2*1], m3 + movd [r1+r2*2], m3 + RET + +;----------------------------------------------------------------------------- +; void ff_pred4x4_down_left_8_mmxext(uint8_t *src, const uint8_t *topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- +INIT_MMX mmxext +cglobal pred4x4_down_left_8, 3,3 + sub r0, r2 + movq m1, [r0] + punpckldq m1, [r1] + movq m2, m1 + movq m3, m1 + psllq m1, 8 + pxor m2, m1 + psrlq m2, 8 + pxor m2, m3 + PRED4x4_LOWPASS m0, m1, m2, m3, m4 + lea r1, [r0+r2*2] + psrlq m0, 8 + movd [r0+r2*1], m0 + psrlq m0, 8 + movd [r0+r2*2], m0 + psrlq m0, 8 + movd [r1+r2*1], m0 + psrlq m0, 8 + movd [r1+r2*2], m0 + RET + +;------------------------------------------------------------------------------ +; void ff_pred4x4_vertical_left_8_mmxext(uint8_t *src, const uint8_t *topright, +; ptrdiff_t stride) +;------------------------------------------------------------------------------ + +INIT_MMX mmxext +cglobal pred4x4_vertical_left_8, 3,3 + sub r0, r2 + movq m1, [r0] + punpckldq m1, [r1] + movq m3, m1 + movq m2, m1 + psrlq m3, 8 + psrlq m2, 16 + movq m4, m3 + pavgb m4, m1 + PRED4x4_LOWPASS m0, m1, m2, m3, m5 + lea r1, [r0+r2*2] + movh [r0+r2*1], m4 + movh [r0+r2*2], m0 + psrlq m4, 8 + psrlq m0, 8 + movh [r1+r2*1], m4 + movh [r1+r2*2], m0 + RET + +;------------------------------------------------------------------------------ +; void ff_pred4x4_horizontal_up_8_mmxext(uint8_t *src, const uint8_t *topright, +; ptrdiff_t stride) +;------------------------------------------------------------------------------ + +INIT_MMX mmxext +cglobal pred4x4_horizontal_up_8, 3,3 + sub r0, r2 + lea r1, [r0+r2*2] + movd m0, [r0+r2*1-4] + punpcklbw m0, [r0+r2*2-4] + movd m1, [r1+r2*1-4] + punpcklbw m1, [r1+r2*2-4] + punpckhwd m0, m1 + movq m1, m0 + punpckhbw m1, m1 + pshufw m1, m1, 0xFF + punpckhdq m0, m1 + movq m2, m0 + movq m3, m0 + movq m7, m0 + psrlq m2, 16 + psrlq m3, 8 + pavgb m7, m3 + PRED4x4_LOWPASS m4, m0, m2, m3, m5 + punpcklbw m7, m4 + movd [r0+r2*1], m7 + psrlq m7, 16 + movd [r0+r2*2], m7 + psrlq m7, 16 + movd [r1+r2*1], m7 + movd [r1+r2*2], m1 + RET + +;------------------------------------------------------------------------------ +; void ff_pred4x4_horizontal_down_8_mmxext(uint8_t *src, +; const uint8_t *topright, +; ptrdiff_t stride) +;------------------------------------------------------------------------------ + +INIT_MMX mmxext +cglobal pred4x4_horizontal_down_8, 3,3 + sub r0, r2 + lea r1, [r0+r2*2] + movh m0, [r0-4] ; lt .. + punpckldq m0, [r0] ; t3 t2 t1 t0 lt .. .. .. + psllq m0, 8 ; t2 t1 t0 lt .. .. .. .. + movd m1, [r1+r2*2-4] ; l3 + punpcklbw m1, [r1+r2*1-4] ; l2 l3 + movd m2, [r0+r2*2-4] ; l1 + punpcklbw m2, [r0+r2*1-4] ; l0 l1 + punpckhwd m1, m2 ; l0 l1 l2 l3 + punpckhdq m1, m0 ; t2 t1 t0 lt l0 l1 l2 l3 + movq m0, m1 + movq m2, m1 + movq m5, m1 + psrlq m0, 16 ; .. .. t2 t1 t0 lt l0 l1 + psrlq m2, 8 ; .. t2 t1 t0 lt l0 l1 l2 + pavgb m5, m2 + PRED4x4_LOWPASS m3, m1, m0, m2, m4 + punpcklbw m5, m3 + psrlq m3, 32 + PALIGNR m3, m5, 6, m4 + movh [r1+r2*2], m5 + psrlq m5, 16 + movh [r1+r2*1], m5 + psrlq m5, 16 + movh [r0+r2*2], m5 + movh [r0+r2*1], m3 + RET + +;----------------------------------------------------------------------------- +; void ff_pred4x4_vertical_right_8_mmxext(uint8_t *src, +; const uint8_t *topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- + +INIT_MMX mmxext +cglobal pred4x4_vertical_right_8, 3,3 + sub r0, r2 + lea r1, [r0+r2*2] + movh m0, [r0] ; ........t3t2t1t0 + movq m5, m0 + PALIGNR m0, [r0-8], 7, m1 ; ......t3t2t1t0lt + pavgb m5, m0 + PALIGNR m0, [r0+r2*1-8], 7, m1 ; ....t3t2t1t0ltl0 + movq m1, m0 + PALIGNR m0, [r0+r2*2-8], 7, m2 ; ..t3t2t1t0ltl0l1 + movq m2, m0 + PALIGNR m0, [r1+r2*1-8], 7, m3 ; t3t2t1t0ltl0l1l2 + PRED4x4_LOWPASS m3, m1, m0, m2, m4 + movq m1, m3 + psrlq m3, 16 + psllq m1, 48 + movh [r0+r2*1], m5 + movh [r0+r2*2], m3 + PALIGNR m5, m1, 7, m2 + psllq m1, 8 + movh [r1+r2*1], m5 + PALIGNR m3, m1, 7, m1 + movh [r1+r2*2], m3 + RET + +;----------------------------------------------------------------------------- +; void ff_pred4x4_down_right_8_mmxext(uint8_t *src, const uint8_t *topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- + +INIT_MMX mmxext +cglobal pred4x4_down_right_8, 3,3 + sub r0, r2 + lea r1, [r0+r2*2] + movq m1, [r1-8] + movq m2, [r0+r2*1-8] + punpckhbw m2, [r0-8] + movh m3, [r0] + punpckhwd m1, m2 + PALIGNR m3, m1, 5, m1 + movq m1, m3 + PALIGNR m3, [r1+r2*1-8], 7, m4 + movq m2, m3 + PALIGNR m3, [r1+r2*2-8], 7, m4 + PRED4x4_LOWPASS m0, m3, m1, m2, m4 + movh [r1+r2*2], m0 + psrlq m0, 8 + movh [r1+r2*1], m0 + psrlq m0, 8 + movh [r0+r2*2], m0 + psrlq m0, 8 + movh [r0+r2*1], m0 + RET diff --git a/libs/ffvpx/libavcodec/x86/h264_intrapred_10bit.asm b/libs/ffvpx/libavcodec/x86/h264_intrapred_10bit.asm new file mode 100644 index 000000000..629e0a72e --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/h264_intrapred_10bit.asm @@ -0,0 +1,1199 @@ +;***************************************************************************** +;* MMX/SSE2/AVX-optimized 10-bit H.264 intra prediction code +;***************************************************************************** +;* Copyright (C) 2005-2011 x264 project +;* +;* Authors: Daniel Kang <daniel.d.kang@gmail.com> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA + +cextern pw_1023 +%define pw_pixel_max pw_1023 +cextern pw_512 +cextern pw_16 +cextern pw_8 +cextern pw_4 +cextern pw_2 +cextern pw_1 +cextern pd_16 + +pw_m32101234: dw -3, -2, -1, 0, 1, 2, 3, 4 +pw_m3: times 8 dw -3 +pd_17: times 4 dd 17 + +SECTION .text + +; dest, left, right, src +; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2 +%macro PRED4x4_LOWPASS 4 + paddw %2, %3 + psrlw %2, 1 + pavgw %1, %4, %2 +%endmacro + +;----------------------------------------------------------------------------- +; void ff_pred4x4_down_right_10(pixel *src, const pixel *topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED4x4_DR 0 +cglobal pred4x4_down_right_10, 3, 3 + sub r0, r2 + lea r1, [r0+r2*2] + movhps m1, [r1-8] + movhps m2, [r0+r2*1-8] + movhps m4, [r0-8] + punpckhwd m2, m4 + movq m3, [r0] + punpckhdq m1, m2 + PALIGNR m3, m1, 10, m1 + movhps m4, [r1+r2*1-8] + PALIGNR m0, m3, m4, 14, m4 + movhps m4, [r1+r2*2-8] + PALIGNR m2, m0, m4, 14, m4 + PRED4x4_LOWPASS m0, m2, m3, m0 + movq [r1+r2*2], m0 + psrldq m0, 2 + movq [r1+r2*1], m0 + psrldq m0, 2 + movq [r0+r2*2], m0 + psrldq m0, 2 + movq [r0+r2*1], m0 + RET +%endmacro + +INIT_XMM sse2 +PRED4x4_DR +INIT_XMM ssse3 +PRED4x4_DR +%if HAVE_AVX_EXTERNAL +INIT_XMM avx +PRED4x4_DR +%endif + +;------------------------------------------------------------------------------ +; void ff_pred4x4_vertical_right_10(pixel *src, const pixel *topright, +; ptrdiff_t stride) +;------------------------------------------------------------------------------ +%macro PRED4x4_VR 0 +cglobal pred4x4_vertical_right_10, 3, 3, 6 + sub r0, r2 + lea r1, [r0+r2*2] + movq m5, [r0] ; ........t3t2t1t0 + movhps m1, [r0-8] + PALIGNR m0, m5, m1, 14, m1 ; ......t3t2t1t0lt + pavgw m5, m0 + movhps m1, [r0+r2*1-8] + PALIGNR m0, m1, 14, m1 ; ....t3t2t1t0ltl0 + movhps m2, [r0+r2*2-8] + PALIGNR m1, m0, m2, 14, m2 ; ..t3t2t1t0ltl0l1 + movhps m3, [r1+r2*1-8] + PALIGNR m2, m1, m3, 14, m3 ; t3t2t1t0ltl0l1l2 + PRED4x4_LOWPASS m1, m0, m2, m1 + pslldq m0, m1, 12 + psrldq m1, 4 + movq [r0+r2*1], m5 + movq [r0+r2*2], m1 + PALIGNR m5, m0, 14, m2 + pslldq m0, 2 + movq [r1+r2*1], m5 + PALIGNR m1, m0, 14, m0 + movq [r1+r2*2], m1 + RET +%endmacro + +INIT_XMM sse2 +PRED4x4_VR +INIT_XMM ssse3 +PRED4x4_VR +%if HAVE_AVX_EXTERNAL +INIT_XMM avx +PRED4x4_VR +%endif + +;------------------------------------------------------------------------------- +; void ff_pred4x4_horizontal_down_10(pixel *src, const pixel *topright, +; ptrdiff_t stride) +;------------------------------------------------------------------------------- +%macro PRED4x4_HD 0 +cglobal pred4x4_horizontal_down_10, 3, 3 + sub r0, r2 + lea r1, [r0+r2*2] + movq m0, [r0-8] ; lt .. + movhps m0, [r0] + pslldq m0, 2 ; t2 t1 t0 lt .. .. .. .. + movq m1, [r1+r2*2-8] ; l3 + movq m3, [r1+r2*1-8] + punpcklwd m1, m3 ; l2 l3 + movq m2, [r0+r2*2-8] ; l1 + movq m3, [r0+r2*1-8] + punpcklwd m2, m3 ; l0 l1 + punpckhdq m1, m2 ; l0 l1 l2 l3 + punpckhqdq m1, m0 ; t2 t1 t0 lt l0 l1 l2 l3 + psrldq m0, m1, 4 ; .. .. t2 t1 t0 lt l0 l1 + psrldq m3, m1, 2 ; .. t2 t1 t0 lt l0 l1 l2 + pavgw m5, m1, m3 + PRED4x4_LOWPASS m3, m1, m0, m3 + punpcklwd m5, m3 + psrldq m3, 8 + PALIGNR m3, m5, 12, m4 + movq [r1+r2*2], m5 + movhps [r0+r2*2], m5 + psrldq m5, 4 + movq [r1+r2*1], m5 + movq [r0+r2*1], m3 + RET +%endmacro + +INIT_XMM sse2 +PRED4x4_HD +INIT_XMM ssse3 +PRED4x4_HD +%if HAVE_AVX_EXTERNAL +INIT_XMM avx +PRED4x4_HD +%endif + +;----------------------------------------------------------------------------- +; void ff_pred4x4_dc_10(pixel *src, const pixel *topright, ptrdiff_t stride) +;----------------------------------------------------------------------------- + +INIT_MMX mmxext +cglobal pred4x4_dc_10, 3, 3 + sub r0, r2 + lea r1, [r0+r2*2] + movq m2, [r0+r2*1-8] + paddw m2, [r0+r2*2-8] + paddw m2, [r1+r2*1-8] + paddw m2, [r1+r2*2-8] + psrlq m2, 48 + movq m0, [r0] + HADDW m0, m1 + paddw m0, [pw_4] + paddw m0, m2 + psrlw m0, 3 + SPLATW m0, m0, 0 + movq [r0+r2*1], m0 + movq [r0+r2*2], m0 + movq [r1+r2*1], m0 + movq [r1+r2*2], m0 + RET + +;----------------------------------------------------------------------------- +; void ff_pred4x4_down_left_10(pixel *src, const pixel *topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED4x4_DL 0 +cglobal pred4x4_down_left_10, 3, 3 + sub r0, r2 + movq m0, [r0] + movhps m0, [r1] + psrldq m2, m0, 2 + pslldq m3, m0, 2 + pshufhw m2, m2, 10100100b + PRED4x4_LOWPASS m0, m3, m2, m0 + lea r1, [r0+r2*2] + movhps [r1+r2*2], m0 + psrldq m0, 2 + movq [r0+r2*1], m0 + psrldq m0, 2 + movq [r0+r2*2], m0 + psrldq m0, 2 + movq [r1+r2*1], m0 + RET +%endmacro + +INIT_XMM sse2 +PRED4x4_DL +%if HAVE_AVX_EXTERNAL +INIT_XMM avx +PRED4x4_DL +%endif + +;----------------------------------------------------------------------------- +; void ff_pred4x4_vertical_left_10(pixel *src, const pixel *topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED4x4_VL 0 +cglobal pred4x4_vertical_left_10, 3, 3 + sub r0, r2 + movu m1, [r0] + movhps m1, [r1] + psrldq m0, m1, 2 + psrldq m2, m1, 4 + pavgw m4, m0, m1 + PRED4x4_LOWPASS m0, m1, m2, m0 + lea r1, [r0+r2*2] + movq [r0+r2*1], m4 + movq [r0+r2*2], m0 + psrldq m4, 2 + psrldq m0, 2 + movq [r1+r2*1], m4 + movq [r1+r2*2], m0 + RET +%endmacro + +INIT_XMM sse2 +PRED4x4_VL +%if HAVE_AVX_EXTERNAL +INIT_XMM avx +PRED4x4_VL +%endif + +;----------------------------------------------------------------------------- +; void ff_pred4x4_horizontal_up_10(pixel *src, const pixel *topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- +INIT_MMX mmxext +cglobal pred4x4_horizontal_up_10, 3, 3 + sub r0, r2 + lea r1, [r0+r2*2] + movq m0, [r0+r2*1-8] + punpckhwd m0, [r0+r2*2-8] + movq m1, [r1+r2*1-8] + punpckhwd m1, [r1+r2*2-8] + punpckhdq m0, m1 + pshufw m1, m1, 0xFF + movq [r1+r2*2], m1 + movd [r1+r2*1+4], m1 + pshufw m2, m0, 11111001b + movq m1, m2 + pavgw m2, m0 + + pshufw m5, m0, 11111110b + PRED4x4_LOWPASS m1, m0, m5, m1 + movq m6, m2 + punpcklwd m6, m1 + movq [r0+r2*1], m6 + psrlq m2, 16 + psrlq m1, 16 + punpcklwd m2, m1 + movq [r0+r2*2], m2 + psrlq m2, 32 + movd [r1+r2*1], m2 + RET + + + +;----------------------------------------------------------------------------- +; void ff_pred8x8_vertical_10(pixel *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- +INIT_XMM sse2 +cglobal pred8x8_vertical_10, 2, 2 + sub r0, r1 + mova m0, [r0] +%rep 3 + mova [r0+r1*1], m0 + mova [r0+r1*2], m0 + lea r0, [r0+r1*2] +%endrep + mova [r0+r1*1], m0 + mova [r0+r1*2], m0 + RET + +;----------------------------------------------------------------------------- +; void ff_pred8x8_horizontal_10(pixel *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- +INIT_XMM sse2 +cglobal pred8x8_horizontal_10, 2, 3 + mov r2d, 4 +.loop: + movq m0, [r0+r1*0-8] + movq m1, [r0+r1*1-8] + pshuflw m0, m0, 0xff + pshuflw m1, m1, 0xff + punpcklqdq m0, m0 + punpcklqdq m1, m1 + mova [r0+r1*0], m0 + mova [r0+r1*1], m1 + lea r0, [r0+r1*2] + dec r2d + jg .loop + REP_RET + +;----------------------------------------------------------------------------- +; void ff_predict_8x8_dc_10(pixel *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro MOV8 2-3 +; sort of a hack, but it works +%if mmsize==8 + movq [%1+0], %2 + movq [%1+8], %3 +%else + movdqa [%1], %2 +%endif +%endmacro + +%macro PRED8x8_DC 1 +cglobal pred8x8_dc_10, 2, 6 + sub r0, r1 + pxor m4, m4 + movq m0, [r0+0] + movq m1, [r0+8] +%if mmsize==16 + punpcklwd m0, m1 + movhlps m1, m0 + paddw m0, m1 +%else + pshufw m2, m0, 00001110b + pshufw m3, m1, 00001110b + paddw m0, m2 + paddw m1, m3 + punpcklwd m0, m1 +%endif + %1 m2, m0, 00001110b + paddw m0, m2 + + lea r5, [r1*3] + lea r4, [r0+r1*4] + movzx r2d, word [r0+r1*1-2] + movzx r3d, word [r0+r1*2-2] + add r2d, r3d + movzx r3d, word [r0+r5*1-2] + add r2d, r3d + movzx r3d, word [r4-2] + add r2d, r3d + movd m2, r2d ; s2 + + movzx r2d, word [r4+r1*1-2] + movzx r3d, word [r4+r1*2-2] + add r2d, r3d + movzx r3d, word [r4+r5*1-2] + add r2d, r3d + movzx r3d, word [r4+r1*4-2] + add r2d, r3d + movd m3, r2d ; s3 + + punpcklwd m2, m3 + punpckldq m0, m2 ; s0, s1, s2, s3 + %1 m3, m0, 11110110b ; s2, s1, s3, s3 + %1 m0, m0, 01110100b ; s0, s1, s3, s1 + paddw m0, m3 + psrlw m0, 2 + pavgw m0, m4 ; s0+s2, s1, s3, s1+s3 +%if mmsize==16 + punpcklwd m0, m0 + pshufd m3, m0, 11111010b + punpckldq m0, m0 + SWAP 0,1 +%else + pshufw m1, m0, 0x00 + pshufw m2, m0, 0x55 + pshufw m3, m0, 0xaa + pshufw m4, m0, 0xff +%endif + MOV8 r0+r1*1, m1, m2 + MOV8 r0+r1*2, m1, m2 + MOV8 r0+r5*1, m1, m2 + MOV8 r0+r1*4, m1, m2 + MOV8 r4+r1*1, m3, m4 + MOV8 r4+r1*2, m3, m4 + MOV8 r4+r5*1, m3, m4 + MOV8 r4+r1*4, m3, m4 + RET +%endmacro + +INIT_MMX mmxext +PRED8x8_DC pshufw +INIT_XMM sse2 +PRED8x8_DC pshuflw + +;----------------------------------------------------------------------------- +; void ff_pred8x8_top_dc_10(pixel *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- +INIT_XMM sse2 +cglobal pred8x8_top_dc_10, 2, 4 + sub r0, r1 + mova m0, [r0] + pshuflw m1, m0, 0x4e + pshufhw m1, m1, 0x4e + paddw m0, m1 + pshuflw m1, m0, 0xb1 + pshufhw m1, m1, 0xb1 + paddw m0, m1 + lea r2, [r1*3] + lea r3, [r0+r1*4] + paddw m0, [pw_2] + psrlw m0, 2 + mova [r0+r1*1], m0 + mova [r0+r1*2], m0 + mova [r0+r2*1], m0 + mova [r0+r1*4], m0 + mova [r3+r1*1], m0 + mova [r3+r1*2], m0 + mova [r3+r2*1], m0 + mova [r3+r1*4], m0 + RET + +;----------------------------------------------------------------------------- +; void ff_pred8x8_plane_10(pixel *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- +INIT_XMM sse2 +cglobal pred8x8_plane_10, 2, 7, 7 + sub r0, r1 + lea r2, [r1*3] + lea r3, [r0+r1*4] + mova m2, [r0] + pmaddwd m2, [pw_m32101234] + HADDD m2, m1 + movd m0, [r0-4] + psrld m0, 14 + psubw m2, m0 ; H + movd m0, [r3+r1*4-4] + movd m1, [r0+12] + paddw m0, m1 + psllw m0, 4 ; 16*(src[7*stride-1] + src[-stride+7]) + movzx r4d, word [r3+r1*1-2] ; src[4*stride-1] + movzx r5d, word [r0+r2*1-2] ; src[2*stride-1] + sub r4d, r5d + movzx r6d, word [r3+r1*2-2] ; src[5*stride-1] + movzx r5d, word [r0+r1*2-2] ; src[1*stride-1] + sub r6d, r5d + lea r4d, [r4+r6*2] + movzx r5d, word [r3+r2*1-2] ; src[6*stride-1] + movzx r6d, word [r0+r1*1-2] ; src[0*stride-1] + sub r5d, r6d + lea r5d, [r5*3] + add r4d, r5d + movzx r6d, word [r3+r1*4-2] ; src[7*stride-1] + movzx r5d, word [r0+r1*0-2] ; src[ -stride-1] + sub r6d, r5d + lea r4d, [r4+r6*4] + movd m3, r4d ; V + punpckldq m2, m3 + pmaddwd m2, [pd_17] + paddd m2, [pd_16] + psrad m2, 5 ; b, c + + mova m3, [pw_pixel_max] + pxor m1, m1 + SPLATW m0, m0, 1 + SPLATW m4, m2, 2 + SPLATW m2, m2, 0 + pmullw m2, [pw_m32101234] ; b + pmullw m5, m4, [pw_m3] ; c + paddw m5, [pw_16] + mov r2d, 8 + add r0, r1 +.loop: + paddsw m6, m2, m5 + paddsw m6, m0 + psraw m6, 5 + CLIPW m6, m1, m3 + mova [r0], m6 + paddw m5, m4 + add r0, r1 + dec r2d + jg .loop + REP_RET + + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_128_dc_10(pixel *src, int has_topleft, int has_topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED8x8L_128_DC 0 +cglobal pred8x8l_128_dc_10, 4, 4 + mova m0, [pw_512] ; (1<<(BIT_DEPTH-1)) + lea r1, [r3*3] + lea r2, [r0+r3*4] + MOV8 r0+r3*0, m0, m0 + MOV8 r0+r3*1, m0, m0 + MOV8 r0+r3*2, m0, m0 + MOV8 r0+r1*1, m0, m0 + MOV8 r2+r3*0, m0, m0 + MOV8 r2+r3*1, m0, m0 + MOV8 r2+r3*2, m0, m0 + MOV8 r2+r1*1, m0, m0 + RET +%endmacro + +INIT_MMX mmxext +PRED8x8L_128_DC +INIT_XMM sse2 +PRED8x8L_128_DC + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_top_dc_10(pixel *src, int has_topleft, int has_topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED8x8L_TOP_DC 0 +cglobal pred8x8l_top_dc_10, 4, 4, 6 + sub r0, r3 + mova m0, [r0] + shr r1d, 14 + shr r2d, 13 + neg r1 + pslldq m1, m0, 2 + psrldq m2, m0, 2 + pinsrw m1, [r0+r1], 0 + pinsrw m2, [r0+r2+14], 7 + lea r1, [r3*3] + lea r2, [r0+r3*4] + PRED4x4_LOWPASS m0, m2, m1, m0 + HADDW m0, m1 + paddw m0, [pw_4] + psrlw m0, 3 + SPLATW m0, m0, 0 + mova [r0+r3*1], m0 + mova [r0+r3*2], m0 + mova [r0+r1*1], m0 + mova [r0+r3*4], m0 + mova [r2+r3*1], m0 + mova [r2+r3*2], m0 + mova [r2+r1*1], m0 + mova [r2+r3*4], m0 + RET +%endmacro + +INIT_XMM sse2 +PRED8x8L_TOP_DC +%if HAVE_AVX_EXTERNAL +INIT_XMM avx +PRED8x8L_TOP_DC +%endif + +;------------------------------------------------------------------------------- +; void ff_pred8x8l_dc_10(pixel *src, int has_topleft, int has_topright, +; ptrdiff_t stride) +;------------------------------------------------------------------------------- +;TODO: see if scalar is faster +%macro PRED8x8L_DC 0 +cglobal pred8x8l_dc_10, 4, 6, 6 + sub r0, r3 + lea r4, [r0+r3*4] + lea r5, [r3*3] + mova m0, [r0+r3*2-16] + punpckhwd m0, [r0+r3*1-16] + mova m1, [r4+r3*0-16] + punpckhwd m1, [r0+r5*1-16] + punpckhdq m1, m0 + mova m2, [r4+r3*2-16] + punpckhwd m2, [r4+r3*1-16] + mova m3, [r4+r3*4-16] + punpckhwd m3, [r4+r5*1-16] + punpckhdq m3, m2 + punpckhqdq m3, m1 + mova m0, [r0] + shr r1d, 14 + shr r2d, 13 + neg r1 + pslldq m1, m0, 2 + psrldq m2, m0, 2 + pinsrw m1, [r0+r1], 0 + pinsrw m2, [r0+r2+14], 7 + not r1 + and r1, r3 + pslldq m4, m3, 2 + psrldq m5, m3, 2 + pshuflw m4, m4, 11100101b + pinsrw m5, [r0+r1-2], 7 + PRED4x4_LOWPASS m3, m4, m5, m3 + PRED4x4_LOWPASS m0, m2, m1, m0 + paddw m0, m3 + HADDW m0, m1 + paddw m0, [pw_8] + psrlw m0, 4 + SPLATW m0, m0 + mova [r0+r3*1], m0 + mova [r0+r3*2], m0 + mova [r0+r5*1], m0 + mova [r0+r3*4], m0 + mova [r4+r3*1], m0 + mova [r4+r3*2], m0 + mova [r4+r5*1], m0 + mova [r4+r3*4], m0 + RET +%endmacro + +INIT_XMM sse2 +PRED8x8L_DC +%if HAVE_AVX_EXTERNAL +INIT_XMM avx +PRED8x8L_DC +%endif + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_vertical_10(pixel *src, int has_topleft, int has_topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED8x8L_VERTICAL 0 +cglobal pred8x8l_vertical_10, 4, 4, 6 + sub r0, r3 + mova m0, [r0] + shr r1d, 14 + shr r2d, 13 + neg r1 + pslldq m1, m0, 2 + psrldq m2, m0, 2 + pinsrw m1, [r0+r1], 0 + pinsrw m2, [r0+r2+14], 7 + lea r1, [r3*3] + lea r2, [r0+r3*4] + PRED4x4_LOWPASS m0, m2, m1, m0 + mova [r0+r3*1], m0 + mova [r0+r3*2], m0 + mova [r0+r1*1], m0 + mova [r0+r3*4], m0 + mova [r2+r3*1], m0 + mova [r2+r3*2], m0 + mova [r2+r1*1], m0 + mova [r2+r3*4], m0 + RET +%endmacro + +INIT_XMM sse2 +PRED8x8L_VERTICAL +%if HAVE_AVX_EXTERNAL +INIT_XMM avx +PRED8x8L_VERTICAL +%endif + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_horizontal_10(uint8_t *src, int has_topleft, +; int has_topright, ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED8x8L_HORIZONTAL 0 +cglobal pred8x8l_horizontal_10, 4, 4, 5 + mova m0, [r0-16] + shr r1d, 14 + dec r1 + and r1, r3 + sub r1, r3 + punpckhwd m0, [r0+r1-16] + mova m1, [r0+r3*2-16] + punpckhwd m1, [r0+r3*1-16] + lea r2, [r0+r3*4] + lea r1, [r3*3] + punpckhdq m1, m0 + mova m2, [r2+r3*0-16] + punpckhwd m2, [r0+r1-16] + mova m3, [r2+r3*2-16] + punpckhwd m3, [r2+r3*1-16] + punpckhdq m3, m2 + punpckhqdq m3, m1 + PALIGNR m4, m3, [r2+r1-16], 14, m0 + pslldq m0, m4, 2 + pshuflw m0, m0, 11100101b + PRED4x4_LOWPASS m4, m3, m0, m4 + punpckhwd m3, m4, m4 + punpcklwd m4, m4 + pshufd m0, m3, 0xff + pshufd m1, m3, 0xaa + pshufd m2, m3, 0x55 + pshufd m3, m3, 0x00 + mova [r0+r3*0], m0 + mova [r0+r3*1], m1 + mova [r0+r3*2], m2 + mova [r0+r1*1], m3 + pshufd m0, m4, 0xff + pshufd m1, m4, 0xaa + pshufd m2, m4, 0x55 + pshufd m3, m4, 0x00 + mova [r2+r3*0], m0 + mova [r2+r3*1], m1 + mova [r2+r3*2], m2 + mova [r2+r1*1], m3 + RET +%endmacro + +INIT_XMM sse2 +PRED8x8L_HORIZONTAL +INIT_XMM ssse3 +PRED8x8L_HORIZONTAL +%if HAVE_AVX_EXTERNAL +INIT_XMM avx +PRED8x8L_HORIZONTAL +%endif + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_down_left_10(pixel *src, int has_topleft, int has_topright, +; ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED8x8L_DOWN_LEFT 0 +cglobal pred8x8l_down_left_10, 4, 4, 7 + sub r0, r3 + mova m3, [r0] + shr r1d, 14 + neg r1 + shr r2d, 13 + pslldq m1, m3, 2 + psrldq m2, m3, 2 + pinsrw m1, [r0+r1], 0 + pinsrw m2, [r0+r2+14], 7 + PRED4x4_LOWPASS m6, m2, m1, m3 + jz .fix_tr ; flags from shr r2d + mova m1, [r0+16] + psrldq m5, m1, 2 + PALIGNR m2, m1, m3, 14, m3 + pshufhw m5, m5, 10100100b + PRED4x4_LOWPASS m1, m2, m5, m1 +.do_topright: + lea r1, [r3*3] + psrldq m5, m1, 14 + lea r2, [r0+r3*4] + PALIGNR m2, m1, m6, 2, m0 + PALIGNR m3, m1, m6, 14, m0 + PALIGNR m5, m1, 2, m0 + pslldq m4, m6, 2 + PRED4x4_LOWPASS m6, m4, m2, m6 + PRED4x4_LOWPASS m1, m3, m5, m1 + mova [r2+r3*4], m1 + PALIGNR m1, m6, 14, m2 + pslldq m6, 2 + mova [r2+r1*1], m1 + PALIGNR m1, m6, 14, m2 + pslldq m6, 2 + mova [r2+r3*2], m1 + PALIGNR m1, m6, 14, m2 + pslldq m6, 2 + mova [r2+r3*1], m1 + PALIGNR m1, m6, 14, m2 + pslldq m6, 2 + mova [r0+r3*4], m1 + PALIGNR m1, m6, 14, m2 + pslldq m6, 2 + mova [r0+r1*1], m1 + PALIGNR m1, m6, 14, m2 + pslldq m6, 2 + mova [r0+r3*2], m1 + PALIGNR m1, m6, 14, m6 + mova [r0+r3*1], m1 + RET +.fix_tr: + punpckhwd m3, m3 + pshufd m1, m3, 0xFF + jmp .do_topright +%endmacro + +INIT_XMM sse2 +PRED8x8L_DOWN_LEFT +INIT_XMM ssse3 +PRED8x8L_DOWN_LEFT +%if HAVE_AVX_EXTERNAL +INIT_XMM avx +PRED8x8L_DOWN_LEFT +%endif + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_down_right_10(pixel *src, int has_topleft, +; int has_topright, ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED8x8L_DOWN_RIGHT 0 +; standard forbids this when has_topleft is false +; no need to check +cglobal pred8x8l_down_right_10, 4, 5, 8 + sub r0, r3 + lea r4, [r0+r3*4] + lea r1, [r3*3] + mova m0, [r0+r3*1-16] + punpckhwd m0, [r0+r3*0-16] + mova m1, [r0+r1*1-16] + punpckhwd m1, [r0+r3*2-16] + punpckhdq m1, m0 + mova m2, [r4+r3*1-16] + punpckhwd m2, [r4+r3*0-16] + mova m3, [r4+r1*1-16] + punpckhwd m3, [r4+r3*2-16] + punpckhdq m3, m2 + punpckhqdq m3, m1 + mova m0, [r4+r3*4-16] + mova m1, [r0] + PALIGNR m4, m3, m0, 14, m0 + PALIGNR m1, m3, 2, m2 + pslldq m0, m4, 2 + pshuflw m0, m0, 11100101b + PRED4x4_LOWPASS m6, m1, m4, m3 + PRED4x4_LOWPASS m4, m3, m0, m4 + mova m3, [r0] + shr r2d, 13 + pslldq m1, m3, 2 + psrldq m2, m3, 2 + pinsrw m1, [r0-2], 0 + pinsrw m2, [r0+r2+14], 7 + PRED4x4_LOWPASS m3, m2, m1, m3 + PALIGNR m2, m3, m6, 2, m0 + PALIGNR m5, m3, m6, 14, m0 + psrldq m7, m3, 2 + PRED4x4_LOWPASS m6, m4, m2, m6 + PRED4x4_LOWPASS m3, m5, m7, m3 + mova [r4+r3*4], m6 + PALIGNR m3, m6, 14, m2 + pslldq m6, 2 + mova [r0+r3*1], m3 + PALIGNR m3, m6, 14, m2 + pslldq m6, 2 + mova [r0+r3*2], m3 + PALIGNR m3, m6, 14, m2 + pslldq m6, 2 + mova [r0+r1*1], m3 + PALIGNR m3, m6, 14, m2 + pslldq m6, 2 + mova [r0+r3*4], m3 + PALIGNR m3, m6, 14, m2 + pslldq m6, 2 + mova [r4+r3*1], m3 + PALIGNR m3, m6, 14, m2 + pslldq m6, 2 + mova [r4+r3*2], m3 + PALIGNR m3, m6, 14, m6 + mova [r4+r1*1], m3 + RET +%endmacro + +INIT_XMM sse2 +PRED8x8L_DOWN_RIGHT +INIT_XMM ssse3 +PRED8x8L_DOWN_RIGHT +%if HAVE_AVX_EXTERNAL +INIT_XMM avx +PRED8x8L_DOWN_RIGHT +%endif + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_vertical_right_10(pixel *src, int has_topleft, +; int has_topright, ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED8x8L_VERTICAL_RIGHT 0 +; likewise with 8x8l_down_right +cglobal pred8x8l_vertical_right_10, 4, 5, 7 + sub r0, r3 + lea r4, [r0+r3*4] + lea r1, [r3*3] + mova m0, [r0+r3*1-16] + punpckhwd m0, [r0+r3*0-16] + mova m1, [r0+r1*1-16] + punpckhwd m1, [r0+r3*2-16] + punpckhdq m1, m0 + mova m2, [r4+r3*1-16] + punpckhwd m2, [r4+r3*0-16] + mova m3, [r4+r1*1-16] + punpckhwd m3, [r4+r3*2-16] + punpckhdq m3, m2 + punpckhqdq m3, m1 + mova m0, [r4+r3*4-16] + mova m1, [r0] + PALIGNR m4, m3, m0, 14, m0 + PALIGNR m1, m3, 2, m2 + PRED4x4_LOWPASS m3, m1, m4, m3 + mova m2, [r0] + shr r2d, 13 + pslldq m1, m2, 2 + psrldq m5, m2, 2 + pinsrw m1, [r0-2], 0 + pinsrw m5, [r0+r2+14], 7 + PRED4x4_LOWPASS m2, m5, m1, m2 + PALIGNR m6, m2, m3, 12, m1 + PALIGNR m5, m2, m3, 14, m0 + PRED4x4_LOWPASS m0, m6, m2, m5 + pavgw m2, m5 + mova [r0+r3*2], m0 + mova [r0+r3*1], m2 + pslldq m6, m3, 4 + pslldq m1, m3, 2 + PRED4x4_LOWPASS m1, m3, m6, m1 + PALIGNR m2, m1, 14, m4 + mova [r0+r1*1], m2 + pslldq m1, 2 + PALIGNR m0, m1, 14, m3 + mova [r0+r3*4], m0 + pslldq m1, 2 + PALIGNR m2, m1, 14, m4 + mova [r4+r3*1], m2 + pslldq m1, 2 + PALIGNR m0, m1, 14, m3 + mova [r4+r3*2], m0 + pslldq m1, 2 + PALIGNR m2, m1, 14, m4 + mova [r4+r1*1], m2 + pslldq m1, 2 + PALIGNR m0, m1, 14, m1 + mova [r4+r3*4], m0 + RET +%endmacro + +INIT_XMM sse2 +PRED8x8L_VERTICAL_RIGHT +INIT_XMM ssse3 +PRED8x8L_VERTICAL_RIGHT +%if HAVE_AVX_EXTERNAL +INIT_XMM avx +PRED8x8L_VERTICAL_RIGHT +%endif + +;----------------------------------------------------------------------------- +; void ff_pred8x8l_horizontal_up_10(pixel *src, int has_topleft, +; int has_topright, ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED8x8L_HORIZONTAL_UP 0 +cglobal pred8x8l_horizontal_up_10, 4, 4, 6 + mova m0, [r0+r3*0-16] + punpckhwd m0, [r0+r3*1-16] + shr r1d, 14 + dec r1 + and r1, r3 + sub r1, r3 + mova m4, [r0+r1*1-16] + lea r1, [r3*3] + lea r2, [r0+r3*4] + mova m1, [r0+r3*2-16] + punpckhwd m1, [r0+r1*1-16] + punpckhdq m0, m1 + mova m2, [r2+r3*0-16] + punpckhwd m2, [r2+r3*1-16] + mova m3, [r2+r3*2-16] + punpckhwd m3, [r2+r1*1-16] + punpckhdq m2, m3 + punpckhqdq m0, m2 + PALIGNR m1, m0, m4, 14, m4 + psrldq m2, m0, 2 + pshufhw m2, m2, 10100100b + PRED4x4_LOWPASS m0, m1, m2, m0 + psrldq m1, m0, 2 + psrldq m2, m0, 4 + pshufhw m1, m1, 10100100b + pshufhw m2, m2, 01010100b + pavgw m4, m0, m1 + PRED4x4_LOWPASS m1, m2, m0, m1 + punpckhwd m5, m4, m1 + punpcklwd m4, m1 + mova [r2+r3*0], m5 + mova [r0+r3*0], m4 + pshufd m0, m5, 11111001b + pshufd m1, m5, 11111110b + pshufd m2, m5, 11111111b + mova [r2+r3*1], m0 + mova [r2+r3*2], m1 + mova [r2+r1*1], m2 + PALIGNR m2, m5, m4, 4, m0 + PALIGNR m3, m5, m4, 8, m1 + PALIGNR m5, m5, m4, 12, m4 + mova [r0+r3*1], m2 + mova [r0+r3*2], m3 + mova [r0+r1*1], m5 + RET +%endmacro + +INIT_XMM sse2 +PRED8x8L_HORIZONTAL_UP +INIT_XMM ssse3 +PRED8x8L_HORIZONTAL_UP +%if HAVE_AVX_EXTERNAL +INIT_XMM avx +PRED8x8L_HORIZONTAL_UP +%endif + + +;----------------------------------------------------------------------------- +; void ff_pred16x16_vertical_10(pixel *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro MOV16 3-5 + mova [%1+ 0], %2 + mova [%1+mmsize], %3 +%if mmsize==8 + mova [%1+ 16], %4 + mova [%1+ 24], %5 +%endif +%endmacro + +%macro PRED16x16_VERTICAL 0 +cglobal pred16x16_vertical_10, 2, 3 + sub r0, r1 + mov r2d, 8 + mova m0, [r0+ 0] + mova m1, [r0+mmsize] +%if mmsize==8 + mova m2, [r0+16] + mova m3, [r0+24] +%endif +.loop: + MOV16 r0+r1*1, m0, m1, m2, m3 + MOV16 r0+r1*2, m0, m1, m2, m3 + lea r0, [r0+r1*2] + dec r2d + jg .loop + REP_RET +%endmacro + +INIT_MMX mmxext +PRED16x16_VERTICAL +INIT_XMM sse2 +PRED16x16_VERTICAL + +;----------------------------------------------------------------------------- +; void ff_pred16x16_horizontal_10(pixel *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED16x16_HORIZONTAL 0 +cglobal pred16x16_horizontal_10, 2, 3 + mov r2d, 8 +.vloop: + movd m0, [r0+r1*0-4] + movd m1, [r0+r1*1-4] + SPLATW m0, m0, 1 + SPLATW m1, m1, 1 + MOV16 r0+r1*0, m0, m0, m0, m0 + MOV16 r0+r1*1, m1, m1, m1, m1 + lea r0, [r0+r1*2] + dec r2d + jg .vloop + REP_RET +%endmacro + +INIT_MMX mmxext +PRED16x16_HORIZONTAL +INIT_XMM sse2 +PRED16x16_HORIZONTAL + +;----------------------------------------------------------------------------- +; void ff_pred16x16_dc_10(pixel *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED16x16_DC 0 +cglobal pred16x16_dc_10, 2, 6 + mov r5, r0 + sub r0, r1 + mova m0, [r0+0] + paddw m0, [r0+mmsize] +%if mmsize==8 + paddw m0, [r0+16] + paddw m0, [r0+24] +%endif + HADDW m0, m2 + + lea r0, [r0+r1-2] + movzx r3d, word [r0] + movzx r4d, word [r0+r1] +%rep 7 + lea r0, [r0+r1*2] + movzx r2d, word [r0] + add r3d, r2d + movzx r2d, word [r0+r1] + add r4d, r2d +%endrep + lea r3d, [r3+r4+16] + + movd m1, r3d + paddw m0, m1 + psrlw m0, 5 + SPLATW m0, m0 + mov r3d, 8 +.loop: + MOV16 r5+r1*0, m0, m0, m0, m0 + MOV16 r5+r1*1, m0, m0, m0, m0 + lea r5, [r5+r1*2] + dec r3d + jg .loop + REP_RET +%endmacro + +INIT_MMX mmxext +PRED16x16_DC +INIT_XMM sse2 +PRED16x16_DC + +;----------------------------------------------------------------------------- +; void ff_pred16x16_top_dc_10(pixel *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED16x16_TOP_DC 0 +cglobal pred16x16_top_dc_10, 2, 3 + sub r0, r1 + mova m0, [r0+0] + paddw m0, [r0+mmsize] +%if mmsize==8 + paddw m0, [r0+16] + paddw m0, [r0+24] +%endif + HADDW m0, m2 + + SPLATW m0, m0 + paddw m0, [pw_8] + psrlw m0, 4 + mov r2d, 8 +.loop: + MOV16 r0+r1*1, m0, m0, m0, m0 + MOV16 r0+r1*2, m0, m0, m0, m0 + lea r0, [r0+r1*2] + dec r2d + jg .loop + REP_RET +%endmacro + +INIT_MMX mmxext +PRED16x16_TOP_DC +INIT_XMM sse2 +PRED16x16_TOP_DC + +;----------------------------------------------------------------------------- +; void ff_pred16x16_left_dc_10(pixel *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED16x16_LEFT_DC 0 +cglobal pred16x16_left_dc_10, 2, 6 + mov r5, r0 + + sub r0, 2 + movzx r3d, word [r0] + movzx r4d, word [r0+r1] +%rep 7 + lea r0, [r0+r1*2] + movzx r2d, word [r0] + add r3d, r2d + movzx r2d, word [r0+r1] + add r4d, r2d +%endrep + lea r3d, [r3+r4+8] + shr r3d, 4 + + movd m0, r3d + SPLATW m0, m0 + mov r3d, 8 +.loop: + MOV16 r5+r1*0, m0, m0, m0, m0 + MOV16 r5+r1*1, m0, m0, m0, m0 + lea r5, [r5+r1*2] + dec r3d + jg .loop + REP_RET +%endmacro + +INIT_MMX mmxext +PRED16x16_LEFT_DC +INIT_XMM sse2 +PRED16x16_LEFT_DC + +;----------------------------------------------------------------------------- +; void ff_pred16x16_128_dc_10(pixel *src, ptrdiff_t stride) +;----------------------------------------------------------------------------- +%macro PRED16x16_128_DC 0 +cglobal pred16x16_128_dc_10, 2,3 + mova m0, [pw_512] + mov r2d, 8 +.loop: + MOV16 r0+r1*0, m0, m0, m0, m0 + MOV16 r0+r1*1, m0, m0, m0, m0 + lea r0, [r0+r1*2] + dec r2d + jg .loop + REP_RET +%endmacro + +INIT_MMX mmxext +PRED16x16_128_DC +INIT_XMM sse2 +PRED16x16_128_DC diff --git a/libs/ffvpx/libavcodec/x86/h264_intrapred_init.c b/libs/ffvpx/libavcodec/x86/h264_intrapred_init.c new file mode 100644 index 000000000..bdd5125d6 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/h264_intrapred_init.c @@ -0,0 +1,410 @@ +/* + * Copyright (c) 2010 Fiona Glaser <fiona@x264.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/avcodec.h" +#include "libavcodec/h264pred.h" + +#define PRED4x4(TYPE, DEPTH, OPT) \ +void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \ + const uint8_t *topright, \ + ptrdiff_t stride); + +PRED4x4(dc, 10, mmxext) +PRED4x4(down_left, 10, sse2) +PRED4x4(down_left, 10, avx) +PRED4x4(down_right, 10, sse2) +PRED4x4(down_right, 10, ssse3) +PRED4x4(down_right, 10, avx) +PRED4x4(vertical_left, 10, sse2) +PRED4x4(vertical_left, 10, avx) +PRED4x4(vertical_right, 10, sse2) +PRED4x4(vertical_right, 10, ssse3) +PRED4x4(vertical_right, 10, avx) +PRED4x4(horizontal_up, 10, mmxext) +PRED4x4(horizontal_down, 10, sse2) +PRED4x4(horizontal_down, 10, ssse3) +PRED4x4(horizontal_down, 10, avx) + +#define PRED8x8(TYPE, DEPTH, OPT) \ +void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \ + ptrdiff_t stride); + +PRED8x8(dc, 10, mmxext) +PRED8x8(dc, 10, sse2) +PRED8x8(top_dc, 10, sse2) +PRED8x8(plane, 10, sse2) +PRED8x8(vertical, 10, sse2) +PRED8x8(horizontal, 10, sse2) + +#define PRED8x8L(TYPE, DEPTH, OPT)\ +void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \ + int has_topleft, \ + int has_topright, \ + ptrdiff_t stride); + +PRED8x8L(dc, 10, sse2) +PRED8x8L(dc, 10, avx) +PRED8x8L(128_dc, 10, mmxext) +PRED8x8L(128_dc, 10, sse2) +PRED8x8L(top_dc, 10, sse2) +PRED8x8L(top_dc, 10, avx) +PRED8x8L(vertical, 10, sse2) +PRED8x8L(vertical, 10, avx) +PRED8x8L(horizontal, 10, sse2) +PRED8x8L(horizontal, 10, ssse3) +PRED8x8L(horizontal, 10, avx) +PRED8x8L(down_left, 10, sse2) +PRED8x8L(down_left, 10, ssse3) +PRED8x8L(down_left, 10, avx) +PRED8x8L(down_right, 10, sse2) +PRED8x8L(down_right, 10, ssse3) +PRED8x8L(down_right, 10, avx) +PRED8x8L(vertical_right, 10, sse2) +PRED8x8L(vertical_right, 10, ssse3) +PRED8x8L(vertical_right, 10, avx) +PRED8x8L(horizontal_up, 10, sse2) +PRED8x8L(horizontal_up, 10, ssse3) +PRED8x8L(horizontal_up, 10, avx) + +#define PRED16x16(TYPE, DEPTH, OPT)\ +void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \ + ptrdiff_t stride); + +PRED16x16(dc, 10, mmxext) +PRED16x16(dc, 10, sse2) +PRED16x16(top_dc, 10, mmxext) +PRED16x16(top_dc, 10, sse2) +PRED16x16(128_dc, 10, mmxext) +PRED16x16(128_dc, 10, sse2) +PRED16x16(left_dc, 10, mmxext) +PRED16x16(left_dc, 10, sse2) +PRED16x16(vertical, 10, mmxext) +PRED16x16(vertical, 10, sse2) +PRED16x16(horizontal, 10, mmxext) +PRED16x16(horizontal, 10, sse2) + +/* 8-bit versions */ +PRED16x16(vertical, 8, mmx) +PRED16x16(vertical, 8, sse) +PRED16x16(horizontal, 8, mmx) +PRED16x16(horizontal, 8, mmxext) +PRED16x16(horizontal, 8, ssse3) +PRED16x16(dc, 8, mmxext) +PRED16x16(dc, 8, sse2) +PRED16x16(dc, 8, ssse3) +PRED16x16(plane_h264, 8, mmx) +PRED16x16(plane_h264, 8, mmxext) +PRED16x16(plane_h264, 8, sse2) +PRED16x16(plane_h264, 8, ssse3) +PRED16x16(plane_rv40, 8, mmx) +PRED16x16(plane_rv40, 8, mmxext) +PRED16x16(plane_rv40, 8, sse2) +PRED16x16(plane_rv40, 8, ssse3) +PRED16x16(plane_svq3, 8, mmx) +PRED16x16(plane_svq3, 8, mmxext) +PRED16x16(plane_svq3, 8, sse2) +PRED16x16(plane_svq3, 8, ssse3) +PRED16x16(tm_vp8, 8, mmx) +PRED16x16(tm_vp8, 8, mmxext) +PRED16x16(tm_vp8, 8, sse2) +PRED16x16(tm_vp8, 8, avx2) + +PRED8x8(top_dc, 8, mmxext) +PRED8x8(dc_rv40, 8, mmxext) +PRED8x8(dc, 8, mmxext) +PRED8x8(vertical, 8, mmx) +PRED8x8(horizontal, 8, mmx) +PRED8x8(horizontal, 8, mmxext) +PRED8x8(horizontal, 8, ssse3) +PRED8x8(plane, 8, mmx) +PRED8x8(plane, 8, mmxext) +PRED8x8(plane, 8, sse2) +PRED8x8(plane, 8, ssse3) +PRED8x8(tm_vp8, 8, mmx) +PRED8x8(tm_vp8, 8, mmxext) +PRED8x8(tm_vp8, 8, sse2) +PRED8x8(tm_vp8, 8, ssse3) + +PRED8x8L(top_dc, 8, mmxext) +PRED8x8L(top_dc, 8, ssse3) +PRED8x8L(dc, 8, mmxext) +PRED8x8L(dc, 8, ssse3) +PRED8x8L(horizontal, 8, mmxext) +PRED8x8L(horizontal, 8, ssse3) +PRED8x8L(vertical, 8, mmxext) +PRED8x8L(vertical, 8, ssse3) +PRED8x8L(down_left, 8, mmxext) +PRED8x8L(down_left, 8, sse2) +PRED8x8L(down_left, 8, ssse3) +PRED8x8L(down_right, 8, mmxext) +PRED8x8L(down_right, 8, sse2) +PRED8x8L(down_right, 8, ssse3) +PRED8x8L(vertical_right, 8, mmxext) +PRED8x8L(vertical_right, 8, sse2) +PRED8x8L(vertical_right, 8, ssse3) +PRED8x8L(vertical_left, 8, sse2) +PRED8x8L(vertical_left, 8, ssse3) +PRED8x8L(horizontal_up, 8, mmxext) +PRED8x8L(horizontal_up, 8, ssse3) +PRED8x8L(horizontal_down, 8, mmxext) +PRED8x8L(horizontal_down, 8, sse2) +PRED8x8L(horizontal_down, 8, ssse3) + +PRED4x4(dc, 8, mmxext) +PRED4x4(down_left, 8, mmxext) +PRED4x4(down_right, 8, mmxext) +PRED4x4(vertical_left, 8, mmxext) +PRED4x4(vertical_right, 8, mmxext) +PRED4x4(horizontal_up, 8, mmxext) +PRED4x4(horizontal_down, 8, mmxext) +PRED4x4(tm_vp8, 8, mmx) +PRED4x4(tm_vp8, 8, mmxext) +PRED4x4(tm_vp8, 8, ssse3) +PRED4x4(vertical_vp8, 8, mmxext) + +av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, + const int bit_depth, + const int chroma_format_idc) +{ + int cpu_flags = av_get_cpu_flags(); + + if (bit_depth == 8) { + if (EXTERNAL_MMX(cpu_flags)) { + h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_8_mmx; + h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_mmx; + if (chroma_format_idc <= 1) { + h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_8_mmx; + h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_mmx; + } + if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) { + h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_mmx; + h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_mmx; + h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_8_mmx; + } else { + if (chroma_format_idc <= 1) + h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmx; + if (codec_id == AV_CODEC_ID_SVQ3) { + if (cpu_flags & AV_CPU_FLAG_CMOV) + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_mmx; + } else if (codec_id == AV_CODEC_ID_RV40) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_mmx; + } else { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_mmx; + } + } + } + + if (EXTERNAL_MMXEXT(cpu_flags)) { + h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_mmxext; + h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_mmxext; + if (chroma_format_idc <= 1) + h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_mmxext; + h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_8_mmxext; + h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_8_mmxext; + h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_8_mmxext; + h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_8_mmxext; + h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_mmxext; + h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_8_mmxext; + h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_8_mmxext; + h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_mmxext; + h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_8_mmxext; + h->pred4x4 [DIAG_DOWN_RIGHT_PRED ] = ff_pred4x4_down_right_8_mmxext; + h->pred4x4 [VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_8_mmxext; + h->pred4x4 [HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_8_mmxext; + h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_8_mmxext; + if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8 || + codec_id == AV_CODEC_ID_H264) { + h->pred4x4 [DIAG_DOWN_LEFT_PRED] = ff_pred4x4_down_left_8_mmxext; + } + if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) { + h->pred4x4 [VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_8_mmxext; + } + if (codec_id != AV_CODEC_ID_RV40) { + h->pred4x4 [HOR_UP_PRED ] = ff_pred4x4_horizontal_up_8_mmxext; + } + if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) { + if (chroma_format_idc <= 1) { + h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_8_mmxext; + h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_8_mmxext; + } + } + if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) { + h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_mmxext; + h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_8_mmxext; + h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_mmxext; + h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_8_mmxext; + h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_8_mmxext; + } else { + if (chroma_format_idc <= 1) + h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmxext; + if (codec_id == AV_CODEC_ID_SVQ3) { + h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_svq3_8_mmxext; + } else if (codec_id == AV_CODEC_ID_RV40) { + h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_rv40_8_mmxext; + } else { + h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_h264_8_mmxext; + } + } + } + + if (EXTERNAL_SSE(cpu_flags)) { + h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_sse; + } + + if (EXTERNAL_SSE2(cpu_flags)) { + h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_sse2; + h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_sse2; + h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2; + h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_8_sse2; + h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_8_sse2; + h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_8_sse2; + if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) { + h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_sse2; + h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_sse2; + } else { + if (chroma_format_idc <= 1) + h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_sse2; + if (codec_id == AV_CODEC_ID_SVQ3) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_sse2; + } else if (codec_id == AV_CODEC_ID_RV40) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_sse2; + } else { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_sse2; + } + } + } + + if (EXTERNAL_SSSE3(cpu_flags)) { + h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_ssse3; + h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_ssse3; + if (chroma_format_idc <= 1) + h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_ssse3; + h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_8_ssse3; + h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_8_ssse3; + h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_8_ssse3; + h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_8_ssse3; + h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_ssse3; + h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_ssse3; + h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_8_ssse3; + h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_8_ssse3; + h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_8_ssse3; + h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_8_ssse3; + if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) { + h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_ssse3; + h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_8_ssse3; + } else { + if (chroma_format_idc <= 1) + h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_ssse3; + if (codec_id == AV_CODEC_ID_SVQ3) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_ssse3; + } else if (codec_id == AV_CODEC_ID_RV40) { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_ssse3; + } else { + h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_ssse3; + } + } + } + + if(EXTERNAL_AVX2(cpu_flags)){ + if (codec_id == AV_CODEC_ID_VP8) { + h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_avx2; + } + } + } else if (bit_depth == 10) { + if (EXTERNAL_MMXEXT(cpu_flags)) { + h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext; + h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext; + + if (chroma_format_idc <= 1) + h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_mmxext; + + h->pred8x8l[DC_128_PRED ] = ff_pred8x8l_128_dc_10_mmxext; + + h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_10_mmxext; + h->pred16x16[TOP_DC_PRED8x8 ] = ff_pred16x16_top_dc_10_mmxext; + h->pred16x16[DC_128_PRED8x8 ] = ff_pred16x16_128_dc_10_mmxext; + h->pred16x16[LEFT_DC_PRED8x8 ] = ff_pred16x16_left_dc_10_mmxext; + h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_mmxext; + h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_mmxext; + } + if (EXTERNAL_SSE2(cpu_flags)) { + h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2; + h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2; + h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_sse2; + h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_sse2; + h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_sse2; + + if (chroma_format_idc <= 1) { + h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_sse2; + h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_sse2; + h->pred8x8[PLANE_PRED8x8 ] = ff_pred8x8_plane_10_sse2; + h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2; + h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2; + } + + h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_sse2; + h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_sse2; + h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_10_sse2; + h->pred8x8l[DC_128_PRED ] = ff_pred8x8l_128_dc_10_sse2; + h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_10_sse2; + h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_sse2; + h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_sse2; + h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_sse2; + h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_sse2; + + h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_10_sse2; + h->pred16x16[TOP_DC_PRED8x8 ] = ff_pred16x16_top_dc_10_sse2; + h->pred16x16[DC_128_PRED8x8 ] = ff_pred16x16_128_dc_10_sse2; + h->pred16x16[LEFT_DC_PRED8x8 ] = ff_pred16x16_left_dc_10_sse2; + h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_sse2; + h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_sse2; + } + if (EXTERNAL_SSSE3(cpu_flags)) { + h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3; + h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3; + h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3; + + h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_ssse3; + h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_ssse3; + h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_ssse3; + h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_ssse3; + h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_ssse3; + } + if (EXTERNAL_AVX(cpu_flags)) { + h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx; + h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx; + h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_avx; + h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_avx; + h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_avx; + + h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_avx; + h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_avx; + h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_10_avx; + h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_10_avx; + h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_avx; + h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_avx; + h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_avx; + h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_avx; + } + } +} diff --git a/libs/ffvpx/libavcodec/x86/mathops.h b/libs/ffvpx/libavcodec/x86/mathops.h new file mode 100644 index 000000000..6298f5ed1 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/mathops.h @@ -0,0 +1,133 @@ +/* + * simple math operations + * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_X86_MATHOPS_H +#define AVCODEC_X86_MATHOPS_H + +#include "config.h" + +#include "libavutil/common.h" +#include "libavutil/x86/asm.h" + +#if HAVE_INLINE_ASM + +#if ARCH_X86_32 + +#define MULL MULL +static av_always_inline av_const int MULL(int a, int b, unsigned shift) +{ + int rt, dummy; + __asm__ ( + "imull %3 \n\t" + "shrdl %4, %%edx, %%eax \n\t" + :"=a"(rt), "=d"(dummy) + :"a"(a), "rm"(b), "ci"((uint8_t)shift) + ); + return rt; +} + +#define MULH MULH +static av_always_inline av_const int MULH(int a, int b) +{ + int rt, dummy; + __asm__ ( + "imull %3" + :"=d"(rt), "=a"(dummy) + :"a"(a), "rm"(b) + ); + return rt; +} + +#define MUL64 MUL64 +static av_always_inline av_const int64_t MUL64(int a, int b) +{ + int64_t rt; + __asm__ ( + "imull %2" + :"=A"(rt) + :"a"(a), "rm"(b) + ); + return rt; +} + +#endif /* ARCH_X86_32 */ + +#if HAVE_I686 +/* median of 3 */ +#define mid_pred mid_pred +static inline av_const int mid_pred(int a, int b, int c) +{ + int i=b; + __asm__ ( + "cmp %2, %1 \n\t" + "cmovg %1, %0 \n\t" + "cmovg %2, %1 \n\t" + "cmp %3, %1 \n\t" + "cmovl %3, %1 \n\t" + "cmp %1, %0 \n\t" + "cmovg %1, %0 \n\t" + :"+&r"(i), "+&r"(a) + :"r"(b), "r"(c) + ); + return i; +} + +#if HAVE_6REGS +#define COPY3_IF_LT(x, y, a, b, c, d)\ +__asm__ volatile(\ + "cmpl %0, %3 \n\t"\ + "cmovl %3, %0 \n\t"\ + "cmovl %4, %1 \n\t"\ + "cmovl %5, %2 \n\t"\ + : "+&r" (x), "+&r" (a), "+r" (c)\ + : "r" (y), "r" (b), "r" (d)\ +); +#endif /* HAVE_6REGS */ + +#endif /* HAVE_I686 */ + +#define MASK_ABS(mask, level) \ + __asm__ ("cdq \n\t" \ + "xorl %1, %0 \n\t" \ + "subl %1, %0 \n\t" \ + : "+a"(level), "=&d"(mask)) + +// avoid +32 for shift optimization (gcc should do that ...) +#define NEG_SSR32 NEG_SSR32 +static inline int32_t NEG_SSR32( int32_t a, int8_t s){ + __asm__ ("sarl %1, %0\n\t" + : "+r" (a) + : "ic" ((uint8_t)(-s)) + ); + return a; +} + +#define NEG_USR32 NEG_USR32 +static inline uint32_t NEG_USR32(uint32_t a, int8_t s){ + __asm__ ("shrl %1, %0\n\t" + : "+r" (a) + : "ic" ((uint8_t)(-s)) + ); + return a; +} + +#endif /* HAVE_INLINE_ASM */ +#endif /* AVCODEC_X86_MATHOPS_H */ diff --git a/libs/ffvpx/libavcodec/x86/moz.build b/libs/ffvpx/libavcodec/x86/moz.build new file mode 100644 index 000000000..847d40808 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/moz.build @@ -0,0 +1,34 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +SOURCES += [ + 'constants.c', + 'flacdsp.asm', + 'flacdsp_init.c', + 'h264_intrapred.asm', + 'h264_intrapred_10bit.asm', + 'h264_intrapred_init.c', + 'videodsp.asm', + 'videodsp_init.c', + 'vp8dsp.asm', + 'vp8dsp_init.c', + 'vp8dsp_loopfilter.asm', + 'vp9dsp_init.c', + 'vp9dsp_init_10bpp.c', + 'vp9dsp_init_12bpp.c', + 'vp9dsp_init_16bpp.c', + 'vp9intrapred.asm', + 'vp9intrapred_16bpp.asm', + 'vp9itxfm.asm', + 'vp9itxfm_16bpp.asm', + 'vp9lpf.asm', + 'vp9lpf_16bpp.asm', + 'vp9mc.asm', + 'vp9mc_16bpp.asm', +] + +FINAL_LIBRARY = 'mozavcodec' + +include('/libs/ffvpx/ffvpxcommon.mozbuild') diff --git a/libs/ffvpx/libavcodec/x86/videodsp.asm b/libs/ffvpx/libavcodec/x86/videodsp.asm new file mode 100644 index 000000000..e23786070 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/videodsp.asm @@ -0,0 +1,468 @@ +;****************************************************************************** +;* Core video DSP functions +;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION .text + +; slow vertical extension loop function. Works with variable-width, and +; does per-line reading/writing of source data + +%macro V_COPY_ROW 2 ; type (top/body/bottom), h +.%1_y_loop: ; do { + mov wq, r7mp ; initialize w (r7mp = wmp) +.%1_x_loop: ; do { + movu m0, [srcq+wq] ; m0 = read($mmsize) + movu [dstq+wq], m0 ; write(m0, $mmsize) + add wq, mmsize ; w -= $mmsize + cmp wq, -mmsize ; } while (w > $mmsize); + jl .%1_x_loop + movu m0, [srcq-mmsize] ; m0 = read($mmsize) + movu [dstq-mmsize], m0 ; write(m0, $mmsize) +%ifidn %1, body ; if ($type == body) { + add srcq, src_strideq ; src += src_stride +%endif ; } + add dstq, dst_strideq ; dst += dst_stride + dec %2 ; } while (--$h); + jnz .%1_y_loop +%endmacro + +%macro vvar_fn 0 +; .----. <- zero +; | | <- top is copied from first line in body of source +; |----| <- start_y +; | | <- body is copied verbatim (line-by-line) from source +; |----| <- end_y +; | | <- bottom is copied from last line in body of source +; '----' <- bh +%if ARCH_X86_64 +cglobal emu_edge_vvar, 7, 8, 1, dst, dst_stride, src, src_stride, \ + start_y, end_y, bh, w +%else ; x86-32 +cglobal emu_edge_vvar, 1, 6, 1, dst, src, start_y, end_y, bh, w +%define src_strideq r3mp +%define dst_strideq r1mp + mov srcq, r2mp + mov start_yq, r4mp + mov end_yq, r5mp + mov bhq, r6mp +%endif + sub bhq, end_yq ; bh -= end_q + sub end_yq, start_yq ; end_q -= start_q + add srcq, r7mp ; (r7mp = wmp) + add dstq, r7mp ; (r7mp = wmp) + neg r7mp ; (r7mp = wmp) + test start_yq, start_yq ; if (start_q) { + jz .body + V_COPY_ROW top, start_yq ; v_copy_row(top, start_yq) +.body: ; } + V_COPY_ROW body, end_yq ; v_copy_row(body, end_yq) + test bhq, bhq ; if (bh) { + jz .end + sub srcq, src_strideq ; src -= src_stride + V_COPY_ROW bottom, bhq ; v_copy_row(bottom, bh) +.end: ; } + RET +%endmacro + +%if ARCH_X86_32 +INIT_MMX mmx +vvar_fn +%endif + +INIT_XMM sse +vvar_fn + +%macro hvar_fn 0 +cglobal emu_edge_hvar, 5, 6, 1, dst, dst_stride, start_x, n_words, h, w + lea dstq, [dstq+n_wordsq*2] + neg n_wordsq + lea start_xq, [start_xq+n_wordsq*2] +.y_loop: ; do { +%if cpuflag(avx2) + vpbroadcastb m0, [dstq+start_xq] + mov wq, n_wordsq ; initialize w +%else + movzx wd, byte [dstq+start_xq] ; w = read(1) + imul wd, 0x01010101 ; w *= 0x01010101 + movd m0, wd + mov wq, n_wordsq ; initialize w +%if cpuflag(sse2) + pshufd m0, m0, q0000 ; splat +%else ; mmx + punpckldq m0, m0 ; splat +%endif ; mmx/sse +%endif ; avx2 +.x_loop: ; do { + movu [dstq+wq*2], m0 ; write($reg, $mmsize) + add wq, mmsize/2 ; w -= $mmsize/2 + cmp wq, -(mmsize/2) ; } while (w > $mmsize/2) + jl .x_loop + movu [dstq-mmsize], m0 ; write($reg, $mmsize) + add dstq, dst_strideq ; dst += dst_stride + dec hq ; } while (h--) + jnz .y_loop + RET +%endmacro + +%if ARCH_X86_32 +INIT_MMX mmx +hvar_fn +%endif + +INIT_XMM sse2 +hvar_fn + +%if HAVE_AVX2_EXTERNAL +INIT_XMM avx2 +hvar_fn +%endif + +; macro to read/write a horizontal number of pixels (%2) to/from registers +; on sse, - fills xmm0-15 for consecutive sets of 16 pixels +; - if (%2 & 8) fills 8 bytes into xmm$next +; - if (%2 & 4) fills 4 bytes into xmm$next +; - if (%2 & 3) fills 1, 2 or 4 bytes in eax +; on mmx, - fills mm0-7 for consecutive sets of 8 pixels +; - if (%2 & 4) fills 4 bytes into mm$next +; - if (%2 & 3) fills 1, 2 or 4 bytes in eax +; writing data out is in the same way +%macro READ_NUM_BYTES 2 +%assign %%off 0 ; offset in source buffer +%assign %%mmx_idx 0 ; mmx register index +%assign %%xmm_idx 0 ; xmm register index + +%rep %2/mmsize +%if mmsize == 16 + movu xmm %+ %%xmm_idx, [srcq+%%off] +%assign %%xmm_idx %%xmm_idx+1 +%else ; mmx + movu mm %+ %%mmx_idx, [srcq+%%off] +%assign %%mmx_idx %%mmx_idx+1 +%endif +%assign %%off %%off+mmsize +%endrep ; %2/mmsize + +%if mmsize == 16 +%if (%2-%%off) >= 8 +%if %2 > 16 && (%2-%%off) > 8 + movu xmm %+ %%xmm_idx, [srcq+%2-16] +%assign %%xmm_idx %%xmm_idx+1 +%assign %%off %2 +%else + movq mm %+ %%mmx_idx, [srcq+%%off] +%assign %%mmx_idx %%mmx_idx+1 +%assign %%off %%off+8 +%endif +%endif ; (%2-%%off) >= 8 +%endif + +%if (%2-%%off) >= 4 +%if %2 > 8 && (%2-%%off) > 4 + movq mm %+ %%mmx_idx, [srcq+%2-8] +%assign %%off %2 +%else + movd mm %+ %%mmx_idx, [srcq+%%off] +%assign %%off %%off+4 +%endif +%assign %%mmx_idx %%mmx_idx+1 +%endif ; (%2-%%off) >= 4 + +%if (%2-%%off) >= 1 +%if %2 >= 4 + movd mm %+ %%mmx_idx, [srcq+%2-4] +%elif (%2-%%off) == 1 + mov valb, [srcq+%2-1] +%elif (%2-%%off) == 2 + mov valw, [srcq+%2-2] +%else + mov valb, [srcq+%2-1] + ror vald, 16 + mov valw, [srcq+%2-3] +%endif +%endif ; (%2-%%off) >= 1 +%endmacro ; READ_NUM_BYTES + +%macro WRITE_NUM_BYTES 2 +%assign %%off 0 ; offset in destination buffer +%assign %%mmx_idx 0 ; mmx register index +%assign %%xmm_idx 0 ; xmm register index + +%rep %2/mmsize +%if mmsize == 16 + movu [dstq+%%off], xmm %+ %%xmm_idx +%assign %%xmm_idx %%xmm_idx+1 +%else ; mmx + movu [dstq+%%off], mm %+ %%mmx_idx +%assign %%mmx_idx %%mmx_idx+1 +%endif +%assign %%off %%off+mmsize +%endrep ; %2/mmsize + +%if mmsize == 16 +%if (%2-%%off) >= 8 +%if %2 > 16 && (%2-%%off) > 8 + movu [dstq+%2-16], xmm %+ %%xmm_idx +%assign %%xmm_idx %%xmm_idx+1 +%assign %%off %2 +%else + movq [dstq+%%off], mm %+ %%mmx_idx +%assign %%mmx_idx %%mmx_idx+1 +%assign %%off %%off+8 +%endif +%endif ; (%2-%%off) >= 8 +%endif + +%if (%2-%%off) >= 4 +%if %2 > 8 && (%2-%%off) > 4 + movq [dstq+%2-8], mm %+ %%mmx_idx +%assign %%off %2 +%else + movd [dstq+%%off], mm %+ %%mmx_idx +%assign %%off %%off+4 +%endif +%assign %%mmx_idx %%mmx_idx+1 +%endif ; (%2-%%off) >= 4 + +%if (%2-%%off) >= 1 +%if %2 >= 4 + movd [dstq+%2-4], mm %+ %%mmx_idx +%elif (%2-%%off) == 1 + mov [dstq+%2-1], valb +%elif (%2-%%off) == 2 + mov [dstq+%2-2], valw +%else + mov [dstq+%2-3], valw + ror vald, 16 + mov [dstq+%2-1], valb +%ifnidn %1, body + ror vald, 16 +%endif +%endif +%endif ; (%2-%%off) >= 1 +%endmacro ; WRITE_NUM_BYTES + +; vertical top/bottom extend and body copy fast loops +; these are function pointers to set-width line copy functions, i.e. +; they read a fixed number of pixels into set registers, and write +; those out into the destination buffer +%macro VERTICAL_EXTEND 2 +%assign %%n %1 +%rep 1+%2-%1 +%if %%n <= 3 +%if ARCH_X86_64 +cglobal emu_edge_vfix %+ %%n, 6, 8, 0, dst, dst_stride, src, src_stride, \ + start_y, end_y, val, bh + mov bhq, r6mp ; r6mp = bhmp +%else ; x86-32 +cglobal emu_edge_vfix %+ %%n, 0, 6, 0, val, dst, src, start_y, end_y, bh + mov dstq, r0mp + mov srcq, r2mp + mov start_yq, r4mp + mov end_yq, r5mp + mov bhq, r6mp +%define dst_strideq r1mp +%define src_strideq r3mp +%endif ; x86-64/32 +%else +%if ARCH_X86_64 +cglobal emu_edge_vfix %+ %%n, 7, 7, 1, dst, dst_stride, src, src_stride, \ + start_y, end_y, bh +%else ; x86-32 +cglobal emu_edge_vfix %+ %%n, 1, 5, 1, dst, src, start_y, end_y, bh + mov srcq, r2mp + mov start_yq, r4mp + mov end_yq, r5mp + mov bhq, r6mp +%define dst_strideq r1mp +%define src_strideq r3mp +%endif ; x86-64/32 +%endif + ; FIXME move this to c wrapper? + sub bhq, end_yq ; bh -= end_y + sub end_yq, start_yq ; end_y -= start_y + + ; extend pixels above body + test start_yq, start_yq ; if (start_y) { + jz .body_loop + READ_NUM_BYTES top, %%n ; $variable_regs = read($n) +.top_loop: ; do { + WRITE_NUM_BYTES top, %%n ; write($variable_regs, $n) + add dstq, dst_strideq ; dst += linesize + dec start_yq ; } while (--start_y) + jnz .top_loop ; } + + ; copy body pixels +.body_loop: ; do { + READ_NUM_BYTES body, %%n ; $variable_regs = read($n) + WRITE_NUM_BYTES body, %%n ; write($variable_regs, $n) + add dstq, dst_strideq ; dst += dst_stride + add srcq, src_strideq ; src += src_stride + dec end_yq ; } while (--end_y) + jnz .body_loop + + ; copy bottom pixels + test bhq, bhq ; if (block_h) { + jz .end + sub srcq, src_strideq ; src -= linesize + READ_NUM_BYTES bottom, %%n ; $variable_regs = read($n) +.bottom_loop: ; do { + WRITE_NUM_BYTES bottom, %%n ; write($variable_regs, $n) + add dstq, dst_strideq ; dst += linesize + dec bhq ; } while (--bh) + jnz .bottom_loop ; } + +.end: + RET +%assign %%n %%n+1 +%endrep ; 1+%2-%1 +%endmacro ; VERTICAL_EXTEND + +INIT_MMX mmx +VERTICAL_EXTEND 1, 15 +%if ARCH_X86_32 +VERTICAL_EXTEND 16, 22 +%endif + +INIT_XMM sse +VERTICAL_EXTEND 16, 22 + +; left/right (horizontal) fast extend functions +; these are essentially identical to the vertical extend ones above, +; just left/right separated because number of pixels to extend is +; obviously not the same on both sides. + +%macro READ_V_PIXEL 2 +%if cpuflag(avx2) + vpbroadcastb m0, %2 +%else + movzx vald, byte %2 + imul vald, 0x01010101 +%if %1 >= 8 + movd m0, vald +%if mmsize == 16 + pshufd m0, m0, q0000 +%else + punpckldq m0, m0 +%endif ; mmsize == 16 +%endif ; %1 > 16 +%endif ; avx2 +%endmacro ; READ_V_PIXEL + +%macro WRITE_V_PIXEL 2 +%assign %%off 0 + +%if %1 >= 8 + +%rep %1/mmsize + movu [%2+%%off], m0 +%assign %%off %%off+mmsize +%endrep ; %1/mmsize + +%if mmsize == 16 +%if %1-%%off >= 8 +%if %1 > 16 && %1-%%off > 8 + movu [%2+%1-16], m0 +%assign %%off %1 +%else + movq [%2+%%off], m0 +%assign %%off %%off+8 +%endif +%endif ; %1-%%off >= 8 +%endif ; mmsize == 16 + +%if %1-%%off >= 4 +%if %1 > 8 && %1-%%off > 4 + movq [%2+%1-8], m0 +%assign %%off %1 +%else + movd [%2+%%off], m0 +%assign %%off %%off+4 +%endif +%endif ; %1-%%off >= 4 + +%else ; %1 < 8 + +%rep %1/4 + mov [%2+%%off], vald +%assign %%off %%off+4 +%endrep ; %1/4 + +%endif ; %1 >=/< 8 + +%if %1-%%off == 2 +%if cpuflag(avx2) + movd [%2+%%off-2], m0 +%else + mov [%2+%%off], valw +%endif ; avx2 +%endif ; (%1-%%off)/2 +%endmacro ; WRITE_V_PIXEL + +%macro H_EXTEND 2 +%assign %%n %1 +%rep 1+(%2-%1)/2 +%if cpuflag(avx2) +cglobal emu_edge_hfix %+ %%n, 4, 4, 1, dst, dst_stride, start_x, bh +%else +cglobal emu_edge_hfix %+ %%n, 4, 5, 1, dst, dst_stride, start_x, bh, val +%endif +.loop_y: ; do { + READ_V_PIXEL %%n, [dstq+start_xq] ; $variable_regs = read($n) + WRITE_V_PIXEL %%n, dstq ; write($variable_regs, $n) + add dstq, dst_strideq ; dst += dst_stride + dec bhq ; } while (--bh) + jnz .loop_y + RET +%assign %%n %%n+2 +%endrep ; 1+(%2-%1)/2 +%endmacro ; H_EXTEND + +INIT_MMX mmx +H_EXTEND 2, 14 +%if ARCH_X86_32 +H_EXTEND 16, 22 +%endif + +INIT_XMM sse2 +H_EXTEND 16, 22 + +%if HAVE_AVX2_EXTERNAL +INIT_XMM avx2 +H_EXTEND 8, 22 +%endif + +%macro PREFETCH_FN 1 +cglobal prefetch, 3, 3, 0, buf, stride, h +.loop: + %1 [bufq] + add bufq, strideq + dec hd + jg .loop + REP_RET +%endmacro + +INIT_MMX mmxext +PREFETCH_FN prefetcht0 +%if ARCH_X86_32 +INIT_MMX 3dnow +PREFETCH_FN prefetch +%endif diff --git a/libs/ffvpx/libavcodec/x86/videodsp_init.c b/libs/ffvpx/libavcodec/x86/videodsp_init.c new file mode 100644 index 000000000..eeebb4154 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/videodsp_init.c @@ -0,0 +1,309 @@ +/* + * Copyright (C) 2002-2012 Michael Niedermayer + * Copyright (C) 2012 Ronald S. Bultje + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/avassert.h" +#include "libavutil/common.h" +#include "libavutil/cpu.h" +#include "libavutil/mem.h" +#include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/videodsp.h" + +#if HAVE_X86ASM +typedef void emu_edge_vfix_func(uint8_t *dst, x86_reg dst_stride, + const uint8_t *src, x86_reg src_stride, + x86_reg start_y, x86_reg end_y, x86_reg bh); +typedef void emu_edge_vvar_func(uint8_t *dst, x86_reg dst_stride, + const uint8_t *src, x86_reg src_stride, + x86_reg start_y, x86_reg end_y, x86_reg bh, + x86_reg w); + +extern emu_edge_vfix_func ff_emu_edge_vfix1_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix2_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix3_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix4_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix5_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix6_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix7_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix8_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix9_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix10_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix11_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix12_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix13_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix14_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix15_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix16_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix17_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix18_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix19_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix20_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix21_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix22_mmx; +#if ARCH_X86_32 +static emu_edge_vfix_func * const vfixtbl_mmx[22] = { + &ff_emu_edge_vfix1_mmx, &ff_emu_edge_vfix2_mmx, &ff_emu_edge_vfix3_mmx, + &ff_emu_edge_vfix4_mmx, &ff_emu_edge_vfix5_mmx, &ff_emu_edge_vfix6_mmx, + &ff_emu_edge_vfix7_mmx, &ff_emu_edge_vfix8_mmx, &ff_emu_edge_vfix9_mmx, + &ff_emu_edge_vfix10_mmx, &ff_emu_edge_vfix11_mmx, &ff_emu_edge_vfix12_mmx, + &ff_emu_edge_vfix13_mmx, &ff_emu_edge_vfix14_mmx, &ff_emu_edge_vfix15_mmx, + &ff_emu_edge_vfix16_mmx, &ff_emu_edge_vfix17_mmx, &ff_emu_edge_vfix18_mmx, + &ff_emu_edge_vfix19_mmx, &ff_emu_edge_vfix20_mmx, &ff_emu_edge_vfix21_mmx, + &ff_emu_edge_vfix22_mmx +}; +#endif +extern emu_edge_vvar_func ff_emu_edge_vvar_mmx; +extern emu_edge_vfix_func ff_emu_edge_vfix16_sse; +extern emu_edge_vfix_func ff_emu_edge_vfix17_sse; +extern emu_edge_vfix_func ff_emu_edge_vfix18_sse; +extern emu_edge_vfix_func ff_emu_edge_vfix19_sse; +extern emu_edge_vfix_func ff_emu_edge_vfix20_sse; +extern emu_edge_vfix_func ff_emu_edge_vfix21_sse; +extern emu_edge_vfix_func ff_emu_edge_vfix22_sse; +static emu_edge_vfix_func * const vfixtbl_sse[22] = { + ff_emu_edge_vfix1_mmx, ff_emu_edge_vfix2_mmx, ff_emu_edge_vfix3_mmx, + ff_emu_edge_vfix4_mmx, ff_emu_edge_vfix5_mmx, ff_emu_edge_vfix6_mmx, + ff_emu_edge_vfix7_mmx, ff_emu_edge_vfix8_mmx, ff_emu_edge_vfix9_mmx, + ff_emu_edge_vfix10_mmx, ff_emu_edge_vfix11_mmx, ff_emu_edge_vfix12_mmx, + ff_emu_edge_vfix13_mmx, ff_emu_edge_vfix14_mmx, ff_emu_edge_vfix15_mmx, + ff_emu_edge_vfix16_sse, ff_emu_edge_vfix17_sse, ff_emu_edge_vfix18_sse, + ff_emu_edge_vfix19_sse, ff_emu_edge_vfix20_sse, ff_emu_edge_vfix21_sse, + ff_emu_edge_vfix22_sse +}; +extern emu_edge_vvar_func ff_emu_edge_vvar_sse; + +typedef void emu_edge_hfix_func(uint8_t *dst, x86_reg dst_stride, + x86_reg start_x, x86_reg bh); +typedef void emu_edge_hvar_func(uint8_t *dst, x86_reg dst_stride, + x86_reg start_x, x86_reg n_words, x86_reg bh); + +extern emu_edge_hfix_func ff_emu_edge_hfix2_mmx; +extern emu_edge_hfix_func ff_emu_edge_hfix4_mmx; +extern emu_edge_hfix_func ff_emu_edge_hfix6_mmx; +extern emu_edge_hfix_func ff_emu_edge_hfix8_mmx; +extern emu_edge_hfix_func ff_emu_edge_hfix10_mmx; +extern emu_edge_hfix_func ff_emu_edge_hfix12_mmx; +extern emu_edge_hfix_func ff_emu_edge_hfix14_mmx; +extern emu_edge_hfix_func ff_emu_edge_hfix16_mmx; +extern emu_edge_hfix_func ff_emu_edge_hfix18_mmx; +extern emu_edge_hfix_func ff_emu_edge_hfix20_mmx; +extern emu_edge_hfix_func ff_emu_edge_hfix22_mmx; +#if ARCH_X86_32 +static emu_edge_hfix_func * const hfixtbl_mmx[11] = { + ff_emu_edge_hfix2_mmx, ff_emu_edge_hfix4_mmx, ff_emu_edge_hfix6_mmx, + ff_emu_edge_hfix8_mmx, ff_emu_edge_hfix10_mmx, ff_emu_edge_hfix12_mmx, + ff_emu_edge_hfix14_mmx, ff_emu_edge_hfix16_mmx, ff_emu_edge_hfix18_mmx, + ff_emu_edge_hfix20_mmx, ff_emu_edge_hfix22_mmx +}; +#endif +extern emu_edge_hvar_func ff_emu_edge_hvar_mmx; +extern emu_edge_hfix_func ff_emu_edge_hfix16_sse2; +extern emu_edge_hfix_func ff_emu_edge_hfix18_sse2; +extern emu_edge_hfix_func ff_emu_edge_hfix20_sse2; +extern emu_edge_hfix_func ff_emu_edge_hfix22_sse2; +static emu_edge_hfix_func * const hfixtbl_sse2[11] = { + ff_emu_edge_hfix2_mmx, ff_emu_edge_hfix4_mmx, ff_emu_edge_hfix6_mmx, + ff_emu_edge_hfix8_mmx, ff_emu_edge_hfix10_mmx, ff_emu_edge_hfix12_mmx, + ff_emu_edge_hfix14_mmx, ff_emu_edge_hfix16_sse2, ff_emu_edge_hfix18_sse2, + ff_emu_edge_hfix20_sse2, ff_emu_edge_hfix22_sse2 +}; +extern emu_edge_hvar_func ff_emu_edge_hvar_sse2; +#if HAVE_AVX2_EXTERNAL +extern emu_edge_hfix_func ff_emu_edge_hfix8_avx2; +extern emu_edge_hfix_func ff_emu_edge_hfix10_avx2; +extern emu_edge_hfix_func ff_emu_edge_hfix12_avx2; +extern emu_edge_hfix_func ff_emu_edge_hfix14_avx2; +extern emu_edge_hfix_func ff_emu_edge_hfix16_avx2; +extern emu_edge_hfix_func ff_emu_edge_hfix18_avx2; +extern emu_edge_hfix_func ff_emu_edge_hfix20_avx2; +extern emu_edge_hfix_func ff_emu_edge_hfix22_avx2; +static emu_edge_hfix_func * const hfixtbl_avx2[11] = { + ff_emu_edge_hfix2_mmx, ff_emu_edge_hfix4_mmx, ff_emu_edge_hfix6_mmx, + ff_emu_edge_hfix8_avx2, ff_emu_edge_hfix10_avx2, ff_emu_edge_hfix12_avx2, + ff_emu_edge_hfix14_avx2, ff_emu_edge_hfix16_avx2, ff_emu_edge_hfix18_avx2, + ff_emu_edge_hfix20_avx2, ff_emu_edge_hfix22_avx2 +}; +extern emu_edge_hvar_func ff_emu_edge_hvar_avx2; +#endif + +static av_always_inline void emulated_edge_mc(uint8_t *dst, const uint8_t *src, + ptrdiff_t dst_stride, + ptrdiff_t src_stride, + x86_reg block_w, x86_reg block_h, + x86_reg src_x, x86_reg src_y, + x86_reg w, x86_reg h, + emu_edge_vfix_func * const *vfix_tbl, + emu_edge_vvar_func *v_extend_var, + emu_edge_hfix_func * const *hfix_tbl, + emu_edge_hvar_func *h_extend_var) +{ + x86_reg start_y, start_x, end_y, end_x, src_y_add = 0, p; + + if (!w || !h) + return; + + av_assert2(block_w <= FFABS(dst_stride)); + + if (src_y >= h) { + src -= src_y*src_stride; + src_y_add = h - 1; + src_y = h - 1; + } else if (src_y <= -block_h) { + src -= src_y*src_stride; + src_y_add = 1 - block_h; + src_y = 1 - block_h; + } + if (src_x >= w) { + src += w - 1 - src_x; + src_x = w - 1; + } else if (src_x <= -block_w) { + src += 1 - block_w - src_x; + src_x = 1 - block_w; + } + + start_y = FFMAX(0, -src_y); + start_x = FFMAX(0, -src_x); + end_y = FFMIN(block_h, h-src_y); + end_x = FFMIN(block_w, w-src_x); + av_assert2(start_x < end_x && block_w > 0); + av_assert2(start_y < end_y && block_h > 0); + + // fill in the to-be-copied part plus all above/below + src += (src_y_add + start_y) * src_stride + start_x; + w = end_x - start_x; + if (w <= 22) { + vfix_tbl[w - 1](dst + start_x, dst_stride, src, src_stride, + start_y, end_y, block_h); + } else { + v_extend_var(dst + start_x, dst_stride, src, src_stride, + start_y, end_y, block_h, w); + } + + // fill left + if (start_x) { + if (start_x <= 22) { + hfix_tbl[(start_x - 1) >> 1](dst, dst_stride, start_x, block_h); + } else { + h_extend_var(dst, dst_stride, + start_x, (start_x + 1) >> 1, block_h); + } + } + + // fill right + p = block_w - end_x; + if (p) { + if (p <= 22) { + hfix_tbl[(p - 1) >> 1](dst + end_x - (p & 1), dst_stride, + -!(p & 1), block_h); + } else { + h_extend_var(dst + end_x - (p & 1), dst_stride, + -!(p & 1), (p + 1) >> 1, block_h); + } + } +} + +#if ARCH_X86_32 +static av_noinline void emulated_edge_mc_mmx(uint8_t *buf, const uint8_t *src, + ptrdiff_t buf_stride, + ptrdiff_t src_stride, + int block_w, int block_h, + int src_x, int src_y, int w, int h) +{ + emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h, + src_x, src_y, w, h, vfixtbl_mmx, &ff_emu_edge_vvar_mmx, + hfixtbl_mmx, &ff_emu_edge_hvar_mmx); +} + +static av_noinline void emulated_edge_mc_sse(uint8_t *buf, const uint8_t *src, + ptrdiff_t buf_stride, + ptrdiff_t src_stride, + int block_w, int block_h, + int src_x, int src_y, int w, int h) +{ + emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h, + src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse, + hfixtbl_mmx, &ff_emu_edge_hvar_mmx); +} +#endif + +static av_noinline void emulated_edge_mc_sse2(uint8_t *buf, const uint8_t *src, + ptrdiff_t buf_stride, + ptrdiff_t src_stride, + int block_w, int block_h, + int src_x, int src_y, int w, + int h) +{ + emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h, + src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse, + hfixtbl_sse2, &ff_emu_edge_hvar_sse2); +} + +#if HAVE_AVX2_EXTERNAL +static av_noinline void emulated_edge_mc_avx2(uint8_t *buf, const uint8_t *src, + ptrdiff_t buf_stride, + ptrdiff_t src_stride, + int block_w, int block_h, + int src_x, int src_y, int w, + int h) +{ + emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h, + src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse, + hfixtbl_avx2, &ff_emu_edge_hvar_avx2); +} +#endif /* HAVE_AVX2_EXTERNAL */ +#endif /* HAVE_X86ASM */ + +void ff_prefetch_mmxext(uint8_t *buf, ptrdiff_t stride, int h); +void ff_prefetch_3dnow(uint8_t *buf, ptrdiff_t stride, int h); + +av_cold void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc) +{ +#if HAVE_X86ASM + int cpu_flags = av_get_cpu_flags(); + +#if ARCH_X86_32 + if (EXTERNAL_MMX(cpu_flags) && bpc <= 8) { + ctx->emulated_edge_mc = emulated_edge_mc_mmx; + } + if (EXTERNAL_AMD3DNOW(cpu_flags)) { + ctx->prefetch = ff_prefetch_3dnow; + } +#endif /* ARCH_X86_32 */ + if (EXTERNAL_MMXEXT(cpu_flags)) { + ctx->prefetch = ff_prefetch_mmxext; + } +#if ARCH_X86_32 + if (EXTERNAL_SSE(cpu_flags) && bpc <= 8) { + ctx->emulated_edge_mc = emulated_edge_mc_sse; + } +#endif /* ARCH_X86_32 */ + if (EXTERNAL_SSE2(cpu_flags) && bpc <= 8) { + ctx->emulated_edge_mc = emulated_edge_mc_sse2; + } +#if HAVE_AVX2_EXTERNAL + if (EXTERNAL_AVX2(cpu_flags) && bpc <= 8) { + ctx->emulated_edge_mc = emulated_edge_mc_avx2; + } +#endif +#endif /* HAVE_X86ASM */ +} diff --git a/libs/ffvpx/libavcodec/x86/vp56_arith.h b/libs/ffvpx/libavcodec/x86/vp56_arith.h new file mode 100644 index 000000000..810cc8dcd --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp56_arith.h @@ -0,0 +1,51 @@ +/** + * VP5 and VP6 compatible video decoder (arith decoder) + * + * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> + * Copyright (C) 2010 Eli Friedman + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_X86_VP56_ARITH_H +#define AVCODEC_X86_VP56_ARITH_H + +#if HAVE_INLINE_ASM && HAVE_FAST_CMOV && HAVE_6REGS +#define vp56_rac_get_prob vp56_rac_get_prob +static av_always_inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob) +{ + unsigned int code_word = vp56_rac_renorm(c); + unsigned int low = 1 + (((c->high - 1) * prob) >> 8); + unsigned int low_shift = low << 16; + int bit = 0; + c->code_word = code_word; + + __asm__( + "subl %4, %1 \n\t" + "subl %3, %2 \n\t" + "setae %b0 \n\t" + "cmovb %4, %1 \n\t" + "cmovb %5, %2 \n\t" + : "+q"(bit), "+&r"(c->high), "+&r"(c->code_word) + : "r"(low_shift), "r"(low), "r"(code_word) + ); + + return bit; +} +#endif + +#endif /* AVCODEC_X86_VP56_ARITH_H */ diff --git a/libs/ffvpx/libavcodec/x86/vp8dsp.asm b/libs/ffvpx/libavcodec/x86/vp8dsp.asm new file mode 100644 index 000000000..75de5690a --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp8dsp.asm @@ -0,0 +1,1231 @@ +;****************************************************************************** +;* VP8 MMXEXT optimizations +;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com> +;* Copyright (c) 2010 Fiona Glaser <fiona@x264.com> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA + +fourtap_filter_hw_m: times 4 dw -6, 123 + times 4 dw 12, -1 + times 4 dw -9, 93 + times 4 dw 50, -6 + times 4 dw -6, 50 + times 4 dw 93, -9 + times 4 dw -1, 12 + times 4 dw 123, -6 + +sixtap_filter_hw_m: times 4 dw 2, -11 + times 4 dw 108, 36 + times 4 dw -8, 1 + times 4 dw 3, -16 + times 4 dw 77, 77 + times 4 dw -16, 3 + times 4 dw 1, -8 + times 4 dw 36, 108 + times 4 dw -11, 2 + +fourtap_filter_hb_m: times 8 db -6, 123 + times 8 db 12, -1 + times 8 db -9, 93 + times 8 db 50, -6 + times 8 db -6, 50 + times 8 db 93, -9 + times 8 db -1, 12 + times 8 db 123, -6 + +sixtap_filter_hb_m: times 8 db 2, 1 + times 8 db -11, 108 + times 8 db 36, -8 + times 8 db 3, 3 + times 8 db -16, 77 + times 8 db 77, -16 + times 8 db 1, 2 + times 8 db -8, 36 + times 8 db 108, -11 + +fourtap_filter_v_m: times 8 dw -6 + times 8 dw 123 + times 8 dw 12 + times 8 dw -1 + times 8 dw -9 + times 8 dw 93 + times 8 dw 50 + times 8 dw -6 + times 8 dw -6 + times 8 dw 50 + times 8 dw 93 + times 8 dw -9 + times 8 dw -1 + times 8 dw 12 + times 8 dw 123 + times 8 dw -6 + +sixtap_filter_v_m: times 8 dw 2 + times 8 dw -11 + times 8 dw 108 + times 8 dw 36 + times 8 dw -8 + times 8 dw 1 + times 8 dw 3 + times 8 dw -16 + times 8 dw 77 + times 8 dw 77 + times 8 dw -16 + times 8 dw 3 + times 8 dw 1 + times 8 dw -8 + times 8 dw 36 + times 8 dw 108 + times 8 dw -11 + times 8 dw 2 + +bilinear_filter_vw_m: times 8 dw 1 + times 8 dw 2 + times 8 dw 3 + times 8 dw 4 + times 8 dw 5 + times 8 dw 6 + times 8 dw 7 + +bilinear_filter_vb_m: times 8 db 7, 1 + times 8 db 6, 2 + times 8 db 5, 3 + times 8 db 4, 4 + times 8 db 3, 5 + times 8 db 2, 6 + times 8 db 1, 7 + +%ifdef PIC +%define fourtap_filter_hw picregq +%define sixtap_filter_hw picregq +%define fourtap_filter_hb picregq +%define sixtap_filter_hb picregq +%define fourtap_filter_v picregq +%define sixtap_filter_v picregq +%define bilinear_filter_vw picregq +%define bilinear_filter_vb picregq +%define npicregs 1 +%else +%define fourtap_filter_hw fourtap_filter_hw_m +%define sixtap_filter_hw sixtap_filter_hw_m +%define fourtap_filter_hb fourtap_filter_hb_m +%define sixtap_filter_hb sixtap_filter_hb_m +%define fourtap_filter_v fourtap_filter_v_m +%define sixtap_filter_v sixtap_filter_v_m +%define bilinear_filter_vw bilinear_filter_vw_m +%define bilinear_filter_vb bilinear_filter_vb_m +%define npicregs 0 +%endif + +filter_h2_shuf: db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 +filter_h4_shuf: db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 + +filter_h6_shuf1: db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12 +filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9 +filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11 + +pw_20091: times 4 dw 20091 +pw_17734: times 4 dw 17734 + +cextern pw_3 +cextern pw_4 +cextern pw_64 +cextern pw_256 + +SECTION .text + +;------------------------------------------------------------------------------- +; subpel MC functions: +; +; void ff_put_vp8_epel<size>_h<htap>v<vtap>_<opt>(uint8_t *dst, ptrdiff_t deststride, +; uint8_t *src, ptrdiff_t srcstride, +; int height, int mx, int my); +;------------------------------------------------------------------------------- + +%macro FILTER_SSSE3 1 +cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, height, mx, picreg + lea mxd, [mxq*3] + mova m3, [filter_h6_shuf2] + mova m4, [filter_h6_shuf3] +%ifdef PIC + lea picregq, [sixtap_filter_hb_m] +%endif + mova m5, [sixtap_filter_hb+mxq*8-48] ; set up 6tap filter in bytes + mova m6, [sixtap_filter_hb+mxq*8-32] + mova m7, [sixtap_filter_hb+mxq*8-16] + +.nextrow: + movu m0, [srcq-2] + mova m1, m0 + mova m2, m0 +%if mmsize == 8 +; For epel4, we need 9 bytes, but only 8 get loaded; to compensate, do the +; shuffle with a memory operand + punpcklbw m0, [srcq+3] +%else + pshufb m0, [filter_h6_shuf1] +%endif + pshufb m1, m3 + pshufb m2, m4 + pmaddubsw m0, m5 + pmaddubsw m1, m6 + pmaddubsw m2, m7 + paddsw m0, m1 + paddsw m0, m2 + pmulhrsw m0, [pw_256] + packuswb m0, m0 + movh [dstq], m0 ; store + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET + +cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg + shl mxd, 4 + mova m2, [pw_256] + mova m3, [filter_h2_shuf] + mova m4, [filter_h4_shuf] +%ifdef PIC + lea picregq, [fourtap_filter_hb_m] +%endif + mova m5, [fourtap_filter_hb+mxq-16] ; set up 4tap filter in bytes + mova m6, [fourtap_filter_hb+mxq] + +.nextrow: + movu m0, [srcq-1] + mova m1, m0 + pshufb m0, m3 + pshufb m1, m4 + pmaddubsw m0, m5 + pmaddubsw m1, m6 + paddsw m0, m1 + pmulhrsw m0, m2 + packuswb m0, m0 + movh [dstq], m0 ; store + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET + +cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my + shl myd, 4 +%ifdef PIC + lea picregq, [fourtap_filter_hb_m] +%endif + mova m5, [fourtap_filter_hb+myq-16] + mova m6, [fourtap_filter_hb+myq] + mova m7, [pw_256] + + ; read 3 lines + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+ srcstrideq] + movh m2, [srcq+2*srcstrideq] + add srcq, srcstrideq + +.nextrow: + movh m3, [srcq+2*srcstrideq] ; read new row + mova m4, m0 + mova m0, m1 + punpcklbw m4, m1 + mova m1, m2 + punpcklbw m2, m3 + pmaddubsw m4, m5 + pmaddubsw m2, m6 + paddsw m4, m2 + mova m2, m3 + pmulhrsw m4, m7 + packuswb m4, m4 + movh [dstq], m4 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET + +cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my + lea myd, [myq*3] +%ifdef PIC + lea picregq, [sixtap_filter_hb_m] +%endif + lea myq, [sixtap_filter_hb+myq*8] + + ; read 5 lines + sub srcq, srcstrideq + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+srcstrideq] + movh m2, [srcq+srcstrideq*2] + lea srcq, [srcq+srcstrideq*2] + add srcq, srcstrideq + movh m3, [srcq] + movh m4, [srcq+srcstrideq] + +.nextrow: + movh m5, [srcq+2*srcstrideq] ; read new row + mova m6, m0 + punpcklbw m6, m5 + mova m0, m1 + punpcklbw m1, m2 + mova m7, m3 + punpcklbw m7, m4 + pmaddubsw m6, [myq-48] + pmaddubsw m1, [myq-32] + pmaddubsw m7, [myq-16] + paddsw m6, m1 + paddsw m6, m7 + mova m1, m2 + mova m2, m3 + pmulhrsw m6, [pw_256] + mova m3, m4 + packuswb m6, m6 + mova m4, m5 + movh [dstq], m6 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET +%endmacro + +INIT_MMX ssse3 +FILTER_SSSE3 4 +INIT_XMM ssse3 +FILTER_SSSE3 8 + +; 4x4 block, H-only 4-tap filter +INIT_MMX mmxext +cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg + shl mxd, 4 +%ifdef PIC + lea picregq, [fourtap_filter_hw_m] +%endif + movq mm4, [fourtap_filter_hw+mxq-16] ; set up 4tap filter in words + movq mm5, [fourtap_filter_hw+mxq] + movq mm7, [pw_64] + pxor mm6, mm6 + +.nextrow: + movq mm1, [srcq-1] ; (ABCDEFGH) load 8 horizontal pixels + + ; first set of 2 pixels + movq mm2, mm1 ; byte ABCD.. + punpcklbw mm1, mm6 ; byte->word ABCD + pshufw mm0, mm2, 9 ; byte CDEF.. + punpcklbw mm0, mm6 ; byte->word CDEF + pshufw mm3, mm1, 0x94 ; word ABBC + pshufw mm1, mm0, 0x94 ; word CDDE + pmaddwd mm3, mm4 ; multiply 2px with F0/F1 + movq mm0, mm1 ; backup for second set of pixels + pmaddwd mm1, mm5 ; multiply 2px with F2/F3 + paddd mm3, mm1 ; finish 1st 2px + + ; second set of 2 pixels, use backup of above + punpckhbw mm2, mm6 ; byte->word EFGH + pmaddwd mm0, mm4 ; multiply backed up 2px with F0/F1 + pshufw mm1, mm2, 0x94 ; word EFFG + pmaddwd mm1, mm5 ; multiply 2px with F2/F3 + paddd mm0, mm1 ; finish 2nd 2px + + ; merge two sets of 2 pixels into one set of 4, round/clip/store + packssdw mm3, mm0 ; merge dword->word (4px) + paddsw mm3, mm7 ; rounding + psraw mm3, 7 + packuswb mm3, mm6 ; clip and word->bytes + movd [dstq], mm3 ; store + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET + +; 4x4 block, H-only 6-tap filter +INIT_MMX mmxext +cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg + lea mxd, [mxq*3] +%ifdef PIC + lea picregq, [sixtap_filter_hw_m] +%endif + movq mm4, [sixtap_filter_hw+mxq*8-48] ; set up 4tap filter in words + movq mm5, [sixtap_filter_hw+mxq*8-32] + movq mm6, [sixtap_filter_hw+mxq*8-16] + movq mm7, [pw_64] + pxor mm3, mm3 + +.nextrow: + movq mm1, [srcq-2] ; (ABCDEFGH) load 8 horizontal pixels + + ; first set of 2 pixels + movq mm2, mm1 ; byte ABCD.. + punpcklbw mm1, mm3 ; byte->word ABCD + pshufw mm0, mm2, 0x9 ; byte CDEF.. + punpckhbw mm2, mm3 ; byte->word EFGH + punpcklbw mm0, mm3 ; byte->word CDEF + pshufw mm1, mm1, 0x94 ; word ABBC + pshufw mm2, mm2, 0x94 ; word EFFG + pmaddwd mm1, mm4 ; multiply 2px with F0/F1 + pshufw mm3, mm0, 0x94 ; word CDDE + movq mm0, mm3 ; backup for second set of pixels + pmaddwd mm3, mm5 ; multiply 2px with F2/F3 + paddd mm1, mm3 ; add to 1st 2px cache + movq mm3, mm2 ; backup for second set of pixels + pmaddwd mm2, mm6 ; multiply 2px with F4/F5 + paddd mm1, mm2 ; finish 1st 2px + + ; second set of 2 pixels, use backup of above + movd mm2, [srcq+3] ; byte FGHI (prevent overreads) + pmaddwd mm0, mm4 ; multiply 1st backed up 2px with F0/F1 + pmaddwd mm3, mm5 ; multiply 2nd backed up 2px with F2/F3 + paddd mm0, mm3 ; add to 2nd 2px cache + pxor mm3, mm3 + punpcklbw mm2, mm3 ; byte->word FGHI + pshufw mm2, mm2, 0xE9 ; word GHHI + pmaddwd mm2, mm6 ; multiply 2px with F4/F5 + paddd mm0, mm2 ; finish 2nd 2px + + ; merge two sets of 2 pixels into one set of 4, round/clip/store + packssdw mm1, mm0 ; merge dword->word (4px) + paddsw mm1, mm7 ; rounding + psraw mm1, 7 + packuswb mm1, mm3 ; clip and word->bytes + movd [dstq], mm1 ; store + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET + +INIT_XMM sse2 +cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, height, mx, picreg + shl mxd, 5 +%ifdef PIC + lea picregq, [fourtap_filter_v_m] +%endif + lea mxq, [fourtap_filter_v+mxq-32] + pxor m7, m7 + mova m4, [pw_64] + mova m5, [mxq+ 0] + mova m6, [mxq+16] +%ifdef m8 + mova m8, [mxq+32] + mova m9, [mxq+48] +%endif +.nextrow: + movq m0, [srcq-1] + movq m1, [srcq-0] + movq m2, [srcq+1] + movq m3, [srcq+2] + punpcklbw m0, m7 + punpcklbw m1, m7 + punpcklbw m2, m7 + punpcklbw m3, m7 + pmullw m0, m5 + pmullw m1, m6 +%ifdef m8 + pmullw m2, m8 + pmullw m3, m9 +%else + pmullw m2, [mxq+32] + pmullw m3, [mxq+48] +%endif + paddsw m0, m1 + paddsw m2, m3 + paddsw m0, m2 + paddsw m0, m4 + psraw m0, 7 + packuswb m0, m7 + movh [dstq], m0 ; store + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET + +INIT_XMM sse2 +cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, height, mx, picreg + lea mxd, [mxq*3] + shl mxd, 4 +%ifdef PIC + lea picregq, [sixtap_filter_v_m] +%endif + lea mxq, [sixtap_filter_v+mxq-96] + pxor m7, m7 + mova m6, [pw_64] +%ifdef m8 + mova m8, [mxq+ 0] + mova m9, [mxq+16] + mova m10, [mxq+32] + mova m11, [mxq+48] + mova m12, [mxq+64] + mova m13, [mxq+80] +%endif +.nextrow: + movq m0, [srcq-2] + movq m1, [srcq-1] + movq m2, [srcq-0] + movq m3, [srcq+1] + movq m4, [srcq+2] + movq m5, [srcq+3] + punpcklbw m0, m7 + punpcklbw m1, m7 + punpcklbw m2, m7 + punpcklbw m3, m7 + punpcklbw m4, m7 + punpcklbw m5, m7 +%ifdef m8 + pmullw m0, m8 + pmullw m1, m9 + pmullw m2, m10 + pmullw m3, m11 + pmullw m4, m12 + pmullw m5, m13 +%else + pmullw m0, [mxq+ 0] + pmullw m1, [mxq+16] + pmullw m2, [mxq+32] + pmullw m3, [mxq+48] + pmullw m4, [mxq+64] + pmullw m5, [mxq+80] +%endif + paddsw m1, m4 + paddsw m0, m5 + paddsw m1, m2 + paddsw m0, m3 + paddsw m0, m1 + paddsw m0, m6 + psraw m0, 7 + packuswb m0, m7 + movh [dstq], m0 ; store + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET + +%macro FILTER_V 1 +; 4x4 block, V-only 4-tap filter +cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my + shl myd, 5 +%ifdef PIC + lea picregq, [fourtap_filter_v_m] +%endif + lea myq, [fourtap_filter_v+myq-32] + mova m6, [pw_64] + pxor m7, m7 + mova m5, [myq+48] + + ; read 3 lines + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+ srcstrideq] + movh m2, [srcq+2*srcstrideq] + add srcq, srcstrideq + punpcklbw m0, m7 + punpcklbw m1, m7 + punpcklbw m2, m7 + +.nextrow: + ; first calculate negative taps (to prevent losing positive overflows) + movh m4, [srcq+2*srcstrideq] ; read new row + punpcklbw m4, m7 + mova m3, m4 + pmullw m0, [myq+0] + pmullw m4, m5 + paddsw m4, m0 + + ; then calculate positive taps + mova m0, m1 + pmullw m1, [myq+16] + paddsw m4, m1 + mova m1, m2 + pmullw m2, [myq+32] + paddsw m4, m2 + mova m2, m3 + + ; round/clip/store + paddsw m4, m6 + psraw m4, 7 + packuswb m4, m7 + movh [dstq], m4 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET + + +; 4x4 block, V-only 6-tap filter +cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my + shl myd, 4 + lea myq, [myq*3] +%ifdef PIC + lea picregq, [sixtap_filter_v_m] +%endif + lea myq, [sixtap_filter_v+myq-96] + pxor m7, m7 + + ; read 5 lines + sub srcq, srcstrideq + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+srcstrideq] + movh m2, [srcq+srcstrideq*2] + lea srcq, [srcq+srcstrideq*2] + add srcq, srcstrideq + movh m3, [srcq] + movh m4, [srcq+srcstrideq] + punpcklbw m0, m7 + punpcklbw m1, m7 + punpcklbw m2, m7 + punpcklbw m3, m7 + punpcklbw m4, m7 + +.nextrow: + ; first calculate negative taps (to prevent losing positive overflows) + mova m5, m1 + pmullw m5, [myq+16] + mova m6, m4 + pmullw m6, [myq+64] + paddsw m6, m5 + + ; then calculate positive taps + movh m5, [srcq+2*srcstrideq] ; read new row + punpcklbw m5, m7 + pmullw m0, [myq+0] + paddsw m6, m0 + mova m0, m1 + mova m1, m2 + pmullw m2, [myq+32] + paddsw m6, m2 + mova m2, m3 + pmullw m3, [myq+48] + paddsw m6, m3 + mova m3, m4 + mova m4, m5 + pmullw m5, [myq+80] + paddsw m6, m5 + + ; round/clip/store + paddsw m6, [pw_64] + psraw m6, 7 + packuswb m6, m7 + movh [dstq], m6 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET +%endmacro + +INIT_MMX mmxext +FILTER_V 4 +INIT_XMM sse2 +FILTER_V 8 + +%macro FILTER_BILINEAR 1 +%if cpuflag(ssse3) +cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my + shl myd, 4 +%ifdef PIC + lea picregq, [bilinear_filter_vb_m] +%endif + pxor m4, m4 + mova m3, [bilinear_filter_vb+myq-16] +.nextrow: + movh m0, [srcq+srcstrideq*0] + movh m1, [srcq+srcstrideq*1] + movh m2, [srcq+srcstrideq*2] + punpcklbw m0, m1 + punpcklbw m1, m2 + pmaddubsw m0, m3 + pmaddubsw m1, m3 + psraw m0, 2 + psraw m1, 2 + pavgw m0, m4 + pavgw m1, m4 +%if mmsize==8 + packuswb m0, m0 + packuswb m1, m1 + movh [dstq+dststrideq*0], m0 + movh [dstq+dststrideq*1], m1 +%else + packuswb m0, m1 + movh [dstq+dststrideq*0], m0 + movhps [dstq+dststrideq*1], m0 +%endif +%else ; cpuflag(ssse3) +cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my + shl myd, 4 +%ifdef PIC + lea picregq, [bilinear_filter_vw_m] +%endif + pxor m6, m6 + mova m5, [bilinear_filter_vw+myq-1*16] + neg myq + mova m4, [bilinear_filter_vw+myq+7*16] +.nextrow: + movh m0, [srcq+srcstrideq*0] + movh m1, [srcq+srcstrideq*1] + movh m3, [srcq+srcstrideq*2] + punpcklbw m0, m6 + punpcklbw m1, m6 + punpcklbw m3, m6 + mova m2, m1 + pmullw m0, m4 + pmullw m1, m5 + pmullw m2, m4 + pmullw m3, m5 + paddsw m0, m1 + paddsw m2, m3 + psraw m0, 2 + psraw m2, 2 + pavgw m0, m6 + pavgw m2, m6 +%if mmsize == 8 + packuswb m0, m0 + packuswb m2, m2 + movh [dstq+dststrideq*0], m0 + movh [dstq+dststrideq*1], m2 +%else + packuswb m0, m2 + movh [dstq+dststrideq*0], m0 + movhps [dstq+dststrideq*1], m0 +%endif +%endif ; cpuflag(ssse3) + + lea dstq, [dstq+dststrideq*2] + lea srcq, [srcq+srcstrideq*2] + sub heightd, 2 + jg .nextrow + REP_RET + +%if cpuflag(ssse3) +cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg + shl mxd, 4 +%ifdef PIC + lea picregq, [bilinear_filter_vb_m] +%endif + pxor m4, m4 + mova m2, [filter_h2_shuf] + mova m3, [bilinear_filter_vb+mxq-16] +.nextrow: + movu m0, [srcq+srcstrideq*0] + movu m1, [srcq+srcstrideq*1] + pshufb m0, m2 + pshufb m1, m2 + pmaddubsw m0, m3 + pmaddubsw m1, m3 + psraw m0, 2 + psraw m1, 2 + pavgw m0, m4 + pavgw m1, m4 +%if mmsize==8 + packuswb m0, m0 + packuswb m1, m1 + movh [dstq+dststrideq*0], m0 + movh [dstq+dststrideq*1], m1 +%else + packuswb m0, m1 + movh [dstq+dststrideq*0], m0 + movhps [dstq+dststrideq*1], m0 +%endif +%else ; cpuflag(ssse3) +cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg + shl mxd, 4 +%ifdef PIC + lea picregq, [bilinear_filter_vw_m] +%endif + pxor m6, m6 + mova m5, [bilinear_filter_vw+mxq-1*16] + neg mxq + mova m4, [bilinear_filter_vw+mxq+7*16] +.nextrow: + movh m0, [srcq+srcstrideq*0+0] + movh m1, [srcq+srcstrideq*0+1] + movh m2, [srcq+srcstrideq*1+0] + movh m3, [srcq+srcstrideq*1+1] + punpcklbw m0, m6 + punpcklbw m1, m6 + punpcklbw m2, m6 + punpcklbw m3, m6 + pmullw m0, m4 + pmullw m1, m5 + pmullw m2, m4 + pmullw m3, m5 + paddsw m0, m1 + paddsw m2, m3 + psraw m0, 2 + psraw m2, 2 + pavgw m0, m6 + pavgw m2, m6 +%if mmsize == 8 + packuswb m0, m0 + packuswb m2, m2 + movh [dstq+dststrideq*0], m0 + movh [dstq+dststrideq*1], m2 +%else + packuswb m0, m2 + movh [dstq+dststrideq*0], m0 + movhps [dstq+dststrideq*1], m0 +%endif +%endif ; cpuflag(ssse3) + + lea dstq, [dstq+dststrideq*2] + lea srcq, [srcq+srcstrideq*2] + sub heightd, 2 + jg .nextrow + REP_RET +%endmacro + +INIT_MMX mmxext +FILTER_BILINEAR 4 +INIT_XMM sse2 +FILTER_BILINEAR 8 +INIT_MMX ssse3 +FILTER_BILINEAR 4 +INIT_XMM ssse3 +FILTER_BILINEAR 8 + +INIT_MMX mmx +cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height +.nextrow: + movq mm0, [srcq+srcstrideq*0] + movq mm1, [srcq+srcstrideq*1] + lea srcq, [srcq+srcstrideq*2] + movq [dstq+dststrideq*0], mm0 + movq [dstq+dststrideq*1], mm1 + lea dstq, [dstq+dststrideq*2] + sub heightd, 2 + jg .nextrow + REP_RET + +%if ARCH_X86_32 +INIT_MMX mmx +cglobal put_vp8_pixels16, 5, 5, 0, dst, dststride, src, srcstride, height +.nextrow: + movq mm0, [srcq+srcstrideq*0+0] + movq mm1, [srcq+srcstrideq*0+8] + movq mm2, [srcq+srcstrideq*1+0] + movq mm3, [srcq+srcstrideq*1+8] + lea srcq, [srcq+srcstrideq*2] + movq [dstq+dststrideq*0+0], mm0 + movq [dstq+dststrideq*0+8], mm1 + movq [dstq+dststrideq*1+0], mm2 + movq [dstq+dststrideq*1+8], mm3 + lea dstq, [dstq+dststrideq*2] + sub heightd, 2 + jg .nextrow + REP_RET +%endif + +INIT_XMM sse +cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height +.nextrow: + movups xmm0, [srcq+srcstrideq*0] + movups xmm1, [srcq+srcstrideq*1] + lea srcq, [srcq+srcstrideq*2] + movaps [dstq+dststrideq*0], xmm0 + movaps [dstq+dststrideq*1], xmm1 + lea dstq, [dstq+dststrideq*2] + sub heightd, 2 + jg .nextrow + REP_RET + +;----------------------------------------------------------------------------- +; void ff_vp8_idct_dc_add_<opt>(uint8_t *dst, int16_t block[16], ptrdiff_t stride); +;----------------------------------------------------------------------------- + +%macro ADD_DC 4 + %4 m2, [dst1q+%3] + %4 m3, [dst1q+strideq+%3] + %4 m4, [dst2q+%3] + %4 m5, [dst2q+strideq+%3] + paddusb m2, %1 + paddusb m3, %1 + paddusb m4, %1 + paddusb m5, %1 + psubusb m2, %2 + psubusb m3, %2 + psubusb m4, %2 + psubusb m5, %2 + %4 [dst1q+%3], m2 + %4 [dst1q+strideq+%3], m3 + %4 [dst2q+%3], m4 + %4 [dst2q+strideq+%3], m5 +%endmacro + +%if ARCH_X86_32 +INIT_MMX mmx +cglobal vp8_idct_dc_add, 3, 3, 0, dst, block, stride + ; load data + movd m0, [blockq] + + ; calculate DC + paddw m0, [pw_4] + pxor m1, m1 + psraw m0, 3 + movd [blockq], m1 + psubw m1, m0 + packuswb m0, m0 + packuswb m1, m1 + punpcklbw m0, m0 + punpcklbw m1, m1 + punpcklwd m0, m0 + punpcklwd m1, m1 + + ; add DC + DEFINE_ARGS dst1, dst2, stride + lea dst2q, [dst1q+strideq*2] + ADD_DC m0, m1, 0, movh + RET +%endif + +%macro VP8_IDCT_DC_ADD 0 +cglobal vp8_idct_dc_add, 3, 3, 6, dst, block, stride + ; load data + movd m0, [blockq] + pxor m1, m1 + + ; calculate DC + paddw m0, [pw_4] + movd [blockq], m1 + DEFINE_ARGS dst1, dst2, stride + lea dst2q, [dst1q+strideq*2] + movd m2, [dst1q] + movd m3, [dst1q+strideq] + movd m4, [dst2q] + movd m5, [dst2q+strideq] + psraw m0, 3 + pshuflw m0, m0, 0 + punpcklqdq m0, m0 + punpckldq m2, m3 + punpckldq m4, m5 + punpcklbw m2, m1 + punpcklbw m4, m1 + paddw m2, m0 + paddw m4, m0 + packuswb m2, m4 + movd [dst1q], m2 +%if cpuflag(sse4) + pextrd [dst1q+strideq], m2, 1 + pextrd [dst2q], m2, 2 + pextrd [dst2q+strideq], m2, 3 +%else + psrldq m2, 4 + movd [dst1q+strideq], m2 + psrldq m2, 4 + movd [dst2q], m2 + psrldq m2, 4 + movd [dst2q+strideq], m2 +%endif + RET +%endmacro + +INIT_XMM sse2 +VP8_IDCT_DC_ADD +INIT_XMM sse4 +VP8_IDCT_DC_ADD + +;----------------------------------------------------------------------------- +; void ff_vp8_idct_dc_add4y_<opt>(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride); +;----------------------------------------------------------------------------- + +%if ARCH_X86_32 +INIT_MMX mmx +cglobal vp8_idct_dc_add4y, 3, 3, 0, dst, block, stride + ; load data + movd m0, [blockq+32*0] ; A + movd m1, [blockq+32*2] ; C + punpcklwd m0, [blockq+32*1] ; A B + punpcklwd m1, [blockq+32*3] ; C D + punpckldq m0, m1 ; A B C D + pxor m6, m6 + + ; calculate DC + paddw m0, [pw_4] + movd [blockq+32*0], m6 + movd [blockq+32*1], m6 + movd [blockq+32*2], m6 + movd [blockq+32*3], m6 + psraw m0, 3 + psubw m6, m0 + packuswb m0, m0 + packuswb m6, m6 + punpcklbw m0, m0 ; AABBCCDD + punpcklbw m6, m6 ; AABBCCDD + movq m1, m0 + movq m7, m6 + punpcklbw m0, m0 ; AAAABBBB + punpckhbw m1, m1 ; CCCCDDDD + punpcklbw m6, m6 ; AAAABBBB + punpckhbw m7, m7 ; CCCCDDDD + + ; add DC + DEFINE_ARGS dst1, dst2, stride + lea dst2q, [dst1q+strideq*2] + ADD_DC m0, m6, 0, mova + ADD_DC m1, m7, 8, mova + RET +%endif + +INIT_XMM sse2 +cglobal vp8_idct_dc_add4y, 3, 3, 6, dst, block, stride + ; load data + movd m0, [blockq+32*0] ; A + movd m1, [blockq+32*2] ; C + punpcklwd m0, [blockq+32*1] ; A B + punpcklwd m1, [blockq+32*3] ; C D + punpckldq m0, m1 ; A B C D + pxor m1, m1 + + ; calculate DC + paddw m0, [pw_4] + movd [blockq+32*0], m1 + movd [blockq+32*1], m1 + movd [blockq+32*2], m1 + movd [blockq+32*3], m1 + psraw m0, 3 + psubw m1, m0 + packuswb m0, m0 + packuswb m1, m1 + punpcklbw m0, m0 + punpcklbw m1, m1 + punpcklbw m0, m0 + punpcklbw m1, m1 + + ; add DC + DEFINE_ARGS dst1, dst2, stride + lea dst2q, [dst1q+strideq*2] + ADD_DC m0, m1, 0, mova + RET + +;----------------------------------------------------------------------------- +; void ff_vp8_idct_dc_add4uv_<opt>(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride); +;----------------------------------------------------------------------------- + +INIT_MMX mmx +cglobal vp8_idct_dc_add4uv, 3, 3, 0, dst, block, stride + ; load data + movd m0, [blockq+32*0] ; A + movd m1, [blockq+32*2] ; C + punpcklwd m0, [blockq+32*1] ; A B + punpcklwd m1, [blockq+32*3] ; C D + punpckldq m0, m1 ; A B C D + pxor m6, m6 + + ; calculate DC + paddw m0, [pw_4] + movd [blockq+32*0], m6 + movd [blockq+32*1], m6 + movd [blockq+32*2], m6 + movd [blockq+32*3], m6 + psraw m0, 3 + psubw m6, m0 + packuswb m0, m0 + packuswb m6, m6 + punpcklbw m0, m0 ; AABBCCDD + punpcklbw m6, m6 ; AABBCCDD + movq m1, m0 + movq m7, m6 + punpcklbw m0, m0 ; AAAABBBB + punpckhbw m1, m1 ; CCCCDDDD + punpcklbw m6, m6 ; AAAABBBB + punpckhbw m7, m7 ; CCCCDDDD + + ; add DC + DEFINE_ARGS dst1, dst2, stride + lea dst2q, [dst1q+strideq*2] + ADD_DC m0, m6, 0, mova + lea dst1q, [dst1q+strideq*4] + lea dst2q, [dst2q+strideq*4] + ADD_DC m1, m7, 0, mova + RET + +;----------------------------------------------------------------------------- +; void ff_vp8_idct_add_<opt>(uint8_t *dst, int16_t block[16], ptrdiff_t stride); +;----------------------------------------------------------------------------- + +; calculate %1=mul_35468(%1)-mul_20091(%2); %2=mul_20091(%1)+mul_35468(%2) +; this macro assumes that m6/m7 have words for 20091/17734 loaded +%macro VP8_MULTIPLY_SUMSUB 4 + mova %3, %1 + mova %4, %2 + pmulhw %3, m6 ;20091(1) + pmulhw %4, m6 ;20091(2) + paddw %3, %1 + paddw %4, %2 + paddw %1, %1 + paddw %2, %2 + pmulhw %1, m7 ;35468(1) + pmulhw %2, m7 ;35468(2) + psubw %1, %4 + paddw %2, %3 +%endmacro + +; calculate x0=%1+%3; x1=%1-%3 +; x2=mul_35468(%2)-mul_20091(%4); x3=mul_20091(%2)+mul_35468(%4) +; %1=x0+x3 (tmp0); %2=x1+x2 (tmp1); %3=x1-x2 (tmp2); %4=x0-x3 (tmp3) +; %5/%6 are temporary registers +; we assume m6/m7 have constant words 20091/17734 loaded in them +%macro VP8_IDCT_TRANSFORM4x4_1D 6 + SUMSUB_BA w, %3, %1, %5 ;t0, t1 + VP8_MULTIPLY_SUMSUB m%2, m%4, m%5,m%6 ;t2, t3 + SUMSUB_BA w, %4, %3, %5 ;tmp0, tmp3 + SUMSUB_BA w, %2, %1, %5 ;tmp1, tmp2 + SWAP %4, %1 + SWAP %4, %3 +%endmacro + +%macro VP8_IDCT_ADD 0 +cglobal vp8_idct_add, 3, 3, 0, dst, block, stride + ; load block data + movq m0, [blockq+ 0] + movq m1, [blockq+ 8] + movq m2, [blockq+16] + movq m3, [blockq+24] + movq m6, [pw_20091] + movq m7, [pw_17734] +%if cpuflag(sse) + xorps xmm0, xmm0 + movaps [blockq+ 0], xmm0 + movaps [blockq+16], xmm0 +%else + pxor m4, m4 + movq [blockq+ 0], m4 + movq [blockq+ 8], m4 + movq [blockq+16], m4 + movq [blockq+24], m4 +%endif + + ; actual IDCT + VP8_IDCT_TRANSFORM4x4_1D 0, 1, 2, 3, 4, 5 + TRANSPOSE4x4W 0, 1, 2, 3, 4 + paddw m0, [pw_4] + VP8_IDCT_TRANSFORM4x4_1D 0, 1, 2, 3, 4, 5 + TRANSPOSE4x4W 0, 1, 2, 3, 4 + + ; store + pxor m4, m4 + DEFINE_ARGS dst1, dst2, stride + lea dst2q, [dst1q+2*strideq] + STORE_DIFFx2 m0, m1, m6, m7, m4, 3, dst1q, strideq + STORE_DIFFx2 m2, m3, m6, m7, m4, 3, dst2q, strideq + + RET +%endmacro + +%if ARCH_X86_32 +INIT_MMX mmx +VP8_IDCT_ADD +%endif +INIT_MMX sse +VP8_IDCT_ADD + +;----------------------------------------------------------------------------- +; void ff_vp8_luma_dc_wht(int16_t block[4][4][16], int16_t dc[16]) +;----------------------------------------------------------------------------- + +%macro SCATTER_WHT 3 + movd dc1d, m%1 + movd dc2d, m%2 + mov [blockq+2*16*(0+%3)], dc1w + mov [blockq+2*16*(1+%3)], dc2w + shr dc1d, 16 + shr dc2d, 16 + psrlq m%1, 32 + psrlq m%2, 32 + mov [blockq+2*16*(4+%3)], dc1w + mov [blockq+2*16*(5+%3)], dc2w + movd dc1d, m%1 + movd dc2d, m%2 + mov [blockq+2*16*(8+%3)], dc1w + mov [blockq+2*16*(9+%3)], dc2w + shr dc1d, 16 + shr dc2d, 16 + mov [blockq+2*16*(12+%3)], dc1w + mov [blockq+2*16*(13+%3)], dc2w +%endmacro + +%macro HADAMARD4_1D 4 + SUMSUB_BADC w, %2, %1, %4, %3 + SUMSUB_BADC w, %4, %2, %3, %1 + SWAP %1, %4, %3 +%endmacro + +%macro VP8_DC_WHT 0 +cglobal vp8_luma_dc_wht, 2, 3, 0, block, dc1, dc2 + movq m0, [dc1q] + movq m1, [dc1q+8] + movq m2, [dc1q+16] + movq m3, [dc1q+24] +%if cpuflag(sse) + xorps xmm0, xmm0 + movaps [dc1q+ 0], xmm0 + movaps [dc1q+16], xmm0 +%else + pxor m4, m4 + movq [dc1q+ 0], m4 + movq [dc1q+ 8], m4 + movq [dc1q+16], m4 + movq [dc1q+24], m4 +%endif + HADAMARD4_1D 0, 1, 2, 3 + TRANSPOSE4x4W 0, 1, 2, 3, 4 + paddw m0, [pw_3] + HADAMARD4_1D 0, 1, 2, 3 + psraw m0, 3 + psraw m1, 3 + psraw m2, 3 + psraw m3, 3 + SCATTER_WHT 0, 1, 0 + SCATTER_WHT 2, 3, 2 + RET +%endmacro + +%if ARCH_X86_32 +INIT_MMX mmx +VP8_DC_WHT +%endif +INIT_MMX sse +VP8_DC_WHT diff --git a/libs/ffvpx/libavcodec/x86/vp8dsp_init.c b/libs/ffvpx/libavcodec/x86/vp8dsp_init.c new file mode 100644 index 000000000..397b2518c --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp8dsp_init.c @@ -0,0 +1,467 @@ +/* + * VP8 DSP functions x86-optimized + * Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com> + * Copyright (c) 2010 Fiona Glaser <fiona@x264.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/mem.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/vp8dsp.h" + +#if HAVE_X86ASM + +/* + * MC functions + */ +void ff_put_vp8_epel4_h4_mmxext(uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_epel4_h6_mmxext(uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_epel4_v4_mmxext(uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_epel4_v6_mmxext(uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); + +void ff_put_vp8_epel8_h4_sse2 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_epel8_h6_sse2 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_epel8_v4_sse2 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_epel8_v6_sse2 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); + +void ff_put_vp8_epel4_h4_ssse3 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_epel4_h6_ssse3 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_epel4_v4_ssse3 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_epel4_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_epel8_h4_ssse3 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_epel8_h6_ssse3 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_epel8_v4_ssse3 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); + +void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_bilinear8_h_sse2 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); + +void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_bilinear8_v_sse2 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); + + +void ff_put_vp8_pixels8_mmx (uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_pixels16_mmx(uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); +void ff_put_vp8_pixels16_sse(uint8_t *dst, ptrdiff_t dststride, + uint8_t *src, ptrdiff_t srcstride, + int height, int mx, int my); + +#define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \ +static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \ + uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \ + ptrdiff_t srcstride, int height, int mx, int my) \ +{ \ + ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \ + dst, dststride, src, srcstride, height, mx, my); \ + ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \ + dst + 8, dststride, src + 8, srcstride, height, mx, my); \ +} +#define TAP_W8(OPT, FILTERTYPE, TAPTYPE) \ +static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \ + uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \ + ptrdiff_t srcstride, int height, int mx, int my) \ +{ \ + ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \ + dst, dststride, src, srcstride, height, mx, my); \ + ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \ + dst + 4, dststride, src + 4, srcstride, height, mx, my); \ +} + +#if ARCH_X86_32 +TAP_W8 (mmxext, epel, h4) +TAP_W8 (mmxext, epel, h6) +TAP_W16(mmxext, epel, h6) +TAP_W8 (mmxext, epel, v4) +TAP_W8 (mmxext, epel, v6) +TAP_W16(mmxext, epel, v6) +TAP_W8 (mmxext, bilinear, h) +TAP_W16(mmxext, bilinear, h) +TAP_W8 (mmxext, bilinear, v) +TAP_W16(mmxext, bilinear, v) +#endif + +TAP_W16(sse2, epel, h6) +TAP_W16(sse2, epel, v6) +TAP_W16(sse2, bilinear, h) +TAP_W16(sse2, bilinear, v) + +TAP_W16(ssse3, epel, h6) +TAP_W16(ssse3, epel, v6) +TAP_W16(ssse3, bilinear, h) +TAP_W16(ssse3, bilinear, v) + +#define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \ +static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \ + uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \ + ptrdiff_t srcstride, int height, int mx, int my) \ +{ \ + LOCAL_ALIGNED(ALIGN, uint8_t, tmp, [SIZE * (MAXHEIGHT + TAPNUMY - 1)]); \ + uint8_t *tmpptr = tmp + SIZE * (TAPNUMY / 2 - 1); \ + src -= srcstride * (TAPNUMY / 2 - 1); \ + ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## _ ## OPT( \ + tmp, SIZE, src, srcstride, height + TAPNUMY - 1, mx, my); \ + ff_put_vp8_epel ## SIZE ## _v ## TAPNUMY ## _ ## OPT( \ + dst, dststride, tmpptr, SIZE, height, mx, my); \ +} + +#if ARCH_X86_32 +#define HVTAPMMX(x, y) \ +HVTAP(mmxext, 8, x, y, 4, 8) \ +HVTAP(mmxext, 8, x, y, 8, 16) + +HVTAP(mmxext, 8, 6, 6, 16, 16) +#else +#define HVTAPMMX(x, y) \ +HVTAP(mmxext, 8, x, y, 4, 8) +#endif + +HVTAPMMX(4, 4) +HVTAPMMX(4, 6) +HVTAPMMX(6, 4) +HVTAPMMX(6, 6) + +#define HVTAPSSE2(x, y, w) \ +HVTAP(sse2, 16, x, y, w, 16) \ +HVTAP(ssse3, 16, x, y, w, 16) + +HVTAPSSE2(4, 4, 8) +HVTAPSSE2(4, 6, 8) +HVTAPSSE2(6, 4, 8) +HVTAPSSE2(6, 6, 8) +HVTAPSSE2(6, 6, 16) + +HVTAP(ssse3, 16, 4, 4, 4, 8) +HVTAP(ssse3, 16, 4, 6, 4, 8) +HVTAP(ssse3, 16, 6, 4, 4, 8) +HVTAP(ssse3, 16, 6, 6, 4, 8) + +#define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \ +static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \ + uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \ + ptrdiff_t srcstride, int height, int mx, int my) \ +{ \ + LOCAL_ALIGNED(ALIGN, uint8_t, tmp, [SIZE * (MAXHEIGHT + 2)]); \ + ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \ + tmp, SIZE, src, srcstride, height + 1, mx, my); \ + ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT( \ + dst, dststride, tmp, SIZE, height, mx, my); \ +} + +HVBILIN(mmxext, 8, 4, 8) +#if ARCH_X86_32 +HVBILIN(mmxext, 8, 8, 16) +HVBILIN(mmxext, 8, 16, 16) +#endif +HVBILIN(sse2, 8, 8, 16) +HVBILIN(sse2, 8, 16, 16) +HVBILIN(ssse3, 8, 4, 8) +HVBILIN(ssse3, 8, 8, 16) +HVBILIN(ssse3, 8, 16, 16) + +void ff_vp8_idct_dc_add_mmx(uint8_t *dst, int16_t block[16], + ptrdiff_t stride); +void ff_vp8_idct_dc_add_sse2(uint8_t *dst, int16_t block[16], + ptrdiff_t stride); +void ff_vp8_idct_dc_add_sse4(uint8_t *dst, int16_t block[16], + ptrdiff_t stride); +void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, int16_t block[4][16], + ptrdiff_t stride); +void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, int16_t block[4][16], + ptrdiff_t stride); +void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, int16_t block[2][16], + ptrdiff_t stride); +void ff_vp8_luma_dc_wht_mmx(int16_t block[4][4][16], int16_t dc[16]); +void ff_vp8_luma_dc_wht_sse(int16_t block[4][4][16], int16_t dc[16]); +void ff_vp8_idct_add_mmx(uint8_t *dst, int16_t block[16], ptrdiff_t stride); +void ff_vp8_idct_add_sse(uint8_t *dst, int16_t block[16], ptrdiff_t stride); + +#define DECLARE_LOOP_FILTER(NAME) \ +void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, \ + ptrdiff_t stride, \ + int flim); \ +void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, \ + ptrdiff_t stride, \ + int flim); \ +void ff_vp8_v_loop_filter16y_inner_ ## NAME (uint8_t *dst, \ + ptrdiff_t stride, \ + int e, int i, int hvt); \ +void ff_vp8_h_loop_filter16y_inner_ ## NAME (uint8_t *dst, \ + ptrdiff_t stride, \ + int e, int i, int hvt); \ +void ff_vp8_v_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, \ + uint8_t *dstV, \ + ptrdiff_t s, \ + int e, int i, int hvt); \ +void ff_vp8_h_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, \ + uint8_t *dstV, \ + ptrdiff_t s, \ + int e, int i, int hvt); \ +void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, \ + ptrdiff_t stride, \ + int e, int i, int hvt); \ +void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, \ + ptrdiff_t stride, \ + int e, int i, int hvt); \ +void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, \ + uint8_t *dstV, \ + ptrdiff_t s, \ + int e, int i, int hvt); \ +void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, \ + uint8_t *dstV, \ + ptrdiff_t s, \ + int e, int i, int hvt); + +DECLARE_LOOP_FILTER(mmx) +DECLARE_LOOP_FILTER(mmxext) +DECLARE_LOOP_FILTER(sse2) +DECLARE_LOOP_FILTER(ssse3) +DECLARE_LOOP_FILTER(sse4) + +#endif /* HAVE_X86ASM */ + +#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ + c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \ + c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \ + c->put_vp8_epel_pixels_tab[IDX][2][2] = ff_put_vp8_epel ## SIZE ## _h6v6_ ## OPT + +#define VP8_MC_FUNC(IDX, SIZE, OPT) \ + c->put_vp8_epel_pixels_tab[IDX][0][1] = ff_put_vp8_epel ## SIZE ## _h4_ ## OPT; \ + c->put_vp8_epel_pixels_tab[IDX][1][0] = ff_put_vp8_epel ## SIZE ## _v4_ ## OPT; \ + c->put_vp8_epel_pixels_tab[IDX][1][1] = ff_put_vp8_epel ## SIZE ## _h4v4_ ## OPT; \ + c->put_vp8_epel_pixels_tab[IDX][1][2] = ff_put_vp8_epel ## SIZE ## _h6v4_ ## OPT; \ + c->put_vp8_epel_pixels_tab[IDX][2][1] = ff_put_vp8_epel ## SIZE ## _h4v6_ ## OPT; \ + VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) + +#define VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT) \ + c->put_vp8_bilinear_pixels_tab[IDX][0][1] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \ + c->put_vp8_bilinear_pixels_tab[IDX][0][2] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \ + c->put_vp8_bilinear_pixels_tab[IDX][1][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \ + c->put_vp8_bilinear_pixels_tab[IDX][1][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \ + c->put_vp8_bilinear_pixels_tab[IDX][1][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \ + c->put_vp8_bilinear_pixels_tab[IDX][2][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \ + c->put_vp8_bilinear_pixels_tab[IDX][2][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \ + c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT + + +av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c) +{ +#if HAVE_X86ASM + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_MMX(cpu_flags)) { +#if ARCH_X86_32 + c->put_vp8_epel_pixels_tab[0][0][0] = + c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx; +#endif + c->put_vp8_epel_pixels_tab[1][0][0] = + c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx; + } + + /* note that 4-tap width=16 functions are missing because w=16 + * is only used for luma, and luma is always a copy or sixtap. */ + if (EXTERNAL_MMXEXT(cpu_flags)) { + VP8_MC_FUNC(2, 4, mmxext); + VP8_BILINEAR_MC_FUNC(2, 4, mmxext); +#if ARCH_X86_32 + VP8_LUMA_MC_FUNC(0, 16, mmxext); + VP8_MC_FUNC(1, 8, mmxext); + VP8_BILINEAR_MC_FUNC(0, 16, mmxext); + VP8_BILINEAR_MC_FUNC(1, 8, mmxext); +#endif + } + + if (EXTERNAL_SSE(cpu_flags)) { + c->put_vp8_epel_pixels_tab[0][0][0] = + c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; + } + + if (EXTERNAL_SSE2(cpu_flags) || EXTERNAL_SSE2_SLOW(cpu_flags)) { + VP8_LUMA_MC_FUNC(0, 16, sse2); + VP8_MC_FUNC(1, 8, sse2); + VP8_BILINEAR_MC_FUNC(0, 16, sse2); + VP8_BILINEAR_MC_FUNC(1, 8, sse2); + } + + if (EXTERNAL_SSSE3(cpu_flags)) { + VP8_LUMA_MC_FUNC(0, 16, ssse3); + VP8_MC_FUNC(1, 8, ssse3); + VP8_MC_FUNC(2, 4, ssse3); + VP8_BILINEAR_MC_FUNC(0, 16, ssse3); + VP8_BILINEAR_MC_FUNC(1, 8, ssse3); + VP8_BILINEAR_MC_FUNC(2, 4, ssse3); + } +#endif /* HAVE_X86ASM */ +} + +av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c) +{ +#if HAVE_X86ASM + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_MMX(cpu_flags)) { + c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx; +#if ARCH_X86_32 + c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx; + c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_mmx; + c->vp8_idct_add = ff_vp8_idct_add_mmx; + c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmx; + + c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx; + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx; + + c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx; + c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx; + c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx; + c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx; + + c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx; + c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx; + c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx; + c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx; +#endif + } + + /* note that 4-tap width=16 functions are missing because w=16 + * is only used for luma, and luma is always a copy or sixtap. */ + if (EXTERNAL_MMXEXT(cpu_flags)) { +#if ARCH_X86_32 + c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; + + c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext; + c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext; + c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext; + c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext; + + c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext; + c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext; + c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext; + c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext; +#endif + } + + if (EXTERNAL_SSE(cpu_flags)) { + c->vp8_idct_add = ff_vp8_idct_add_sse; + c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse; + } + + if (EXTERNAL_SSE2(cpu_flags) || EXTERNAL_SSE2_SLOW(cpu_flags)) { + c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; + + c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; + c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2; + + c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2; + c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2; + } + + if (EXTERNAL_SSE2(cpu_flags)) { + c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse2; + c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2; + + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; + + c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; + c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2; + + c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2; + c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2; + } + + if (EXTERNAL_SSSE3(cpu_flags)) { + c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3; + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3; + + c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3; + c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3; + c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3; + c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3; + + c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3; + c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3; + c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3; + c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3; + } + + if (EXTERNAL_SSE4(cpu_flags)) { + c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4; + + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4; + c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse4; + c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse4; + } +#endif /* HAVE_X86ASM */ +} diff --git a/libs/ffvpx/libavcodec/x86/vp8dsp_loopfilter.asm b/libs/ffvpx/libavcodec/x86/vp8dsp_loopfilter.asm new file mode 100644 index 000000000..caeb40526 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp8dsp_loopfilter.asm @@ -0,0 +1,1584 @@ +;****************************************************************************** +;* VP8 MMXEXT optimizations +;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com> +;* Copyright (c) 2010 Fiona Glaser <fiona@x264.com> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA + +pw_27: times 8 dw 27 +pw_63: times 8 dw 63 + +pb_4: times 16 db 4 +pb_F8: times 16 db 0xF8 +pb_FE: times 16 db 0xFE +pb_27_63: times 8 db 27, 63 +pb_18_63: times 8 db 18, 63 +pb_9_63: times 8 db 9, 63 + +cextern pb_1 +cextern pb_3 +cextern pw_9 +cextern pw_18 +cextern pb_80 + +SECTION .text + +;----------------------------------------------------------------------------- +; void ff_vp8_h/v_loop_filter_simple_<opt>(uint8_t *dst, ptrdiff_t stride, int flim); +;----------------------------------------------------------------------------- + +; macro called with 7 mm register indexes as argument, and 4 regular registers +; +; first 4 mm registers will carry the transposed pixel data +; the other three are scratchspace (one would be sufficient, but this allows +; for more spreading/pipelining and thus faster execution on OOE CPUs) +; +; first two regular registers are buf+4*stride and buf+5*stride +; third is -stride, fourth is +stride +%macro READ_8x4_INTERLEAVED 11 + ; interleave 8 (A-H) rows of 4 pixels each + movd m%1, [%8+%10*4] ; A0-3 + movd m%5, [%9+%10*4] ; B0-3 + movd m%2, [%8+%10*2] ; C0-3 + movd m%6, [%8+%10] ; D0-3 + movd m%3, [%8] ; E0-3 + movd m%7, [%9] ; F0-3 + movd m%4, [%9+%11] ; G0-3 + punpcklbw m%1, m%5 ; A/B interleaved + movd m%5, [%9+%11*2] ; H0-3 + punpcklbw m%2, m%6 ; C/D interleaved + punpcklbw m%3, m%7 ; E/F interleaved + punpcklbw m%4, m%5 ; G/H interleaved +%endmacro + +; macro called with 7 mm register indexes as argument, and 5 regular registers +; first 11 mean the same as READ_8x4_TRANSPOSED above +; fifth regular register is scratchspace to reach the bottom 8 rows, it +; will be set to second regular register + 8*stride at the end +%macro READ_16x4_INTERLEAVED 12 + ; transpose 16 (A-P) rows of 4 pixels each + lea %12, [r0+8*r2] + + ; read (and interleave) those addressable by %8 (=r0), A/C/D/E/I/K/L/M + movd m%1, [%8+%10*4] ; A0-3 + movd m%3, [%12+%10*4] ; I0-3 + movd m%2, [%8+%10*2] ; C0-3 + movd m%4, [%12+%10*2] ; K0-3 + movd m%6, [%8+%10] ; D0-3 + movd m%5, [%12+%10] ; L0-3 + movd m%7, [%12] ; M0-3 + add %12, %11 + punpcklbw m%1, m%3 ; A/I + movd m%3, [%8] ; E0-3 + punpcklbw m%2, m%4 ; C/K + punpcklbw m%6, m%5 ; D/L + punpcklbw m%3, m%7 ; E/M + punpcklbw m%2, m%6 ; C/D/K/L interleaved + + ; read (and interleave) those addressable by %9 (=r4), B/F/G/H/J/N/O/P + movd m%5, [%9+%10*4] ; B0-3 + movd m%4, [%12+%10*4] ; J0-3 + movd m%7, [%9] ; F0-3 + movd m%6, [%12] ; N0-3 + punpcklbw m%5, m%4 ; B/J + punpcklbw m%7, m%6 ; F/N + punpcklbw m%1, m%5 ; A/B/I/J interleaved + punpcklbw m%3, m%7 ; E/F/M/N interleaved + movd m%4, [%9+%11] ; G0-3 + movd m%6, [%12+%11] ; O0-3 + movd m%5, [%9+%11*2] ; H0-3 + movd m%7, [%12+%11*2] ; P0-3 + punpcklbw m%4, m%6 ; G/O + punpcklbw m%5, m%7 ; H/P + punpcklbw m%4, m%5 ; G/H/O/P interleaved +%endmacro + +; write 4 mm registers of 2 dwords each +; first four arguments are mm register indexes containing source data +; last four are registers containing buf+4*stride, buf+5*stride, +; -stride and +stride +%macro WRITE_4x2D 8 + ; write out (2 dwords per register) + movd [%5+%7*4], m%1 + movd [%5+%7*2], m%2 + movd [%5], m%3 + movd [%6+%8], m%4 + punpckhdq m%1, m%1 + punpckhdq m%2, m%2 + punpckhdq m%3, m%3 + punpckhdq m%4, m%4 + movd [%6+%7*4], m%1 + movd [%5+%7], m%2 + movd [%6], m%3 + movd [%6+%8*2], m%4 +%endmacro + +; write 4 xmm registers of 4 dwords each +; arguments same as WRITE_2x4D, but with an extra register, so that the 5 regular +; registers contain buf+4*stride, buf+5*stride, buf+12*stride, -stride and +stride +; we add 1*stride to the third regular registry in the process +; the 10th argument is 16 if it's a Y filter (i.e. all regular registers cover the +; same memory region), or 8 if they cover two separate buffers (third one points to +; a different memory region than the first two), allowing for more optimal code for +; the 16-width case +%macro WRITE_4x4D 10 + ; write out (4 dwords per register), start with dwords zero + movd [%5+%8*4], m%1 + movd [%5], m%2 + movd [%7+%8*4], m%3 + movd [%7], m%4 + + ; store dwords 1 + psrldq m%1, 4 + psrldq m%2, 4 + psrldq m%3, 4 + psrldq m%4, 4 + movd [%6+%8*4], m%1 + movd [%6], m%2 +%if %10 == 16 + movd [%6+%9*4], m%3 +%endif + movd [%7+%9], m%4 + + ; write dwords 2 + psrldq m%1, 4 + psrldq m%2, 4 +%if %10 == 8 + movd [%5+%8*2], m%1 + movd %5d, m%3 +%endif + psrldq m%3, 4 + psrldq m%4, 4 +%if %10 == 16 + movd [%5+%8*2], m%1 +%endif + movd [%6+%9], m%2 + movd [%7+%8*2], m%3 + movd [%7+%9*2], m%4 + add %7, %9 + + ; store dwords 3 + psrldq m%1, 4 + psrldq m%2, 4 + psrldq m%3, 4 + psrldq m%4, 4 +%if %10 == 8 + mov [%7+%8*4], %5d + movd [%6+%8*2], m%1 +%else + movd [%5+%8], m%1 +%endif + movd [%6+%9*2], m%2 + movd [%7+%8*2], m%3 + movd [%7+%9*2], m%4 +%endmacro + +; write 4 or 8 words in the mmx/xmm registers as 8 lines +; 1 and 2 are the registers to write, this can be the same (for SSE2) +; for pre-SSE4: +; 3 is a general-purpose register that we will clobber +; for SSE4: +; 3 is a pointer to the destination's 5th line +; 4 is a pointer to the destination's 4th line +; 5/6 is -stride and +stride +%macro WRITE_2x4W 6 + movd %3d, %1 + punpckhdq %1, %1 + mov [%4+%5*4], %3w + shr %3, 16 + add %4, %6 + mov [%4+%5*4], %3w + + movd %3d, %1 + add %4, %5 + mov [%4+%5*2], %3w + shr %3, 16 + mov [%4+%5 ], %3w + + movd %3d, %2 + punpckhdq %2, %2 + mov [%4 ], %3w + shr %3, 16 + mov [%4+%6 ], %3w + + movd %3d, %2 + add %4, %6 + mov [%4+%6 ], %3w + shr %3, 16 + mov [%4+%6*2], %3w + add %4, %5 +%endmacro + +%macro WRITE_8W 5 +%if cpuflag(sse4) + pextrw [%3+%4*4], %1, 0 + pextrw [%2+%4*4], %1, 1 + pextrw [%3+%4*2], %1, 2 + pextrw [%3+%4 ], %1, 3 + pextrw [%3 ], %1, 4 + pextrw [%2 ], %1, 5 + pextrw [%2+%5 ], %1, 6 + pextrw [%2+%5*2], %1, 7 +%else + movd %2d, %1 + psrldq %1, 4 + mov [%3+%4*4], %2w + shr %2, 16 + add %3, %5 + mov [%3+%4*4], %2w + + movd %2d, %1 + psrldq %1, 4 + add %3, %4 + mov [%3+%4*2], %2w + shr %2, 16 + mov [%3+%4 ], %2w + + movd %2d, %1 + psrldq %1, 4 + mov [%3 ], %2w + shr %2, 16 + mov [%3+%5 ], %2w + + movd %2d, %1 + add %3, %5 + mov [%3+%5 ], %2w + shr %2, 16 + mov [%3+%5*2], %2w +%endif +%endmacro + +%macro SIMPLE_LOOPFILTER 2 +cglobal vp8_%1_loop_filter_simple, 3, %2, 8, dst, stride, flim, cntr +%if mmsize == 8 ; mmx/mmxext + mov cntrq, 2 +%endif +%if cpuflag(ssse3) + pxor m0, m0 +%endif + SPLATB_REG m7, flim, m0 ; splat "flim" into register + + ; set up indexes to address 4 rows +%if mmsize == 8 + DEFINE_ARGS dst1, mstride, stride, cntr, dst2 +%else + DEFINE_ARGS dst1, mstride, stride, dst3, dst2 +%endif + mov strideq, mstrideq + neg mstrideq +%ifidn %1, h + lea dst1q, [dst1q+4*strideq-2] +%endif + +%if mmsize == 8 ; mmx / mmxext +.next8px: +%endif +%ifidn %1, v + ; read 4 half/full rows of pixels + mova m0, [dst1q+mstrideq*2] ; p1 + mova m1, [dst1q+mstrideq] ; p0 + mova m2, [dst1q] ; q0 + mova m3, [dst1q+ strideq] ; q1 +%else ; h + lea dst2q, [dst1q+ strideq] + +%if mmsize == 8 ; mmx/mmxext + READ_8x4_INTERLEAVED 0, 1, 2, 3, 4, 5, 6, dst1q, dst2q, mstrideq, strideq +%else ; sse2 + READ_16x4_INTERLEAVED 0, 1, 2, 3, 4, 5, 6, dst1q, dst2q, mstrideq, strideq, dst3q +%endif + TRANSPOSE4x4W 0, 1, 2, 3, 4 +%endif + + ; simple_limit + mova m5, m2 ; m5=backup of q0 + mova m6, m1 ; m6=backup of p0 + psubusb m1, m2 ; p0-q0 + psubusb m2, m6 ; q0-p0 + por m1, m2 ; FFABS(p0-q0) + paddusb m1, m1 ; m1=FFABS(p0-q0)*2 + + mova m4, m3 + mova m2, m0 + psubusb m3, m0 ; q1-p1 + psubusb m0, m4 ; p1-q1 + por m3, m0 ; FFABS(p1-q1) + mova m0, [pb_80] + pxor m2, m0 + pxor m4, m0 + psubsb m2, m4 ; m2=p1-q1 (signed) backup for below + pand m3, [pb_FE] + psrlq m3, 1 ; m3=FFABS(p1-q1)/2, this can be used signed + paddusb m3, m1 + psubusb m3, m7 + pxor m1, m1 + pcmpeqb m3, m1 ; abs(p0-q0)*2+abs(p1-q1)/2<=flim mask(0xff/0x0) + + ; filter_common (use m2/p1-q1, m4=q0, m6=p0, m5/q0-p0 and m3/mask) + mova m4, m5 + pxor m5, m0 + pxor m0, m6 + psubsb m5, m0 ; q0-p0 (signed) + paddsb m2, m5 + paddsb m2, m5 + paddsb m2, m5 ; a=(p1-q1) + 3*(q0-p0) + pand m2, m3 ; apply filter mask (m3) + + mova m3, [pb_F8] + mova m1, m2 + paddsb m2, [pb_4] ; f1<<3=a+4 + paddsb m1, [pb_3] ; f2<<3=a+3 + pand m2, m3 + pand m1, m3 ; cache f2<<3 + + pxor m0, m0 + pxor m3, m3 + pcmpgtb m0, m2 ; which values are <0? + psubb m3, m2 ; -f1<<3 + psrlq m2, 3 ; +f1 + psrlq m3, 3 ; -f1 + pand m3, m0 + pandn m0, m2 + psubusb m4, m0 + paddusb m4, m3 ; q0-f1 + + pxor m0, m0 + pxor m3, m3 + pcmpgtb m0, m1 ; which values are <0? + psubb m3, m1 ; -f2<<3 + psrlq m1, 3 ; +f2 + psrlq m3, 3 ; -f2 + pand m3, m0 + pandn m0, m1 + paddusb m6, m0 + psubusb m6, m3 ; p0+f2 + + ; store +%ifidn %1, v + mova [dst1q], m4 + mova [dst1q+mstrideq], m6 +%else ; h + inc dst1q + SBUTTERFLY bw, 6, 4, 0 + +%if mmsize == 16 ; sse2 +%if cpuflag(sse4) + inc dst2q +%endif + WRITE_8W m6, dst2q, dst1q, mstrideq, strideq + lea dst2q, [dst3q+mstrideq+1] +%if cpuflag(sse4) + inc dst3q +%endif + WRITE_8W m4, dst3q, dst2q, mstrideq, strideq +%else ; mmx/mmxext + WRITE_2x4W m6, m4, dst2q, dst1q, mstrideq, strideq +%endif +%endif + +%if mmsize == 8 ; mmx/mmxext + ; next 8 pixels +%ifidn %1, v + add dst1q, 8 ; advance 8 cols = pixels +%else ; h + lea dst1q, [dst1q+strideq*8-1] ; advance 8 rows = lines +%endif + dec cntrq + jg .next8px + REP_RET +%else ; sse2 + RET +%endif +%endmacro + +%if ARCH_X86_32 +INIT_MMX mmx +SIMPLE_LOOPFILTER v, 4 +SIMPLE_LOOPFILTER h, 5 +INIT_MMX mmxext +SIMPLE_LOOPFILTER v, 4 +SIMPLE_LOOPFILTER h, 5 +%endif + +INIT_XMM sse2 +SIMPLE_LOOPFILTER v, 3 +SIMPLE_LOOPFILTER h, 5 +INIT_XMM ssse3 +SIMPLE_LOOPFILTER v, 3 +SIMPLE_LOOPFILTER h, 5 +INIT_XMM sse4 +SIMPLE_LOOPFILTER h, 5 + +;----------------------------------------------------------------------------- +; void ff_vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] ptrdiff_t stride, +; int flimE, int flimI, int hev_thr); +;----------------------------------------------------------------------------- + +%macro INNER_LOOPFILTER 2 +%define stack_size 0 +%ifndef m8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr +%ifidn %1, v ; [3]=hev() result +%define stack_size mmsize * -4 +%else ; h ; extra storage space for transposes +%define stack_size mmsize * -5 +%endif +%endif + +%if %2 == 8 ; chroma +cglobal vp8_%1_loop_filter8uv_inner, 6, 6, 13, stack_size, dst, dst8, stride, flimE, flimI, hevthr +%else ; luma +cglobal vp8_%1_loop_filter16y_inner, 5, 5, 13, stack_size, dst, stride, flimE, flimI, hevthr +%endif + +%if cpuflag(ssse3) + pxor m7, m7 +%endif + +%ifndef m8 + ; splat function arguments + SPLATB_REG m0, flimEq, m7 ; E + SPLATB_REG m1, flimIq, m7 ; I + SPLATB_REG m2, hevthrq, m7 ; hev_thresh + +%define m_flimE [rsp] +%define m_flimI [rsp+mmsize] +%define m_hevthr [rsp+mmsize*2] +%define m_maskres [rsp+mmsize*3] +%define m_p0backup [rsp+mmsize*3] +%define m_q0backup [rsp+mmsize*4] + + mova m_flimE, m0 + mova m_flimI, m1 + mova m_hevthr, m2 +%else +%define m_flimE m9 +%define m_flimI m10 +%define m_hevthr m11 +%define m_maskres m12 +%define m_p0backup m12 +%define m_q0backup m8 + + ; splat function arguments + SPLATB_REG m_flimE, flimEq, m7 ; E + SPLATB_REG m_flimI, flimIq, m7 ; I + SPLATB_REG m_hevthr, hevthrq, m7 ; hev_thresh +%endif + +%if %2 == 8 ; chroma + DEFINE_ARGS dst1, dst8, mstride, stride, dst2 +%elif mmsize == 8 + DEFINE_ARGS dst1, mstride, stride, dst2, cntr + mov cntrq, 2 +%else + DEFINE_ARGS dst1, mstride, stride, dst2, dst8 +%endif + mov strideq, mstrideq + neg mstrideq +%ifidn %1, h + lea dst1q, [dst1q+strideq*4-4] +%if %2 == 8 ; chroma + lea dst8q, [dst8q+strideq*4-4] +%endif +%endif + +%if mmsize == 8 +.next8px: +%endif + ; read + lea dst2q, [dst1q+strideq] +%ifidn %1, v +%if %2 == 8 && mmsize == 16 +%define movrow movh +%else +%define movrow mova +%endif + movrow m0, [dst1q+mstrideq*4] ; p3 + movrow m1, [dst2q+mstrideq*4] ; p2 + movrow m2, [dst1q+mstrideq*2] ; p1 + movrow m5, [dst2q] ; q1 + movrow m6, [dst2q+ strideq*1] ; q2 + movrow m7, [dst2q+ strideq*2] ; q3 +%if mmsize == 16 && %2 == 8 + movhps m0, [dst8q+mstrideq*4] + movhps m2, [dst8q+mstrideq*2] + add dst8q, strideq + movhps m1, [dst8q+mstrideq*4] + movhps m5, [dst8q] + movhps m6, [dst8q+ strideq ] + movhps m7, [dst8q+ strideq*2] + add dst8q, mstrideq +%endif +%elif mmsize == 8 ; mmx/mmxext (h) + ; read 8 rows of 8px each + movu m0, [dst1q+mstrideq*4] + movu m1, [dst2q+mstrideq*4] + movu m2, [dst1q+mstrideq*2] + movu m3, [dst1q+mstrideq ] + movu m4, [dst1q] + movu m5, [dst2q] + movu m6, [dst2q+ strideq ] + + ; 8x8 transpose + TRANSPOSE4x4B 0, 1, 2, 3, 7 + mova m_q0backup, m1 + movu m7, [dst2q+ strideq*2] + TRANSPOSE4x4B 4, 5, 6, 7, 1 + SBUTTERFLY dq, 0, 4, 1 ; p3/p2 + SBUTTERFLY dq, 2, 6, 1 ; q0/q1 + SBUTTERFLY dq, 3, 7, 1 ; q2/q3 + mova m1, m_q0backup + mova m_q0backup, m2 ; store q0 + SBUTTERFLY dq, 1, 5, 2 ; p1/p0 + mova m_p0backup, m5 ; store p0 + SWAP 1, 4 + SWAP 2, 4 + SWAP 6, 3 + SWAP 5, 3 +%else ; sse2 (h) +%if %2 == 16 + lea dst8q, [dst1q+ strideq*8] +%endif + + ; read 16 rows of 8px each, interleave + movh m0, [dst1q+mstrideq*4] + movh m1, [dst8q+mstrideq*4] + movh m2, [dst1q+mstrideq*2] + movh m5, [dst8q+mstrideq*2] + movh m3, [dst1q+mstrideq ] + movh m6, [dst8q+mstrideq ] + movh m4, [dst1q] + movh m7, [dst8q] + punpcklbw m0, m1 ; A/I + punpcklbw m2, m5 ; C/K + punpcklbw m3, m6 ; D/L + punpcklbw m4, m7 ; E/M + + add dst8q, strideq + movh m1, [dst2q+mstrideq*4] + movh m6, [dst8q+mstrideq*4] + movh m5, [dst2q] + movh m7, [dst8q] + punpcklbw m1, m6 ; B/J + punpcklbw m5, m7 ; F/N + movh m6, [dst2q+ strideq ] + movh m7, [dst8q+ strideq ] + punpcklbw m6, m7 ; G/O + + ; 8x16 transpose + TRANSPOSE4x4B 0, 1, 2, 3, 7 +%ifdef m8 + SWAP 1, 8 +%else + mova m_q0backup, m1 +%endif + movh m7, [dst2q+ strideq*2] + movh m1, [dst8q+ strideq*2] + punpcklbw m7, m1 ; H/P + TRANSPOSE4x4B 4, 5, 6, 7, 1 + SBUTTERFLY dq, 0, 4, 1 ; p3/p2 + SBUTTERFLY dq, 2, 6, 1 ; q0/q1 + SBUTTERFLY dq, 3, 7, 1 ; q2/q3 +%ifdef m8 + SWAP 1, 8 + SWAP 2, 8 +%else + mova m1, m_q0backup + mova m_q0backup, m2 ; store q0 +%endif + SBUTTERFLY dq, 1, 5, 2 ; p1/p0 +%ifdef m12 + SWAP 5, 12 +%else + mova m_p0backup, m5 ; store p0 +%endif + SWAP 1, 4 + SWAP 2, 4 + SWAP 6, 3 + SWAP 5, 3 +%endif + + ; normal_limit for p3-p2, p2-p1, q3-q2 and q2-q1 + mova m4, m1 + SWAP 4, 1 + psubusb m4, m0 ; p2-p3 + psubusb m0, m1 ; p3-p2 + por m0, m4 ; abs(p3-p2) + + mova m4, m2 + SWAP 4, 2 + psubusb m4, m1 ; p1-p2 + psubusb m1, m2 ; p2-p1 + por m1, m4 ; abs(p2-p1) + + mova m4, m6 + SWAP 4, 6 + psubusb m4, m7 ; q2-q3 + psubusb m7, m6 ; q3-q2 + por m7, m4 ; abs(q3-q2) + + mova m4, m5 + SWAP 4, 5 + psubusb m4, m6 ; q1-q2 + psubusb m6, m5 ; q2-q1 + por m6, m4 ; abs(q2-q1) + +%if notcpuflag(mmxext) + mova m4, m_flimI + pxor m3, m3 + psubusb m0, m4 + psubusb m1, m4 + psubusb m7, m4 + psubusb m6, m4 + pcmpeqb m0, m3 ; abs(p3-p2) <= I + pcmpeqb m1, m3 ; abs(p2-p1) <= I + pcmpeqb m7, m3 ; abs(q3-q2) <= I + pcmpeqb m6, m3 ; abs(q2-q1) <= I + pand m0, m1 + pand m7, m6 + pand m0, m7 +%else ; mmxext/sse2 + pmaxub m0, m1 + pmaxub m6, m7 + pmaxub m0, m6 +%endif + + ; normal_limit and high_edge_variance for p1-p0, q1-q0 + SWAP 7, 3 ; now m7 is zero +%ifidn %1, v + movrow m3, [dst1q+mstrideq ] ; p0 +%if mmsize == 16 && %2 == 8 + movhps m3, [dst8q+mstrideq ] +%endif +%elifdef m12 + SWAP 3, 12 +%else + mova m3, m_p0backup +%endif + + mova m1, m2 + SWAP 1, 2 + mova m6, m3 + SWAP 3, 6 + psubusb m1, m3 ; p1-p0 + psubusb m6, m2 ; p0-p1 + por m1, m6 ; abs(p1-p0) +%if notcpuflag(mmxext) + mova m6, m1 + psubusb m1, m4 + psubusb m6, m_hevthr + pcmpeqb m1, m7 ; abs(p1-p0) <= I + pcmpeqb m6, m7 ; abs(p1-p0) <= hev_thresh + pand m0, m1 + mova m_maskres, m6 +%else ; mmxext/sse2 + pmaxub m0, m1 ; max_I + SWAP 1, 4 ; max_hev_thresh +%endif + + SWAP 6, 4 ; now m6 is I +%ifidn %1, v + movrow m4, [dst1q] ; q0 +%if mmsize == 16 && %2 == 8 + movhps m4, [dst8q] +%endif +%elifdef m8 + SWAP 4, 8 +%else + mova m4, m_q0backup +%endif + mova m1, m4 + SWAP 1, 4 + mova m7, m5 + SWAP 7, 5 + psubusb m1, m5 ; q0-q1 + psubusb m7, m4 ; q1-q0 + por m1, m7 ; abs(q1-q0) +%if notcpuflag(mmxext) + mova m7, m1 + psubusb m1, m6 + psubusb m7, m_hevthr + pxor m6, m6 + pcmpeqb m1, m6 ; abs(q1-q0) <= I + pcmpeqb m7, m6 ; abs(q1-q0) <= hev_thresh + mova m6, m_maskres + pand m0, m1 ; abs([pq][321]-[pq][210]) <= I + pand m6, m7 +%else ; mmxext/sse2 + pxor m7, m7 + pmaxub m0, m1 + pmaxub m6, m1 + psubusb m0, m_flimI + psubusb m6, m_hevthr + pcmpeqb m0, m7 ; max(abs(..)) <= I + pcmpeqb m6, m7 ; !(max(abs..) > thresh) +%endif +%ifdef m12 + SWAP 6, 12 +%else + mova m_maskres, m6 ; !(abs(p1-p0) > hev_t || abs(q1-q0) > hev_t) +%endif + + ; simple_limit + mova m1, m3 + SWAP 1, 3 + mova m6, m4 ; keep copies of p0/q0 around for later use + SWAP 6, 4 + psubusb m1, m4 ; p0-q0 + psubusb m6, m3 ; q0-p0 + por m1, m6 ; abs(q0-p0) + paddusb m1, m1 ; m1=2*abs(q0-p0) + + mova m7, m2 + SWAP 7, 2 + mova m6, m5 + SWAP 6, 5 + psubusb m7, m5 ; p1-q1 + psubusb m6, m2 ; q1-p1 + por m7, m6 ; abs(q1-p1) + pxor m6, m6 + pand m7, [pb_FE] + psrlq m7, 1 ; abs(q1-p1)/2 + paddusb m7, m1 ; abs(q0-p0)*2+abs(q1-p1)/2 + psubusb m7, m_flimE + pcmpeqb m7, m6 ; abs(q0-p0)*2+abs(q1-p1)/2 <= E + pand m0, m7 ; normal_limit result + + ; filter_common; at this point, m2-m5=p1-q1 and m0 is filter_mask +%ifdef m8 ; x86-64 && sse2 + mova m8, [pb_80] +%define m_pb_80 m8 +%else ; x86-32 or mmx/mmxext +%define m_pb_80 [pb_80] +%endif + mova m1, m4 + mova m7, m3 + pxor m1, m_pb_80 + pxor m7, m_pb_80 + psubsb m1, m7 ; (signed) q0-p0 + mova m6, m2 + mova m7, m5 + pxor m6, m_pb_80 + pxor m7, m_pb_80 + psubsb m6, m7 ; (signed) p1-q1 + mova m7, m_maskres + pandn m7, m6 + paddsb m7, m1 + paddsb m7, m1 + paddsb m7, m1 ; 3*(q0-p0)+is4tap?(p1-q1) + + pand m7, m0 + mova m1, [pb_F8] + mova m6, m7 + paddsb m7, [pb_3] + paddsb m6, [pb_4] + pand m7, m1 + pand m6, m1 + + pxor m1, m1 + pxor m0, m0 + pcmpgtb m1, m7 + psubb m0, m7 + psrlq m7, 3 ; +f2 + psrlq m0, 3 ; -f2 + pand m0, m1 + pandn m1, m7 + psubusb m3, m0 + paddusb m3, m1 ; p0+f2 + + pxor m1, m1 + pxor m0, m0 + pcmpgtb m0, m6 + psubb m1, m6 + psrlq m6, 3 ; +f1 + psrlq m1, 3 ; -f1 + pand m1, m0 + pandn m0, m6 + psubusb m4, m0 + paddusb m4, m1 ; q0-f1 + +%ifdef m12 + SWAP 6, 12 +%else + mova m6, m_maskres +%endif +%if notcpuflag(mmxext) + mova m7, [pb_1] +%else ; mmxext/sse2 + pxor m7, m7 +%endif + pand m0, m6 + pand m1, m6 +%if notcpuflag(mmxext) + paddusb m0, m7 + pand m1, [pb_FE] + pandn m7, m0 + psrlq m1, 1 + psrlq m7, 1 + SWAP 0, 7 +%else ; mmxext/sse2 + psubusb m1, [pb_1] + pavgb m0, m7 ; a + pavgb m1, m7 ; -a +%endif + psubusb m5, m0 + psubusb m2, m1 + paddusb m5, m1 ; q1-a + paddusb m2, m0 ; p1+a + + ; store +%ifidn %1, v + movrow [dst1q+mstrideq*2], m2 + movrow [dst1q+mstrideq ], m3 + movrow [dst1q], m4 + movrow [dst1q+ strideq ], m5 +%if mmsize == 16 && %2 == 8 + movhps [dst8q+mstrideq*2], m2 + movhps [dst8q+mstrideq ], m3 + movhps [dst8q], m4 + movhps [dst8q+ strideq ], m5 +%endif +%else ; h + add dst1q, 2 + add dst2q, 2 + + ; 4x8/16 transpose + TRANSPOSE4x4B 2, 3, 4, 5, 6 + +%if mmsize == 8 ; mmx/mmxext (h) + WRITE_4x2D 2, 3, 4, 5, dst1q, dst2q, mstrideq, strideq +%else ; sse2 (h) + lea dst8q, [dst8q+mstrideq +2] + WRITE_4x4D 2, 3, 4, 5, dst1q, dst2q, dst8q, mstrideq, strideq, %2 +%endif +%endif + +%if mmsize == 8 +%if %2 == 8 ; chroma +%ifidn %1, h + sub dst1q, 2 +%endif + cmp dst1q, dst8q + mov dst1q, dst8q + jnz .next8px +%else +%ifidn %1, h + lea dst1q, [dst1q+ strideq*8-2] +%else ; v + add dst1q, 8 +%endif + dec cntrq + jg .next8px +%endif + REP_RET +%else ; mmsize == 16 + RET +%endif +%endmacro + +%if ARCH_X86_32 +INIT_MMX mmx +INNER_LOOPFILTER v, 16 +INNER_LOOPFILTER h, 16 +INNER_LOOPFILTER v, 8 +INNER_LOOPFILTER h, 8 + +INIT_MMX mmxext +INNER_LOOPFILTER v, 16 +INNER_LOOPFILTER h, 16 +INNER_LOOPFILTER v, 8 +INNER_LOOPFILTER h, 8 +%endif + +INIT_XMM sse2 +INNER_LOOPFILTER v, 16 +INNER_LOOPFILTER h, 16 +INNER_LOOPFILTER v, 8 +INNER_LOOPFILTER h, 8 + +INIT_XMM ssse3 +INNER_LOOPFILTER v, 16 +INNER_LOOPFILTER h, 16 +INNER_LOOPFILTER v, 8 +INNER_LOOPFILTER h, 8 + +;----------------------------------------------------------------------------- +; void ff_vp8_h/v_loop_filter<size>_mbedge_<opt>(uint8_t *dst, [uint8_t *v,] ptrdiff_t stride, +; int flimE, int flimI, int hev_thr); +;----------------------------------------------------------------------------- + +%macro MBEDGE_LOOPFILTER 2 +%define stack_size 0 +%ifndef m8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr +%if mmsize == 16 ; [3]=hev() result + ; [4]=filter tmp result + ; [5]/[6] = p2/q2 backup + ; [7]=lim_res sign result +%define stack_size mmsize * -7 +%else ; 8 ; extra storage space for transposes +%define stack_size mmsize * -8 +%endif +%endif + +%if %2 == 8 ; chroma +cglobal vp8_%1_loop_filter8uv_mbedge, 6, 6, 15, stack_size, dst1, dst8, stride, flimE, flimI, hevthr +%else ; luma +cglobal vp8_%1_loop_filter16y_mbedge, 5, 5, 15, stack_size, dst1, stride, flimE, flimI, hevthr +%endif + +%if cpuflag(ssse3) + pxor m7, m7 +%endif + +%ifndef m8 + ; splat function arguments + SPLATB_REG m0, flimEq, m7 ; E + SPLATB_REG m1, flimIq, m7 ; I + SPLATB_REG m2, hevthrq, m7 ; hev_thresh + +%define m_flimE [rsp] +%define m_flimI [rsp+mmsize] +%define m_hevthr [rsp+mmsize*2] +%define m_maskres [rsp+mmsize*3] +%define m_limres [rsp+mmsize*4] +%define m_p0backup [rsp+mmsize*3] +%define m_q0backup [rsp+mmsize*4] +%define m_p2backup [rsp+mmsize*5] +%define m_q2backup [rsp+mmsize*6] +%if mmsize == 16 +%define m_limsign [rsp] +%else +%define m_limsign [rsp+mmsize*7] +%endif + + mova m_flimE, m0 + mova m_flimI, m1 + mova m_hevthr, m2 +%else ; sse2 on x86-64 +%define m_flimE m9 +%define m_flimI m10 +%define m_hevthr m11 +%define m_maskres m12 +%define m_limres m8 +%define m_p0backup m12 +%define m_q0backup m8 +%define m_p2backup m13 +%define m_q2backup m14 +%define m_limsign m9 + + ; splat function arguments + SPLATB_REG m_flimE, flimEq, m7 ; E + SPLATB_REG m_flimI, flimIq, m7 ; I + SPLATB_REG m_hevthr, hevthrq, m7 ; hev_thresh +%endif + +%if %2 == 8 ; chroma + DEFINE_ARGS dst1, dst8, mstride, stride, dst2 +%elif mmsize == 8 + DEFINE_ARGS dst1, mstride, stride, dst2, cntr + mov cntrq, 2 +%else + DEFINE_ARGS dst1, mstride, stride, dst2, dst8 +%endif + mov strideq, mstrideq + neg mstrideq +%ifidn %1, h + lea dst1q, [dst1q+strideq*4-4] +%if %2 == 8 ; chroma + lea dst8q, [dst8q+strideq*4-4] +%endif +%endif + +%if mmsize == 8 +.next8px: +%endif + ; read + lea dst2q, [dst1q+ strideq ] +%ifidn %1, v +%if %2 == 8 && mmsize == 16 +%define movrow movh +%else +%define movrow mova +%endif + movrow m0, [dst1q+mstrideq*4] ; p3 + movrow m1, [dst2q+mstrideq*4] ; p2 + movrow m2, [dst1q+mstrideq*2] ; p1 + movrow m5, [dst2q] ; q1 + movrow m6, [dst2q+ strideq ] ; q2 + movrow m7, [dst2q+ strideq*2] ; q3 +%if mmsize == 16 && %2 == 8 + movhps m0, [dst8q+mstrideq*4] + movhps m2, [dst8q+mstrideq*2] + add dst8q, strideq + movhps m1, [dst8q+mstrideq*4] + movhps m5, [dst8q] + movhps m6, [dst8q+ strideq ] + movhps m7, [dst8q+ strideq*2] + add dst8q, mstrideq +%endif +%elif mmsize == 8 ; mmx/mmxext (h) + ; read 8 rows of 8px each + movu m0, [dst1q+mstrideq*4] + movu m1, [dst2q+mstrideq*4] + movu m2, [dst1q+mstrideq*2] + movu m3, [dst1q+mstrideq ] + movu m4, [dst1q] + movu m5, [dst2q] + movu m6, [dst2q+ strideq ] + + ; 8x8 transpose + TRANSPOSE4x4B 0, 1, 2, 3, 7 + mova m_q0backup, m1 + movu m7, [dst2q+ strideq*2] + TRANSPOSE4x4B 4, 5, 6, 7, 1 + SBUTTERFLY dq, 0, 4, 1 ; p3/p2 + SBUTTERFLY dq, 2, 6, 1 ; q0/q1 + SBUTTERFLY dq, 3, 7, 1 ; q2/q3 + mova m1, m_q0backup + mova m_q0backup, m2 ; store q0 + SBUTTERFLY dq, 1, 5, 2 ; p1/p0 + mova m_p0backup, m5 ; store p0 + SWAP 1, 4 + SWAP 2, 4 + SWAP 6, 3 + SWAP 5, 3 +%else ; sse2 (h) +%if %2 == 16 + lea dst8q, [dst1q+ strideq*8 ] +%endif + + ; read 16 rows of 8px each, interleave + movh m0, [dst1q+mstrideq*4] + movh m1, [dst8q+mstrideq*4] + movh m2, [dst1q+mstrideq*2] + movh m5, [dst8q+mstrideq*2] + movh m3, [dst1q+mstrideq ] + movh m6, [dst8q+mstrideq ] + movh m4, [dst1q] + movh m7, [dst8q] + punpcklbw m0, m1 ; A/I + punpcklbw m2, m5 ; C/K + punpcklbw m3, m6 ; D/L + punpcklbw m4, m7 ; E/M + + add dst8q, strideq + movh m1, [dst2q+mstrideq*4] + movh m6, [dst8q+mstrideq*4] + movh m5, [dst2q] + movh m7, [dst8q] + punpcklbw m1, m6 ; B/J + punpcklbw m5, m7 ; F/N + movh m6, [dst2q+ strideq ] + movh m7, [dst8q+ strideq ] + punpcklbw m6, m7 ; G/O + + ; 8x16 transpose + TRANSPOSE4x4B 0, 1, 2, 3, 7 +%ifdef m8 + SWAP 1, 8 +%else + mova m_q0backup, m1 +%endif + movh m7, [dst2q+ strideq*2] + movh m1, [dst8q+ strideq*2] + punpcklbw m7, m1 ; H/P + TRANSPOSE4x4B 4, 5, 6, 7, 1 + SBUTTERFLY dq, 0, 4, 1 ; p3/p2 + SBUTTERFLY dq, 2, 6, 1 ; q0/q1 + SBUTTERFLY dq, 3, 7, 1 ; q2/q3 +%ifdef m8 + SWAP 1, 8 + SWAP 2, 8 +%else + mova m1, m_q0backup + mova m_q0backup, m2 ; store q0 +%endif + SBUTTERFLY dq, 1, 5, 2 ; p1/p0 +%ifdef m12 + SWAP 5, 12 +%else + mova m_p0backup, m5 ; store p0 +%endif + SWAP 1, 4 + SWAP 2, 4 + SWAP 6, 3 + SWAP 5, 3 +%endif + + ; normal_limit for p3-p2, p2-p1, q3-q2 and q2-q1 + mova m4, m1 + SWAP 4, 1 + psubusb m4, m0 ; p2-p3 + psubusb m0, m1 ; p3-p2 + por m0, m4 ; abs(p3-p2) + + mova m4, m2 + SWAP 4, 2 + psubusb m4, m1 ; p1-p2 + mova m_p2backup, m1 + psubusb m1, m2 ; p2-p1 + por m1, m4 ; abs(p2-p1) + + mova m4, m6 + SWAP 4, 6 + psubusb m4, m7 ; q2-q3 + psubusb m7, m6 ; q3-q2 + por m7, m4 ; abs(q3-q2) + + mova m4, m5 + SWAP 4, 5 + psubusb m4, m6 ; q1-q2 + mova m_q2backup, m6 + psubusb m6, m5 ; q2-q1 + por m6, m4 ; abs(q2-q1) + +%if notcpuflag(mmxext) + mova m4, m_flimI + pxor m3, m3 + psubusb m0, m4 + psubusb m1, m4 + psubusb m7, m4 + psubusb m6, m4 + pcmpeqb m0, m3 ; abs(p3-p2) <= I + pcmpeqb m1, m3 ; abs(p2-p1) <= I + pcmpeqb m7, m3 ; abs(q3-q2) <= I + pcmpeqb m6, m3 ; abs(q2-q1) <= I + pand m0, m1 + pand m7, m6 + pand m0, m7 +%else ; mmxext/sse2 + pmaxub m0, m1 + pmaxub m6, m7 + pmaxub m0, m6 +%endif + + ; normal_limit and high_edge_variance for p1-p0, q1-q0 + SWAP 7, 3 ; now m7 is zero +%ifidn %1, v + movrow m3, [dst1q+mstrideq ] ; p0 +%if mmsize == 16 && %2 == 8 + movhps m3, [dst8q+mstrideq ] +%endif +%elifdef m12 + SWAP 3, 12 +%else + mova m3, m_p0backup +%endif + + mova m1, m2 + SWAP 1, 2 + mova m6, m3 + SWAP 3, 6 + psubusb m1, m3 ; p1-p0 + psubusb m6, m2 ; p0-p1 + por m1, m6 ; abs(p1-p0) +%if notcpuflag(mmxext) + mova m6, m1 + psubusb m1, m4 + psubusb m6, m_hevthr + pcmpeqb m1, m7 ; abs(p1-p0) <= I + pcmpeqb m6, m7 ; abs(p1-p0) <= hev_thresh + pand m0, m1 + mova m_maskres, m6 +%else ; mmxext/sse2 + pmaxub m0, m1 ; max_I + SWAP 1, 4 ; max_hev_thresh +%endif + + SWAP 6, 4 ; now m6 is I +%ifidn %1, v + movrow m4, [dst1q] ; q0 +%if mmsize == 16 && %2 == 8 + movhps m4, [dst8q] +%endif +%elifdef m8 + SWAP 4, 8 +%else + mova m4, m_q0backup +%endif + mova m1, m4 + SWAP 1, 4 + mova m7, m5 + SWAP 7, 5 + psubusb m1, m5 ; q0-q1 + psubusb m7, m4 ; q1-q0 + por m1, m7 ; abs(q1-q0) +%if notcpuflag(mmxext) + mova m7, m1 + psubusb m1, m6 + psubusb m7, m_hevthr + pxor m6, m6 + pcmpeqb m1, m6 ; abs(q1-q0) <= I + pcmpeqb m7, m6 ; abs(q1-q0) <= hev_thresh + mova m6, m_maskres + pand m0, m1 ; abs([pq][321]-[pq][210]) <= I + pand m6, m7 +%else ; mmxext/sse2 + pxor m7, m7 + pmaxub m0, m1 + pmaxub m6, m1 + psubusb m0, m_flimI + psubusb m6, m_hevthr + pcmpeqb m0, m7 ; max(abs(..)) <= I + pcmpeqb m6, m7 ; !(max(abs..) > thresh) +%endif +%ifdef m12 + SWAP 6, 12 +%else + mova m_maskres, m6 ; !(abs(p1-p0) > hev_t || abs(q1-q0) > hev_t) +%endif + + ; simple_limit + mova m1, m3 + SWAP 1, 3 + mova m6, m4 ; keep copies of p0/q0 around for later use + SWAP 6, 4 + psubusb m1, m4 ; p0-q0 + psubusb m6, m3 ; q0-p0 + por m1, m6 ; abs(q0-p0) + paddusb m1, m1 ; m1=2*abs(q0-p0) + + mova m7, m2 + SWAP 7, 2 + mova m6, m5 + SWAP 6, 5 + psubusb m7, m5 ; p1-q1 + psubusb m6, m2 ; q1-p1 + por m7, m6 ; abs(q1-p1) + pxor m6, m6 + pand m7, [pb_FE] + psrlq m7, 1 ; abs(q1-p1)/2 + paddusb m7, m1 ; abs(q0-p0)*2+abs(q1-p1)/2 + psubusb m7, m_flimE + pcmpeqb m7, m6 ; abs(q0-p0)*2+abs(q1-p1)/2 <= E + pand m0, m7 ; normal_limit result + + ; filter_common; at this point, m2-m5=p1-q1 and m0 is filter_mask +%ifdef m8 ; x86-64 && sse2 + mova m8, [pb_80] +%define m_pb_80 m8 +%else ; x86-32 or mmx/mmxext +%define m_pb_80 [pb_80] +%endif + mova m1, m4 + mova m7, m3 + pxor m1, m_pb_80 + pxor m7, m_pb_80 + psubsb m1, m7 ; (signed) q0-p0 + mova m6, m2 + mova m7, m5 + pxor m6, m_pb_80 + pxor m7, m_pb_80 + psubsb m6, m7 ; (signed) p1-q1 + mova m7, m_maskres + paddsb m6, m1 + paddsb m6, m1 + paddsb m6, m1 + pand m6, m0 +%ifdef m8 + mova m_limres, m6 ; 3*(qp-p0)+(p1-q1) masked for filter_mbedge + pand m_limres, m7 +%else + mova m0, m6 + pand m0, m7 + mova m_limres, m0 +%endif + pandn m7, m6 ; 3*(q0-p0)+(p1-q1) masked for filter_common + + mova m1, [pb_F8] + mova m6, m7 + paddsb m7, [pb_3] + paddsb m6, [pb_4] + pand m7, m1 + pand m6, m1 + + pxor m1, m1 + pxor m0, m0 + pcmpgtb m1, m7 + psubb m0, m7 + psrlq m7, 3 ; +f2 + psrlq m0, 3 ; -f2 + pand m0, m1 + pandn m1, m7 + psubusb m3, m0 + paddusb m3, m1 ; p0+f2 + + pxor m1, m1 + pxor m0, m0 + pcmpgtb m0, m6 + psubb m1, m6 + psrlq m6, 3 ; +f1 + psrlq m1, 3 ; -f1 + pand m1, m0 + pandn m0, m6 + psubusb m4, m0 + paddusb m4, m1 ; q0-f1 + + ; filter_mbedge (m2-m5 = p1-q1; lim_res carries w) +%if cpuflag(ssse3) + mova m7, [pb_1] +%else + mova m7, [pw_63] +%endif +%ifdef m8 + SWAP 1, 8 +%else + mova m1, m_limres +%endif + pxor m0, m0 + mova m6, m1 + pcmpgtb m0, m1 ; which are negative +%if cpuflag(ssse3) + punpcklbw m6, m7 ; interleave with "1" for rounding + punpckhbw m1, m7 +%else + punpcklbw m6, m0 ; signed byte->word + punpckhbw m1, m0 +%endif + mova m_limsign, m0 +%if cpuflag(ssse3) + mova m7, [pb_27_63] +%ifndef m8 + mova m_limres, m1 +%endif +%ifdef m10 + SWAP 0, 10 ; don't lose lim_sign copy +%endif + mova m0, m7 + pmaddubsw m7, m6 + SWAP 6, 7 + pmaddubsw m0, m1 + SWAP 1, 0 +%ifdef m10 + SWAP 0, 10 +%else + mova m0, m_limsign +%endif +%else + mova m_maskres, m6 ; backup for later in filter + mova m_limres, m1 + pmullw m6, [pw_27] + pmullw m1, [pw_27] + paddw m6, m7 + paddw m1, m7 +%endif + psraw m6, 7 + psraw m1, 7 + packsswb m6, m1 ; a0 + pxor m1, m1 + psubb m1, m6 + pand m1, m0 ; -a0 + pandn m0, m6 ; +a0 +%if cpuflag(ssse3) + mova m6, [pb_18_63] ; pipelining +%endif + psubusb m3, m1 + paddusb m4, m1 + paddusb m3, m0 ; p0+a0 + psubusb m4, m0 ; q0-a0 + +%if cpuflag(ssse3) + SWAP 6, 7 +%ifdef m10 + SWAP 1, 10 +%else + mova m1, m_limres +%endif + mova m0, m7 + pmaddubsw m7, m6 + SWAP 6, 7 + pmaddubsw m0, m1 + SWAP 1, 0 +%ifdef m10 + SWAP 0, 10 +%endif + mova m0, m_limsign +%else + mova m6, m_maskres + mova m1, m_limres + pmullw m6, [pw_18] + pmullw m1, [pw_18] + paddw m6, m7 + paddw m1, m7 +%endif + mova m0, m_limsign + psraw m6, 7 + psraw m1, 7 + packsswb m6, m1 ; a1 + pxor m1, m1 + psubb m1, m6 + pand m1, m0 ; -a1 + pandn m0, m6 ; +a1 +%if cpuflag(ssse3) + mova m6, [pb_9_63] +%endif + psubusb m2, m1 + paddusb m5, m1 + paddusb m2, m0 ; p1+a1 + psubusb m5, m0 ; q1-a1 + +%if cpuflag(ssse3) + SWAP 6, 7 +%ifdef m10 + SWAP 1, 10 +%else + mova m1, m_limres +%endif + mova m0, m7 + pmaddubsw m7, m6 + SWAP 6, 7 + pmaddubsw m0, m1 + SWAP 1, 0 +%else +%ifdef m8 + SWAP 6, 12 + SWAP 1, 8 +%else + mova m6, m_maskres + mova m1, m_limres +%endif + pmullw m6, [pw_9] + pmullw m1, [pw_9] + paddw m6, m7 + paddw m1, m7 +%endif +%ifdef m9 + SWAP 7, 9 +%else + mova m7, m_limsign +%endif + psraw m6, 7 + psraw m1, 7 + packsswb m6, m1 ; a1 + pxor m0, m0 + psubb m0, m6 + pand m0, m7 ; -a1 + pandn m7, m6 ; +a1 +%ifdef m8 + SWAP 1, 13 + SWAP 6, 14 +%else + mova m1, m_p2backup + mova m6, m_q2backup +%endif + psubusb m1, m0 + paddusb m6, m0 + paddusb m1, m7 ; p1+a1 + psubusb m6, m7 ; q1-a1 + + ; store +%ifidn %1, v + movrow [dst2q+mstrideq*4], m1 + movrow [dst1q+mstrideq*2], m2 + movrow [dst1q+mstrideq ], m3 + movrow [dst1q], m4 + movrow [dst2q], m5 + movrow [dst2q+ strideq ], m6 +%if mmsize == 16 && %2 == 8 + add dst8q, mstrideq + movhps [dst8q+mstrideq*2], m1 + movhps [dst8q+mstrideq ], m2 + movhps [dst8q], m3 + add dst8q, strideq + movhps [dst8q], m4 + movhps [dst8q+ strideq ], m5 + movhps [dst8q+ strideq*2], m6 +%endif +%else ; h + inc dst1q + inc dst2q + + ; 4x8/16 transpose + TRANSPOSE4x4B 1, 2, 3, 4, 0 + SBUTTERFLY bw, 5, 6, 0 + +%if mmsize == 8 ; mmx/mmxext (h) + WRITE_4x2D 1, 2, 3, 4, dst1q, dst2q, mstrideq, strideq + add dst1q, 4 + WRITE_2x4W m5, m6, dst2q, dst1q, mstrideq, strideq +%else ; sse2 (h) + lea dst8q, [dst8q+mstrideq+1] + WRITE_4x4D 1, 2, 3, 4, dst1q, dst2q, dst8q, mstrideq, strideq, %2 + lea dst1q, [dst2q+mstrideq+4] + lea dst8q, [dst8q+mstrideq+4] +%if cpuflag(sse4) + add dst2q, 4 +%endif + WRITE_8W m5, dst2q, dst1q, mstrideq, strideq +%if cpuflag(sse4) + lea dst2q, [dst8q+ strideq ] +%endif + WRITE_8W m6, dst2q, dst8q, mstrideq, strideq +%endif +%endif + +%if mmsize == 8 +%if %2 == 8 ; chroma +%ifidn %1, h + sub dst1q, 5 +%endif + cmp dst1q, dst8q + mov dst1q, dst8q + jnz .next8px +%else +%ifidn %1, h + lea dst1q, [dst1q+ strideq*8-5] +%else ; v + add dst1q, 8 +%endif + dec cntrq + jg .next8px +%endif + REP_RET +%else ; mmsize == 16 + RET +%endif +%endmacro + +%if ARCH_X86_32 +INIT_MMX mmx +MBEDGE_LOOPFILTER v, 16 +MBEDGE_LOOPFILTER h, 16 +MBEDGE_LOOPFILTER v, 8 +MBEDGE_LOOPFILTER h, 8 + +INIT_MMX mmxext +MBEDGE_LOOPFILTER v, 16 +MBEDGE_LOOPFILTER h, 16 +MBEDGE_LOOPFILTER v, 8 +MBEDGE_LOOPFILTER h, 8 +%endif + +INIT_XMM sse2 +MBEDGE_LOOPFILTER v, 16 +MBEDGE_LOOPFILTER h, 16 +MBEDGE_LOOPFILTER v, 8 +MBEDGE_LOOPFILTER h, 8 + +INIT_XMM ssse3 +MBEDGE_LOOPFILTER v, 16 +MBEDGE_LOOPFILTER h, 16 +MBEDGE_LOOPFILTER v, 8 +MBEDGE_LOOPFILTER h, 8 + +INIT_XMM sse4 +MBEDGE_LOOPFILTER h, 16 +MBEDGE_LOOPFILTER h, 8 diff --git a/libs/ffvpx/libavcodec/x86/vp9dsp_init.c b/libs/ffvpx/libavcodec/x86/vp9dsp_init.c new file mode 100644 index 000000000..837cce850 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp9dsp_init.c @@ -0,0 +1,416 @@ +/* + * VP9 SIMD optimizations + * + * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/mem.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/vp9dsp.h" +#include "libavcodec/x86/vp9dsp_init.h" + +#if HAVE_X86ASM + +decl_fpel_func(put, 4, , mmx); +decl_fpel_func(put, 8, , mmx); +decl_fpel_func(put, 16, , sse); +decl_fpel_func(put, 32, , sse); +decl_fpel_func(put, 64, , sse); +decl_fpel_func(avg, 4, _8, mmxext); +decl_fpel_func(avg, 8, _8, mmxext); +decl_fpel_func(avg, 16, _8, sse2); +decl_fpel_func(avg, 32, _8, sse2); +decl_fpel_func(avg, 64, _8, sse2); +decl_fpel_func(put, 32, , avx); +decl_fpel_func(put, 64, , avx); +decl_fpel_func(avg, 32, _8, avx2); +decl_fpel_func(avg, 64, _8, avx2); + +decl_mc_funcs(4, mmxext, int16_t, 8, 8); +decl_mc_funcs(8, sse2, int16_t, 8, 8); +decl_mc_funcs(4, ssse3, int8_t, 32, 8); +decl_mc_funcs(8, ssse3, int8_t, 32, 8); +#if ARCH_X86_64 +decl_mc_funcs(16, ssse3, int8_t, 32, 8); +decl_mc_funcs(32, avx2, int8_t, 32, 8); +#endif + +mc_rep_funcs(16, 8, 8, sse2, int16_t, 8, 8) +#if ARCH_X86_32 +mc_rep_funcs(16, 8, 8, ssse3, int8_t, 32, 8) +#endif +mc_rep_funcs(32, 16, 16, sse2, int16_t, 8, 8) +mc_rep_funcs(32, 16, 16, ssse3, int8_t, 32, 8) +mc_rep_funcs(64, 32, 32, sse2, int16_t, 8, 8) +mc_rep_funcs(64, 32, 32, ssse3, int8_t, 32, 8) +#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL +mc_rep_funcs(64, 32, 32, avx2, int8_t, 32, 8) +#endif + +extern const int8_t ff_filters_ssse3[3][15][4][32]; +extern const int16_t ff_filters_sse2[3][15][8][8]; + +filters_8tap_2d_fn2(put, 16, 8, 1, mmxext, sse2, sse2) +filters_8tap_2d_fn2(avg, 16, 8, 1, mmxext, sse2, sse2) +filters_8tap_2d_fn2(put, 16, 8, 1, ssse3, ssse3, ssse3) +filters_8tap_2d_fn2(avg, 16, 8, 1, ssse3, ssse3, ssse3) +#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL +filters_8tap_2d_fn(put, 64, 32, 8, 1, avx2, ssse3) +filters_8tap_2d_fn(put, 32, 32, 8, 1, avx2, ssse3) +filters_8tap_2d_fn(avg, 64, 32, 8, 1, avx2, ssse3) +filters_8tap_2d_fn(avg, 32, 32, 8, 1, avx2, ssse3) +#endif + +filters_8tap_1d_fn3(put, 8, mmxext, sse2, sse2) +filters_8tap_1d_fn3(avg, 8, mmxext, sse2, sse2) +filters_8tap_1d_fn3(put, 8, ssse3, ssse3, ssse3) +filters_8tap_1d_fn3(avg, 8, ssse3, ssse3, ssse3) +#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL +filters_8tap_1d_fn2(put, 64, 8, avx2, ssse3) +filters_8tap_1d_fn2(put, 32, 8, avx2, ssse3) +filters_8tap_1d_fn2(avg, 64, 8, avx2, ssse3) +filters_8tap_1d_fn2(avg, 32, 8, avx2, ssse3) +#endif + +#define itxfm_func(typea, typeb, size, opt) \ +void ff_vp9_##typea##_##typeb##_##size##x##size##_add_##opt(uint8_t *dst, ptrdiff_t stride, \ + int16_t *block, int eob) +#define itxfm_funcs(size, opt) \ +itxfm_func(idct, idct, size, opt); \ +itxfm_func(iadst, idct, size, opt); \ +itxfm_func(idct, iadst, size, opt); \ +itxfm_func(iadst, iadst, size, opt) + +itxfm_func(idct, idct, 4, mmxext); +itxfm_func(idct, iadst, 4, sse2); +itxfm_func(iadst, idct, 4, sse2); +itxfm_func(iadst, iadst, 4, sse2); +itxfm_funcs(4, ssse3); +itxfm_funcs(8, sse2); +itxfm_funcs(8, ssse3); +itxfm_funcs(8, avx); +itxfm_funcs(16, sse2); +itxfm_funcs(16, ssse3); +itxfm_funcs(16, avx); +itxfm_func(idct, idct, 32, sse2); +itxfm_func(idct, idct, 32, ssse3); +itxfm_func(idct, idct, 32, avx); +itxfm_func(iwht, iwht, 4, mmx); +itxfm_funcs(16, avx2); +itxfm_func(idct, idct, 32, avx2); + +#undef itxfm_func +#undef itxfm_funcs + +#define lpf_funcs(size1, size2, opt) \ +void ff_vp9_loop_filter_v_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \ + int E, int I, int H); \ +void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \ + int E, int I, int H) + +lpf_funcs(4, 8, mmxext); +lpf_funcs(8, 8, mmxext); +lpf_funcs(16, 16, sse2); +lpf_funcs(16, 16, ssse3); +lpf_funcs(16, 16, avx); +lpf_funcs(44, 16, sse2); +lpf_funcs(44, 16, ssse3); +lpf_funcs(44, 16, avx); +lpf_funcs(84, 16, sse2); +lpf_funcs(84, 16, ssse3); +lpf_funcs(84, 16, avx); +lpf_funcs(48, 16, sse2); +lpf_funcs(48, 16, ssse3); +lpf_funcs(48, 16, avx); +lpf_funcs(88, 16, sse2); +lpf_funcs(88, 16, ssse3); +lpf_funcs(88, 16, avx); + +#undef lpf_funcs + +#define ipred_func(size, type, opt) \ +void ff_vp9_ipred_##type##_##size##x##size##_##opt(uint8_t *dst, ptrdiff_t stride, \ + const uint8_t *l, const uint8_t *a) + +ipred_func(8, v, mmx); + +#define ipred_dc_funcs(size, opt) \ +ipred_func(size, dc, opt); \ +ipred_func(size, dc_left, opt); \ +ipred_func(size, dc_top, opt) + +ipred_dc_funcs(4, mmxext); +ipred_dc_funcs(8, mmxext); + +#define ipred_dir_tm_funcs(size, opt) \ +ipred_func(size, tm, opt); \ +ipred_func(size, dl, opt); \ +ipred_func(size, dr, opt); \ +ipred_func(size, hd, opt); \ +ipred_func(size, hu, opt); \ +ipred_func(size, vl, opt); \ +ipred_func(size, vr, opt) + +ipred_dir_tm_funcs(4, mmxext); + +ipred_func(16, v, sse); +ipred_func(32, v, sse); + +ipred_dc_funcs(16, sse2); +ipred_dc_funcs(32, sse2); + +#define ipred_dir_tm_h_funcs(size, opt) \ +ipred_dir_tm_funcs(size, opt); \ +ipred_func(size, h, opt) + +ipred_dir_tm_h_funcs(8, sse2); +ipred_dir_tm_h_funcs(16, sse2); +ipred_dir_tm_h_funcs(32, sse2); + +ipred_func(4, h, sse2); + +#define ipred_all_funcs(size, opt) \ +ipred_dc_funcs(size, opt); \ +ipred_dir_tm_h_funcs(size, opt) + +// FIXME hd/vl_4x4_ssse3 does not exist +ipred_all_funcs(4, ssse3); +ipred_all_funcs(8, ssse3); +ipred_all_funcs(16, ssse3); +ipred_all_funcs(32, ssse3); + +ipred_dir_tm_h_funcs(8, avx); +ipred_dir_tm_h_funcs(16, avx); +ipred_dir_tm_h_funcs(32, avx); + +ipred_func(32, v, avx); + +ipred_dc_funcs(32, avx2); +ipred_func(32, h, avx2); +ipred_func(32, tm, avx2); + +#undef ipred_func +#undef ipred_dir_tm_h_funcs +#undef ipred_dir_tm_funcs +#undef ipred_dc_funcs + +#endif /* HAVE_X86ASM */ + +av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact) +{ +#if HAVE_X86ASM + int cpu_flags; + + if (bpp == 10) { + ff_vp9dsp_init_10bpp_x86(dsp, bitexact); + return; + } else if (bpp == 12) { + ff_vp9dsp_init_12bpp_x86(dsp, bitexact); + return; + } + + cpu_flags = av_get_cpu_flags(); + +#define init_lpf(opt) do { \ + dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \ + dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \ + dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \ + dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \ + dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \ + dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \ + dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \ + dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \ + dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \ + dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \ +} while (0) + +#define init_ipred(sz, opt, t, e) \ + dsp->intra_pred[TX_##sz##X##sz][e##_PRED] = ff_vp9_ipred_##t##_##sz##x##sz##_##opt + +#define ff_vp9_ipred_hd_4x4_ssse3 ff_vp9_ipred_hd_4x4_mmxext +#define ff_vp9_ipred_vl_4x4_ssse3 ff_vp9_ipred_vl_4x4_mmxext +#define init_dir_tm_ipred(sz, opt) do { \ + init_ipred(sz, opt, dl, DIAG_DOWN_LEFT); \ + init_ipred(sz, opt, dr, DIAG_DOWN_RIGHT); \ + init_ipred(sz, opt, hd, HOR_DOWN); \ + init_ipred(sz, opt, vl, VERT_LEFT); \ + init_ipred(sz, opt, hu, HOR_UP); \ + init_ipred(sz, opt, tm, TM_VP8); \ + init_ipred(sz, opt, vr, VERT_RIGHT); \ +} while (0) +#define init_dir_tm_h_ipred(sz, opt) do { \ + init_dir_tm_ipred(sz, opt); \ + init_ipred(sz, opt, h, HOR); \ +} while (0) +#define init_dc_ipred(sz, opt) do { \ + init_ipred(sz, opt, dc, DC); \ + init_ipred(sz, opt, dc_left, LEFT_DC); \ + init_ipred(sz, opt, dc_top, TOP_DC); \ +} while (0) +#define init_all_ipred(sz, opt) do { \ + init_dc_ipred(sz, opt); \ + init_dir_tm_h_ipred(sz, opt); \ +} while (0) + + if (EXTERNAL_MMX(cpu_flags)) { + init_fpel_func(4, 0, 4, put, , mmx); + init_fpel_func(3, 0, 8, put, , mmx); + if (!bitexact) { + dsp->itxfm_add[4 /* lossless */][DCT_DCT] = + dsp->itxfm_add[4 /* lossless */][ADST_DCT] = + dsp->itxfm_add[4 /* lossless */][DCT_ADST] = + dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx; + } + init_ipred(8, mmx, v, VERT); + } + + if (EXTERNAL_MMXEXT(cpu_flags)) { + dsp->loop_filter_8[0][0] = ff_vp9_loop_filter_h_4_8_mmxext; + dsp->loop_filter_8[0][1] = ff_vp9_loop_filter_v_4_8_mmxext; + dsp->loop_filter_8[1][0] = ff_vp9_loop_filter_h_8_8_mmxext; + dsp->loop_filter_8[1][1] = ff_vp9_loop_filter_v_8_8_mmxext; + init_subpel2(4, 0, 4, put, 8, mmxext); + init_subpel2(4, 1, 4, avg, 8, mmxext); + init_fpel_func(4, 1, 4, avg, _8, mmxext); + init_fpel_func(3, 1, 8, avg, _8, mmxext); + dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext; + init_dc_ipred(4, mmxext); + init_dc_ipred(8, mmxext); + init_dir_tm_ipred(4, mmxext); + } + + if (EXTERNAL_SSE(cpu_flags)) { + init_fpel_func(2, 0, 16, put, , sse); + init_fpel_func(1, 0, 32, put, , sse); + init_fpel_func(0, 0, 64, put, , sse); + init_ipred(16, sse, v, VERT); + init_ipred(32, sse, v, VERT); + } + + if (EXTERNAL_SSE2(cpu_flags)) { + init_subpel3_8to64(0, put, 8, sse2); + init_subpel3_8to64(1, avg, 8, sse2); + init_fpel_func(2, 1, 16, avg, _8, sse2); + init_fpel_func(1, 1, 32, avg, _8, sse2); + init_fpel_func(0, 1, 64, avg, _8, sse2); + init_lpf(sse2); + dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_sse2; + dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_sse2; + dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_sse2; + dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_sse2; + dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_sse2; + dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_sse2; + dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_sse2; + dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_sse2; + dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_sse2; + dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_sse2; + dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_sse2; + dsp->itxfm_add[TX_32X32][ADST_ADST] = + dsp->itxfm_add[TX_32X32][ADST_DCT] = + dsp->itxfm_add[TX_32X32][DCT_ADST] = + dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_sse2; + init_dc_ipred(16, sse2); + init_dc_ipred(32, sse2); + init_dir_tm_h_ipred(8, sse2); + init_dir_tm_h_ipred(16, sse2); + init_dir_tm_h_ipred(32, sse2); + init_ipred(4, sse2, h, HOR); + } + + if (EXTERNAL_SSSE3(cpu_flags)) { + init_subpel3(0, put, 8, ssse3); + init_subpel3(1, avg, 8, ssse3); + dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3; + dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_ssse3; + dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_ssse3; + dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3; + dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3; + dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_ssse3; + dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_ssse3; + dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3; + dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3; + dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_ssse3; + dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_ssse3; + dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3; + dsp->itxfm_add[TX_32X32][ADST_ADST] = + dsp->itxfm_add[TX_32X32][ADST_DCT] = + dsp->itxfm_add[TX_32X32][DCT_ADST] = + dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3; + init_lpf(ssse3); + init_all_ipred(4, ssse3); + init_all_ipred(8, ssse3); + init_all_ipred(16, ssse3); + init_all_ipred(32, ssse3); + } + + if (EXTERNAL_AVX(cpu_flags)) { + dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx; + dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_avx; + dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_avx; + dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx; + dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx; + dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx; + dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx; + dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx; + dsp->itxfm_add[TX_32X32][ADST_ADST] = + dsp->itxfm_add[TX_32X32][ADST_DCT] = + dsp->itxfm_add[TX_32X32][DCT_ADST] = + dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx; + init_lpf(avx); + init_dir_tm_h_ipred(8, avx); + init_dir_tm_h_ipred(16, avx); + init_dir_tm_h_ipred(32, avx); + } + if (EXTERNAL_AVX_FAST(cpu_flags)) { + init_fpel_func(1, 0, 32, put, , avx); + init_fpel_func(0, 0, 64, put, , avx); + init_ipred(32, avx, v, VERT); + } + + if (EXTERNAL_AVX2_FAST(cpu_flags)) { + init_fpel_func(1, 1, 32, avg, _8, avx2); + init_fpel_func(0, 1, 64, avg, _8, avx2); + if (ARCH_X86_64) { +#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL + dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx2; + dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx2; + dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx2; + dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx2; + dsp->itxfm_add[TX_32X32][ADST_ADST] = + dsp->itxfm_add[TX_32X32][ADST_DCT] = + dsp->itxfm_add[TX_32X32][DCT_ADST] = + dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx2; + init_subpel3_32_64(0, put, 8, avx2); + init_subpel3_32_64(1, avg, 8, avx2); +#endif + } + init_dc_ipred(32, avx2); + init_ipred(32, avx2, h, HOR); + init_ipred(32, avx2, tm, TM_VP8); + } + +#undef init_fpel +#undef init_subpel1 +#undef init_subpel2 +#undef init_subpel3 + +#endif /* HAVE_X86ASM */ +} diff --git a/libs/ffvpx/libavcodec/x86/vp9dsp_init.h b/libs/ffvpx/libavcodec/x86/vp9dsp_init.h new file mode 100644 index 000000000..e410cab3a --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp9dsp_init.h @@ -0,0 +1,189 @@ +/* + * VP9 SIMD optimizations + * + * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_X86_VP9DSP_INIT_H +#define AVCODEC_X86_VP9DSP_INIT_H + +#include "libavcodec/vp9dsp.h" + +// hack to force-expand BPC +#define cat(a, bpp, b) a##bpp##b + +#define decl_fpel_func(avg, sz, bpp, opt) \ +void ff_vp9_##avg##sz##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int h, int mx, int my) + +#define decl_mc_func(avg, sz, dir, opt, type, f_sz, bpp) \ +void ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int h, const type (*filter)[f_sz]) + +#define decl_mc_funcs(sz, opt, type, fsz, bpp) \ +decl_mc_func(put, sz, h, opt, type, fsz, bpp); \ +decl_mc_func(avg, sz, h, opt, type, fsz, bpp); \ +decl_mc_func(put, sz, v, opt, type, fsz, bpp); \ +decl_mc_func(avg, sz, v, opt, type, fsz, bpp) + +#define decl_ipred_fn(type, sz, bpp, opt) \ +void ff_vp9_ipred_##type##_##sz##x##sz##_##bpp##_##opt(uint8_t *dst, \ + ptrdiff_t stride, \ + const uint8_t *l, \ + const uint8_t *a) + +#define decl_ipred_fns(type, bpp, opt4, opt8_16_32) \ +decl_ipred_fn(type, 4, bpp, opt4); \ +decl_ipred_fn(type, 8, bpp, opt8_16_32); \ +decl_ipred_fn(type, 16, bpp, opt8_16_32); \ +decl_ipred_fn(type, 32, bpp, opt8_16_32) + +#define decl_itxfm_func(typea, typeb, size, bpp, opt) \ +void cat(ff_vp9_##typea##_##typeb##_##size##x##size##_add_, bpp, _##opt)(uint8_t *dst, \ + ptrdiff_t stride, \ + int16_t *block, \ + int eob) + +#define decl_itxfm_funcs(size, bpp, opt) \ +decl_itxfm_func(idct, idct, size, bpp, opt); \ +decl_itxfm_func(iadst, idct, size, bpp, opt); \ +decl_itxfm_func(idct, iadst, size, bpp, opt); \ +decl_itxfm_func(iadst, iadst, size, bpp, opt) + +#define mc_rep_func(avg, sz, hsz, hszb, dir, opt, type, f_sz, bpp) \ +static av_always_inline void \ +ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int h, const type (*filter)[f_sz]) \ +{ \ + ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##bpp##_##opt(dst, dst_stride, src, \ + src_stride, h, filter); \ + ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##bpp##_##opt(dst + hszb, dst_stride, src + hszb, \ + src_stride, h, filter); \ +} + +#define mc_rep_funcs(sz, hsz, hszb, opt, type, fsz, bpp) \ +mc_rep_func(put, sz, hsz, hszb, h, opt, type, fsz, bpp) \ +mc_rep_func(avg, sz, hsz, hszb, h, opt, type, fsz, bpp) \ +mc_rep_func(put, sz, hsz, hszb, v, opt, type, fsz, bpp) \ +mc_rep_func(avg, sz, hsz, hszb, v, opt, type, fsz, bpp) + +#define filter_8tap_1d_fn(op, sz, f, f_opt, fname, dir, dvar, bpp, opt) \ +static void op##_8tap_##fname##_##sz##dir##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int h, int mx, int my) \ +{ \ + ff_vp9_##op##_8tap_1d_##dir##_##sz##_##bpp##_##opt(dst, dst_stride, src, src_stride, \ + h, ff_filters_##f_opt[f][dvar - 1]); \ +} + +#define filters_8tap_1d_fn(op, sz, dir, dvar, bpp, opt, f_opt) \ +filter_8tap_1d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, dir, dvar, bpp, opt) \ +filter_8tap_1d_fn(op, sz, FILTER_8TAP_SHARP, f_opt, sharp, dir, dvar, bpp, opt) \ +filter_8tap_1d_fn(op, sz, FILTER_8TAP_SMOOTH, f_opt, smooth, dir, dvar, bpp, opt) + +#define filters_8tap_1d_fn2(op, sz, bpp, opt, f_opt) \ +filters_8tap_1d_fn(op, sz, h, mx, bpp, opt, f_opt) \ +filters_8tap_1d_fn(op, sz, v, my, bpp, opt, f_opt) + +#define filters_8tap_1d_fn3(op, bpp, opt4, opt8, f_opt) \ +filters_8tap_1d_fn2(op, 64, bpp, opt8, f_opt) \ +filters_8tap_1d_fn2(op, 32, bpp, opt8, f_opt) \ +filters_8tap_1d_fn2(op, 16, bpp, opt8, f_opt) \ +filters_8tap_1d_fn2(op, 8, bpp, opt8, f_opt) \ +filters_8tap_1d_fn2(op, 4, bpp, opt4, f_opt) + +#define filter_8tap_2d_fn(op, sz, f, f_opt, fname, align, bpp, bytes, opt) \ +static void op##_8tap_##fname##_##sz##hv_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int h, int mx, int my) \ +{ \ + LOCAL_ALIGNED_##align(uint8_t, temp, [71 * 64 * bytes]); \ + ff_vp9_put_8tap_1d_h_##sz##_##bpp##_##opt(temp, 64 * bytes, src - 3 * src_stride, \ + src_stride, h + 7, \ + ff_filters_##f_opt[f][mx - 1]); \ + ff_vp9_##op##_8tap_1d_v_##sz##_##bpp##_##opt(dst, dst_stride, temp + 3 * bytes * 64, \ + 64 * bytes, h, \ + ff_filters_##f_opt[f][my - 1]); \ +} + +#define filters_8tap_2d_fn(op, sz, align, bpp, bytes, opt, f_opt) \ +filter_8tap_2d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, align, bpp, bytes, opt) \ +filter_8tap_2d_fn(op, sz, FILTER_8TAP_SHARP, f_opt, sharp, align, bpp, bytes, opt) \ +filter_8tap_2d_fn(op, sz, FILTER_8TAP_SMOOTH, f_opt, smooth, align, bpp, bytes, opt) + +#define filters_8tap_2d_fn2(op, align, bpp, bytes, opt4, opt8, f_opt) \ +filters_8tap_2d_fn(op, 64, align, bpp, bytes, opt8, f_opt) \ +filters_8tap_2d_fn(op, 32, align, bpp, bytes, opt8, f_opt) \ +filters_8tap_2d_fn(op, 16, align, bpp, bytes, opt8, f_opt) \ +filters_8tap_2d_fn(op, 8, align, bpp, bytes, opt8, f_opt) \ +filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt4, f_opt) + +#define init_fpel_func(idx1, idx2, sz, type, bpp, opt) \ + dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \ + dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \ + dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][0][0] = \ + dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = ff_vp9_##type##sz##bpp##_##opt + +#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, bpp, opt) \ + dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = \ + type##_8tap_smooth_##sz##dir##_##bpp##_##opt; \ + dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = \ + type##_8tap_regular_##sz##dir##_##bpp##_##opt; \ + dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][idxh][idxv] = \ + type##_8tap_sharp_##sz##dir##_##bpp##_##opt + +#define init_subpel2(idx1, idx2, sz, type, bpp, opt) \ + init_subpel1(idx1, idx2, 1, 1, sz, hv, type, bpp, opt); \ + init_subpel1(idx1, idx2, 0, 1, sz, v, type, bpp, opt); \ + init_subpel1(idx1, idx2, 1, 0, sz, h, type, bpp, opt) + +#define init_subpel3_32_64(idx, type, bpp, opt) \ + init_subpel2(0, idx, 64, type, bpp, opt); \ + init_subpel2(1, idx, 32, type, bpp, opt) + +#define init_subpel3_8to64(idx, type, bpp, opt) \ + init_subpel3_32_64(idx, type, bpp, opt); \ + init_subpel2(2, idx, 16, type, bpp, opt); \ + init_subpel2(3, idx, 8, type, bpp, opt) + +#define init_subpel3(idx, type, bpp, opt) \ + init_subpel3_8to64(idx, type, bpp, opt); \ + init_subpel2(4, idx, 4, type, bpp, opt) + +#define init_ipred_func(type, enum, sz, bpp, opt) \ + dsp->intra_pred[TX_##sz##X##sz][enum##_PRED] = \ + cat(ff_vp9_ipred_##type##_##sz##x##sz##_, bpp, _##opt) + +#define init_8_16_32_ipred_funcs(type, enum, bpp, opt) \ + init_ipred_func(type, enum, 8, bpp, opt); \ + init_ipred_func(type, enum, 16, bpp, opt); \ + init_ipred_func(type, enum, 32, bpp, opt) + +#define init_ipred_funcs(type, enum, bpp, opt) \ + init_ipred_func(type, enum, 4, bpp, opt); \ + init_8_16_32_ipred_funcs(type, enum, bpp, opt) + +void ff_vp9dsp_init_10bpp_x86(VP9DSPContext *dsp, int bitexact); +void ff_vp9dsp_init_12bpp_x86(VP9DSPContext *dsp, int bitexact); +void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp); + +#endif /* AVCODEC_X86_VP9DSP_INIT_H */ diff --git a/libs/ffvpx/libavcodec/x86/vp9dsp_init_10bpp.c b/libs/ffvpx/libavcodec/x86/vp9dsp_init_10bpp.c new file mode 100644 index 000000000..2694c06cb --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp9dsp_init_10bpp.c @@ -0,0 +1,25 @@ +/* + * VP9 SIMD optimizations + * + * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define BPC 10 +#define INIT_FUNC ff_vp9dsp_init_10bpp_x86 +#include "vp9dsp_init_16bpp_template.c" diff --git a/libs/ffvpx/libavcodec/x86/vp9dsp_init_12bpp.c b/libs/ffvpx/libavcodec/x86/vp9dsp_init_12bpp.c new file mode 100644 index 000000000..5da3bc184 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp9dsp_init_12bpp.c @@ -0,0 +1,25 @@ +/* + * VP9 SIMD optimizations + * + * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define BPC 12 +#define INIT_FUNC ff_vp9dsp_init_12bpp_x86 +#include "vp9dsp_init_16bpp_template.c" diff --git a/libs/ffvpx/libavcodec/x86/vp9dsp_init_16bpp.c b/libs/ffvpx/libavcodec/x86/vp9dsp_init_16bpp.c new file mode 100644 index 000000000..60d10a12a --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp9dsp_init_16bpp.c @@ -0,0 +1,149 @@ +/* + * VP9 SIMD optimizations + * + * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/mem.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/vp9dsp.h" +#include "libavcodec/x86/vp9dsp_init.h" + +#if HAVE_X86ASM + +decl_fpel_func(put, 8, , mmx); +decl_fpel_func(avg, 8, _16, mmxext); +decl_fpel_func(put, 16, , sse); +decl_fpel_func(put, 32, , sse); +decl_fpel_func(put, 64, , sse); +decl_fpel_func(put, 128, , sse); +decl_fpel_func(avg, 16, _16, sse2); +decl_fpel_func(avg, 32, _16, sse2); +decl_fpel_func(avg, 64, _16, sse2); +decl_fpel_func(avg, 128, _16, sse2); +decl_fpel_func(put, 32, , avx); +decl_fpel_func(put, 64, , avx); +decl_fpel_func(put, 128, , avx); +decl_fpel_func(avg, 32, _16, avx2); +decl_fpel_func(avg, 64, _16, avx2); +decl_fpel_func(avg, 128, _16, avx2); + +decl_ipred_fns(v, 16, mmx, sse); +decl_ipred_fns(h, 16, mmxext, sse2); +decl_ipred_fns(dc, 16, mmxext, sse2); +decl_ipred_fns(dc_top, 16, mmxext, sse2); +decl_ipred_fns(dc_left, 16, mmxext, sse2); +decl_ipred_fn(dl, 16, 16, avx2); +decl_ipred_fn(dl, 32, 16, avx2); +decl_ipred_fn(dr, 16, 16, avx2); +decl_ipred_fn(dr, 32, 16, avx2); + +#define decl_ipred_dir_funcs(type) \ +decl_ipred_fns(type, 16, sse2, sse2); \ +decl_ipred_fns(type, 16, ssse3, ssse3); \ +decl_ipred_fns(type, 16, avx, avx) + +decl_ipred_dir_funcs(dl); +decl_ipred_dir_funcs(dr); +decl_ipred_dir_funcs(vl); +decl_ipred_dir_funcs(vr); +decl_ipred_dir_funcs(hu); +decl_ipred_dir_funcs(hd); +#endif /* HAVE_X86ASM */ + +av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp) +{ +#if HAVE_X86ASM + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_MMX(cpu_flags)) { + init_fpel_func(4, 0, 8, put, , mmx); + init_ipred_func(v, VERT, 4, 16, mmx); + } + + if (EXTERNAL_MMXEXT(cpu_flags)) { + init_fpel_func(4, 1, 8, avg, _16, mmxext); + init_ipred_func(h, HOR, 4, 16, mmxext); + init_ipred_func(dc, DC, 4, 16, mmxext); + init_ipred_func(dc_top, TOP_DC, 4, 16, mmxext); + init_ipred_func(dc_left, LEFT_DC, 4, 16, mmxext); + } + + if (EXTERNAL_SSE(cpu_flags)) { + init_fpel_func(3, 0, 16, put, , sse); + init_fpel_func(2, 0, 32, put, , sse); + init_fpel_func(1, 0, 64, put, , sse); + init_fpel_func(0, 0, 128, put, , sse); + init_8_16_32_ipred_funcs(v, VERT, 16, sse); + } + + if (EXTERNAL_SSE2(cpu_flags)) { + init_fpel_func(3, 1, 16, avg, _16, sse2); + init_fpel_func(2, 1, 32, avg, _16, sse2); + init_fpel_func(1, 1, 64, avg, _16, sse2); + init_fpel_func(0, 1, 128, avg, _16, sse2); + init_8_16_32_ipred_funcs(h, HOR, 16, sse2); + init_8_16_32_ipred_funcs(dc, DC, 16, sse2); + init_8_16_32_ipred_funcs(dc_top, TOP_DC, 16, sse2); + init_8_16_32_ipred_funcs(dc_left, LEFT_DC, 16, sse2); + init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, sse2); + init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, sse2); + init_ipred_funcs(vl, VERT_LEFT, 16, sse2); + init_ipred_funcs(vr, VERT_RIGHT, 16, sse2); + init_ipred_funcs(hu, HOR_UP, 16, sse2); + init_ipred_funcs(hd, HOR_DOWN, 16, sse2); + } + + if (EXTERNAL_SSSE3(cpu_flags)) { + init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, ssse3); + init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, ssse3); + init_ipred_funcs(vl, VERT_LEFT, 16, ssse3); + init_ipred_funcs(vr, VERT_RIGHT, 16, ssse3); + init_ipred_funcs(hu, HOR_UP, 16, ssse3); + init_ipred_funcs(hd, HOR_DOWN, 16, ssse3); + } + + if (EXTERNAL_AVX_FAST(cpu_flags)) { + init_fpel_func(2, 0, 32, put, , avx); + init_fpel_func(1, 0, 64, put, , avx); + init_fpel_func(0, 0, 128, put, , avx); + init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, avx); + init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, avx); + init_ipred_funcs(vl, VERT_LEFT, 16, avx); + init_ipred_funcs(vr, VERT_RIGHT, 16, avx); + init_ipred_funcs(hu, HOR_UP, 16, avx); + init_ipred_funcs(hd, HOR_DOWN, 16, avx); + } + + if (EXTERNAL_AVX2_FAST(cpu_flags)) { + init_fpel_func(2, 1, 32, avg, _16, avx2); + init_fpel_func(1, 1, 64, avg, _16, avx2); + init_fpel_func(0, 1, 128, avg, _16, avx2); + init_ipred_func(dl, DIAG_DOWN_LEFT, 16, 16, avx2); + init_ipred_func(dl, DIAG_DOWN_LEFT, 32, 16, avx2); + init_ipred_func(dr, DIAG_DOWN_RIGHT, 16, 16, avx2); +#if ARCH_X86_64 + init_ipred_func(dr, DIAG_DOWN_RIGHT, 32, 16, avx2); +#endif + } + +#endif /* HAVE_X86ASM */ +} diff --git a/libs/ffvpx/libavcodec/x86/vp9dsp_init_16bpp_template.c b/libs/ffvpx/libavcodec/x86/vp9dsp_init_16bpp_template.c new file mode 100644 index 000000000..b56afc7f5 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp9dsp_init_16bpp_template.c @@ -0,0 +1,240 @@ +/* + * VP9 SIMD optimizations + * + * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/mem.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/vp9dsp.h" +#include "libavcodec/x86/vp9dsp_init.h" + +#if HAVE_X86ASM + +extern const int16_t ff_filters_16bpp[3][15][4][16]; + +decl_mc_funcs(4, sse2, int16_t, 16, BPC); +decl_mc_funcs(8, sse2, int16_t, 16, BPC); +decl_mc_funcs(16, avx2, int16_t, 16, BPC); + +mc_rep_funcs(16, 8, 16, sse2, int16_t, 16, BPC) +mc_rep_funcs(32, 16, 32, sse2, int16_t, 16, BPC) +mc_rep_funcs(64, 32, 64, sse2, int16_t, 16, BPC) +#if HAVE_AVX2_EXTERNAL +mc_rep_funcs(32, 16, 32, avx2, int16_t, 16, BPC) +mc_rep_funcs(64, 32, 64, avx2, int16_t, 16, BPC) +#endif + +filters_8tap_2d_fn2(put, 16, BPC, 2, sse2, sse2, 16bpp) +filters_8tap_2d_fn2(avg, 16, BPC, 2, sse2, sse2, 16bpp) +#if HAVE_AVX2_EXTERNAL +filters_8tap_2d_fn(put, 64, 32, BPC, 2, avx2, 16bpp) +filters_8tap_2d_fn(avg, 64, 32, BPC, 2, avx2, 16bpp) +filters_8tap_2d_fn(put, 32, 32, BPC, 2, avx2, 16bpp) +filters_8tap_2d_fn(avg, 32, 32, BPC, 2, avx2, 16bpp) +filters_8tap_2d_fn(put, 16, 32, BPC, 2, avx2, 16bpp) +filters_8tap_2d_fn(avg, 16, 32, BPC, 2, avx2, 16bpp) +#endif + +filters_8tap_1d_fn3(put, BPC, sse2, sse2, 16bpp) +filters_8tap_1d_fn3(avg, BPC, sse2, sse2, 16bpp) +#if HAVE_AVX2_EXTERNAL +filters_8tap_1d_fn2(put, 64, BPC, avx2, 16bpp) +filters_8tap_1d_fn2(avg, 64, BPC, avx2, 16bpp) +filters_8tap_1d_fn2(put, 32, BPC, avx2, 16bpp) +filters_8tap_1d_fn2(avg, 32, BPC, avx2, 16bpp) +filters_8tap_1d_fn2(put, 16, BPC, avx2, 16bpp) +filters_8tap_1d_fn2(avg, 16, BPC, avx2, 16bpp) +#endif + +#define decl_lpf_func(dir, wd, bpp, opt) \ +void ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \ + int E, int I, int H) + +#define decl_lpf_funcs(dir, wd, bpp) \ +decl_lpf_func(dir, wd, bpp, sse2); \ +decl_lpf_func(dir, wd, bpp, ssse3); \ +decl_lpf_func(dir, wd, bpp, avx) + +#define decl_lpf_funcs_wd(dir) \ +decl_lpf_funcs(dir, 4, BPC); \ +decl_lpf_funcs(dir, 8, BPC); \ +decl_lpf_funcs(dir, 16, BPC) + +decl_lpf_funcs_wd(h); +decl_lpf_funcs_wd(v); + +#define lpf_16_wrapper(dir, off, bpp, opt) \ +static void loop_filter_##dir##_16_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \ + int E, int I, int H) \ +{ \ + ff_vp9_loop_filter_##dir##_16_##bpp##_##opt(dst, stride, E, I, H); \ + ff_vp9_loop_filter_##dir##_16_##bpp##_##opt(dst + off, stride, E, I, H); \ +} + +#define lpf_16_wrappers(bpp, opt) \ +lpf_16_wrapper(h, 8 * stride, bpp, opt) \ +lpf_16_wrapper(v, 16, bpp, opt) + +lpf_16_wrappers(BPC, sse2) +lpf_16_wrappers(BPC, ssse3) +lpf_16_wrappers(BPC, avx) + +#define lpf_mix2_wrapper(dir, off, wd1, wd2, bpp, opt) \ +static void loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \ + int E, int I, int H) \ +{ \ + ff_vp9_loop_filter_##dir##_##wd1##_##bpp##_##opt(dst, stride, \ + E & 0xff, I & 0xff, H & 0xff); \ + ff_vp9_loop_filter_##dir##_##wd2##_##bpp##_##opt(dst + off, stride, \ + E >> 8, I >> 8, H >> 8); \ +} + +#define lpf_mix2_wrappers(wd1, wd2, bpp, opt) \ +lpf_mix2_wrapper(h, 8 * stride, wd1, wd2, bpp, opt) \ +lpf_mix2_wrapper(v, 16, wd1, wd2, bpp, opt) + +#define lpf_mix2_wrappers_set(bpp, opt) \ +lpf_mix2_wrappers(4, 4, bpp, opt) \ +lpf_mix2_wrappers(4, 8, bpp, opt) \ +lpf_mix2_wrappers(8, 4, bpp, opt) \ +lpf_mix2_wrappers(8, 8, bpp, opt) \ + +lpf_mix2_wrappers_set(BPC, sse2) +lpf_mix2_wrappers_set(BPC, ssse3) +lpf_mix2_wrappers_set(BPC, avx) + +decl_ipred_fns(tm, BPC, mmxext, sse2); + +decl_itxfm_func(iwht, iwht, 4, BPC, mmxext); +#if BPC == 10 +decl_itxfm_func(idct, idct, 4, BPC, mmxext); +decl_itxfm_funcs(4, BPC, ssse3); +#else +decl_itxfm_func(idct, idct, 4, BPC, sse2); +#endif +decl_itxfm_func(idct, iadst, 4, BPC, sse2); +decl_itxfm_func(iadst, idct, 4, BPC, sse2); +decl_itxfm_func(iadst, iadst, 4, BPC, sse2); +decl_itxfm_funcs(8, BPC, sse2); +decl_itxfm_funcs(16, BPC, sse2); +decl_itxfm_func(idct, idct, 32, BPC, sse2); +#endif /* HAVE_X86ASM */ + +av_cold void INIT_FUNC(VP9DSPContext *dsp, int bitexact) +{ +#if HAVE_X86ASM + int cpu_flags = av_get_cpu_flags(); + +#define init_lpf_8_func(idx1, idx2, dir, wd, bpp, opt) \ + dsp->loop_filter_8[idx1][idx2] = ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt +#define init_lpf_16_func(idx, dir, bpp, opt) \ + dsp->loop_filter_16[idx] = loop_filter_##dir##_16_##bpp##_##opt +#define init_lpf_mix2_func(idx1, idx2, idx3, dir, wd1, wd2, bpp, opt) \ + dsp->loop_filter_mix2[idx1][idx2][idx3] = loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt + +#define init_lpf_funcs(bpp, opt) \ + init_lpf_8_func(0, 0, h, 4, bpp, opt); \ + init_lpf_8_func(0, 1, v, 4, bpp, opt); \ + init_lpf_8_func(1, 0, h, 8, bpp, opt); \ + init_lpf_8_func(1, 1, v, 8, bpp, opt); \ + init_lpf_8_func(2, 0, h, 16, bpp, opt); \ + init_lpf_8_func(2, 1, v, 16, bpp, opt); \ + init_lpf_16_func(0, h, bpp, opt); \ + init_lpf_16_func(1, v, bpp, opt); \ + init_lpf_mix2_func(0, 0, 0, h, 4, 4, bpp, opt); \ + init_lpf_mix2_func(0, 1, 0, h, 4, 8, bpp, opt); \ + init_lpf_mix2_func(1, 0, 0, h, 8, 4, bpp, opt); \ + init_lpf_mix2_func(1, 1, 0, h, 8, 8, bpp, opt); \ + init_lpf_mix2_func(0, 0, 1, v, 4, 4, bpp, opt); \ + init_lpf_mix2_func(0, 1, 1, v, 4, 8, bpp, opt); \ + init_lpf_mix2_func(1, 0, 1, v, 8, 4, bpp, opt); \ + init_lpf_mix2_func(1, 1, 1, v, 8, 8, bpp, opt) + +#define init_itx_func(idxa, idxb, typea, typeb, size, bpp, opt) \ + dsp->itxfm_add[idxa][idxb] = \ + cat(ff_vp9_##typea##_##typeb##_##size##x##size##_add_, bpp, _##opt); +#define init_itx_func_one(idx, typea, typeb, size, bpp, opt) \ + init_itx_func(idx, DCT_DCT, typea, typeb, size, bpp, opt); \ + init_itx_func(idx, ADST_DCT, typea, typeb, size, bpp, opt); \ + init_itx_func(idx, DCT_ADST, typea, typeb, size, bpp, opt); \ + init_itx_func(idx, ADST_ADST, typea, typeb, size, bpp, opt) +#define init_itx_funcs(idx, size, bpp, opt) \ + init_itx_func(idx, DCT_DCT, idct, idct, size, bpp, opt); \ + init_itx_func(idx, ADST_DCT, idct, iadst, size, bpp, opt); \ + init_itx_func(idx, DCT_ADST, iadst, idct, size, bpp, opt); \ + init_itx_func(idx, ADST_ADST, iadst, iadst, size, bpp, opt); \ + + if (EXTERNAL_MMXEXT(cpu_flags)) { + init_ipred_func(tm, TM_VP8, 4, BPC, mmxext); + if (!bitexact) { + init_itx_func_one(4 /* lossless */, iwht, iwht, 4, BPC, mmxext); +#if BPC == 10 + init_itx_func(TX_4X4, DCT_DCT, idct, idct, 4, 10, mmxext); +#endif + } + } + + if (EXTERNAL_SSE2(cpu_flags)) { + init_subpel3(0, put, BPC, sse2); + init_subpel3(1, avg, BPC, sse2); + init_lpf_funcs(BPC, sse2); + init_8_16_32_ipred_funcs(tm, TM_VP8, BPC, sse2); +#if BPC == 10 + if (!bitexact) { + init_itx_func(TX_4X4, ADST_DCT, idct, iadst, 4, 10, sse2); + init_itx_func(TX_4X4, DCT_ADST, iadst, idct, 4, 10, sse2); + init_itx_func(TX_4X4, ADST_ADST, iadst, iadst, 4, 10, sse2); + } +#else + init_itx_funcs(TX_4X4, 4, 12, sse2); +#endif + init_itx_funcs(TX_8X8, 8, BPC, sse2); + init_itx_funcs(TX_16X16, 16, BPC, sse2); + init_itx_func_one(TX_32X32, idct, idct, 32, BPC, sse2); + } + + if (EXTERNAL_SSSE3(cpu_flags)) { + init_lpf_funcs(BPC, ssse3); +#if BPC == 10 + if (!bitexact) { + init_itx_funcs(TX_4X4, 4, BPC, ssse3); + } +#endif + } + + if (EXTERNAL_AVX(cpu_flags)) { + init_lpf_funcs(BPC, avx); + } + + if (EXTERNAL_AVX2_FAST(cpu_flags)) { +#if HAVE_AVX2_EXTERNAL + init_subpel3_32_64(0, put, BPC, avx2); + init_subpel3_32_64(1, avg, BPC, avx2); + init_subpel2(2, 0, 16, put, BPC, avx2); + init_subpel2(2, 1, 16, avg, BPC, avx2); +#endif + } + +#endif /* HAVE_X86ASM */ + + ff_vp9dsp_init_16bpp_x86(dsp); +} diff --git a/libs/ffvpx/libavcodec/x86/vp9intrapred.asm b/libs/ffvpx/libavcodec/x86/vp9intrapred.asm new file mode 100644 index 000000000..31f7d449f --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp9intrapred.asm @@ -0,0 +1,2044 @@ +;****************************************************************************** +;* VP9 Intra prediction SIMD optimizations +;* +;* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com> +;* +;* Parts based on: +;* H.264 intra prediction asm optimizations +;* Copyright (c) 2010 Fiona Glaser +;* Copyright (c) 2010 Holger Lubitz +;* Copyright (c) 2010 Loren Merritt +;* Copyright (c) 2010 Ronald S. Bultje +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA 32 + +pw_m256: times 16 dw -256 +pw_m255: times 16 dw -255 +pw_4096: times 8 dw 4096 + +pb_4x3_4x2_4x1_4x0: times 4 db 3 + times 4 db 2 + times 4 db 1 + times 4 db 0 +pb_8x1_8x0: times 8 db 1 + times 8 db 0 +pb_8x3_8x2: times 8 db 3 + times 8 db 2 +pb_0to5_2x7: db 0, 1, 2, 3, 4, 5, 7, 7 + times 8 db -1 +pb_0to6_9x7: db 0, 1, 2, 3, 4, 5, 6 + times 9 db 7 +pb_1to6_10x7: db 1, 2, 3, 4, 5, 6 + times 10 db 7 +pb_2to6_3x7: +pb_2to6_11x7: db 2, 3, 4, 5, 6 + times 11 db 7 +pb_1toE_2xF: db 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15 +pb_2toE_3xF: db 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15 +pb_13456_3xm1: db 1, 3, 4, 5, 6 + times 3 db -1 +pb_6012_4xm1: db 6, 0, 1, 2 + times 4 db -1 +pb_6xm1_246_8toE: times 6 db -1 + db 2, 4, 6, 8, 9, 10, 11, 12, 13, 14 +pb_6xm1_BDF_0to6: times 6 db -1 + db 11, 13, 15, 0, 1, 2, 3, 4, 5, 6 +pb_02468ACE_13579BDF: db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 + +pb_15x0_1xm1: times 15 db 0 + db -1 +pb_0to2_5x3: db 0, 1, 2 + times 5 db 3 +pb_6xm1_2x0: times 6 db -1 + times 2 db 0 +pb_6x0_2xm1: times 6 db 0 + times 2 db -1 + +cextern pb_1 +cextern pb_2 +cextern pb_3 +cextern pb_15 +cextern pw_2 +cextern pw_4 +cextern pw_8 +cextern pw_16 +cextern pw_32 +cextern pw_255 +cextern pw_512 +cextern pw_1024 +cextern pw_2048 +cextern pw_8192 + +SECTION .text + +; dc_NxN(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a) + +%macro DC_4to8_FUNCS 0 +cglobal vp9_ipred_dc_4x4, 4, 4, 0, dst, stride, l, a + movd m0, [lq] + punpckldq m0, [aq] + pxor m1, m1 + psadbw m0, m1 +%if cpuflag(ssse3) + pmulhrsw m0, [pw_4096] + pshufb m0, m1 +%else + paddw m0, [pw_4] + psraw m0, 3 + punpcklbw m0, m0 + pshufw m0, m0, q0000 +%endif + movd [dstq+strideq*0], m0 + movd [dstq+strideq*1], m0 + lea dstq, [dstq+strideq*2] + movd [dstq+strideq*0], m0 + movd [dstq+strideq*1], m0 + RET + +cglobal vp9_ipred_dc_8x8, 4, 4, 0, dst, stride, l, a + movq m0, [lq] + movq m1, [aq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + pxor m2, m2 + psadbw m0, m2 + psadbw m1, m2 + paddw m0, m1 +%if cpuflag(ssse3) + pmulhrsw m0, [pw_2048] + pshufb m0, m2 +%else + paddw m0, [pw_8] + psraw m0, 4 + punpcklbw m0, m0 + pshufw m0, m0, q0000 +%endif + movq [dstq+strideq*0], m0 + movq [dstq+strideq*1], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + movq [dstq+strideq*0], m0 + movq [dstq+strideq*1], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + RET +%endmacro + +INIT_MMX mmxext +DC_4to8_FUNCS +INIT_MMX ssse3 +DC_4to8_FUNCS + +%macro DC_16to32_FUNCS 0 +cglobal vp9_ipred_dc_16x16, 4, 4, 3, dst, stride, l, a + mova m0, [lq] + mova m1, [aq] + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + pxor m2, m2 + psadbw m0, m2 + psadbw m1, m2 + paddw m0, m1 + movhlps m1, m0 + paddw m0, m1 +%if cpuflag(ssse3) + pmulhrsw m0, [pw_1024] + pshufb m0, m2 +%else + paddw m0, [pw_16] + psraw m0, 5 + punpcklbw m0, m0 + pshuflw m0, m0, q0000 + punpcklqdq m0, m0 +%endif + mov cntd, 4 +.loop: + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + dec cntd + jg .loop + RET + +cglobal vp9_ipred_dc_32x32, 4, 4, 5, dst, stride, l, a + mova m0, [lq] + mova m1, [lq+16] + mova m2, [aq] + mova m3, [aq+16] + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + pxor m4, m4 + psadbw m0, m4 + psadbw m1, m4 + psadbw m2, m4 + psadbw m3, m4 + paddw m0, m1 + paddw m2, m3 + paddw m0, m2 + movhlps m1, m0 + paddw m0, m1 +%if cpuflag(ssse3) + pmulhrsw m0, [pw_512] + pshufb m0, m4 +%else + paddw m0, [pw_32] + psraw m0, 6 + punpcklbw m0, m0 + pshuflw m0, m0, q0000 + punpcklqdq m0, m0 +%endif + mov cntd, 8 +.loop: + mova [dstq+strideq*0+ 0], m0 + mova [dstq+strideq*0+16], m0 + mova [dstq+strideq*1+ 0], m0 + mova [dstq+strideq*1+16], m0 + mova [dstq+strideq*2+ 0], m0 + mova [dstq+strideq*2+16], m0 + mova [dstq+stride3q + 0], m0 + mova [dstq+stride3q +16], m0 + lea dstq, [dstq+strideq*4] + dec cntd + jg .loop + RET +%endmacro + +INIT_XMM sse2 +DC_16to32_FUNCS +INIT_XMM ssse3 +DC_16to32_FUNCS + +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +cglobal vp9_ipred_dc_32x32, 4, 4, 3, dst, stride, l, a + mova m0, [lq] + mova m1, [aq] + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + pxor m2, m2 + psadbw m0, m2 + psadbw m1, m2 + paddw m0, m1 + vextracti128 xm1, m0, 1 + paddw xm0, xm1 + movhlps xm1, xm0 + paddw xm0, xm1 + pmulhrsw xm0, [pw_512] + vpbroadcastb m0, xm0 + mov cntd, 4 +.loop: + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + dec cntd + jg .loop + RET +%endif + +; dc_top/left_NxN(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a) + +%macro DC_1D_4to8_FUNCS 2 ; dir (top or left), arg (a or l) +cglobal vp9_ipred_dc_%1_4x4, 4, 4, 0, dst, stride, l, a + movd m0, [%2q] + pxor m1, m1 + psadbw m0, m1 +%if cpuflag(ssse3) + pmulhrsw m0, [pw_8192] + pshufb m0, m1 +%else + paddw m0, [pw_2] + psraw m0, 2 + punpcklbw m0, m0 + pshufw m0, m0, q0000 +%endif + movd [dstq+strideq*0], m0 + movd [dstq+strideq*1], m0 + lea dstq, [dstq+strideq*2] + movd [dstq+strideq*0], m0 + movd [dstq+strideq*1], m0 + RET + +cglobal vp9_ipred_dc_%1_8x8, 4, 4, 0, dst, stride, l, a + movq m0, [%2q] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + pxor m1, m1 + psadbw m0, m1 +%if cpuflag(ssse3) + pmulhrsw m0, [pw_4096] + pshufb m0, m1 +%else + paddw m0, [pw_4] + psraw m0, 3 + punpcklbw m0, m0 + pshufw m0, m0, q0000 +%endif + movq [dstq+strideq*0], m0 + movq [dstq+strideq*1], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + movq [dstq+strideq*0], m0 + movq [dstq+strideq*1], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + RET +%endmacro + +INIT_MMX mmxext +DC_1D_4to8_FUNCS top, a +DC_1D_4to8_FUNCS left, l +INIT_MMX ssse3 +DC_1D_4to8_FUNCS top, a +DC_1D_4to8_FUNCS left, l + +%macro DC_1D_16to32_FUNCS 2; dir (top or left), arg (a or l) +cglobal vp9_ipred_dc_%1_16x16, 4, 4, 3, dst, stride, l, a + mova m0, [%2q] + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + pxor m2, m2 + psadbw m0, m2 + movhlps m1, m0 + paddw m0, m1 +%if cpuflag(ssse3) + pmulhrsw m0, [pw_2048] + pshufb m0, m2 +%else + paddw m0, [pw_8] + psraw m0, 4 + punpcklbw m0, m0 + pshuflw m0, m0, q0000 + punpcklqdq m0, m0 +%endif + mov cntd, 4 +.loop: + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + dec cntd + jg .loop + RET + +cglobal vp9_ipred_dc_%1_32x32, 4, 4, 3, dst, stride, l, a + mova m0, [%2q] + mova m1, [%2q+16] + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + pxor m2, m2 + psadbw m0, m2 + psadbw m1, m2 + paddw m0, m1 + movhlps m1, m0 + paddw m0, m1 +%if cpuflag(ssse3) + pmulhrsw m0, [pw_1024] + pshufb m0, m2 +%else + paddw m0, [pw_16] + psraw m0, 5 + punpcklbw m0, m0 + pshuflw m0, m0, q0000 + punpcklqdq m0, m0 +%endif + mov cntd, 8 +.loop: + mova [dstq+strideq*0+ 0], m0 + mova [dstq+strideq*0+16], m0 + mova [dstq+strideq*1+ 0], m0 + mova [dstq+strideq*1+16], m0 + mova [dstq+strideq*2+ 0], m0 + mova [dstq+strideq*2+16], m0 + mova [dstq+stride3q + 0], m0 + mova [dstq+stride3q +16], m0 + lea dstq, [dstq+strideq*4] + dec cntd + jg .loop + RET +%endmacro + +INIT_XMM sse2 +DC_1D_16to32_FUNCS top, a +DC_1D_16to32_FUNCS left, l +INIT_XMM ssse3 +DC_1D_16to32_FUNCS top, a +DC_1D_16to32_FUNCS left, l + +%macro DC_1D_AVX2_FUNCS 2 ; dir (top or left), arg (a or l) +%if HAVE_AVX2_EXTERNAL +cglobal vp9_ipred_dc_%1_32x32, 4, 4, 3, dst, stride, l, a + mova m0, [%2q] + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + pxor m2, m2 + psadbw m0, m2 + vextracti128 xm1, m0, 1 + paddw xm0, xm1 + movhlps xm1, xm0 + paddw xm0, xm1 + pmulhrsw xm0, [pw_1024] + vpbroadcastb m0, xm0 + mov cntd, 4 +.loop: + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + dec cntd + jg .loop + RET +%endif +%endmacro + +INIT_YMM avx2 +DC_1D_AVX2_FUNCS top, a +DC_1D_AVX2_FUNCS left, l + +; v + +INIT_MMX mmx +cglobal vp9_ipred_v_8x8, 4, 4, 0, dst, stride, l, a + movq m0, [aq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + movq [dstq+strideq*0], m0 + movq [dstq+strideq*1], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + movq [dstq+strideq*0], m0 + movq [dstq+strideq*1], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + RET + +INIT_XMM sse +cglobal vp9_ipred_v_16x16, 4, 4, 1, dst, stride, l, a + mova m0, [aq] + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + mov cntd, 4 +.loop: + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + dec cntd + jg .loop + RET + +INIT_XMM sse +cglobal vp9_ipred_v_32x32, 4, 4, 2, dst, stride, l, a + mova m0, [aq] + mova m1, [aq+16] + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + mov cntd, 8 +.loop: + mova [dstq+strideq*0+ 0], m0 + mova [dstq+strideq*0+16], m1 + mova [dstq+strideq*1+ 0], m0 + mova [dstq+strideq*1+16], m1 + mova [dstq+strideq*2+ 0], m0 + mova [dstq+strideq*2+16], m1 + mova [dstq+stride3q + 0], m0 + mova [dstq+stride3q +16], m1 + lea dstq, [dstq+strideq*4] + dec cntd + jg .loop + RET + +INIT_YMM avx +cglobal vp9_ipred_v_32x32, 4, 4, 1, dst, stride, l, a + mova m0, [aq] + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + mov cntd, 4 +.loop: + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + dec cntd + jg .loop + RET + +; h + +%macro H_XMM_FUNCS 2 +%if notcpuflag(avx) +cglobal vp9_ipred_h_4x4, 3, 4, 1, dst, stride, l, stride3 + movd m0, [lq] +%if cpuflag(ssse3) + pshufb m0, [pb_4x3_4x2_4x1_4x0] +%else + punpcklbw m0, m0 + pshuflw m0, m0, q0123 + punpcklwd m0, m0 +%endif + lea stride3q, [strideq*3] + movd [dstq+strideq*0], m0 + psrldq m0, 4 + movd [dstq+strideq*1], m0 + psrldq m0, 4 + movd [dstq+strideq*2], m0 + psrldq m0, 4 + movd [dstq+stride3q ], m0 + RET +%endif + +cglobal vp9_ipred_h_8x8, 3, 5, %1, dst, stride, l, stride3, cnt +%if cpuflag(ssse3) + mova m2, [pb_8x1_8x0] + mova m3, [pb_8x3_8x2] +%endif + lea stride3q, [strideq*3] + mov cntq, 1 +.loop: + movd m0, [lq+cntq*4] +%if cpuflag(ssse3) + pshufb m1, m0, m3 + pshufb m0, m2 +%else + punpcklbw m0, m0 + punpcklwd m0, m0 + pshufd m1, m0, q2233 + pshufd m0, m0, q0011 +%endif + movq [dstq+strideq*0], m1 + movhps [dstq+strideq*1], m1 + movq [dstq+strideq*2], m0 + movhps [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + dec cntq + jge .loop + RET + +cglobal vp9_ipred_h_16x16, 3, 5, %2, dst, stride, l, stride3, cnt +%if cpuflag(ssse3) + mova m5, [pb_1] + mova m6, [pb_2] + mova m7, [pb_3] + pxor m4, m4 +%endif + lea stride3q, [strideq*3] + mov cntq, 3 +.loop: + movd m3, [lq+cntq*4] +%if cpuflag(ssse3) + pshufb m0, m3, m7 + pshufb m1, m3, m6 +%else + punpcklbw m3, m3 + punpcklwd m3, m3 + pshufd m0, m3, q3333 + pshufd m1, m3, q2222 +%endif + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m1 +%if cpuflag(ssse3) + pshufb m2, m3, m5 + pshufb m3, m4 +%else + pshufd m2, m3, q1111 + pshufd m3, m3, q0000 +%endif + mova [dstq+strideq*2], m2 + mova [dstq+stride3q ], m3 + lea dstq, [dstq+strideq*4] + dec cntq + jge .loop + RET + +cglobal vp9_ipred_h_32x32, 3, 5, %2, dst, stride, l, stride3, cnt +%if cpuflag(ssse3) + mova m5, [pb_1] + mova m6, [pb_2] + mova m7, [pb_3] + pxor m4, m4 +%endif + lea stride3q, [strideq*3] + mov cntq, 7 +.loop: + movd m3, [lq+cntq*4] +%if cpuflag(ssse3) + pshufb m0, m3, m7 + pshufb m1, m3, m6 +%else + punpcklbw m3, m3 + punpcklwd m3, m3 + pshufd m0, m3, q3333 + pshufd m1, m3, q2222 +%endif + mova [dstq+strideq*0+ 0], m0 + mova [dstq+strideq*0+16], m0 + mova [dstq+strideq*1+ 0], m1 + mova [dstq+strideq*1+16], m1 +%if cpuflag(ssse3) + pshufb m2, m3, m5 + pshufb m3, m4 +%else + pshufd m2, m3, q1111 + pshufd m3, m3, q0000 +%endif + mova [dstq+strideq*2+ 0], m2 + mova [dstq+strideq*2+16], m2 + mova [dstq+stride3q + 0], m3 + mova [dstq+stride3q +16], m3 + lea dstq, [dstq+strideq*4] + dec cntq + jge .loop + RET +%endmacro + +INIT_XMM sse2 +H_XMM_FUNCS 2, 4 +INIT_XMM ssse3 +H_XMM_FUNCS 4, 8 +INIT_XMM avx +H_XMM_FUNCS 4, 8 + +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +cglobal vp9_ipred_h_32x32, 3, 5, 8, dst, stride, l, stride3, cnt + mova m5, [pb_1] + mova m6, [pb_2] + mova m7, [pb_3] + pxor m4, m4 + lea stride3q, [strideq*3] + mov cntq, 7 +.loop: + movd xm3, [lq+cntq*4] + vinserti128 m3, m3, xm3, 1 + pshufb m0, m3, m7 + pshufb m1, m3, m6 + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m1 + pshufb m2, m3, m5 + pshufb m3, m4 + mova [dstq+strideq*2], m2 + mova [dstq+stride3q ], m3 + lea dstq, [dstq+strideq*4] + dec cntq + jge .loop + RET +%endif + +; tm + +%macro TM_MMX_FUNCS 0 +cglobal vp9_ipred_tm_4x4, 4, 4, 0, dst, stride, l, a + pxor m1, m1 + movd m0, [aq] + pinsrw m2, [aq-1], 0 + punpcklbw m0, m1 + DEFINE_ARGS dst, stride, l, cnt +%if cpuflag(ssse3) + mova m3, [pw_m256] + mova m1, [pw_m255] + pshufb m2, m3 +%else + punpcklbw m2, m1 + pshufw m2, m2, q0000 +%endif + psubw m0, m2 + mov cntq, 1 +.loop: + pinsrw m2, [lq+cntq*2], 0 +%if cpuflag(ssse3) + pshufb m4, m2, m1 + pshufb m2, m3 +%else + punpcklbw m2, m1 + pshufw m4, m2, q1111 + pshufw m2, m2, q0000 +%endif + paddw m4, m0 + paddw m2, m0 + packuswb m4, m4 + packuswb m2, m2 + movd [dstq+strideq*0], m4 + movd [dstq+strideq*1], m2 + lea dstq, [dstq+strideq*2] + dec cntq + jge .loop + RET +%endmacro + +INIT_MMX mmxext +TM_MMX_FUNCS +INIT_MMX ssse3 +TM_MMX_FUNCS + +%macro TM_XMM_FUNCS 0 +cglobal vp9_ipred_tm_8x8, 4, 4, 5, dst, stride, l, a + pxor m1, m1 + movh m0, [aq] + pinsrw m2, [aq-1], 0 + punpcklbw m0, m1 + DEFINE_ARGS dst, stride, l, cnt +%if cpuflag(ssse3) + mova m3, [pw_m256] + mova m1, [pw_m255] + pshufb m2, m3 +%else + punpcklbw m2, m1 + punpcklwd m2, m2 + pshufd m2, m2, q0000 +%endif + psubw m0, m2 + mov cntq, 3 +.loop: + pinsrw m2, [lq+cntq*2], 0 +%if cpuflag(ssse3) + pshufb m4, m2, m1 + pshufb m2, m3 +%else + punpcklbw m2, m1 + punpcklwd m2, m2 + pshufd m4, m2, q1111 + pshufd m2, m2, q0000 +%endif + paddw m4, m0 + paddw m2, m0 + packuswb m4, m2 + movh [dstq+strideq*0], m4 + movhps [dstq+strideq*1], m4 + lea dstq, [dstq+strideq*2] + dec cntq + jge .loop + RET + +cglobal vp9_ipred_tm_16x16, 4, 4, 8, dst, stride, l, a + pxor m3, m3 + mova m0, [aq] + pinsrw m2, [aq-1], 0 + punpckhbw m1, m0, m3 + punpcklbw m0, m3 + DEFINE_ARGS dst, stride, l, cnt +%if cpuflag(ssse3) + mova m4, [pw_m256] + mova m3, [pw_m255] + pshufb m2, m4 +%else + punpcklbw m2, m3 + punpcklwd m2, m2 + pshufd m2, m2, q0000 +%endif + psubw m1, m2 + psubw m0, m2 + mov cntq, 7 +.loop: + pinsrw m7, [lq+cntq*2], 0 +%if cpuflag(ssse3) + pshufb m5, m7, m3 + pshufb m7, m4 +%else + punpcklbw m7, m3 + punpcklwd m7, m7 + pshufd m5, m7, q1111 + pshufd m7, m7, q0000 +%endif + paddw m2, m5, m0 + paddw m5, m1 + paddw m6, m7, m0 + paddw m7, m1 + packuswb m2, m5 + packuswb m6, m7 + mova [dstq+strideq*0], m2 + mova [dstq+strideq*1], m6 + lea dstq, [dstq+strideq*2] + dec cntq + jge .loop + RET + +%if ARCH_X86_64 +%define mem 0 +%else +%define mem 64 +%endif +cglobal vp9_ipred_tm_32x32, 4, 4, 14, mem, dst, stride, l, a + pxor m5, m5 + pinsrw m4, [aq-1], 0 + mova m0, [aq] + mova m2, [aq+16] + DEFINE_ARGS dst, stride, l, cnt +%if cpuflag(ssse3) +%if ARCH_X86_64 + mova m12, [pw_m256] + mova m13, [pw_m255] +%define pw_m256_reg m12 +%define pw_m255_reg m13 +%else +%define pw_m256_reg [pw_m256] +%define pw_m255_reg [pw_m255] +%endif + pshufb m4, pw_m256_reg +%else + punpcklbw m4, m5 + punpcklwd m4, m4 + pshufd m4, m4, q0000 +%endif + punpckhbw m1, m0, m5 + punpckhbw m3, m2, m5 + punpcklbw m0, m5 + punpcklbw m2, m5 + psubw m1, m4 + psubw m0, m4 + psubw m3, m4 + psubw m2, m4 +%if ARCH_X86_64 + SWAP 0, 8 + SWAP 1, 9 + SWAP 2, 10 + SWAP 3, 11 +%else + mova [rsp+0*16], m0 + mova [rsp+1*16], m1 + mova [rsp+2*16], m2 + mova [rsp+3*16], m3 +%endif + mov cntq, 15 +.loop: + pinsrw m3, [lq+cntq*2], 0 +%if cpuflag(ssse3) + pshufb m7, m3, pw_m255_reg + pshufb m3, pw_m256_reg +%else + pxor m7, m7 + punpcklbw m3, m7 + punpcklwd m3, m3 + pshufd m7, m3, q1111 + pshufd m3, m3, q0000 +%endif +%if ARCH_X86_64 + paddw m4, m7, m8 + paddw m5, m7, m9 + paddw m6, m7, m10 + paddw m7, m11 + paddw m0, m3, m8 + paddw m1, m3, m9 + paddw m2, m3, m10 + paddw m3, m11 +%else + paddw m4, m7, [rsp+0*16] + paddw m5, m7, [rsp+1*16] + paddw m6, m7, [rsp+2*16] + paddw m7, [rsp+3*16] + paddw m0, m3, [rsp+0*16] + paddw m1, m3, [rsp+1*16] + paddw m2, m3, [rsp+2*16] + paddw m3, [rsp+3*16] +%endif + packuswb m4, m5 + packuswb m6, m7 + packuswb m0, m1 + packuswb m2, m3 + mova [dstq+strideq*0+ 0], m4 + mova [dstq+strideq*0+16], m6 + mova [dstq+strideq*1+ 0], m0 + mova [dstq+strideq*1+16], m2 + lea dstq, [dstq+strideq*2] + dec cntq + jge .loop + RET +%undef pw_m256_reg +%undef pw_m255_reg +%undef mem +%endmacro + +INIT_XMM sse2 +TM_XMM_FUNCS +INIT_XMM ssse3 +TM_XMM_FUNCS +INIT_XMM avx +TM_XMM_FUNCS + +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +cglobal vp9_ipred_tm_32x32, 4, 4, 8, dst, stride, l, a + pxor m3, m3 + pinsrw xm2, [aq-1], 0 + vinserti128 m2, m2, xm2, 1 + mova m0, [aq] + DEFINE_ARGS dst, stride, l, cnt + mova m4, [pw_m256] + mova m5, [pw_m255] + pshufb m2, m4 + punpckhbw m1, m0, m3 + punpcklbw m0, m3 + psubw m1, m2 + psubw m0, m2 + mov cntq, 15 +.loop: + pinsrw xm7, [lq+cntq*2], 0 + vinserti128 m7, m7, xm7, 1 + pshufb m3, m7, m5 + pshufb m7, m4 + paddw m2, m3, m0 + paddw m3, m1 + paddw m6, m7, m0 + paddw m7, m1 + packuswb m2, m3 + packuswb m6, m7 + mova [dstq+strideq*0], m2 + mova [dstq+strideq*1], m6 + lea dstq, [dstq+strideq*2] + dec cntq + jge .loop + RET +%endif + +; dl + +%macro LOWPASS 4 ; left [dst], center, right, tmp + pxor m%4, m%1, m%3 + pand m%4, [pb_1] + pavgb m%1, m%3 + psubusb m%1, m%4 + pavgb m%1, m%2 +%endmacro + +%macro DL_MMX_FUNCS 0 +cglobal vp9_ipred_dl_4x4, 4, 4, 0, dst, stride, l, a + movq m1, [aq] +%if cpuflag(ssse3) + pshufb m0, m1, [pb_0to5_2x7] + pshufb m2, m1, [pb_2to6_3x7] +%else + punpckhbw m3, m1, m1 ; 44556677 + pand m0, m1, [pb_6xm1_2x0] ; 012345__ + pand m3, [pb_6x0_2xm1] ; ______77 + psrlq m2, m1, 16 ; 234567__ + por m0, m3 ; 01234577 + por m2, m3 ; 23456777 +%endif + psrlq m1, 8 + LOWPASS 0, 1, 2, 3 + + pshufw m1, m0, q3321 + movd [dstq+strideq*0], m0 + movd [dstq+strideq*2], m1 + psrlq m0, 8 + psrlq m1, 8 + add dstq, strideq + movd [dstq+strideq*0], m0 + movd [dstq+strideq*2], m1 + RET +%endmacro + +INIT_MMX mmxext +DL_MMX_FUNCS +INIT_MMX ssse3 +DL_MMX_FUNCS + +%macro DL_XMM_FUNCS 0 +cglobal vp9_ipred_dl_8x8, 4, 4, 4, dst, stride, stride5, a + movq m0, [aq] + lea stride5q, [strideq*5] +%if cpuflag(ssse3) + pshufb m1, m0, [pb_1to6_10x7] +%else + punpcklbw m1, m0, m0 ; 0011223344556677 + punpckhwd m1, m1 ; 4x4,4x5,4x6,4x7 +%endif + shufps m0, m1, q3310 +%if notcpuflag(ssse3) + psrldq m1, m0, 1 + shufps m1, m0, q3210 +%endif + psrldq m2, m1, 1 + LOWPASS 0, 1, 2, 3 + + pshufd m1, m0, q3321 + movq [dstq+strideq*0], m0 + movq [dstq+strideq*4], m1 + psrldq m0, 1 + psrldq m1, 1 + movq [dstq+strideq*1], m0 + movq [dstq+stride5q ], m1 + lea dstq, [dstq+strideq*2] + psrldq m0, 1 + psrldq m1, 1 + movq [dstq+strideq*0], m0 + movq [dstq+strideq*4], m1 + psrldq m0, 1 + psrldq m1, 1 + movq [dstq+strideq*1], m0 + movq [dstq+stride5q ], m1 + RET + +cglobal vp9_ipred_dl_16x16, 4, 4, 6, dst, stride, l, a + mova m0, [aq] +%if cpuflag(ssse3) + mova m5, [pb_1toE_2xF] + pshufb m1, m0, m5 + pshufb m2, m1, m5 + pshufb m4, m0, [pb_15] +%else + pand m5, m0, [pb_15x0_1xm1] ; _______________F + psrldq m1, m0, 1 ; 123456789ABCDEF_ + por m1, m5 ; 123456789ABCDEFF + psrldq m2, m1, 1 ; 23456789ABCDEFF_ + por m2, m5 ; 23456789ABCDEFFF + pshufhw m4, m1, q3333 ; xxxxxxxxFFFFFFFF +%endif + LOWPASS 0, 1, 2, 3 + DEFINE_ARGS dst, stride, cnt, stride9 + lea stride9q, [strideq+strideq*8] + mov cntd, 4 + +.loop: + movhlps m4, m0 + mova [dstq+strideq*0], m0 +%if cpuflag(ssse3) + pshufb m0, m5 +%else + psrldq m0, 1 + por m0, m5 +%endif + mova [dstq+strideq*8], m4 + movhlps m4, m0 + mova [dstq+strideq*1], m0 +%if cpuflag(ssse3) + pshufb m0, m5 +%else + psrldq m0, 1 + por m0, m5 +%endif + mova [dstq+stride9q ], m4 + lea dstq, [dstq+strideq*2] + dec cntd + jg .loop + RET + +cglobal vp9_ipred_dl_32x32, 4, 5, 8, dst, stride, cnt, a, dst16 + mova m0, [aq] + mova m1, [aq+16] + PALIGNR m2, m1, m0, 1, m4 + PALIGNR m3, m1, m0, 2, m4 + LOWPASS 0, 2, 3, 4 +%if cpuflag(ssse3) + mova m5, [pb_1toE_2xF] + pshufb m2, m1, m5 + pshufb m3, m2, m5 + pshufb m6, m1, [pb_15] + mova m7, m6 +%else + pand m5, m1, [pb_15x0_1xm1] ; _______________F + psrldq m2, m1, 1 ; 123456789ABCDEF_ + por m2, m5 ; 123456789ABCDEFF + psrldq m3, m2, 1 ; 23456789ABCDEFF_ + por m3, m5 ; 23456789ABCDEFFF + pshufhw m7, m2, q3333 ; xxxxxxxxFFFFFFFF + pshufd m6, m7, q3333 +%endif + LOWPASS 1, 2, 3, 4 + lea dst16q, [dstq +strideq*8] + mov cntd, 8 + lea dst16q, [dst16q+strideq*8] +.loop: + movhlps m7, m1 + mova [dstq +strideq*0+ 0], m0 + mova [dstq +strideq*0+16], m1 + movhps [dstq+strideq*8+ 0], m0 + movq [dstq +strideq*8+ 8], m1 + mova [dstq +strideq*8+16], m7 + mova [dst16q+strideq*0+ 0], m1 + mova [dst16q+strideq*0+16], m6 + mova [dst16q+strideq*8+ 0], m7 + mova [dst16q+strideq*8+16], m6 +%if cpuflag(avx) + vpalignr m0, m1, m0, 1 + pshufb m1, m5 +%elif cpuflag(ssse3) + palignr m2, m1, m0, 1 + pshufb m1, m5 + mova m0, m2 +%else + mova m4, m1 + psrldq m0, 1 + pslldq m4, 15 + psrldq m1, 1 + por m0, m4 + por m1, m5 +%endif + add dstq, strideq + add dst16q, strideq + dec cntd + jg .loop + RET +%endmacro + +INIT_XMM sse2 +DL_XMM_FUNCS +INIT_XMM ssse3 +DL_XMM_FUNCS +INIT_XMM avx +DL_XMM_FUNCS + +; dr + +%macro DR_MMX_FUNCS 0 +cglobal vp9_ipred_dr_4x4, 4, 4, 0, dst, stride, l, a + movd m0, [lq] + punpckldq m0, [aq-1] + movd m1, [aq+3] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + PALIGNR m1, m0, 1, m3 + psrlq m2, m1, 8 + LOWPASS 0, 1, 2, 3 + + movd [dstq+stride3q ], m0 + psrlq m0, 8 + movd [dstq+strideq*2], m0 + psrlq m0, 8 + movd [dstq+strideq*1], m0 + psrlq m0, 8 + movd [dstq+strideq*0], m0 + RET +%endmacro + +INIT_MMX mmxext +DR_MMX_FUNCS +INIT_MMX ssse3 +DR_MMX_FUNCS + +%macro DR_XMM_FUNCS 0 +cglobal vp9_ipred_dr_8x8, 4, 4, 4, dst, stride, l, a + movq m1, [lq] + movhps m1, [aq-1] + movd m2, [aq+7] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + pslldq m0, m1, 1 + PALIGNR m2, m1, 1, m3 + LOWPASS 0, 1, 2, 3 + + movhps [dstq+strideq*0], m0 + pslldq m0, 1 + movhps [dstq+strideq*1], m0 + pslldq m0, 1 + movhps [dstq+strideq*2], m0 + pslldq m0, 1 + movhps [dstq+stride3q ], m0 + pslldq m0, 1 + lea dstq, [dstq+strideq*4] + movhps [dstq+strideq*0], m0 + pslldq m0, 1 + movhps [dstq+strideq*1], m0 + pslldq m0, 1 + movhps [dstq+strideq*2], m0 + pslldq m0, 1 + movhps [dstq+stride3q ], m0 + RET + +cglobal vp9_ipred_dr_16x16, 4, 4, 6, dst, stride, l, a + mova m1, [lq] + movu m2, [aq-1] + movd m4, [aq+15] + DEFINE_ARGS dst, stride, stride9, cnt + lea stride9q, [strideq *3] + mov cntd, 4 + lea stride9q, [stride9q*3] + PALIGNR m4, m2, 1, m5 + PALIGNR m3, m2, m1, 15, m5 + LOWPASS 3, 2, 4, 5 + pslldq m0, m1, 1 + PALIGNR m2, m1, 1, m4 + LOWPASS 0, 1, 2, 4 + +.loop: + mova [dstq+strideq*0 ], m3 + movhps [dstq+strideq*8+0], m0 + movq [dstq+strideq*8+8], m3 + PALIGNR m3, m0, 15, m1 + pslldq m0, 1 + mova [dstq+strideq*1 ], m3 + movhps [dstq+stride9q +0], m0 + movq [dstq+stride9q +8], m3 + PALIGNR m3, m0, 15, m1 + pslldq m0, 1 + lea dstq, [dstq+strideq*2] + dec cntd + jg .loop + RET + +cglobal vp9_ipred_dr_32x32, 4, 4, 8, dst, stride, l, a + mova m1, [lq] + mova m2, [lq+16] + movu m3, [aq-1] + movu m4, [aq+15] + movd m5, [aq+31] + DEFINE_ARGS dst, stride, stride8, cnt + lea stride8q, [strideq*8] + PALIGNR m5, m4, 1, m7 + PALIGNR m6, m4, m3, 15, m7 + LOWPASS 5, 4, 6, 7 + PALIGNR m4, m3, 1, m7 + PALIGNR m6, m3, m2, 15, m7 + LOWPASS 4, 3, 6, 7 + PALIGNR m3, m2, 1, m7 + PALIGNR m6, m2, m1, 15, m7 + LOWPASS 3, 2, 6, 7 + PALIGNR m2, m1, 1, m6 + pslldq m0, m1, 1 + LOWPASS 2, 1, 0, 6 + mov cntd, 16 + + ; out=m2/m3/m4/m5 +.loop: + mova [dstq+stride8q*0+ 0], m4 + mova [dstq+stride8q*0+16], m5 + mova [dstq+stride8q*2+ 0], m3 + mova [dstq+stride8q*2+16], m4 + PALIGNR m5, m4, 15, m6 + PALIGNR m4, m3, 15, m6 + PALIGNR m3, m2, 15, m6 + pslldq m2, 1 + add dstq, strideq + dec cntd + jg .loop + RET +%endmacro + +INIT_XMM sse2 +DR_XMM_FUNCS +INIT_XMM ssse3 +DR_XMM_FUNCS +INIT_XMM avx +DR_XMM_FUNCS + +; vl + +INIT_MMX mmxext +cglobal vp9_ipred_vl_4x4, 4, 4, 0, dst, stride, l, a + movq m0, [aq] + psrlq m1, m0, 8 + psrlq m2, m1, 8 + LOWPASS 2, 1, 0, 3 + pavgb m1, m0 + movd [dstq+strideq*0], m1 + movd [dstq+strideq*1], m2 + lea dstq, [dstq+strideq*2] + psrlq m1, 8 + psrlq m2, 8 + movd [dstq+strideq*0], m1 + movd [dstq+strideq*1], m2 + RET + +%macro VL_XMM_FUNCS 0 +cglobal vp9_ipred_vl_8x8, 4, 4, 4, dst, stride, l, a + movq m0, [aq] +%if cpuflag(ssse3) + pshufb m0, [pb_0to6_9x7] +%else + punpcklbw m1, m0, m0 + punpckhwd m1, m1 + shufps m0, m1, q3310 +%endif + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + psrldq m1, m0, 1 + psrldq m2, m0, 2 + LOWPASS 2, 1, 0, 3 + pavgb m1, m0 + + movq [dstq+strideq*0], m1 + movq [dstq+strideq*1], m2 + psrldq m1, 1 + psrldq m2, 1 + movq [dstq+strideq*2], m1 + movq [dstq+stride3q ], m2 + lea dstq, [dstq+strideq*4] + psrldq m1, 1 + psrldq m2, 1 + movq [dstq+strideq*0], m1 + movq [dstq+strideq*1], m2 + psrldq m1, 1 + psrldq m2, 1 + movq [dstq+strideq*2], m1 + movq [dstq+stride3q ], m2 + RET + +cglobal vp9_ipred_vl_16x16, 4, 4, 5, dst, stride, l, a + mova m0, [aq] + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] +%if cpuflag(ssse3) + mova m4, [pb_1toE_2xF] + pshufb m1, m0, m4 + pshufb m2, m1, m4 +%else + pand m4, m0, [pb_15x0_1xm1] ; _______________F + psrldq m1, m0, 1 ; 123456789ABCDEF_ + por m1, m4 ; 123456789ABCDEFF + psrldq m2, m1, 1 ; 23456789ABCDEFF_ + por m2, m4 ; 23456789ABCDEFFF +%endif + LOWPASS 2, 1, 0, 3 + pavgb m1, m0 + mov cntd, 4 +.loop: + mova [dstq+strideq*0], m1 + mova [dstq+strideq*1], m2 +%if cpuflag(ssse3) + pshufb m1, m4 + pshufb m2, m4 +%else + psrldq m1, 1 + psrldq m2, 1 + por m1, m4 + por m2, m4 +%endif + mova [dstq+strideq*2], m1 + mova [dstq+stride3q ], m2 +%if cpuflag(ssse3) + pshufb m1, m4 + pshufb m2, m4 +%else + psrldq m1, 1 + psrldq m2, 1 + por m1, m4 + por m2, m4 +%endif + lea dstq, [dstq+strideq*4] + dec cntd + jg .loop + RET + +cglobal vp9_ipred_vl_32x32, 4, 4, 7, dst, stride, l, a + mova m0, [aq] + mova m5, [aq+16] + DEFINE_ARGS dst, stride, dst16, cnt + PALIGNR m2, m5, m0, 1, m4 + PALIGNR m3, m5, m0, 2, m4 + lea dst16q, [dstq +strideq*8] + LOWPASS 3, 2, 0, 6 + pavgb m2, m0 +%if cpuflag(ssse3) + mova m4, [pb_1toE_2xF] + pshufb m0, m5, m4 + pshufb m1, m0, m4 +%else + pand m4, m5, [pb_15x0_1xm1] ; _______________F + psrldq m0, m5, 1 ; 123456789ABCDEF_ + por m0, m4 ; 123456789ABCDEFF + psrldq m1, m0, 1 ; 23456789ABCDEFF_ + por m1, m4 ; 23456789ABCDEFFF +%endif + lea dst16q, [dst16q+strideq*8] + LOWPASS 1, 0, 5, 6 + pavgb m0, m5 +%if cpuflag(ssse3) + pshufb m5, [pb_15] +%else + punpckhbw m5, m4, m4 + pshufhw m5, m5, q3333 + punpckhqdq m5, m5 +%endif + mov cntd, 8 + +.loop: +%macro %%write 3 + mova [dstq+stride%1+ 0], %2 + mova [dstq+stride%1+16], %3 + movhps [dst16q+stride%1 ], %2 + movu [dst16q+stride%1+ 8], %3 + movq [dst16q+stride%1+24], m5 +%if cpuflag(avx) + palignr %2, %3, %2, 1 + pshufb %3, m4 +%elif cpuflag(ssse3) + palignr m6, %3, %2, 1 + pshufb %3, m4 + mova %2, m6 +%else + pslldq m6, %3, 15 + psrldq %3, 1 + psrldq %2, 1 + por %3, m4 + por %2, m6 +%endif +%endmacro + + %%write q*0, m2, m0 + %%write q*1, m3, m1 + lea dstq, [dstq +strideq*2] + lea dst16q, [dst16q+strideq*2] + dec cntd + jg .loop + RET +%endmacro + +INIT_XMM sse2 +VL_XMM_FUNCS +INIT_XMM ssse3 +VL_XMM_FUNCS +INIT_XMM avx +VL_XMM_FUNCS + +; vr + +%macro VR_MMX_FUNCS 0 +cglobal vp9_ipred_vr_4x4, 4, 4, 0, dst, stride, l, a + movq m1, [aq-1] + punpckldq m2, [lq] + movd m0, [aq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + pavgb m0, m1 + PALIGNR m1, m2, 5, m3 + psrlq m2, m1, 8 + psllq m3, m1, 8 + LOWPASS 2, 1, 3, 4 + + ; ABCD <- for the following predictor: + ; EFGH + ; IABC | m0 contains ABCDxxxx + ; JEFG | m2 contains xJIEFGHx + +%if cpuflag(ssse3) + punpckldq m0, m2 + pshufb m2, [pb_13456_3xm1] + movd [dstq+strideq*0], m0 + pshufb m0, [pb_6012_4xm1] + movd [dstq+stride3q ], m2 + psrlq m2, 8 + movd [dstq+strideq*2], m0 + movd [dstq+strideq*1], m2 +%else + psllq m1, m2, 40 + psrlq m2, 24 + movd [dstq+strideq*0], m0 + movd [dstq+strideq*1], m2 + PALIGNR m0, m1, 7, m3 + psllq m1, 8 + PALIGNR m2, m1, 7, m3 + movd [dstq+strideq*2], m0 + movd [dstq+stride3q ], m2 +%endif + RET +%endmacro + +INIT_MMX mmxext +VR_MMX_FUNCS +INIT_MMX ssse3 +VR_MMX_FUNCS + +%macro VR_XMM_FUNCS 1 ; n_xmm_regs for 16x16 +cglobal vp9_ipred_vr_8x8, 4, 4, 5, dst, stride, l, a + movu m1, [aq-1] + movhps m2, [lq] + movq m0, [aq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + pavgb m0, m1 + PALIGNR m1, m2, 9, m3 + pslldq m2, m1, 1 + pslldq m3, m1, 2 + LOWPASS 1, 2, 3, 4 + + ; ABCDEFGH <- for the following predictor: + ; IJKLMNOP + ; QABCDEFG | m0 contains ABCDEFGHxxxxxxxx + ; RIJKLMNO | m1 contains xxVUTSRQIJKLMNOP + ; SQABCDEF + ; TRIJKLMN + ; USQABCDE + ; VTRIJKLM + +%if cpuflag(ssse3) + punpcklqdq m0, m1 ; ABCDEFGHxxVUTSRQ +%endif + movq [dstq+strideq*0], m0 + movhps [dstq+strideq*1], m1 +%if cpuflag(ssse3) + pshufb m0, [pb_6xm1_BDF_0to6] ; xxxxxxUSQABCDEFG + pshufb m1, [pb_6xm1_246_8toE] ; xxxxxxVTRIJKLMNO +%else + psrlw m2, m1, 8 ; x_U_S_Q_xxxxxxxx + pand m3, m1, [pw_255] ; x_V_T_R_xxxxxxxx + packuswb m3, m2 ; xVTRxxxxxUSQxxxx + pslldq m3, 4 ; xxxxxVTRxxxxxUSQ + PALIGNR m0, m3, 7, m4 ; xxxxxxUSQABCDEFG + psrldq m1, 8 + pslldq m3, 8 + PALIGNR m1, m3, 7, m4 ; xxxxxxVTRIJKLMNO +%endif + movhps [dstq+strideq*2], m0 + movhps [dstq+stride3q ], m1 + lea dstq, [dstq+strideq*4] + pslldq m0, 1 + pslldq m1, 1 + movhps [dstq+strideq*0], m0 + movhps [dstq+strideq*1], m1 + pslldq m0, 1 + pslldq m1, 1 + movhps [dstq+strideq*2], m0 + movhps [dstq+stride3q ], m1 + RET + +cglobal vp9_ipred_vr_16x16, 4, 4, %1, dst, stride, l, a + mova m0, [aq] + movu m1, [aq-1] + mova m2, [lq] + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + PALIGNR m3, m1, m2, 15, m6 + LOWPASS 3, 1, 0, 4 + pavgb m0, m1 + PALIGNR m1, m2, 1, m6 + pslldq m4, m2, 1 + LOWPASS 1, 2, 4, 5 +%if cpuflag(ssse3) + pshufb m1, [pb_02468ACE_13579BDF] +%else + psrlw m5, m1, 8 + pand m1, [pw_255] + packuswb m1, m5 +%endif + mov cntd, 4 + +.loop: + movlhps m2, m1 + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m3 + PALIGNR m4, m0, m1, 15, m6 + PALIGNR m5, m3, m2, 15, m6 + mova [dstq+strideq*2], m4 + mova [dstq+stride3q ], m5 + lea dstq, [dstq+strideq*4] + PALIGNR m0, m1, 14, m6 + PALIGNR m3, m2, 14, m6 + pslldq m1, 2 + dec cntd + jg .loop + RET + +cglobal vp9_ipred_vr_32x32, 4, 4, 9, dst, stride, l, a + mova m0, [aq] + mova m2, [aq+16] + movu m1, [aq-1] + PALIGNR m3, m2, m0, 15, m6 + PALIGNR m4, m2, m0, 14, m6 + LOWPASS 4, 3, 2, 5 + pavgb m3, m2 + mova m2, [lq+16] + PALIGNR m5, m1, m2, 15, m6 + LOWPASS 5, 1, 0, 6 + pavgb m0, m1 + mova m6, [lq] +%if ARCH_X86_64 + SWAP 0, 8 +%else + mova [dstq], m0 +%endif + PALIGNR m1, m2, 1, m0 + PALIGNR m7, m2, m6, 15, m0 + LOWPASS 1, 2, 7, 0 + PALIGNR m2, m6, 1, m0 + pslldq m7, m6, 1 + LOWPASS 2, 6, 7, 0 +%if cpuflag(ssse3) + pshufb m1, [pb_02468ACE_13579BDF] + pshufb m2, [pb_02468ACE_13579BDF] +%else + psrlw m0, m1, 8 + psrlw m6, m2, 8 + pand m1, [pw_255] + pand m2, [pw_255] + packuswb m1, m0 + packuswb m2, m6 +%endif + DEFINE_ARGS dst, stride, dst16, cnt + lea dst16q, [dstq +strideq*8] + lea dst16q, [dst16q+strideq*8] + SBUTTERFLY qdq, 2, 1, 6 +%if ARCH_X86_64 + SWAP 0, 8 +%else + mova m0, [dstq] +%endif + mov cntd, 8 + +.loop: + ; even lines (0, 2, 4, ...): m1 | m0, m3 + ; odd lines (1, 3, 5, ...): m2 | m5, m4 +%macro %%write 4 + mova [dstq+stride%1+ 0], %3 + mova [dstq+stride%1+16], %4 + movhps [dst16q+stride%1 ], %2 + movu [dst16q+stride%1+ 8], %3 + movq [dst16q+stride%1+24], %4 + PALIGNR %4, %3, 15, m6 + PALIGNR %3, %2, 15, m6 + pslldq %2, 1 +%endmacro + + %%write q*0, m1, m0, m3 + %%write q*1, m2, m5, m4 + lea dstq, [dstq +strideq*2] + lea dst16q, [dst16q+strideq*2] + dec cntd + jg .loop + RET +%endmacro + +INIT_XMM sse2 +VR_XMM_FUNCS 7 +INIT_XMM ssse3 +VR_XMM_FUNCS 6 +INIT_XMM avx +VR_XMM_FUNCS 6 + +; hd + +INIT_MMX mmxext +cglobal vp9_ipred_hd_4x4, 4, 4, 0, dst, stride, l, a + movd m0, [lq] + punpckldq m0, [aq-1] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + psrlq m1, m0, 8 + psrlq m2, m1, 8 + LOWPASS 2, 1, 0, 3 + pavgb m1, m0 + + ; DHIJ <- for the following predictor: + ; CGDH + ; BFCG | m1 contains ABCDxxxx + ; AEBF | m2 contains EFGHIJxx + + punpcklbw m1, m2 + punpckhdq m0, m1, m2 + + ; m1 contains AEBFCGDH + ; m0 contains CGDHIJxx + + movd [dstq+stride3q ], m1 + movd [dstq+strideq*1], m0 + psrlq m1, 16 + psrlq m0, 16 + movd [dstq+strideq*2], m1 + movd [dstq+strideq*0], m0 + RET + +%macro HD_XMM_FUNCS 0 +cglobal vp9_ipred_hd_8x8, 4, 4, 5, dst, stride, l, a + movq m0, [lq] + movhps m0, [aq-1] + DEFINE_ARGS dst, stride, stride3, dst4 + lea stride3q, [strideq*3] + lea dst4q, [dstq+strideq*4] + psrldq m1, m0, 1 + psrldq m2, m1, 1 + LOWPASS 2, 1, 0, 3 + pavgb m1, m0 + + ; HPQRSTUV <- for the following predictor + ; GOHPQRST + ; FNGOHPQR | m1 contains ABCDEFGHxxxxxxxx + ; EMFNGOHP | m2 contains IJKLMNOPQRSTUVxx + ; DLEMFNGO + ; CKDLEMFN + ; BJCKDLEM + ; AIBJCKDL + + punpcklbw m1, m2 + movhlps m2, m2 + + ; m1 contains AIBJCKDLEMFNGOHP + ; m2 contains QRSTUVxxxxxxxxxx + + movhps [dstq +stride3q ], m1 + movq [dst4q+stride3q ], m1 + PALIGNR m3, m2, m1, 2, m4 + movhps [dstq +strideq*2], m3 + movq [dst4q+strideq*2], m3 + PALIGNR m3, m2, m1, 4, m4 + movhps [dstq +strideq*1], m3 + movq [dst4q+strideq*1], m3 + PALIGNR m2, m1, 6, m4 + movhps [dstq +strideq*0], m2 + movq [dst4q+strideq*0], m2 + RET + +cglobal vp9_ipred_hd_16x16, 4, 6, 7, dst, stride, l, a + mova m0, [lq] + movu m3, [aq-1] + DEFINE_ARGS dst, stride, stride4, dst4, dst8, dst12 + lea stride4q, [strideq*4] + lea dst4q, [dstq +stride4q] + lea dst8q, [dst4q+stride4q] + lea dst12q, [dst8q+stride4q] + psrldq m4, m3, 1 + psrldq m5, m3, 2 + LOWPASS 5, 4, 3, 6 + PALIGNR m1, m3, m0, 1, m6 + PALIGNR m2, m3, m0, 2, m6 + LOWPASS 2, 1, 0, 6 + pavgb m1, m0 + SBUTTERFLY bw, 1, 2, 6 + + ; I PROBABLY INVERTED L0 ad L16 here + ; m1, m2, m5 +.loop: + sub stride4q, strideq + movhps [dstq +stride4q +0], m2 + movq [dstq +stride4q +8], m5 + mova [dst4q+stride4q ], m2 + movhps [dst8q+stride4q +0], m1 + movq [dst8q+stride4q +8], m2 + mova [dst12q+stride4q ], m1 +%if cpuflag(avx) + palignr m1, m2, m1, 2 + palignr m2, m5, m2, 2 +%elif cpuflag(ssse3) + palignr m3, m2, m1, 2 + palignr m0, m5, m2, 2 + mova m1, m3 + mova m2, m0 +%else + ; slightly modified version of PALIGNR + mova m6, m2 + mova m4, m5 + pslldq m6, 14 + pslldq m4, 14 + psrldq m1, 2 + psrldq m2, 2 + por m1, m6 + por m2, m4 +%endif + psrldq m5, 2 + jg .loop + RET + +cglobal vp9_ipred_hd_32x32, 4, 6, 8, dst, stride, l, a + mova m0, [lq] + mova m1, [lq+16] + movu m2, [aq-1] + movu m3, [aq+15] + DEFINE_ARGS dst, stride, stride8, dst8, dst16, dst24 + lea stride8q, [strideq*8] + lea dst8q, [dstq +stride8q] + lea dst16q, [dst8q +stride8q] + lea dst24q, [dst16q+stride8q] + psrldq m4, m3, 1 + psrldq m5, m3, 2 + LOWPASS 5, 4, 3, 6 + PALIGNR m4, m3, m2, 2, m6 + PALIGNR m3, m2, 1, m6 + LOWPASS 4, 3, 2, 6 + PALIGNR m3, m2, m1, 2, m6 + PALIGNR m2, m1, 1, m6 + LOWPASS 3, 2, 1, 6 + pavgb m2, m1 + PALIGNR m6, m1, m0, 1, m7 + PALIGNR m1, m0, 2, m7 + LOWPASS 1, 6, 0, 7 + pavgb m0, m6 + SBUTTERFLY bw, 2, 3, 6 + SBUTTERFLY bw, 0, 1, 6 + + ; m0, m1, m2, m3, m4, m5 +.loop: + sub stride8q, strideq + mova [dstq +stride8q+ 0], m3 + mova [dstq +stride8q+16], m4 + mova [dst8q +stride8q+ 0], m2 + mova [dst8q +stride8q+16], m3 + mova [dst16q+stride8q+ 0], m1 + mova [dst16q+stride8q+16], m2 + mova [dst24q+stride8q+ 0], m0 + mova [dst24q+stride8q+16], m1 +%if cpuflag(avx) + palignr m0, m1, m0, 2 + palignr m1, m2, m1, 2 + palignr m2, m3, m2, 2 + palignr m3, m4, m3, 2 + palignr m4, m5, m4, 2 + psrldq m5, 2 +%elif cpuflag(ssse3) + psrldq m6, m5, 2 + palignr m5, m4, 2 + palignr m4, m3, 2 + palignr m3, m2, 2 + palignr m2, m1, 2 + palignr m1, m0, 2 + mova m0, m1 + mova m1, m2 + mova m2, m3 + mova m3, m4 + mova m4, m5 + mova m5, m6 +%else + ; sort of a half-integrated version of PALIGNR + pslldq m7, m4, 14 + pslldq m6, m5, 14 + psrldq m4, 2 + psrldq m5, 2 + por m4, m6 + pslldq m6, m3, 14 + psrldq m3, 2 + por m3, m7 + pslldq m7, m2, 14 + psrldq m2, 2 + por m2, m6 + pslldq m6, m1, 14 + psrldq m1, 2 + por m1, m7 + psrldq m0, 2 + por m0, m6 +%endif + jg .loop + RET +%endmacro + +INIT_XMM sse2 +HD_XMM_FUNCS +INIT_XMM ssse3 +HD_XMM_FUNCS +INIT_XMM avx +HD_XMM_FUNCS + +%macro HU_MMX_FUNCS 0 +cglobal vp9_ipred_hu_4x4, 3, 3, 0, dst, stride, l + movd m0, [lq] +%if cpuflag(ssse3) + pshufb m0, [pb_0to2_5x3] +%else + punpcklbw m1, m0, m0 ; 00112233 + pshufw m1, m1, q3333 ; 33333333 + punpckldq m0, m1 ; 01233333 +%endif + psrlq m1, m0, 8 + psrlq m2, m1, 8 + LOWPASS 2, 1, 0, 3 + pavgb m1, m0 + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + SBUTTERFLY bw, 1, 2, 0 + PALIGNR m2, m1, 2, m0 + movd [dstq+strideq*0], m1 + movd [dstq+strideq*1], m2 + punpckhdq m1, m1 + punpckhdq m2, m2 + movd [dstq+strideq*2], m1 + movd [dstq+stride3q ], m2 + RET +%endmacro + +INIT_MMX mmxext +HU_MMX_FUNCS +INIT_MMX ssse3 +HU_MMX_FUNCS + +%macro HU_XMM_FUNCS 1 ; n_xmm_regs in hu_32x32 +cglobal vp9_ipred_hu_8x8, 3, 4, 4, dst, stride, l + movq m0, [lq] +%if cpuflag(ssse3) + pshufb m0, [pb_0to6_9x7] +%else + punpcklbw m1, m0, m0 ; 0011223344556677 + punpckhwd m1, m1 ; 4444555566667777 + shufps m0, m1, q3310 ; 0123456777777777 +%endif + psrldq m1, m0, 1 + psrldq m2, m1, 1 + LOWPASS 2, 1, 0, 3 + pavgb m1, m0 + DEFINE_ARGS dst, stride, stride3, dst4 + lea stride3q, [strideq*3] + lea dst4q, [dstq+strideq*4] + SBUTTERFLY bw, 1, 2, 0 + movq [dstq +strideq*0], m1 + movhps [dst4q+strideq*0], m1 + PALIGNR m0, m2, m1, 2, m3 + movq [dstq +strideq*1], m0 + movhps [dst4q+strideq*1], m0 + PALIGNR m0, m2, m1, 4, m3 + movq [dstq +strideq*2], m0 + movhps [dst4q+strideq*2], m0 + PALIGNR m2, m1, 6, m3 + movq [dstq +stride3q ], m2 + movhps [dst4q+stride3q ], m2 + RET + +cglobal vp9_ipred_hu_16x16, 3, 4, 5, dst, stride, l + mova m0, [lq] +%if cpuflag(ssse3) + mova m3, [pb_2toE_3xF] + pshufb m1, m0, [pb_1toE_2xF] + pshufb m2, m0, m3 +%else + pand m3, m0, [pb_15x0_1xm1] + psrldq m1, m0, 1 + por m1, m3 + punpckhbw m3, m3 + psrldq m2, m0, 2 + por m2, m3 +%endif + LOWPASS 2, 1, 0, 4 + pavgb m1, m0 + DEFINE_ARGS dst, stride, stride9, cnt + lea stride9q, [strideq*8+strideq] + mov cntd, 4 + SBUTTERFLY bw, 1, 2, 0 + +.loop: + mova [dstq+strideq*0], m1 + mova [dstq+strideq*8], m2 + PALIGNR m0, m2, m1, 2, m4 +%if cpuflag(ssse3) + pshufb m2, m3 +%else + psrldq m2, 2 + por m2, m3 +%endif + mova [dstq+strideq*1], m0 + mova [dstq+stride9q ], m2 + PALIGNR m1, m2, m0, 2, m4 +%if cpuflag(ssse3) + pshufb m2, m3 +%else + psrldq m2, 2 + por m2, m3 +%endif + lea dstq, [dstq+strideq*2] + dec cntd + jg .loop + RET + +cglobal vp9_ipred_hu_32x32, 3, 7, %1, dst, stride, l + mova m1, [lq] + mova m0, [lq+16] + PALIGNR m2, m0, m1, 1, m5 + PALIGNR m3, m0, m1, 2, m5 + LOWPASS 3, 2, 1, 5 + pavgb m2, m1 +%if cpuflag(ssse3) + mova m4, [pb_2toE_3xF] + pshufb m5, m0, [pb_1toE_2xF] + pshufb m1, m0, m4 +%else + pand m4, m0, [pb_15x0_1xm1] + psrldq m5, m0, 1 + por m5, m4 + punpckhbw m4, m4 + psrldq m1, m0, 2 + por m1, m4 +%endif + LOWPASS 1, 5, 0, 6 + pavgb m0, m5 + DEFINE_ARGS dst, stride, cnt, stride0, dst8, dst16, dst24 + mov cntd, 8 + xor stride0q, stride0q + lea dst8q, [dstq +strideq*8] + lea dst16q, [dst8q +strideq*8] + lea dst24q, [dst16q+strideq*8] + SBUTTERFLY bw, 0, 1, 5 + SBUTTERFLY bw, 2, 3, 5 +%if cpuflag(ssse3) + pshufb m6, m1, [pb_15] +%else + pshufhw m6, m4, q3333 + punpckhqdq m6, m6 +%endif + +.loop: + mova [dstq +stride0q+ 0], m2 + mova [dstq +stride0q+16], m3 + mova [dst8q +stride0q+ 0], m3 + mova [dst8q +stride0q+16], m0 + mova [dst16q+stride0q+ 0], m0 + mova [dst16q+stride0q+16], m1 + mova [dst24q+stride0q+ 0], m1 + mova [dst24q+stride0q+16], m6 +%if cpuflag(avx) + palignr m2, m3, m2, 2 + palignr m3, m0, m3, 2 + palignr m0, m1, m0, 2 + pshufb m1, m4 +%elif cpuflag(ssse3) + pshufb m5, m1, m4 + palignr m1, m0, 2 + palignr m0, m3, 2 + palignr m3, m2, 2 + mova m2, m3 + mova m3, m0 + mova m0, m1 + mova m1, m5 +%else + ; half-integrated version of PALIGNR + pslldq m5, m1, 14 + pslldq m7, m0, 14 + psrldq m1, 2 + psrldq m0, 2 + por m1, m4 + por m0, m5 + pslldq m5, m3, 14 + psrldq m3, 2 + por m3, m7 + psrldq m2, 2 + por m2, m5 +%endif + add stride0q, strideq + dec cntd + jg .loop + RET +%endmacro + +INIT_XMM sse2 +HU_XMM_FUNCS 8 +INIT_XMM ssse3 +HU_XMM_FUNCS 7 +INIT_XMM avx +HU_XMM_FUNCS 7 + +; FIXME 127, 128, 129 ? diff --git a/libs/ffvpx/libavcodec/x86/vp9intrapred_16bpp.asm b/libs/ffvpx/libavcodec/x86/vp9intrapred_16bpp.asm new file mode 100644 index 000000000..32b698243 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp9intrapred_16bpp.asm @@ -0,0 +1,2392 @@ +;****************************************************************************** +;* VP9 Intra prediction SIMD optimizations +;* +;* Copyright (c) 2015 Ronald S. Bultje <rsbultje gmail com> +;* Copyright (c) 2015 Henrik Gramner <henrik gramner com> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA 32 + +pd_2: times 8 dd 2 +pd_4: times 8 dd 4 +pd_8: times 8 dd 8 + +pb_2to15_14_15: db 2,3,4,5,6,7,8,9,10,11,12,13,14,15,14,15 +pb_4_5_8to13_8x0: db 4,5,8,9,10,11,12,13,0,0,0,0,0,0,0,0 +pb_0to7_67x4: db 0,1,2,3,4,5,6,7,6,7,6,7,6,7,6,7 + +cextern pw_1 +cextern pw_1023 +cextern pw_4095 +cextern pd_16 +cextern pd_32 +cextern pd_65535; + +; FIXME most top-only functions (ddl, vl, v, dc_top) can be modified to take +; only 3 registers on x86-32, which would make it one cycle faster, but that +; would make the code quite a bit uglier... + +SECTION .text + +%macro SCRATCH 3-4 +%if ARCH_X86_64 + SWAP %1, %2 +%if %0 == 4 +%define reg_%4 m%2 +%endif +%else + mova [%3], m%1 +%if %0 == 4 +%define reg_%4 [%3] +%endif +%endif +%endmacro + +%macro UNSCRATCH 3-4 +%if ARCH_X86_64 + SWAP %1, %2 +%else + mova m%1, [%3] +%endif +%if %0 == 4 +%undef reg_%4 +%endif +%endmacro + +%macro PRELOAD 2-3 +%if ARCH_X86_64 + mova m%1, [%2] +%if %0 == 3 +%define reg_%3 m%1 +%endif +%elif %0 == 3 +%define reg_%3 [%2] +%endif +%endmacro + +INIT_MMX mmx +cglobal vp9_ipred_v_4x4_16, 2, 4, 1, dst, stride, l, a + movifnidn aq, amp + mova m0, [aq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + RET + +INIT_XMM sse +cglobal vp9_ipred_v_8x8_16, 2, 4, 1, dst, stride, l, a + movifnidn aq, amp + mova m0, [aq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + RET + +INIT_XMM sse +cglobal vp9_ipred_v_16x16_16, 2, 4, 2, dst, stride, l, a + movifnidn aq, amp + mova m0, [aq] + mova m1, [aq+mmsize] + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + mov cntd, 4 +.loop: + mova [dstq+strideq*0+ 0], m0 + mova [dstq+strideq*0+16], m1 + mova [dstq+strideq*1+ 0], m0 + mova [dstq+strideq*1+16], m1 + mova [dstq+strideq*2+ 0], m0 + mova [dstq+strideq*2+16], m1 + mova [dstq+stride3q + 0], m0 + mova [dstq+stride3q +16], m1 + lea dstq, [dstq+strideq*4] + dec cntd + jg .loop + RET + +INIT_XMM sse +cglobal vp9_ipred_v_32x32_16, 2, 4, 4, dst, stride, l, a + movifnidn aq, amp + mova m0, [aq+mmsize*0] + mova m1, [aq+mmsize*1] + mova m2, [aq+mmsize*2] + mova m3, [aq+mmsize*3] + DEFINE_ARGS dst, stride, cnt + mov cntd, 16 +.loop: + mova [dstq+strideq*0+ 0], m0 + mova [dstq+strideq*0+16], m1 + mova [dstq+strideq*0+32], m2 + mova [dstq+strideq*0+48], m3 + mova [dstq+strideq*1+ 0], m0 + mova [dstq+strideq*1+16], m1 + mova [dstq+strideq*1+32], m2 + mova [dstq+strideq*1+48], m3 + lea dstq, [dstq+strideq*2] + dec cntd + jg .loop + RET + +INIT_MMX mmxext +cglobal vp9_ipred_h_4x4_16, 3, 3, 4, dst, stride, l, a + mova m3, [lq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + pshufw m0, m3, q3333 + pshufw m1, m3, q2222 + pshufw m2, m3, q1111 + pshufw m3, m3, q0000 + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m1 + mova [dstq+strideq*2], m2 + mova [dstq+stride3q ], m3 + RET + +INIT_XMM sse2 +cglobal vp9_ipred_h_8x8_16, 3, 3, 4, dst, stride, l, a + mova m2, [lq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + punpckhwd m3, m2, m2 + pshufd m0, m3, q3333 + pshufd m1, m3, q2222 + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m1 + pshufd m0, m3, q1111 + pshufd m1, m3, q0000 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m1 + lea dstq, [dstq+strideq*4] + punpcklwd m2, m2 + pshufd m0, m2, q3333 + pshufd m1, m2, q2222 + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m1 + pshufd m0, m2, q1111 + pshufd m1, m2, q0000 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m1 + RET + +INIT_XMM sse2 +cglobal vp9_ipred_h_16x16_16, 3, 5, 4, dst, stride, l, stride3, cnt + mov cntd, 3 + lea stride3q, [strideq*3] +.loop: + movh m3, [lq+cntq*8] + punpcklwd m3, m3 + pshufd m0, m3, q3333 + pshufd m1, m3, q2222 + pshufd m2, m3, q1111 + pshufd m3, m3, q0000 + mova [dstq+strideq*0+ 0], m0 + mova [dstq+strideq*0+16], m0 + mova [dstq+strideq*1+ 0], m1 + mova [dstq+strideq*1+16], m1 + mova [dstq+strideq*2+ 0], m2 + mova [dstq+strideq*2+16], m2 + mova [dstq+stride3q + 0], m3 + mova [dstq+stride3q +16], m3 + lea dstq, [dstq+strideq*4] + dec cntd + jge .loop + RET + +INIT_XMM sse2 +cglobal vp9_ipred_h_32x32_16, 3, 5, 4, dst, stride, l, stride3, cnt + mov cntd, 7 + lea stride3q, [strideq*3] +.loop: + movh m3, [lq+cntq*8] + punpcklwd m3, m3 + pshufd m0, m3, q3333 + pshufd m1, m3, q2222 + pshufd m2, m3, q1111 + pshufd m3, m3, q0000 + mova [dstq+strideq*0+ 0], m0 + mova [dstq+strideq*0+16], m0 + mova [dstq+strideq*0+32], m0 + mova [dstq+strideq*0+48], m0 + mova [dstq+strideq*1+ 0], m1 + mova [dstq+strideq*1+16], m1 + mova [dstq+strideq*1+32], m1 + mova [dstq+strideq*1+48], m1 + mova [dstq+strideq*2+ 0], m2 + mova [dstq+strideq*2+16], m2 + mova [dstq+strideq*2+32], m2 + mova [dstq+strideq*2+48], m2 + mova [dstq+stride3q + 0], m3 + mova [dstq+stride3q +16], m3 + mova [dstq+stride3q +32], m3 + mova [dstq+stride3q +48], m3 + lea dstq, [dstq+strideq*4] + dec cntd + jge .loop + RET + +INIT_MMX mmxext +cglobal vp9_ipred_dc_4x4_16, 4, 4, 2, dst, stride, l, a + mova m0, [lq] + paddw m0, [aq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + pmaddwd m0, [pw_1] + pshufw m1, m0, q3232 + paddd m0, [pd_4] + paddd m0, m1 + psrad m0, 3 + pshufw m0, m0, q0000 + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + RET + +INIT_XMM sse2 +cglobal vp9_ipred_dc_8x8_16, 4, 4, 2, dst, stride, l, a + mova m0, [lq] + paddw m0, [aq] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + pmaddwd m0, [pw_1] + pshufd m1, m0, q3232 + paddd m0, m1 + pshufd m1, m0, q1111 + paddd m0, [pd_8] + paddd m0, m1 + psrad m0, 4 + pshuflw m0, m0, q0000 + punpcklqdq m0, m0 + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + RET + +INIT_XMM sse2 +cglobal vp9_ipred_dc_16x16_16, 4, 4, 2, dst, stride, l, a + mova m0, [lq] + paddw m0, [lq+mmsize] + paddw m0, [aq] + paddw m0, [aq+mmsize] + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + mov cntd, 4 + pmaddwd m0, [pw_1] + pshufd m1, m0, q3232 + paddd m0, m1 + pshufd m1, m0, q1111 + paddd m0, [pd_16] + paddd m0, m1 + psrad m0, 5 + pshuflw m0, m0, q0000 + punpcklqdq m0, m0 +.loop: + mova [dstq+strideq*0+ 0], m0 + mova [dstq+strideq*0+16], m0 + mova [dstq+strideq*1+ 0], m0 + mova [dstq+strideq*1+16], m0 + mova [dstq+strideq*2+ 0], m0 + mova [dstq+strideq*2+16], m0 + mova [dstq+stride3q + 0], m0 + mova [dstq+stride3q +16], m0 + lea dstq, [dstq+strideq*4] + dec cntd + jg .loop + RET + +INIT_XMM sse2 +cglobal vp9_ipred_dc_32x32_16, 4, 4, 2, dst, stride, l, a + mova m0, [lq+mmsize*0] + paddw m0, [lq+mmsize*1] + paddw m0, [lq+mmsize*2] + paddw m0, [lq+mmsize*3] + paddw m0, [aq+mmsize*0] + paddw m0, [aq+mmsize*1] + paddw m0, [aq+mmsize*2] + paddw m0, [aq+mmsize*3] + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + mov cntd, 16 + pmaddwd m0, [pw_1] + pshufd m1, m0, q3232 + paddd m0, m1 + pshufd m1, m0, q1111 + paddd m0, [pd_32] + paddd m0, m1 + psrad m0, 6 + pshuflw m0, m0, q0000 + punpcklqdq m0, m0 +.loop: + mova [dstq+strideq*0+ 0], m0 + mova [dstq+strideq*0+16], m0 + mova [dstq+strideq*0+32], m0 + mova [dstq+strideq*0+48], m0 + mova [dstq+strideq*1+ 0], m0 + mova [dstq+strideq*1+16], m0 + mova [dstq+strideq*1+32], m0 + mova [dstq+strideq*1+48], m0 + lea dstq, [dstq+strideq*2] + dec cntd + jg .loop + RET + +%macro DC_1D_FNS 2 +INIT_MMX mmxext +cglobal vp9_ipred_dc_%1_4x4_16, 4, 4, 2, dst, stride, l, a + mova m0, [%2] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + pmaddwd m0, [pw_1] + pshufw m1, m0, q3232 + paddd m0, [pd_2] + paddd m0, m1 + psrad m0, 2 + pshufw m0, m0, q0000 + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + RET + +INIT_XMM sse2 +cglobal vp9_ipred_dc_%1_8x8_16, 4, 4, 2, dst, stride, l, a + mova m0, [%2] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + pmaddwd m0, [pw_1] + pshufd m1, m0, q3232 + paddd m0, m1 + pshufd m1, m0, q1111 + paddd m0, [pd_4] + paddd m0, m1 + psrad m0, 3 + pshuflw m0, m0, q0000 + punpcklqdq m0, m0 + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + RET + +INIT_XMM sse2 +cglobal vp9_ipred_dc_%1_16x16_16, 4, 4, 2, dst, stride, l, a + mova m0, [%2] + paddw m0, [%2+mmsize] + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + mov cntd, 4 + pmaddwd m0, [pw_1] + pshufd m1, m0, q3232 + paddd m0, m1 + pshufd m1, m0, q1111 + paddd m0, [pd_8] + paddd m0, m1 + psrad m0, 4 + pshuflw m0, m0, q0000 + punpcklqdq m0, m0 +.loop: + mova [dstq+strideq*0+ 0], m0 + mova [dstq+strideq*0+16], m0 + mova [dstq+strideq*1+ 0], m0 + mova [dstq+strideq*1+16], m0 + mova [dstq+strideq*2+ 0], m0 + mova [dstq+strideq*2+16], m0 + mova [dstq+stride3q + 0], m0 + mova [dstq+stride3q +16], m0 + lea dstq, [dstq+strideq*4] + dec cntd + jg .loop + RET + +INIT_XMM sse2 +cglobal vp9_ipred_dc_%1_32x32_16, 4, 4, 2, dst, stride, l, a + mova m0, [%2+mmsize*0] + paddw m0, [%2+mmsize*1] + paddw m0, [%2+mmsize*2] + paddw m0, [%2+mmsize*3] + DEFINE_ARGS dst, stride, cnt + mov cntd, 16 + pmaddwd m0, [pw_1] + pshufd m1, m0, q3232 + paddd m0, m1 + pshufd m1, m0, q1111 + paddd m0, [pd_16] + paddd m0, m1 + psrad m0, 5 + pshuflw m0, m0, q0000 + punpcklqdq m0, m0 +.loop: + mova [dstq+strideq*0+ 0], m0 + mova [dstq+strideq*0+16], m0 + mova [dstq+strideq*0+32], m0 + mova [dstq+strideq*0+48], m0 + mova [dstq+strideq*1+ 0], m0 + mova [dstq+strideq*1+16], m0 + mova [dstq+strideq*1+32], m0 + mova [dstq+strideq*1+48], m0 + lea dstq, [dstq+strideq*2] + dec cntd + jg .loop + RET +%endmacro + +DC_1D_FNS top, aq +DC_1D_FNS left, lq + +INIT_MMX mmxext +cglobal vp9_ipred_tm_4x4_10, 4, 4, 6, dst, stride, l, a + mova m5, [pw_1023] +.body: + mova m4, [aq] + mova m3, [lq] + movd m0, [aq-4] + pshufw m0, m0, q1111 + psubw m4, m0 + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + pshufw m0, m3, q3333 + pshufw m1, m3, q2222 + pshufw m2, m3, q1111 + pshufw m3, m3, q0000 + paddw m0, m4 + paddw m1, m4 + paddw m2, m4 + paddw m3, m4 + pxor m4, m4 + pmaxsw m0, m4 + pmaxsw m1, m4 + pmaxsw m2, m4 + pmaxsw m3, m4 + pminsw m0, m5 + pminsw m1, m5 + pminsw m2, m5 + pminsw m3, m5 + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m1 + mova [dstq+strideq*2], m2 + mova [dstq+stride3q ], m3 + RET + +cglobal vp9_ipred_tm_4x4_12, 4, 4, 6, dst, stride, l, a + mova m5, [pw_4095] + jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_4x4_10 %+ SUFFIX).body + +INIT_XMM sse2 +cglobal vp9_ipred_tm_8x8_10, 4, 5, 7, dst, stride, l, a + mova m4, [pw_1023] +.body: + pxor m6, m6 + mova m5, [aq] + movd m0, [aq-4] + pshuflw m0, m0, q1111 + punpcklqdq m0, m0 + psubw m5, m0 + DEFINE_ARGS dst, stride, l, stride3, cnt + lea stride3q, [strideq*3] + mov cntd, 1 +.loop: + movh m3, [lq+cntq*8] + punpcklwd m3, m3 + pshufd m0, m3, q3333 + pshufd m1, m3, q2222 + pshufd m2, m3, q1111 + pshufd m3, m3, q0000 + paddw m0, m5 + paddw m1, m5 + paddw m2, m5 + paddw m3, m5 + pmaxsw m0, m6 + pmaxsw m1, m6 + pmaxsw m2, m6 + pmaxsw m3, m6 + pminsw m0, m4 + pminsw m1, m4 + pminsw m2, m4 + pminsw m3, m4 + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m1 + mova [dstq+strideq*2], m2 + mova [dstq+stride3q ], m3 + lea dstq, [dstq+strideq*4] + dec cntd + jge .loop + RET + +cglobal vp9_ipred_tm_8x8_12, 4, 5, 7, dst, stride, l, a + mova m4, [pw_4095] + jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_8x8_10 %+ SUFFIX).body + +INIT_XMM sse2 +cglobal vp9_ipred_tm_16x16_10, 4, 4, 8, dst, stride, l, a + mova m7, [pw_1023] +.body: + pxor m6, m6 + mova m4, [aq] + mova m5, [aq+mmsize] + movd m0, [aq-4] + pshuflw m0, m0, q1111 + punpcklqdq m0, m0 + psubw m4, m0 + psubw m5, m0 + DEFINE_ARGS dst, stride, l, cnt + mov cntd, 7 +.loop: + movd m3, [lq+cntq*4] + punpcklwd m3, m3 + pshufd m2, m3, q1111 + pshufd m3, m3, q0000 + paddw m0, m2, m4 + paddw m2, m5 + paddw m1, m3, m4 + paddw m3, m5 + pmaxsw m0, m6 + pmaxsw m2, m6 + pmaxsw m1, m6 + pmaxsw m3, m6 + pminsw m0, m7 + pminsw m2, m7 + pminsw m1, m7 + pminsw m3, m7 + mova [dstq+strideq*0+ 0], m0 + mova [dstq+strideq*0+16], m2 + mova [dstq+strideq*1+ 0], m1 + mova [dstq+strideq*1+16], m3 + lea dstq, [dstq+strideq*2] + dec cntd + jge .loop + RET + +cglobal vp9_ipred_tm_16x16_12, 4, 4, 8, dst, stride, l, a + mova m7, [pw_4095] + jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_16x16_10 %+ SUFFIX).body + +INIT_XMM sse2 +cglobal vp9_ipred_tm_32x32_10, 4, 4, 10, 32 * -ARCH_X86_32, dst, stride, l, a + mova m0, [pw_1023] +.body: + pxor m1, m1 +%if ARCH_X86_64 + SWAP 0, 8 + SWAP 1, 9 +%define reg_min m9 +%define reg_max m8 +%else + mova [rsp+ 0], m0 + mova [rsp+16], m1 +%define reg_min [rsp+16] +%define reg_max [rsp+ 0] +%endif + + mova m4, [aq+mmsize*0] + mova m5, [aq+mmsize*1] + mova m6, [aq+mmsize*2] + mova m7, [aq+mmsize*3] + movd m0, [aq-4] + pshuflw m0, m0, q1111 + punpcklqdq m0, m0 + psubw m4, m0 + psubw m5, m0 + psubw m6, m0 + psubw m7, m0 + DEFINE_ARGS dst, stride, l, cnt + mov cntd, 31 +.loop: + pinsrw m3, [lq+cntq*2], 0 + punpcklwd m3, m3 + pshufd m3, m3, q0000 + paddw m0, m3, m4 + paddw m1, m3, m5 + paddw m2, m3, m6 + paddw m3, m7 + pmaxsw m0, reg_min + pmaxsw m1, reg_min + pmaxsw m2, reg_min + pmaxsw m3, reg_min + pminsw m0, reg_max + pminsw m1, reg_max + pminsw m2, reg_max + pminsw m3, reg_max + mova [dstq+strideq*0+ 0], m0 + mova [dstq+strideq*0+16], m1 + mova [dstq+strideq*0+32], m2 + mova [dstq+strideq*0+48], m3 + add dstq, strideq + dec cntd + jge .loop + RET + +cglobal vp9_ipred_tm_32x32_12, 4, 4, 10, 32 * -ARCH_X86_32, dst, stride, l, a + mova m0, [pw_4095] + jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_32x32_10 %+ SUFFIX).body + +; Directional intra predicion functions +; +; in the functions below, 'abcdefgh' refers to above data (sometimes simply +; abbreviated as a[N-M]). 'stuvwxyz' refers to left data (sometimes simply +; abbreviated as l[N-M]). * is top-left data. ABCDEFG or A[N-M] is filtered +; above data, STUVWXYZ or L[N-M] is filtered left data, and # is filtered +; top-left data. + +; left=(left+2*center+right+2)>>2 +%macro LOWPASS 3 ; left [dst], center, right + paddw m%1, m%3 + psraw m%1, 1 + pavgw m%1, m%2 +%endmacro + +; abcdefgh (src) -> bcdefghh (dst) +; dst/src can be the same register +%macro SHIFT_RIGHT 2-3 [pb_2to15_14_15] ; dst, src, [ssse3_shift_reg] +%if cpuflag(ssse3) + pshufb %1, %2, %3 ; abcdefgh -> bcdefghh +%else + psrldq %1, %2, 2 ; abcdefgh -> bcdefgh. + pshufhw %1, %1, q2210 ; bcdefgh. -> bcdefghh +%endif +%endmacro + +; abcdefgh (src) -> bcdefghh (dst1) and cdefghhh (dst2) +%macro SHIFT_RIGHTx2 3-4 [pb_2to15_14_15] ; dst1, dst2, src, [ssse3_shift_reg] +%if cpuflag(ssse3) + pshufb %1, %3, %4 ; abcdefgh -> bcdefghh + pshufb %2, %1, %4 ; bcdefghh -> cdefghhh +%else + psrldq %1, %3, 2 ; abcdefgh -> bcdefgh. + psrldq %2, %3, 4 ; abcdefgh -> cdefgh.. + pshufhw %1, %1, q2210 ; bcdefgh. -> bcdefghh + pshufhw %2, %2, q1110 ; cdefgh.. -> cdefghhh +%endif +%endmacro + +%macro DL_FUNCS 0 +cglobal vp9_ipred_dl_4x4_16, 2, 4, 3, dst, stride, l, a + movifnidn aq, amp + movu m1, [aq] ; abcdefgh + pshufhw m0, m1, q3310 ; abcdefhh + SHIFT_RIGHT m1, m1 ; bcdefghh + psrldq m2, m1, 2 ; cdefghh. + LOWPASS 0, 1, 2 ; BCDEFGh. + pshufd m1, m0, q3321 ; DEFGh... + movh [dstq+strideq*0], m0 + movh [dstq+strideq*2], m1 + add dstq, strideq + psrldq m0, 2 ; CDEFGh.. + psrldq m1, 2 ; EFGh.... + movh [dstq+strideq*0], m0 + movh [dstq+strideq*2], m1 + RET + +cglobal vp9_ipred_dl_8x8_16, 2, 4, 5, dst, stride, l, a + movifnidn aq, amp + mova m0, [aq] ; abcdefgh +%if cpuflag(ssse3) + mova m4, [pb_2to15_14_15] +%endif + SHIFT_RIGHTx2 m1, m2, m0, m4 ; bcdefghh/cdefghhh + LOWPASS 0, 1, 2 ; BCDEFGHh + shufps m1, m0, m2, q3332 ; FGHhhhhh + shufps m3, m0, m1, q2121 ; DEFGHhhh + DEFINE_ARGS dst, stride, stride5 + lea stride5q, [strideq*5] + + mova [dstq+strideq*0], m0 + mova [dstq+strideq*4], m1 + SHIFT_RIGHT m0, m0, m4 ; CDEFGHhh + pshuflw m1, m1, q3321 ; GHhhhhhh + pshufd m2, m0, q3321 ; EFGHhhhh + mova [dstq+strideq*1], m0 + mova [dstq+stride5q ], m1 + lea dstq, [dstq+strideq*2] + pshuflw m1, m1, q3321 ; Hhhhhhhh + mova [dstq+strideq*0], m3 + mova [dstq+strideq*4], m1 + pshuflw m1, m1, q3321 ; hhhhhhhh + mova [dstq+strideq*1], m2 + mova [dstq+stride5q ], m1 + RET + +cglobal vp9_ipred_dl_16x16_16, 2, 4, 5, dst, stride, l, a + movifnidn aq, amp + mova m0, [aq] ; abcdefgh + mova m3, [aq+mmsize] ; ijklmnop + PALIGNR m1, m3, m0, 2, m4 ; bcdefghi + PALIGNR m2, m3, m0, 4, m4 ; cdefghij + LOWPASS 0, 1, 2 ; BCDEFGHI +%if cpuflag(ssse3) + mova m4, [pb_2to15_14_15] +%endif + SHIFT_RIGHTx2 m2, m1, m3, m4 ; jklmnopp/klmnoppp + LOWPASS 1, 2, 3 ; JKLMNOPp + pshufd m2, m2, q3333 ; pppppppp + DEFINE_ARGS dst, stride, cnt + mov cntd, 8 + +.loop: + mova [dstq+strideq*0+ 0], m0 + mova [dstq+strideq*0+16], m1 + mova [dstq+strideq*8+ 0], m1 + mova [dstq+strideq*8+16], m2 + add dstq, strideq +%if cpuflag(avx) + vpalignr m0, m1, m0, 2 +%else + PALIGNR m3, m1, m0, 2, m4 + mova m0, m3 +%endif + SHIFT_RIGHT m1, m1, m4 + dec cntd + jg .loop + RET + +cglobal vp9_ipred_dl_32x32_16, 2, 5, 7, dst, stride, l, a + movifnidn aq, amp + mova m0, [aq+mmsize*0] ; abcdefgh + mova m1, [aq+mmsize*1] ; ijklmnop + mova m2, [aq+mmsize*2] ; qrstuvwx + mova m3, [aq+mmsize*3] ; yz012345 + PALIGNR m4, m1, m0, 2, m6 + PALIGNR m5, m1, m0, 4, m6 + LOWPASS 0, 4, 5 ; BCDEFGHI + PALIGNR m4, m2, m1, 2, m6 + PALIGNR m5, m2, m1, 4, m6 + LOWPASS 1, 4, 5 ; JKLMNOPQ + PALIGNR m4, m3, m2, 2, m6 + PALIGNR m5, m3, m2, 4, m6 + LOWPASS 2, 4, 5 ; RSTUVWXY +%if cpuflag(ssse3) + mova m6, [pb_2to15_14_15] +%endif + SHIFT_RIGHTx2 m4, m5, m3, m6 + LOWPASS 3, 4, 5 ; Z0123455 + pshufd m4, m4, q3333 ; 55555555 + DEFINE_ARGS dst, stride, stride8, stride24, cnt + mov cntd, 8 + lea stride8q, [strideq*8] + lea stride24q, [stride8q*3] + +.loop: + mova [dstq+stride8q*0+ 0], m0 + mova [dstq+stride8q*0+16], m1 + mova [dstq+stride8q*0+32], m2 + mova [dstq+stride8q*0+48], m3 + mova [dstq+stride8q*1+ 0], m1 + mova [dstq+stride8q*1+16], m2 + mova [dstq+stride8q*1+32], m3 + mova [dstq+stride8q*1+48], m4 + mova [dstq+stride8q*2+ 0], m2 + mova [dstq+stride8q*2+16], m3 + mova [dstq+stride8q*2+32], m4 + mova [dstq+stride8q*2+48], m4 + mova [dstq+stride24q + 0], m3 + mova [dstq+stride24q +16], m4 + mova [dstq+stride24q +32], m4 + mova [dstq+stride24q +48], m4 + add dstq, strideq +%if cpuflag(avx) + vpalignr m0, m1, m0, 2 + vpalignr m1, m2, m1, 2 + vpalignr m2, m3, m2, 2 +%else + PALIGNR m5, m1, m0, 2, m6 + mova m0, m5 + PALIGNR m5, m2, m1, 2, m6 + mova m1, m5 + PALIGNR m5, m3, m2, 2, m6 + mova m2, m5 +%endif + SHIFT_RIGHT m3, m3, m6 + dec cntd + jg .loop + RET +%endmacro + +INIT_XMM sse2 +DL_FUNCS +INIT_XMM ssse3 +DL_FUNCS +INIT_XMM avx +DL_FUNCS + +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +cglobal vp9_ipred_dl_16x16_16, 2, 4, 5, dst, stride, l, a + movifnidn aq, amp + mova m0, [aq] ; abcdefghijklmnop + vpbroadcastw xm1, [aq+30] ; pppppppp + vperm2i128 m2, m0, m1, q0201 ; ijklmnoppppppppp + vpalignr m3, m2, m0, 2 ; bcdefghijklmnopp + vpalignr m4, m2, m0, 4 ; cdefghijklmnoppp + LOWPASS 0, 3, 4 ; BCDEFGHIJKLMNOPp + vperm2i128 m2, m0, m1, q0201 ; JKLMNOPppppppppp + DEFINE_ARGS dst, stride, stride3, cnt + mov cntd, 2 + lea stride3q, [strideq*3] + +.loop: + mova [dstq+strideq*0], m0 + vpalignr m3, m2, m0, 2 + vpalignr m4, m2, m0, 4 + mova [dstq+strideq*1], m3 + mova [dstq+strideq*2], m4 + vpalignr m3, m2, m0, 6 + vpalignr m4, m2, m0, 8 + mova [dstq+stride3q ], m3 + lea dstq, [dstq+strideq*4] + mova [dstq+strideq*0], m4 + vpalignr m3, m2, m0, 10 + vpalignr m4, m2, m0, 12 + mova [dstq+strideq*1], m3 + mova [dstq+strideq*2], m4 + vpalignr m3, m2, m0, 14 + mova [dstq+stride3q ], m3 + lea dstq, [dstq+strideq*4] + mova m0, m2 + vperm2i128 m2, m2, m2, q0101 ; pppppppppppppppp + dec cntd + jg .loop + RET + +cglobal vp9_ipred_dl_32x32_16, 2, 6, 7, dst, stride, l, a + movifnidn aq, amp + mova m0, [aq+mmsize*0+ 0] ; abcdefghijklmnop + mova m1, [aq+mmsize*1+ 0] ; qrstuvwxyz012345 + vpbroadcastw xm4, [aq+mmsize*1+30] ; 55555555 + vperm2i128 m5, m0, m1, q0201 ; ijklmnopqrstuvwx + vpalignr m2, m5, m0, 2 ; bcdefghijklmnopq + vpalignr m3, m5, m0, 4 ; cdefghijklmnopqr + LOWPASS 0, 2, 3 ; BCDEFGHIJKLMNOPQ + vperm2i128 m5, m1, m4, q0201 ; yz01234555555555 + vpalignr m2, m5, m1, 2 ; rstuvwxyz0123455 + vpalignr m3, m5, m1, 4 ; stuvwxyz01234555 + LOWPASS 1, 2, 3 ; RSTUVWXYZ......5 + vperm2i128 m2, m1, m4, q0201 ; Z......555555555 + vperm2i128 m5, m0, m1, q0201 ; JKLMNOPQRSTUVWXY + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + mov cntd, 4 + +.loop: + mova [dstq+strideq*0 + 0], m0 + mova [dstq+strideq*0 +32], m1 + vpalignr m3, m5, m0, 2 + vpalignr m4, m2, m1, 2 + mova [dstq+strideq*1 + 0], m3 + mova [dstq+strideq*1 +32], m4 + vpalignr m3, m5, m0, 4 + vpalignr m4, m2, m1, 4 + mova [dstq+strideq*2 + 0], m3 + mova [dstq+strideq*2 +32], m4 + vpalignr m3, m5, m0, 6 + vpalignr m4, m2, m1, 6 + mova [dstq+stride3q*1+ 0], m3 + mova [dstq+stride3q*1+32], m4 + lea dstq, [dstq+strideq*4] + vpalignr m3, m5, m0, 8 + vpalignr m4, m2, m1, 8 + mova [dstq+strideq*0 + 0], m3 + mova [dstq+strideq*0 +32], m4 + vpalignr m3, m5, m0, 10 + vpalignr m4, m2, m1, 10 + mova [dstq+strideq*1 + 0], m3 + mova [dstq+strideq*1 +32], m4 + vpalignr m3, m5, m0, 12 + vpalignr m4, m2, m1, 12 + mova [dstq+strideq*2+ 0], m3 + mova [dstq+strideq*2+32], m4 + vpalignr m3, m5, m0, 14 + vpalignr m4, m2, m1, 14 + mova [dstq+stride3q+ 0], m3 + mova [dstq+stride3q+ 32], m4 + vpalignr m3, m5, m0, 16 + vpalignr m4, m2, m1, 16 + vperm2i128 m5, m3, m4, q0201 + vperm2i128 m2, m4, m4, q0101 + mova m0, m3 + mova m1, m4 + lea dstq, [dstq+strideq*4] + dec cntd + jg .loop + RET +%endif + +%macro DR_FUNCS 1 ; stack_mem_for_32x32_32bit_function +cglobal vp9_ipred_dr_4x4_16, 4, 4, 3, dst, stride, l, a + movh m0, [lq] ; wxyz.... + movhps m0, [aq-2] ; wxyz*abc + movd m1, [aq+6] ; d....... + PALIGNR m1, m0, 2, m2 ; xyz*abcd + psrldq m2, m1, 2 ; yz*abcd. + LOWPASS 0, 1, 2 ; XYZ#ABC. + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + + movh [dstq+stride3q ], m0 + psrldq m0, 2 ; YZ#ABC.. + movh [dstq+strideq*2], m0 + psrldq m0, 2 ; Z#ABC... + movh [dstq+strideq*1], m0 + psrldq m0, 2 ; #ABC.... + movh [dstq+strideq*0], m0 + RET + +cglobal vp9_ipred_dr_8x8_16, 4, 4, 5, dst, stride, l, a + mova m0, [lq] ; stuvwxyz + movu m1, [aq-2] ; *abcdefg + mova m2, [aq] ; abcdefgh + psrldq m3, m2, 2 ; bcdefgh. + LOWPASS 3, 2, 1 ; ABCDEFG. + PALIGNR m1, m0, 2, m4 ; tuvwxyz* + PALIGNR m2, m1, 2, m4 ; uvwxyz*a + LOWPASS 2, 1, 0 ; TUVWXYZ# + DEFINE_ARGS dst, stride, dst4, stride3 + lea stride3q, [strideq*3] + lea dst4q, [dstq+strideq*4] + + movhps [dstq +stride3q +0], m2 + movh [dstq+ stride3q +8], m3 + mova [dst4q+stride3q +0], m2 + PALIGNR m1, m3, m2, 2, m0 + psrldq m3, 2 + movhps [dstq +strideq*2+0], m1 + movh [dstq+ strideq*2+8], m3 + mova [dst4q+strideq*2+0], m1 + PALIGNR m2, m3, m1, 2, m0 + psrldq m3, 2 + movhps [dstq +strideq*1+0], m2 + movh [dstq+ strideq*1+8], m3 + mova [dst4q+strideq*1+0], m2 + PALIGNR m1, m3, m2, 2, m0 + psrldq m3, 2 + movhps [dstq +strideq*0+0], m1 + movh [dstq+ strideq*0+8], m3 + mova [dst4q+strideq*0+0], m1 + RET + +cglobal vp9_ipred_dr_16x16_16, 4, 4, 7, dst, stride, l, a + mova m0, [lq] ; klmnopqr + mova m1, [lq+mmsize] ; stuvwxyz + movu m2, [aq-2] ; *abcdefg + movu m3, [aq+mmsize-2] ; hijklmno + mova m4, [aq] ; abcdefgh + mova m5, [aq+mmsize] ; ijklmnop + psrldq m6, m5, 2 ; jklmnop. + LOWPASS 6, 5, 3 ; IJKLMNO. + PALIGNR m5, m4, 2, m3 ; bcdefghi + LOWPASS 5, 4, 2 ; ABCDEFGH + PALIGNR m2, m1, 2, m3 ; tuvwxyz* + PALIGNR m4, m2, 2, m3 ; uvwxyz*a + LOWPASS 4, 2, 1 ; TUVWXYZ# + PALIGNR m1, m0, 2, m3 ; lmnopqrs + PALIGNR m2, m1, 2, m3 ; mnopqrst + LOWPASS 2, 1, 0 ; LMNOPQRS + DEFINE_ARGS dst, stride, dst8, cnt + lea dst8q, [dstq+strideq*8] + mov cntd, 8 + +.loop: + sub dst8q, strideq + mova [dst8q+strideq*0+ 0], m4 + mova [dst8q+strideq*0+16], m5 + mova [dst8q+strideq*8+ 0], m2 + mova [dst8q+strideq*8+16], m4 +%if cpuflag(avx) + vpalignr m2, m4, m2, 2 + vpalignr m4, m5, m4, 2 + vpalignr m5, m6, m5, 2 +%else + PALIGNR m0, m4, m2, 2, m1 + mova m2, m0 + PALIGNR m0, m5, m4, 2, m1 + mova m4, m0 + PALIGNR m0, m6, m5, 2, m1 + mova m5, m0 +%endif + psrldq m6, 2 + dec cntd + jg .loop + RET + +cglobal vp9_ipred_dr_32x32_16, 4, 5, 10 + notcpuflag(ssse3), \ + %1 * ARCH_X86_32 * -mmsize, dst, stride, l, a + mova m0, [aq+mmsize*3] ; a[24-31] + movu m1, [aq+mmsize*3-2] ; a[23-30] + psrldq m2, m0, 2 ; a[25-31]. + LOWPASS 2, 0, 1 ; A[24-30]. + mova m1, [aq+mmsize*2] ; a[16-23] + movu m3, [aq+mmsize*2-2] ; a[15-22] + PALIGNR m0, m1, 2, m4 ; a[17-24] + LOWPASS 0, 1, 3 ; A[16-23] + mova m3, [aq+mmsize*1] ; a[8-15] + movu m4, [aq+mmsize*1-2] ; a[7-14] + PALIGNR m1, m3, 2, m5 ; a[9-16] + LOWPASS 1, 3, 4 ; A[8-15] + mova m4, [aq+mmsize*0] ; a[0-7] + movu m5, [aq+mmsize*0-2] ; *a[0-6] + PALIGNR m3, m4, 2, m6 ; a[1-8] + LOWPASS 3, 4, 5 ; A[0-7] + SCRATCH 1, 8, rsp+0*mmsize + SCRATCH 3, 9, rsp+1*mmsize +%if notcpuflag(ssse3) + SCRATCH 0, 10, rsp+2*mmsize +%endif + mova m6, [lq+mmsize*3] ; l[24-31] + PALIGNR m5, m6, 2, m0 ; l[25-31]* + PALIGNR m4, m5, 2, m0 ; l[26-31]*a + LOWPASS 4, 5, 6 ; L[25-31]# + mova m7, [lq+mmsize*2] ; l[16-23] + PALIGNR m6, m7, 2, m0 ; l[17-24] + PALIGNR m5, m6, 2, m0 ; l[18-25] + LOWPASS 5, 6, 7 ; L[17-24] + mova m1, [lq+mmsize*1] ; l[8-15] + PALIGNR m7, m1, 2, m0 ; l[9-16] + PALIGNR m6, m7, 2, m0 ; l[10-17] + LOWPASS 6, 7, 1 ; L[9-16] + mova m3, [lq+mmsize*0] ; l[0-7] + PALIGNR m1, m3, 2, m0 ; l[1-8] + PALIGNR m7, m1, 2, m0 ; l[2-9] + LOWPASS 7, 1, 3 ; L[1-8] +%if cpuflag(ssse3) +%if cpuflag(avx) + UNSCRATCH 1, 8, rsp+0*mmsize +%endif + UNSCRATCH 3, 9, rsp+1*mmsize +%else + UNSCRATCH 0, 10, rsp+2*mmsize +%endif + DEFINE_ARGS dst8, stride, stride8, stride24, cnt + lea stride8q, [strideq*8] + lea stride24q, [stride8q*3] + lea dst8q, [dst8q+strideq*8] + mov cntd, 8 + +.loop: + sub dst8q, strideq +%if notcpuflag(avx) + UNSCRATCH 1, 8, rsp+0*mmsize +%if notcpuflag(ssse3) + UNSCRATCH 3, 9, rsp+1*mmsize +%endif +%endif + mova [dst8q+stride8q*0+ 0], m4 + mova [dst8q+stride8q*0+16], m3 + mova [dst8q+stride8q*0+32], m1 + mova [dst8q+stride8q*0+48], m0 + mova [dst8q+stride8q*1+ 0], m5 + mova [dst8q+stride8q*1+16], m4 + mova [dst8q+stride8q*1+32], m3 + mova [dst8q+stride8q*1+48], m1 + mova [dst8q+stride8q*2+ 0], m6 + mova [dst8q+stride8q*2+16], m5 + mova [dst8q+stride8q*2+32], m4 + mova [dst8q+stride8q*2+48], m3 + mova [dst8q+stride24q + 0], m7 + mova [dst8q+stride24q +16], m6 + mova [dst8q+stride24q +32], m5 + mova [dst8q+stride24q +48], m4 +%if cpuflag(avx) + vpalignr m7, m6, m7, 2 + vpalignr m6, m5, m6, 2 + vpalignr m5, m4, m5, 2 + vpalignr m4, m3, m4, 2 + vpalignr m3, m1, m3, 2 + vpalignr m1, m0, m1, 2 + vpalignr m0, m2, m0, 2 +%else + SCRATCH 2, 8, rsp+0*mmsize +%if notcpuflag(ssse3) + SCRATCH 0, 9, rsp+1*mmsize +%endif + PALIGNR m2, m6, m7, 2, m0 + mova m7, m2 + PALIGNR m2, m5, m6, 2, m0 + mova m6, m2 + PALIGNR m2, m4, m5, 2, m0 + mova m5, m2 + PALIGNR m2, m3, m4, 2, m0 + mova m4, m2 + PALIGNR m2, m1, m3, 2, m0 + mova m3, m2 +%if notcpuflag(ssse3) + UNSCRATCH 0, 9, rsp+1*mmsize + SCRATCH 3, 9, rsp+1*mmsize +%endif + PALIGNR m2, m0, m1, 2, m3 + mova m1, m2 + UNSCRATCH 2, 8, rsp+0*mmsize + SCRATCH 1, 8, rsp+0*mmsize + PALIGNR m1, m2, m0, 2, m3 + mova m0, m1 +%endif + psrldq m2, 2 + dec cntd + jg .loop + RET +%endmacro + +INIT_XMM sse2 +DR_FUNCS 3 +INIT_XMM ssse3 +DR_FUNCS 2 +INIT_XMM avx +DR_FUNCS 2 + +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +cglobal vp9_ipred_dr_16x16_16, 4, 5, 6, dst, stride, l, a + mova m0, [lq] ; klmnopqrstuvwxyz + movu m1, [aq-2] ; *abcdefghijklmno + mova m2, [aq] ; abcdefghijklmnop + vperm2i128 m4, m2, m2, q2001 ; ijklmnop........ + vpalignr m5, m4, m2, 2 ; bcdefghijklmnop. + vperm2i128 m3, m0, m1, q0201 ; stuvwxyz*abcdefg + LOWPASS 1, 2, 5 ; ABCDEFGHIJKLMNO. + vpalignr m4, m3, m0, 2 ; lmnopqrstuvwxyz* + vpalignr m5, m3, m0, 4 ; mnopqrstuvwxyz*a + LOWPASS 0, 4, 5 ; LMNOPQRSTUVWXYZ# + vperm2i128 m5, m0, m1, q0201 ; TUVWXYZ#ABCDEFGH + DEFINE_ARGS dst, stride, stride3, stride5, dst3 + lea dst3q, [dstq+strideq*4] + lea stride3q, [strideq*3] + lea stride5q, [stride3q+strideq*2] + + vpalignr m3, m5, m0, 2 + vpalignr m4, m1, m5, 2 + mova [dst3q+stride5q*2], m3 ; 14 + mova [ dstq+stride3q*2], m4 ; 6 + vpalignr m3, m5, m0, 4 + vpalignr m4, m1, m5, 4 + sub dst3q, strideq + mova [dst3q+stride5q*2], m3 ; 13 + mova [dst3q+strideq*2 ], m4 ; 5 + mova [dst3q+stride3q*4], m0 ; 15 + vpalignr m3, m5, m0, 6 + vpalignr m4, m1, m5, 6 + mova [dstq+stride3q*4], m3 ; 12 + mova [dst3q+strideq*1], m4 ; 4 + vpalignr m3, m5, m0, 8 + vpalignr m4, m1, m5, 8 + mova [dst3q+strideq*8], m3 ; 11 + mova [dst3q+strideq*0], m4 ; 3 + vpalignr m3, m5, m0, 10 + vpalignr m4, m1, m5, 10 + mova [dstq+stride5q*2], m3 ; 10 + mova [dstq+strideq*2 ], m4 ; 2 + vpalignr m3, m5, m0, 12 + vpalignr m4, m1, m5, 12 + mova [dst3q+stride3q*2], m3 ; 9 + mova [dstq+strideq*1 ], m4 ; 1 + vpalignr m3, m5, m0, 14 + vpalignr m4, m1, m5, 14 + mova [dstq+strideq*8], m3 ; 8 + mova [dstq+strideq*0], m4 ; 0 + mova [dst3q+strideq*4], m5 ; 7 + RET + +%if ARCH_X86_64 +cglobal vp9_ipred_dr_32x32_16, 4, 7, 10, dst, stride, l, a + mova m0, [lq+mmsize*0+0] ; l[0-15] + mova m1, [lq+mmsize*1+0] ; l[16-31] + movu m2, [aq+mmsize*0-2] ; *abcdefghijklmno + mova m3, [aq+mmsize*0+0] ; abcdefghijklmnop + mova m4, [aq+mmsize*1+0] ; qrstuvwxyz012345 + vperm2i128 m5, m0, m1, q0201 ; lmnopqrstuvwxyz0 + vpalignr m6, m5, m0, 2 ; mnopqrstuvwxyz01 + vpalignr m7, m5, m0, 4 ; nopqrstuvwxyz012 + LOWPASS 0, 6, 7 ; L[0-15] + vperm2i128 m7, m1, m2, q0201 ; stuvwxyz*abcdefg + vpalignr m5, m7, m1, 2 ; lmnopqrstuvwxyz* + vpalignr m6, m7, m1, 4 ; mnopqrstuvwxyz*a + LOWPASS 1, 5, 6 ; L[16-31]# + vperm2i128 m5, m3, m4, q0201 ; ijklmnopqrstuvwx + vpalignr m6, m5, m3, 2 ; bcdefghijklmnopq + LOWPASS 2, 3, 6 ; A[0-15] + movu m3, [aq+mmsize*1-2] ; pqrstuvwxyz01234 + vperm2i128 m6, m4, m4, q2001 ; yz012345........ + vpalignr m7, m6, m4, 2 ; rstuvwxyz012345. + LOWPASS 3, 4, 7 ; A[16-31]. + vperm2i128 m4, m1, m2, q0201 ; TUVWXYZ#ABCDEFGH + vperm2i128 m5, m0, m1, q0201 ; L[7-15]L[16-23] + vperm2i128 m8, m2, m3, q0201 ; IJKLMNOPQRSTUVWX + DEFINE_ARGS dst8, stride, stride3, stride7, stride5, dst24, cnt + lea stride3q, [strideq*3] + lea stride5q, [stride3q+strideq*2] + lea stride7q, [strideq*4+stride3q] + lea dst24q, [dst8q+stride3q*8] + lea dst8q, [dst8q+strideq*8] + mov cntd, 2 + +.loop: + mova [dst24q+stride7q+0 ], m0 ; 31 23 15 7 + mova [dst24q+stride7q+32], m1 + mova [dst8q+stride7q+0], m1 + mova [dst8q+stride7q+32], m2 + vpalignr m6, m4, m1, 2 + vpalignr m7, m5, m0, 2 + vpalignr m9, m8, m2, 2 + mova [dst24q+stride3q*2+0], m7 ; 30 22 14 6 + mova [dst24q+stride3q*2+32], m6 + mova [dst8q+stride3q*2+0], m6 + mova [dst8q+stride3q*2+32], m9 + vpalignr m6, m4, m1, 4 + vpalignr m7, m5, m0, 4 + vpalignr m9, m8, m2, 4 + mova [dst24q+stride5q+0], m7 ; 29 21 13 5 + mova [dst24q+stride5q+32], m6 + mova [dst8q+stride5q+0], m6 + mova [dst8q+stride5q+32], m9 + vpalignr m6, m4, m1, 6 + vpalignr m7, m5, m0, 6 + vpalignr m9, m8, m2, 6 + mova [dst24q+strideq*4+0 ], m7 ; 28 20 12 4 + mova [dst24q+strideq*4+32], m6 + mova [dst8q+strideq*4+0], m6 + mova [dst8q+strideq*4+32], m9 + vpalignr m6, m4, m1, 8 + vpalignr m7, m5, m0, 8 + vpalignr m9, m8, m2, 8 + mova [dst24q+stride3q+0 ], m7 ; 27 19 11 3 + mova [dst24q+stride3q+32], m6 + mova [dst8q+stride3q+0], m6 + mova [dst8q+stride3q+32], m9 + vpalignr m6, m4, m1, 10 + vpalignr m7, m5, m0, 10 + vpalignr m9, m8, m2, 10 + mova [dst24q+strideq*2+0 ], m7 ; 26 18 10 2 + mova [dst24q+strideq*2+32], m6 + mova [dst8q+strideq*2+0], m6 + mova [dst8q+strideq*2+32], m9 + vpalignr m6, m4, m1, 12 + vpalignr m7, m5, m0, 12 + vpalignr m9, m8, m2, 12 + mova [dst24q+strideq+0 ], m7 ; 25 17 9 1 + mova [dst24q+strideq+32], m6 + mova [dst8q+strideq+0], m6 + mova [dst8q+strideq+32], m9 + vpalignr m6, m4, m1, 14 + vpalignr m7, m5, m0, 14 + vpalignr m9, m8, m2, 14 + mova [dst24q+strideq*0+0 ], m7 ; 24 16 8 0 + mova [dst24q+strideq*0+32], m6 + mova [dst8q+strideq*0+0], m6 + mova [dst8q+strideq*0+32], m9 + mova m0, m5 + mova m5, m1 + mova m1, m4 + mova m4, m2 + mova m2, m8 + mova m8, m3 + sub dst24q, stride7q + sub dst24q, strideq + sub dst8q, stride7q + sub dst8q, strideq + dec cntd + jg .loop + RET +%endif +%endif + +%macro VL_FUNCS 1 ; stack_mem_for_32x32_32bit_function +cglobal vp9_ipred_vl_4x4_16, 2, 4, 3, dst, stride, l, a + movifnidn aq, amp + movu m0, [aq] ; abcdefgh + psrldq m1, m0, 2 ; bcdefgh. + psrldq m2, m0, 4 ; cdefgh.. + LOWPASS 2, 1, 0 ; BCDEFGH. + pavgw m1, m0 ; ABCDEFG. + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + + movh [dstq+strideq*0], m1 + movh [dstq+strideq*1], m2 + psrldq m1, 2 + psrldq m2, 2 + movh [dstq+strideq*2], m1 + movh [dstq+stride3q ], m2 + RET + +cglobal vp9_ipred_vl_8x8_16, 2, 4, 4, dst, stride, l, a + movifnidn aq, amp + mova m0, [aq] ; abcdefgh +%if cpuflag(ssse3) + mova m3, [pb_2to15_14_15] +%endif + SHIFT_RIGHTx2 m1, m2, m0, m3 ; bcdefghh/cdefghhh + LOWPASS 2, 1, 0 ; BCDEFGHh + pavgw m1, m0 ; ABCDEFGh + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + + mova [dstq+strideq*0], m1 + mova [dstq+strideq*1], m2 + SHIFT_RIGHT m1, m1, m3 + SHIFT_RIGHT m2, m2, m3 + mova [dstq+strideq*2], m1 + mova [dstq+stride3q ], m2 + lea dstq, [dstq+strideq*4] + SHIFT_RIGHT m1, m1, m3 + SHIFT_RIGHT m2, m2, m3 + mova [dstq+strideq*0], m1 + mova [dstq+strideq*1], m2 + SHIFT_RIGHT m1, m1, m3 + SHIFT_RIGHT m2, m2, m3 + mova [dstq+strideq*2], m1 + mova [dstq+stride3q ], m2 + RET + +cglobal vp9_ipred_vl_16x16_16, 2, 4, 6, dst, stride, l, a + movifnidn aq, amp + mova m0, [aq] + mova m1, [aq+mmsize] + PALIGNR m2, m1, m0, 2, m3 + PALIGNR m3, m1, m0, 4, m4 + LOWPASS 3, 2, 0 + pavgw m2, m0 +%if cpuflag(ssse3) + mova m4, [pb_2to15_14_15] +%endif + SHIFT_RIGHTx2 m5, m0, m1, m4 + LOWPASS 0, 5, 1 + pavgw m1, m5 + DEFINE_ARGS dst, stride, cnt + mov cntd, 8 + +.loop: + mova [dstq+strideq*0+ 0], m2 + mova [dstq+strideq*0+16], m1 + mova [dstq+strideq*1+ 0], m3 + mova [dstq+strideq*1+16], m0 + lea dstq, [dstq+strideq*2] +%if cpuflag(avx) + vpalignr m2, m1, m2, 2 + vpalignr m3, m0, m3, 2 +%else + PALIGNR m5, m1, m2, 2, m4 + mova m2, m5 + PALIGNR m5, m0, m3, 2, m4 + mova m3, m5 +%endif + SHIFT_RIGHT m1, m1, m4 + SHIFT_RIGHT m0, m0, m4 + dec cntd + jg .loop + RET + +cglobal vp9_ipred_vl_32x32_16, 2, 5, 11, %1 * mmsize * ARCH_X86_32, dst, stride, l, a + movifnidn aq, amp + mova m0, [aq+mmsize*0] + mova m1, [aq+mmsize*1] + mova m2, [aq+mmsize*2] + PALIGNR m6, m1, m0, 2, m5 + PALIGNR m7, m1, m0, 4, m5 + LOWPASS 7, 6, 0 + pavgw m6, m0 + SCRATCH 6, 8, rsp+0*mmsize + PALIGNR m4, m2, m1, 2, m0 + PALIGNR m5, m2, m1, 4, m0 + LOWPASS 5, 4, 1 + pavgw m4, m1 + mova m0, [aq+mmsize*3] + PALIGNR m1, m0, m2, 2, m6 + PALIGNR m3, m0, m2, 4, m6 + LOWPASS 3, 1, 2 + pavgw m2, m1 +%if cpuflag(ssse3) + PRELOAD 10, pb_2to15_14_15, shuf +%endif + SHIFT_RIGHTx2 m6, m1, m0, reg_shuf + LOWPASS 1, 6, 0 + pavgw m0, m6 +%if ARCH_X86_64 + pshufd m9, m6, q3333 +%endif +%if cpuflag(avx) + UNSCRATCH 6, 8, rsp+0*mmsize +%endif + DEFINE_ARGS dst, stride, cnt, stride16, stride17 + mov stride16q, strideq + mov cntd, 8 + shl stride16q, 4 + lea stride17q, [stride16q+strideq] + + ; FIXME m8 is unused for avx, so we could save one register here for win64 +.loop: +%if notcpuflag(avx) + UNSCRATCH 6, 8, rsp+0*mmsize +%endif + mova [dstq+strideq*0+ 0], m6 + mova [dstq+strideq*0+16], m4 + mova [dstq+strideq*0+32], m2 + mova [dstq+strideq*0+48], m0 + mova [dstq+strideq*1+ 0], m7 + mova [dstq+strideq*1+16], m5 + mova [dstq+strideq*1+32], m3 + mova [dstq+strideq*1+48], m1 + mova [dstq+stride16q+ 0], m4 + mova [dstq+stride16q+16], m2 + mova [dstq+stride16q+32], m0 +%if ARCH_X86_64 + mova [dstq+stride16q+48], m9 +%endif + mova [dstq+stride17q+ 0], m5 + mova [dstq+stride17q+16], m3 + mova [dstq+stride17q+32], m1 +%if ARCH_X86_64 + mova [dstq+stride17q+48], m9 +%endif + lea dstq, [dstq+strideq*2] +%if cpuflag(avx) + vpalignr m6, m4, m6, 2 + vpalignr m4, m2, m4, 2 + vpalignr m2, m0, m2, 2 + vpalignr m7, m5, m7, 2 + vpalignr m5, m3, m5, 2 + vpalignr m3, m1, m3, 2 +%else + SCRATCH 3, 8, rsp+0*mmsize +%if notcpuflag(ssse3) + SCRATCH 1, 10, rsp+1*mmsize +%endif + PALIGNR m3, m4, m6, 2, m1 + mova m6, m3 + PALIGNR m3, m2, m4, 2, m1 + mova m4, m3 + PALIGNR m3, m0, m2, 2, m1 + mova m2, m3 + PALIGNR m3, m5, m7, 2, m1 + mova m7, m3 + UNSCRATCH 3, 8, rsp+0*mmsize + SCRATCH 6, 8, rsp+0*mmsize +%if notcpuflag(ssse3) + UNSCRATCH 1, 10, rsp+1*mmsize + SCRATCH 7, 10, rsp+1*mmsize +%endif + PALIGNR m6, m3, m5, 2, m7 + mova m5, m6 + PALIGNR m6, m1, m3, 2, m7 + mova m3, m6 +%if notcpuflag(ssse3) + UNSCRATCH 7, 10, rsp+1*mmsize +%endif +%endif + SHIFT_RIGHT m1, m1, reg_shuf + SHIFT_RIGHT m0, m0, reg_shuf + dec cntd + jg .loop + +%if ARCH_X86_32 + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] +%assign %%n 0 +%rep 4 + mova [dstq+strideq*0+48], m0 + mova [dstq+strideq*1+48], m0 + mova [dstq+strideq*2+48], m0 + mova [dstq+stride3q +48], m0 +%if %%n < 3 + lea dstq, [dstq+strideq*4] +%endif +%assign %%n (%%n+1) +%endrep +%endif + RET +%endmacro + +INIT_XMM sse2 +VL_FUNCS 2 +INIT_XMM ssse3 +VL_FUNCS 1 +INIT_XMM avx +VL_FUNCS 1 + +%macro VR_FUNCS 0 +cglobal vp9_ipred_vr_4x4_16, 4, 4, 3, dst, stride, l, a + movu m0, [aq-2] + movhps m1, [lq] + PALIGNR m0, m1, 10, m2 ; xyz*abcd + pslldq m1, m0, 2 ; .xyz*abc + pslldq m2, m0, 4 ; ..xyz*ab + LOWPASS 2, 1, 0 ; ..YZ#ABC + pavgw m1, m0 ; ....#ABC + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + + movhps [dstq+strideq*0], m1 + movhps [dstq+strideq*1], m2 + shufps m0, m2, m1, q3210 +%if cpuflag(ssse3) + pshufb m2, [pb_4_5_8to13_8x0] +%else + pshuflw m2, m2, q2222 + psrldq m2, 6 +%endif + psrldq m0, 6 + movh [dstq+strideq*2], m0 + movh [dstq+stride3q ], m2 + RET + +cglobal vp9_ipred_vr_8x8_16, 4, 4, 5, dst, stride, l, a + movu m1, [aq-2] ; *abcdefg + movu m2, [lq] ; stuvwxyz + mova m0, [aq] ; abcdefgh + PALIGNR m3, m1, m2, 14, m4 ; z*abcdef + LOWPASS 3, 1, 0 + pavgw m0, m1 + PALIGNR m1, m2, 2, m4 ; tuvwxyz* + pslldq m4, m2, 2 ; .stuvwxy + LOWPASS 4, 2, 1 + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m3 + PALIGNR m0, m4, 14, m1 + pslldq m4, 2 + PALIGNR m3, m4, 14, m1 + pslldq m4, 2 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m3 + lea dstq, [dstq+strideq*4] + PALIGNR m0, m4, 14, m1 + pslldq m4, 2 + PALIGNR m3, m4, 14, m1 + pslldq m4, 2 + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m3 + PALIGNR m0, m4, 14, m1 + pslldq m4, 2 + PALIGNR m3, m4, 14, m4 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m3 + RET + +cglobal vp9_ipred_vr_16x16_16, 4, 4, 8, dst, stride, l, a + movu m1, [aq-2] ; *abcdefg + movu m2, [aq+mmsize-2] ; hijklmno + mova m3, [aq] ; abcdefgh + mova m4, [aq+mmsize] ; ijklmnop + mova m5, [lq+mmsize] ; stuvwxyz + PALIGNR m0, m1, m5, 14, m6 ; z*abcdef + movu m6, [aq+mmsize-4] ; ghijklmn + LOWPASS 6, 2, 4 + pavgw m2, m4 + LOWPASS 0, 1, 3 + pavgw m3, m1 + PALIGNR m1, m5, 2, m7 ; tuvwxyz* + movu m7, [lq+mmsize-2] ; rstuvwxy + LOWPASS 1, 5, 7 + movu m5, [lq+2] ; lmnopqrs + pslldq m4, m5, 2 ; .lmnopqr + pslldq m7, m5, 4 ; ..lmnopq + LOWPASS 5, 4, 7 + psrld m4, m1, 16 + psrld m7, m5, 16 + pand m1, [pd_65535] + pand m5, [pd_65535] + packssdw m7, m4 + packssdw m5, m1 + DEFINE_ARGS dst, stride, cnt + mov cntd, 8 + +.loop: + mova [dstq+strideq*0+ 0], m3 + mova [dstq+strideq*0+16], m2 + mova [dstq+strideq*1+ 0], m0 + mova [dstq+strideq*1+16], m6 + lea dstq, [dstq+strideq*2] + PALIGNR m2, m3, 14, m4 + PALIGNR m3, m7, 14, m4 + pslldq m7, 2 + PALIGNR m6, m0, 14, m4 + PALIGNR m0, m5, 14, m4 + pslldq m5, 2 + dec cntd + jg .loop + RET + +cglobal vp9_ipred_vr_32x32_16, 4, 5, 14, 6 * mmsize * ARCH_X86_32, dst, stride, l, a + movu m0, [aq+mmsize*0-2] ; *a[0-6] + movu m1, [aq+mmsize*1-2] ; a[7-14] + movu m2, [aq+mmsize*2-2] ; a[15-22] + movu m3, [aq+mmsize*3-2] ; a[23-30] + mova m4, [aq+mmsize*3+0] ; a[24-31] + movu m5, [aq+mmsize*3-4] ; a[22-29] + LOWPASS 5, 3, 4 ; A[23-30] + SCRATCH 5, 8, rsp+0*mmsize + pavgw m3, m4 + mova m4, [aq+mmsize*2+0] ; a[16-23] + movu m6, [aq+mmsize*2-4] ; a[14-21] + LOWPASS 6, 2, 4 ; A[15-22] + SCRATCH 6, 9, rsp+1*mmsize + pavgw m2, m4 + mova m4, [aq+mmsize*1+0] ; a[8-15] + movu m7, [aq+mmsize*1-4] ; a[6-13] + LOWPASS 7, 1, 4 ; A[7-14] + SCRATCH 7, 10, rsp+2*mmsize + pavgw m1, m4 + mova m4, [aq+mmsize*0+0] ; a[0-7] + mova m5, [lq+mmsize*3+0] ; l[24-31] + PALIGNR m6, m0, m5, 14, m7 ; l[31]*a[0-5] + LOWPASS 6, 0, 4 ; #A[0-6] + SCRATCH 6, 11, rsp+3*mmsize + pavgw m4, m0 + PALIGNR m0, m5, 2, m7 ; l[25-31]* + movu m7, [lq+mmsize*3-2] ; l[23-30] + LOWPASS 0, 5, 7 ; L[24-31] + movu m5, [lq+mmsize*2-2] ; l[15-22] + mova m7, [lq+mmsize*2+0] ; l[16-23] + movu m6, [lq+mmsize*2+2] ; l[17-24] + LOWPASS 5, 7, 6 ; L[16-23] + psrld m7, m0, 16 + psrld m6, m5, 16 + pand m0, [pd_65535] + pand m5, [pd_65535] + packssdw m6, m7 + packssdw m5, m0 + SCRATCH 5, 12, rsp+4*mmsize + SCRATCH 6, 13, rsp+5*mmsize + movu m6, [lq+mmsize*1-2] ; l[7-14] + mova m0, [lq+mmsize*1+0] ; l[8-15] + movu m5, [lq+mmsize*1+2] ; l[9-16] + LOWPASS 6, 0, 5 ; L[8-15] + movu m0, [lq+mmsize*0+2] ; l[1-8] + pslldq m5, m0, 2 ; .l[1-7] + pslldq m7, m0, 4 ; ..l[1-6] + LOWPASS 0, 5, 7 + psrld m5, m6, 16 + psrld m7, m0, 16 + pand m6, [pd_65535] + pand m0, [pd_65535] + packssdw m7, m5 + packssdw m0, m6 + UNSCRATCH 6, 13, rsp+5*mmsize + DEFINE_ARGS dst, stride, stride16, cnt, stride17 + mov stride16q, strideq + mov cntd, 8 + shl stride16q, 4 +%if ARCH_X86_64 + lea stride17q, [stride16q+strideq] +%endif + +.loop: + mova [dstq+strideq*0+ 0], m4 + mova [dstq+strideq*0+16], m1 + mova [dstq+strideq*0+32], m2 + mova [dstq+strideq*0+48], m3 +%if ARCH_X86_64 + mova [dstq+strideq*1+ 0], m11 + mova [dstq+strideq*1+16], m10 + mova [dstq+strideq*1+32], m9 + mova [dstq+strideq*1+48], m8 +%endif + mova [dstq+stride16q+ 0], m6 + mova [dstq+stride16q+16], m4 + mova [dstq+stride16q+32], m1 + mova [dstq+stride16q+48], m2 +%if ARCH_X86_64 + mova [dstq+stride17q+ 0], m12 + mova [dstq+stride17q+16], m11 + mova [dstq+stride17q+32], m10 + mova [dstq+stride17q+48], m9 +%endif + lea dstq, [dstq+strideq*2] + PALIGNR m3, m2, 14, m5 + PALIGNR m2, m1, 14, m5 + PALIGNR m1, m4, 14, m5 + PALIGNR m4, m6, 14, m5 + PALIGNR m6, m7, 14, m5 + pslldq m7, 2 +%if ARCH_X86_64 + PALIGNR m8, m9, 14, m5 + PALIGNR m9, m10, 14, m5 + PALIGNR m10, m11, 14, m5 + PALIGNR m11, m12, 14, m5 + PALIGNR m12, m0, 14, m5 + pslldq m0, 2 +%endif + dec cntd + jg .loop + +%if ARCH_X86_32 + UNSCRATCH 5, 12, rsp+4*mmsize + UNSCRATCH 4, 11, rsp+3*mmsize + UNSCRATCH 3, 10, rsp+2*mmsize + UNSCRATCH 2, 9, rsp+1*mmsize + UNSCRATCH 1, 8, rsp+0*mmsize + mov dstq, dstm + mov cntd, 8 + add dstq, strideq +.loop2: + mova [dstq+strideq*0+ 0], m4 + mova [dstq+strideq*0+16], m3 + mova [dstq+strideq*0+32], m2 + mova [dstq+strideq*0+48], m1 + mova [dstq+stride16q+ 0], m5 + mova [dstq+stride16q+16], m4 + mova [dstq+stride16q+32], m3 + mova [dstq+stride16q+48], m2 + lea dstq, [dstq+strideq*2] + PALIGNR m1, m2, 14, m6 + PALIGNR m2, m3, 14, m6 + PALIGNR m3, m4, 14, m6 + PALIGNR m4, m5, 14, m6 + PALIGNR m5, m0, 14, m6 + pslldq m0, 2 + dec cntd + jg .loop2 +%endif + RET +%endmacro + +INIT_XMM sse2 +VR_FUNCS +INIT_XMM ssse3 +VR_FUNCS +INIT_XMM avx +VR_FUNCS + +%macro HU_FUNCS 1 ; stack_mem_for_32x32_32bit_function +cglobal vp9_ipred_hu_4x4_16, 3, 3, 3, dst, stride, l, a + movh m0, [lq] ; abcd +%if cpuflag(ssse3) + pshufb m0, [pb_0to7_67x4] ; abcddddd +%else + punpcklqdq m0, m0 + pshufhw m0, m0, q3333 ; abcddddd +%endif + psrldq m1, m0, 2 ; bcddddd. + psrldq m2, m0, 4 ; cddddd.. + LOWPASS 2, 1, 0 ; BCDddd.. + pavgw m1, m0 ; abcddddd + SBUTTERFLY wd, 1, 2, 0 ; aBbCcDdd, dddddddd + PALIGNR m2, m1, 4, m0 ; bCcDdddd + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + + movh [dstq+strideq*0], m1 ; aBbC + movh [dstq+strideq*1], m2 ; bCcD + movhps [dstq+strideq*2], m1 ; cDdd + movhps [dstq+stride3q ], m2 ; dddd + RET + +cglobal vp9_ipred_hu_8x8_16, 3, 3, 4, dst, stride, l, a + mova m0, [lq] +%if cpuflag(ssse3) + mova m3, [pb_2to15_14_15] +%endif + SHIFT_RIGHTx2 m1, m2, m0, m3 + LOWPASS 2, 1, 0 + pavgw m1, m0 + SBUTTERFLY wd, 1, 2, 0 + shufps m0, m1, m2, q1032 + pshufd m3, m2, q3332 + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + + mova [dstq+strideq *0], m1 + mova [dstq+strideq *2], m0 + mova [dstq+strideq *4], m2 + mova [dstq+stride3q*2], m3 + add dstq, strideq +%if cpuflag(avx) + vpalignr m1, m2, m1, 4 +%else + PALIGNR m0, m2, m1, 4, m3 + mova m1, m0 +%endif + pshufd m2, m2, q3321 + shufps m0, m1, m2, q1032 + pshufd m3, m2, q3332 + mova [dstq+strideq *0], m1 + mova [dstq+strideq *2], m0 + mova [dstq+strideq *4], m2 + mova [dstq+stride3q*2], m3 + RET + +cglobal vp9_ipred_hu_16x16_16, 3, 4, 6 + notcpuflag(ssse3), dst, stride, l, a + mova m0, [lq] + mova m3, [lq+mmsize] + movu m1, [lq+2] + movu m2, [lq+4] + LOWPASS 2, 1, 0 + pavgw m1, m0 + SBUTTERFLY wd, 1, 2, 0 +%if cpuflag(ssse3) + mova m5, [pb_2to15_14_15] +%endif + SHIFT_RIGHTx2 m0, m4, m3, m5 + LOWPASS 4, 0, 3 + pavgw m3, m0 + SBUTTERFLY wd, 3, 4, 5 + pshufd m0, m0, q3333 + DEFINE_ARGS dst, stride, stride3, cnt + lea stride3q, [strideq*3] + mov cntd, 4 + +.loop: + mova [dstq+strideq *0+ 0], m1 + mova [dstq+strideq *0+16], m2 + mova [dstq+strideq *4+ 0], m2 + mova [dstq+strideq *4+16], m3 + mova [dstq+strideq *8+ 0], m3 + mova [dstq+strideq *8+16], m4 + mova [dstq+stride3q*4+ 0], m4 + mova [dstq+stride3q*4+16], m0 + add dstq, strideq +%if cpuflag(avx) + vpalignr m1, m2, m1, 4 + vpalignr m2, m3, m2, 4 + vpalignr m3, m4, m3, 4 + vpalignr m4, m0, m4, 4 +%else + PALIGNR m5, m2, m1, 4, m6 + mova m1, m5 + PALIGNR m5, m3, m2, 4, m6 + mova m2, m5 + PALIGNR m5, m4, m3, 4, m6 + mova m3, m5 + PALIGNR m5, m0, m4, 4, m6 + mova m4, m5 +%endif + dec cntd + jg .loop + RET + +cglobal vp9_ipred_hu_32x32_16, 3, 7, 10 + notcpuflag(ssse3), \ + %1 * -mmsize * ARCH_X86_32, dst, stride, l, a + mova m2, [lq+mmsize*0+0] + movu m1, [lq+mmsize*0+2] + movu m0, [lq+mmsize*0+4] + LOWPASS 0, 1, 2 + pavgw m1, m2 + SBUTTERFLY wd, 1, 0, 2 + SCRATCH 1, 8, rsp+0*mmsize + mova m4, [lq+mmsize*1+0] + movu m3, [lq+mmsize*1+2] + movu m2, [lq+mmsize*1+4] + LOWPASS 2, 3, 4 + pavgw m3, m4 + SBUTTERFLY wd, 3, 2, 4 + mova m6, [lq+mmsize*2+0] + movu m5, [lq+mmsize*2+2] + movu m4, [lq+mmsize*2+4] + LOWPASS 4, 5, 6 + pavgw m5, m6 + SBUTTERFLY wd, 5, 4, 6 + mova m7, [lq+mmsize*3+0] + SCRATCH 0, 9, rsp+1*mmsize +%if cpuflag(ssse3) + mova m0, [pb_2to15_14_15] +%endif + SHIFT_RIGHTx2 m1, m6, m7, m0 + LOWPASS 6, 1, 7 + pavgw m7, m1 + SBUTTERFLY wd, 7, 6, 0 + pshufd m1, m1, q3333 + UNSCRATCH 0, 9, rsp+1*mmsize + DEFINE_ARGS dst, stride, cnt, stride3, stride4, stride20, stride28 + lea stride3q, [strideq*3] + lea stride4q, [strideq*4] + lea stride28q, [stride4q*8] + lea stride20q, [stride4q*5] + sub stride28q, stride4q + mov cntd, 4 + +.loop: +%if ARCH_X86_64 + SWAP 1, 8 +%else + mova [rsp+1*mmsize], m1 + mova m1, [rsp+0*mmsize] +%endif + mova [dstq+strideq *0+ 0], m1 + mova [dstq+strideq *0+16], m0 + mova [dstq+strideq *0+32], m3 + mova [dstq+strideq *0+48], m2 + mova [dstq+stride4q*1+ 0], m0 + mova [dstq+stride4q*1+16], m3 + mova [dstq+stride4q*1+32], m2 + mova [dstq+stride4q*1+48], m5 + mova [dstq+stride4q*2+ 0], m3 + mova [dstq+stride4q*2+16], m2 + mova [dstq+stride4q*2+32], m5 + mova [dstq+stride4q*2+48], m4 +%if cpuflag(avx) + vpalignr m1, m0, m1, 4 + vpalignr m0, m3, m0, 4 + vpalignr m3, m2, m3, 4 +%else + SCRATCH 6, 9, rsp+2*mmsize +%if notcpuflag(ssse3) + SCRATCH 7, 10, rsp+3*mmsize +%endif + PALIGNR m6, m0, m1, 4, m7 + mova m1, m6 + PALIGNR m6, m3, m0, 4, m7 + mova m0, m6 + PALIGNR m6, m2, m3, 4, m7 + mova m3, m6 + UNSCRATCH 6, 9, rsp+2*mmsize + SCRATCH 0, 9, rsp+2*mmsize +%if notcpuflag(ssse3) + UNSCRATCH 7, 10, rsp+3*mmsize + SCRATCH 3, 10, rsp+3*mmsize +%endif +%endif +%if ARCH_X86_64 + SWAP 1, 8 +%else + mova [rsp+0*mmsize], m1 + mova m1, [rsp+1*mmsize] +%endif + mova [dstq+stride3q*4+ 0], m2 + mova [dstq+stride3q*4+16], m5 + mova [dstq+stride3q*4+32], m4 + mova [dstq+stride3q*4+48], m7 + mova [dstq+stride4q*4+ 0], m5 + mova [dstq+stride4q*4+16], m4 + mova [dstq+stride4q*4+32], m7 + mova [dstq+stride4q*4+48], m6 + mova [dstq+stride20q + 0], m4 + mova [dstq+stride20q +16], m7 + mova [dstq+stride20q +32], m6 + mova [dstq+stride20q +48], m1 + mova [dstq+stride3q*8+ 0], m7 + mova [dstq+stride3q*8+16], m6 + mova [dstq+stride3q*8+32], m1 + mova [dstq+stride3q*8+48], m1 + mova [dstq+stride28q + 0], m6 + mova [dstq+stride28q +16], m1 + mova [dstq+stride28q +32], m1 + mova [dstq+stride28q +48], m1 +%if cpuflag(avx) + vpalignr m2, m5, m2, 4 + vpalignr m5, m4, m5, 4 + vpalignr m4, m7, m4, 4 + vpalignr m7, m6, m7, 4 + vpalignr m6, m1, m6, 4 +%else + PALIGNR m0, m5, m2, 4, m3 + mova m2, m0 + PALIGNR m0, m4, m5, 4, m3 + mova m5, m0 + PALIGNR m0, m7, m4, 4, m3 + mova m4, m0 + PALIGNR m0, m6, m7, 4, m3 + mova m7, m0 + PALIGNR m0, m1, m6, 4, m3 + mova m6, m0 + UNSCRATCH 0, 9, rsp+2*mmsize +%if notcpuflag(ssse3) + UNSCRATCH 3, 10, rsp+3*mmsize +%endif +%endif + add dstq, strideq + dec cntd + jg .loop + RET +%endmacro + +INIT_XMM sse2 +HU_FUNCS 4 +INIT_XMM ssse3 +HU_FUNCS 3 +INIT_XMM avx +HU_FUNCS 2 + +%macro HD_FUNCS 0 +cglobal vp9_ipred_hd_4x4_16, 4, 4, 4, dst, stride, l, a + movh m0, [lq] + movhps m0, [aq-2] + psrldq m1, m0, 2 + psrldq m2, m0, 4 + LOWPASS 2, 1, 0 + pavgw m1, m0 + punpcklwd m1, m2 + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + + movh [dstq+stride3q ], m1 + movhps [dstq+strideq*1], m1 + movhlps m2, m2 + PALIGNR m2, m1, 4, m0 + movh [dstq+strideq*2], m2 + movhps [dstq+strideq*0], m2 + RET + +cglobal vp9_ipred_hd_8x8_16, 4, 4, 5, dst, stride, l, a + mova m0, [lq] + movu m1, [aq-2] + PALIGNR m2, m1, m0, 2, m3 + PALIGNR m3, m1, m0, 4, m4 + LOWPASS 3, 2, 0 + pavgw m2, m0 + SBUTTERFLY wd, 2, 3, 0 + psrldq m0, m1, 2 + psrldq m4, m1, 4 + LOWPASS 1, 0, 4 + DEFINE_ARGS dst8, mstride, cnt + lea dst8q, [dst8q+mstrideq*8] + neg mstrideq + mov cntd, 4 + +.loop: + add dst8q, mstrideq + mova [dst8q+mstrideq*0], m2 + mova [dst8q+mstrideq*4], m3 +%if cpuflag(avx) + vpalignr m2, m3, m2, 4 + vpalignr m3, m1, m3, 4 +%else + PALIGNR m0, m3, m2, 4, m4 + mova m2, m0 + PALIGNR m0, m1, m3, 4, m4 + mova m3, m0 +%endif + psrldq m1, 4 + dec cntd + jg .loop + RET + +cglobal vp9_ipred_hd_16x16_16, 4, 4, 8, dst, stride, l, a + mova m2, [lq] + movu m1, [lq+2] + movu m0, [lq+4] + LOWPASS 0, 1, 2 + pavgw m1, m2 + mova m4, [lq+mmsize] + movu m5, [aq-2] + PALIGNR m3, m5, m4, 2, m6 + PALIGNR m2, m5, m4, 4, m6 + LOWPASS 2, 3, 4 + pavgw m3, m4 + SBUTTERFLY wd, 1, 0, 4 + SBUTTERFLY wd, 3, 2, 4 + mova m6, [aq] + movu m4, [aq+2] + LOWPASS 4, 6, 5 + movu m5, [aq+mmsize-2] + psrldq m6, m5, 2 + psrldq m7, m5, 4 + LOWPASS 5, 6, 7 + DEFINE_ARGS dst, mstride, mstride3, cnt + lea dstq, [dstq+mstrideq*8] + lea dstq, [dstq+mstrideq*8] + neg mstrideq + lea mstride3q, [mstrideq*3] + mov cntd, 4 + +.loop: + add dstq, mstrideq + mova [dstq+mstride3q*4+ 0], m2 + mova [dstq+mstride3q*4+16], m4 + mova [dstq+mstrideq *8+ 0], m3 + mova [dstq+mstrideq *8+16], m2 + mova [dstq+mstrideq *4+ 0], m0 + mova [dstq+mstrideq *4+16], m3 + mova [dstq+mstrideq *0+ 0], m1 + mova [dstq+mstrideq *0+16], m0 +%if cpuflag(avx) + vpalignr m1, m0, m1, 4 + vpalignr m0, m3, m0, 4 + vpalignr m3, m2, m3, 4 + vpalignr m2, m4, m2, 4 + vpalignr m4, m5, m4, 4 +%else + PALIGNR m6, m0, m1, 4, m7 + mova m1, m6 + PALIGNR m6, m3, m0, 4, m7 + mova m0, m6 + PALIGNR m6, m2, m3, 4, m7 + mova m3, m6 + PALIGNR m6, m4, m2, 4, m7 + mova m2, m6 + PALIGNR m6, m5, m4, 4, m7 + mova m4, m6 +%endif + psrldq m5, 4 + dec cntd + jg .loop + RET + +cglobal vp9_ipred_hd_32x32_16, 4, 4 + 3 * ARCH_X86_64, 14, \ + 10 * -mmsize * ARCH_X86_32, dst, stride, l, a + mova m2, [lq+mmsize*0+0] + movu m1, [lq+mmsize*0+2] + movu m0, [lq+mmsize*0+4] + LOWPASS 0, 1, 2 + pavgw m1, m2 + SBUTTERFLY wd, 1, 0, 2 + mova m4, [lq+mmsize*1+0] + movu m3, [lq+mmsize*1+2] + movu m2, [lq+mmsize*1+4] + LOWPASS 2, 3, 4 + pavgw m3, m4 + SBUTTERFLY wd, 3, 2, 4 + SCRATCH 0, 8, rsp+0*mmsize + SCRATCH 1, 9, rsp+1*mmsize + SCRATCH 2, 10, rsp+2*mmsize + SCRATCH 3, 11, rsp+3*mmsize + mova m6, [lq+mmsize*2+0] + movu m5, [lq+mmsize*2+2] + movu m4, [lq+mmsize*2+4] + LOWPASS 4, 5, 6 + pavgw m5, m6 + SBUTTERFLY wd, 5, 4, 6 + mova m0, [lq+mmsize*3+0] + movu m1, [aq+mmsize*0-2] + PALIGNR m7, m1, m0, 2, m2 + PALIGNR m6, m1, m0, 4, m2 + LOWPASS 6, 7, 0 + pavgw m7, m0 + SBUTTERFLY wd, 7, 6, 0 + mova m2, [aq+mmsize*0+0] + movu m0, [aq+mmsize*0+2] + LOWPASS 0, 2, 1 + movu m1, [aq+mmsize*1-2] + mova m2, [aq+mmsize*1+0] + movu m3, [aq+mmsize*1+2] + LOWPASS 1, 2, 3 + SCRATCH 6, 12, rsp+6*mmsize + SCRATCH 7, 13, rsp+7*mmsize + movu m2, [aq+mmsize*2-2] + mova m3, [aq+mmsize*2+0] + movu m6, [aq+mmsize*2+2] + LOWPASS 2, 3, 6 + movu m3, [aq+mmsize*3-2] + psrldq m6, m3, 2 + psrldq m7, m3, 4 + LOWPASS 3, 6, 7 + UNSCRATCH 6, 12, rsp+6*mmsize + UNSCRATCH 7, 13, rsp+7*mmsize +%if ARCH_X86_32 + mova [rsp+4*mmsize], m4 + mova [rsp+5*mmsize], m5 + ; we already backed up m6/m7 earlier on x86-32 in SCRATCH, so we don't need + ; to do it again here +%endif + DEFINE_ARGS dst, stride, cnt, stride3, stride4, stride20, stride28 + mov cntd, 4 + lea stride3q, [strideq*3] +%if ARCH_X86_64 + lea stride4q, [strideq*4] + lea stride28q, [stride4q*8] + lea stride20q, [stride4q*5] + sub stride28q, stride4q +%endif + add dstq, stride3q + + ; x86-32 doesn't have enough registers, so on that platform, we split + ; the loop in 2... Otherwise you spend most of the loop (un)scratching +.loop: +%if ARCH_X86_64 + mova [dstq+stride28q + 0], m9 + mova [dstq+stride28q +16], m8 + mova [dstq+stride28q +32], m11 + mova [dstq+stride28q +48], m10 + mova [dstq+stride3q*8+ 0], m8 + mova [dstq+stride3q*8+16], m11 + mova [dstq+stride3q*8+32], m10 + mova [dstq+stride3q*8+48], m5 + mova [dstq+stride20q + 0], m11 + mova [dstq+stride20q +16], m10 + mova [dstq+stride20q +32], m5 + mova [dstq+stride20q +48], m4 + mova [dstq+stride4q*4+ 0], m10 + mova [dstq+stride4q*4+16], m5 + mova [dstq+stride4q*4+32], m4 + mova [dstq+stride4q*4+48], m7 +%endif + mova [dstq+stride3q*4+ 0], m5 + mova [dstq+stride3q*4+16], m4 + mova [dstq+stride3q*4+32], m7 + mova [dstq+stride3q*4+48], m6 + mova [dstq+strideq* 8+ 0], m4 + mova [dstq+strideq* 8+16], m7 + mova [dstq+strideq* 8+32], m6 + mova [dstq+strideq* 8+48], m0 + mova [dstq+strideq* 4+ 0], m7 + mova [dstq+strideq* 4+16], m6 + mova [dstq+strideq* 4+32], m0 + mova [dstq+strideq* 4+48], m1 + mova [dstq+strideq* 0+ 0], m6 + mova [dstq+strideq* 0+16], m0 + mova [dstq+strideq* 0+32], m1 + mova [dstq+strideq* 0+48], m2 + sub dstq, strideq +%if cpuflag(avx) +%if ARCH_X86_64 + vpalignr m9, m8, m9, 4 + vpalignr m8, m11, m8, 4 + vpalignr m11, m10, m11, 4 + vpalignr m10, m5, m10, 4 +%endif + vpalignr m5, m4, m5, 4 + vpalignr m4, m7, m4, 4 + vpalignr m7, m6, m7, 4 + vpalignr m6, m0, m6, 4 + vpalignr m0, m1, m0, 4 + vpalignr m1, m2, m1, 4 + vpalignr m2, m3, m2, 4 +%else +%if ARCH_X86_64 + PALIGNR m12, m8, m9, 4, m13 + mova m9, m12 + PALIGNR m12, m11, m8, 4, m13 + mova m8, m12 + PALIGNR m12, m10, m11, 4, m13 + mova m11, m12 + PALIGNR m12, m5, m10, 4, m13 + mova m10, m12 +%endif + SCRATCH 3, 12, rsp+8*mmsize, sh +%if notcpuflag(ssse3) + SCRATCH 2, 13, rsp+9*mmsize +%endif + PALIGNR m3, m4, m5, 4, m2 + mova m5, m3 + PALIGNR m3, m7, m4, 4, m2 + mova m4, m3 + PALIGNR m3, m6, m7, 4, m2 + mova m7, m3 + PALIGNR m3, m0, m6, 4, m2 + mova m6, m3 + PALIGNR m3, m1, m0, 4, m2 + mova m0, m3 +%if notcpuflag(ssse3) + UNSCRATCH 2, 13, rsp+9*mmsize + SCRATCH 0, 13, rsp+9*mmsize +%endif + PALIGNR m3, m2, m1, 4, m0 + mova m1, m3 + PALIGNR m3, reg_sh, m2, 4, m0 + mova m2, m3 +%if notcpuflag(ssse3) + UNSCRATCH 0, 13, rsp+9*mmsize +%endif + UNSCRATCH 3, 12, rsp+8*mmsize, sh +%endif + psrldq m3, 4 + dec cntd + jg .loop + +%if ARCH_X86_32 + UNSCRATCH 0, 8, rsp+0*mmsize + UNSCRATCH 1, 9, rsp+1*mmsize + UNSCRATCH 2, 10, rsp+2*mmsize + UNSCRATCH 3, 11, rsp+3*mmsize + mova m4, [rsp+4*mmsize] + mova m5, [rsp+5*mmsize] + mova m6, [rsp+6*mmsize] + mova m7, [rsp+7*mmsize] + DEFINE_ARGS dst, stride, stride5, stride3 + lea stride5q, [strideq*5] + lea dstq, [dstq+stride5q*4] + DEFINE_ARGS dst, stride, cnt, stride3 + mov cntd, 4 +.loop_2: + mova [dstq+stride3q*4+ 0], m1 + mova [dstq+stride3q*4+16], m0 + mova [dstq+stride3q*4+32], m3 + mova [dstq+stride3q*4+48], m2 + mova [dstq+strideq* 8+ 0], m0 + mova [dstq+strideq* 8+16], m3 + mova [dstq+strideq* 8+32], m2 + mova [dstq+strideq* 8+48], m5 + mova [dstq+strideq* 4+ 0], m3 + mova [dstq+strideq* 4+16], m2 + mova [dstq+strideq* 4+32], m5 + mova [dstq+strideq* 4+48], m4 + mova [dstq+strideq* 0+ 0], m2 + mova [dstq+strideq* 0+16], m5 + mova [dstq+strideq* 0+32], m4 + mova [dstq+strideq* 0+48], m7 + sub dstq, strideq +%if cpuflag(avx) + vpalignr m1, m0, m1, 4 + vpalignr m0, m3, m0, 4 + vpalignr m3, m2, m3, 4 + vpalignr m2, m5, m2, 4 + vpalignr m5, m4, m5, 4 + vpalignr m4, m7, m4, 4 + vpalignr m7, m6, m7, 4 +%else + SCRATCH 6, 12, rsp+8*mmsize, sh +%if notcpuflag(ssse3) + SCRATCH 7, 13, rsp+9*mmsize +%endif + PALIGNR m6, m0, m1, 4, m7 + mova m1, m6 + PALIGNR m6, m3, m0, 4, m7 + mova m0, m6 + PALIGNR m6, m2, m3, 4, m7 + mova m3, m6 + PALIGNR m6, m5, m2, 4, m7 + mova m2, m6 + PALIGNR m6, m4, m5, 4, m7 + mova m5, m6 +%if notcpuflag(ssse3) + UNSCRATCH 7, 13, rsp+9*mmsize + SCRATCH 5, 13, rsp+9*mmsize +%endif + PALIGNR m6, m7, m4, 4, m5 + mova m4, m6 + PALIGNR m6, reg_sh, m7, 4, m5 + mova m7, m6 +%if notcpuflag(ssse3) + UNSCRATCH 5, 13, rsp+9*mmsize +%endif + UNSCRATCH 6, 12, rsp+8*mmsize, sh +%endif + psrldq m6, 4 + dec cntd + jg .loop_2 +%endif + RET +%endmacro + +INIT_XMM sse2 +HD_FUNCS +INIT_XMM ssse3 +HD_FUNCS +INIT_XMM avx +HD_FUNCS diff --git a/libs/ffvpx/libavcodec/x86/vp9itxfm.asm b/libs/ffvpx/libavcodec/x86/vp9itxfm.asm new file mode 100644 index 000000000..2c63fe514 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp9itxfm.asm @@ -0,0 +1,3197 @@ +;****************************************************************************** +;* VP9 IDCT SIMD optimizations +;* +;* Copyright (C) 2013 Clément BÅ“sch <u pkh me> +;* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" +%include "vp9itxfm_template.asm" + +SECTION_RODATA 32 + +%macro VP9_IDCT_COEFFS 2-3 0 +const pw_m%1_%2 +times 8 dw -%1, %2 +const pw_%2_%1 +times 8 dw %2, %1 + +%if %3 == 1 +const pw_m%2_m%1 +times 8 dw -%2, -%1 +%if %1 != %2 +const pw_m%2_%1 +times 8 dw -%2, %1 +const pw_%1_%2 +times 8 dw %1, %2 +%endif +%endif + +%if %1 < 11585 +pw_m%1x2: times 16 dw -%1*2 +%elif %1 > 11585 +pw_%1x2: times 16 dw %1*2 +%else +const pw_%1x2 +times 16 dw %1*2 +%endif + +%if %2 != %1 +pw_%2x2: times 16 dw %2*2 +%endif +%endmacro + +VP9_IDCT_COEFFS 16364, 804 +VP9_IDCT_COEFFS 16305, 1606 +VP9_IDCT_COEFFS 16069, 3196, 1 +VP9_IDCT_COEFFS 15893, 3981 +VP9_IDCT_COEFFS 15137, 6270, 1 +VP9_IDCT_COEFFS 14811, 7005 +VP9_IDCT_COEFFS 14449, 7723 +VP9_IDCT_COEFFS 13160, 9760 +VP9_IDCT_COEFFS 11585, 11585, 1 +VP9_IDCT_COEFFS 11003, 12140 +VP9_IDCT_COEFFS 10394, 12665 +VP9_IDCT_COEFFS 9102, 13623, 1 +VP9_IDCT_COEFFS 8423, 14053 +VP9_IDCT_COEFFS 5520, 15426 +VP9_IDCT_COEFFS 4756, 15679 +VP9_IDCT_COEFFS 2404, 16207 + +const pw_5283_13377 +times 4 dw 5283, 13377 +const pw_9929_13377 +times 4 dw 9929, 13377 +const pw_15212_m13377 +times 4 dw 15212, -13377 +const pw_15212_9929 +times 4 dw 15212, 9929 +const pw_m5283_m15212 +times 4 dw -5283, -15212 +const pw_13377x2 +times 8 dw 13377*2 +const pw_m13377_13377 +times 4 dw -13377, 13377 +const pw_13377_0 +times 4 dw 13377, 0 + +cextern pw_8 +cextern pw_16 +cextern pw_32 +cextern pw_512 +cextern pw_1024 +cextern pw_2048 +cextern pw_m1 +cextern pd_8192 + +SECTION .text + +%macro VP9_UNPACK_MULSUB_2D_4X 6 ; dst1 [src1], dst2 [src2], dst3, dst4, mul1, mul2 + punpckhwd m%4, m%2, m%1 + punpcklwd m%2, m%1 + pmaddwd m%3, m%4, [pw_m%5_%6] + pmaddwd m%4, [pw_%6_%5] + pmaddwd m%1, m%2, [pw_m%5_%6] + pmaddwd m%2, [pw_%6_%5] +%endmacro + +%macro VP9_RND_SH_SUMSUB_BA 6 ; dst1 [src1], dst2 [src2], src3, src4, tmp, round + SUMSUB_BA d, %1, %2, %5 + SUMSUB_BA d, %3, %4, %5 + paddd m%1, %6 + paddd m%2, %6 + paddd m%3, %6 + paddd m%4, %6 + psrad m%1, 14 + psrad m%2, 14 + psrad m%3, 14 + psrad m%4, 14 + packssdw m%1, m%3 + packssdw m%2, m%4 +%endmacro + +%macro VP9_STORE_2X 5-6 dstq ; reg1, reg2, tmp1, tmp2, zero, dst +%if mmsize == 32 + pmovzxbw m%3, [%6] + pmovzxbw m%4, [%6+strideq] +%else + movh m%3, [%6] + movh m%4, [%6+strideq] + punpcklbw m%3, m%5 + punpcklbw m%4, m%5 +%endif + paddw m%3, m%1 + paddw m%4, m%2 +%if mmsize == 32 + packuswb m%3, m%4 + ; Intel... + vpermq m%3, m%3, q3120 + mova [%6], xm%3 + vextracti128 [%6+strideq], m%3, 1 +%elif mmsize == 16 + packuswb m%3, m%4 + movh [%6], m%3 + movhps [%6+strideq], m%3 +%else + packuswb m%3, m%5 + packuswb m%4, m%5 + movh [%6], m%3 + movh [%6+strideq], m%4 +%endif +%endmacro + +%macro ZERO_BLOCK 4 ; mem, stride, nnzcpl, zero_reg +%assign %%y 0 +%rep %3 +%assign %%x 0 +%rep %3*2/mmsize + mova [%1+%%y+%%x], %4 +%assign %%x (%%x+mmsize) +%endrep +%assign %%y (%%y+%2) +%endrep +%endmacro + +;------------------------------------------------------------------------------------------- +; void vp9_iwht_iwht_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); +;------------------------------------------------------------------------------------------- + +INIT_MMX mmx +cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob + mova m0, [blockq+0*8] + mova m1, [blockq+1*8] + mova m2, [blockq+2*8] + mova m3, [blockq+3*8] + psraw m0, 2 + psraw m1, 2 + psraw m2, 2 + psraw m3, 2 + + VP9_IWHT4_1D + TRANSPOSE4x4W 0, 1, 2, 3, 4 + VP9_IWHT4_1D + + pxor m4, m4 + VP9_STORE_2X 0, 1, 5, 6, 4 + lea dstq, [dstq+strideq*2] + VP9_STORE_2X 2, 3, 5, 6, 4 + ZERO_BLOCK blockq, 8, 4, m4 + RET + +;------------------------------------------------------------------------------------------- +; void vp9_idct_idct_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); +;------------------------------------------------------------------------------------------- + +; 2x2 top left corner +%macro VP9_IDCT4_2x2_1D 0 + pmulhrsw m0, m5 ; m0=t1 + mova m2, m0 ; m2=t0 + mova m3, m1 + pmulhrsw m1, m6 ; m1=t2 + pmulhrsw m3, m7 ; m3=t3 + VP9_IDCT4_1D_FINALIZE +%endmacro + +%macro VP9_IDCT4_WRITEOUT 0 +%if cpuflag(ssse3) + mova m5, [pw_2048] + pmulhrsw m0, m5 ; (x*2048 + (1<<14))>>15 <=> (x+8)>>4 + pmulhrsw m1, m5 +%else + mova m5, [pw_8] + paddw m0, m5 + paddw m1, m5 + psraw m0, 4 + psraw m1, 4 +%endif + VP9_STORE_2X 0, 1, 6, 7, 4 + lea dstq, [dstq+2*strideq] +%if cpuflag(ssse3) + pmulhrsw m2, m5 + pmulhrsw m3, m5 +%else + paddw m2, m5 + paddw m3, m5 + psraw m2, 4 + psraw m3, 4 +%endif + VP9_STORE_2X 2, 3, 6, 7, 4 +%endmacro + +%macro IDCT_4x4_FN 1 +INIT_MMX %1 +cglobal vp9_idct_idct_4x4_add, 4, 4, 0, dst, stride, block, eob + +%if cpuflag(ssse3) + cmp eobd, 4 ; 2x2 or smaller + jg .idctfull + + cmp eobd, 1 ; faster path for when only DC is set + jne .idct2x2 +%else + cmp eobd, 1 + jg .idctfull +%endif + +%if cpuflag(ssse3) + movd m0, [blockq] + mova m5, [pw_11585x2] + pmulhrsw m0, m5 + pmulhrsw m0, m5 +%else + DEFINE_ARGS dst, stride, block, coef + movsx coefd, word [blockq] + imul coefd, 11585 + add coefd, 8192 + sar coefd, 14 + imul coefd, 11585 + add coefd, (8 << 14) + 8192 + sar coefd, 14 + 4 + movd m0, coefd +%endif + pshufw m0, m0, 0 + pxor m4, m4 + movh [blockq], m4 +%if cpuflag(ssse3) + pmulhrsw m0, [pw_2048] ; (x*2048 + (1<<14))>>15 <=> (x+8)>>4 +%endif + VP9_STORE_2X 0, 0, 6, 7, 4 + lea dstq, [dstq+2*strideq] + VP9_STORE_2X 0, 0, 6, 7, 4 + RET + +%if cpuflag(ssse3) +; faster path for when only top left 2x2 block is set +.idct2x2: + movd m0, [blockq+0] + movd m1, [blockq+8] + mova m5, [pw_11585x2] + mova m6, [pw_6270x2] + mova m7, [pw_15137x2] + VP9_IDCT4_2x2_1D + ; partial 2x4 transpose + punpcklwd m0, m1 + punpcklwd m2, m3 + SBUTTERFLY dq, 0, 2, 1 + SWAP 1, 2 + VP9_IDCT4_2x2_1D + pxor m4, m4 ; used for the block reset, and VP9_STORE_2X + movh [blockq+ 0], m4 + movh [blockq+ 8], m4 + VP9_IDCT4_WRITEOUT + RET +%endif + +.idctfull: ; generic full 4x4 idct/idct + mova m0, [blockq+ 0] + mova m1, [blockq+ 8] + mova m2, [blockq+16] + mova m3, [blockq+24] +%if cpuflag(ssse3) + mova m6, [pw_11585x2] +%endif + mova m7, [pd_8192] ; rounding + VP9_IDCT4_1D + TRANSPOSE4x4W 0, 1, 2, 3, 4 + VP9_IDCT4_1D + pxor m4, m4 ; used for the block reset, and VP9_STORE_2X + mova [blockq+ 0], m4 + mova [blockq+ 8], m4 + mova [blockq+16], m4 + mova [blockq+24], m4 + VP9_IDCT4_WRITEOUT + RET +%endmacro + +IDCT_4x4_FN mmxext +IDCT_4x4_FN ssse3 + +;------------------------------------------------------------------------------------------- +; void vp9_iadst_iadst_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); +;------------------------------------------------------------------------------------------- + +%macro IADST4_FN 5 +INIT_MMX %5 +cglobal vp9_%1_%3_4x4_add, 3, 3, 0, dst, stride, block, eob +%if WIN64 && notcpuflag(ssse3) + WIN64_SPILL_XMM 8 +%endif + movdqa xmm5, [pd_8192] + mova m0, [blockq+ 0] + mova m1, [blockq+ 8] + mova m2, [blockq+16] + mova m3, [blockq+24] +%if cpuflag(ssse3) + mova m6, [pw_11585x2] +%endif +%ifnidn %1%3, iadstiadst + movdq2q m7, xmm5 +%endif + VP9_%2_1D + TRANSPOSE4x4W 0, 1, 2, 3, 4 + VP9_%4_1D + pxor m4, m4 ; used for the block reset, and VP9_STORE_2X + mova [blockq+ 0], m4 + mova [blockq+ 8], m4 + mova [blockq+16], m4 + mova [blockq+24], m4 + VP9_IDCT4_WRITEOUT + RET +%endmacro + +IADST4_FN idct, IDCT4, iadst, IADST4, sse2 +IADST4_FN iadst, IADST4, idct, IDCT4, sse2 +IADST4_FN iadst, IADST4, iadst, IADST4, sse2 + +IADST4_FN idct, IDCT4, iadst, IADST4, ssse3 +IADST4_FN iadst, IADST4, idct, IDCT4, ssse3 +IADST4_FN iadst, IADST4, iadst, IADST4, ssse3 + +%macro SCRATCH 3 +%if ARCH_X86_64 + SWAP %1, %2 +%else + mova [%3], m%1 +%endif +%endmacro + +%macro UNSCRATCH 3 +%if ARCH_X86_64 + SWAP %1, %2 +%else + mova m%1, [%3] +%endif +%endmacro + +;------------------------------------------------------------------------------------------- +; void vp9_idct_idct_8x8_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); +;------------------------------------------------------------------------------------------- + +%macro VP9_IDCT8_1D_FINALIZE 0 + SUMSUB_BA w, 3, 6, 5 ; m3=t0+t7, m6=t0-t7 + SUMSUB_BA w, 1, 2, 5 ; m1=t1+t6, m2=t1-t6 + SUMSUB_BA w, 7, 0, 5 ; m7=t2+t5, m0=t2-t5 + + UNSCRATCH 5, 8, blockq+ 0 + SCRATCH 2, 8, blockq+ 0 + + SUMSUB_BA w, 5, 4, 2 ; m5=t3+t4, m4=t3-t4 + SWAP 7, 6, 2 + SWAP 3, 5, 0 + +%if ARCH_X86_64 + SWAP 6, 8 +%endif +%endmacro + +; x86-32 +; - in: m0/m4 is in mem +; - out: m6 is in mem +; x86-64: +; - everything is in registers (m0-7) +%macro VP9_IDCT8_1D 0 +%if ARCH_X86_64 + SWAP 0, 8 + SWAP 4, 9 +%endif + + VP9_UNPACK_MULSUB_2W_4X 5, 3, 9102, 13623, D_8192_REG, 0, 4 ; m5=t5a, m3=t6a + VP9_UNPACK_MULSUB_2W_4X 1, 7, 16069, 3196, D_8192_REG, 0, 4 ; m1=t4a, m7=t7a + SUMSUB_BA w, 5, 1, 0 ; m5=t4a+t5a (t4), m1=t4a-t5a (t5a) + SUMSUB_BA w, 3, 7, 0 ; m3=t7a+t6a (t7), m7=t7a-t6a (t6a) +%if cpuflag(ssse3) + SUMSUB_BA w, 1, 7, 0 ; m1=t6a+t5a (t6), m7=t6a-t5a (t5) + pmulhrsw m1, W_11585x2_REG ; m1=t6 + pmulhrsw m7, W_11585x2_REG ; m7=t5 +%else + VP9_UNPACK_MULSUB_2W_4X 7, 1, 11585, 11585, D_8192_REG, 0, 4 +%endif + VP9_UNPACK_MULSUB_2W_4X 2, 6, 15137, 6270, D_8192_REG, 0, 4 ; m2=t2a, m6=t3a + + UNSCRATCH 0, 8, blockq+ 0 ; IN(0) + UNSCRATCH 4, 9, blockq+64 ; IN(4) + SCRATCH 5, 8, blockq+ 0 + +%if cpuflag(ssse3) + SUMSUB_BA w, 4, 0, 5 ; m4=IN(0)+IN(4) m0=IN(0)-IN(4) + pmulhrsw m4, W_11585x2_REG ; m4=t0a + pmulhrsw m0, W_11585x2_REG ; m0=t1a +%else + SCRATCH 7, 9, blockq+64 + VP9_UNPACK_MULSUB_2W_4X 0, 4, 11585, 11585, D_8192_REG, 5, 7 + UNSCRATCH 7, 9, blockq+64 +%endif + SUMSUB_BA w, 6, 4, 5 ; m6=t0a+t3a (t0), m4=t0a-t3a (t3) + SUMSUB_BA w, 2, 0, 5 ; m2=t1a+t2a (t1), m0=t1a-t2a (t2) + + VP9_IDCT8_1D_FINALIZE +%endmacro + +%macro VP9_IDCT8_4x4_1D 0 + pmulhrsw m0, W_11585x2_REG ; m0=t1a/t0a + pmulhrsw m6, m2, [pw_15137x2] ; m6=t3a + pmulhrsw m2, [pw_6270x2] ; m2=t2a + pmulhrsw m7, m1, [pw_16069x2] ; m7=t7a + pmulhrsw m1, [pw_3196x2] ; m1=t4a + pmulhrsw m5, m3, [pw_m9102x2] ; m5=t5a + pmulhrsw m3, [pw_13623x2] ; m3=t6a + SUMSUB_BA w, 5, 1, 4 ; m1=t4a+t5a (t4), m5=t4a-t5a (t5a) + SUMSUB_BA w, 3, 7, 4 ; m3=t7a+t6a (t7), m7=t7a-t6a (t6a) + SUMSUB_BA w, 1, 7, 4 ; m1=t6a+t5a (t6), m7=t6a-t5a (t5) + pmulhrsw m1, W_11585x2_REG ; m1=t6 + pmulhrsw m7, W_11585x2_REG ; m7=t5 + psubw m4, m0, m6 ; m4=t0a-t3a (t3) + paddw m6, m0 ; m6=t0a+t3a (t0) + SCRATCH 5, 8, blockq+ 0 + SUMSUB_BA w, 2, 0, 5 ; m2=t1a+t2a (t1), m0=t1a-t2a (t2) + VP9_IDCT8_1D_FINALIZE +%endmacro + +%macro VP9_IDCT8_2x2_1D 1 + pmulhrsw m0, W_11585x2_REG ; m0=t0 + pmulhrsw m3, m1, W_16069x2_REG ; m3=t7 + pmulhrsw m1, W_3196x2_REG ; m1=t4 + psubw m7, m3, m1 ; t5 = t7a - t4a + paddw m5, m3, m1 ; t6 = t7a + t4a + pmulhrsw m7, W_11585x2_REG ; m7=t5 + pmulhrsw m5, W_11585x2_REG ; m5=t6 + SWAP 5, 1 + ; merged VP9_IDCT8_1D_FINALIZE to make register-sharing w/ avx easier + psubw m6, m0, m3 ; m6=t0-t7 + paddw m3, m0 ; m3=t0+t7 + psubw m2, m0, m1 ; m2=t1-t6 + paddw m1, m0 ; m1=t1+t6 +%if %1 == 1 + punpcklwd m3, m1 +%define SCRATCH_REG 1 +%elif ARCH_X86_32 + mova [blockq+ 0], m2 +%define SCRATCH_REG 2 +%else +%define SCRATCH_REG 8 +%endif + psubw m4, m0, m5 ; m4=t3-t4 + paddw m5, m0 ; m5=t3+t4 + SUMSUB_BA w, 7, 0, SCRATCH_REG ; m7=t2+t5, m0=t2-t5 + SWAP 7, 6, 2 + SWAP 3, 5, 0 +%undef SCRATCH_REG +%endmacro + +%macro VP9_IDCT8_WRITEx2 6-8 5 ; line1, line2, tmp1, tmp2, zero, pw_1024/pw_16, shift +%if cpuflag(ssse3) + pmulhrsw m%1, %6 ; (x*1024 + (1<<14))>>15 <=> (x+16)>>5 + pmulhrsw m%2, %6 +%else + paddw m%1, %6 + paddw m%2, %6 + psraw m%1, %7 + psraw m%2, %7 +%endif +%if %0 <= 7 + VP9_STORE_2X %1, %2, %3, %4, %5 +%else + VP9_STORE_2X %1, %2, %3, %4, %5, %8 +%endif +%endmacro + +; x86-32: +; - m6 is in mem +; x86-64: +; - m8 holds m6 (SWAP) +; m6 holds zero +%macro VP9_IDCT8_WRITEOUT 0 +%if ARCH_X86_64 +%if cpuflag(ssse3) + mova m9, [pw_1024] +%else + mova m9, [pw_16] +%endif +%define ROUND_REG m9 +%else +%if cpuflag(ssse3) +%define ROUND_REG [pw_1024] +%else +%define ROUND_REG [pw_16] +%endif +%endif + SCRATCH 5, 10, blockq+16 + SCRATCH 7, 11, blockq+32 + VP9_IDCT8_WRITEx2 0, 1, 5, 7, 6, ROUND_REG + lea dstq, [dstq+2*strideq] + VP9_IDCT8_WRITEx2 2, 3, 5, 7, 6, ROUND_REG + lea dstq, [dstq+2*strideq] + UNSCRATCH 5, 10, blockq+16 + UNSCRATCH 7, 11, blockq+32 + VP9_IDCT8_WRITEx2 4, 5, 0, 1, 6, ROUND_REG + lea dstq, [dstq+2*strideq] + UNSCRATCH 5, 8, blockq+ 0 + VP9_IDCT8_WRITEx2 5, 7, 0, 1, 6, ROUND_REG + +%undef ROUND_REG +%endmacro + +%macro VP9_IDCT_IDCT_8x8_ADD_XMM 2 +INIT_XMM %1 +cglobal vp9_idct_idct_8x8_add, 4, 4, %2, dst, stride, block, eob + +%if cpuflag(ssse3) +%if ARCH_X86_64 + mova m12, [pw_11585x2] ; often used +%define W_11585x2_REG m12 +%else +%define W_11585x2_REG [pw_11585x2] +%endif + + cmp eobd, 12 ; top left half or less + jg .idctfull + + cmp eobd, 3 ; top left corner or less + jg .idcthalf + + cmp eobd, 1 ; faster path for when only DC is set + jne .idcttopleftcorner +%else + cmp eobd, 1 + jg .idctfull +%endif + +%if cpuflag(ssse3) + movd m0, [blockq] + pmulhrsw m0, W_11585x2_REG + pmulhrsw m0, W_11585x2_REG +%else + DEFINE_ARGS dst, stride, block, coef + movsx coefd, word [blockq] + imul coefd, 11585 + add coefd, 8192 + sar coefd, 14 + imul coefd, 11585 + add coefd, (16 << 14) + 8192 + sar coefd, 14 + 5 + movd m0, coefd +%endif + SPLATW m0, m0, 0 + pxor m4, m4 + movd [blockq], m4 +%if cpuflag(ssse3) + pmulhrsw m0, [pw_1024] ; (x*1024 + (1<<14))>>15 <=> (x+16)>>5 +%endif +%rep 3 + VP9_STORE_2X 0, 0, 6, 7, 4 + lea dstq, [dstq+2*strideq] +%endrep + VP9_STORE_2X 0, 0, 6, 7, 4 + RET + +%if cpuflag(ssse3) +; faster path for when only left corner is set (3 input: DC, right to DC, below +; to DC). Note: also working with a 2x2 block +.idcttopleftcorner: + movd m0, [blockq+0] + movd m1, [blockq+16] +%if ARCH_X86_64 + mova m10, [pw_3196x2] + mova m11, [pw_16069x2] +%define W_3196x2_REG m10 +%define W_16069x2_REG m11 +%else +%define W_3196x2_REG [pw_3196x2] +%define W_16069x2_REG [pw_16069x2] +%endif + VP9_IDCT8_2x2_1D 1 + ; partial 2x8 transpose + ; punpcklwd m0, m1 already done inside idct + punpcklwd m2, m3 + punpcklwd m4, m5 + punpcklwd m6, m7 + punpckldq m0, m2 + punpckldq m4, m6 + SBUTTERFLY qdq, 0, 4, 1 + SWAP 1, 4 + VP9_IDCT8_2x2_1D 2 +%if ARCH_X86_64 + SWAP 6, 8 +%endif + pxor m6, m6 ; used for the block reset, and VP9_STORE_2X + VP9_IDCT8_WRITEOUT +%if ARCH_X86_64 + movd [blockq+ 0], m6 + movd [blockq+16], m6 +%else + mova [blockq+ 0], m6 + mova [blockq+16], m6 + mova [blockq+32], m6 +%endif + RET + +.idcthalf: + movh m0, [blockq + 0] + movh m1, [blockq +16] + movh m2, [blockq +32] + movh m3, [blockq +48] + VP9_IDCT8_4x4_1D + ; partial 4x8 transpose +%if ARCH_X86_32 + mova m6, [blockq+ 0] +%endif + punpcklwd m0, m1 + punpcklwd m2, m3 + punpcklwd m4, m5 + punpcklwd m6, m7 + SBUTTERFLY dq, 0, 2, 1 + SBUTTERFLY dq, 4, 6, 5 + SBUTTERFLY qdq, 0, 4, 1 + SBUTTERFLY qdq, 2, 6, 5 + SWAP 1, 4 + SWAP 3, 6 + VP9_IDCT8_4x4_1D +%if ARCH_X86_64 + SWAP 6, 8 +%endif + pxor m6, m6 + VP9_IDCT8_WRITEOUT +%if ARCH_X86_64 + movh [blockq+ 0], m6 + movh [blockq+16], m6 + movh [blockq+32], m6 +%else + mova [blockq+ 0], m6 + mova [blockq+16], m6 + mova [blockq+32], m6 +%endif + movh [blockq+48], m6 + RET +%endif + +.idctfull: ; generic full 8x8 idct/idct +%if ARCH_X86_64 + mova m0, [blockq+ 0] ; IN(0) +%endif + mova m1, [blockq+ 16] ; IN(1) + mova m2, [blockq+ 32] ; IN(2) + mova m3, [blockq+ 48] ; IN(3) +%if ARCH_X86_64 + mova m4, [blockq+ 64] ; IN(4) +%endif + mova m5, [blockq+ 80] ; IN(5) + mova m6, [blockq+ 96] ; IN(6) + mova m7, [blockq+112] ; IN(7) +%if ARCH_X86_64 + mova m11, [pd_8192] ; rounding +%define D_8192_REG m11 +%else +%define D_8192_REG [pd_8192] +%endif + VP9_IDCT8_1D +%if ARCH_X86_64 + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8 +%else + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [blockq+0], [blockq+64], 1 + mova [blockq+0], m0 +%endif + VP9_IDCT8_1D + +%if ARCH_X86_64 + SWAP 6, 8 +%endif + pxor m6, m6 ; used for the block reset, and VP9_STORE_2X + VP9_IDCT8_WRITEOUT + ZERO_BLOCK blockq, 16, 8, m6 + RET +%undef W_11585x2_REG +%endmacro + +VP9_IDCT_IDCT_8x8_ADD_XMM sse2, 12 +VP9_IDCT_IDCT_8x8_ADD_XMM ssse3, 13 +VP9_IDCT_IDCT_8x8_ADD_XMM avx, 13 + +;--------------------------------------------------------------------------------------------- +; void vp9_iadst_iadst_8x8_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); +;--------------------------------------------------------------------------------------------- + +; x86-32: +; - in: m0/3/4/7 are in mem [blockq+N*16] +; - out: m6 is in mem [blockq+0] +; x86-64: +; - everything is in registers +%macro VP9_IADST8_1D 0 ; input/output=m0/1/2/3/4/5/6/7 +%if ARCH_X86_64 + SWAP 0, 8 + SWAP 3, 9 + SWAP 4, 10 + SWAP 7, 11 +%endif + + VP9_UNPACK_MULSUB_2D_4X 5, 2, 0, 3, 14449, 7723 ; m5/2=t3[d], m2/4=t2[d] + VP9_UNPACK_MULSUB_2D_4X 1, 6, 4, 7, 4756, 15679 ; m1/4=t7[d], m6/7=t6[d] + SCRATCH 4, 12, blockq+1*16 + VP9_RND_SH_SUMSUB_BA 6, 2, 7, 3, 4, D_8192_REG ; m6=t2[w], m2=t6[w] + UNSCRATCH 4, 12, blockq+1*16 + VP9_RND_SH_SUMSUB_BA 1, 5, 4, 0, 3, D_8192_REG ; m1=t3[w], m5=t7[w] + + UNSCRATCH 0, 8, blockq+16*0 + UNSCRATCH 3, 9, blockq+16*3 + UNSCRATCH 4, 10, blockq+16*4 + UNSCRATCH 7, 11, blockq+16*7 + SCRATCH 1, 8, blockq+16*1 + SCRATCH 2, 9, blockq+16*2 + SCRATCH 5, 10, blockq+16*5 + SCRATCH 6, 11, blockq+16*6 + + VP9_UNPACK_MULSUB_2D_4X 7, 0, 1, 2, 16305, 1606 ; m7/1=t1[d], m0/2=t0[d] + VP9_UNPACK_MULSUB_2D_4X 3, 4, 5, 6, 10394, 12665 ; m3/5=t5[d], m4/6=t4[d] + SCRATCH 1, 12, blockq+ 0*16 + VP9_RND_SH_SUMSUB_BA 4, 0, 6, 2, 1, D_8192_REG ; m4=t0[w], m0=t4[w] + UNSCRATCH 1, 12, blockq+ 0*16 + VP9_RND_SH_SUMSUB_BA 3, 7, 5, 1, 2, D_8192_REG ; m3=t1[w], m7=t5[w] + + UNSCRATCH 2, 9, blockq+16*2 + UNSCRATCH 5, 10, blockq+16*5 + SCRATCH 3, 9, blockq+16*3 + SCRATCH 4, 10, blockq+16*4 + + ; m4=t0, m3=t1, m6=t2, m1=t3, m0=t4, m7=t5, m2=t6, m5=t7 + + VP9_UNPACK_MULSUB_2D_4X 0, 7, 1, 3, 15137, 6270 ; m0/1=t5[d], m7/3=t4[d] + VP9_UNPACK_MULSUB_2D_4X 5, 2, 4, 6, 6270, 15137 ; m5/4=t6[d], m2/6=t7[d] + SCRATCH 1, 12, blockq+ 0*16 + VP9_RND_SH_SUMSUB_BA 5, 7, 4, 3, 1, D_8192_REG + UNSCRATCH 1, 12, blockq+ 0*16 + PSIGNW m5, W_M1_REG ; m5=out1[w], m7=t6[w] + VP9_RND_SH_SUMSUB_BA 2, 0, 6, 1, 3, D_8192_REG ; m2=out6[w], m0=t7[w] + + UNSCRATCH 1, 8, blockq+16*1 + UNSCRATCH 3, 9, blockq+16*3 + UNSCRATCH 4, 10, blockq+16*4 + UNSCRATCH 6, 11, blockq+16*6 + SCRATCH 2, 8, blockq+16*0 + + SUMSUB_BA w, 6, 4, 2 ; m6=out0[w], m4=t2[w] + SUMSUB_BA w, 1, 3, 2 + PSIGNW m1, W_M1_REG ; m1=out7[w], m3=t3[w] + + ; m6=out0, m5=out1, m4=t2, m3=t3, m7=t6, m0=t7, m2=out6, m1=out7 + + ; unfortunately, the code below overflows in some cases +%if 0; cpuflag(ssse3) + SUMSUB_BA w, 3, 4, 2 + SUMSUB_BA w, 0, 7, 2 + pmulhrsw m3, W_11585x2_REG + pmulhrsw m7, W_11585x2_REG + pmulhrsw m4, W_11585x2_REG ; out4 + pmulhrsw m0, W_11585x2_REG ; out2 +%else + SCRATCH 5, 9, blockq+16*1 + VP9_UNPACK_MULSUB_2W_4X 4, 3, 11585, 11585, D_8192_REG, 2, 5 + VP9_UNPACK_MULSUB_2W_4X 7, 0, 11585, 11585, D_8192_REG, 2, 5 + UNSCRATCH 5, 9, blockq+16*1 +%endif + PSIGNW m3, W_M1_REG ; out3 + PSIGNW m7, W_M1_REG ; out5 + + ; m6=out0, m5=out1, m0=out2, m3=out3, m4=out4, m7=out5, m2=out6, m1=out7 + +%if ARCH_X86_64 + SWAP 2, 8 +%endif + SWAP 0, 6, 2 + SWAP 7, 1, 5 +%endmacro + +%macro IADST8_FN 6 +INIT_XMM %5 +cglobal vp9_%1_%3_8x8_add, 3, 3, %6, dst, stride, block, eob + +%ifidn %1, idct +%define first_is_idct 1 +%else +%define first_is_idct 0 +%endif + +%ifidn %3, idct +%define second_is_idct 1 +%else +%define second_is_idct 0 +%endif + +%if ARCH_X86_64 + mova m0, [blockq+ 0] ; IN(0) +%endif + mova m1, [blockq+ 16] ; IN(1) + mova m2, [blockq+ 32] ; IN(2) +%if ARCH_X86_64 || first_is_idct + mova m3, [blockq+ 48] ; IN(3) +%endif +%if ARCH_X86_64 + mova m4, [blockq+ 64] ; IN(4) +%endif + mova m5, [blockq+ 80] ; IN(5) + mova m6, [blockq+ 96] ; IN(6) +%if ARCH_X86_64 || first_is_idct + mova m7, [blockq+112] ; IN(7) +%endif +%if ARCH_X86_64 +%if cpuflag(ssse3) + mova m15, [pw_11585x2] ; often used +%endif + mova m13, [pd_8192] ; rounding + mova m14, [pw_m1] +%define W_11585x2_REG m15 +%define D_8192_REG m13 +%define W_M1_REG m14 +%else +%define W_11585x2_REG [pw_11585x2] +%define D_8192_REG [pd_8192] +%define W_M1_REG [pw_m1] +%endif + + ; note different calling conventions for idct8 vs. iadst8 on x86-32 + VP9_%2_1D +%if ARCH_X86_64 + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8 +%else + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [blockq+0], [blockq+64], 1 + mova [blockq+ 0], m0 +%if second_is_idct == 0 + mova [blockq+ 48], m3 + mova [blockq+112], m7 +%endif +%endif + VP9_%4_1D + +%if ARCH_X86_64 + SWAP 6, 8 +%endif + pxor m6, m6 ; used for the block reset, and VP9_STORE_2X + VP9_IDCT8_WRITEOUT + ZERO_BLOCK blockq, 16, 8, m6 + RET + +%undef W_11585x2_REG +%undef first_is_idct +%undef second_is_idct + +%endmacro + +IADST8_FN idct, IDCT8, iadst, IADST8, sse2, 15 +IADST8_FN iadst, IADST8, idct, IDCT8, sse2, 15 +IADST8_FN iadst, IADST8, iadst, IADST8, sse2, 15 +IADST8_FN idct, IDCT8, iadst, IADST8, ssse3, 16 +IADST8_FN idct, IDCT8, iadst, IADST8, avx, 16 +IADST8_FN iadst, IADST8, idct, IDCT8, ssse3, 16 +IADST8_FN iadst, IADST8, idct, IDCT8, avx, 16 +IADST8_FN iadst, IADST8, iadst, IADST8, ssse3, 16 +IADST8_FN iadst, IADST8, iadst, IADST8, avx, 16 + +;--------------------------------------------------------------------------------------------- +; void vp9_idct_idct_16x16_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); +;--------------------------------------------------------------------------------------------- + +; x86-64: +; at the end of this macro, m7 is stored in [%4+15*%5] +; everything else (t0-6 and t8-15) is stored in m0-6 and m8-15 +; the following sumsubs have not been done yet: +; SUMSUB_BA w, 6, 9, 15 ; t6, t9 +; SUMSUB_BA w, 7, 8, 15 ; t7, t8 +; or (x86-32) t0-t5 are in m0-m5, t10-t15 are in x11/9/7/5/3/1, +; and the following simsubs have not been done yet: +; SUMSUB_BA w, x13, x14, 7 ; t6, t9 +; SUMSUB_BA w, x15, x12, 7 ; t7, t8 + +%macro VP9_IDCT16_1D_START 6 ; src, nnzc, stride, scratch, scratch_stride, is_iadst +%if %2 <= 4 + mova m3, [%1+ 1*%3] ; IN(1) + mova m0, [%1+ 3*%3] ; IN(3) + + pmulhrsw m4, m3, [pw_16305x2] ; t14-15 + pmulhrsw m3, [pw_1606x2] ; t8-9 + pmulhrsw m7, m0, [pw_m4756x2] ; t10-11 + pmulhrsw m0, [pw_15679x2] ; t12-13 + + ; m8=t0, m9=t1, m10=t2, m11=t3, m12=t4, m14=t5, m13=t6, m15=t7 + ; m3=t8, m5=t9, m1=t10, m7=t11, m0=t12, m6=t13, m2=t14, m4=t15 + + VP9_UNPACK_MULSUB_2W_4X 2, 5, 4, 3, 15137, 6270, [pd_8192], 1, 6 ; t9, t14 + SCRATCH 4, 10, %4+ 1*%5 + SCRATCH 5, 11, %4+ 7*%5 + VP9_UNPACK_MULSUB_2W_4X 6, 1, 0, 7, 6270, m15137, [pd_8192], 4, 5 ; t10, t13 + UNSCRATCH 5, 11, %4+ 7*%5 + + ; m15=t0, m14=t1, m13=t2, m12=t3, m11=t4, m10=t5, m9=t6, m8=t7 + ; m7=t8, m6=t9, m2=t10, m3=t11, m4=t12, m5=t13, m1=t14, m0=t15 +%else + mova m5, [%1+ 1*%3] ; IN(1) + mova m4, [%1+ 7*%3] ; IN(7) +%if %2 <= 8 + pmulhrsw m2, m5, [pw_16305x2] ; t15 + pmulhrsw m5, [pw_1606x2] ; t8 + pmulhrsw m3, m4, [pw_m10394x2] ; t9 + pmulhrsw m4, [pw_12665x2] ; t14 +%else + mova m3, [%1+ 9*%3] ; IN(9) + mova m2, [%1+15*%3] ; IN(15) + + ; m10=in0, m5=in1, m14=in2, m6=in3, m9=in4, m7=in5, m15=in6, m4=in7 + ; m11=in8, m3=in9, m12=in10 m0=in11, m8=in12, m1=in13, m13=in14, m2=in15 + + VP9_UNPACK_MULSUB_2W_4X 5, 2, 16305, 1606, [pd_8192], 0, 1 ; t8, t15 + VP9_UNPACK_MULSUB_2W_4X 3, 4, 10394, 12665, [pd_8192], 0, 1 ; t9, t14 +%endif + + SUMSUB_BA w, 3, 5, 0 ; t8, t9 + SUMSUB_BA w, 4, 2, 0 ; t15, t14 + + VP9_UNPACK_MULSUB_2W_4X 2, 5, 15137, 6270, [pd_8192], 0, 1 ; t9, t14 + + SCRATCH 4, 10, %4+ 1*%5 + SCRATCH 5, 11, %4+ 7*%5 + + mova m6, [%1+ 3*%3] ; IN(3) + mova m7, [%1+ 5*%3] ; IN(5) +%if %2 <= 8 + pmulhrsw m0, m7, [pw_14449x2] ; t13 + pmulhrsw m7, [pw_7723x2] ; t10 + pmulhrsw m1, m6, [pw_m4756x2] ; t11 + pmulhrsw m6, [pw_15679x2] ; t12 +%else + mova m0, [%1+11*%3] ; IN(11) + mova m1, [%1+13*%3] ; IN(13) + + VP9_UNPACK_MULSUB_2W_4X 7, 0, 14449, 7723, [pd_8192], 4, 5 ; t10, t13 + VP9_UNPACK_MULSUB_2W_4X 1, 6, 4756, 15679, [pd_8192], 4, 5 ; t11, t12 +%endif + + ; m11=t0, m10=t1, m9=t2, m8=t3, m14=t4, m12=t5, m15=t6, m13=t7 + ; m5=t8, m3=t9, m7=t10, m1=t11, m6=t12, m0=t13, m4=t14, m2=t15 + + SUMSUB_BA w, 7, 1, 4 ; t11, t10 + SUMSUB_BA w, 0, 6, 4 ; t12, t13 + + ; m8=t0, m9=t1, m10=t2, m11=t3, m12=t4, m14=t5, m13=t6, m15=t7 + ; m3=t8, m5=t9, m1=t10, m7=t11, m0=t12, m6=t13, m2=t14, m4=t15 + + VP9_UNPACK_MULSUB_2W_4X 6, 1, 6270, m15137, [pd_8192], 4, 5 ; t10, t13 + + UNSCRATCH 5, 11, %4+ 7*%5 +%endif + + ; m8=t0, m9=t1, m10=t2, m11=t3, m12=t4, m13=t5, m14=t6, m15=t7 + ; m3=t8, m2=t9, m6=t10, m7=t11, m0=t12, m1=t13, m5=t14, m4=t15 + + SUMSUB_BA w, 7, 3, 4 ; t8, t11 + + ; backup first register + mova [%4+15*%5], m7 + + SUMSUB_BA w, 6, 2, 7 ; t9, t10 + UNSCRATCH 4, 10, %4+ 1*%5 + SUMSUB_BA w, 0, 4, 7 ; t15, t12 + SUMSUB_BA w, 1, 5, 7 ; t14. t13 + + ; m15=t0, m14=t1, m13=t2, m12=t3, m11=t4, m10=t5, m9=t6, m8=t7 + ; m7=t8, m6=t9, m2=t10, m3=t11, m4=t12, m5=t13, m1=t14, m0=t15 + +%if cpuflag(ssse3) && %6 == 0 + SUMSUB_BA w, 2, 5, 7 + SUMSUB_BA w, 3, 4, 7 + pmulhrsw m5, [pw_11585x2] ; t10 + pmulhrsw m4, [pw_11585x2] ; t11 + pmulhrsw m3, [pw_11585x2] ; t12 + pmulhrsw m2, [pw_11585x2] ; t13 +%else + SCRATCH 6, 10, %4+ 1*%5 + VP9_UNPACK_MULSUB_2W_4X 5, 2, 11585, 11585, [pd_8192], 6, 7 ; t10, t13 + VP9_UNPACK_MULSUB_2W_4X 4, 3, 11585, 11585, [pd_8192], 6, 7 ; t11, t12 + UNSCRATCH 6, 10, %4+ 1*%5 +%endif + + ; m15=t0, m14=t1, m13=t2, m12=t3, m11=t4, m10=t5, m9=t6, m8=t7 + ; m7=t8, m6=t9, m5=t10, m4=t11, m3=t12, m2=t13, m1=t14, m0=t15 + + SCRATCH 0, 8, %4+ 1*%5 + SCRATCH 1, 9, %4+ 3*%5 + SCRATCH 2, 10, %4+ 5*%5 + SCRATCH 3, 11, %4+ 7*%5 + SCRATCH 4, 12, %4+ 9*%5 + SCRATCH 5, 13, %4+11*%5 + SCRATCH 6, 14, %4+13*%5 + + ; even (tx8x8) +%if %2 <= 4 + mova m3, [%1+ 0*%3] ; IN(0) + mova m4, [%1+ 2*%3] ; IN(2) + + pmulhrsw m3, [pw_11585x2] ; t0-t3 + pmulhrsw m7, m4, [pw_16069x2] ; t6-7 + pmulhrsw m4, [pw_3196x2] ; t4-5 + +%if 0 ; overflows :( + paddw m6, m7, m4 + psubw m5, m7, m4 + pmulhrsw m5, [pw_11585x2] ; t5 + pmulhrsw m6, [pw_11585x2] ; t6 +%else + VP9_UNPACK_MULSUB_2W_4X 5, 6, 7, 4, 11585, 11585, [pd_8192], 0, 1 ; t5, t6 +%endif + + psubw m0, m3, m7 + paddw m7, m3 + psubw m1, m3, m6 + paddw m6, m3 + psubw m2, m3, m5 + paddw m5, m3 + +%if ARCH_X86_32 + SWAP 0, 7 +%endif + SCRATCH 7, 15, %4+12*%5 +%else + mova m6, [%1+ 2*%3] ; IN(2) + mova m1, [%1+ 4*%3] ; IN(4) + mova m7, [%1+ 6*%3] ; IN(6) +%if %2 <= 8 + pmulhrsw m0, m1, [pw_15137x2] ; t3 + pmulhrsw m1, [pw_6270x2] ; t2 + pmulhrsw m5, m6, [pw_16069x2] ; t7 + pmulhrsw m6, [pw_3196x2] ; t4 + pmulhrsw m4, m7, [pw_m9102x2] ; t5 + pmulhrsw m7, [pw_13623x2] ; t6 +%else + mova m4, [%1+10*%3] ; IN(10) + mova m0, [%1+12*%3] ; IN(12) + mova m5, [%1+14*%3] ; IN(14) + + VP9_UNPACK_MULSUB_2W_4X 1, 0, 15137, 6270, [pd_8192], 2, 3 ; t2, t3 + VP9_UNPACK_MULSUB_2W_4X 6, 5, 16069, 3196, [pd_8192], 2, 3 ; t4, t7 + VP9_UNPACK_MULSUB_2W_4X 4, 7, 9102, 13623, [pd_8192], 2, 3 ; t5, t6 +%endif + + SUMSUB_BA w, 4, 6, 2 ; t4, t5 + SUMSUB_BA w, 7, 5, 2 ; t7, t6 + +%if cpuflag(ssse3) && %6 == 0 + SUMSUB_BA w, 6, 5, 2 + pmulhrsw m5, [pw_11585x2] ; t5 + pmulhrsw m6, [pw_11585x2] ; t6 +%else + VP9_UNPACK_MULSUB_2W_4X 5, 6, 11585, 11585, [pd_8192], 2, 3 ; t5, t6 +%endif + + SCRATCH 5, 15, %4+10*%5 + mova m2, [%1+ 0*%3] ; IN(0) +%if %2 <= 8 + pmulhrsw m2, [pw_11585x2] ; t0 and t1 + psubw m3, m2, m0 + paddw m0, m2 + + SUMSUB_BA w, 7, 0, 5 ; t0, t7 +%else + mova m3, [%1+ 8*%3] ; IN(8) + + ; from 3 stages back +%if cpuflag(ssse3) && %6 == 0 + SUMSUB_BA w, 3, 2, 5 + pmulhrsw m3, [pw_11585x2] ; t0 + pmulhrsw m2, [pw_11585x2] ; t1 +%else + mova [%1+ 0*%3], m0 + VP9_UNPACK_MULSUB_2W_4X 2, 3, 11585, 11585, [pd_8192], 5, 0 ; t0, t1 + mova m0, [%1+ 0*%3] +%endif + + ; from 2 stages back + SUMSUB_BA w, 0, 3, 5 ; t0, t3 + + SUMSUB_BA w, 7, 0, 5 ; t0, t7 +%endif + UNSCRATCH 5, 15, %4+10*%5 +%if ARCH_X86_32 + SWAP 0, 7 +%endif + SCRATCH 7, 15, %4+12*%5 + SUMSUB_BA w, 1, 2, 7 ; t1, t2 + + ; from 1 stage back + SUMSUB_BA w, 6, 1, 7 ; t1, t6 + SUMSUB_BA w, 5, 2, 7 ; t2, t5 +%endif + SUMSUB_BA w, 4, 3, 7 ; t3, t4 + +%if ARCH_X86_64 + SWAP 0, 8 + SWAP 1, 9 + SWAP 2, 10 + SWAP 3, 11 + SWAP 4, 12 + SWAP 5, 13 + SWAP 6, 14 + + SUMSUB_BA w, 0, 15, 7 ; t0, t15 + SUMSUB_BA w, 1, 14, 7 ; t1, t14 + SUMSUB_BA w, 2, 13, 7 ; t2, t13 + SUMSUB_BA w, 3, 12, 7 ; t3, t12 + SUMSUB_BA w, 4, 11, 7 ; t4, t11 + SUMSUB_BA w, 5, 10, 7 ; t5, t10 +%else + SWAP 1, 6 + SWAP 2, 5 + SWAP 3, 4 + mova [%4+14*%5], m6 + +%macro %%SUMSUB_BA_STORE 5 ; reg, from_mem, to_mem, scratch, scratch_stride + mova m6, [%4+%2*%5] + SUMSUB_BA w, 6, %1, 7 + SWAP %1, 6 + mova [%4+%3*%5], m6 +%endmacro + + %%SUMSUB_BA_STORE 0, 1, 1, %4, %5 ; t0, t15 + %%SUMSUB_BA_STORE 1, 3, 3, %4, %5 ; t1, t14 + %%SUMSUB_BA_STORE 2, 5, 5, %4, %5 ; t2, t13 + %%SUMSUB_BA_STORE 3, 7, 7, %4, %5 ; t3, t12 + %%SUMSUB_BA_STORE 4, 9, 9, %4, %5 ; t4, t11 + %%SUMSUB_BA_STORE 5, 11, 11, %4, %5 ; t5, t10 +%endif +%endmacro + +%macro VP9_IDCT16_1D 2-4 16, 1 ; src, pass, nnzc, is_iadst +%if %2 == 1 + VP9_IDCT16_1D_START %1, %3, 32, tmpq, 16, %4 + +%if ARCH_X86_64 + ; backup a different register + mova m7, [tmpq+15*16] + mova [tmpq+ 1*16], m15 + + SUMSUB_BA w, 6, 9, 15 ; t6, t9 + SUMSUB_BA w, 7, 8, 15 ; t7, t8 + + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 15 + mova [tmpq+ 0], m0 + mova [tmpq+ 32], m1 + mova [tmpq+ 64], m2 + mova [tmpq+ 96], m3 + mova [tmpq+128], m4 + mova [tmpq+160], m5 + mova [tmpq+192], m6 + mova [tmpq+224], m7 + + mova m15, [tmpq+ 1*16] + TRANSPOSE8x8W 8, 9, 10, 11, 12, 13, 14, 15, 0 + mova [tmpq+ 16], m8 + mova [tmpq+ 48], m9 + mova [tmpq+ 80], m10 + mova [tmpq+112], m11 + mova [tmpq+144], m12 + mova [tmpq+176], m13 + mova [tmpq+208], m14 + mova [tmpq+240], m15 +%else + mova m6, [tmpq+13*16] + mova m7, [tmpq+14*16] + SUMSUB_BA w, 6, 7 ; t6, t9 + mova [tmpq+14*16], m6 + mova [tmpq+13*16], m7 + mova m7, [tmpq+15*16] + mova m6, [tmpq+12*16] + SUMSUB_BA w, 7, 6 ; t7, t8 + mova [tmpq+15*16], m6 + + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [tmpq+14*16], [tmpq+ 8*16], 1 + mova [tmpq+ 0*16], m0 + mova [tmpq+ 2*16], m1 + mova [tmpq+ 4*16], m2 + mova [tmpq+ 6*16], m3 + mova [tmpq+10*16], m5 + mova [tmpq+12*16], m6 + mova [tmpq+14*16], m7 + + mova m0, [tmpq+15*16] + mova m1, [tmpq+13*16] + mova m2, [tmpq+11*16] + mova m3, [tmpq+ 9*16] + mova m4, [tmpq+ 7*16] + mova m5, [tmpq+ 5*16] + mova m7, [tmpq+ 1*16] + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [tmpq+ 3*16], [tmpq+ 9*16], 1 + mova [tmpq+ 1*16], m0 + mova [tmpq+ 3*16], m1 + mova [tmpq+ 5*16], m2 + mova [tmpq+ 7*16], m3 + mova [tmpq+11*16], m5 + mova [tmpq+13*16], m6 + mova [tmpq+15*16], m7 +%endif +%else ; %2 == 2 + VP9_IDCT16_1D_START %1, %3, 32, %1, 32, %4 + +%if cpuflag(ssse3) +%define ROUND_REG [pw_512] +%else +%define ROUND_REG [pw_32] +%endif + + pxor m7, m7 +%if ARCH_X86_64 + ; backup more registers + mova [%1+ 2*32], m8 + mova [%1+ 3*32], m9 + + VP9_IDCT8_WRITEx2 0, 1, 8, 9, 7, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + VP9_IDCT8_WRITEx2 2, 3, 8, 9, 7, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + VP9_IDCT8_WRITEx2 4, 5, 8, 9, 7, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + + ; restore from cache + SWAP 0, 7 ; move zero from m7 to m0 + mova m7, [%1+15*32] + mova m8, [%1+ 2*32] + mova m9, [%1+ 3*32] + + SUMSUB_BA w, 6, 9, 3 ; t6, t9 + SUMSUB_BA w, 7, 8, 3 ; t7, t8 + + VP9_IDCT8_WRITEx2 6, 7, 3, 4, 0, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + VP9_IDCT8_WRITEx2 8, 9, 3, 4, 0, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + VP9_IDCT8_WRITEx2 10, 11, 1, 2, 0, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + VP9_IDCT8_WRITEx2 12, 13, 1, 2, 0, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + VP9_IDCT8_WRITEx2 14, 15, 1, 2, 0, ROUND_REG, 6 +%else + mova [tmpq+ 0*32], m5 + + VP9_IDCT8_WRITEx2 0, 1, 5, 6, 7, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + VP9_IDCT8_WRITEx2 2, 3, 5, 6, 7, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + + SWAP 0, 7 ; move zero from m7 to m0 + mova m5, [tmpq+ 0*32] + + VP9_IDCT8_WRITEx2 4, 5, 1, 2, 0, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + + mova m4, [tmpq+13*32] + mova m7, [tmpq+14*32] + mova m5, [tmpq+15*32] + mova m6, [tmpq+12*32] + SUMSUB_BADC w, 4, 7, 5, 6, 1 + + VP9_IDCT8_WRITEx2 4, 5, 1, 2, 0, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + VP9_IDCT8_WRITEx2 6, 7, 1, 2, 0, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + + mova m4, [tmpq+11*32] + mova m5, [tmpq+ 9*32] + mova m6, [tmpq+ 7*32] + mova m7, [tmpq+ 5*32] + + VP9_IDCT8_WRITEx2 4, 5, 1, 2, 0, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + VP9_IDCT8_WRITEx2 6, 7, 1, 2, 0, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + + mova m4, [tmpq+ 3*32] + mova m5, [tmpq+ 1*32] + + VP9_IDCT8_WRITEx2 4, 5, 1, 2, 0, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] +%endif + +%undef ROUND_REG +%endif ; %2 == 1/2 +%endmacro + +%macro VP9_STORE_2XFULL 6-7 strideq; dc, tmp1, tmp2, tmp3, tmp4, zero, stride + mova m%3, [dstq] + mova m%5, [dstq+%7] + punpcklbw m%2, m%3, m%6 + punpckhbw m%3, m%6 + punpcklbw m%4, m%5, m%6 + punpckhbw m%5, m%6 + paddw m%2, m%1 + paddw m%3, m%1 + paddw m%4, m%1 + paddw m%5, m%1 + packuswb m%2, m%3 + packuswb m%4, m%5 + mova [dstq], m%2 + mova [dstq+%7], m%4 +%endmacro + +%macro VP9_IDCT_IDCT_16x16_ADD_XMM 1 +INIT_XMM %1 +cglobal vp9_idct_idct_16x16_add, 4, 6, 16, 512, dst, stride, block, eob +%if cpuflag(ssse3) + ; 2x2=eob=3, 4x4=eob=10 + cmp eobd, 38 + jg .idctfull + cmp eobd, 1 ; faster path for when only DC is set + jne .idct8x8 +%else + cmp eobd, 1 ; faster path for when only DC is set + jg .idctfull +%endif + + ; dc-only +%if cpuflag(ssse3) + movd m0, [blockq] + mova m1, [pw_11585x2] + pmulhrsw m0, m1 + pmulhrsw m0, m1 +%else + DEFINE_ARGS dst, stride, block, coef + movsx coefd, word [blockq] + imul coefd, 11585 + add coefd, 8192 + sar coefd, 14 + imul coefd, 11585 + add coefd, (32 << 14) + 8192 + sar coefd, 14 + 6 + movd m0, coefd +%endif + SPLATW m0, m0, q0000 +%if cpuflag(ssse3) + pmulhrsw m0, [pw_512] +%endif + pxor m5, m5 + movd [blockq], m5 +%rep 7 + VP9_STORE_2XFULL 0, 1, 2, 3, 4, 5 + lea dstq, [dstq+2*strideq] +%endrep + VP9_STORE_2XFULL 0, 1, 2, 3, 4, 5 + RET + + DEFINE_ARGS dst, stride, block, cnt, dst_bak, tmp +%if cpuflag(ssse3) +.idct8x8: + mov tmpq, rsp + VP9_IDCT16_1D blockq, 1, 8, 0 + + mov cntd, 2 + mov dst_bakq, dstq +.loop2_8x8: + VP9_IDCT16_1D tmpq, 2, 8, 0 + lea dstq, [dst_bakq+8] + add tmpq, 16 + dec cntd + jg .loop2_8x8 + + ; at the end of the loop, m0 should still be zero + ; use that to zero out block coefficients + ZERO_BLOCK blockq, 32, 8, m0 + RET +%endif + +.idctfull: + mov cntd, 2 + mov tmpq, rsp +.loop1_full: + VP9_IDCT16_1D blockq, 1, 16, 0 + add blockq, 16 + add tmpq, 256 + dec cntd + jg .loop1_full + sub blockq, 32 + + mov cntd, 2 + mov tmpq, rsp + mov dst_bakq, dstq +.loop2_full: + VP9_IDCT16_1D tmpq, 2, 16, 0 + lea dstq, [dst_bakq+8] + add tmpq, 16 + dec cntd + jg .loop2_full + + ; at the end of the loop, m0 should still be zero + ; use that to zero out block coefficients + ZERO_BLOCK blockq, 32, 16, m0 + RET +%endmacro + +VP9_IDCT_IDCT_16x16_ADD_XMM sse2 +VP9_IDCT_IDCT_16x16_ADD_XMM ssse3 +VP9_IDCT_IDCT_16x16_ADD_XMM avx + +%macro VP9_IDCT16_YMM_1D 0 + VP9_UNPACK_MULSUB_2W_4X 1, 15, 16305, 1606, [pd_8192], 0, 4 ; t8, t15 + VP9_UNPACK_MULSUB_2W_4X 9, 7, 10394, 12665, [pd_8192], 0, 4 ; t9, t14 + + SUMSUB_BA w, 9, 1, 0 ; t8, t9 + SUMSUB_BA w, 7, 15, 0 ; t15, t14 + + VP9_UNPACK_MULSUB_2W_4X 15, 1, 15137, 6270, [pd_8192], 0, 4 ; t9, t14 + + VP9_UNPACK_MULSUB_2W_4X 5, 11, 14449, 7723, [pd_8192], 0, 4 ; t10, t13 + VP9_UNPACK_MULSUB_2W_4X 13, 3, 4756, 15679, [pd_8192], 0, 4 ; t11, t12 + + SUMSUB_BA w, 5, 13, 0 ; t11, t10 + SUMSUB_BA w, 11, 3, 0 ; t12, t13 + + VP9_UNPACK_MULSUB_2W_4X 3, 13, 6270, m15137, [pd_8192], 0, 4 ; t10, t13 + + SUMSUB_BA w, 5, 9, 0 ; t8, t11 + SUMSUB_BA w, 3, 15, 0 ; t9, t10 + SUMSUB_BA w, 11, 7, 0 ; t15, t12 + SUMSUB_BA w, 13, 1, 0 ; t14, t13 + + SUMSUB_BA w, 15, 1, 0 + SUMSUB_BA w, 9, 7, 0 + pmulhrsw m1, [pw_11585x2] ; t10 + pmulhrsw m7, [pw_11585x2] ; t11 + pmulhrsw m9, [pw_11585x2] ; t12 + pmulhrsw m15, [pw_11585x2] ; t13 + + ; even (tx8x8) + mova m4, [blockq+128] + mova [blockq+128], m5 + VP9_UNPACK_MULSUB_2W_4X 4, 12, 15137, 6270, [pd_8192], 0, 5 ; t2, t3 + VP9_UNPACK_MULSUB_2W_4X 2, 14, 16069, 3196, [pd_8192], 0, 5 ; t4, t7 + VP9_UNPACK_MULSUB_2W_4X 10, 6, 9102, 13623, [pd_8192], 0, 5 ; t5, t6 + mova m0, [blockq+ 0] + SUMSUB_BA w, 8, 0, 5 + pmulhrsw m8, [pw_11585x2] ; t0 + pmulhrsw m0, [pw_11585x2] ; t1 + + SUMSUB_BA w, 10, 2, 5 ; t4, t5 + SUMSUB_BA w, 6, 14, 5 ; t7, t6 + SUMSUB_BA w, 12, 8, 5 ; t0, t3 + SUMSUB_BA w, 4, 0, 5 ; t1, t2 + + SUMSUB_BA w, 2, 14, 5 + pmulhrsw m14, [pw_11585x2] ; t5 + pmulhrsw m2, [pw_11585x2] ; t6 + + SUMSUB_BA w, 6, 12, 5 ; t0, t7 + SUMSUB_BA w, 2, 4, 5 ; t1, t6 + SUMSUB_BA w, 14, 0, 5 ; t2, t5 + SUMSUB_BA w, 10, 8, 5 ; t3, t4 + + ; final stage + SUMSUB_BA w, 11, 6, 5 ; out0, out15 + SUMSUB_BA w, 13, 2, 5 ; out1, out14 + SUMSUB_BA w, 15, 14, 5 ; out2, out13 + SUMSUB_BA w, 9, 10, 5 ; out3, out12 + SUMSUB_BA w, 7, 8, 5 ; out4, out11 + SUMSUB_BA w, 1, 0, 5 ; out5, out10 + SUMSUB_BA w, 3, 4, 5 ; out6, out9 + mova m5, [blockq+128] + mova [blockq+192], m3 + SUMSUB_BA w, 5, 12, 3 ; out7, out8 + + SWAP 0, 11, 8, 12, 10 + SWAP 1, 13, 14, 2, 15, 6, 3, 9, 4, 7, 5 +%endmacro + +; this is almost identical to VP9_STORE_2X, but it does two rows +; for slightly improved interleaving, and it omits vpermq since the +; input is DC so all values are identical +%macro VP9_STORE_YMM_DC_4X 6 ; reg, tmp1, tmp2, tmp3, tmp4, zero + mova xm%2, [dstq] + mova xm%4, [dstq+strideq*2] + vinserti128 m%2, m%2, [dstq+strideq], 1 + vinserti128 m%4, m%4, [dstq+stride3q], 1 + punpckhbw m%3, m%2, m%6 + punpcklbw m%2, m%6 + punpckhbw m%5, m%4, m%6 + punpcklbw m%4, m%6 + paddw m%3, m%1 + paddw m%2, m%1 + paddw m%5, m%1 + paddw m%4, m%1 + packuswb m%2, m%3 + packuswb m%4, m%5 + mova [dstq], xm%2 + mova [dstq+strideq*2], xm%4 + vextracti128 [dstq+strideq], m%2, 1 + vextracti128 [dstq+stride3q], m%4, 1 +%endmacro + +%if ARCH_X86_64 && HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +cglobal vp9_idct_idct_16x16_add, 4, 4, 16, dst, stride, block, eob + cmp eobd, 1 ; faster path for when only DC is set + jg .idctfull + + ; dc-only + mova m1, [pw_11585x2] + vpbroadcastw m0, [blockq] + pmulhrsw m0, m1 + pmulhrsw m0, m1 + pxor m5, m5 + pmulhrsw m0, [pw_512] + movd [blockq], xm5 + + DEFINE_ARGS dst, stride, stride3, cnt + mov cntd, 4 + lea stride3q, [strideq*3] +.loop_dc: + VP9_STORE_YMM_DC_4X 0, 1, 2, 3, 4, 5 + lea dstq, [dstq+4*strideq] + dec cntd + jg .loop_dc + RET + + DEFINE_ARGS dst, stride, block, eob +.idctfull: + mova m1, [blockq+ 32] + mova m2, [blockq+ 64] + mova m3, [blockq+ 96] + mova m5, [blockq+160] + mova m6, [blockq+192] + mova m7, [blockq+224] + mova m8, [blockq+256] + mova m9, [blockq+288] + mova m10, [blockq+320] + mova m11, [blockq+352] + mova m12, [blockq+384] + mova m13, [blockq+416] + mova m14, [blockq+448] + mova m15, [blockq+480] + + VP9_IDCT16_YMM_1D + TRANSPOSE16x16W 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \ + [blockq+192], [blockq+128], 1 + mova [blockq+ 0], m0 + VP9_IDCT16_YMM_1D + + mova [blockq+224], m7 + + ; store + VP9_IDCT8_WRITEx2 0, 1, 6, 7, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + VP9_IDCT8_WRITEx2 2, 3, 6, 7, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + VP9_IDCT8_WRITEx2 4, 5, 6, 7, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + mova m6, [blockq+192] + mova m7, [blockq+224] + VP9_IDCT8_WRITEx2 6, 7, 1, 2, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + VP9_IDCT8_WRITEx2 8, 9, 1, 2, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + VP9_IDCT8_WRITEx2 10, 11, 1, 2, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + VP9_IDCT8_WRITEx2 12, 13, 1, 2, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + VP9_IDCT8_WRITEx2 14, 15, 1, 2, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + + ; at the end of the loop, m0 should still be zero + ; use that to zero out block coefficients + pxor m0, m0 + ZERO_BLOCK blockq, 32, 16, m0 + RET +%endif + +;--------------------------------------------------------------------------------------------- +; void vp9_iadst_iadst_16x16_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); +;--------------------------------------------------------------------------------------------- + +%macro VP9_IADST16_1D 2 ; src, pass +%assign %%str 16*%2 + mova m0, [%1+ 0*32] ; in0 + mova m1, [%1+15*32] ; in15 + mova m2, [%1+ 7*32] ; in7 + mova m3, [%1+ 8*32] ; in8 + + VP9_UNPACK_MULSUB_2D_4X 1, 0, 4, 5, 16364, 804 ; m1/4=t1[d], m0/5=t0[d] + VP9_UNPACK_MULSUB_2D_4X 2, 3, 7, 6, 11003, 12140 ; m2/7=t9[d], m3/6=t8[d] + SCRATCH 4, 8, tmpq+ 0*%%str + VP9_RND_SH_SUMSUB_BA 3, 0, 6, 5, 4, [pd_8192] ; m3=t0[w], m0=t8[w] + UNSCRATCH 4, 8, tmpq+ 0*%%str + VP9_RND_SH_SUMSUB_BA 2, 1, 7, 4, 5, [pd_8192] ; m2=t1[w], m1=t9[w] + + SCRATCH 0, 10, tmpq+ 0*%%str + SCRATCH 1, 11, tmpq+15*%%str + mova [tmpq+ 7*%%str], m2 + mova [tmpq+ 8*%%str], m3 + + mova m1, [%1+ 2*32] ; in2 + mova m0, [%1+13*32] ; in13 + mova m3, [%1+ 5*32] ; in5 + mova m2, [%1+10*32] ; in10 + + VP9_UNPACK_MULSUB_2D_4X 0, 1, 6, 7, 15893, 3981 ; m0/6=t3[d], m1/7=t2[d] + VP9_UNPACK_MULSUB_2D_4X 3, 2, 4, 5, 8423, 14053 ; m3/4=t11[d], m2/5=t10[d] + SCRATCH 4, 12, tmpq+ 2*%%str + VP9_RND_SH_SUMSUB_BA 2, 1, 5, 7, 4, [pd_8192] ; m2=t2[w], m1=t10[w] + UNSCRATCH 4, 12, tmpq+ 2*%%str + VP9_RND_SH_SUMSUB_BA 3, 0, 4, 6, 5, [pd_8192] ; m3=t3[w], m0=t11[w] + + SCRATCH 0, 12, tmpq+ 2*%%str + SCRATCH 1, 13, tmpq+13*%%str + mova [tmpq+ 5*%%str], m2 + mova [tmpq+10*%%str], m3 + + mova m2, [%1+ 4*32] ; in4 + mova m3, [%1+11*32] ; in11 + mova m0, [%1+ 3*32] ; in3 + mova m1, [%1+12*32] ; in12 + + VP9_UNPACK_MULSUB_2D_4X 3, 2, 7, 6, 14811, 7005 ; m3/7=t5[d], m2/6=t4[d] + VP9_UNPACK_MULSUB_2D_4X 0, 1, 4, 5, 5520, 15426 ; m0/4=t13[d], m1/5=t12[d] + SCRATCH 4, 9, tmpq+ 4*%%str + VP9_RND_SH_SUMSUB_BA 1, 2, 5, 6, 4, [pd_8192] ; m1=t4[w], m2=t12[w] + UNSCRATCH 4, 9, tmpq+ 4*%%str + VP9_RND_SH_SUMSUB_BA 0, 3, 4, 7, 6, [pd_8192] ; m0=t5[w], m3=t13[w] + + SCRATCH 0, 8, tmpq+ 4*%%str + mova [tmpq+11*%%str], m1 ; t4:m1->r11 + UNSCRATCH 0, 10, tmpq+ 0*%%str + UNSCRATCH 1, 11, tmpq+15*%%str + + ; round 2 interleaved part 1 + VP9_UNPACK_MULSUB_2D_4X 0, 1, 6, 7, 16069, 3196 ; m1/7=t8[d], m0/6=t9[d] + VP9_UNPACK_MULSUB_2D_4X 3, 2, 5, 4, 3196, 16069 ; m3/5=t12[d], m2/4=t13[d] + SCRATCH 4, 9, tmpq+ 3*%%str + VP9_RND_SH_SUMSUB_BA 3, 1, 5, 7, 4, [pd_8192] ; m3=t8[w], m1=t12[w] + UNSCRATCH 4, 9, tmpq+ 3*%%str + VP9_RND_SH_SUMSUB_BA 2, 0, 4, 6, 5, [pd_8192] ; m2=t9[w], m0=t13[w] + + SCRATCH 0, 10, tmpq+ 0*%%str + SCRATCH 1, 11, tmpq+15*%%str + SCRATCH 2, 14, tmpq+ 3*%%str + SCRATCH 3, 15, tmpq+12*%%str + + mova m2, [%1+ 6*32] ; in6 + mova m3, [%1+ 9*32] ; in9 + mova m0, [%1+ 1*32] ; in1 + mova m1, [%1+14*32] ; in14 + + VP9_UNPACK_MULSUB_2D_4X 3, 2, 7, 6, 13160, 9760 ; m3/7=t7[d], m2/6=t6[d] + VP9_UNPACK_MULSUB_2D_4X 0, 1, 4, 5, 2404, 16207 ; m0/4=t15[d], m1/5=t14[d] + SCRATCH 4, 9, tmpq+ 6*%%str + VP9_RND_SH_SUMSUB_BA 1, 2, 5, 6, 4, [pd_8192] ; m1=t6[w], m2=t14[w] + UNSCRATCH 4, 9, tmpq+ 6*%%str + VP9_RND_SH_SUMSUB_BA 0, 3, 4, 7, 6, [pd_8192] ; m0=t7[w], m3=t15[w] + + ; r8=t0, r7=t1, r5=t2, r10=t3, r11=t4, m8|r4=t5, m1=t6, m0=t7 + ; m10|r0=t8, m11|r15=t9, m13|r13=t10, m12|r2=t11, m14|r3=t12, m15|r12=t13, m2=t14, m3=t15 + + UNSCRATCH 4, 12, tmpq+ 2*%%str + UNSCRATCH 5, 13, tmpq+13*%%str + SCRATCH 0, 12, tmpq+ 1*%%str + SCRATCH 1, 13, tmpq+14*%%str + + ; remainder of round 2 (rest of t8-15) + VP9_UNPACK_MULSUB_2D_4X 5, 4, 6, 7, 9102, 13623 ; m5/6=t11[d], m4/7=t10[d] + VP9_UNPACK_MULSUB_2D_4X 3, 2, 1, 0, 13623, 9102 ; m3/1=t14[d], m2/0=t15[d] + SCRATCH 0, 9, tmpq+ 6*%%str + VP9_RND_SH_SUMSUB_BA 3, 4, 1, 7, 0, [pd_8192] ; m3=t10[w], m4=t14[w] + UNSCRATCH 0, 9, tmpq+ 6*%%str + VP9_RND_SH_SUMSUB_BA 2, 5, 0, 6, 1, [pd_8192] ; m2=t11[w], m5=t15[w] + + ; m15|r12=t8, m14|r3=t9, m3=t10, m2=t11, m11|r15=t12, m10|r0=t13, m4=t14, m5=t15 + + UNSCRATCH 6, 14, tmpq+ 3*%%str + UNSCRATCH 7, 15, tmpq+12*%%str + + SUMSUB_BA w, 3, 7, 1 + PSIGNW m3, [pw_m1] ; m3=out1[w], m7=t10[w] + SUMSUB_BA w, 2, 6, 1 ; m2=out14[w], m6=t11[w] + + ; unfortunately, the code below overflows in some cases, e.g. + ; http://downloads.webmproject.org/test_data/libvpx/vp90-2-14-resize-fp-tiles-16-8.webm +%if 0; cpuflag(ssse3) + SUMSUB_BA w, 7, 6, 1 + pmulhrsw m7, [pw_11585x2] ; m7=out6[w] + pmulhrsw m6, [pw_11585x2] ; m6=out9[w] +%else + VP9_UNPACK_MULSUB_2W_4X 6, 7, 11585, 11585, [pd_8192], 1, 0 +%endif + + mova [tmpq+ 3*%%str], m6 + mova [tmpq+ 6*%%str], m7 + UNSCRATCH 6, 10, tmpq+ 0*%%str + UNSCRATCH 7, 11, tmpq+15*%%str + mova [tmpq+13*%%str], m2 + SCRATCH 3, 11, tmpq+ 9*%%str + + VP9_UNPACK_MULSUB_2D_4X 7, 6, 2, 3, 15137, 6270 ; m6/3=t13[d], m7/2=t12[d] + VP9_UNPACK_MULSUB_2D_4X 5, 4, 1, 0, 6270, 15137 ; m5/1=t14[d], m4/0=t15[d] + SCRATCH 0, 9, tmpq+ 2*%%str + VP9_RND_SH_SUMSUB_BA 5, 6, 1, 3, 0, [pd_8192] ; m5=out2[w], m6=t14[w] + UNSCRATCH 0, 9, tmpq+ 2*%%str + VP9_RND_SH_SUMSUB_BA 4, 7, 0, 2, 1, [pd_8192] + PSIGNW m4, [pw_m1] ; m4=out13[w], m7=t15[w] + + ; unfortunately, the code below overflows in some cases +%if 0; cpuflag(ssse3) + SUMSUB_BA w, 7, 6, 1 + pmulhrsw m7, [pw_m11585x2] ; m7=out5[w] + pmulhrsw m6, [pw_11585x2] ; m6=out10[w] +%else + PSIGNW m7, [pw_m1] + VP9_UNPACK_MULSUB_2W_4X 7, 6, 11585, 11585, [pd_8192], 1, 0 +%endif + + ; m11|r13=out1, m5=out2, m7=out5, r15=out6, r3=out9, m6=out10, m4=out13, r2=out14 + + mova m2, [tmpq+ 8*%%str] + mova m3, [tmpq+ 7*%%str] + mova m1, [tmpq+11*%%str] + mova [tmpq+ 7*%%str], m6 + mova [tmpq+11*%%str], m4 + mova m4, [tmpq+ 5*%%str] + SCRATCH 5, 14, tmpq+ 5*%%str + SCRATCH 7, 15, tmpq+ 8*%%str + UNSCRATCH 6, 8, tmpq+ 4*%%str + UNSCRATCH 5, 12, tmpq+ 1*%%str + UNSCRATCH 7, 13, tmpq+14*%%str + + ; m2=t0, m3=t1, m9=t2, m0=t3, m1=t4, m8=t5, m13=t6, m12=t7 + ; m11|r13=out1, m5=out2, m7=out5, r15=out6, r3=out9, r10=out10, r11=out13, r2=out14 + + SUMSUB_BA w, 1, 2, 0 ; m1=t0[w], m2=t4[w] + mova m0, [tmpq+10*%%str] + SCRATCH 1, 12, tmpq+ 1*%%str + SUMSUB_BA w, 6, 3, 1 ; m8=t1[w], m3=t5[w] + SCRATCH 6, 13, tmpq+ 4*%%str + SUMSUB_BA w, 7, 4, 1 ; m13=t2[w], m9=t6[w] + SCRATCH 7, 8, tmpq+10*%%str + SUMSUB_BA w, 5, 0, 1 ; m12=t3[w], m0=t7[w] + SCRATCH 5, 9, tmpq+14*%%str + + VP9_UNPACK_MULSUB_2D_4X 2, 3, 7, 5, 15137, 6270 ; m2/6=t5[d], m3/10=t4[d] + VP9_UNPACK_MULSUB_2D_4X 0, 4, 1, 6, 6270, 15137 ; m0/14=t6[d], m9/15=t7[d] + SCRATCH 6, 10, tmpq+ 0*%%str + VP9_RND_SH_SUMSUB_BA 0, 3, 1, 5, 6, [pd_8192] + UNSCRATCH 6, 10, tmpq+ 0*%%str + PSIGNW m0, [pw_m1] ; m0=out3[w], m3=t6[w] + VP9_RND_SH_SUMSUB_BA 4, 2, 6, 7, 5, [pd_8192] ; m9=out12[w], m2=t7[w] + + UNSCRATCH 1, 8, tmpq+10*%%str + UNSCRATCH 5, 9, tmpq+14*%%str + UNSCRATCH 6, 12, tmpq+ 1*%%str + UNSCRATCH 7, 13, tmpq+ 4*%%str + SCRATCH 4, 9, tmpq+14*%%str + + SUMSUB_BA w, 1, 6, 4 ; m13=out0[w], m1=t2[w] + SUMSUB_BA w, 5, 7, 4 + PSIGNW m5, [pw_m1] ; m12=out15[w], m8=t3[w] + + ; unfortunately, the code below overflows in some cases, e.g. + ; http://downloads.webmproject.org/test_data/libvpx/vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm +%if 0 ; cpuflag(ssse3) + SUMSUB_BA w, 7, 6, 4 + pmulhrsw m7, [pw_m11585x2] ; m8=out7[w] + pmulhrsw m6, [pw_11585x2] ; m1=out8[w] + SWAP 6, 7 + SUMSUB_BA w, 3, 2, 4 + pmulhrsw m3, [pw_11585x2] ; m3=out4[w] + pmulhrsw m2, [pw_11585x2] ; m2=out11[w] +%else + SCRATCH 5, 8, tmpq+10*%%str + VP9_UNPACK_MULSUB_2W_4X 6, 7, 11585, m11585, [pd_8192], 5, 4 + VP9_UNPACK_MULSUB_2W_4X 2, 3, 11585, 11585, [pd_8192], 5, 4 + UNSCRATCH 5, 8, tmpq+10*%%str +%endif + + ; m13=out0, m0=out3, m3=out4, m8=out7, m1=out8, m2=out11, m9=out12, m12=out15 + ; m11|r13=out1, m5=out2, m7=out5, r15=out6, r3=out9, r10=out10, r11=out13, r2=out14 + +%if %2 == 1 +%if ARCH_X86_64 + mova m13, [tmpq+ 6*%%str] + TRANSPOSE8x8W 1, 11, 14, 0, 3, 15, 13, 6, 10 + mova [tmpq+ 0*16], m1 + mova [tmpq+ 2*16], m11 + mova [tmpq+ 4*16], m14 + mova [tmpq+ 6*16], m0 + mova m1, [tmpq+ 3*%%str] + mova m11, [tmpq+ 7*%%str] + mova m14, [tmpq+11*%%str] + mova m0, [tmpq+13*%%str] + mova [tmpq+ 8*16], m3 + mova [tmpq+10*16], m15 + mova [tmpq+12*16], m13 + mova [tmpq+14*16], m6 + + TRANSPOSE8x8W 7, 1, 11, 2, 9, 14, 0, 5, 10 + mova [tmpq+ 1*16], m7 + mova [tmpq+ 3*16], m1 + mova [tmpq+ 5*16], m11 + mova [tmpq+ 7*16], m2 + mova [tmpq+ 9*16], m9 + mova [tmpq+11*16], m14 + mova [tmpq+13*16], m0 + mova [tmpq+15*16], m5 +%else + mova [tmpq+12*%%str], m2 + mova [tmpq+ 1*%%str], m5 + mova [tmpq+15*%%str], m7 + mova m2, [tmpq+ 9*%%str] + mova m5, [tmpq+ 5*%%str] + mova m7, [tmpq+ 8*%%str] + TRANSPOSE8x8W 1, 2, 5, 0, 3, 7, 4, 6, [tmpq+ 6*%%str], [tmpq+ 8*%%str], 1 + mova [tmpq+ 0*16], m1 + mova [tmpq+ 2*16], m2 + mova [tmpq+ 4*16], m5 + mova [tmpq+ 6*16], m0 + mova [tmpq+10*16], m7 + mova m3, [tmpq+12*%%str] + mova [tmpq+12*16], m4 + mova m4, [tmpq+14*%%str] + mova [tmpq+14*16], m6 + + mova m0, [tmpq+15*%%str] + mova m1, [tmpq+ 3*%%str] + mova m2, [tmpq+ 7*%%str] + mova m5, [tmpq+11*%%str] + mova m7, [tmpq+ 1*%%str] + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [tmpq+13*%%str], [tmpq+ 9*%%str], 1 + mova [tmpq+ 1*16], m0 + mova [tmpq+ 3*16], m1 + mova [tmpq+ 5*16], m2 + mova [tmpq+ 7*16], m3 + mova [tmpq+11*16], m5 + mova [tmpq+13*16], m6 + mova [tmpq+15*16], m7 +%endif +%else + pxor m4, m4 + +%if cpuflag(ssse3) +%define ROUND_REG [pw_512] +%else +%define ROUND_REG [pw_32] +%endif + +%if ARCH_X86_64 + mova m12, [tmpq+ 6*%%str] + VP9_IDCT8_WRITEx2 1, 11, 10, 8, 4, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + VP9_IDCT8_WRITEx2 14, 0, 10, 8, 4, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + VP9_IDCT8_WRITEx2 3, 15, 10, 8, 4, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + VP9_IDCT8_WRITEx2 12, 6, 10, 8, 4, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + + mova m1, [tmpq+ 3*%%str] + mova m11, [tmpq+ 7*%%str] + mova m14, [tmpq+11*%%str] + mova m0, [tmpq+13*%%str] + + VP9_IDCT8_WRITEx2 7, 1, 10, 8, 4, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + VP9_IDCT8_WRITEx2 11, 2, 10, 8, 4, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + VP9_IDCT8_WRITEx2 9, 14, 10, 8, 4, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + VP9_IDCT8_WRITEx2 0, 5, 10, 8, 4, ROUND_REG, 6 +%else + mova [tmpq+ 0*%%str], m2 + mova [tmpq+ 1*%%str], m5 + mova [tmpq+ 2*%%str], m7 + mova m2, [tmpq+ 9*%%str] + VP9_IDCT8_WRITEx2 1, 2, 5, 7, 4, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + mova m5, [tmpq+ 5*%%str] + VP9_IDCT8_WRITEx2 5, 0, 1, 2, 4, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + mova m5, [tmpq+ 8*%%str] + VP9_IDCT8_WRITEx2 3, 5, 1, 2, 4, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + mova m5, [tmpq+ 6*%%str] + VP9_IDCT8_WRITEx2 5, 6, 1, 2, 4, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + + mova m0, [tmpq+ 2*%%str] + mova m3, [tmpq+ 3*%%str] + VP9_IDCT8_WRITEx2 0, 3, 1, 2, 4, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + mova m0, [tmpq+ 7*%%str] + mova m3, [tmpq+ 0*%%str] + VP9_IDCT8_WRITEx2 0, 3, 1, 2, 4, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + mova m0, [tmpq+14*%%str] + mova m3, [tmpq+11*%%str] + VP9_IDCT8_WRITEx2 0, 3, 1, 2, 4, ROUND_REG, 6 + lea dstq, [dstq+strideq*2] + mova m0, [tmpq+13*%%str] + mova m3, [tmpq+ 1*%%str] + VP9_IDCT8_WRITEx2 0, 3, 1, 2, 4, ROUND_REG, 6 +%endif + + SWAP 0, 4 ; zero +%undef ROUND_REG +%endif +%endmacro + +%macro IADST16_FN 5 +INIT_XMM %5 +cglobal vp9_%1_%3_16x16_add, 3, 6, 16, 512, dst, stride, block, cnt, dst_bak, tmp + mov cntd, 2 + mov tmpq, rsp +.loop1_full: + VP9_%2_1D blockq, 1 + add blockq, 16 + add tmpq, 256 + dec cntd + jg .loop1_full + sub blockq, 32 + + mov cntd, 2 + mov tmpq, rsp + mov dst_bakq, dstq +.loop2_full: + VP9_%4_1D tmpq, 2 + lea dstq, [dst_bakq+8] + add tmpq, 16 + dec cntd + jg .loop2_full + + ; at the end of the loop, m0 should still be zero + ; use that to zero out block coefficients + ZERO_BLOCK blockq, 32, 16, m0 + RET +%endmacro + +IADST16_FN idct, IDCT16, iadst, IADST16, sse2 +IADST16_FN iadst, IADST16, idct, IDCT16, sse2 +IADST16_FN iadst, IADST16, iadst, IADST16, sse2 +IADST16_FN idct, IDCT16, iadst, IADST16, ssse3 +IADST16_FN iadst, IADST16, idct, IDCT16, ssse3 +IADST16_FN iadst, IADST16, iadst, IADST16, ssse3 +IADST16_FN idct, IDCT16, iadst, IADST16, avx +IADST16_FN iadst, IADST16, idct, IDCT16, avx +IADST16_FN iadst, IADST16, iadst, IADST16, avx + +; in: data in m[0-15] except m0/m4, which are in [blockq+0] and [blockq+128] +; out: m[0-15] except m6, which is in [blockq+192] +; uses blockq as scratch space +%macro VP9_IADST16_YMM_1D 0 + mova [blockq+ 32], m3 + mova [blockq+ 64], m7 + mova [blockq+ 96], m8 + + ; first half of round 1 + VP9_UNPACK_MULSUB_2D_4X 9, 6, 0, 3, 13160, 9760 ; m9/x=t7[d], m6/x=t6[d] + VP9_UNPACK_MULSUB_2D_4X 1, 14, 4, 7, 2404, 16207 ; m1/x=t15[d], m14/x=t14[d] + VP9_RND_SH_SUMSUB_BA 14, 6, 7, 3, 8, [pd_8192] ; m14=t6[w], m6=t14[w] + VP9_RND_SH_SUMSUB_BA 1, 9, 4, 0, 8, [pd_8192] ; m1=t7[w], m9=t15[w] + + VP9_UNPACK_MULSUB_2D_4X 13, 2, 4, 7, 15893, 3981 ; m13/x=t3[d], m2/x=t2[d] + VP9_UNPACK_MULSUB_2D_4X 5, 10, 0, 3, 8423, 14053 ; m5/x=t11[d], m10/x=t10[d] + VP9_RND_SH_SUMSUB_BA 10, 2, 3, 7, 8, [pd_8192] ; m10=t2[w], m2=t10[w] + VP9_RND_SH_SUMSUB_BA 5, 13, 0, 4, 8, [pd_8192] ; m5=t3[w], m13=t11[w] + + ; half of round 2 t8-15 + VP9_UNPACK_MULSUB_2D_4X 2, 13, 4, 7, 9102, 13623 ; m2/x=t11[d], m13/x=t10[d] + VP9_UNPACK_MULSUB_2D_4X 9, 6, 3, 0, 13623, 9102 ; m9/x=t14[d], m6/x=t15[d] + VP9_RND_SH_SUMSUB_BA 9, 13, 3, 7, 8, [pd_8192] ; m9=t10[w], m13=t14[w] + VP9_RND_SH_SUMSUB_BA 6, 2, 0, 4, 8, [pd_8192] ; m6=t11[w], m2=t15[w] + + SUMSUB_BA w, 14, 10, 8 ; m14=t2, m10=t6 + SUMSUB_BA w, 1, 5, 8 ; m1=t3, m5=t7 + + mova m0, [blockq+ 0] + mova m4, [blockq+128] + mova m3, [blockq+ 32] + mova m7, [blockq+ 64] + mova m8, [blockq+ 96] + mova [blockq+ 0], m1 + mova [blockq+128], m14 + mova [blockq+ 32], m6 + mova [blockq+ 64], m9 + mova [blockq+ 96], m10 + + ; second half of round 1 + VP9_UNPACK_MULSUB_2D_4X 15, 0, 1, 9, 16364, 804 ; m15/x=t1[d], m0/x=t0[d] + VP9_UNPACK_MULSUB_2D_4X 7, 8, 10, 6, 11003, 12140 ; m7/x=t9[d], m8/x=t8[d] + VP9_RND_SH_SUMSUB_BA 8, 0, 6, 9, 14, [pd_8192] ; m8=t0[w], m0=t8[w] + VP9_RND_SH_SUMSUB_BA 7, 15, 10, 1, 14, [pd_8192] ; m7=t1[w], m15=t9[w] + + VP9_UNPACK_MULSUB_2D_4X 11, 4, 10, 6, 14811, 7005 ; m11/x=t5[d], m4/x=t4[d] + VP9_UNPACK_MULSUB_2D_4X 3, 12, 1, 9, 5520, 15426 ; m3/x=t13[d], m12/x=t12[d] + VP9_RND_SH_SUMSUB_BA 12, 4, 9, 6, 14, [pd_8192] ; m12=t4[w], m4=t12[w] + VP9_RND_SH_SUMSUB_BA 3, 11, 1, 10, 14, [pd_8192] ; m3=t5[w], m11=t13[w] + + ; second half of round 2 t8-15 + VP9_UNPACK_MULSUB_2D_4X 0, 15, 6, 10, 16069, 3196 ; m15/x=t8[d], m0/x=t9[d] + VP9_UNPACK_MULSUB_2D_4X 11, 4, 9, 1, 3196, 16069 ; m11/x=t12[d], m4/x=t13[d] + VP9_RND_SH_SUMSUB_BA 11, 15, 9, 10, 14, [pd_8192] ; m11=t8[w], m15=t12[w] + VP9_RND_SH_SUMSUB_BA 4, 0, 1, 6, 14, [pd_8192] ; m4=t9[w], m0=t13[w] + + SUMSUB_BA w, 12, 8, 14 ; m12=t0, m8=t4 + SUMSUB_BA w, 3, 7, 14 ; m3=t1, m7=t5 + + mova m10, [blockq+ 96] + mova [blockq+ 96], m12 + + ; round 3 + VP9_UNPACK_MULSUB_2D_4X 15, 0, 9, 12, 15137, 6270 ; m15/x=t13[d], m0/x=t12[d] + VP9_UNPACK_MULSUB_2D_4X 2, 13, 1, 6, 6270, 15137 ; m2/x=t14[d], m13/x=t15[d] + VP9_RND_SH_SUMSUB_BA 2, 0, 1, 12, 14, [pd_8192] ; m2=out2[w], m0=t14a[w] + VP9_RND_SH_SUMSUB_BA 13, 15, 6, 9, 14, [pd_8192] + PSIGNW m13, [pw_m1] ; m13=out13[w], m15=t15a[w] + + VP9_UNPACK_MULSUB_2D_4X 8, 7, 12, 9, 15137, 6270 ; m8/x=t5[d], m7/x=t4[d] + VP9_UNPACK_MULSUB_2D_4X 5, 10, 1, 6, 6270, 15137 ; m5/x=t6[d], m10/x=t7[d] + VP9_RND_SH_SUMSUB_BA 5, 7, 1, 9, 14, [pd_8192] + PSIGNW m5, [pw_m1] ; m5=out3[w], m7=t6[w] + VP9_RND_SH_SUMSUB_BA 10, 8, 6, 12, 14, [pd_8192] ; m10=out12[w], m8=t7[w] + + mova m1, [blockq+ 0] + mova m14, [blockq+128] + mova m6, [blockq+ 32] + mova m9, [blockq+ 64] + mova m12, [blockq+ 96] + mova [blockq+ 0], m10 + mova [blockq+128], m5 + + SUMSUB_BA w, 14, 12, 5 ; m14=out0, m12=t2a + SUMSUB_BA w, 1, 3, 5 + PSIGNW m1, [pw_m1] ; m1=out15, m3=t3a + + SUMSUB_BA w, 9, 11, 5 + PSIGNW m9, [pw_m1] ; m9=out1, m11=t10 + SUMSUB_BA w, 6, 4, 5 ; m6=out14, m4=t11 + + VP9_UNPACK_MULSUB_2W_4X 4, 11, 11585, 11585, [pd_8192], 5, 10 ; m4=out9, m11=out6 + mova m5, [blockq+128] + mova [blockq+192], m11 + PSIGNW m15, [pw_m1] + VP9_UNPACK_MULSUB_2W_4X 15, 0, 11585, 11585, [pd_8192], 10, 11 ; m15=out5, m0=out10 + + PSIGNW m3, [pw_m1] + VP9_UNPACK_MULSUB_2W_4X 3, 12, 11585, 11585, [pd_8192], 10, 11 ; m3=out7,m12=out8 + VP9_UNPACK_MULSUB_2W_4X 8, 7, 11585, 11585, [pd_8192], 10, 11 ; m8=out11,m7=out4 + + mova m10, [blockq+ 0] + + SWAP 0, 14, 6, 11, 8, 12, 10 + SWAP 1, 9, 15, 4, 7, 3, 5 + SWAP 5, 9, 15 +%endmacro + +%if ARCH_X86_64 && HAVE_AVX2_EXTERNAL +%macro IADST16_YMM_FN 4 +INIT_YMM avx2 +cglobal vp9_%1_%3_16x16_add, 4, 4, 16, dst, stride, block, eob + mova m1, [blockq+ 32] + mova m2, [blockq+ 64] + mova m3, [blockq+ 96] + mova m5, [blockq+160] + mova m6, [blockq+192] + mova m7, [blockq+224] + mova m8, [blockq+256] + mova m9, [blockq+288] + mova m10, [blockq+320] + mova m11, [blockq+352] + mova m12, [blockq+384] + mova m13, [blockq+416] + mova m14, [blockq+448] + mova m15, [blockq+480] + + VP9_%2_YMM_1D + TRANSPOSE16x16W 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \ + [blockq+192], [blockq+128], 1 + mova [blockq+ 0], m0 + VP9_%4_YMM_1D + + mova [blockq+224], m7 + + ; store + VP9_IDCT8_WRITEx2 0, 1, 6, 7, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + VP9_IDCT8_WRITEx2 2, 3, 6, 7, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + VP9_IDCT8_WRITEx2 4, 5, 6, 7, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + mova m6, [blockq+192] + mova m7, [blockq+224] + VP9_IDCT8_WRITEx2 6, 7, 1, 2, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + VP9_IDCT8_WRITEx2 8, 9, 1, 2, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + VP9_IDCT8_WRITEx2 10, 11, 1, 2, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + VP9_IDCT8_WRITEx2 12, 13, 1, 2, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + VP9_IDCT8_WRITEx2 14, 15, 1, 2, unused, [pw_512], 6 + lea dstq, [dstq+2*strideq] + + ; at the end of the loop, m0 should still be zero + ; use that to zero out block coefficients + pxor m0, m0 + ZERO_BLOCK blockq, 32, 16, m0 + RET +%endmacro + +IADST16_YMM_FN idct, IDCT16, iadst, IADST16 +IADST16_YMM_FN iadst, IADST16, idct, IDCT16 +IADST16_YMM_FN iadst, IADST16, iadst, IADST16 +%endif + +;--------------------------------------------------------------------------------------------- +; void vp9_idct_idct_32x32_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); +;--------------------------------------------------------------------------------------------- + +%macro VP9_IDCT32_1D 2-3 32 ; src, pass, nnzc +%if %2 == 1 +%assign %%str mmsize +%else +%assign %%str 64 +%endif + + ; first do t0-15, this can be done identical to idct16x16 + VP9_IDCT16_1D_START %1, %3/2, 64*2, tmpq, 2*%%str, 1 + + ; store everything on stack to make space available for t16-31 + ; we store interleaved with the output of the second half (t16-31) + ; so we don't need to allocate extra stack space + mova [tmpq+ 0*%%str], m0 ; t0 + mova [tmpq+ 4*%%str], m1 ; t1 + mova [tmpq+ 8*%%str], m2 ; t2 + mova [tmpq+12*%%str], m3 ; t3 + mova [tmpq+16*%%str], m4 ; t4 + mova [tmpq+20*%%str], m5 ; t5 +%if ARCH_X86_64 + mova [tmpq+22*%%str], m10 ; t10 + mova [tmpq+18*%%str], m11 ; t11 + mova [tmpq+14*%%str], m12 ; t12 + mova [tmpq+10*%%str], m13 ; t13 + mova [tmpq+ 6*%%str], m14 ; t14 + mova [tmpq+ 2*%%str], m15 ; t15 +%endif + + mova m0, [tmpq+ 30*%%str] + UNSCRATCH 1, 6, tmpq+26*%%str + UNSCRATCH 2, 8, tmpq+24*%%str + UNSCRATCH 3, 9, tmpq+28*%%str + SUMSUB_BA w, 1, 3, 4 ; t6, t9 + SUMSUB_BA w, 0, 2, 4 ; t7, t8 + + mova [tmpq+24*%%str], m1 ; t6 + mova [tmpq+28*%%str], m0 ; t7 + mova [tmpq+30*%%str], m2 ; t8 + mova [tmpq+26*%%str], m3 ; t9 + + ; then, secondly, do t16-31 +%if %3 <= 8 + mova m4, [%1+ 1*64] + mova m7, [%1+ 7*64] + + pmulhrsw m1, m4, [pw_16364x2] ;t31 + pmulhrsw m4, [pw_804x2] ;t16 + + VP9_UNPACK_MULSUB_2W_4X 5, 0, 1, 4, 16069, 3196, [pd_8192], 6, 2 ; t17, t30 + + pmulhrsw m3, m7, [pw_m5520x2] ;t19 + pmulhrsw m7, [pw_15426x2] ;t28 + + SCRATCH 4, 13, tmpq+ 1*%%str + SCRATCH 5, 12, tmpq+15*%%str + + VP9_UNPACK_MULSUB_2W_4X 2, 6, 7, 3, 3196, m16069, [pd_8192], 4, 5 ; t18, t29 +%else + mova m0, [%1+ 1*64] + mova m1, [%1+15*64] +%if %3 <= 16 + pmulhrsw m5, m0, [pw_16364x2] + pmulhrsw m0, [pw_804x2] + pmulhrsw m4, m1, [pw_m11003x2] + pmulhrsw m1, [pw_12140x2] +%else + mova m4, [%1+17*64] + mova m5, [%1+31*64] + + VP9_UNPACK_MULSUB_2W_4X 0, 5, 16364, 804, [pd_8192], 2, 3 ; t16, t31 + VP9_UNPACK_MULSUB_2W_4X 4, 1, 11003, 12140, [pd_8192], 2, 3 ; t17, t30 +%endif + SUMSUB_BA w, 4, 0, 2 + SUMSUB_BA w, 1, 5, 2 + + VP9_UNPACK_MULSUB_2W_4X 5, 0, 16069, 3196, [pd_8192], 2, 3 ; t17, t30 + + SCRATCH 4, 13, tmpq+ 1*%%str + SCRATCH 5, 12, tmpq+15*%%str + + mova m2, [%1+ 7*64] + mova m3, [%1+ 9*64] +%if %3 <= 16 + pmulhrsw m7, m3, [pw_14811x2] + pmulhrsw m3, [pw_7005x2] + pmulhrsw m6, m2, [pw_m5520x2] + pmulhrsw m2, [pw_15426x2] +%else + mova m7, [%1+23*64] + mova m6, [%1+25*64] + + VP9_UNPACK_MULSUB_2W_4X 3, 7, 14811, 7005, [pd_8192], 4, 5 ; t18, t29 + VP9_UNPACK_MULSUB_2W_4X 6, 2, 5520, 15426, [pd_8192], 4, 5 ; t19, t28 +%endif + SUMSUB_BA w, 3, 6, 4 + SUMSUB_BA w, 7, 2, 4 + + VP9_UNPACK_MULSUB_2W_4X 2, 6, 3196, m16069, [pd_8192], 4, 5 ; t18, t29 +%endif + + UNSCRATCH 5, 12, tmpq+15*%%str + SUMSUB_BA w, 6, 0, 4 + mova [tmpq+25*%%str], m6 ; t19 + UNSCRATCH 4, 13, tmpq+ 1*%%str + SUMSUB_BA w, 7, 1, 6 + SUMSUB_BA w, 3, 4, 6 + mova [tmpq+23*%%str], m3 ; t16 + SUMSUB_BA w, 2, 5, 6 + + VP9_UNPACK_MULSUB_2W_4X 0, 5, 15137, 6270, [pd_8192], 6, 3 ; t18, t29 + VP9_UNPACK_MULSUB_2W_4X 1, 4, 15137, 6270, [pd_8192], 6, 3 ; t19, t28 + + SCRATCH 0, 10, tmpq+ 1*%%str + SCRATCH 1, 11, tmpq+ 7*%%str + SCRATCH 2, 9, tmpq+ 9*%%str + SCRATCH 4, 14, tmpq+15*%%str + SCRATCH 5, 15, tmpq+17*%%str + SCRATCH 7, 13, tmpq+31*%%str + +%if %3 <= 8 + mova m0, [%1+ 5*64] + mova m3, [%1+ 3*64] + + pmulhrsw m5, m0, [pw_15893x2] ;t27 + pmulhrsw m0, [pw_3981x2] ;t20 + + VP9_UNPACK_MULSUB_2W_4X 1, 4, 5, 0, 9102, 13623, [pd_8192], 7, 2 ; t21, t26 + + pmulhrsw m6, m3, [pw_m2404x2] ;t23 + pmulhrsw m3, [pw_16207x2] ;t24 + + SCRATCH 5, 8, tmpq+ 5*%%str + SCRATCH 4, 12, tmpq+11*%%str + + VP9_UNPACK_MULSUB_2W_4X 7, 2, 3, 6, 13623, m9102, [pd_8192], 4, 5 ; t22, t25 +%else + mova m4, [%1+ 5*64] + mova m5, [%1+11*64] +%if %3 <= 16 + pmulhrsw m1, m4, [pw_15893x2] + pmulhrsw m4, [pw_3981x2] + pmulhrsw m0, m5, [pw_m8423x2] + pmulhrsw m5, [pw_14053x2] +%else + mova m0, [%1+21*64] + mova m1, [%1+27*64] + + VP9_UNPACK_MULSUB_2W_4X 4, 1, 15893, 3981, [pd_8192], 2, 3 ; t20, t27 + VP9_UNPACK_MULSUB_2W_4X 0, 5, 8423, 14053, [pd_8192], 2, 3 ; t21, t26 +%endif + SUMSUB_BA w, 0, 4, 2 + SUMSUB_BA w, 5, 1, 2 + + VP9_UNPACK_MULSUB_2W_4X 1, 4, 9102, 13623, [pd_8192], 2, 3 ; t21, t26 + + SCRATCH 5, 8, tmpq+ 5*%%str + SCRATCH 4, 12, tmpq+11*%%str + + mova m7, [%1+ 3*64] + mova m6, [%1+13*64] +%if %3 <= 16 + pmulhrsw m3, m6, [pw_13160x2] + pmulhrsw m6, [pw_9760x2] + pmulhrsw m2, m7, [pw_m2404x2] + pmulhrsw m7, [pw_16207x2] +%else + mova m2, [%1+29*64] + mova m3, [%1+19*64] + VP9_UNPACK_MULSUB_2W_4X 6, 3, 13160, 9760, [pd_8192], 4, 5 ; t22, t25 + VP9_UNPACK_MULSUB_2W_4X 2, 7, 2404, 16207, [pd_8192], 4, 5 ; t23, t24 +%endif + SUMSUB_BA w, 6, 2, 4 + SUMSUB_BA w, 3, 7, 4 + + VP9_UNPACK_MULSUB_2W_4X 7, 2, 13623, m9102, [pd_8192], 4, 5 ; t22, t25 +%endif + + ; m4=t16, m5=t17, m9=t18, m8=t19, m0=t20, m1=t21, m13=t22, m12=t23, + ; m3=t24, m2=t25, m14=t26, m15=t27, m7=t28, m6=t29, m10=t30, m11=t31 + + UNSCRATCH 4, 12, tmpq+11*%%str + SUMSUB_BA w, 0, 6, 5 + SUMSUB_BA w, 4, 2, 5 + UNSCRATCH 5, 8, tmpq+ 5*%%str + SCRATCH 4, 8, tmpq+11*%%str + SUMSUB_BA w, 1, 7, 4 + SUMSUB_BA w, 5, 3, 4 + SCRATCH 5, 12, tmpq+ 5*%%str + + VP9_UNPACK_MULSUB_2W_4X 3, 6, 6270, m15137, [pd_8192], 4, 5 ; t20, t27 + VP9_UNPACK_MULSUB_2W_4X 2, 7, 6270, m15137, [pd_8192], 4, 5 ; t21, t26 + + ; m8[s]=t16, m9=t17, m5=t18, m4[s]=t19, m12=t20, m13=t21, m1=t22, m0=t23, + ; m15=t24, m14=t25, m2=t26, m3=t27, m11=t28, m10=t29, m6=t30, m7=t31 + + UNSCRATCH 5, 9, tmpq+ 9*%%str + mova m4, [tmpq+23*%%str] ; t16 +%if ARCH_X86_64 + SUMSUB_BA w, 1, 5, 9 + SUMSUB_BA w, 0, 4, 9 +%else + SUMSUB_BADC w, 1, 5, 0, 4 +%endif + mova [tmpq+29*%%str], m1 ; t17 + mova [tmpq+21*%%str], m0 ; t16 + UNSCRATCH 0, 10, tmpq+ 1*%%str + UNSCRATCH 1, 11, tmpq+ 7*%%str +%if ARCH_X86_64 + SUMSUB_BA w, 2, 0, 9 + SUMSUB_BA w, 3, 1, 9 +%else + SUMSUB_BADC w, 2, 0, 3, 1 +%endif + mova [tmpq+ 9*%%str], m2 ; t18 + mova [tmpq+13*%%str], m3 ; t19 + SCRATCH 0, 10, tmpq+23*%%str + SCRATCH 1, 11, tmpq+27*%%str + + UNSCRATCH 2, 14, tmpq+15*%%str + UNSCRATCH 3, 15, tmpq+17*%%str + SUMSUB_BA w, 6, 2, 0 + SUMSUB_BA w, 7, 3, 0 + SCRATCH 6, 14, tmpq+ 3*%%str + SCRATCH 7, 15, tmpq+ 7*%%str + + UNSCRATCH 0, 8, tmpq+11*%%str + mova m1, [tmpq+25*%%str] ; t19 + UNSCRATCH 6, 12, tmpq+ 5*%%str + UNSCRATCH 7, 13, tmpq+31*%%str +%if ARCH_X86_64 + SUMSUB_BA w, 0, 1, 9 + SUMSUB_BA w, 6, 7, 9 +%else + SUMSUB_BADC w, 0, 1, 6, 7 +%endif + + ; m0=t16, m1=t17, m2=t18, m3=t19, m11=t20, m10=t21, m9=t22, m8=t23, + ; m7=t24, m6=t25, m5=t26, m4=t27, m12=t28, m13=t29, m14=t30, m15=t31 + +%if 0; cpuflag(ssse3) +%if ARCH_X86_64 + SUMSUB_BA w, 4, 7, 8 + SUMSUB_BA w, 5, 1, 8 +%else + SUMSUB_BADC w, 4, 7, 5, 1 +%endif + + pmulhrsw m7, [pw_11585x2] + pmulhrsw m4, [pw_11585x2] + pmulhrsw m1, [pw_11585x2] + pmulhrsw m5, [pw_11585x2] + + mova [tmpq+ 5*%%str], m7 ; t23 + SCRATCH 1, 13, tmpq+25*%%str + UNSCRATCH 7, 10, tmpq+23*%%str + UNSCRATCH 1, 11, tmpq+27*%%str + +%if ARCH_X86_64 + SUMSUB_BA w, 7, 3, 10 + SUMSUB_BA w, 1, 2, 10 +%else + SUMSUB_BADC w, 7, 3, 1, 2 +%endif + + pmulhrsw m3, [pw_11585x2] + pmulhrsw m7, [pw_11585x2] + pmulhrsw m2, [pw_11585x2] + pmulhrsw m1, [pw_11585x2] +%else + SCRATCH 0, 8, tmpq+15*%%str + SCRATCH 6, 9, tmpq+17*%%str + VP9_UNPACK_MULSUB_2W_4X 7, 4, 11585, 11585, [pd_8192], 0, 6 + mova [tmpq+ 5*%%str], m7 ; t23 + UNSCRATCH 7, 10, tmpq+23*%%str + VP9_UNPACK_MULSUB_2W_4X 1, 5, 11585, 11585, [pd_8192], 0, 6 + SCRATCH 1, 13, tmpq+25*%%str + UNSCRATCH 1, 11, tmpq+27*%%str + VP9_UNPACK_MULSUB_2W_4X 3, 7, 11585, 11585, [pd_8192], 0, 6 + VP9_UNPACK_MULSUB_2W_4X 2, 1, 11585, 11585, [pd_8192], 0, 6 + UNSCRATCH 0, 8, tmpq+15*%%str + UNSCRATCH 6, 9, tmpq+17*%%str +%endif + + ; m0=t16, m1=t17, m2=t18, m3=t19, m4=t20, m5=t21, m6=t22, m7=t23, + ; m8=t24, m9=t25, m10=t26, m11=t27, m12=t28, m13=t29, m14=t30, m15=t31 + + ; then do final pass to sumsub+store the two halves +%if %2 == 1 + mova [tmpq+17*%%str], m2 ; t20 + mova [tmpq+ 1*%%str], m3 ; t21 +%if ARCH_X86_64 + mova [tmpq+25*%%str], m13 ; t22 + + mova m8, [tmpq+ 0*%%str] ; t0 + mova m9, [tmpq+ 4*%%str] ; t1 + mova m12, [tmpq+ 8*%%str] ; t2 + mova m11, [tmpq+12*%%str] ; t3 + mova m2, [tmpq+16*%%str] ; t4 + mova m3, [tmpq+20*%%str] ; t5 + mova m13, [tmpq+24*%%str] ; t6 + + SUMSUB_BA w, 6, 8, 10 + mova [tmpq+ 3*%%str], m8 ; t15 + SUMSUB_BA w, 0, 9, 8 + SUMSUB_BA w, 15, 12, 8 + SUMSUB_BA w, 14, 11, 8 + SUMSUB_BA w, 1, 2, 8 + SUMSUB_BA w, 7, 3, 8 + SUMSUB_BA w, 5, 13, 8 + mova m10, [tmpq+28*%%str] ; t7 + SUMSUB_BA w, 4, 10, 8 +%if cpuflag(avx2) + ; the "shitty" about this idct is that the final pass does the outermost + ; interleave sumsubs (t0/31, t1/30, etc) but the tN for the 16x16 need + ; to be sequential, which means I need to load/store half of the sumsub + ; intermediates back to/from memory to get a 16x16 transpose going... + ; This would be easier if we had more (e.g. 32) YMM regs here. + mova [tmpq+ 7*%%str], m9 + mova [tmpq+11*%%str], m12 + mova [tmpq+15*%%str], m11 + mova [tmpq+19*%%str], m2 + mova [tmpq+23*%%str], m3 + mova [tmpq+27*%%str], m13 + mova [tmpq+31*%%str], m10 + mova [tmpq+12*%%str], m5 + + mova m13, [tmpq+30*%%str] ; t8 + mova m12, [tmpq+26*%%str] ; t9 + mova m11, [tmpq+22*%%str] ; t10 + mova m10, [tmpq+18*%%str] ; t11 + mova m9, [tmpq+17*%%str] ; t20 + mova m8, [tmpq+ 1*%%str] ; t21 + mova m3, [tmpq+25*%%str] ; t22 + mova m2, [tmpq+ 5*%%str] ; t23 + + SUMSUB_BA w, 9, 10, 5 + SUMSUB_BA w, 8, 11, 5 + SUMSUB_BA w, 3, 12, 5 + SUMSUB_BA w, 2, 13, 5 + mova [tmpq+ 1*%%str], m10 + mova [tmpq+ 5*%%str], m11 + mova [tmpq+17*%%str], m12 + mova [tmpq+25*%%str], m13 + + mova m13, [tmpq+14*%%str] ; t12 + mova m12, [tmpq+10*%%str] ; t13 + mova m11, [tmpq+ 9*%%str] ; t18 + mova m10, [tmpq+13*%%str] ; t19 + + SUMSUB_BA w, 11, 12, 5 + SUMSUB_BA w, 10, 13, 5 + mova [tmpq+ 9*%%str], m13 + mova [tmpq+13*%%str], m12 + mova [tmpq+10*%%str], m10 + mova [tmpq+14*%%str], m11 + + mova m13, [tmpq+ 6*%%str] ; t14 + mova m12, [tmpq+ 2*%%str] ; t15 + mova m11, [tmpq+21*%%str] ; t16 + mova m10, [tmpq+29*%%str] ; t17 + SUMSUB_BA w, 11, 12, 5 + SUMSUB_BA w, 10, 13, 5 + mova [tmpq+21*%%str], m12 + mova [tmpq+29*%%str], m13 + mova m12, [tmpq+10*%%str] + mova m13, [tmpq+14*%%str] + + TRANSPOSE16x16W 6, 0, 15, 14, 1, 7, 5, 4, \ + 2, 3, 8, 9, 12, 13, 10, 11, \ + [tmpq+12*%%str], [tmpq+ 8*%%str], 1 + mova [tmpq+ 0*%%str], m6 + mova [tmpq+ 2*%%str], m0 + mova [tmpq+ 4*%%str], m15 + mova [tmpq+ 6*%%str], m14 + mova [tmpq+10*%%str], m7 + mova [tmpq+12*%%str], m5 + mova [tmpq+14*%%str], m4 + mova [tmpq+16*%%str], m2 + mova [tmpq+18*%%str], m3 + mova [tmpq+20*%%str], m8 + mova [tmpq+22*%%str], m9 + mova [tmpq+24*%%str], m12 + mova [tmpq+26*%%str], m13 + mova [tmpq+28*%%str], m10 + mova [tmpq+30*%%str], m11 + + mova m0, [tmpq+21*%%str] + mova m1, [tmpq+29*%%str] + mova m2, [tmpq+13*%%str] + mova m3, [tmpq+ 9*%%str] + mova m4, [tmpq+ 1*%%str] + mova m5, [tmpq+ 5*%%str] + mova m7, [tmpq+25*%%str] + mova m8, [tmpq+31*%%str] + mova m9, [tmpq+27*%%str] + mova m10, [tmpq+23*%%str] + mova m11, [tmpq+19*%%str] + mova m12, [tmpq+15*%%str] + mova m13, [tmpq+11*%%str] + mova m14, [tmpq+ 7*%%str] + mova m15, [tmpq+ 3*%%str] + TRANSPOSE16x16W 0, 1, 2, 3, 4, 5, 6, 7, \ + 8, 9, 10, 11, 12, 13, 14, 15, \ + [tmpq+17*%%str], [tmpq+ 9*%%str], 1 + mova [tmpq+ 1*%%str], m0 + mova [tmpq+ 3*%%str], m1 + mova [tmpq+ 5*%%str], m2 + mova [tmpq+ 7*%%str], m3 + mova [tmpq+11*%%str], m5 + mova [tmpq+13*%%str], m6 + mova [tmpq+15*%%str], m7 + mova [tmpq+17*%%str], m8 + mova [tmpq+19*%%str], m9 + mova [tmpq+21*%%str], m10 + mova [tmpq+23*%%str], m11 + mova [tmpq+25*%%str], m12 + mova [tmpq+27*%%str], m13 + mova [tmpq+29*%%str], m14 + mova [tmpq+31*%%str], m15 +%else ; !avx2 + TRANSPOSE8x8W 6, 0, 15, 14, 1, 7, 5, 4, 8 + mova [tmpq+ 0*%%str], m6 + mova [tmpq+ 4*%%str], m0 + mova [tmpq+ 8*%%str], m15 + mova [tmpq+12*%%str], m14 + mova [tmpq+16*%%str], m1 + mova [tmpq+20*%%str], m7 + mova [tmpq+24*%%str], m5 + mova [tmpq+28*%%str], m4 + + mova m8, [tmpq+ 3*%%str] ; t15 + TRANSPOSE8x8W 10, 13, 3, 2, 11, 12, 9, 8, 0 + mova [tmpq+ 3*%%str], m10 + mova [tmpq+ 7*%%str], m13 + mova [tmpq+11*%%str], m3 + mova [tmpq+15*%%str], m2 + mova [tmpq+19*%%str], m11 + mova [tmpq+23*%%str], m12 + mova [tmpq+27*%%str], m9 + mova [tmpq+31*%%str], m8 + + mova m15, [tmpq+30*%%str] ; t8 + mova m14, [tmpq+26*%%str] ; t9 + mova m13, [tmpq+22*%%str] ; t10 + mova m12, [tmpq+18*%%str] ; t11 + mova m11, [tmpq+14*%%str] ; t12 + mova m10, [tmpq+10*%%str] ; t13 + mova m9, [tmpq+ 6*%%str] ; t14 + mova m8, [tmpq+ 2*%%str] ; t15 + mova m7, [tmpq+21*%%str] ; t16 + mova m6, [tmpq+29*%%str] ; t17 + mova m5, [tmpq+ 9*%%str] ; t18 + mova m4, [tmpq+13*%%str] ; t19 + mova m3, [tmpq+17*%%str] ; t20 + mova m2, [tmpq+ 1*%%str] ; t21 + mova m1, [tmpq+25*%%str] ; t22 + + SUMSUB_BA w, 7, 8, 0 + mova [tmpq+ 2*%%str], m8 + mova m0, [tmpq+ 5*%%str] ; t23 + SUMSUB_BA w, 6, 9, 8 + SUMSUB_BA w, 5, 10, 8 + SUMSUB_BA w, 4, 11, 8 + SUMSUB_BA w, 3, 12, 8 + SUMSUB_BA w, 2, 13, 8 + SUMSUB_BA w, 1, 14, 8 + SUMSUB_BA w, 0, 15, 8 + + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8 + mova [tmpq+ 1*%%str], m0 + mova [tmpq+ 5*%%str], m1 + mova [tmpq+ 9*%%str], m2 + mova [tmpq+13*%%str], m3 + mova [tmpq+17*%%str], m4 + mova [tmpq+21*%%str], m5 + mova [tmpq+25*%%str], m6 + mova [tmpq+29*%%str], m7 + + mova m8, [tmpq+ 2*%%str] + TRANSPOSE8x8W 8, 9, 10, 11, 12, 13, 14, 15, 0 + mova [tmpq+ 2*%%str], m8 + mova [tmpq+ 6*%%str], m9 + mova [tmpq+10*%%str], m10 + mova [tmpq+14*%%str], m11 + mova [tmpq+18*%%str], m12 + mova [tmpq+22*%%str], m13 + mova [tmpq+26*%%str], m14 + mova [tmpq+30*%%str], m15 +%endif ; avx2 +%else + mova m2, [tmpq+24*%%str] ; t6 + mova m3, [tmpq+28*%%str] ; t7 + SUMSUB_BADC w, 5, 2, 4, 3 + mova [tmpq+24*%%str], m5 + mova [tmpq+23*%%str], m2 + mova [tmpq+28*%%str], m4 + mova [tmpq+19*%%str], m3 + + mova m2, [tmpq+16*%%str] ; t4 + mova m3, [tmpq+20*%%str] ; t5 + SUMSUB_BA w, 1, 2, 5 + SUMSUB_BA w, 7, 3, 5 + mova [tmpq+15*%%str], m2 + mova [tmpq+11*%%str], m3 + + mova m2, [tmpq+ 0*%%str] ; t0 + mova m3, [tmpq+ 4*%%str] ; t1 + SUMSUB_BA w, 6, 2, 5 + SUMSUB_BA w, 0, 3, 5 + mova [tmpq+31*%%str], m2 + mova [tmpq+27*%%str], m3 + + mova m2, [tmpq+ 8*%%str] ; t2 + mova m3, [tmpq+12*%%str] ; t3 + mova m5, [tmpq+ 7*%%str] + mova m4, [tmpq+ 3*%%str] + SUMSUB_BADC w, 5, 2, 4, 3 + mova [tmpq+ 7*%%str], m2 + mova [tmpq+ 3*%%str], m3 + + mova m3, [tmpq+28*%%str] + TRANSPOSE8x8W 6, 0, 5, 4, 1, 7, 2, 3, [tmpq+24*%%str], [tmpq+16*%%str], 1 + mova [tmpq+ 0*%%str], m6 + mova [tmpq+ 4*%%str], m0 + mova [tmpq+ 8*%%str], m5 + mova [tmpq+12*%%str], m4 + mova [tmpq+20*%%str], m7 + mova [tmpq+24*%%str], m2 + mova [tmpq+28*%%str], m3 + + mova m6, [tmpq+19*%%str] + mova m0, [tmpq+23*%%str] + mova m5, [tmpq+11*%%str] + mova m4, [tmpq+15*%%str] + mova m1, [tmpq+ 3*%%str] + mova m7, [tmpq+ 7*%%str] + mova m3, [tmpq+31*%%str] + TRANSPOSE8x8W 6, 0, 5, 4, 1, 7, 2, 3, [tmpq+27*%%str], [tmpq+19*%%str], 1 + mova [tmpq+ 3*%%str], m6 + mova [tmpq+ 7*%%str], m0 + mova [tmpq+11*%%str], m5 + mova [tmpq+15*%%str], m4 + mova [tmpq+23*%%str], m7 + mova [tmpq+27*%%str], m2 + mova [tmpq+31*%%str], m3 + + mova m1, [tmpq+ 6*%%str] ; t14 + mova m0, [tmpq+ 2*%%str] ; t15 + mova m7, [tmpq+21*%%str] ; t16 + mova m6, [tmpq+29*%%str] ; t17 + SUMSUB_BA w, 7, 0, 2 + SUMSUB_BA w, 6, 1, 2 + mova [tmpq+29*%%str], m7 + mova [tmpq+ 2*%%str], m0 + mova [tmpq+21*%%str], m6 + mova [tmpq+ 6*%%str], m1 + + mova m1, [tmpq+14*%%str] ; t12 + mova m0, [tmpq+10*%%str] ; t13 + mova m5, [tmpq+ 9*%%str] ; t18 + mova m4, [tmpq+13*%%str] ; t19 + SUMSUB_BA w, 5, 0, 2 + SUMSUB_BA w, 4, 1, 2 + mova [tmpq+10*%%str], m0 + mova [tmpq+14*%%str], m1 + + mova m1, [tmpq+22*%%str] ; t10 + mova m0, [tmpq+18*%%str] ; t11 + mova m3, [tmpq+17*%%str] ; t20 + mova m2, [tmpq+ 1*%%str] ; t21 + SUMSUB_BA w, 3, 0, 6 + SUMSUB_BA w, 2, 1, 6 + mova [tmpq+18*%%str], m0 + mova [tmpq+22*%%str], m1 + + mova m7, [tmpq+30*%%str] ; t8 + mova m6, [tmpq+26*%%str] ; t9 + mova m1, [tmpq+25*%%str] ; t22 + mova m0, [tmpq+ 5*%%str] ; t23 + SUMSUB_BADC w, 1, 6, 0, 7 + mova [tmpq+26*%%str], m6 + mova [tmpq+30*%%str], m7 + + mova m7, [tmpq+29*%%str] + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [tmpq+21*%%str], [tmpq+17*%%str], 1 + mova [tmpq+ 1*%%str], m0 + mova [tmpq+ 5*%%str], m1 + mova [tmpq+ 9*%%str], m2 + mova [tmpq+13*%%str], m3 + mova [tmpq+21*%%str], m5 + mova [tmpq+25*%%str], m6 + mova [tmpq+29*%%str], m7 + + mova m0, [tmpq+ 2*%%str] + mova m1, [tmpq+ 6*%%str] + mova m2, [tmpq+10*%%str] + mova m3, [tmpq+14*%%str] + mova m4, [tmpq+18*%%str] + mova m5, [tmpq+22*%%str] + mova m7, [tmpq+30*%%str] + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [tmpq+26*%%str], [tmpq+18*%%str], 1 + mova [tmpq+ 2*%%str], m0 + mova [tmpq+ 6*%%str], m1 + mova [tmpq+10*%%str], m2 + mova [tmpq+14*%%str], m3 + mova [tmpq+22*%%str], m5 + mova [tmpq+26*%%str], m6 + mova [tmpq+30*%%str], m7 +%endif +%else + ; t0-7 is in [tmpq+{0,4,8,12,16,20,24,28}*%%str] + ; t8-15 is in [tmpq+{2,6,10,14,18,22,26,30}*%%str] + ; t16-19 and t23 is in [tmpq+{1,5,9,13,29}*%%str] + ; t20-22 is in m4-6 + ; t24-31 is in m8-15 + +%if cpuflag(ssse3) +%define ROUND_REG [pw_512] +%else +%define ROUND_REG [pw_32] +%endif + +%macro %%STORE_2X2 7-8 1 ; src[1-4], tmp[1-2], zero, inc_dst_ptrs + SUMSUB_BA w, %4, %1, %5 + SUMSUB_BA w, %3, %2, %5 + VP9_IDCT8_WRITEx2 %4, %3, %5, %6, %7, ROUND_REG, 6 +%if %8 == 1 + add dstq, stride2q +%endif + VP9_IDCT8_WRITEx2 %2, %1, %5, %6, %7, ROUND_REG, 6, dst_endq +%if %8 == 1 + sub dst_endq, stride2q +%endif +%endmacro + +%if ARCH_X86_64 + pxor m10, m10 + + ; store t0-1 and t30-31 + mova m8, [tmpq+ 0*%%str] + mova m9, [tmpq+ 4*%%str] + %%STORE_2X2 8, 9, 0, 6, 12, 11, 10 + + ; store t2-3 and t28-29 + mova m8, [tmpq+ 8*%%str] + mova m9, [tmpq+12*%%str] + %%STORE_2X2 8, 9, 14, 15, 12, 11, 10 + + ; store t4-5 and t26-27 + mova m8, [tmpq+16*%%str] + mova m9, [tmpq+20*%%str] + %%STORE_2X2 8, 9, 7, 1, 12, 11, 10 + + ; store t6-7 and t24-25 + mova m8, [tmpq+24*%%str] + mova m9, [tmpq+28*%%str] + %%STORE_2X2 8, 9, 4, 5, 12, 11, 10 + + ; store t8-9 and t22-23 + mova m8, [tmpq+30*%%str] + mova m9, [tmpq+26*%%str] + mova m0, [tmpq+ 5*%%str] + %%STORE_2X2 8, 9, 13, 0, 12, 11, 10 + + ; store t10-11 and t20-21 + mova m8, [tmpq+22*%%str] + mova m9, [tmpq+18*%%str] + %%STORE_2X2 8, 9, 2, 3, 12, 11, 10 + + ; store t12-13 and t18-19 + mova m8, [tmpq+14*%%str] + mova m9, [tmpq+10*%%str] + mova m5, [tmpq+13*%%str] + mova m4, [tmpq+ 9*%%str] + %%STORE_2X2 8, 9, 4, 5, 12, 11, 10 + + ; store t14-17 + mova m8, [tmpq+ 6*%%str] + mova m9, [tmpq+ 2*%%str] + mova m5, [tmpq+29*%%str] + mova m4, [tmpq+21*%%str] + %%STORE_2X2 8, 9, 4, 5, 12, 11, 10, 0 + + SWAP 1, 10 ; zero +%else + mova [tmpq+ 1*%%str], m1 + mova [tmpq+11*%%str], m2 + mova [tmpq+15*%%str], m3 + mova [tmpq+17*%%str], m4 + mova [tmpq+19*%%str], m5 + pxor m1, m1 + + ; store t0-1 and t30-31 + mova m2, [tmpq+ 0*%%str] + mova m3, [tmpq+ 4*%%str] + %%STORE_2X2 2, 3, 0, 6, 4, 5, 1 + + ; store t2-3 and t28-29 + mova m2, [tmpq+ 8*%%str] + mova m3, [tmpq+12*%%str] + mova m0, [tmpq+ 3*%%str] + mova m6, [tmpq+ 7*%%str] + %%STORE_2X2 2, 3, 0, 6, 4, 5, 1 + + ; store t4-5 and t26-27 + mova m2, [tmpq+16*%%str] + mova m3, [tmpq+20*%%str] + mova m0, [tmpq+ 1*%%str] + %%STORE_2X2 2, 3, 7, 0, 4, 5, 1 + + ; store t6-7 and t24-25 + mova m2, [tmpq+24*%%str] + mova m3, [tmpq+28*%%str] + mova m0, [tmpq+17*%%str] + mova m6, [tmpq+19*%%str] + %%STORE_2X2 2, 3, 0, 6, 4, 5, 1 + + ; store t8-9 and t22-23 + mova m2, [tmpq+30*%%str] + mova m3, [tmpq+26*%%str] + mova m0, [tmpq+25*%%str] + mova m6, [tmpq+ 5*%%str] + %%STORE_2X2 2, 3, 0, 6, 4, 5, 1 + + ; store t10-11 and t20-21 + mova m2, [tmpq+22*%%str] + mova m3, [tmpq+18*%%str] + mova m0, [tmpq+11*%%str] + mova m6, [tmpq+15*%%str] + %%STORE_2X2 2, 3, 0, 6, 4, 5, 1 + + ; store t12-13 and t18-19 + mova m2, [tmpq+14*%%str] + mova m3, [tmpq+10*%%str] + mova m6, [tmpq+13*%%str] + mova m0, [tmpq+ 9*%%str] + %%STORE_2X2 2, 3, 0, 6, 4, 5, 1 + + ; store t14-17 + mova m2, [tmpq+ 6*%%str] + mova m3, [tmpq+ 2*%%str] + mova m6, [tmpq+29*%%str] + mova m0, [tmpq+21*%%str] + %%STORE_2X2 2, 3, 0, 6, 4, 5, 1, 0 +%endif +%undef ROUND_REG +%endif +%endmacro + +%macro VP9_IDCT_IDCT_32x32_ADD_XMM 1 +INIT_XMM %1 +cglobal vp9_idct_idct_32x32_add, 0, 6 + ARCH_X86_64 * 3, 16, 2048, dst, stride, block, eob + movifnidn eobd, dword eobm +%if cpuflag(ssse3) + cmp eobd, 135 + jg .idctfull + cmp eobd, 34 + jg .idct16x16 + cmp eobd, 1 + jg .idct8x8 +%else + cmp eobd, 1 + jg .idctfull +%endif + + ; dc-only case + movifnidn blockq, blockmp + movifnidn dstq, dstmp + movifnidn strideq, stridemp +%if cpuflag(ssse3) + movd m0, [blockq] + mova m1, [pw_11585x2] + pmulhrsw m0, m1 + pmulhrsw m0, m1 +%else + DEFINE_ARGS dst, stride, block, coef + movsx coefd, word [blockq] + imul coefd, 11585 + add coefd, 8192 + sar coefd, 14 + imul coefd, 11585 + add coefd, (32 << 14) + 8192 + sar coefd, 14 + 6 + movd m0, coefd +%endif + SPLATW m0, m0, q0000 +%if cpuflag(ssse3) + pmulhrsw m0, [pw_512] +%endif + pxor m5, m5 + movd [blockq], m5 +%rep 31 + VP9_STORE_2XFULL 0, 1, 2, 3, 4, 5, mmsize + add dstq, strideq +%endrep + VP9_STORE_2XFULL 0, 1, 2, 3, 4, 5, mmsize + RET + +%if ARCH_X86_64 + DEFINE_ARGS dst_bak, stride, block, cnt, dst, stride30, dst_end, stride2, tmp +%else +%define dst_bakq r0mp +%endif +%if cpuflag(ssse3) +.idct8x8: +%if ARCH_X86_32 + DEFINE_ARGS block, u1, u2, u3, u4, tmp + mov blockq, r2mp +%endif + mov tmpq, rsp + VP9_IDCT32_1D blockq, 1, 8 + +%if ARCH_X86_32 + DEFINE_ARGS dst, stride, stride30, dst_end, stride2, tmp + mov strideq, r1mp +%define cntd dword r3m +%endif + mov stride30q, strideq ; stride + lea stride2q, [strideq*2] ; stride*2 + shl stride30q, 5 ; stride*32 + mov cntd, 4 + sub stride30q, stride2q ; stride*30 +.loop2_8x8: + mov dstq, dst_bakq + lea dst_endq, [dstq+stride30q] + VP9_IDCT32_1D tmpq, 2, 8 + add dst_bakq, 8 + add tmpq, 16 + dec cntd + jg .loop2_8x8 + + ; at the end of the loop, m7 should still be zero + ; use that to zero out block coefficients +%if ARCH_X86_32 + DEFINE_ARGS block + mov blockq, r2mp +%endif + ZERO_BLOCK blockq, 64, 8, m1 + RET + +.idct16x16: +%if ARCH_X86_32 + DEFINE_ARGS block, tmp, cnt + mov blockq, r2mp +%endif + mov cntd, 2 + mov tmpq, rsp +.loop1_16x16: + VP9_IDCT32_1D blockq, 1, 16 + add blockq, 16 + add tmpq, 512 + dec cntd + jg .loop1_16x16 + +%if ARCH_X86_64 + sub blockq, 32 +%else + DEFINE_ARGS dst, stride, stride30, dst_end, stride2, tmp + mov strideq, r1mp +%define cntd dword r3m +%endif + + mov stride30q, strideq ; stride + lea stride2q, [strideq*2] ; stride*2 + shl stride30q, 5 ; stride*32 + mov cntd, 4 + mov tmpq, rsp + sub stride30q, stride2q ; stride*30 +.loop2_16x16: + mov dstq, dst_bakq + lea dst_endq, [dstq+stride30q] + VP9_IDCT32_1D tmpq, 2, 16 + add dst_bakq, 8 + add tmpq, 16 + dec cntd + jg .loop2_16x16 + + ; at the end of the loop, m7 should still be zero + ; use that to zero out block coefficients +%if ARCH_X86_32 + DEFINE_ARGS block + mov blockq, r2mp +%endif + ZERO_BLOCK blockq, 64, 16, m1 + RET +%endif + +.idctfull: +%if ARCH_X86_32 + DEFINE_ARGS block, tmp, cnt + mov blockq, r2mp +%endif + mov cntd, 4 + mov tmpq, rsp +.loop1_full: + VP9_IDCT32_1D blockq, 1 + add blockq, 16 + add tmpq, 512 + dec cntd + jg .loop1_full + +%if ARCH_X86_64 + sub blockq, 64 +%else + DEFINE_ARGS dst, stride, stride30, dst_end, stride2, tmp + mov strideq, r1mp +%define cntd dword r3m +%endif + + mov stride30q, strideq ; stride + lea stride2q, [strideq*2] ; stride*2 + shl stride30q, 5 ; stride*32 + mov cntd, 4 + mov tmpq, rsp + sub stride30q, stride2q ; stride*30 +.loop2_full: + mov dstq, dst_bakq + lea dst_endq, [dstq+stride30q] + VP9_IDCT32_1D tmpq, 2 + add dst_bakq, 8 + add tmpq, 16 + dec cntd + jg .loop2_full + + ; at the end of the loop, m7 should still be zero + ; use that to zero out block coefficients +%if ARCH_X86_32 + DEFINE_ARGS block + mov blockq, r2mp +%endif + ZERO_BLOCK blockq, 64, 32, m1 + RET +%endmacro + +VP9_IDCT_IDCT_32x32_ADD_XMM sse2 +VP9_IDCT_IDCT_32x32_ADD_XMM ssse3 +VP9_IDCT_IDCT_32x32_ADD_XMM avx + +; this is almost identical to VP9_STORE_2X, but it does two rows +; for slightly improved interleaving, and it omits vpermq since the +; input is DC so all values are identical +%macro VP9_STORE_YMM_DC_2X2 6 ; reg, tmp1, tmp2, tmp3, tmp4, zero + mova m%2, [dstq] + mova m%4, [dstq+strideq] + punpckhbw m%3, m%2, m%6 + punpcklbw m%2, m%6 + punpckhbw m%5, m%4, m%6 + punpcklbw m%4, m%6 + paddw m%3, m%1 + paddw m%2, m%1 + paddw m%5, m%1 + paddw m%4, m%1 + packuswb m%2, m%3 + packuswb m%4, m%5 + mova [dstq+strideq*0], m%2 + mova [dstq+strideq*1], m%4 +%endmacro + +%if ARCH_X86_64 && HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +cglobal vp9_idct_idct_32x32_add, 4, 9, 16, 2048, dst, stride, block, eob + cmp eobd, 135 + jg .idctfull + cmp eobd, 1 + jg .idct16x16 + + ; dc-only case + mova m1, [pw_11585x2] + vpbroadcastw m0, [blockq] + pmulhrsw m0, m1 + pmulhrsw m0, m1 + pxor m5, m5 + pmulhrsw m0, [pw_512] + movd [blockq], xm5 + + DEFINE_ARGS dst, stride, cnt + mov cntd, 16 +.loop_dc: + VP9_STORE_YMM_DC_2X2 0, 1, 2, 3, 4, 5 + lea dstq, [dstq+2*strideq] + dec cntd + jg .loop_dc + RET + + DEFINE_ARGS dst_bak, stride, block, cnt, dst, stride30, dst_end, stride2, tmp +.idct16x16: + mov tmpq, rsp + VP9_IDCT32_1D blockq, 1, 16 + + mov stride30q, strideq ; stride + lea stride2q, [strideq*2] ; stride*2 + shl stride30q, 5 ; stride*32 + mov cntd, 2 + sub stride30q, stride2q ; stride*30 +.loop2_16x16: + mov dstq, dst_bakq + lea dst_endq, [dstq+stride30q] + VP9_IDCT32_1D tmpq, 2, 16 + add dst_bakq, 16 + add tmpq, 32 + dec cntd + jg .loop2_16x16 + + ; at the end of the loop, m1 should still be zero + ; use that to zero out block coefficients + ZERO_BLOCK blockq, 64, 16, m1 + RET + +.idctfull: + mov cntd, 2 + mov tmpq, rsp +.loop1_full: + VP9_IDCT32_1D blockq, 1 + add blockq, 32 + add tmpq, 1024 + dec cntd + jg .loop1_full + + sub blockq, 64 + + mov stride30q, strideq ; stride + lea stride2q, [strideq*2] ; stride*2 + shl stride30q, 5 ; stride*32 + mov cntd, 2 + mov tmpq, rsp + sub stride30q, stride2q ; stride*30 +.loop2_full: + mov dstq, dst_bakq + lea dst_endq, [dstq+stride30q] + VP9_IDCT32_1D tmpq, 2 + add dst_bakq, 16 + add tmpq, 32 + dec cntd + jg .loop2_full + + ; at the end of the loop, m1 should still be zero + ; use that to zero out block coefficients + ZERO_BLOCK blockq, 64, 32, m1 + RET +%endif diff --git a/libs/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm b/libs/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm new file mode 100644 index 000000000..902685edf --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm @@ -0,0 +1,2044 @@ +;****************************************************************************** +;* VP9 inverse transform x86 SIMD optimizations +;* +;* Copyright (C) 2015 Ronald S. Bultje <rsbultje gmail com> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" +%include "vp9itxfm_template.asm" + +SECTION_RODATA + +cextern pw_8 +cextern pw_1023 +cextern pw_2048 +cextern pw_4095 +cextern pw_m1 +cextern pd_1 +cextern pd_16 +cextern pd_32 +cextern pd_8192 + +pd_8: times 4 dd 8 +pd_3fff: times 4 dd 0x3fff + +cextern pw_11585x2 + +cextern pw_5283_13377 +cextern pw_9929_13377 +cextern pw_15212_m13377 +cextern pw_15212_9929 +cextern pw_m5283_m15212 +cextern pw_13377x2 +cextern pw_m13377_13377 +cextern pw_13377_0 + +pw_9929_m5283: times 4 dw 9929, -5283 + +%macro COEF_PAIR 2-3 +cextern pw_m%1_%2 +cextern pw_%2_%1 +%if %0 == 3 +cextern pw_m%1_m%2 +%if %1 != %2 +cextern pw_m%2_%1 +cextern pw_%1_%2 +%endif +%endif +%endmacro + +COEF_PAIR 2404, 16207 +COEF_PAIR 3196, 16069, 1 +COEF_PAIR 4756, 15679 +COEF_PAIR 5520, 15426 +COEF_PAIR 6270, 15137, 1 +COEF_PAIR 8423, 14053 +COEF_PAIR 10394, 12665 +COEF_PAIR 11003, 12140 +COEF_PAIR 11585, 11585, 1 +COEF_PAIR 13160, 9760 +COEF_PAIR 13623, 9102, 1 +COEF_PAIR 14449, 7723 +COEF_PAIR 14811, 7005 +COEF_PAIR 15893, 3981 +COEF_PAIR 16305, 1606 +COEF_PAIR 16364, 804 + +default_8x8: +times 12 db 1 +times 52 db 2 +row_8x8: +times 18 db 1 +times 46 db 2 +col_8x8: +times 6 db 1 +times 58 db 2 +default_16x16: +times 10 db 1 +times 28 db 2 +times 51 db 3 +times 167 db 4 +row_16x16: +times 21 db 1 +times 45 db 2 +times 60 db 3 +times 130 db 4 +col_16x16: +times 5 db 1 +times 12 db 2 +times 25 db 3 +times 214 db 4 +default_32x32: +times 9 db 1 +times 25 db 2 +times 36 db 3 +times 65 db 4 +times 105 db 5 +times 96 db 6 +times 112 db 7 +times 576 db 8 + +SECTION .text + +%macro VP9_STORE_2X 6-7 dstq ; reg1, reg2, tmp1, tmp2, min, max, dst + mova m%3, [%7] + mova m%4, [%7+strideq] + paddw m%3, m%1 + paddw m%4, m%2 + pmaxsw m%3, m%5 + pmaxsw m%4, m%5 + pminsw m%3, m%6 + pminsw m%4, m%6 + mova [%7], m%3 + mova [%7+strideq], m%4 +%endmacro + +%macro ZERO_BLOCK 4 ; mem, stride, nnzcpl, zero_reg +%assign %%y 0 +%rep %3 +%assign %%x 0 +%rep %3*4/mmsize + mova [%1+%%y+%%x], %4 +%assign %%x (%%x+mmsize) +%endrep +%assign %%y (%%y+%2) +%endrep +%endmacro + +; the input coefficients are scaled up by 2 bit (which we downscale immediately +; in the iwht), and is otherwise orthonormally increased by 1 bit per iwht_1d. +; therefore, a diff of 10-12+sign bit will fit in 12-14+sign bit after scaling, +; i.e. everything can be done in 15+1bpp words. Since the quant fractional bits +; add 2 bits, we need to scale before converting to word in 12bpp, since the +; input will be 16+sign bit which doesn't fit in 15+sign words, but in 10bpp +; we can scale after converting to words (which is half the instructions), +; since the input is only 14+sign bit, which fits in 15+sign words directly. + +%macro IWHT4_FN 2 ; bpp, max +cglobal vp9_iwht_iwht_4x4_add_%1, 3, 3, 8, dst, stride, block, eob + mova m7, [pw_%2] + mova m0, [blockq+0*16+0] + mova m1, [blockq+1*16+0] +%if %1 >= 12 + mova m4, [blockq+0*16+8] + mova m5, [blockq+1*16+8] + psrad m0, 2 + psrad m1, 2 + psrad m4, 2 + psrad m5, 2 + packssdw m0, m4 + packssdw m1, m5 +%else + packssdw m0, [blockq+0*16+8] + packssdw m1, [blockq+1*16+8] + psraw m0, 2 + psraw m1, 2 +%endif + mova m2, [blockq+2*16+0] + mova m3, [blockq+3*16+0] +%if %1 >= 12 + mova m4, [blockq+2*16+8] + mova m5, [blockq+3*16+8] + psrad m2, 2 + psrad m3, 2 + psrad m4, 2 + psrad m5, 2 + packssdw m2, m4 + packssdw m3, m5 +%else + packssdw m2, [blockq+2*16+8] + packssdw m3, [blockq+3*16+8] + psraw m2, 2 + psraw m3, 2 +%endif + + VP9_IWHT4_1D + TRANSPOSE4x4W 0, 1, 2, 3, 4 + VP9_IWHT4_1D + + pxor m6, m6 + VP9_STORE_2X 0, 1, 4, 5, 6, 7 + lea dstq, [dstq+strideq*2] + VP9_STORE_2X 2, 3, 4, 5, 6, 7 + ZERO_BLOCK blockq, 16, 4, m6 + RET +%endmacro + +INIT_MMX mmxext +IWHT4_FN 10, 1023 +INIT_MMX mmxext +IWHT4_FN 12, 4095 + +%macro VP9_IDCT4_WRITEOUT 0 +%if cpuflag(ssse3) + mova m5, [pw_2048] + pmulhrsw m0, m5 + pmulhrsw m1, m5 + pmulhrsw m2, m5 + pmulhrsw m3, m5 +%else + mova m5, [pw_8] + paddw m0, m5 + paddw m1, m5 + paddw m2, m5 + paddw m3, m5 + psraw m0, 4 + psraw m1, 4 + psraw m2, 4 + psraw m3, 4 +%endif + mova m5, [pw_1023] + VP9_STORE_2X 0, 1, 6, 7, 4, 5 + lea dstq, [dstq+2*strideq] + VP9_STORE_2X 2, 3, 6, 7, 4, 5 +%endmacro + +%macro DC_ONLY 2 ; shift, zero + mov coefd, dword [blockq] + movd [blockq], %2 + imul coefd, 11585 + add coefd, 8192 + sar coefd, 14 + imul coefd, 11585 + add coefd, ((1 << (%1 - 1)) << 14) + 8192 + sar coefd, 14 + %1 +%endmacro + +; 4x4 coefficients are 5+depth+sign bits, so for 10bpp, everything still fits +; in 15+1 words without additional effort, since the coefficients are 15bpp. + +%macro IDCT4_10_FN 0 +cglobal vp9_idct_idct_4x4_add_10, 4, 4, 8, dst, stride, block, eob + cmp eobd, 1 + jg .idctfull + + ; dc-only + pxor m4, m4 +%if cpuflag(ssse3) + movd m0, [blockq] + movd [blockq], m4 + mova m5, [pw_11585x2] + pmulhrsw m0, m5 + pmulhrsw m0, m5 +%else + DEFINE_ARGS dst, stride, block, coef + DC_ONLY 4, m4 + movd m0, coefd +%endif + pshufw m0, m0, 0 + mova m5, [pw_1023] +%if cpuflag(ssse3) + pmulhrsw m0, [pw_2048] ; (x*2048 + (1<<14))>>15 <=> (x+8)>>4 +%endif + VP9_STORE_2X 0, 0, 6, 7, 4, 5 + lea dstq, [dstq+2*strideq] + VP9_STORE_2X 0, 0, 6, 7, 4, 5 + RET + +.idctfull: + mova m0, [blockq+0*16+0] + mova m1, [blockq+1*16+0] + packssdw m0, [blockq+0*16+8] + packssdw m1, [blockq+1*16+8] + mova m2, [blockq+2*16+0] + mova m3, [blockq+3*16+0] + packssdw m2, [blockq+2*16+8] + packssdw m3, [blockq+3*16+8] + +%if cpuflag(ssse3) + mova m6, [pw_11585x2] +%endif + mova m7, [pd_8192] ; rounding + VP9_IDCT4_1D + TRANSPOSE4x4W 0, 1, 2, 3, 4 + VP9_IDCT4_1D + + pxor m4, m4 + ZERO_BLOCK blockq, 16, 4, m4 + VP9_IDCT4_WRITEOUT + RET +%endmacro + +INIT_MMX mmxext +IDCT4_10_FN +INIT_MMX ssse3 +IDCT4_10_FN + +%macro IADST4_FN 4 +cglobal vp9_%1_%3_4x4_add_10, 3, 3, 0, dst, stride, block, eob +%if WIN64 && notcpuflag(ssse3) + WIN64_SPILL_XMM 8 +%endif + movdqa xmm5, [pd_8192] + mova m0, [blockq+0*16+0] + mova m1, [blockq+1*16+0] + packssdw m0, [blockq+0*16+8] + packssdw m1, [blockq+1*16+8] + mova m2, [blockq+2*16+0] + mova m3, [blockq+3*16+0] + packssdw m2, [blockq+2*16+8] + packssdw m3, [blockq+3*16+8] + +%if cpuflag(ssse3) + mova m6, [pw_11585x2] +%endif +%ifnidn %1%3, iadstiadst + movdq2q m7, xmm5 +%endif + VP9_%2_1D + TRANSPOSE4x4W 0, 1, 2, 3, 4 + VP9_%4_1D + + pxor m4, m4 + ZERO_BLOCK blockq, 16, 4, m4 + VP9_IDCT4_WRITEOUT + RET +%endmacro + +INIT_MMX sse2 +IADST4_FN idct, IDCT4, iadst, IADST4 +IADST4_FN iadst, IADST4, idct, IDCT4 +IADST4_FN iadst, IADST4, iadst, IADST4 + +INIT_MMX ssse3 +IADST4_FN idct, IDCT4, iadst, IADST4 +IADST4_FN iadst, IADST4, idct, IDCT4 +IADST4_FN iadst, IADST4, iadst, IADST4 + +; inputs and outputs are dwords, coefficients are words +; +; dst1 = src1 * coef1 + src2 * coef2 + rnd >> 14 +; dst2 = src1 * coef2 - src2 * coef1 + rnd >> 14 +%macro SUMSUB_MUL 6-8 [pd_8192], [pd_3fff] ; src/dst 1-2, tmp1-2, coef1-2, rnd, mask + pand m%3, m%1, %8 + pand m%4, m%2, %8 + psrad m%1, 14 + psrad m%2, 14 + packssdw m%4, m%2 + packssdw m%3, m%1 + punpckhwd m%2, m%4, m%3 + punpcklwd m%4, m%3 + pmaddwd m%3, m%4, [pw_%6_%5] + pmaddwd m%1, m%2, [pw_%6_%5] + pmaddwd m%4, [pw_m%5_%6] + pmaddwd m%2, [pw_m%5_%6] + paddd m%3, %7 + paddd m%4, %7 + psrad m%3, 14 + psrad m%4, 14 + paddd m%1, m%3 + paddd m%2, m%4 +%endmacro + +%macro IDCT4_12BPP_1D 0-8 [pd_8192], [pd_3fff], 0, 1, 2, 3, 4, 5 ; rnd, mask, in/out0-3, tmp0-1 + SUMSUB_MUL %3, %5, %7, %8, 11585, 11585, %1, %2 + SUMSUB_MUL %4, %6, %7, %8, 15137, 6270, %1, %2 + SUMSUB_BA d, %4, %3, %7 + SUMSUB_BA d, %6, %5, %7 + SWAP %4, %6, %3 +%endmacro + +%macro STORE_4x4 6 ; tmp1-2, reg1-2, min, max + movh m%1, [dstq+strideq*0] + movh m%2, [dstq+strideq*2] + movhps m%1, [dstq+strideq*1] + movhps m%2, [dstq+stride3q ] + paddw m%1, m%3 + paddw m%2, m%4 + pmaxsw m%1, %5 + pmaxsw m%2, %5 + pminsw m%1, %6 + pminsw m%2, %6 + movh [dstq+strideq*0], m%1 + movhps [dstq+strideq*1], m%1 + movh [dstq+strideq*2], m%2 + movhps [dstq+stride3q ], m%2 +%endmacro + +%macro ROUND_AND_STORE_4x4 8 ; reg1-4, min, max, rnd, shift + paddd m%1, %7 + paddd m%2, %7 + paddd m%3, %7 + paddd m%4, %7 + psrad m%1, %8 + psrad m%2, %8 + psrad m%3, %8 + psrad m%4, %8 + packssdw m%1, m%2 + packssdw m%3, m%4 + STORE_4x4 %2, %4, %1, %3, %5, %6 +%endmacro + +INIT_XMM sse2 +cglobal vp9_idct_idct_4x4_add_12, 4, 4, 8, dst, stride, block, eob + cmp eobd, 1 + jg .idctfull + + ; dc-only - this is special, since for 4x4 12bpp, the max coef size is + ; 17+sign bpp. Since the multiply is with 11585, which is 14bpp, the + ; result of each multiply is 31+sign bit, i.e. it _exactly_ fits in a + ; dword. After the final shift (4), the result is 13+sign bits, so we + ; don't need any additional processing to fit it in a word + DEFINE_ARGS dst, stride, block, coef + pxor m4, m4 + DC_ONLY 4, m4 + movd m0, coefd + pshuflw m0, m0, q0000 + punpcklqdq m0, m0 + mova m5, [pw_4095] + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + STORE_4x4 1, 3, 0, 0, m4, m5 + RET + +.idctfull: + DEFINE_ARGS dst, stride, block, eob + mova m0, [blockq+0*16] + mova m1, [blockq+1*16] + mova m2, [blockq+2*16] + mova m3, [blockq+3*16] + mova m6, [pd_8192] + mova m7, [pd_3fff] + + IDCT4_12BPP_1D m6, m7 + TRANSPOSE4x4D 0, 1, 2, 3, 4 + IDCT4_12BPP_1D m6, m7 + + pxor m4, m4 + ZERO_BLOCK blockq, 16, 4, m4 + + ; writeout + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + mova m5, [pw_4095] + mova m6, [pd_8] + ROUND_AND_STORE_4x4 0, 1, 2, 3, m4, m5, m6, 4 + RET + +%macro SCRATCH 3-4 +%if ARCH_X86_64 + SWAP %1, %2 +%if %0 == 4 +%define reg_%4 m%2 +%endif +%else + mova [%3], m%1 +%if %0 == 4 +%define reg_%4 [%3] +%endif +%endif +%endmacro + +%macro UNSCRATCH 3-4 +%if ARCH_X86_64 + SWAP %1, %2 +%else + mova m%1, [%3] +%endif +%if %0 == 4 +%undef reg_%4 +%endif +%endmacro + +%macro PRELOAD 2-3 +%if ARCH_X86_64 + mova m%1, [%2] +%if %0 == 3 +%define reg_%3 m%1 +%endif +%elif %0 == 3 +%define reg_%3 [%2] +%endif +%endmacro + +; out0 = 5283 * in0 + 13377 + in1 + 15212 * in2 + 9929 * in3 + rnd >> 14 +; out1 = 9929 * in0 + 13377 * in1 - 5283 * in2 - 15282 * in3 + rnd >> 14 +; out2 = 13377 * in0 - 13377 * in2 + 13377 * in3 + rnd >> 14 +; out3 = 15212 * in0 - 13377 * in1 + 9929 * in2 - 5283 * in3 + rnd >> 14 +%macro IADST4_12BPP_1D 0-2 [pd_8192], [pd_3fff] ; rnd, mask + pand m4, m0, %2 + pand m5, m1, %2 + psrad m0, 14 + psrad m1, 14 + packssdw m5, m1 + packssdw m4, m0 + punpckhwd m1, m4, m5 + punpcklwd m4, m5 + pand m5, m2, %2 + pand m6, m3, %2 + psrad m2, 14 + psrad m3, 14 + packssdw m6, m3 + packssdw m5, m2 + punpckhwd m3, m5, m6 + punpcklwd m5, m6 + SCRATCH 1, 8, rsp+0*mmsize, a + SCRATCH 5, 9, rsp+1*mmsize, b + + ; m1/3 have the high bits of 0,1,2,3 + ; m4/5 have the low bits of 0,1,2,3 + ; m0/2/6/7 are free + + mova m2, [pw_15212_9929] + mova m0, [pw_5283_13377] + pmaddwd m7, m2, reg_b + pmaddwd m6, m4, m0 + pmaddwd m2, m3 + pmaddwd m0, reg_a + paddd m6, m7 + paddd m0, m2 + mova m1, [pw_m13377_13377] + mova m5, [pw_13377_0] + pmaddwd m7, m1, reg_b + pmaddwd m2, m4, m5 + pmaddwd m1, m3 + pmaddwd m5, reg_a + paddd m2, m7 + paddd m1, m5 + paddd m6, %1 + paddd m2, %1 + psrad m6, 14 + psrad m2, 14 + paddd m0, m6 ; t0 + paddd m2, m1 ; t2 + + mova m7, [pw_m5283_m15212] + mova m5, [pw_9929_13377] + pmaddwd m1, m7, reg_b + pmaddwd m6, m4, m5 + pmaddwd m7, m3 + pmaddwd m5, reg_a + paddd m6, m1 + paddd m7, m5 + UNSCRATCH 5, 9, rsp+1*mmsize, b + pmaddwd m5, [pw_9929_m5283] + pmaddwd m4, [pw_15212_m13377] + pmaddwd m3, [pw_9929_m5283] + UNSCRATCH 1, 8, rsp+0*mmsize, a + pmaddwd m1, [pw_15212_m13377] + paddd m4, m5 + paddd m3, m1 + paddd m6, %1 + paddd m4, %1 + psrad m6, 14 + psrad m4, 14 + paddd m7, m6 ; t1 + paddd m3, m4 ; t3 + + SWAP 1, 7 +%endmacro + +%macro IADST4_12BPP_FN 4 +cglobal vp9_%1_%3_4x4_add_12, 3, 3, 12, 2 * ARCH_X86_32 * mmsize, dst, stride, block, eob + mova m0, [blockq+0*16] + mova m1, [blockq+1*16] + mova m2, [blockq+2*16] + mova m3, [blockq+3*16] + + PRELOAD 10, pd_8192, rnd + PRELOAD 11, pd_3fff, mask + %2_12BPP_1D reg_rnd, reg_mask + TRANSPOSE4x4D 0, 1, 2, 3, 4 + %4_12BPP_1D reg_rnd, reg_mask + + pxor m4, m4 + ZERO_BLOCK blockq, 16, 4, m4 + + ; writeout + DEFINE_ARGS dst, stride, stride3 + lea stride3q, [strideq*3] + mova m5, [pw_4095] + mova m6, [pd_8] + ROUND_AND_STORE_4x4 0, 1, 2, 3, m4, m5, m6, 4 + RET +%endmacro + +INIT_XMM sse2 +IADST4_12BPP_FN idct, IDCT4, iadst, IADST4 +IADST4_12BPP_FN iadst, IADST4, idct, IDCT4 +IADST4_12BPP_FN iadst, IADST4, iadst, IADST4 + +; the following line has not been executed at the end of this macro: +; UNSCRATCH 6, 8, rsp+%3*mmsize +%macro IDCT8_1D 1-5 [pd_8192], [pd_3fff], 2 * mmsize, 17 ; src, rnd, mask, src_stride, stack_offset + mova m0, [%1+0*%4] + mova m2, [%1+2*%4] + mova m4, [%1+4*%4] + mova m6, [%1+6*%4] + IDCT4_12BPP_1D %2, %3, 0, 2, 4, 6, 1, 3 ; m0/2/4/6 have t0/1/2/3 + SCRATCH 4, 8, rsp+(%5+0)*mmsize + SCRATCH 6, 9, rsp+(%5+1)*mmsize + mova m1, [%1+1*%4] + mova m3, [%1+3*%4] + mova m5, [%1+5*%4] + mova m7, [%1+7*%4] + SUMSUB_MUL 1, 7, 4, 6, 16069, 3196, %2, %3 ; m1=t7a, m7=t4a + SUMSUB_MUL 5, 3, 4, 6, 9102, 13623, %2, %3 ; m5=t6a, m3=t5a + SUMSUB_BA d, 3, 7, 4 ; m3=t4, m7=t5a + SUMSUB_BA d, 5, 1, 4 ; m5=t7, m1=t6a + SUMSUB_MUL 1, 7, 4, 6, 11585, 11585, %2, %3 ; m1=t6, m7=t5 + SUMSUB_BA d, 5, 0, 4 ; m5=out0, m0=out7 + SUMSUB_BA d, 1, 2, 4 ; m1=out1, m2=out6 + UNSCRATCH 4, 8, rsp+(%5+0)*mmsize + UNSCRATCH 6, 9, rsp+(%5+1)*mmsize + SCRATCH 2, 8, rsp+(%5+0)*mmsize + SUMSUB_BA d, 7, 4, 2 ; m7=out2, m4=out5 + SUMSUB_BA d, 3, 6, 2 ; m3=out3, m6=out4 + SWAP 0, 5, 4, 6, 2, 7 +%endmacro + +%macro STORE_2x8 5-7 dstq, strideq ; tmp1-2, reg, min, max + mova m%1, [%6+%7*0] + mova m%2, [%6+%7*1] + paddw m%1, m%3 + paddw m%2, m%3 + pmaxsw m%1, %4 + pmaxsw m%2, %4 + pminsw m%1, %5 + pminsw m%2, %5 + mova [%6+%7*0], m%1 + mova [%6+%7*1], m%2 +%endmacro + +; FIXME we can use the intermediate storage (rsp[0-15]) on x86-32 for temp +; storage also instead of allocating two more stack spaces. This doesn't +; matter much but it's something... +INIT_XMM sse2 +cglobal vp9_idct_idct_8x8_add_10, 4, 6 + ARCH_X86_64, 14, \ + 16 * mmsize + 3 * ARCH_X86_32 * mmsize, \ + dst, stride, block, eob + mova m0, [pw_1023] + cmp eobd, 1 + jg .idctfull + + ; dc-only - the 10bit version can be done entirely in 32bit, since the max + ; coef values are 16+sign bit, and the coef is 14bit, so 30+sign easily + ; fits in 32bit + DEFINE_ARGS dst, stride, block, coef + pxor m2, m2 + DC_ONLY 5, m2 + movd m1, coefd + pshuflw m1, m1, q0000 + punpcklqdq m1, m1 + DEFINE_ARGS dst, stride, cnt + mov cntd, 4 +.loop_dc: + STORE_2x8 3, 4, 1, m2, m0 + lea dstq, [dstq+strideq*2] + dec cntd + jg .loop_dc + RET + +.idctfull: + SCRATCH 0, 12, rsp+16*mmsize, max + DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak +%if ARCH_X86_64 + mov dstbakq, dstq + movsxd cntq, cntd +%endif +%ifdef PIC + lea ptrq, [default_8x8] + movzx cntd, byte [ptrq+cntq-1] +%else + movzx cntd, byte [default_8x8+cntq-1] +%endif + mov skipd, 2 + sub skipd, cntd + mov ptrq, rsp + PRELOAD 10, pd_8192, rnd + PRELOAD 11, pd_3fff, mask + PRELOAD 13, pd_16, srnd +.loop_1: + IDCT8_1D blockq, reg_rnd, reg_mask + + TRANSPOSE4x4D 0, 1, 2, 3, 6 + mova [ptrq+ 0*mmsize], m0 + mova [ptrq+ 2*mmsize], m1 + mova [ptrq+ 4*mmsize], m2 + mova [ptrq+ 6*mmsize], m3 + UNSCRATCH 6, 8, rsp+17*mmsize + TRANSPOSE4x4D 4, 5, 6, 7, 0 + mova [ptrq+ 1*mmsize], m4 + mova [ptrq+ 3*mmsize], m5 + mova [ptrq+ 5*mmsize], m6 + mova [ptrq+ 7*mmsize], m7 + add ptrq, 8 * mmsize + add blockq, mmsize + dec cntd + jg .loop_1 + + ; zero-pad the remainder (skipped cols) + test skipd, skipd + jz .end + add skipd, skipd + lea blockq, [blockq+skipq*(mmsize/2)] + pxor m0, m0 +.loop_z: + mova [ptrq+mmsize*0], m0 + mova [ptrq+mmsize*1], m0 + mova [ptrq+mmsize*2], m0 + mova [ptrq+mmsize*3], m0 + add ptrq, 4 * mmsize + dec skipd + jg .loop_z +.end: + + DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak + lea stride3q, [strideq*3] + mov cntd, 2 + mov ptrq, rsp +.loop_2: + IDCT8_1D ptrq, reg_rnd, reg_mask + + pxor m6, m6 + ROUND_AND_STORE_4x4 0, 1, 2, 3, m6, reg_max, reg_srnd, 5 + lea dstq, [dstq+strideq*4] + UNSCRATCH 0, 8, rsp+17*mmsize + UNSCRATCH 1, 12, rsp+16*mmsize, max + UNSCRATCH 2, 13, pd_16, srnd + ROUND_AND_STORE_4x4 4, 5, 0, 7, m6, m1, m2, 5 + add ptrq, 16 +%if ARCH_X86_64 + lea dstq, [dstbakq+8] +%else + mov dstq, dstm + add dstq, 8 +%endif + dec cntd + jg .loop_2 + + ; m6 is still zero + ZERO_BLOCK blockq-2*mmsize, 32, 8, m6 + RET + +%macro DC_ONLY_64BIT 2 ; shift, zero +%if ARCH_X86_64 + movsxd coefq, dword [blockq] + movd [blockq], %2 + imul coefq, 11585 + add coefq, 8192 + sar coefq, 14 + imul coefq, 11585 + add coefq, ((1 << (%1 - 1)) << 14) + 8192 + sar coefq, 14 + %1 +%else + mov coefd, dword [blockq] + movd [blockq], %2 + DEFINE_ARGS dst, stride, cnt, coef, coefl + mov cntd, 2 +.loop_dc_calc: + mov coefld, coefd + sar coefd, 14 + and coefld, 0x3fff + imul coefd, 11585 + imul coefld, 11585 + add coefld, 8192 + sar coefld, 14 + add coefd, coefld + dec cntd + jg .loop_dc_calc + add coefd, 1 << (%1 - 1) + sar coefd, %1 +%endif +%endmacro + +INIT_XMM sse2 +cglobal vp9_idct_idct_8x8_add_12, 4, 6 + ARCH_X86_64, 14, \ + 16 * mmsize + 3 * ARCH_X86_32 * mmsize, \ + dst, stride, block, eob + mova m0, [pw_4095] + cmp eobd, 1 + jg mangle(private_prefix %+ _ %+ vp9_idct_idct_8x8_add_10 %+ SUFFIX).idctfull + + ; dc-only - unfortunately, this one can overflow, since coefs are 18+sign + ; bpp, and 18+14+sign does not fit in 32bit, so we do 2-stage multiplies + DEFINE_ARGS dst, stride, block, coef, coefl + pxor m2, m2 + DC_ONLY_64BIT 5, m2 + movd m1, coefd + pshuflw m1, m1, q0000 + punpcklqdq m1, m1 + DEFINE_ARGS dst, stride, cnt + mov cntd, 4 +.loop_dc: + STORE_2x8 3, 4, 1, m2, m0 + lea dstq, [dstq+strideq*2] + dec cntd + jg .loop_dc + RET + +; inputs and outputs are dwords, coefficients are words +; +; dst1[hi]:dst3[lo] = src1 * coef1 + src2 * coef2 +; dst2[hi]:dst4[lo] = src1 * coef2 - src2 * coef1 +%macro SUMSUB_MUL_D 6-7 [pd_3fff] ; src/dst 1-2, dst3-4, coef1-2, mask + pand m%3, m%1, %7 + pand m%4, m%2, %7 + psrad m%1, 14 + psrad m%2, 14 + packssdw m%4, m%2 + packssdw m%3, m%1 + punpckhwd m%2, m%4, m%3 + punpcklwd m%4, m%3 + pmaddwd m%3, m%4, [pw_%6_%5] + pmaddwd m%1, m%2, [pw_%6_%5] + pmaddwd m%4, [pw_m%5_%6] + pmaddwd m%2, [pw_m%5_%6] +%endmacro + +; dst1 = src2[hi]:src4[lo] + src1[hi]:src3[lo] + rnd >> 14 +; dst2 = src2[hi]:src4[lo] - src1[hi]:src3[lo] + rnd >> 14 +%macro SUMSUB_PACK_D 5-6 [pd_8192] ; src/dst 1-2, src3-4, tmp, rnd + SUMSUB_BA d, %1, %2, %5 + SUMSUB_BA d, %3, %4, %5 + paddd m%3, %6 + paddd m%4, %6 + psrad m%3, 14 + psrad m%4, 14 + paddd m%1, m%3 + paddd m%2, m%4 +%endmacro + +%macro NEGD 1 +%if cpuflag(ssse3) + psignd %1, [pw_m1] +%else + pxor %1, [pw_m1] + paddd %1, [pd_1] +%endif +%endmacro + +; the following line has not been executed at the end of this macro: +; UNSCRATCH 6, 8, rsp+17*mmsize +%macro IADST8_1D 1-3 [pd_8192], [pd_3fff] ; src, rnd, mask + mova m0, [%1+ 0*mmsize] + mova m3, [%1+ 6*mmsize] + mova m4, [%1+ 8*mmsize] + mova m7, [%1+14*mmsize] + SUMSUB_MUL_D 7, 0, 1, 2, 16305, 1606, %3 ; m7/1=t0a, m0/2=t1a + SUMSUB_MUL_D 3, 4, 5, 6, 10394, 12665, %3 ; m3/5=t4a, m4/6=t5a + SCRATCH 0, 8, rsp+17*mmsize + SUMSUB_PACK_D 3, 7, 5, 1, 0, %2 ; m3=t0, m7=t4 + UNSCRATCH 0, 8, rsp+17*mmsize + SUMSUB_PACK_D 4, 0, 6, 2, 1, %2 ; m4=t1, m0=t5 + + SCRATCH 3, 8, rsp+17*mmsize + SCRATCH 4, 9, rsp+18*mmsize + SCRATCH 7, 10, rsp+19*mmsize + SCRATCH 0, 11, rsp+20*mmsize + + mova m1, [%1+ 2*mmsize] + mova m2, [%1+ 4*mmsize] + mova m5, [%1+10*mmsize] + mova m6, [%1+12*mmsize] + SUMSUB_MUL_D 5, 2, 3, 4, 14449, 7723, %3 ; m5/8=t2a, m2/9=t3a + SUMSUB_MUL_D 1, 6, 7, 0, 4756, 15679, %3 ; m1/10=t6a, m6/11=t7a + SCRATCH 2, 12, rsp+21*mmsize + SUMSUB_PACK_D 1, 5, 7, 3, 2, %2 ; m1=t2, m5=t6 + UNSCRATCH 2, 12, rsp+21*mmsize + SUMSUB_PACK_D 6, 2, 0, 4, 3, %2 ; m6=t3, m2=t7 + + UNSCRATCH 7, 10, rsp+19*mmsize + UNSCRATCH 0, 11, rsp+20*mmsize + SCRATCH 1, 10, rsp+19*mmsize + SCRATCH 6, 11, rsp+20*mmsize + + SUMSUB_MUL_D 7, 0, 3, 4, 15137, 6270, %3 ; m7/8=t4a, m0/9=t5a + SUMSUB_MUL_D 2, 5, 1, 6, 6270, 15137, %3 ; m2/10=t7a, m5/11=t6a + SCRATCH 2, 12, rsp+21*mmsize + SUMSUB_PACK_D 5, 7, 6, 3, 2, %2 ; m5=-out1, m7=t6 + UNSCRATCH 2, 12, rsp+21*mmsize + NEGD m5 ; m5=out1 + SUMSUB_PACK_D 2, 0, 1, 4, 3, %2 ; m2=out6, m0=t7 + SUMSUB_MUL 7, 0, 3, 4, 11585, 11585, %2, %3 ; m7=out2, m0=-out5 + NEGD m0 ; m0=out5 + + UNSCRATCH 3, 8, rsp+17*mmsize + UNSCRATCH 4, 9, rsp+18*mmsize + UNSCRATCH 1, 10, rsp+19*mmsize + UNSCRATCH 6, 11, rsp+20*mmsize + SCRATCH 2, 8, rsp+17*mmsize + SCRATCH 0, 9, rsp+18*mmsize + + SUMSUB_BA d, 1, 3, 2 ; m1=out0, m3=t2 + SUMSUB_BA d, 6, 4, 2 ; m6=-out7, m4=t3 + NEGD m6 ; m6=out7 + SUMSUB_MUL 3, 4, 2, 0, 11585, 11585, %2, %3 ; m3=-out3, m4=out4 + NEGD m3 ; m3=out3 + + UNSCRATCH 0, 9, rsp+18*mmsize + + SWAP 0, 1, 5 + SWAP 2, 7, 6 +%endmacro + +%macro IADST8_FN 5 +cglobal vp9_%1_%3_8x8_add_10, 4, 6 + ARCH_X86_64, 16, \ + 16 * mmsize + ARCH_X86_32 * 6 * mmsize, \ + dst, stride, block, eob + mova m0, [pw_1023] + +.body: + SCRATCH 0, 13, rsp+16*mmsize, max + DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak +%if ARCH_X86_64 + mov dstbakq, dstq + movsxd cntq, cntd +%endif +%ifdef PIC + lea ptrq, [%5_8x8] + movzx cntd, byte [ptrq+cntq-1] +%else + movzx cntd, byte [%5_8x8+cntq-1] +%endif + mov skipd, 2 + sub skipd, cntd + mov ptrq, rsp + PRELOAD 14, pd_8192, rnd + PRELOAD 15, pd_3fff, mask +.loop_1: + %2_1D blockq, reg_rnd, reg_mask + + TRANSPOSE4x4D 0, 1, 2, 3, 6 + mova [ptrq+ 0*mmsize], m0 + mova [ptrq+ 2*mmsize], m1 + mova [ptrq+ 4*mmsize], m2 + mova [ptrq+ 6*mmsize], m3 + UNSCRATCH 6, 8, rsp+17*mmsize + TRANSPOSE4x4D 4, 5, 6, 7, 0 + mova [ptrq+ 1*mmsize], m4 + mova [ptrq+ 3*mmsize], m5 + mova [ptrq+ 5*mmsize], m6 + mova [ptrq+ 7*mmsize], m7 + add ptrq, 8 * mmsize + add blockq, mmsize + dec cntd + jg .loop_1 + + ; zero-pad the remainder (skipped cols) + test skipd, skipd + jz .end + add skipd, skipd + lea blockq, [blockq+skipq*(mmsize/2)] + pxor m0, m0 +.loop_z: + mova [ptrq+mmsize*0], m0 + mova [ptrq+mmsize*1], m0 + mova [ptrq+mmsize*2], m0 + mova [ptrq+mmsize*3], m0 + add ptrq, 4 * mmsize + dec skipd + jg .loop_z +.end: + + DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak + lea stride3q, [strideq*3] + mov cntd, 2 + mov ptrq, rsp +.loop_2: + %4_1D ptrq, reg_rnd, reg_mask + + pxor m6, m6 + PRELOAD 9, pd_16, srnd + ROUND_AND_STORE_4x4 0, 1, 2, 3, m6, reg_max, reg_srnd, 5 + lea dstq, [dstq+strideq*4] + UNSCRATCH 0, 8, rsp+17*mmsize + UNSCRATCH 1, 13, rsp+16*mmsize, max + UNSCRATCH 2, 9, pd_16, srnd + ROUND_AND_STORE_4x4 4, 5, 0, 7, m6, m1, m2, 5 + add ptrq, 16 +%if ARCH_X86_64 + lea dstq, [dstbakq+8] +%else + mov dstq, dstm + add dstq, 8 +%endif + dec cntd + jg .loop_2 + + ; m6 is still zero + ZERO_BLOCK blockq-2*mmsize, 32, 8, m6 + RET + +cglobal vp9_%1_%3_8x8_add_12, 4, 6 + ARCH_X86_64, 16, \ + 16 * mmsize + ARCH_X86_32 * 6 * mmsize, \ + dst, stride, block, eob + mova m0, [pw_4095] + jmp mangle(private_prefix %+ _ %+ vp9_%1_%3_8x8_add_10 %+ SUFFIX).body +%endmacro + +INIT_XMM sse2 +IADST8_FN idct, IDCT8, iadst, IADST8, row +IADST8_FN iadst, IADST8, idct, IDCT8, col +IADST8_FN iadst, IADST8, iadst, IADST8, default + +%macro IDCT16_1D 1-4 4 * mmsize, 65, 67 ; src, src_stride, stack_offset, mm32bit_stack_offset + IDCT8_1D %1, [pd_8192], [pd_3fff], %2 * 2, %4 ; m0-3=t0-3a, m4-5/m8|r67/m7=t4-7 + ; SCRATCH 6, 8, rsp+(%4+0)*mmsize ; t6 + SCRATCH 0, 15, rsp+(%4+7)*mmsize ; t0a + SCRATCH 1, 14, rsp+(%4+6)*mmsize ; t1a + SCRATCH 2, 13, rsp+(%4+5)*mmsize ; t2a + SCRATCH 3, 12, rsp+(%4+4)*mmsize ; t3a + SCRATCH 4, 11, rsp+(%4+3)*mmsize ; t4 + mova [rsp+(%3+0)*mmsize], m5 ; t5 + mova [rsp+(%3+1)*mmsize], m7 ; t7 + + mova m0, [%1+ 1*%2] ; in1 + mova m3, [%1+ 7*%2] ; in7 + mova m4, [%1+ 9*%2] ; in9 + mova m7, [%1+15*%2] ; in15 + + SUMSUB_MUL 0, 7, 1, 2, 16305, 1606 ; m0=t15a, m7=t8a + SUMSUB_MUL 4, 3, 1, 2, 10394, 12665 ; m4=t14a, m3=t9a + SUMSUB_BA d, 3, 7, 1 ; m3=t8, m7=t9 + SUMSUB_BA d, 4, 0, 1 ; m4=t15,m0=t14 + SUMSUB_MUL 0, 7, 1, 2, 15137, 6270 ; m0=t14a, m7=t9a + + mova m1, [%1+ 3*%2] ; in3 + mova m2, [%1+ 5*%2] ; in5 + mova m5, [%1+11*%2] ; in11 + mova m6, [%1+13*%2] ; in13 + + SCRATCH 0, 9, rsp+(%4+1)*mmsize + SCRATCH 7, 10, rsp+(%4+2)*mmsize + + SUMSUB_MUL 2, 5, 0, 7, 14449, 7723 ; m2=t13a, m5=t10a + SUMSUB_MUL 6, 1, 0, 7, 4756, 15679 ; m6=t12a, m1=t11a + SUMSUB_BA d, 5, 1, 0 ; m5=t11,m1=t10 + SUMSUB_BA d, 2, 6, 0 ; m2=t12,m6=t13 + NEGD m1 ; m1=-t10 + SUMSUB_MUL 1, 6, 0, 7, 15137, 6270 ; m1=t13a, m6=t10a + + UNSCRATCH 7, 10, rsp+(%4+2)*mmsize + SUMSUB_BA d, 5, 3, 0 ; m5=t8a, m3=t11a + SUMSUB_BA d, 6, 7, 0 ; m6=t9, m7=t10 + SUMSUB_BA d, 2, 4, 0 ; m2=t15a,m4=t12a + SCRATCH 5, 10, rsp+(%4+2)*mmsize + SUMSUB_MUL 4, 3, 0, 5, 11585, 11585 ; m4=t12, m3=t11 + UNSCRATCH 0, 9, rsp+(%4+1)*mmsize + SUMSUB_BA d, 1, 0, 5 ; m1=t14, m0=t13 + SCRATCH 6, 9, rsp+(%4+1)*mmsize + SUMSUB_MUL 0, 7, 6, 5, 11585, 11585 ; m0=t13a,m7=t10a + + ; order: 15|r74,14|r73,13|r72,12|r71,11|r70,r65,8|r67,r66,10|r69,9|r68,7,3,4,0,1,2 + ; free: 6,5 + + UNSCRATCH 5, 15, rsp+(%4+7)*mmsize + SUMSUB_BA d, 2, 5, 6 ; m2=out0, m5=out15 + SCRATCH 5, 15, rsp+(%4+7)*mmsize + UNSCRATCH 5, 14, rsp+(%4+6)*mmsize + SUMSUB_BA d, 1, 5, 6 ; m1=out1, m5=out14 + SCRATCH 5, 14, rsp+(%4+6)*mmsize + UNSCRATCH 5, 13, rsp+(%4+5)*mmsize + SUMSUB_BA d, 0, 5, 6 ; m0=out2, m5=out13 + SCRATCH 5, 13, rsp+(%4+5)*mmsize + UNSCRATCH 5, 12, rsp+(%4+4)*mmsize + SUMSUB_BA d, 4, 5, 6 ; m4=out3, m5=out12 + SCRATCH 5, 12, rsp+(%4+4)*mmsize + UNSCRATCH 5, 11, rsp+(%4+3)*mmsize + SUMSUB_BA d, 3, 5, 6 ; m3=out4, m5=out11 + SCRATCH 4, 11, rsp+(%4+3)*mmsize + mova m4, [rsp+(%3+0)*mmsize] + SUMSUB_BA d, 7, 4, 6 ; m7=out5, m4=out10 + mova [rsp+(%3+0)*mmsize], m5 + UNSCRATCH 5, 8, rsp+(%4+0)*mmsize + UNSCRATCH 6, 9, rsp+(%4+1)*mmsize + SCRATCH 2, 8, rsp+(%4+0)*mmsize + SCRATCH 1, 9, rsp+(%4+1)*mmsize + UNSCRATCH 1, 10, rsp+(%4+2)*mmsize + SCRATCH 0, 10, rsp+(%4+2)*mmsize + mova m0, [rsp+(%3+1)*mmsize] + SUMSUB_BA d, 6, 5, 2 ; m6=out6, m5=out9 + SUMSUB_BA d, 1, 0, 2 ; m1=out7, m0=out8 + + SWAP 0, 3, 1, 7, 2, 6, 4 + + ; output order: 8-11|r67-70=out0-3 + ; 0-6,r65=out4-11 + ; 12-15|r71-74=out12-15 +%endmacro + +INIT_XMM sse2 +cglobal vp9_idct_idct_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \ + 67 * mmsize + ARCH_X86_32 * 8 * mmsize, \ + dst, stride, block, eob + mova m0, [pw_1023] + cmp eobd, 1 + jg .idctfull + + ; dc-only - the 10bit version can be done entirely in 32bit, since the max + ; coef values are 17+sign bit, and the coef is 14bit, so 31+sign easily + ; fits in 32bit + DEFINE_ARGS dst, stride, block, coef + pxor m2, m2 + DC_ONLY 6, m2 + movd m1, coefd + pshuflw m1, m1, q0000 + punpcklqdq m1, m1 + DEFINE_ARGS dst, stride, cnt + mov cntd, 8 +.loop_dc: + STORE_2x8 3, 4, 1, m2, m0, dstq, mmsize + STORE_2x8 3, 4, 1, m2, m0, dstq+strideq, mmsize + lea dstq, [dstq+strideq*2] + dec cntd + jg .loop_dc + RET + +.idctfull: + mova [rsp+64*mmsize], m0 + DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak +%if ARCH_X86_64 + mov dstbakq, dstq + movsxd cntq, cntd +%endif +%ifdef PIC + lea ptrq, [default_16x16] + movzx cntd, byte [ptrq+cntq-1] +%else + movzx cntd, byte [default_16x16+cntq-1] +%endif + mov skipd, 4 + sub skipd, cntd + mov ptrq, rsp +.loop_1: + IDCT16_1D blockq + + TRANSPOSE4x4D 0, 1, 2, 3, 7 + mova [ptrq+ 1*mmsize], m0 + mova [ptrq+ 5*mmsize], m1 + mova [ptrq+ 9*mmsize], m2 + mova [ptrq+13*mmsize], m3 + mova m7, [rsp+65*mmsize] + TRANSPOSE4x4D 4, 5, 6, 7, 0 + mova [ptrq+ 2*mmsize], m4 + mova [ptrq+ 6*mmsize], m5 + mova [ptrq+10*mmsize], m6 + mova [ptrq+14*mmsize], m7 + UNSCRATCH 0, 8, rsp+67*mmsize + UNSCRATCH 1, 9, rsp+68*mmsize + UNSCRATCH 2, 10, rsp+69*mmsize + UNSCRATCH 3, 11, rsp+70*mmsize + TRANSPOSE4x4D 0, 1, 2, 3, 7 + mova [ptrq+ 0*mmsize], m0 + mova [ptrq+ 4*mmsize], m1 + mova [ptrq+ 8*mmsize], m2 + mova [ptrq+12*mmsize], m3 + UNSCRATCH 4, 12, rsp+71*mmsize + UNSCRATCH 5, 13, rsp+72*mmsize + UNSCRATCH 6, 14, rsp+73*mmsize + UNSCRATCH 7, 15, rsp+74*mmsize + TRANSPOSE4x4D 4, 5, 6, 7, 0 + mova [ptrq+ 3*mmsize], m4 + mova [ptrq+ 7*mmsize], m5 + mova [ptrq+11*mmsize], m6 + mova [ptrq+15*mmsize], m7 + add ptrq, 16 * mmsize + add blockq, mmsize + dec cntd + jg .loop_1 + + ; zero-pad the remainder (skipped cols) + test skipd, skipd + jz .end + add skipd, skipd + lea blockq, [blockq+skipq*(mmsize/2)] + pxor m0, m0 +.loop_z: + mova [ptrq+mmsize*0], m0 + mova [ptrq+mmsize*1], m0 + mova [ptrq+mmsize*2], m0 + mova [ptrq+mmsize*3], m0 + mova [ptrq+mmsize*4], m0 + mova [ptrq+mmsize*5], m0 + mova [ptrq+mmsize*6], m0 + mova [ptrq+mmsize*7], m0 + add ptrq, 8 * mmsize + dec skipd + jg .loop_z +.end: + + DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak + lea stride3q, [strideq*3] + mov cntd, 4 + mov ptrq, rsp +.loop_2: + IDCT16_1D ptrq + + pxor m7, m7 + lea dstq, [dstq+strideq*4] + ROUND_AND_STORE_4x4 0, 1, 2, 3, m7, [rsp+64*mmsize], [pd_32], 6 + lea dstq, [dstq+strideq*4] + mova m0, [rsp+65*mmsize] + mova m1, [rsp+64*mmsize] + mova m2, [pd_32] + ROUND_AND_STORE_4x4 4, 5, 6, 0, m7, m1, m2, 6 + +%if ARCH_X86_64 + DEFINE_ARGS dstbak, stride, block, cnt, ptr, stride3, dst +%else + mov dstq, dstm +%endif + UNSCRATCH 0, 8, rsp+67*mmsize + UNSCRATCH 4, 9, rsp+68*mmsize + UNSCRATCH 5, 10, rsp+69*mmsize + UNSCRATCH 3, 11, rsp+70*mmsize + ROUND_AND_STORE_4x4 0, 4, 5, 3, m7, m1, m2, 6 +%if ARCH_X86_64 + DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak + lea dstq, [dstbakq+stride3q*4] +%else + lea dstq, [dstq+stride3q*4] +%endif + UNSCRATCH 4, 12, rsp+71*mmsize + UNSCRATCH 5, 13, rsp+72*mmsize + UNSCRATCH 6, 14, rsp+73*mmsize + UNSCRATCH 0, 15, rsp+74*mmsize + ROUND_AND_STORE_4x4 4, 5, 6, 0, m7, m1, m2, 6 + + add ptrq, mmsize +%if ARCH_X86_64 + add dstbakq, 8 + mov dstq, dstbakq +%else + add dword dstm, 8 + mov dstq, dstm +%endif + dec cntd + jg .loop_2 + + ; m7 is still zero + ZERO_BLOCK blockq-4*mmsize, 64, 16, m7 + RET + +INIT_XMM sse2 +cglobal vp9_idct_idct_16x16_add_12, 4, 6 + ARCH_X86_64, 16, \ + 67 * mmsize + ARCH_X86_32 * 8 * mmsize, \ + dst, stride, block, eob + mova m0, [pw_4095] + cmp eobd, 1 + jg mangle(private_prefix %+ _ %+ vp9_idct_idct_16x16_add_10 %+ SUFFIX).idctfull + + ; dc-only - unfortunately, this one can overflow, since coefs are 19+sign + ; bpp, and 19+14+sign does not fit in 32bit, so we do 2-stage multiplies + DEFINE_ARGS dst, stride, block, coef, coefl + pxor m2, m2 + DC_ONLY_64BIT 6, m2 + movd m1, coefd + pshuflw m1, m1, q0000 + punpcklqdq m1, m1 + DEFINE_ARGS dst, stride, cnt + mov cntd, 8 +.loop_dc: + STORE_2x8 3, 4, 1, m2, m0, dstq, mmsize + STORE_2x8 3, 4, 1, m2, m0, dstq+strideq, mmsize + lea dstq, [dstq+strideq*2] + dec cntd + jg .loop_dc + RET + +; r65-69 are available for spills +; r70-77 are available on x86-32 only (x86-64 should use m8-15) +; output should be in m8-11|r70-73, m0-6,r65 and m12-15|r74-77 +%macro IADST16_1D 1 ; src + mova m0, [%1+ 0*4*mmsize] ; in0 + mova m1, [%1+ 7*4*mmsize] ; in7 + mova m2, [%1+ 8*4*mmsize] ; in8 + mova m3, [%1+15*4*mmsize] ; in15 + SUMSUB_MUL_D 3, 0, 4, 5, 16364, 804 ; m3/4=t0, m0/5=t1 + SUMSUB_MUL_D 1, 2, 6, 7, 11003, 12140 ; m1/6=t8, m2/7=t9 + SCRATCH 0, 8, rsp+70*mmsize + SUMSUB_PACK_D 1, 3, 6, 4, 0 ; m1=t0a, m3=t8a + UNSCRATCH 0, 8, rsp+70*mmsize + SUMSUB_PACK_D 2, 0, 7, 5, 4 ; m2=t1a, m0=t9a + mova [rsp+67*mmsize], m1 + SCRATCH 2, 9, rsp+71*mmsize + SCRATCH 3, 12, rsp+74*mmsize + SCRATCH 0, 13, rsp+75*mmsize + + mova m0, [%1+ 3*4*mmsize] ; in3 + mova m1, [%1+ 4*4*mmsize] ; in4 + mova m2, [%1+11*4*mmsize] ; in11 + mova m3, [%1+12*4*mmsize] ; in12 + SUMSUB_MUL_D 2, 1, 4, 5, 14811, 7005 ; m2/4=t4, m1/5=t5 + SUMSUB_MUL_D 0, 3, 6, 7, 5520, 15426 ; m0/6=t12, m3/7=t13 + SCRATCH 1, 10, rsp+72*mmsize + SUMSUB_PACK_D 0, 2, 6, 4, 1 ; m0=t4a, m2=t12a + UNSCRATCH 1, 10, rsp+72*mmsize + SUMSUB_PACK_D 3, 1, 7, 5, 4 ; m3=t5a, m1=t13a + SCRATCH 0, 15, rsp+77*mmsize + SCRATCH 3, 11, rsp+73*mmsize + + UNSCRATCH 0, 12, rsp+74*mmsize ; t8a + UNSCRATCH 3, 13, rsp+75*mmsize ; t9a + SUMSUB_MUL_D 0, 3, 4, 5, 16069, 3196 ; m0/4=t8, m3/5=t9 + SUMSUB_MUL_D 1, 2, 6, 7, 3196, 16069 ; m1/6=t13, m2/7=t12 + SCRATCH 1, 12, rsp+74*mmsize + SUMSUB_PACK_D 2, 0, 7, 4, 1 ; m2=t8a, m0=t12a + UNSCRATCH 1, 12, rsp+74*mmsize + SUMSUB_PACK_D 1, 3, 6, 5, 4 ; m1=t9a, m3=t13a + mova [rsp+65*mmsize], m2 + mova [rsp+66*mmsize], m1 + SCRATCH 0, 8, rsp+70*mmsize + SCRATCH 3, 12, rsp+74*mmsize + + mova m0, [%1+ 2*4*mmsize] ; in2 + mova m1, [%1+ 5*4*mmsize] ; in5 + mova m2, [%1+10*4*mmsize] ; in10 + mova m3, [%1+13*4*mmsize] ; in13 + SUMSUB_MUL_D 3, 0, 4, 5, 15893, 3981 ; m3/4=t2, m0/5=t3 + SUMSUB_MUL_D 1, 2, 6, 7, 8423, 14053 ; m1/6=t10, m2/7=t11 + SCRATCH 0, 10, rsp+72*mmsize + SUMSUB_PACK_D 1, 3, 6, 4, 0 ; m1=t2a, m3=t10a + UNSCRATCH 0, 10, rsp+72*mmsize + SUMSUB_PACK_D 2, 0, 7, 5, 4 ; m2=t3a, m0=t11a + mova [rsp+68*mmsize], m1 + mova [rsp+69*mmsize], m2 + SCRATCH 3, 13, rsp+75*mmsize + SCRATCH 0, 14, rsp+76*mmsize + + mova m0, [%1+ 1*4*mmsize] ; in1 + mova m1, [%1+ 6*4*mmsize] ; in6 + mova m2, [%1+ 9*4*mmsize] ; in9 + mova m3, [%1+14*4*mmsize] ; in14 + SUMSUB_MUL_D 2, 1, 4, 5, 13160, 9760 ; m2/4=t6, m1/5=t7 + SUMSUB_MUL_D 0, 3, 6, 7, 2404, 16207 ; m0/6=t14, m3/7=t15 + SCRATCH 1, 10, rsp+72*mmsize + SUMSUB_PACK_D 0, 2, 6, 4, 1 ; m0=t6a, m2=t14a + UNSCRATCH 1, 10, rsp+72*mmsize + SUMSUB_PACK_D 3, 1, 7, 5, 4 ; m3=t7a, m1=t15a + + UNSCRATCH 4, 13, rsp+75*mmsize ; t10a + UNSCRATCH 5, 14, rsp+76*mmsize ; t11a + SCRATCH 0, 13, rsp+75*mmsize + SCRATCH 3, 14, rsp+76*mmsize + SUMSUB_MUL_D 4, 5, 6, 7, 9102, 13623 ; m4/6=t10, m5/7=t11 + SUMSUB_MUL_D 1, 2, 0, 3, 13623, 9102 ; m1/0=t15, m2/3=t14 + SCRATCH 0, 10, rsp+72*mmsize + SUMSUB_PACK_D 2, 4, 3, 6, 0 ; m2=t10a, m4=t14a + UNSCRATCH 0, 10, rsp+72*mmsize + SUMSUB_PACK_D 1, 5, 0, 7, 6 ; m1=t11a, m5=t15a + + UNSCRATCH 0, 8, rsp+70*mmsize ; t12a + UNSCRATCH 3, 12, rsp+74*mmsize ; t13a + SCRATCH 2, 8, rsp+70*mmsize + SCRATCH 1, 12, rsp+74*mmsize + SUMSUB_MUL_D 0, 3, 1, 2, 15137, 6270 ; m0/1=t12, m3/2=t13 + SUMSUB_MUL_D 5, 4, 7, 6, 6270, 15137 ; m5/7=t15, m4/6=t14 + SCRATCH 2, 10, rsp+72*mmsize + SUMSUB_PACK_D 4, 0, 6, 1, 2 ; m4=out2, m0=t14a + UNSCRATCH 2, 10, rsp+72*mmsize + SUMSUB_PACK_D 5, 3, 7, 2, 1 ; m5=-out13, m3=t15a + NEGD m5 ; m5=out13 + + UNSCRATCH 1, 9, rsp+71*mmsize ; t1a + mova m2, [rsp+68*mmsize] ; t2a + UNSCRATCH 6, 13, rsp+75*mmsize ; t6a + UNSCRATCH 7, 14, rsp+76*mmsize ; t7a + SCRATCH 4, 10, rsp+72*mmsize + SCRATCH 5, 13, rsp+75*mmsize + UNSCRATCH 4, 15, rsp+77*mmsize ; t4a + UNSCRATCH 5, 11, rsp+73*mmsize ; t5a + SCRATCH 0, 14, rsp+76*mmsize + SCRATCH 3, 15, rsp+77*mmsize + mova m0, [rsp+67*mmsize] ; t0a + SUMSUB_BA d, 4, 0, 3 ; m4=t0, m0=t4 + SUMSUB_BA d, 5, 1, 3 ; m5=t1, m1=t5 + SUMSUB_BA d, 6, 2, 3 ; m6=t2, m2=t6 + SCRATCH 4, 9, rsp+71*mmsize + mova m3, [rsp+69*mmsize] ; t3a + SUMSUB_BA d, 7, 3, 4 ; m7=t3, m3=t7 + + mova [rsp+67*mmsize], m5 + mova [rsp+68*mmsize], m6 + mova [rsp+69*mmsize], m7 + SUMSUB_MUL_D 0, 1, 4, 5, 15137, 6270 ; m0/4=t4a, m1/5=t5a + SUMSUB_MUL_D 3, 2, 7, 6, 6270, 15137 ; m3/7=t7a, m2/6=t6a + SCRATCH 1, 11, rsp+73*mmsize + SUMSUB_PACK_D 2, 0, 6, 4, 1 ; m2=-out3, m0=t6 + NEGD m2 ; m2=out3 + UNSCRATCH 1, 11, rsp+73*mmsize + SUMSUB_PACK_D 3, 1, 7, 5, 4 ; m3=out12, m1=t7 + SCRATCH 2, 11, rsp+73*mmsize + UNSCRATCH 2, 12, rsp+74*mmsize ; t11a + SCRATCH 3, 12, rsp+74*mmsize + + UNSCRATCH 3, 8, rsp+70*mmsize ; t10a + mova m4, [rsp+65*mmsize] ; t8a + mova m5, [rsp+66*mmsize] ; t9a + SUMSUB_BA d, 3, 4, 6 ; m3=-out1, m4=t10 + NEGD m3 ; m3=out1 + SUMSUB_BA d, 2, 5, 6 ; m2=out14, m5=t11 + UNSCRATCH 6, 9, rsp+71*mmsize ; t0 + UNSCRATCH 7, 14, rsp+76*mmsize ; t14a + SCRATCH 3, 9, rsp+71*mmsize + SCRATCH 2, 14, rsp+76*mmsize + + SUMSUB_MUL 1, 0, 2, 3, 11585, 11585 ; m1=out4, m0=out11 + mova [rsp+65*mmsize], m0 + SUMSUB_MUL 5, 4, 2, 3, 11585, 11585 ; m5=out6, m4=out9 + UNSCRATCH 0, 15, rsp+77*mmsize ; t15a + SUMSUB_MUL 7, 0, 2, 3, 11585, m11585 ; m7=out10, m0=out5 + + mova m2, [rsp+68*mmsize] ; t2 + SUMSUB_BA d, 2, 6, 3 ; m2=out0, m6=t2a + SCRATCH 2, 8, rsp+70*mmsize + mova m2, [rsp+67*mmsize] ; t1 + mova m3, [rsp+69*mmsize] ; t3 + mova [rsp+67*mmsize], m7 + SUMSUB_BA d, 3, 2, 7 ; m3=-out15, m2=t3a + NEGD m3 ; m3=out15 + SCRATCH 3, 15, rsp+77*mmsize + SUMSUB_MUL 6, 2, 7, 3, 11585, m11585 ; m6=out8, m2=out7 + mova m7, [rsp+67*mmsize] + + SWAP 0, 1 + SWAP 2, 5, 4, 6, 7, 3 +%endmacro + +%macro IADST16_FN 7 +cglobal vp9_%1_%4_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \ + 70 * mmsize + ARCH_X86_32 * 8 * mmsize, \ + dst, stride, block, eob + mova m0, [pw_1023] + +.body: + mova [rsp+64*mmsize], m0 + DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak +%if ARCH_X86_64 + mov dstbakq, dstq + movsxd cntq, cntd +%endif +%ifdef PIC + lea ptrq, [%7_16x16] + movzx cntd, byte [ptrq+cntq-1] +%else + movzx cntd, byte [%7_16x16+cntq-1] +%endif + mov skipd, 4 + sub skipd, cntd + mov ptrq, rsp +.loop_1: + %2_1D blockq + + TRANSPOSE4x4D 0, 1, 2, 3, 7 + mova [ptrq+ 1*mmsize], m0 + mova [ptrq+ 5*mmsize], m1 + mova [ptrq+ 9*mmsize], m2 + mova [ptrq+13*mmsize], m3 + mova m7, [rsp+65*mmsize] + TRANSPOSE4x4D 4, 5, 6, 7, 0 + mova [ptrq+ 2*mmsize], m4 + mova [ptrq+ 6*mmsize], m5 + mova [ptrq+10*mmsize], m6 + mova [ptrq+14*mmsize], m7 + UNSCRATCH 0, 8, rsp+(%3+0)*mmsize + UNSCRATCH 1, 9, rsp+(%3+1)*mmsize + UNSCRATCH 2, 10, rsp+(%3+2)*mmsize + UNSCRATCH 3, 11, rsp+(%3+3)*mmsize + TRANSPOSE4x4D 0, 1, 2, 3, 7 + mova [ptrq+ 0*mmsize], m0 + mova [ptrq+ 4*mmsize], m1 + mova [ptrq+ 8*mmsize], m2 + mova [ptrq+12*mmsize], m3 + UNSCRATCH 4, 12, rsp+(%3+4)*mmsize + UNSCRATCH 5, 13, rsp+(%3+5)*mmsize + UNSCRATCH 6, 14, rsp+(%3+6)*mmsize + UNSCRATCH 7, 15, rsp+(%3+7)*mmsize + TRANSPOSE4x4D 4, 5, 6, 7, 0 + mova [ptrq+ 3*mmsize], m4 + mova [ptrq+ 7*mmsize], m5 + mova [ptrq+11*mmsize], m6 + mova [ptrq+15*mmsize], m7 + add ptrq, 16 * mmsize + add blockq, mmsize + dec cntd + jg .loop_1 + + ; zero-pad the remainder (skipped cols) + test skipd, skipd + jz .end + add skipd, skipd + lea blockq, [blockq+skipq*(mmsize/2)] + pxor m0, m0 +.loop_z: + mova [ptrq+mmsize*0], m0 + mova [ptrq+mmsize*1], m0 + mova [ptrq+mmsize*2], m0 + mova [ptrq+mmsize*3], m0 + mova [ptrq+mmsize*4], m0 + mova [ptrq+mmsize*5], m0 + mova [ptrq+mmsize*6], m0 + mova [ptrq+mmsize*7], m0 + add ptrq, 8 * mmsize + dec skipd + jg .loop_z +.end: + + DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak + lea stride3q, [strideq*3] + mov cntd, 4 + mov ptrq, rsp +.loop_2: + %5_1D ptrq + + pxor m7, m7 + lea dstq, [dstq+strideq*4] + ROUND_AND_STORE_4x4 0, 1, 2, 3, m7, [rsp+64*mmsize], [pd_32], 6 + lea dstq, [dstq+strideq*4] + mova m0, [rsp+65*mmsize] + mova m1, [rsp+64*mmsize] + mova m2, [pd_32] + ROUND_AND_STORE_4x4 4, 5, 6, 0, m7, m1, m2, 6 + +%if ARCH_X86_64 + DEFINE_ARGS dstbak, stride, block, cnt, ptr, stride3, dst +%else + mov dstq, dstm +%endif + UNSCRATCH 0, 8, rsp+(%6+0)*mmsize + UNSCRATCH 4, 9, rsp+(%6+1)*mmsize + UNSCRATCH 5, 10, rsp+(%6+2)*mmsize + UNSCRATCH 3, 11, rsp+(%6+3)*mmsize + ROUND_AND_STORE_4x4 0, 4, 5, 3, m7, m1, m2, 6 +%if ARCH_X86_64 + DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak + lea dstq, [dstbakq+stride3q*4] +%else + lea dstq, [dstq+stride3q*4] +%endif + UNSCRATCH 4, 12, rsp+(%6+4)*mmsize + UNSCRATCH 5, 13, rsp+(%6+5)*mmsize + UNSCRATCH 6, 14, rsp+(%6+6)*mmsize + UNSCRATCH 0, 15, rsp+(%6+7)*mmsize + ROUND_AND_STORE_4x4 4, 5, 6, 0, m7, m1, m2, 6 + + add ptrq, mmsize +%if ARCH_X86_64 + add dstbakq, 8 + mov dstq, dstbakq +%else + add dword dstm, 8 + mov dstq, dstm +%endif + dec cntd + jg .loop_2 + + ; m7 is still zero + ZERO_BLOCK blockq-4*mmsize, 64, 16, m7 + RET + +cglobal vp9_%1_%4_16x16_add_12, 4, 6 + ARCH_X86_64, 16, \ + 70 * mmsize + ARCH_X86_32 * 8 * mmsize, \ + dst, stride, block, eob + mova m0, [pw_4095] + jmp mangle(private_prefix %+ _ %+ vp9_%1_%4_16x16_add_10 %+ SUFFIX).body +%endmacro + +INIT_XMM sse2 +IADST16_FN idct, IDCT16, 67, iadst, IADST16, 70, row +IADST16_FN iadst, IADST16, 70, idct, IDCT16, 67, col +IADST16_FN iadst, IADST16, 70, iadst, IADST16, 70, default + +%macro IDCT32_1D 2-3 8 * mmsize; pass[1/2], src, src_stride + IDCT16_1D %2, 2 * %3, 272, 257 +%if ARCH_X86_64 + mova [rsp+257*mmsize], m8 + mova [rsp+258*mmsize], m9 + mova [rsp+259*mmsize], m10 + mova [rsp+260*mmsize], m11 + mova [rsp+261*mmsize], m12 + mova [rsp+262*mmsize], m13 + mova [rsp+263*mmsize], m14 + mova [rsp+264*mmsize], m15 +%endif + mova [rsp+265*mmsize], m0 + mova [rsp+266*mmsize], m1 + mova [rsp+267*mmsize], m2 + mova [rsp+268*mmsize], m3 + mova [rsp+269*mmsize], m4 + mova [rsp+270*mmsize], m5 + mova [rsp+271*mmsize], m6 + + ; r257-260: t0-3 + ; r265-272: t4/5a/6a/7/8/9a/10/11a + ; r261-264: t12a/13/14a/15 + ; r273-274 is free as scratch space, and 275-282 mirrors m8-15 on 32bit + + mova m0, [%2+ 1*%3] ; in1 + mova m1, [%2+15*%3] ; in15 + mova m2, [%2+17*%3] ; in17 + mova m3, [%2+31*%3] ; in31 + SUMSUB_MUL 0, 3, 4, 5, 16364, 804 ; m0=t31a, m3=t16a + SUMSUB_MUL 2, 1, 4, 5, 11003, 12140 ; m2=t30a, m1=t17a + SUMSUB_BA d, 1, 3, 4 ; m1=t16, m3=t17 + SUMSUB_BA d, 2, 0, 4 ; m2=t31, m0=t30 + SUMSUB_MUL 0, 3, 4, 5, 16069, 3196 ; m0=t30a, m3=t17a + SCRATCH 0, 8, rsp+275*mmsize + SCRATCH 2, 9, rsp+276*mmsize + + ; end of stage 1-3 first quart + + mova m0, [%2+ 7*%3] ; in7 + mova m2, [%2+ 9*%3] ; in9 + mova m4, [%2+23*%3] ; in23 + mova m5, [%2+25*%3] ; in25 + SUMSUB_MUL 2, 4, 6, 7, 14811, 7005 ; m2=t29a, m4=t18a + SUMSUB_MUL 5, 0, 6, 7, 5520, 15426 ; m5=t28a, m0=t19a + SUMSUB_BA d, 4, 0, 6 ; m4=t19, m0=t18 + SUMSUB_BA d, 2, 5, 6 ; m2=t28, m5=t29 + SUMSUB_MUL 5, 0, 6, 7, 3196, m16069 ; m5=t29a, m0=t18a + + ; end of stage 1-3 second quart + + SUMSUB_BA d, 4, 1, 6 ; m4=t16a, m1=t19a + SUMSUB_BA d, 0, 3, 6 ; m0=t17, m3=t18 + UNSCRATCH 6, 8, rsp+275*mmsize ; t30a + UNSCRATCH 7, 9, rsp+276*mmsize ; t31 + mova [rsp+273*mmsize], m4 + mova [rsp+274*mmsize], m0 + SUMSUB_BA d, 2, 7, 0 ; m2=t31a, m7=t28a + SUMSUB_BA d, 5, 6, 0 ; m5=t30, m6=t29 + SUMSUB_MUL 6, 3, 0, 4, 15137, 6270 ; m6=t29a, m3=t18a + SUMSUB_MUL 7, 1, 0, 4, 15137, 6270 ; m7=t28, m1=t19 + SCRATCH 3, 10, rsp+277*mmsize + SCRATCH 1, 11, rsp+278*mmsize + SCRATCH 7, 12, rsp+279*mmsize + SCRATCH 6, 13, rsp+280*mmsize + SCRATCH 5, 14, rsp+281*mmsize + SCRATCH 2, 15, rsp+282*mmsize + + ; end of stage 4-5 first half + + mova m0, [%2+ 5*%3] ; in5 + mova m1, [%2+11*%3] ; in11 + mova m2, [%2+21*%3] ; in21 + mova m3, [%2+27*%3] ; in27 + SUMSUB_MUL 0, 3, 4, 5, 15893, 3981 ; m0=t27a, m3=t20a + SUMSUB_MUL 2, 1, 4, 5, 8423, 14053 ; m2=t26a, m1=t21a + SUMSUB_BA d, 1, 3, 4 ; m1=t20, m3=t21 + SUMSUB_BA d, 2, 0, 4 ; m2=t27, m0=t26 + SUMSUB_MUL 0, 3, 4, 5, 9102, 13623 ; m0=t26a, m3=t21a + SCRATCH 0, 8, rsp+275*mmsize + SCRATCH 2, 9, rsp+276*mmsize + + ; end of stage 1-3 third quart + + mova m0, [%2+ 3*%3] ; in3 + mova m2, [%2+13*%3] ; in13 + mova m4, [%2+19*%3] ; in19 + mova m5, [%2+29*%3] ; in29 + SUMSUB_MUL 2, 4, 6, 7, 13160, 9760 ; m2=t25a, m4=t22a + SUMSUB_MUL 5, 0, 6, 7, 2404, 16207 ; m5=t24a, m0=t23a + SUMSUB_BA d, 4, 0, 6 ; m4=t23, m0=t22 + SUMSUB_BA d, 2, 5, 6 ; m2=t24, m5=t25 + SUMSUB_MUL 5, 0, 6, 7, 13623, m9102 ; m5=t25a, m0=t22a + + ; end of stage 1-3 fourth quart + + SUMSUB_BA d, 1, 4, 6 ; m1=t23a, m4=t20a + SUMSUB_BA d, 3, 0, 6 ; m3=t22, m0=t21 + UNSCRATCH 6, 8, rsp+275*mmsize ; t26a + UNSCRATCH 7, 9, rsp+276*mmsize ; t27 + SCRATCH 3, 8, rsp+275*mmsize + SCRATCH 1, 9, rsp+276*mmsize + SUMSUB_BA d, 7, 2, 1 ; m7=t24a, m2=t27a + SUMSUB_BA d, 6, 5, 1 ; m6=t25, m5=t26 + SUMSUB_MUL 2, 4, 1, 3, 6270, m15137 ; m2=t27, m4=t20 + SUMSUB_MUL 5, 0, 1, 3, 6270, m15137 ; m5=t26a, m0=t21a + + ; end of stage 4-5 second half + + UNSCRATCH 1, 12, rsp+279*mmsize ; t28 + UNSCRATCH 3, 13, rsp+280*mmsize ; t29a + SCRATCH 4, 12, rsp+279*mmsize + SCRATCH 0, 13, rsp+280*mmsize + SUMSUB_BA d, 5, 3, 0 ; m5=t29, m3=t26 + SUMSUB_BA d, 2, 1, 0 ; m2=t28a, m1=t27a + UNSCRATCH 0, 14, rsp+281*mmsize ; t30 + UNSCRATCH 4, 15, rsp+282*mmsize ; t31a + SCRATCH 2, 14, rsp+281*mmsize + SCRATCH 5, 15, rsp+282*mmsize + SUMSUB_BA d, 6, 0, 2 ; m6=t30a, m0=t25a + SUMSUB_BA d, 7, 4, 2 ; m7=t31, m4=t24 + + mova m2, [rsp+273*mmsize] ; t16a + mova m5, [rsp+274*mmsize] ; t17 + mova [rsp+273*mmsize], m6 + mova [rsp+274*mmsize], m7 + UNSCRATCH 6, 10, rsp+277*mmsize ; t18a + UNSCRATCH 7, 11, rsp+278*mmsize ; t19 + SCRATCH 4, 10, rsp+277*mmsize + SCRATCH 0, 11, rsp+278*mmsize + UNSCRATCH 4, 12, rsp+279*mmsize ; t20 + UNSCRATCH 0, 13, rsp+280*mmsize ; t21a + SCRATCH 3, 12, rsp+279*mmsize + SCRATCH 1, 13, rsp+280*mmsize + SUMSUB_BA d, 0, 6, 1 ; m0=t18, m6=t21 + SUMSUB_BA d, 4, 7, 1 ; m4=t19a, m7=t20a + UNSCRATCH 3, 8, rsp+275*mmsize ; t22 + UNSCRATCH 1, 9, rsp+276*mmsize ; t23a + SCRATCH 0, 8, rsp+275*mmsize + SCRATCH 4, 9, rsp+276*mmsize + SUMSUB_BA d, 3, 5, 0 ; m3=t17a, m5=t22a + SUMSUB_BA d, 1, 2, 0 ; m1=t16, m2=t23 + + ; end of stage 6 + + UNSCRATCH 0, 10, rsp+277*mmsize ; t24 + UNSCRATCH 4, 11, rsp+278*mmsize ; t25a + SCRATCH 1, 10, rsp+277*mmsize + SCRATCH 3, 11, rsp+278*mmsize + SUMSUB_MUL 0, 2, 1, 3, 11585, 11585 ; m0=t24a, m2=t23a + SUMSUB_MUL 4, 5, 1, 3, 11585, 11585 ; m4=t25, m5=t22 + UNSCRATCH 1, 12, rsp+279*mmsize ; t26 + UNSCRATCH 3, 13, rsp+280*mmsize ; t27a + SCRATCH 0, 12, rsp+279*mmsize + SCRATCH 4, 13, rsp+280*mmsize + SUMSUB_MUL 3, 7, 0, 4, 11585, 11585 ; m3=t27, m7=t20 + SUMSUB_MUL 1, 6, 0, 4, 11585, 11585 ; m1=t26a, m6=t21a + + ; end of stage 7 + + mova m0, [rsp+269*mmsize] ; t8 + mova m4, [rsp+270*mmsize] ; t9a + mova [rsp+269*mmsize], m1 ; t26a + mova [rsp+270*mmsize], m3 ; t27 + mova m3, [rsp+271*mmsize] ; t10 + SUMSUB_BA d, 2, 0, 1 ; m2=out8, m0=out23 + SUMSUB_BA d, 5, 4, 1 ; m5=out9, m4=out22 + SUMSUB_BA d, 6, 3, 1 ; m6=out10, m3=out21 + mova m1, [rsp+272*mmsize] ; t11a + mova [rsp+271*mmsize], m0 + SUMSUB_BA d, 7, 1, 0 ; m7=out11, m1=out20 + +%if %1 == 1 + TRANSPOSE4x4D 2, 5, 6, 7, 0 + mova [ptrq+ 2*mmsize], m2 + mova [ptrq+10*mmsize], m5 + mova [ptrq+18*mmsize], m6 + mova [ptrq+26*mmsize], m7 +%else ; %1 == 2 + pxor m0, m0 + lea dstq, [dstq+strideq*8] + ROUND_AND_STORE_4x4 2, 5, 6, 7, m0, [rsp+256*mmsize], [pd_32], 6 +%endif + mova m2, [rsp+271*mmsize] +%if %1 == 1 + TRANSPOSE4x4D 1, 3, 4, 2, 0 + mova [ptrq+ 5*mmsize], m1 + mova [ptrq+13*mmsize], m3 + mova [ptrq+21*mmsize], m4 + mova [ptrq+29*mmsize], m2 +%else ; %1 == 2 + lea dstq, [dstq+stride3q*4] + ROUND_AND_STORE_4x4 1, 3, 4, 2, m0, [rsp+256*mmsize], [pd_32], 6 +%endif + + ; end of last stage + store for out8-11 and out20-23 + + UNSCRATCH 0, 9, rsp+276*mmsize ; t19a + UNSCRATCH 1, 8, rsp+275*mmsize ; t18 + UNSCRATCH 2, 11, rsp+278*mmsize ; t17a + UNSCRATCH 3, 10, rsp+277*mmsize ; t16 + mova m7, [rsp+261*mmsize] ; t12a + mova m6, [rsp+262*mmsize] ; t13 + mova m5, [rsp+263*mmsize] ; t14a + SUMSUB_BA d, 0, 7, 4 ; m0=out12, m7=out19 + SUMSUB_BA d, 1, 6, 4 ; m1=out13, m6=out18 + SUMSUB_BA d, 2, 5, 4 ; m2=out14, m5=out17 + mova m4, [rsp+264*mmsize] ; t15 + SCRATCH 7, 8, rsp+275*mmsize + SUMSUB_BA d, 3, 4, 7 ; m3=out15, m4=out16 + +%if %1 == 1 + TRANSPOSE4x4D 0, 1, 2, 3, 7 + mova [ptrq+ 3*mmsize], m0 + mova [ptrq+11*mmsize], m1 + mova [ptrq+19*mmsize], m2 + mova [ptrq+27*mmsize], m3 +%else ; %1 == 2 +%if ARCH_X86_64 + SWAP 7, 9 + lea dstq, [dstbakq+stride3q*4] +%else ; x86-32 + pxor m7, m7 + mov dstq, dstm + lea dstq, [dstq+stride3q*4] +%endif + ROUND_AND_STORE_4x4 0, 1, 2, 3, m7, [rsp+256*mmsize], [pd_32], 6 +%endif + UNSCRATCH 0, 8, rsp+275*mmsize ; out19 +%if %1 == 1 + TRANSPOSE4x4D 4, 5, 6, 0, 7 + mova [ptrq+ 4*mmsize], m4 + mova [ptrq+12*mmsize], m5 + mova [ptrq+20*mmsize], m6 + mova [ptrq+28*mmsize], m0 +%else ; %1 == 2 + lea dstq, [dstq+strideq*4] + ROUND_AND_STORE_4x4 4, 5, 6, 0, m7, [rsp+256*mmsize], [pd_32], 6 +%endif + + ; end of last stage + store for out12-19 + +%if ARCH_X86_64 + SWAP 7, 8 +%endif + mova m7, [rsp+257*mmsize] ; t0 + mova m6, [rsp+258*mmsize] ; t1 + mova m5, [rsp+259*mmsize] ; t2 + mova m4, [rsp+260*mmsize] ; t3 + mova m0, [rsp+274*mmsize] ; t31 + mova m1, [rsp+273*mmsize] ; t30a + UNSCRATCH 2, 15, rsp+282*mmsize ; t29 + SUMSUB_BA d, 0, 7, 3 ; m0=out0, m7=out31 + SUMSUB_BA d, 1, 6, 3 ; m1=out1, m6=out30 + SUMSUB_BA d, 2, 5, 3 ; m2=out2, m5=out29 + SCRATCH 0, 9, rsp+276*mmsize + UNSCRATCH 3, 14, rsp+281*mmsize ; t28a + SUMSUB_BA d, 3, 4, 0 ; m3=out3, m4=out28 + +%if %1 == 1 + TRANSPOSE4x4D 4, 5, 6, 7, 0 + mova [ptrq+ 7*mmsize], m4 + mova [ptrq+15*mmsize], m5 + mova [ptrq+23*mmsize], m6 + mova [ptrq+31*mmsize], m7 +%else ; %1 == 2 +%if ARCH_X86_64 + SWAP 0, 8 +%else ; x86-32 + pxor m0, m0 +%endif + lea dstq, [dstq+stride3q*4] + ROUND_AND_STORE_4x4 4, 5, 6, 7, m0, [rsp+256*mmsize], [pd_32], 6 +%endif + UNSCRATCH 7, 9, rsp+276*mmsize ; out0 +%if %1 == 1 + TRANSPOSE4x4D 7, 1, 2, 3, 0 + mova [ptrq+ 0*mmsize], m7 + mova [ptrq+ 8*mmsize], m1 + mova [ptrq+16*mmsize], m2 + mova [ptrq+24*mmsize], m3 +%else ; %1 == 2 +%if ARCH_X86_64 + DEFINE_ARGS dstbak, stride, block, cnt, ptr, stride3, dst +%else ; x86-32 + mov dstq, dstm +%endif + ROUND_AND_STORE_4x4 7, 1, 2, 3, m0, [rsp+256*mmsize], [pd_32], 6 +%if ARCH_X86_64 + DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak +%endif +%endif + + ; end of last stage + store for out0-3 and out28-31 + +%if ARCH_X86_64 + SWAP 0, 8 +%endif + mova m7, [rsp+265*mmsize] ; t4 + mova m6, [rsp+266*mmsize] ; t5a + mova m5, [rsp+267*mmsize] ; t6a + mova m4, [rsp+268*mmsize] ; t7 + mova m0, [rsp+270*mmsize] ; t27 + mova m1, [rsp+269*mmsize] ; t26a + UNSCRATCH 2, 13, rsp+280*mmsize ; t25 + SUMSUB_BA d, 0, 7, 3 ; m0=out4, m7=out27 + SUMSUB_BA d, 1, 6, 3 ; m1=out5, m6=out26 + SUMSUB_BA d, 2, 5, 3 ; m2=out6, m5=out25 + UNSCRATCH 3, 12, rsp+279*mmsize ; t24a + SCRATCH 7, 9, rsp+276*mmsize + SUMSUB_BA d, 3, 4, 7 ; m3=out7, m4=out24 + +%if %1 == 1 + TRANSPOSE4x4D 0, 1, 2, 3, 7 + mova [ptrq+ 1*mmsize], m0 + mova [ptrq+ 9*mmsize], m1 + mova [ptrq+17*mmsize], m2 + mova [ptrq+25*mmsize], m3 +%else ; %1 == 2 +%if ARCH_X86_64 + SWAP 7, 8 + lea dstq, [dstbakq+strideq*4] +%else ; x86-32 + pxor m7, m7 + lea dstq, [dstq+strideq*4] +%endif + ROUND_AND_STORE_4x4 0, 1, 2, 3, m7, [rsp+256*mmsize], [pd_32], 6 +%endif + UNSCRATCH 0, 9, rsp+276*mmsize ; out27 +%if %1 == 1 + TRANSPOSE4x4D 4, 5, 6, 0, 7 + mova [ptrq+ 6*mmsize], m4 + mova [ptrq+14*mmsize], m5 + mova [ptrq+22*mmsize], m6 + mova [ptrq+30*mmsize], m0 +%else ; %1 == 2 +%if ARCH_X86_64 + lea dstq, [dstbakq+stride3q*8] +%else + mov dstq, dstm + lea dstq, [dstq+stride3q*8] +%endif + ROUND_AND_STORE_4x4 4, 5, 6, 0, m7, [rsp+256*mmsize], [pd_32], 6 +%endif + + ; end of last stage + store for out4-7 and out24-27 +%endmacro + +INIT_XMM sse2 +cglobal vp9_idct_idct_32x32_add_10, 4, 6 + ARCH_X86_64, 16, \ + 275 * mmsize + ARCH_X86_32 * 8 * mmsize, \ + dst, stride, block, eob + mova m0, [pw_1023] + cmp eobd, 1 + jg .idctfull + + ; dc-only - the 10bit version can be done entirely in 32bit, since the max + ; coef values are 17+sign bit, and the coef is 14bit, so 31+sign easily + ; fits in 32bit + DEFINE_ARGS dst, stride, block, coef + pxor m2, m2 + DC_ONLY 6, m2 + movd m1, coefd + pshuflw m1, m1, q0000 + punpcklqdq m1, m1 + DEFINE_ARGS dst, stride, cnt + mov cntd, 32 +.loop_dc: + STORE_2x8 3, 4, 1, m2, m0, dstq, mmsize + STORE_2x8 3, 4, 1, m2, m0, dstq+mmsize*2, mmsize + add dstq, strideq + dec cntd + jg .loop_dc + RET + +.idctfull: + mova [rsp+256*mmsize], m0 + DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak +%if ARCH_X86_64 + mov dstbakq, dstq + movsxd cntq, cntd +%endif +%ifdef PIC + lea ptrq, [default_32x32] + movzx cntd, byte [ptrq+cntq-1] +%else + movzx cntd, byte [default_32x32+cntq-1] +%endif + mov skipd, 8 + sub skipd, cntd + mov ptrq, rsp +.loop_1: + IDCT32_1D 1, blockq + + add ptrq, 32 * mmsize + add blockq, mmsize + dec cntd + jg .loop_1 + + ; zero-pad the remainder (skipped cols) + test skipd, skipd + jz .end + shl skipd, 2 + lea blockq, [blockq+skipq*(mmsize/4)] + pxor m0, m0 +.loop_z: + mova [ptrq+mmsize*0], m0 + mova [ptrq+mmsize*1], m0 + mova [ptrq+mmsize*2], m0 + mova [ptrq+mmsize*3], m0 + mova [ptrq+mmsize*4], m0 + mova [ptrq+mmsize*5], m0 + mova [ptrq+mmsize*6], m0 + mova [ptrq+mmsize*7], m0 + add ptrq, 8 * mmsize + dec skipd + jg .loop_z +.end: + + DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak + lea stride3q, [strideq*3] + mov cntd, 8 + mov ptrq, rsp +.loop_2: + IDCT32_1D 2, ptrq + + add ptrq, mmsize +%if ARCH_X86_64 + add dstbakq, 8 + mov dstq, dstbakq +%else + add dword dstm, 8 + mov dstq, dstm +%endif + dec cntd + jg .loop_2 + + ; m7 is still zero + ZERO_BLOCK blockq-8*mmsize, 128, 32, m7 + RET + +INIT_XMM sse2 +cglobal vp9_idct_idct_32x32_add_12, 4, 6 + ARCH_X86_64, 16, \ + 275 * mmsize + ARCH_X86_32 * 8 * mmsize, \ + dst, stride, block, eob + mova m0, [pw_4095] + cmp eobd, 1 + jg mangle(private_prefix %+ _ %+ vp9_idct_idct_32x32_add_10 %+ SUFFIX).idctfull + + ; dc-only - unfortunately, this one can overflow, since coefs are 19+sign + ; bpp, and 19+14+sign does not fit in 32bit, so we do 2-stage multiplies + DEFINE_ARGS dst, stride, block, coef, coefl + pxor m2, m2 + DC_ONLY_64BIT 6, m2 + movd m1, coefd + pshuflw m1, m1, q0000 + punpcklqdq m1, m1 + DEFINE_ARGS dst, stride, cnt + mov cntd, 32 +.loop_dc: + STORE_2x8 3, 4, 1, m2, m0, dstq, mmsize + STORE_2x8 3, 4, 1, m2, m0, dstq+mmsize*2, mmsize + add dstq, strideq + dec cntd + jg .loop_dc + RET diff --git a/libs/ffvpx/libavcodec/x86/vp9itxfm_template.asm b/libs/ffvpx/libavcodec/x86/vp9itxfm_template.asm new file mode 100644 index 000000000..d2f2257d8 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp9itxfm_template.asm @@ -0,0 +1,142 @@ +;****************************************************************************** +;* VP9 IDCT SIMD optimizations +;* +;* Copyright (C) 2013 Clément Bœsch <u pkh me> +;* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%macro VP9_IWHT4_1D 0 + SWAP 1, 2, 3 + paddw m0, m2 + psubw m3, m1 + psubw m4, m0, m3 + psraw m4, 1 + psubw m5, m4, m1 + SWAP 5, 1 + psubw m4, m2 + SWAP 4, 2 + psubw m0, m1 + paddw m3, m2 + SWAP 3, 2, 1 +%endmacro + +; (a*x + b*y + round) >> shift +%macro VP9_MULSUB_2W_2X 5 ; dst1, dst2/src, round, coefs1, coefs2 + pmaddwd m%1, m%2, %4 + pmaddwd m%2, %5 + paddd m%1, %3 + paddd m%2, %3 + psrad m%1, 14 + psrad m%2, 14 +%endmacro + +%macro VP9_MULSUB_2W_4X 7 ; dst1, dst2, coef1, coef2, rnd, tmp1/src, tmp2 + VP9_MULSUB_2W_2X %7, %6, %5, [pw_m%3_%4], [pw_%4_%3] + VP9_MULSUB_2W_2X %1, %2, %5, [pw_m%3_%4], [pw_%4_%3] + packssdw m%1, m%7 + packssdw m%2, m%6 +%endmacro + +%macro VP9_UNPACK_MULSUB_2W_4X 7-9 ; dst1, dst2, (src1, src2,) coef1, coef2, rnd, tmp1, tmp2 +%if %0 == 7 + punpckhwd m%6, m%2, m%1 + punpcklwd m%2, m%1 + VP9_MULSUB_2W_4X %1, %2, %3, %4, %5, %6, %7 +%else + punpckhwd m%8, m%4, m%3 + punpcklwd m%2, m%4, m%3 + VP9_MULSUB_2W_4X %1, %2, %5, %6, %7, %8, %9 +%endif +%endmacro + +%macro VP9_IDCT4_1D_FINALIZE 0 + SUMSUB_BA w, 3, 2, 4 ; m3=t3+t0, m2=-t3+t0 + SUMSUB_BA w, 1, 0, 4 ; m1=t2+t1, m0=-t2+t1 + SWAP 0, 3, 2 ; 3102 -> 0123 +%endmacro + +%macro VP9_IDCT4_1D 0 +%if cpuflag(ssse3) + SUMSUB_BA w, 2, 0, 4 ; m2=IN(0)+IN(2) m0=IN(0)-IN(2) + pmulhrsw m2, m6 ; m2=t0 + pmulhrsw m0, m6 ; m0=t1 +%else ; <= sse2 + VP9_UNPACK_MULSUB_2W_4X 0, 2, 11585, 11585, m7, 4, 5 ; m0=t1, m1=t0 +%endif + VP9_UNPACK_MULSUB_2W_4X 1, 3, 15137, 6270, m7, 4, 5 ; m1=t2, m3=t3 + VP9_IDCT4_1D_FINALIZE +%endmacro + +%macro VP9_IADST4_1D 0 + movq2dq xmm0, m0 + movq2dq xmm1, m1 + movq2dq xmm2, m2 + movq2dq xmm3, m3 +%if cpuflag(ssse3) + paddw m3, m0 +%endif + punpcklwd xmm0, xmm1 + punpcklwd xmm2, xmm3 + pmaddwd xmm1, xmm0, [pw_5283_13377] + pmaddwd xmm4, xmm0, [pw_9929_13377] +%if notcpuflag(ssse3) + pmaddwd xmm6, xmm0, [pw_13377_0] +%endif + pmaddwd xmm0, [pw_15212_m13377] + pmaddwd xmm3, xmm2, [pw_15212_9929] +%if notcpuflag(ssse3) + pmaddwd xmm7, xmm2, [pw_m13377_13377] +%endif + pmaddwd xmm2, [pw_m5283_m15212] +%if cpuflag(ssse3) + psubw m3, m2 +%else + paddd xmm6, xmm7 +%endif + paddd xmm0, xmm2 + paddd xmm3, xmm5 + paddd xmm2, xmm5 +%if notcpuflag(ssse3) + paddd xmm6, xmm5 +%endif + paddd xmm1, xmm3 + paddd xmm0, xmm3 + paddd xmm4, xmm2 + psrad xmm1, 14 + psrad xmm0, 14 + psrad xmm4, 14 +%if cpuflag(ssse3) + pmulhrsw m3, [pw_13377x2] ; out2 +%else + psrad xmm6, 14 +%endif + packssdw xmm0, xmm0 + packssdw xmm1, xmm1 + packssdw xmm4, xmm4 +%if notcpuflag(ssse3) + packssdw xmm6, xmm6 +%endif + movdq2q m0, xmm0 ; out3 + movdq2q m1, xmm1 ; out0 + movdq2q m2, xmm4 ; out1 +%if notcpuflag(ssse3) + movdq2q m3, xmm6 ; out2 +%endif + SWAP 0, 1, 2, 3 +%endmacro diff --git a/libs/ffvpx/libavcodec/x86/vp9lpf.asm b/libs/ffvpx/libavcodec/x86/vp9lpf.asm new file mode 100644 index 000000000..4e7ede223 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp9lpf.asm @@ -0,0 +1,1211 @@ +;****************************************************************************** +;* VP9 loop filter SIMD optimizations +;* +;* Copyright (C) 2013-2014 Clément Bœsch <u pkh me> +;* Copyright (C) 2014 Ronald S. Bultje <rsbultje@gmail.com> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA + +cextern pb_3 +cextern pb_80 + +pb_4: times 16 db 0x04 +pb_10: times 16 db 0x10 +pb_40: times 16 db 0x40 +pb_81: times 16 db 0x81 +pb_f8: times 16 db 0xf8 +pb_fe: times 16 db 0xfe +pb_ff: times 16 db 0xff + +cextern pw_4 +cextern pw_8 + +; with mix functions, two 8-bit thresholds are stored in a 16-bit storage, +; the following mask is used to splat both in the same register +mask_mix: times 8 db 0 + times 8 db 1 + +mask_mix84: times 8 db 0xff + times 8 db 0x00 +mask_mix48: times 8 db 0x00 + times 8 db 0xff + +SECTION .text + +%macro SCRATCH 3 +%ifdef m8 + SWAP %1, %2 +%else + mova [%3], m%1 +%endif +%endmacro + +%macro UNSCRATCH 3 +%ifdef m8 + SWAP %1, %2 +%else + mova m%1, [%3] +%endif +%endmacro + +; %1 = abs(%2-%3) +%macro ABSSUB 4 ; dst, src1 (RO), src2 (RO), tmp +%ifdef m8 + psubusb %1, %3, %2 + psubusb %4, %2, %3 +%else + mova %1, %3 + mova %4, %2 + psubusb %1, %2 + psubusb %4, %3 +%endif + por %1, %4 +%endmacro + +; %1 = %1>%2 +%macro CMP_GT 2-3 ; src/dst, cmp, pb_80 +%if %0 == 3 + pxor %1, %3 +%endif + pcmpgtb %1, %2 +%endmacro + +; %1 = abs(%2-%3) > %4 +%macro ABSSUB_GT 5-6 [pb_80]; dst, src1, src2, cmp, tmp, [pb_80] + ABSSUB %1, %2, %3, %5 ; dst = abs(src1-src2) + CMP_GT %1, %4, %6 ; dst > cmp +%endmacro + +%macro MASK_APPLY 4 ; %1=new_data/dst %2=old_data %3=mask %4=tmp + pand %1, %3 ; new &= mask + pandn %4, %3, %2 ; tmp = ~mask & old + por %1, %4 ; new&mask | old&~mask +%endmacro + +%macro UNPACK 4 +%ifdef m8 + punpck%1bw %2, %3, %4 +%else + mova %2, %3 + punpck%1bw %2, %4 +%endif +%endmacro + +%macro FILTER_SUBx2_ADDx2 11 ; %1=dst %2=h/l %3=cache %4=stack_off %5=sub1 %6=sub2 %7=add1 + ; %8=add2 %9=rshift, [unpack], [unpack_is_mem_on_x86_32] + psubw %3, [rsp+%4+%5*mmsize*2] + psubw %3, [rsp+%4+%6*mmsize*2] + paddw %3, [rsp+%4+%7*mmsize*2] +%ifnidn %10, "" +%if %11 == 0 + punpck%2bw %1, %10, m0 +%else + UNPACK %2, %1, %10, m0 +%endif + mova [rsp+%4+%8*mmsize*2], %1 + paddw %3, %1 +%else + paddw %3, [rsp+%4+%8*mmsize*2] +%endif + psraw %1, %3, %9 +%endmacro + +; FIXME interleave l/h better (for instruction pairing) +%macro FILTER_INIT 9 ; tmp1, tmp2, cacheL, cacheH, dstp, stack_off, filterid, mask, source + FILTER%7_INIT %1, l, %3, %6 + 0 + FILTER%7_INIT %2, h, %4, %6 + mmsize + packuswb %1, %2 + MASK_APPLY %1, %9, %8, %2 + mova %5, %1 +%endmacro + + +%macro FILTER_UPDATE 12-16 "", "", "", 0 ; tmp1, tmp2, cacheL, cacheH, dstp, stack_off, -, -, +, +, rshift, + ; mask, [source], [unpack + src], [unpack_is_mem_on_x86_32] +; FIXME interleave this properly with the subx2/addx2 +%ifnidn %15, "" +%if %16 == 0 || ARCH_X86_64 + mova %14, %15 +%endif +%endif + FILTER_SUBx2_ADDx2 %1, l, %3, %6 + 0, %7, %8, %9, %10, %11, %14, %16 + FILTER_SUBx2_ADDx2 %2, h, %4, %6 + mmsize, %7, %8, %9, %10, %11, %14, %16 + packuswb %1, %2 +%ifnidn %13, "" + MASK_APPLY %1, %13, %12, %2 +%else + MASK_APPLY %1, %5, %12, %2 +%endif + mova %5, %1 +%endmacro + +%macro SRSHIFT3B_2X 4 ; reg1, reg2, [pb_10], tmp + mova %4, [pb_f8] + pand %1, %4 + pand %2, %4 + psrlq %1, 3 + psrlq %2, 3 + pxor %1, %3 + pxor %2, %3 + psubb %1, %3 + psubb %2, %3 +%endmacro + +%macro EXTRACT_POS_NEG 3 ; i8, neg, pos + pxor %3, %3 + pxor %2, %2 + pcmpgtb %3, %1 ; i8 < 0 mask + psubb %2, %1 ; neg values (only the originally - will be kept) + pand %2, %3 ; negative values of i8 (but stored as +) + pandn %3, %1 ; positive values of i8 +%endmacro + +; clip_u8(u8 + i8) +%macro SIGN_ADD 4 ; dst, u8, i8, tmp1 + EXTRACT_POS_NEG %3, %4, %1 + paddusb %1, %2 ; add the positives + psubusb %1, %4 ; sub the negatives +%endmacro + +; clip_u8(u8 - i8) +%macro SIGN_SUB 4 ; dst, u8, i8, tmp1 + EXTRACT_POS_NEG %3, %1, %4 + paddusb %1, %2 ; add the negatives + psubusb %1, %4 ; sub the positives +%endmacro + +%macro FILTER6_INIT 4 ; %1=dst %2=h/l %3=cache, %4=stack_off + UNPACK %2, %1, rp3, m0 ; p3: B->W + mova [rsp+%4+0*mmsize*2], %1 + paddw %3, %1, %1 ; p3*2 + paddw %3, %1 ; p3*3 + punpck%2bw %1, m1, m0 ; p2: B->W + mova [rsp+%4+1*mmsize*2], %1 + paddw %3, %1 ; p3*3 + p2 + paddw %3, %1 ; p3*3 + p2*2 + UNPACK %2, %1, rp1, m0 ; p1: B->W + mova [rsp+%4+2*mmsize*2], %1 + paddw %3, %1 ; p3*3 + p2*2 + p1 + UNPACK %2, %1, rp0, m0 ; p0: B->W + mova [rsp+%4+3*mmsize*2], %1 + paddw %3, %1 ; p3*3 + p2*2 + p1 + p0 + UNPACK %2, %1, rq0, m0 ; q0: B->W + mova [rsp+%4+4*mmsize*2], %1 + paddw %3, %1 ; p3*3 + p2*2 + p1 + p0 + q0 + paddw %3, [pw_4] ; p3*3 + p2*2 + p1 + p0 + q0 + 4 + psraw %1, %3, 3 ; (p3*3 + p2*2 + p1 + p0 + q0 + 4) >> 3 +%endmacro + +%macro FILTER14_INIT 4 ; %1=dst %2=h/l %3=cache, %4=stack_off + punpck%2bw %1, m2, m0 ; p7: B->W + mova [rsp+%4+ 8*mmsize*2], %1 + psllw %3, %1, 3 ; p7*8 + psubw %3, %1 ; p7*7 + punpck%2bw %1, m3, m0 ; p6: B->W + mova [rsp+%4+ 9*mmsize*2], %1 + paddw %3, %1 ; p7*7 + p6 + paddw %3, %1 ; p7*7 + p6*2 + UNPACK %2, %1, rp5, m0 ; p5: B->W + mova [rsp+%4+10*mmsize*2], %1 + paddw %3, %1 ; p7*7 + p6*2 + p5 + UNPACK %2, %1, rp4, m0 ; p4: B->W + mova [rsp+%4+11*mmsize*2], %1 + paddw %3, %1 ; p7*7 + p6*2 + p5 + p4 + paddw %3, [rsp+%4+ 0*mmsize*2] ; p7*7 + p6*2 + p5 + p4 + p3 + paddw %3, [rsp+%4+ 1*mmsize*2] ; p7*7 + p6*2 + p5 + .. + p2 + paddw %3, [rsp+%4+ 2*mmsize*2] ; p7*7 + p6*2 + p5 + .. + p1 + paddw %3, [rsp+%4+ 3*mmsize*2] ; p7*7 + p6*2 + p5 + .. + p0 + paddw %3, [rsp+%4+ 4*mmsize*2] ; p7*7 + p6*2 + p5 + .. + p0 + q0 + paddw %3, [pw_8] ; p7*7 + p6*2 + p5 + .. + p0 + q0 + 8 + psraw %1, %3, 4 ; (p7*7 + p6*2 + p5 + .. + p0 + q0 + 8) >> 4 +%endmacro + +%macro TRANSPOSE16x16B 17 + mova %17, m%16 + SBUTTERFLY bw, %1, %2, %16 + SBUTTERFLY bw, %3, %4, %16 + SBUTTERFLY bw, %5, %6, %16 + SBUTTERFLY bw, %7, %8, %16 + SBUTTERFLY bw, %9, %10, %16 + SBUTTERFLY bw, %11, %12, %16 + SBUTTERFLY bw, %13, %14, %16 + mova m%16, %17 + mova %17, m%14 + SBUTTERFLY bw, %15, %16, %14 + SBUTTERFLY wd, %1, %3, %14 + SBUTTERFLY wd, %2, %4, %14 + SBUTTERFLY wd, %5, %7, %14 + SBUTTERFLY wd, %6, %8, %14 + SBUTTERFLY wd, %9, %11, %14 + SBUTTERFLY wd, %10, %12, %14 + SBUTTERFLY wd, %13, %15, %14 + mova m%14, %17 + mova %17, m%12 + SBUTTERFLY wd, %14, %16, %12 + SBUTTERFLY dq, %1, %5, %12 + SBUTTERFLY dq, %2, %6, %12 + SBUTTERFLY dq, %3, %7, %12 + SBUTTERFLY dq, %4, %8, %12 + SBUTTERFLY dq, %9, %13, %12 + SBUTTERFLY dq, %10, %14, %12 + SBUTTERFLY dq, %11, %15, %12 + mova m%12, %17 + mova %17, m%8 + SBUTTERFLY dq, %12, %16, %8 + SBUTTERFLY qdq, %1, %9, %8 + SBUTTERFLY qdq, %2, %10, %8 + SBUTTERFLY qdq, %3, %11, %8 + SBUTTERFLY qdq, %4, %12, %8 + SBUTTERFLY qdq, %5, %13, %8 + SBUTTERFLY qdq, %6, %14, %8 + SBUTTERFLY qdq, %7, %15, %8 + mova m%8, %17 + mova %17, m%1 + SBUTTERFLY qdq, %8, %16, %1 + mova m%1, %17 + SWAP %2, %9 + SWAP %3, %5 + SWAP %4, %13 + SWAP %6, %11 + SWAP %8, %15 + SWAP %12, %14 +%endmacro + +%macro TRANSPOSE8x8B 13 + SBUTTERFLY bw, %1, %2, %7 + movdq%10 m%7, %9 + movdqa %11, m%2 + SBUTTERFLY bw, %3, %4, %2 + SBUTTERFLY bw, %5, %6, %2 + SBUTTERFLY bw, %7, %8, %2 + SBUTTERFLY wd, %1, %3, %2 + movdqa m%2, %11 + movdqa %11, m%3 + SBUTTERFLY wd, %2, %4, %3 + SBUTTERFLY wd, %5, %7, %3 + SBUTTERFLY wd, %6, %8, %3 + SBUTTERFLY dq, %1, %5, %3 + SBUTTERFLY dq, %2, %6, %3 + movdqa m%3, %11 + movh %12, m%2 + movhps %13, m%2 + SBUTTERFLY dq, %3, %7, %2 + SBUTTERFLY dq, %4, %8, %2 + SWAP %2, %5 + SWAP %4, %7 +%endmacro + +%macro DEFINE_REAL_P7_TO_Q7 0-1 0 +%define P7 dstq + 4*mstrideq + %1 +%define P6 dstq + mstride3q + %1 +%define P5 dstq + 2*mstrideq + %1 +%define P4 dstq + mstrideq + %1 +%define P3 dstq + %1 +%define P2 dstq + strideq + %1 +%define P1 dstq + 2* strideq + %1 +%define P0 dstq + stride3q + %1 +%define Q0 dstq + 4* strideq + %1 +%define Q1 dst2q + mstride3q + %1 +%define Q2 dst2q + 2*mstrideq + %1 +%define Q3 dst2q + mstrideq + %1 +%define Q4 dst2q + %1 +%define Q5 dst2q + strideq + %1 +%define Q6 dst2q + 2* strideq + %1 +%define Q7 dst2q + stride3q + %1 +%endmacro + +%macro DEFINE_TRANSPOSED_P7_TO_Q7 0-1 0 +%define P3 rsp + 0*mmsize + %1 +%define P2 rsp + 1*mmsize + %1 +%define P1 rsp + 2*mmsize + %1 +%define P0 rsp + 3*mmsize + %1 +%define Q0 rsp + 4*mmsize + %1 +%define Q1 rsp + 5*mmsize + %1 +%define Q2 rsp + 6*mmsize + %1 +%define Q3 rsp + 7*mmsize + %1 +%if mmsize == 16 +%define P7 rsp + 8*mmsize + %1 +%define P6 rsp + 9*mmsize + %1 +%define P5 rsp + 10*mmsize + %1 +%define P4 rsp + 11*mmsize + %1 +%define Q4 rsp + 12*mmsize + %1 +%define Q5 rsp + 13*mmsize + %1 +%define Q6 rsp + 14*mmsize + %1 +%define Q7 rsp + 15*mmsize + %1 +%endif +%endmacro + +; ..............AB -> AAAAAAAABBBBBBBB +%macro SPLATB_MIX 1-2 [mask_mix] +%if cpuflag(ssse3) + pshufb %1, %2 +%else + punpcklbw %1, %1 + punpcklwd %1, %1 + punpckldq %1, %1 +%endif +%endmacro + +%macro LOOPFILTER 5 ; %1=v/h %2=size1 %3+%4=stack, %5=mmx/32bit stack only +%assign %%ext 0 +%if ARCH_X86_32 || mmsize == 8 +%assign %%ext %5 +%endif + +%if UNIX64 +cglobal vp9_loop_filter_%1_%2_ %+ mmsize, 5, 9, 16, %3 + %4 + %%ext, dst, stride, E, I, H, mstride, dst2, stride3, mstride3 +%else +%if WIN64 +cglobal vp9_loop_filter_%1_%2_ %+ mmsize, 4, 8, 16, %3 + %4 + %%ext, dst, stride, E, I, mstride, dst2, stride3, mstride3 +%else +cglobal vp9_loop_filter_%1_%2_ %+ mmsize, 2, 6, 16, %3 + %4 + %%ext, dst, stride, mstride, dst2, stride3, mstride3 +%define Ed dword r2m +%define Id dword r3m +%endif +%define Hd dword r4m +%endif + + mov mstrideq, strideq + neg mstrideq + + lea stride3q, [strideq*3] + lea mstride3q, [mstrideq*3] + +%ifidn %1, h +%if %2 != 16 +%if mmsize == 16 +%define movx movh +%else +%define movx mova +%endif + lea dstq, [dstq + 4*strideq - 4] +%else +%define movx movu + lea dstq, [dstq + 4*strideq - 8] ; go from top center (h pos) to center left (v pos) +%endif +%else + lea dstq, [dstq + 4*mstrideq] +%endif + ; FIXME we shouldn't need two dts registers if mmsize == 8 + lea dst2q, [dstq + 8*strideq] + + DEFINE_REAL_P7_TO_Q7 + +%ifidn %1, h + movx m0, [P7] + movx m1, [P6] + movx m2, [P5] + movx m3, [P4] + movx m4, [P3] + movx m5, [P2] +%if (ARCH_X86_64 && mmsize == 16) || %2 > 16 + movx m6, [P1] +%endif + movx m7, [P0] +%ifdef m8 + movx m8, [Q0] + movx m9, [Q1] + movx m10, [Q2] + movx m11, [Q3] + movx m12, [Q4] + movx m13, [Q5] + movx m14, [Q6] + movx m15, [Q7] + DEFINE_TRANSPOSED_P7_TO_Q7 +%if %2 == 16 + TRANSPOSE16x16B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, [rsp] + mova [P7], m0 + mova [P6], m1 + mova [P5], m2 + mova [P4], m3 +%else ; %2 == 44/48/84/88 + ; 8x16 transpose + punpcklbw m0, m1 + punpcklbw m2, m3 + punpcklbw m4, m5 + punpcklbw m6, m7 + punpcklbw m8, m9 + punpcklbw m10, m11 + punpcklbw m12, m13 + punpcklbw m14, m15 + TRANSPOSE8x8W 0, 2, 4, 6, 8, 10, 12, 14, 15 + SWAP 0, 4 + SWAP 2, 5 + SWAP 0, 6 + SWAP 0, 7 + SWAP 10, 9 + SWAP 12, 10 + SWAP 14, 11 +%endif ; %2 + mova [P3], m4 + mova [P2], m5 + mova [P1], m6 + mova [P0], m7 + mova [Q0], m8 + mova [Q1], m9 + mova [Q2], m10 + mova [Q3], m11 +%if %2 == 16 + mova [Q4], m12 + mova [Q5], m13 + mova [Q6], m14 + mova [Q7], m15 +%endif ; %2 +%else ; x86-32 +%if %2 == 16 + TRANSPOSE8x8B 0, 1, 2, 3, 4, 5, 6, 7, [P1], u, [rsp+%3+%4], [rsp+64], [rsp+80] + DEFINE_TRANSPOSED_P7_TO_Q7 + movh [P7], m0 + movh [P5], m1 + movh [P3], m2 + movh [P1], m3 + movh [Q2], m5 + movh [Q4], m6 + movh [Q6], m7 + movhps [P6], m0 + movhps [P4], m1 + movhps [P2], m2 + movhps [P0], m3 + movhps [Q3], m5 + movhps [Q5], m6 + movhps [Q7], m7 + DEFINE_REAL_P7_TO_Q7 + movx m0, [Q0] + movx m1, [Q1] + movx m2, [Q2] + movx m3, [Q3] + movx m4, [Q4] + movx m5, [Q5] + movx m7, [Q7] + TRANSPOSE8x8B 0, 1, 2, 3, 4, 5, 6, 7, [Q6], u, [rsp+%3+%4], [rsp+72], [rsp+88] + DEFINE_TRANSPOSED_P7_TO_Q7 8 + movh [P7], m0 + movh [P5], m1 + movh [P3], m2 + movh [P1], m3 + movh [Q2], m5 + movh [Q4], m6 + movh [Q6], m7 + movhps [P6], m0 + movhps [P4], m1 + movhps [P2], m2 + movhps [P0], m3 + movhps [Q3], m5 + movhps [Q5], m6 + movhps [Q7], m7 + DEFINE_TRANSPOSED_P7_TO_Q7 +%elif %2 > 16 ; %2 == 44/48/84/88 + punpcklbw m0, m1 + punpcklbw m2, m3 + punpcklbw m4, m5 + punpcklbw m6, m7 + movx m1, [Q0] + movx m3, [Q1] + movx m5, [Q2] + movx m7, [Q3] + punpcklbw m1, m3 + punpcklbw m5, m7 + movx m3, [Q4] + movx m7, [Q5] + punpcklbw m3, m7 + mova [rsp], m3 + movx m3, [Q6] + movx m7, [Q7] + punpcklbw m3, m7 + DEFINE_TRANSPOSED_P7_TO_Q7 + TRANSPOSE8x8W 0, 2, 4, 6, 1, 5, 7, 3, [rsp], [Q0], 1 + mova [P3], m0 + mova [P2], m2 + mova [P1], m4 + mova [P0], m6 + mova [Q1], m5 + mova [Q2], m7 + mova [Q3], m3 +%else ; %2 == 4 || %2 == 8 + SBUTTERFLY bw, 0, 1, 6 + SBUTTERFLY bw, 2, 3, 6 + SBUTTERFLY bw, 4, 5, 6 + mova [rsp+4*mmsize], m5 + mova m6, [P1] + SBUTTERFLY bw, 6, 7, 5 + DEFINE_TRANSPOSED_P7_TO_Q7 + TRANSPOSE4x4W 0, 2, 4, 6, 5 + mova [P3], m0 + mova [P2], m2 + mova [P1], m4 + mova [P0], m6 + mova m5, [rsp+4*mmsize] + TRANSPOSE4x4W 1, 3, 5, 7, 0 + mova [Q0], m1 + mova [Q1], m3 + mova [Q2], m5 + mova [Q3], m7 +%endif ; %2 +%endif ; x86-32/64 +%endif ; %1 == h + + ; calc fm mask +%if %2 == 16 || mmsize == 8 +%if cpuflag(ssse3) + pxor m0, m0 +%endif + SPLATB_REG m2, I, m0 ; I I I I ... + SPLATB_REG m3, E, m0 ; E E E E ... +%else +%if cpuflag(ssse3) + mova m0, [mask_mix] +%endif + movd m2, Id + movd m3, Ed + SPLATB_MIX m2, m0 + SPLATB_MIX m3, m0 +%endif + mova m0, [pb_80] + pxor m2, m0 + pxor m3, m0 +%ifdef m8 +%ifidn %1, v + mova m8, [P3] + mova m9, [P2] + mova m10, [P1] + mova m11, [P0] + mova m12, [Q0] + mova m13, [Q1] + mova m14, [Q2] + mova m15, [Q3] +%else + ; In case of horizontal, P3..Q3 are already present in some registers due + ; to the previous transpose, so we just swap registers. + SWAP 8, 4, 12 + SWAP 9, 5, 13 + SWAP 10, 6, 14 + SWAP 11, 7, 15 +%endif +%define rp3 m8 +%define rp2 m9 +%define rp1 m10 +%define rp0 m11 +%define rq0 m12 +%define rq1 m13 +%define rq2 m14 +%define rq3 m15 +%else +%define rp3 [P3] +%define rp2 [P2] +%define rp1 [P1] +%define rp0 [P0] +%define rq0 [Q0] +%define rq1 [Q1] +%define rq2 [Q2] +%define rq3 [Q3] +%endif + ABSSUB_GT m5, rp3, rp2, m2, m7, m0 ; m5 = abs(p3-p2) <= I + ABSSUB_GT m1, rp2, rp1, m2, m7, m0 ; m1 = abs(p2-p1) <= I + por m5, m1 + ABSSUB_GT m1, rp1, rp0, m2, m7, m0 ; m1 = abs(p1-p0) <= I + por m5, m1 + ABSSUB_GT m1, rq0, rq1, m2, m7, m0 ; m1 = abs(q1-q0) <= I + por m5, m1 + ABSSUB_GT m1, rq1, rq2, m2, m7, m0 ; m1 = abs(q2-q1) <= I + por m5, m1 + ABSSUB_GT m1, rq2, rq3, m2, m7, m0 ; m1 = abs(q3-q2) <= I + por m5, m1 + ABSSUB m1, rp0, rq0, m7 ; abs(p0-q0) + paddusb m1, m1 ; abs(p0-q0) * 2 + ABSSUB m2, rp1, rq1, m7 ; abs(p1-q1) + pand m2, [pb_fe] ; drop lsb so shift can work + psrlq m2, 1 ; abs(p1-q1)/2 + paddusb m1, m2 ; abs(p0-q0)*2 + abs(p1-q1)/2 + pxor m1, m0 + pcmpgtb m1, m3 + por m1, m5 ; fm final value + SWAP 1, 3 + pxor m3, [pb_ff] + + ; (m3: fm, m8..15: p3 p2 p1 p0 q0 q1 q2 q3) + ; calc flat8in (if not 44_16) and hev masks +%if %2 != 44 && %2 != 4 + mova m6, [pb_81] ; [1 1 1 1 ...] ^ 0x80 + ABSSUB_GT m2, rp3, rp0, m6, m5 ; abs(p3 - p0) <= 1 +%ifdef m8 + mova m8, [pb_80] +%define rb80 m8 +%else +%define rb80 [pb_80] +%endif + ABSSUB_GT m1, rp2, rp0, m6, m5, rb80 ; abs(p2 - p0) <= 1 + por m2, m1 + ABSSUB m4, rp1, rp0, m5 ; abs(p1 - p0) +%if %2 <= 16 +%if cpuflag(ssse3) + pxor m0, m0 +%endif + SPLATB_REG m7, H, m0 ; H H H H ... +%else + movd m7, Hd + SPLATB_MIX m7 +%endif + pxor m7, rb80 + pxor m4, rb80 + pcmpgtb m0, m4, m7 ; abs(p1 - p0) > H (1/2 hev condition) + CMP_GT m4, m6 ; abs(p1 - p0) <= 1 + por m2, m4 ; (flat8in) + ABSSUB m4, rq1, rq0, m1 ; abs(q1 - q0) + pxor m4, rb80 + pcmpgtb m5, m4, m7 ; abs(q1 - q0) > H (2/2 hev condition) + por m0, m5 ; hev final value + CMP_GT m4, m6 ; abs(q1 - q0) <= 1 + por m2, m4 ; (flat8in) + ABSSUB_GT m1, rq2, rq0, m6, m5, rb80 ; abs(q2 - q0) <= 1 + por m2, m1 + ABSSUB_GT m1, rq3, rq0, m6, m5, rb80 ; abs(q3 - q0) <= 1 + por m2, m1 ; flat8in final value + pxor m2, [pb_ff] +%if %2 == 84 || %2 == 48 + pand m2, [mask_mix%2] +%endif +%else + mova m6, [pb_80] +%if %2 == 44 + movd m7, Hd + SPLATB_MIX m7 +%else +%if cpuflag(ssse3) + pxor m0, m0 +%endif + SPLATB_REG m7, H, m0 ; H H H H ... +%endif + pxor m7, m6 + ABSSUB m4, rp1, rp0, m1 ; abs(p1 - p0) + pxor m4, m6 + pcmpgtb m0, m4, m7 ; abs(p1 - p0) > H (1/2 hev condition) + ABSSUB m4, rq1, rq0, m1 ; abs(q1 - q0) + pxor m4, m6 + pcmpgtb m5, m4, m7 ; abs(q1 - q0) > H (2/2 hev condition) + por m0, m5 ; hev final value +%endif + +%if %2 == 16 + ; (m0: hev, m2: flat8in, m3: fm, m6: pb_81, m9..15: p2 p1 p0 q0 q1 q2 q3) + ; calc flat8out mask +%ifdef m8 + mova m8, [P7] + mova m9, [P6] +%define rp7 m8 +%define rp6 m9 +%else +%define rp7 [P7] +%define rp6 [P6] +%endif + ABSSUB_GT m1, rp7, rp0, m6, m5 ; abs(p7 - p0) <= 1 + ABSSUB_GT m7, rp6, rp0, m6, m5 ; abs(p6 - p0) <= 1 + por m1, m7 +%ifdef m8 + mova m8, [P5] + mova m9, [P4] +%define rp5 m8 +%define rp4 m9 +%else +%define rp5 [P5] +%define rp4 [P4] +%endif + ABSSUB_GT m7, rp5, rp0, m6, m5 ; abs(p5 - p0) <= 1 + por m1, m7 + ABSSUB_GT m7, rp4, rp0, m6, m5 ; abs(p4 - p0) <= 1 + por m1, m7 +%ifdef m8 + mova m14, [Q4] + mova m15, [Q5] +%define rq4 m14 +%define rq5 m15 +%else +%define rq4 [Q4] +%define rq5 [Q5] +%endif + ABSSUB_GT m7, rq4, rq0, m6, m5 ; abs(q4 - q0) <= 1 + por m1, m7 + ABSSUB_GT m7, rq5, rq0, m6, m5 ; abs(q5 - q0) <= 1 + por m1, m7 +%ifdef m8 + mova m14, [Q6] + mova m15, [Q7] +%define rq6 m14 +%define rq7 m15 +%else +%define rq6 [Q6] +%define rq7 [Q7] +%endif + ABSSUB_GT m7, rq6, rq0, m6, m5 ; abs(q4 - q0) <= 1 + por m1, m7 + ABSSUB_GT m7, rq7, rq0, m6, m5 ; abs(q5 - q0) <= 1 + por m1, m7 ; flat8out final value + pxor m1, [pb_ff] +%endif + + ; if (fm) { + ; if (out && in) filter_14() + ; else if (in) filter_6() + ; else if (hev) filter_2() + ; else filter_4() + ; } + ; + ; f14: fm & out & in + ; f6: fm & ~f14 & in => fm & ~(out & in) & in => fm & ~out & in + ; f2: fm & ~f14 & ~f6 & hev => fm & ~(out & in) & ~(~out & in) & hev => fm & ~in & hev + ; f4: fm & ~f14 & ~f6 & ~f2 => fm & ~(out & in) & ~(~out & in) & ~(~in & hev) => fm & ~in & ~hev + + ; (m0: hev, [m1: flat8out], [m2: flat8in], m3: fm, m8..15: p5 p4 p1 p0 q0 q1 q6 q7) + ; filter2() +%if %2 != 44 && %2 != 4 + mova m6, [pb_80] ; already in m6 if 44_16 + SCRATCH 2, 15, rsp+%3+%4 +%if %2 == 16 + SCRATCH 1, 8, rsp+%3+%4+16 +%endif +%endif + pxor m2, m6, rq0 ; q0 ^ 0x80 + pxor m4, m6, rp0 ; p0 ^ 0x80 + psubsb m2, m4 ; (signed) q0 - p0 + pxor m4, m6, rp1 ; p1 ^ 0x80 + pxor m5, m6, rq1 ; q1 ^ 0x80 + psubsb m4, m5 ; (signed) p1 - q1 + paddsb m4, m2 ; (q0 - p0) + (p1 - q1) + paddsb m4, m2 ; 2*(q0 - p0) + (p1 - q1) + paddsb m4, m2 ; 3*(q0 - p0) + (p1 - q1) + paddsb m6, m4, [pb_4] ; m6: f1 = clip(f + 4, 127) + paddsb m4, [pb_3] ; m4: f2 = clip(f + 3, 127) +%ifdef m8 + mova m14, [pb_10] ; will be reused in filter4() +%define rb10 m14 +%else +%define rb10 [pb_10] +%endif + SRSHIFT3B_2X m6, m4, rb10, m7 ; f1 and f2 sign byte shift by 3 + SIGN_SUB m7, rq0, m6, m5 ; m7 = q0 - f1 + SIGN_ADD m1, rp0, m4, m5 ; m1 = p0 + f2 +%if %2 != 44 && %2 != 4 +%ifdef m8 + pandn m6, m15, m3 ; ~mask(in) & mask(fm) +%else + mova m6, [rsp+%3+%4] + pandn m6, m3 +%endif + pand m6, m0 ; (~mask(in) & mask(fm)) & mask(hev) +%else + pand m6, m3, m0 +%endif + MASK_APPLY m7, rq0, m6, m5 ; m7 = filter2(q0) & mask / we write it in filter4() + MASK_APPLY m1, rp0, m6, m5 ; m1 = filter2(p0) & mask / we write it in filter4() + + ; (m0: hev, m1: p0', m2: q0-p0, m3: fm, m7: q0', [m8: flat8out], m10..13: p1 p0 q0 q1, m14: pb_10, [m15: flat8in], ) + ; filter4() + mova m4, m2 + paddsb m2, m4 ; 2 * (q0 - p0) + paddsb m2, m4 ; 3 * (q0 - p0) + paddsb m6, m2, [pb_4] ; m6: f1 = clip(f + 4, 127) + paddsb m2, [pb_3] ; m2: f2 = clip(f + 3, 127) + SRSHIFT3B_2X m6, m2, rb10, m4 ; f1 and f2 sign byte shift by 3 +%if %2 != 44 && %2 != 4 +%ifdef m8 + pandn m5, m15, m3 ; ~mask(in) & mask(fm) +%else + mova m5, [rsp+%3+%4] + pandn m5, m3 +%endif + pandn m0, m5 ; ~mask(hev) & (~mask(in) & mask(fm)) +%else + pandn m0, m3 +%endif + SIGN_SUB m5, rq0, m6, m4 ; q0 - f1 + MASK_APPLY m5, m7, m0, m4 ; filter4(q0) & mask + mova [Q0], m5 + SIGN_ADD m7, rp0, m2, m4 ; p0 + f2 + MASK_APPLY m7, m1, m0, m4 ; filter4(p0) & mask + mova [P0], m7 + paddb m6, [pb_80] ; + pxor m1, m1 ; f=(f1+1)>>1 + pavgb m6, m1 ; + psubb m6, [pb_40] ; + SIGN_ADD m1, rp1, m6, m2 ; p1 + f + SIGN_SUB m4, rq1, m6, m2 ; q1 - f + MASK_APPLY m1, rp1, m0, m2 ; m1 = filter4(p1) + MASK_APPLY m4, rq1, m0, m2 ; m4 = filter4(q1) + mova [P1], m1 + mova [Q1], m4 + +%if %2 != 44 && %2 != 4 + UNSCRATCH 2, 15, rsp+%3+%4 +%endif + + ; ([m1: flat8out], m2: flat8in, m3: fm, m10..13: p1 p0 q0 q1) + ; filter6() +%if %2 != 44 && %2 != 4 + pxor m0, m0 +%if %2 != 16 + pand m3, m2 +%else + pand m2, m3 ; mask(fm) & mask(in) +%ifdef m8 + pandn m3, m8, m2 ; ~mask(out) & (mask(fm) & mask(in)) +%else + mova m3, [rsp+%3+%4+16] + pandn m3, m2 +%endif +%endif +%ifdef m8 + mova m14, [P3] + mova m9, [Q3] +%define rp3 m14 +%define rq3 m9 +%else +%define rp3 [P3] +%define rq3 [Q3] +%endif + mova m1, [P2] + FILTER_INIT m4, m5, m6, m7, [P2], %4, 6, m3, m1 ; [p2] + mova m1, [Q2] + FILTER_UPDATE m4, m5, m6, m7, [P1], %4, 0, 1, 2, 5, 3, m3, "", rq1, "", 1 ; [p1] -p3 -p2 +p1 +q1 + FILTER_UPDATE m4, m5, m6, m7, [P0], %4, 0, 2, 3, 6, 3, m3, "", m1 ; [p0] -p3 -p1 +p0 +q2 + FILTER_UPDATE m4, m5, m6, m7, [Q0], %4, 0, 3, 4, 7, 3, m3, "", rq3, "", 1 ; [q0] -p3 -p0 +q0 +q3 + FILTER_UPDATE m4, m5, m6, m7, [Q1], %4, 1, 4, 5, 7, 3, m3, "" ; [q1] -p2 -q0 +q1 +q3 + FILTER_UPDATE m4, m5, m6, m7, [Q2], %4, 2, 5, 6, 7, 3, m3, m1 ; [q2] -p1 -q1 +q2 +q3 +%endif + +%if %2 == 16 + UNSCRATCH 1, 8, rsp+%3+%4+16 +%endif + + ; (m0: 0, [m1: flat8out], m2: fm & flat8in, m8..15: q2 q3 p1 p0 q0 q1 p3 p2) + ; filter14() + ; + ; m2 m3 m8 m9 m14 m15 m10 m11 m12 m13 + ; + ; q2 q3 p3 p2 p1 p0 q0 q1 + ; p6 -7 p7 p6 p5 p4 . . . . . + ; p5 -6 -p7 -p6 +p5 +q1 . . . . + ; p4 -5 -p7 -p5 +p4 +q2 . . . q2 + ; p3 -4 -p7 -p4 +p3 +q3 . . . q3 + ; p2 -3 -p7 -p3 +p2 +q4 . . . q4 + ; p1 -2 -p7 -p2 +p1 +q5 . . . q5 + ; p0 -1 -p7 -p1 +p0 +q6 . . . q6 + ; q0 +0 -p7 -p0 +q0 +q7 . . . q7 + ; q1 +1 -p6 -q0 +q1 +q7 q1 . . . + ; q2 +2 -p5 -q1 +q2 +q7 . q2 . . + ; q3 +3 -p4 -q2 +q3 +q7 . q3 . . + ; q4 +4 -p3 -q3 +q4 +q7 . q4 . . + ; q5 +5 -p2 -q4 +q5 +q7 . q5 . . + ; q6 +6 -p1 -q5 +q6 +q7 . q6 . . + +%if %2 == 16 + pand m1, m2 ; mask(out) & (mask(fm) & mask(in)) + mova m2, [P7] + mova m3, [P6] +%ifdef m8 + mova m8, [P5] + mova m9, [P4] +%define rp5 m8 +%define rp4 m9 +%define rp5s m8 +%define rp4s m9 +%define rp3s m14 +%define rq4 m8 +%define rq5 m9 +%define rq6 m14 +%define rq7 m15 +%define rq4s m8 +%define rq5s m9 +%define rq6s m14 +%else +%define rp5 [P5] +%define rp4 [P4] +%define rp5s "" +%define rp4s "" +%define rp3s "" +%define rq4 [Q4] +%define rq5 [Q5] +%define rq6 [Q6] +%define rq7 [Q7] +%define rq4s "" +%define rq5s "" +%define rq6s "" +%endif + FILTER_INIT m4, m5, m6, m7, [P6], %4, 14, m1, m3 ; [p6] + FILTER_UPDATE m4, m5, m6, m7, [P5], %4, 8, 9, 10, 5, 4, m1, rp5s ; [p5] -p7 -p6 +p5 +q1 + FILTER_UPDATE m4, m5, m6, m7, [P4], %4, 8, 10, 11, 6, 4, m1, rp4s ; [p4] -p7 -p5 +p4 +q2 + FILTER_UPDATE m4, m5, m6, m7, [P3], %4, 8, 11, 0, 7, 4, m1, rp3s ; [p3] -p7 -p4 +p3 +q3 + FILTER_UPDATE m4, m5, m6, m7, [P2], %4, 8, 0, 1, 12, 4, m1, "", rq4, [Q4], 1 ; [p2] -p7 -p3 +p2 +q4 + FILTER_UPDATE m4, m5, m6, m7, [P1], %4, 8, 1, 2, 13, 4, m1, "", rq5, [Q5], 1 ; [p1] -p7 -p2 +p1 +q5 + FILTER_UPDATE m4, m5, m6, m7, [P0], %4, 8, 2, 3, 14, 4, m1, "", rq6, [Q6], 1 ; [p0] -p7 -p1 +p0 +q6 + FILTER_UPDATE m4, m5, m6, m7, [Q0], %4, 8, 3, 4, 15, 4, m1, "", rq7, [Q7], 1 ; [q0] -p7 -p0 +q0 +q7 + FILTER_UPDATE m4, m5, m6, m7, [Q1], %4, 9, 4, 5, 15, 4, m1, "" ; [q1] -p6 -q0 +q1 +q7 + FILTER_UPDATE m4, m5, m6, m7, [Q2], %4, 10, 5, 6, 15, 4, m1, "" ; [q2] -p5 -q1 +q2 +q7 + FILTER_UPDATE m4, m5, m6, m7, [Q3], %4, 11, 6, 7, 15, 4, m1, "" ; [q3] -p4 -q2 +q3 +q7 + FILTER_UPDATE m4, m5, m6, m7, [Q4], %4, 0, 7, 12, 15, 4, m1, rq4s ; [q4] -p3 -q3 +q4 +q7 + FILTER_UPDATE m4, m5, m6, m7, [Q5], %4, 1, 12, 13, 15, 4, m1, rq5s ; [q5] -p2 -q4 +q5 +q7 + FILTER_UPDATE m4, m5, m6, m7, [Q6], %4, 2, 13, 14, 15, 4, m1, rq6s ; [q6] -p1 -q5 +q6 +q7 +%endif + +%ifidn %1, h +%if %2 == 16 + mova m0, [P7] + mova m1, [P6] + mova m2, [P5] + mova m3, [P4] + mova m4, [P3] + mova m5, [P2] +%if ARCH_X86_64 + mova m6, [P1] +%endif + mova m7, [P0] +%if ARCH_X86_64 + mova m8, [Q0] + mova m9, [Q1] + mova m10, [Q2] + mova m11, [Q3] + mova m12, [Q4] + mova m13, [Q5] + mova m14, [Q6] + mova m15, [Q7] + TRANSPOSE16x16B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, [rsp] + DEFINE_REAL_P7_TO_Q7 + movu [P7], m0 + movu [P6], m1 + movu [P5], m2 + movu [P4], m3 + movu [P3], m4 + movu [P2], m5 + movu [P1], m6 + movu [P0], m7 + movu [Q0], m8 + movu [Q1], m9 + movu [Q2], m10 + movu [Q3], m11 + movu [Q4], m12 + movu [Q5], m13 + movu [Q6], m14 + movu [Q7], m15 +%else + DEFINE_REAL_P7_TO_Q7 + TRANSPOSE8x8B 0, 1, 2, 3, 4, 5, 6, 7, [rsp+32], a, [rsp+%3+%4], [Q0], [Q1] + movh [P7], m0 + movh [P5], m1 + movh [P3], m2 + movh [P1], m3 + movh [Q2], m5 + movh [Q4], m6 + movh [Q6], m7 + movhps [P6], m0 + movhps [P4], m1 + movhps [P2], m2 + movhps [P0], m3 + movhps [Q3], m5 + movhps [Q5], m6 + movhps [Q7], m7 + DEFINE_TRANSPOSED_P7_TO_Q7 + mova m0, [Q0] + mova m1, [Q1] + mova m2, [Q2] + mova m3, [Q3] + mova m4, [Q4] + mova m5, [Q5] + mova m7, [Q7] + DEFINE_REAL_P7_TO_Q7 8 + TRANSPOSE8x8B 0, 1, 2, 3, 4, 5, 6, 7, [rsp+224], a, [rsp+%3+%4], [Q0], [Q1] + movh [P7], m0 + movh [P5], m1 + movh [P3], m2 + movh [P1], m3 + movh [Q2], m5 + movh [Q4], m6 + movh [Q6], m7 + movhps [P6], m0 + movhps [P4], m1 + movhps [P2], m2 + movhps [P0], m3 + movhps [Q3], m5 + movhps [Q5], m6 + movhps [Q7], m7 +%endif +%elif %2 == 44 || %2 == 4 + SWAP 0, 1 ; m0 = p1 + SWAP 1, 7 ; m1 = p0 + SWAP 2, 5 ; m2 = q0 + SWAP 3, 4 ; m3 = q1 + DEFINE_REAL_P7_TO_Q7 2 + SBUTTERFLY bw, 0, 1, 4 + SBUTTERFLY bw, 2, 3, 4 + SBUTTERFLY wd, 0, 2, 4 + SBUTTERFLY wd, 1, 3, 4 +%if mmsize == 16 + movd [P7], m0 + movd [P3], m2 + movd [Q0], m1 + movd [Q4], m3 + psrldq m0, 4 + psrldq m1, 4 + psrldq m2, 4 + psrldq m3, 4 + movd [P6], m0 + movd [P2], m2 + movd [Q1], m1 + movd [Q5], m3 + psrldq m0, 4 + psrldq m1, 4 + psrldq m2, 4 + psrldq m3, 4 + movd [P5], m0 + movd [P1], m2 + movd [Q2], m1 + movd [Q6], m3 + psrldq m0, 4 + psrldq m1, 4 + psrldq m2, 4 + psrldq m3, 4 + movd [P4], m0 + movd [P0], m2 + movd [Q3], m1 + movd [Q7], m3 +%else + movd [P7], m0 + movd [P5], m2 + movd [P3], m1 + movd [P1], m3 + psrlq m0, 32 + psrlq m2, 32 + psrlq m1, 32 + psrlq m3, 32 + movd [P6], m0 + movd [P4], m2 + movd [P2], m1 + movd [P0], m3 +%endif +%else + ; the following code do a transpose of 8 full lines to 16 half + ; lines (high part). It is inlined to avoid the need of a staging area + mova m0, [P3] + mova m1, [P2] + mova m2, [P1] + mova m3, [P0] + mova m4, [Q0] + mova m5, [Q1] +%ifdef m8 + mova m6, [Q2] +%endif + mova m7, [Q3] + DEFINE_REAL_P7_TO_Q7 +%ifdef m8 + SBUTTERFLY bw, 0, 1, 8 + SBUTTERFLY bw, 2, 3, 8 + SBUTTERFLY bw, 4, 5, 8 + SBUTTERFLY bw, 6, 7, 8 + SBUTTERFLY wd, 0, 2, 8 + SBUTTERFLY wd, 1, 3, 8 + SBUTTERFLY wd, 4, 6, 8 + SBUTTERFLY wd, 5, 7, 8 + SBUTTERFLY dq, 0, 4, 8 + SBUTTERFLY dq, 1, 5, 8 + SBUTTERFLY dq, 2, 6, 8 + SBUTTERFLY dq, 3, 7, 8 +%else + SBUTTERFLY bw, 0, 1, 6 + mova [rsp+mmsize*4], m1 + mova m6, [rsp+mmsize*6] + SBUTTERFLY bw, 2, 3, 1 + SBUTTERFLY bw, 4, 5, 1 + SBUTTERFLY bw, 6, 7, 1 + SBUTTERFLY wd, 0, 2, 1 + mova [rsp+mmsize*6], m2 + mova m1, [rsp+mmsize*4] + SBUTTERFLY wd, 1, 3, 2 + SBUTTERFLY wd, 4, 6, 2 + SBUTTERFLY wd, 5, 7, 2 + SBUTTERFLY dq, 0, 4, 2 + SBUTTERFLY dq, 1, 5, 2 +%if mmsize == 16 + movh [Q0], m1 + movhps [Q1], m1 +%else + mova [P3], m1 +%endif + mova m2, [rsp+mmsize*6] + SBUTTERFLY dq, 2, 6, 1 + SBUTTERFLY dq, 3, 7, 1 +%endif + SWAP 3, 6 + SWAP 1, 4 +%if mmsize == 16 + movh [P7], m0 + movhps [P6], m0 + movh [P5], m1 + movhps [P4], m1 + movh [P3], m2 + movhps [P2], m2 + movh [P1], m3 + movhps [P0], m3 +%ifdef m8 + movh [Q0], m4 + movhps [Q1], m4 +%endif + movh [Q2], m5 + movhps [Q3], m5 + movh [Q4], m6 + movhps [Q5], m6 + movh [Q6], m7 + movhps [Q7], m7 +%else + mova [P7], m0 + mova [P6], m1 + mova [P5], m2 + mova [P4], m3 + mova [P2], m5 + mova [P1], m6 + mova [P0], m7 +%endif +%endif +%endif + + RET +%endmacro + +%macro LPF_16_VH 5 +INIT_XMM %5 +LOOPFILTER v, %1, %2, 0, %4 +LOOPFILTER h, %1, %2, %3, %4 +%endmacro + +%macro LPF_16_VH_ALL_OPTS 4 +LPF_16_VH %1, %2, %3, %4, sse2 +LPF_16_VH %1, %2, %3, %4, ssse3 +LPF_16_VH %1, %2, %3, %4, avx +%endmacro + +LPF_16_VH_ALL_OPTS 16, 512, 256, 32 +LPF_16_VH_ALL_OPTS 44, 0, 128, 0 +LPF_16_VH_ALL_OPTS 48, 256, 128, 16 +LPF_16_VH_ALL_OPTS 84, 256, 128, 16 +LPF_16_VH_ALL_OPTS 88, 256, 128, 16 + +INIT_MMX mmxext +LOOPFILTER v, 4, 0, 0, 0 +LOOPFILTER h, 4, 0, 64, 0 +LOOPFILTER v, 8, 128, 0, 8 +LOOPFILTER h, 8, 128, 64, 8 diff --git a/libs/ffvpx/libavcodec/x86/vp9lpf_16bpp.asm b/libs/ffvpx/libavcodec/x86/vp9lpf_16bpp.asm new file mode 100644 index 000000000..c0888170c --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp9lpf_16bpp.asm @@ -0,0 +1,823 @@ +;****************************************************************************** +;* VP9 loop filter SIMD optimizations +;* +;* Copyright (C) 2015 Ronald S. Bultje <rsbultje@gmail.com> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA + +pw_511: times 16 dw 511 +pw_2047: times 16 dw 2047 +pw_16384: times 16 dw 16384 +pw_m512: times 16 dw -512 +pw_m2048: times 16 dw -2048 + +cextern pw_1 +cextern pw_3 +cextern pw_4 +cextern pw_8 +cextern pw_16 +cextern pw_256 +cextern pw_1023 +cextern pw_4095 +cextern pw_m1 + +SECTION .text + +%macro SCRATCH 3-4 +%if ARCH_X86_64 + SWAP %1, %2 +%if %0 == 4 +%define reg_%4 m%2 +%endif +%else + mova [%3], m%1 +%if %0 == 4 +%define reg_%4 [%3] +%endif +%endif +%endmacro + +%macro UNSCRATCH 3-4 +%if ARCH_X86_64 + SWAP %1, %2 +%else + mova m%1, [%3] +%endif +%if %0 == 4 +%undef reg_%4 +%endif +%endmacro + +%macro PRELOAD 2-3 +%if ARCH_X86_64 + mova m%1, [%2] +%if %0 == 3 +%define reg_%3 m%1 +%endif +%elif %0 == 3 +%define reg_%3 [%2] +%endif +%endmacro + +; calculate p or q portion of flat8out +%macro FLAT8OUT_HALF 0 + psubw m4, m0 ; q4-q0 + psubw m5, m0 ; q5-q0 + psubw m6, m0 ; q6-q0 + psubw m7, m0 ; q7-q0 + ABS2 m4, m5, m2, m3 ; abs(q4-q0) | abs(q5-q0) + ABS2 m6, m7, m2, m3 ; abs(q6-q0) | abs(q7-q0) + pcmpgtw m4, reg_F ; abs(q4-q0) > F + pcmpgtw m5, reg_F ; abs(q5-q0) > F + pcmpgtw m6, reg_F ; abs(q6-q0) > F + pcmpgtw m7, reg_F ; abs(q7-q0) > F + por m5, m4 + por m7, m6 + por m7, m5 ; !flat8out, q portion +%endmacro + +; calculate p or q portion of flat8in/hev/fm (excluding mb_edge condition) +%macro FLAT8IN_HALF 1 +%if %1 > 4 + psubw m4, m3, m0 ; q3-q0 + psubw m5, m2, m0 ; q2-q0 + ABS2 m4, m5, m6, m7 ; abs(q3-q0) | abs(q2-q0) + pcmpgtw m4, reg_F ; abs(q3-q0) > F + pcmpgtw m5, reg_F ; abs(q2-q0) > F +%endif + psubw m3, m2 ; q3-q2 + psubw m2, m1 ; q2-q1 + ABS2 m3, m2, m6, m7 ; abs(q3-q2) | abs(q2-q1) + pcmpgtw m3, reg_I ; abs(q3-q2) > I + pcmpgtw m2, reg_I ; abs(q2-q1) > I +%if %1 > 4 + por m4, m5 +%endif + por m2, m3 + psubw m3, m1, m0 ; q1-q0 + ABS1 m3, m5 ; abs(q1-q0) +%if %1 > 4 + pcmpgtw m6, m3, reg_F ; abs(q1-q0) > F +%endif + pcmpgtw m7, m3, reg_H ; abs(q1-q0) > H + pcmpgtw m3, reg_I ; abs(q1-q0) > I +%if %1 > 4 + por m4, m6 +%endif + por m2, m3 +%endmacro + +; one step in filter_14/filter_6 +; +; take sum $reg, downshift, apply mask and write into dst +; +; if sub2/add1-2 are present, add/sub as appropriate to prepare for the next +; step's sum $reg. This is omitted for the last row in each filter. +; +; if dont_store is set, don't write the result into memory, instead keep the +; values in register so we can write it out later +%macro FILTER_STEP 6-10 "", "", "", 0 ; tmp, reg, mask, shift, dst, \ + ; src/sub1, sub2, add1, add2, dont_store + psrlw %1, %2, %4 + psubw %1, %6 ; abs->delta +%ifnidn %7, "" + psubw %2, %6 + psubw %2, %7 + paddw %2, %8 + paddw %2, %9 +%endif + pand %1, reg_%3 ; apply mask +%if %10 == 1 + paddw %6, %1 ; delta->abs +%else + paddw %1, %6 ; delta->abs + mova [%5], %1 +%endif +%endmacro + +; FIXME avx2 versions for 16_16 and mix2_{4,8}{4,8} + +%macro LOOP_FILTER 3 ; dir[h/v], wd[4/8/16], bpp[10/12] + +%if ARCH_X86_64 +%if %2 == 16 +%assign %%num_xmm_regs 16 +%elif %2 == 8 +%assign %%num_xmm_regs 15 +%else ; %2 == 4 +%assign %%num_xmm_regs 14 +%endif ; %2 +%assign %%bak_mem 0 +%else ; ARCH_X86_32 +%assign %%num_xmm_regs 8 +%if %2 == 16 +%assign %%bak_mem 7 +%elif %2 == 8 +%assign %%bak_mem 6 +%else ; %2 == 4 +%assign %%bak_mem 5 +%endif ; %2 +%endif ; ARCH_X86_64/32 + +%if %2 == 16 +%ifidn %1, v +%assign %%num_gpr_regs 6 +%else ; %1 == h +%assign %%num_gpr_regs 5 +%endif ; %1 +%assign %%wd_mem 6 +%else ; %2 == 8/4 +%assign %%num_gpr_regs 5 +%if ARCH_X86_32 && %2 == 8 +%assign %%wd_mem 2 +%else ; ARCH_X86_64 || %2 == 4 +%assign %%wd_mem 0 +%endif ; ARCH_X86_64/32 etc. +%endif ; %2 + +%ifidn %1, v +%assign %%tsp_mem 0 +%elif %2 == 16 ; && %1 == h +%assign %%tsp_mem 16 +%else ; %1 == h && %1 == 8/4 +%assign %%tsp_mem 8 +%endif ; %1/%2 + +%assign %%off %%wd_mem +%assign %%tspoff %%bak_mem+%%wd_mem +%assign %%stack_mem ((%%bak_mem+%%wd_mem+%%tsp_mem)*mmsize) + +%if %3 == 10 +%define %%maxsgn 511 +%define %%minsgn m512 +%define %%maxusgn 1023 +%define %%maxf 4 +%else ; %3 == 12 +%define %%maxsgn 2047 +%define %%minsgn m2048 +%define %%maxusgn 4095 +%define %%maxf 16 +%endif ; %3 + +cglobal vp9_loop_filter_%1_%2_%3, 5, %%num_gpr_regs, %%num_xmm_regs, %%stack_mem, dst, stride, E, I, H + ; prepare E, I and H masks + shl Ed, %3-8 + shl Id, %3-8 + shl Hd, %3-8 +%if cpuflag(ssse3) + mova m0, [pw_256] +%endif + movd m1, Ed + movd m2, Id + movd m3, Hd +%if cpuflag(ssse3) + pshufb m1, m0 ; E << (bit_depth - 8) + pshufb m2, m0 ; I << (bit_depth - 8) + pshufb m3, m0 ; H << (bit_depth - 8) +%else + punpcklwd m1, m1 + punpcklwd m2, m2 + punpcklwd m3, m3 + pshufd m1, m1, q0000 + pshufd m2, m2, q0000 + pshufd m3, m3, q0000 +%endif + SCRATCH 1, 8, rsp+(%%off+0)*mmsize, E + SCRATCH 2, 9, rsp+(%%off+1)*mmsize, I + SCRATCH 3, 10, rsp+(%%off+2)*mmsize, H +%if %2 > 4 + PRELOAD 11, pw_ %+ %%maxf, F +%endif + + ; set up variables to load data +%ifidn %1, v + DEFINE_ARGS dst8, stride, stride3, dst0, dst4, dst12 + lea stride3q, [strideq*3] + neg strideq +%if %2 == 16 + lea dst0q, [dst8q+strideq*8] +%else + lea dst4q, [dst8q+strideq*4] +%endif + neg strideq +%if %2 == 16 + lea dst12q, [dst8q+strideq*4] + lea dst4q, [dst0q+strideq*4] +%endif + +%if %2 == 16 +%define %%p7 dst0q +%define %%p6 dst0q+strideq +%define %%p5 dst0q+strideq*2 +%define %%p4 dst0q+stride3q +%endif +%define %%p3 dst4q +%define %%p2 dst4q+strideq +%define %%p1 dst4q+strideq*2 +%define %%p0 dst4q+stride3q +%define %%q0 dst8q +%define %%q1 dst8q+strideq +%define %%q2 dst8q+strideq*2 +%define %%q3 dst8q+stride3q +%if %2 == 16 +%define %%q4 dst12q +%define %%q5 dst12q+strideq +%define %%q6 dst12q+strideq*2 +%define %%q7 dst12q+stride3q +%endif +%else ; %1 == h + DEFINE_ARGS dst0, stride, stride3, dst4 + lea stride3q, [strideq*3] + lea dst4q, [dst0q+strideq*4] + +%define %%p3 rsp+(%%tspoff+0)*mmsize +%define %%p2 rsp+(%%tspoff+1)*mmsize +%define %%p1 rsp+(%%tspoff+2)*mmsize +%define %%p0 rsp+(%%tspoff+3)*mmsize +%define %%q0 rsp+(%%tspoff+4)*mmsize +%define %%q1 rsp+(%%tspoff+5)*mmsize +%define %%q2 rsp+(%%tspoff+6)*mmsize +%define %%q3 rsp+(%%tspoff+7)*mmsize + +%if %2 < 16 + movu m0, [dst0q+strideq*0-8] + movu m1, [dst0q+strideq*1-8] + movu m2, [dst0q+strideq*2-8] + movu m3, [dst0q+stride3q -8] + movu m4, [dst4q+strideq*0-8] + movu m5, [dst4q+strideq*1-8] + movu m6, [dst4q+strideq*2-8] + movu m7, [dst4q+stride3q -8] + +%if ARCH_X86_64 + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 12 +%else + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [%%p0], [%%q0] +%endif + + mova [%%p3], m0 + mova [%%p2], m1 + mova [%%p1], m2 + mova [%%p0], m3 +%if ARCH_X86_64 + mova [%%q0], m4 +%endif + mova [%%q1], m5 + mova [%%q2], m6 + mova [%%q3], m7 + + ; FIXME investigate if we can _not_ load q0-3 below if h, and adjust register + ; order here accordingly +%else ; %2 == 16 + +%define %%p7 rsp+(%%tspoff+ 8)*mmsize +%define %%p6 rsp+(%%tspoff+ 9)*mmsize +%define %%p5 rsp+(%%tspoff+10)*mmsize +%define %%p4 rsp+(%%tspoff+11)*mmsize +%define %%q4 rsp+(%%tspoff+12)*mmsize +%define %%q5 rsp+(%%tspoff+13)*mmsize +%define %%q6 rsp+(%%tspoff+14)*mmsize +%define %%q7 rsp+(%%tspoff+15)*mmsize + + mova m0, [dst0q+strideq*0-16] + mova m1, [dst0q+strideq*1-16] + mova m2, [dst0q+strideq*2-16] + mova m3, [dst0q+stride3q -16] + mova m4, [dst4q+strideq*0-16] + mova m5, [dst4q+strideq*1-16] +%if ARCH_X86_64 + mova m6, [dst4q+strideq*2-16] +%endif + mova m7, [dst4q+stride3q -16] + +%if ARCH_X86_64 + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 12 +%else + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [dst4q+strideq*2-16], [%%p3], 1 +%endif + + mova [%%p7], m0 + mova [%%p6], m1 + mova [%%p5], m2 + mova [%%p4], m3 +%if ARCH_X86_64 + mova [%%p3], m4 +%endif + mova [%%p2], m5 + mova [%%p1], m6 + mova [%%p0], m7 + + mova m0, [dst0q+strideq*0] + mova m1, [dst0q+strideq*1] + mova m2, [dst0q+strideq*2] + mova m3, [dst0q+stride3q ] + mova m4, [dst4q+strideq*0] + mova m5, [dst4q+strideq*1] +%if ARCH_X86_64 + mova m6, [dst4q+strideq*2] +%endif + mova m7, [dst4q+stride3q ] + +%if ARCH_X86_64 + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 12 +%else + TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [dst4q+strideq*2], [%%q4], 1 +%endif + + mova [%%q0], m0 + mova [%%q1], m1 + mova [%%q2], m2 + mova [%%q3], m3 +%if ARCH_X86_64 + mova [%%q4], m4 +%endif + mova [%%q5], m5 + mova [%%q6], m6 + mova [%%q7], m7 + + ; FIXME investigate if we can _not_ load q0|q4-7 below if h, and adjust register + ; order here accordingly +%endif ; %2 +%endif ; %1 + + ; load q0|q4-7 data + mova m0, [%%q0] +%if %2 == 16 + mova m4, [%%q4] + mova m5, [%%q5] + mova m6, [%%q6] + mova m7, [%%q7] + + ; flat8out q portion + FLAT8OUT_HALF + SCRATCH 7, 15, rsp+(%%off+6)*mmsize, F8O +%endif + + ; load q1-3 data + mova m1, [%%q1] + mova m2, [%%q2] + mova m3, [%%q3] + + ; r6-8|pw_4[m8-11]=reg_E/I/H/F + ; r9[m15]=!flatout[q] + ; m12-14=free + ; m0-3=q0-q3 + ; m4-7=free + + ; flat8in|fm|hev q portion + FLAT8IN_HALF %2 + SCRATCH 7, 13, rsp+(%%off+4)*mmsize, HEV +%if %2 > 4 + SCRATCH 4, 14, rsp+(%%off+5)*mmsize, F8I +%endif + + ; r6-8|pw_4[m8-11]=reg_E/I/H/F + ; r9[m15]=!flat8out[q] + ; r10[m13]=hev[q] + ; r11[m14]=!flat8in[q] + ; m2=!fm[q] + ; m0,1=q0-q1 + ; m2-7=free + ; m12=free + + ; load p0-1 + mova m3, [%%p0] + mova m4, [%%p1] + + ; fm mb_edge portion + psubw m5, m3, m0 ; q0-p0 + psubw m6, m4, m1 ; q1-p1 +%if ARCH_X86_64 + ABS2 m5, m6, m7, m12 ; abs(q0-p0) | abs(q1-p1) +%else + ABS1 m5, m7 ; abs(q0-p0) + ABS1 m6, m7 ; abs(q1-p1) +%endif + paddw m5, m5 + psraw m6, 1 + paddw m6, m5 ; abs(q0-p0)*2+(abs(q1-p1)>>1) + pcmpgtw m6, reg_E + por m2, m6 + SCRATCH 2, 12, rsp+(%%off+3)*mmsize, FM + + ; r6-8|pw_4[m8-11]=reg_E/I/H/F + ; r9[m15]=!flat8out[q] + ; r10[m13]=hev[q] + ; r11[m14]=!flat8in[q] + ; r12[m12]=!fm[q] + ; m3-4=q0-1 + ; m0-2/5-7=free + + ; load p4-7 data + SWAP 3, 0 ; p0 + SWAP 4, 1 ; p1 +%if %2 == 16 + mova m7, [%%p7] + mova m6, [%%p6] + mova m5, [%%p5] + mova m4, [%%p4] + + ; flat8out p portion + FLAT8OUT_HALF + por m7, reg_F8O + SCRATCH 7, 15, rsp+(%%off+6)*mmsize, F8O +%endif + + ; r6-8|pw_4[m8-11]=reg_E/I/H/F + ; r9[m15]=!flat8out + ; r10[m13]=hev[q] + ; r11[m14]=!flat8in[q] + ; r12[m12]=!fm[q] + ; m0=p0 + ; m1-7=free + + ; load p2-3 data + mova m2, [%%p2] + mova m3, [%%p3] + + ; flat8in|fm|hev p portion + FLAT8IN_HALF %2 + por m7, reg_HEV +%if %2 > 4 + por m4, reg_F8I +%endif + por m2, reg_FM +%if %2 > 4 + por m4, m2 ; !flat8|!fm +%if %2 == 16 + por m5, m4, reg_F8O ; !flat16|!fm + pandn m2, m4 ; filter4_mask + pandn m4, m5 ; filter8_mask + pxor m5, [pw_m1] ; filter16_mask + SCRATCH 5, 15, rsp+(%%off+6)*mmsize, F16M +%else + pandn m2, m4 ; filter4_mask + pxor m4, [pw_m1] ; filter8_mask +%endif + SCRATCH 4, 14, rsp+(%%off+5)*mmsize, F8M +%else + pxor m2, [pw_m1] ; filter4_mask +%endif + SCRATCH 7, 13, rsp+(%%off+4)*mmsize, HEV + SCRATCH 2, 12, rsp+(%%off+3)*mmsize, F4M + + ; r9[m15]=filter16_mask + ; r10[m13]=hev + ; r11[m14]=filter8_mask + ; r12[m12]=filter4_mask + ; m0,1=p0-p1 + ; m2-7=free + ; m8-11=free + +%if %2 > 4 +%if %2 == 16 + ; filter_14 + mova m2, [%%p7] + mova m3, [%%p6] + mova m6, [%%p5] + mova m7, [%%p4] + PRELOAD 8, %%p3, P3 + PRELOAD 9, %%p2, P2 +%endif + PRELOAD 10, %%q0, Q0 + PRELOAD 11, %%q1, Q1 +%if %2 == 16 + psllw m4, m2, 3 + paddw m5, m3, m3 + paddw m4, m6 + paddw m5, m7 + paddw m4, reg_P3 + paddw m5, reg_P2 + paddw m4, m1 + paddw m5, m0 + paddw m4, reg_Q0 ; q0+p1+p3+p5+p7*8 + psubw m5, m2 ; p0+p2+p4+p6*2-p7 + paddw m4, [pw_8] + paddw m5, m4 ; q0+p0+p1+p2+p3+p4+p5+p6*2+p7*7+8 + + ; below, we use r0-5 for storing pre-filter pixels for subsequent subtraction + ; at the end of the filter + + mova [rsp+0*mmsize], m3 + FILTER_STEP m4, m5, F16M, 4, %%p6, m3, m2, m6, reg_Q1 +%endif + mova m3, [%%q2] +%if %2 == 16 + mova [rsp+1*mmsize], m6 + FILTER_STEP m4, m5, F16M, 4, %%p5, m6, m2, m7, m3 +%endif + mova m6, [%%q3] +%if %2 == 16 + mova [rsp+2*mmsize], m7 + FILTER_STEP m4, m5, F16M, 4, %%p4, m7, m2, reg_P3, m6 + mova m7, [%%q4] +%if ARCH_X86_64 + mova [rsp+3*mmsize], reg_P3 +%else + mova m4, reg_P3 + mova [rsp+3*mmsize], m4 +%endif + FILTER_STEP m4, m5, F16M, 4, %%p3, reg_P3, m2, reg_P2, m7 + PRELOAD 8, %%q5, Q5 +%if ARCH_X86_64 + mova [rsp+4*mmsize], reg_P2 +%else + mova m4, reg_P2 + mova [rsp+4*mmsize], m4 +%endif + FILTER_STEP m4, m5, F16M, 4, %%p2, reg_P2, m2, m1, reg_Q5 + PRELOAD 9, %%q6, Q6 + mova [rsp+5*mmsize], m1 + FILTER_STEP m4, m5, F16M, 4, %%p1, m1, m2, m0, reg_Q6 + mova m1, [%%q7] + FILTER_STEP m4, m5, F16M, 4, %%p0, m0, m2, reg_Q0, m1, 1 + FILTER_STEP m4, m5, F16M, 4, %%q0, reg_Q0, [rsp+0*mmsize], reg_Q1, m1, ARCH_X86_64 + FILTER_STEP m4, m5, F16M, 4, %%q1, reg_Q1, [rsp+1*mmsize], m3, m1, ARCH_X86_64 + FILTER_STEP m4, m5, F16M, 4, %%q2, m3, [rsp+2*mmsize], m6, m1, 1 + FILTER_STEP m4, m5, F16M, 4, %%q3, m6, [rsp+3*mmsize], m7, m1 + FILTER_STEP m4, m5, F16M, 4, %%q4, m7, [rsp+4*mmsize], reg_Q5, m1 + FILTER_STEP m4, m5, F16M, 4, %%q5, reg_Q5, [rsp+5*mmsize], reg_Q6, m1 + FILTER_STEP m4, m5, F16M, 4, %%q6, reg_Q6 + + mova m7, [%%p1] +%else + SWAP 1, 7 +%endif + + mova m2, [%%p3] + mova m1, [%%p2] + + ; reg_Q0-1 (m10-m11) + ; m0=p0 + ; m1=p2 + ; m2=p3 + ; m3=q2 + ; m4-5=free + ; m6=q3 + ; m7=p1 + ; m8-9 unused + + ; filter_6 + psllw m4, m2, 2 + paddw m5, m1, m1 + paddw m4, m7 + psubw m5, m2 + paddw m4, m0 + paddw m5, reg_Q0 + paddw m4, [pw_4] + paddw m5, m4 + +%if ARCH_X86_64 + mova m8, m1 + mova m9, m7 +%else + mova [rsp+0*mmsize], m1 + mova [rsp+1*mmsize], m7 +%endif +%ifidn %1, v + FILTER_STEP m4, m5, F8M, 3, %%p2, m1, m2, m7, reg_Q1 +%else + FILTER_STEP m4, m5, F8M, 3, %%p2, m1, m2, m7, reg_Q1, 1 +%endif + FILTER_STEP m4, m5, F8M, 3, %%p1, m7, m2, m0, m3, 1 + FILTER_STEP m4, m5, F8M, 3, %%p0, m0, m2, reg_Q0, m6, 1 +%if ARCH_X86_64 + FILTER_STEP m4, m5, F8M, 3, %%q0, reg_Q0, m8, reg_Q1, m6, ARCH_X86_64 + FILTER_STEP m4, m5, F8M, 3, %%q1, reg_Q1, m9, m3, m6, ARCH_X86_64 +%else + FILTER_STEP m4, m5, F8M, 3, %%q0, reg_Q0, [rsp+0*mmsize], reg_Q1, m6, ARCH_X86_64 + FILTER_STEP m4, m5, F8M, 3, %%q1, reg_Q1, [rsp+1*mmsize], m3, m6, ARCH_X86_64 +%endif + FILTER_STEP m4, m5, F8M, 3, %%q2, m3 + + UNSCRATCH 2, 10, %%q0 + UNSCRATCH 6, 11, %%q1 +%else + SWAP 1, 7 + mova m2, [%%q0] + mova m6, [%%q1] +%endif + UNSCRATCH 3, 13, rsp+(%%off+4)*mmsize, HEV + + ; m0=p0 + ; m1=p2 + ; m2=q0 + ; m3=hev_mask + ; m4-5=free + ; m6=q1 + ; m7=p1 + + ; filter_4 + psubw m4, m7, m6 ; p1-q1 + psubw m5, m2, m0 ; q0-p0 + pand m4, m3 + pminsw m4, [pw_ %+ %%maxsgn] + pmaxsw m4, [pw_ %+ %%minsgn] ; clip_intp2(p1-q1, 9) -> f + paddw m4, m5 + paddw m5, m5 + paddw m4, m5 ; 3*(q0-p0)+f + pminsw m4, [pw_ %+ %%maxsgn] + pmaxsw m4, [pw_ %+ %%minsgn] ; clip_intp2(3*(q0-p0)+f, 9) -> f + pand m4, reg_F4M + paddw m5, m4, [pw_4] + paddw m4, [pw_3] + pminsw m5, [pw_ %+ %%maxsgn] + pminsw m4, [pw_ %+ %%maxsgn] + psraw m5, 3 ; min_intp2(f+4, 9)>>3 -> f1 + psraw m4, 3 ; min_intp2(f+3, 9)>>3 -> f2 + psubw m2, m5 ; q0-f1 + paddw m0, m4 ; p0+f2 + pandn m3, m5 ; f1 & !hev (for p1/q1 adj) + pxor m4, m4 + mova m5, [pw_ %+ %%maxusgn] + pmaxsw m2, m4 + pmaxsw m0, m4 + pminsw m2, m5 + pminsw m0, m5 +%if cpuflag(ssse3) + pmulhrsw m3, [pw_16384] ; (f1+1)>>1 +%else + paddw m3, [pw_1] + psraw m3, 1 +%endif + paddw m7, m3 ; p1+f + psubw m6, m3 ; q1-f + pmaxsw m7, m4 + pmaxsw m6, m4 + pminsw m7, m5 + pminsw m6, m5 + + ; store +%ifidn %1, v + mova [%%p1], m7 + mova [%%p0], m0 + mova [%%q0], m2 + mova [%%q1], m6 +%else ; %1 == h +%if %2 == 4 + TRANSPOSE4x4W 7, 0, 2, 6, 1 + movh [dst0q+strideq*0-4], m7 + movhps [dst0q+strideq*1-4], m7 + movh [dst0q+strideq*2-4], m0 + movhps [dst0q+stride3q -4], m0 + movh [dst4q+strideq*0-4], m2 + movhps [dst4q+strideq*1-4], m2 + movh [dst4q+strideq*2-4], m6 + movhps [dst4q+stride3q -4], m6 +%elif %2 == 8 + mova m3, [%%p3] + mova m4, [%%q2] + mova m5, [%%q3] + +%if ARCH_X86_64 + TRANSPOSE8x8W 3, 1, 7, 0, 2, 6, 4, 5, 8 +%else + TRANSPOSE8x8W 3, 1, 7, 0, 2, 6, 4, 5, [%%q2], [%%q0], 1 + mova m2, [%%q0] +%endif + + movu [dst0q+strideq*0-8], m3 + movu [dst0q+strideq*1-8], m1 + movu [dst0q+strideq*2-8], m7 + movu [dst0q+stride3q -8], m0 + movu [dst4q+strideq*0-8], m2 + movu [dst4q+strideq*1-8], m6 + movu [dst4q+strideq*2-8], m4 + movu [dst4q+stride3q -8], m5 +%else ; %2 == 16 + SCRATCH 2, 8, %%q0 + SCRATCH 6, 9, %%q1 + mova m2, [%%p7] + mova m3, [%%p6] + mova m4, [%%p5] + mova m5, [%%p4] + mova m6, [%%p3] + +%if ARCH_X86_64 + TRANSPOSE8x8W 2, 3, 4, 5, 6, 1, 7, 0, 10 +%else + mova [%%p1], m7 + TRANSPOSE8x8W 2, 3, 4, 5, 6, 1, 7, 0, [%%p1], [dst4q+strideq*0-16], 1 +%endif + + mova [dst0q+strideq*0-16], m2 + mova [dst0q+strideq*1-16], m3 + mova [dst0q+strideq*2-16], m4 + mova [dst0q+stride3q -16], m5 +%if ARCH_X86_64 + mova [dst4q+strideq*0-16], m6 +%endif + mova [dst4q+strideq*1-16], m1 + mova [dst4q+strideq*2-16], m7 + mova [dst4q+stride3q -16], m0 + + UNSCRATCH 2, 8, %%q0 + UNSCRATCH 6, 9, %%q1 + mova m0, [%%q2] + mova m1, [%%q3] + mova m3, [%%q4] + mova m4, [%%q5] +%if ARCH_X86_64 + mova m5, [%%q6] +%endif + mova m7, [%%q7] + +%if ARCH_X86_64 + TRANSPOSE8x8W 2, 6, 0, 1, 3, 4, 5, 7, 8 +%else + TRANSPOSE8x8W 2, 6, 0, 1, 3, 4, 5, 7, [%%q6], [dst4q+strideq*0], 1 +%endif + + mova [dst0q+strideq*0], m2 + mova [dst0q+strideq*1], m6 + mova [dst0q+strideq*2], m0 + mova [dst0q+stride3q ], m1 +%if ARCH_X86_64 + mova [dst4q+strideq*0], m3 +%endif + mova [dst4q+strideq*1], m4 + mova [dst4q+strideq*2], m5 + mova [dst4q+stride3q ], m7 +%endif ; %2 +%endif ; %1 + RET +%endmacro + +%macro LOOP_FILTER_CPUSETS 3 +INIT_XMM sse2 +LOOP_FILTER %1, %2, %3 +INIT_XMM ssse3 +LOOP_FILTER %1, %2, %3 +INIT_XMM avx +LOOP_FILTER %1, %2, %3 +%endmacro + +%macro LOOP_FILTER_WDSETS 2 +LOOP_FILTER_CPUSETS %1, 4, %2 +LOOP_FILTER_CPUSETS %1, 8, %2 +LOOP_FILTER_CPUSETS %1, 16, %2 +%endmacro + +LOOP_FILTER_WDSETS h, 10 +LOOP_FILTER_WDSETS v, 10 +LOOP_FILTER_WDSETS h, 12 +LOOP_FILTER_WDSETS v, 12 diff --git a/libs/ffvpx/libavcodec/x86/vp9mc.asm b/libs/ffvpx/libavcodec/x86/vp9mc.asm new file mode 100644 index 000000000..f64161b2c --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp9mc.asm @@ -0,0 +1,675 @@ +;****************************************************************************** +;* VP9 motion compensation SIMD optimizations +;* +;* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA 32 + +cextern pw_256 +cextern pw_64 + +%macro F8_SSSE3_TAPS 8 +times 16 db %1, %2 +times 16 db %3, %4 +times 16 db %5, %6 +times 16 db %7, %8 +%endmacro + +%macro F8_SSE2_TAPS 8 +times 8 dw %1 +times 8 dw %2 +times 8 dw %3 +times 8 dw %4 +times 8 dw %5 +times 8 dw %6 +times 8 dw %7 +times 8 dw %8 +%endmacro + +%macro F8_16BPP_TAPS 8 +times 8 dw %1, %2 +times 8 dw %3, %4 +times 8 dw %5, %6 +times 8 dw %7, %8 +%endmacro + +%macro FILTER 1 +const filters_%1 ; smooth + F8_TAPS -3, -1, 32, 64, 38, 1, -3, 0 + F8_TAPS -2, -2, 29, 63, 41, 2, -3, 0 + F8_TAPS -2, -2, 26, 63, 43, 4, -4, 0 + F8_TAPS -2, -3, 24, 62, 46, 5, -4, 0 + F8_TAPS -2, -3, 21, 60, 49, 7, -4, 0 + F8_TAPS -1, -4, 18, 59, 51, 9, -4, 0 + F8_TAPS -1, -4, 16, 57, 53, 12, -4, -1 + F8_TAPS -1, -4, 14, 55, 55, 14, -4, -1 + F8_TAPS -1, -4, 12, 53, 57, 16, -4, -1 + F8_TAPS 0, -4, 9, 51, 59, 18, -4, -1 + F8_TAPS 0, -4, 7, 49, 60, 21, -3, -2 + F8_TAPS 0, -4, 5, 46, 62, 24, -3, -2 + F8_TAPS 0, -4, 4, 43, 63, 26, -2, -2 + F8_TAPS 0, -3, 2, 41, 63, 29, -2, -2 + F8_TAPS 0, -3, 1, 38, 64, 32, -1, -3 + ; regular + F8_TAPS 0, 1, -5, 126, 8, -3, 1, 0 + F8_TAPS -1, 3, -10, 122, 18, -6, 2, 0 + F8_TAPS -1, 4, -13, 118, 27, -9, 3, -1 + F8_TAPS -1, 4, -16, 112, 37, -11, 4, -1 + F8_TAPS -1, 5, -18, 105, 48, -14, 4, -1 + F8_TAPS -1, 5, -19, 97, 58, -16, 5, -1 + F8_TAPS -1, 6, -19, 88, 68, -18, 5, -1 + F8_TAPS -1, 6, -19, 78, 78, -19, 6, -1 + F8_TAPS -1, 5, -18, 68, 88, -19, 6, -1 + F8_TAPS -1, 5, -16, 58, 97, -19, 5, -1 + F8_TAPS -1, 4, -14, 48, 105, -18, 5, -1 + F8_TAPS -1, 4, -11, 37, 112, -16, 4, -1 + F8_TAPS -1, 3, -9, 27, 118, -13, 4, -1 + F8_TAPS 0, 2, -6, 18, 122, -10, 3, -1 + F8_TAPS 0, 1, -3, 8, 126, -5, 1, 0 + ; sharp + F8_TAPS -1, 3, -7, 127, 8, -3, 1, 0 + F8_TAPS -2, 5, -13, 125, 17, -6, 3, -1 + F8_TAPS -3, 7, -17, 121, 27, -10, 5, -2 + F8_TAPS -4, 9, -20, 115, 37, -13, 6, -2 + F8_TAPS -4, 10, -23, 108, 48, -16, 8, -3 + F8_TAPS -4, 10, -24, 100, 59, -19, 9, -3 + F8_TAPS -4, 11, -24, 90, 70, -21, 10, -4 + F8_TAPS -4, 11, -23, 80, 80, -23, 11, -4 + F8_TAPS -4, 10, -21, 70, 90, -24, 11, -4 + F8_TAPS -3, 9, -19, 59, 100, -24, 10, -4 + F8_TAPS -3, 8, -16, 48, 108, -23, 10, -4 + F8_TAPS -2, 6, -13, 37, 115, -20, 9, -4 + F8_TAPS -2, 5, -10, 27, 121, -17, 7, -3 + F8_TAPS -1, 3, -6, 17, 125, -13, 5, -2 + F8_TAPS 0, 1, -3, 8, 127, -7, 3, -1 +%endmacro + +%define F8_TAPS F8_SSSE3_TAPS +; int8_t ff_filters_ssse3[3][15][4][32] +FILTER ssse3 +%define F8_TAPS F8_SSE2_TAPS +; int16_t ff_filters_sse2[3][15][8][8] +FILTER sse2 +%define F8_TAPS F8_16BPP_TAPS +; int16_t ff_filters_16bpp[3][15][4][16] +FILTER 16bpp + +SECTION .text + +%macro filter_sse2_h_fn 1 +%assign %%px mmsize/2 +cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 15, dst, dstride, src, sstride, h, filtery + pxor m5, m5 + mova m6, [pw_64] + mova m7, [filteryq+ 0] +%if ARCH_X86_64 && mmsize > 8 + mova m8, [filteryq+ 16] + mova m9, [filteryq+ 32] + mova m10, [filteryq+ 48] + mova m11, [filteryq+ 64] + mova m12, [filteryq+ 80] + mova m13, [filteryq+ 96] + mova m14, [filteryq+112] +%endif +.loop: + movh m0, [srcq-3] + movh m1, [srcq-2] + movh m2, [srcq-1] + movh m3, [srcq+0] + movh m4, [srcq+1] + punpcklbw m0, m5 + punpcklbw m1, m5 + punpcklbw m2, m5 + punpcklbw m3, m5 + punpcklbw m4, m5 + pmullw m0, m7 +%if ARCH_X86_64 && mmsize > 8 + pmullw m1, m8 + pmullw m2, m9 + pmullw m3, m10 + pmullw m4, m11 +%else + pmullw m1, [filteryq+ 16] + pmullw m2, [filteryq+ 32] + pmullw m3, [filteryq+ 48] + pmullw m4, [filteryq+ 64] +%endif + paddw m0, m1 + paddw m2, m3 + paddw m0, m4 + movh m1, [srcq+2] + movh m3, [srcq+3] + movh m4, [srcq+4] + add srcq, sstrideq + punpcklbw m1, m5 + punpcklbw m3, m5 + punpcklbw m4, m5 +%if ARCH_X86_64 && mmsize > 8 + pmullw m1, m12 + pmullw m3, m13 + pmullw m4, m14 +%else + pmullw m1, [filteryq+ 80] + pmullw m3, [filteryq+ 96] + pmullw m4, [filteryq+112] +%endif + paddw m0, m1 + paddw m3, m4 + paddw m0, m6 + paddw m2, m3 + paddsw m0, m2 + psraw m0, 7 +%ifidn %1, avg + movh m1, [dstq] +%endif + packuswb m0, m0 +%ifidn %1, avg + pavgb m0, m1 +%endif + movh [dstq], m0 + add dstq, dstrideq + dec hd + jg .loop + RET +%endmacro + +INIT_MMX mmxext +filter_sse2_h_fn put +filter_sse2_h_fn avg + +INIT_XMM sse2 +filter_sse2_h_fn put +filter_sse2_h_fn avg + +%macro filter_h_fn 1 +%assign %%px mmsize/2 +cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 11, dst, dstride, src, sstride, h, filtery + mova m6, [pw_256] + mova m7, [filteryq+ 0] +%if ARCH_X86_64 && mmsize > 8 + mova m8, [filteryq+32] + mova m9, [filteryq+64] + mova m10, [filteryq+96] +%endif +.loop: + movh m0, [srcq-3] + movh m1, [srcq-2] + movh m2, [srcq-1] + movh m3, [srcq+0] + movh m4, [srcq+1] + movh m5, [srcq+2] + punpcklbw m0, m1 + punpcklbw m2, m3 + movh m1, [srcq+3] + movh m3, [srcq+4] + add srcq, sstrideq + punpcklbw m4, m5 + punpcklbw m1, m3 + pmaddubsw m0, m7 +%if ARCH_X86_64 && mmsize > 8 + pmaddubsw m2, m8 + pmaddubsw m4, m9 + pmaddubsw m1, m10 +%else + pmaddubsw m2, [filteryq+32] + pmaddubsw m4, [filteryq+64] + pmaddubsw m1, [filteryq+96] +%endif + paddw m0, m4 + paddw m2, m1 + paddsw m0, m2 + pmulhrsw m0, m6 +%ifidn %1, avg + movh m1, [dstq] +%endif + packuswb m0, m0 +%ifidn %1, avg + pavgb m0, m1 +%endif + movh [dstq], m0 + add dstq, dstrideq + dec hd + jg .loop + RET +%endmacro + +INIT_MMX ssse3 +filter_h_fn put +filter_h_fn avg + +INIT_XMM ssse3 +filter_h_fn put +filter_h_fn avg + +%if ARCH_X86_64 +%macro filter_hx2_fn 1 +%assign %%px mmsize +cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 14, dst, dstride, src, sstride, h, filtery + mova m13, [pw_256] + mova m8, [filteryq+ 0] + mova m9, [filteryq+32] + mova m10, [filteryq+64] + mova m11, [filteryq+96] +.loop: + movu m0, [srcq-3] + movu m1, [srcq-2] + movu m2, [srcq-1] + movu m3, [srcq+0] + movu m4, [srcq+1] + movu m5, [srcq+2] + movu m6, [srcq+3] + movu m7, [srcq+4] + add srcq, sstrideq + SBUTTERFLY bw, 0, 1, 12 + SBUTTERFLY bw, 2, 3, 12 + SBUTTERFLY bw, 4, 5, 12 + SBUTTERFLY bw, 6, 7, 12 + pmaddubsw m0, m8 + pmaddubsw m1, m8 + pmaddubsw m2, m9 + pmaddubsw m3, m9 + pmaddubsw m4, m10 + pmaddubsw m5, m10 + pmaddubsw m6, m11 + pmaddubsw m7, m11 + paddw m0, m4 + paddw m1, m5 + paddw m2, m6 + paddw m3, m7 + paddsw m0, m2 + paddsw m1, m3 + pmulhrsw m0, m13 + pmulhrsw m1, m13 + packuswb m0, m1 +%ifidn %1, avg + pavgb m0, [dstq] +%endif + mova [dstq], m0 + add dstq, dstrideq + dec hd + jg .loop + RET +%endmacro + +INIT_XMM ssse3 +filter_hx2_fn put +filter_hx2_fn avg + +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +filter_hx2_fn put +filter_hx2_fn avg +%endif + +%endif ; ARCH_X86_64 + +%macro filter_sse2_v_fn 1 +%assign %%px mmsize/2 +%if ARCH_X86_64 +cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 6, 8, 15, dst, dstride, src, sstride, h, filtery, src4, sstride3 +%else +cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 15, dst, dstride, src, sstride, filtery, src4, sstride3 + mov filteryq, r5mp +%define hd r4mp +%endif + pxor m5, m5 + mova m6, [pw_64] + lea sstride3q, [sstrideq*3] + lea src4q, [srcq+sstrideq] + sub srcq, sstride3q + mova m7, [filteryq+ 0] +%if ARCH_X86_64 && mmsize > 8 + mova m8, [filteryq+ 16] + mova m9, [filteryq+ 32] + mova m10, [filteryq+ 48] + mova m11, [filteryq+ 64] + mova m12, [filteryq+ 80] + mova m13, [filteryq+ 96] + mova m14, [filteryq+112] +%endif +.loop: + ; FIXME maybe reuse loads from previous rows, or just + ; more generally unroll this to prevent multiple loads of + ; the same data? + movh m0, [srcq] + movh m1, [srcq+sstrideq] + movh m2, [srcq+sstrideq*2] + movh m3, [srcq+sstride3q] + add srcq, sstrideq + movh m4, [src4q] + punpcklbw m0, m5 + punpcklbw m1, m5 + punpcklbw m2, m5 + punpcklbw m3, m5 + punpcklbw m4, m5 + pmullw m0, m7 +%if ARCH_X86_64 && mmsize > 8 + pmullw m1, m8 + pmullw m2, m9 + pmullw m3, m10 + pmullw m4, m11 +%else + pmullw m1, [filteryq+ 16] + pmullw m2, [filteryq+ 32] + pmullw m3, [filteryq+ 48] + pmullw m4, [filteryq+ 64] +%endif + paddw m0, m1 + paddw m2, m3 + paddw m0, m4 + movh m1, [src4q+sstrideq] + movh m3, [src4q+sstrideq*2] + movh m4, [src4q+sstride3q] + add src4q, sstrideq + punpcklbw m1, m5 + punpcklbw m3, m5 + punpcklbw m4, m5 +%if ARCH_X86_64 && mmsize > 8 + pmullw m1, m12 + pmullw m3, m13 + pmullw m4, m14 +%else + pmullw m1, [filteryq+ 80] + pmullw m3, [filteryq+ 96] + pmullw m4, [filteryq+112] +%endif + paddw m0, m1 + paddw m3, m4 + paddw m0, m6 + paddw m2, m3 + paddsw m0, m2 + psraw m0, 7 +%ifidn %1, avg + movh m1, [dstq] +%endif + packuswb m0, m0 +%ifidn %1, avg + pavgb m0, m1 +%endif + movh [dstq], m0 + add dstq, dstrideq + dec hd + jg .loop + RET +%endmacro + +INIT_MMX mmxext +filter_sse2_v_fn put +filter_sse2_v_fn avg + +INIT_XMM sse2 +filter_sse2_v_fn put +filter_sse2_v_fn avg + +%macro filter_v_fn 1 +%assign %%px mmsize/2 +%if ARCH_X86_64 +cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 6, 8, 11, dst, dstride, src, sstride, h, filtery, src4, sstride3 +%else +cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 11, dst, dstride, src, sstride, filtery, src4, sstride3 + mov filteryq, r5mp +%define hd r4mp +%endif + mova m6, [pw_256] + lea sstride3q, [sstrideq*3] + lea src4q, [srcq+sstrideq] + sub srcq, sstride3q + mova m7, [filteryq+ 0] +%if ARCH_X86_64 && mmsize > 8 + mova m8, [filteryq+32] + mova m9, [filteryq+64] + mova m10, [filteryq+96] +%endif +.loop: + ; FIXME maybe reuse loads from previous rows, or just more generally + ; unroll this to prevent multiple loads of the same data? + movh m0, [srcq] + movh m1, [srcq+sstrideq] + movh m2, [srcq+sstrideq*2] + movh m3, [srcq+sstride3q] + movh m4, [src4q] + movh m5, [src4q+sstrideq] + punpcklbw m0, m1 + punpcklbw m2, m3 + movh m1, [src4q+sstrideq*2] + movh m3, [src4q+sstride3q] + add srcq, sstrideq + add src4q, sstrideq + punpcklbw m4, m5 + punpcklbw m1, m3 + pmaddubsw m0, m7 +%if ARCH_X86_64 && mmsize > 8 + pmaddubsw m2, m8 + pmaddubsw m4, m9 + pmaddubsw m1, m10 +%else + pmaddubsw m2, [filteryq+32] + pmaddubsw m4, [filteryq+64] + pmaddubsw m1, [filteryq+96] +%endif + paddw m0, m4 + paddw m2, m1 + paddsw m0, m2 + pmulhrsw m0, m6 +%ifidn %1, avg + movh m1, [dstq] +%endif + packuswb m0, m0 +%ifidn %1, avg + pavgb m0, m1 +%endif + movh [dstq], m0 + add dstq, dstrideq + dec hd + jg .loop + RET +%endmacro + +INIT_MMX ssse3 +filter_v_fn put +filter_v_fn avg + +INIT_XMM ssse3 +filter_v_fn put +filter_v_fn avg + +%if ARCH_X86_64 + +%macro filter_vx2_fn 1 +%assign %%px mmsize +cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 6, 8, 14, dst, dstride, src, sstride, h, filtery, src4, sstride3 + mova m13, [pw_256] + lea sstride3q, [sstrideq*3] + lea src4q, [srcq+sstrideq] + sub srcq, sstride3q + mova m8, [filteryq+ 0] + mova m9, [filteryq+32] + mova m10, [filteryq+64] + mova m11, [filteryq+96] +.loop: + ; FIXME maybe reuse loads from previous rows, or just + ; more generally unroll this to prevent multiple loads of + ; the same data? + movu m0, [srcq] + movu m1, [srcq+sstrideq] + movu m2, [srcq+sstrideq*2] + movu m3, [srcq+sstride3q] + movu m4, [src4q] + movu m5, [src4q+sstrideq] + movu m6, [src4q+sstrideq*2] + movu m7, [src4q+sstride3q] + add srcq, sstrideq + add src4q, sstrideq + SBUTTERFLY bw, 0, 1, 12 + SBUTTERFLY bw, 2, 3, 12 + SBUTTERFLY bw, 4, 5, 12 + SBUTTERFLY bw, 6, 7, 12 + pmaddubsw m0, m8 + pmaddubsw m1, m8 + pmaddubsw m2, m9 + pmaddubsw m3, m9 + pmaddubsw m4, m10 + pmaddubsw m5, m10 + pmaddubsw m6, m11 + pmaddubsw m7, m11 + paddw m0, m4 + paddw m1, m5 + paddw m2, m6 + paddw m3, m7 + paddsw m0, m2 + paddsw m1, m3 + pmulhrsw m0, m13 + pmulhrsw m1, m13 + packuswb m0, m1 +%ifidn %1, avg + pavgb m0, [dstq] +%endif + mova [dstq], m0 + add dstq, dstrideq + dec hd + jg .loop + RET +%endmacro + +INIT_XMM ssse3 +filter_vx2_fn put +filter_vx2_fn avg + +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +filter_vx2_fn put +filter_vx2_fn avg +%endif + +%endif ; ARCH_X86_64 + +%macro fpel_fn 6-8 0, 4 +%if %2 == 4 +%define %%srcfn movh +%define %%dstfn movh +%else +%define %%srcfn movu +%define %%dstfn mova +%endif + +%if %7 == 8 +%define %%pavg pavgb +%define %%szsuf _8 +%elif %7 == 16 +%define %%pavg pavgw +%define %%szsuf _16 +%else +%define %%szsuf +%endif + +%if %2 <= mmsize +cglobal vp9_%1%2 %+ %%szsuf, 5, 7, 4, dst, dstride, src, sstride, h, dstride3, sstride3 + lea sstride3q, [sstrideq*3] + lea dstride3q, [dstrideq*3] +%else +cglobal vp9_%1%2 %+ %%szsuf, 5, 5, %8, dst, dstride, src, sstride, h +%endif +.loop: + %%srcfn m0, [srcq] + %%srcfn m1, [srcq+s%3] + %%srcfn m2, [srcq+s%4] + %%srcfn m3, [srcq+s%5] +%if %2/mmsize == 8 + %%srcfn m4, [srcq+mmsize*4] + %%srcfn m5, [srcq+mmsize*5] + %%srcfn m6, [srcq+mmsize*6] + %%srcfn m7, [srcq+mmsize*7] +%endif + lea srcq, [srcq+sstrideq*%6] +%ifidn %1, avg + %%pavg m0, [dstq] + %%pavg m1, [dstq+d%3] + %%pavg m2, [dstq+d%4] + %%pavg m3, [dstq+d%5] +%if %2/mmsize == 8 + %%pavg m4, [dstq+mmsize*4] + %%pavg m5, [dstq+mmsize*5] + %%pavg m6, [dstq+mmsize*6] + %%pavg m7, [dstq+mmsize*7] +%endif +%endif + %%dstfn [dstq], m0 + %%dstfn [dstq+d%3], m1 + %%dstfn [dstq+d%4], m2 + %%dstfn [dstq+d%5], m3 +%if %2/mmsize == 8 + %%dstfn [dstq+mmsize*4], m4 + %%dstfn [dstq+mmsize*5], m5 + %%dstfn [dstq+mmsize*6], m6 + %%dstfn [dstq+mmsize*7], m7 +%endif + lea dstq, [dstq+dstrideq*%6] + sub hd, %6 + jnz .loop + RET +%endmacro + +%define d16 16 +%define s16 16 +%define d32 32 +%define s32 32 +INIT_MMX mmx +fpel_fn put, 4, strideq, strideq*2, stride3q, 4 +fpel_fn put, 8, strideq, strideq*2, stride3q, 4 +INIT_MMX mmxext +fpel_fn avg, 4, strideq, strideq*2, stride3q, 4, 8 +fpel_fn avg, 8, strideq, strideq*2, stride3q, 4, 8 +INIT_XMM sse +fpel_fn put, 16, strideq, strideq*2, stride3q, 4 +fpel_fn put, 32, mmsize, strideq, strideq+mmsize, 2 +fpel_fn put, 64, mmsize, mmsize*2, mmsize*3, 1 +fpel_fn put, 128, mmsize, mmsize*2, mmsize*3, 1, 0, 8 +INIT_XMM sse2 +fpel_fn avg, 16, strideq, strideq*2, stride3q, 4, 8 +fpel_fn avg, 32, mmsize, strideq, strideq+mmsize, 2, 8 +fpel_fn avg, 64, mmsize, mmsize*2, mmsize*3, 1, 8 +INIT_YMM avx +fpel_fn put, 32, strideq, strideq*2, stride3q, 4 +fpel_fn put, 64, mmsize, strideq, strideq+mmsize, 2 +fpel_fn put, 128, mmsize, mmsize*2, mmsize*3, 1 +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +fpel_fn avg, 32, strideq, strideq*2, stride3q, 4, 8 +fpel_fn avg, 64, mmsize, strideq, strideq+mmsize, 2, 8 +%endif +INIT_MMX mmxext +fpel_fn avg, 8, strideq, strideq*2, stride3q, 4, 16 +INIT_XMM sse2 +fpel_fn avg, 16, strideq, strideq*2, stride3q, 4, 16 +fpel_fn avg, 32, mmsize, strideq, strideq+mmsize, 2, 16 +fpel_fn avg, 64, mmsize, mmsize*2, mmsize*3, 1, 16 +fpel_fn avg, 128, mmsize, mmsize*2, mmsize*3, 1, 16, 8 +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +fpel_fn avg, 32, strideq, strideq*2, stride3q, 4, 16 +fpel_fn avg, 64, mmsize, strideq, strideq+mmsize, 2, 16 +fpel_fn avg, 128, mmsize, mmsize*2, mmsize*3, 1, 16 +%endif +%undef s16 +%undef d16 +%undef s32 +%undef d32 diff --git a/libs/ffvpx/libavcodec/x86/vp9mc_16bpp.asm b/libs/ffvpx/libavcodec/x86/vp9mc_16bpp.asm new file mode 100644 index 000000000..9a462eaf8 --- /dev/null +++ b/libs/ffvpx/libavcodec/x86/vp9mc_16bpp.asm @@ -0,0 +1,431 @@ +;****************************************************************************** +;* VP9 MC SIMD optimizations +;* +;* Copyright (c) 2015 Ronald S. Bultje <rsbultje gmail com> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA 32 + +pd_64: times 8 dd 64 + +cextern pw_1023 +cextern pw_4095 + +SECTION .text + +%macro filter_h4_fn 1-2 12 +cglobal vp9_%1_8tap_1d_h_4_10, 6, 6, %2, dst, dstride, src, sstride, h, filtery + mova m5, [pw_1023] +.body: +%if notcpuflag(sse4) && ARCH_X86_64 + pxor m11, m11 +%endif + mova m6, [pd_64] + mova m7, [filteryq+ 0] +%if ARCH_X86_64 && mmsize > 8 + mova m8, [filteryq+32] + mova m9, [filteryq+64] + mova m10, [filteryq+96] +%endif +.loop: + movh m0, [srcq-6] + movh m1, [srcq-4] + movh m2, [srcq-2] + movh m3, [srcq+0] + movh m4, [srcq+2] + punpcklwd m0, m1 + punpcklwd m2, m3 + pmaddwd m0, m7 +%if ARCH_X86_64 && mmsize > 8 + pmaddwd m2, m8 +%else + pmaddwd m2, [filteryq+32] +%endif + movu m1, [srcq+4] + movu m3, [srcq+6] + paddd m0, m2 + movu m2, [srcq+8] + add srcq, sstrideq + punpcklwd m4, m1 + punpcklwd m3, m2 +%if ARCH_X86_64 && mmsize > 8 + pmaddwd m4, m9 + pmaddwd m3, m10 +%else + pmaddwd m4, [filteryq+64] + pmaddwd m3, [filteryq+96] +%endif + paddd m0, m4 + paddd m0, m3 + paddd m0, m6 + psrad m0, 7 +%if cpuflag(sse4) + packusdw m0, m0 +%else + packssdw m0, m0 +%endif +%ifidn %1, avg + movh m1, [dstq] +%endif + pminsw m0, m5 +%if notcpuflag(sse4) +%if ARCH_X86_64 + pmaxsw m0, m11 +%else + pxor m2, m2 + pmaxsw m0, m2 +%endif +%endif +%ifidn %1, avg + pavgw m0, m1 +%endif + movh [dstq], m0 + add dstq, dstrideq + dec hd + jg .loop + RET + +cglobal vp9_%1_8tap_1d_h_4_12, 6, 6, %2, dst, dstride, src, sstride, h, filtery + mova m5, [pw_4095] + jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_h_4_10 %+ SUFFIX).body +%endmacro + +INIT_XMM sse2 +filter_h4_fn put +filter_h4_fn avg + +%macro filter_h_fn 1-2 12 +%assign %%px mmsize/2 +cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _10, 6, 6, %2, dst, dstride, src, sstride, h, filtery + mova m5, [pw_1023] +.body: +%if notcpuflag(sse4) && ARCH_X86_64 + pxor m11, m11 +%endif + mova m6, [pd_64] + mova m7, [filteryq+ 0] +%if ARCH_X86_64 && mmsize > 8 + mova m8, [filteryq+32] + mova m9, [filteryq+64] + mova m10, [filteryq+96] +%endif +.loop: + movu m0, [srcq-6] + movu m1, [srcq-4] + movu m2, [srcq-2] + movu m3, [srcq+0] + movu m4, [srcq+2] + pmaddwd m0, m7 + pmaddwd m1, m7 +%if ARCH_X86_64 && mmsize > 8 + pmaddwd m2, m8 + pmaddwd m3, m8 + pmaddwd m4, m9 +%else + pmaddwd m2, [filteryq+32] + pmaddwd m3, [filteryq+32] + pmaddwd m4, [filteryq+64] +%endif + paddd m0, m2 + paddd m1, m3 + paddd m0, m4 + movu m2, [srcq+4] + movu m3, [srcq+6] + movu m4, [srcq+8] + add srcq, sstrideq +%if ARCH_X86_64 && mmsize > 8 + pmaddwd m2, m9 + pmaddwd m3, m10 + pmaddwd m4, m10 +%else + pmaddwd m2, [filteryq+64] + pmaddwd m3, [filteryq+96] + pmaddwd m4, [filteryq+96] +%endif + paddd m1, m2 + paddd m0, m3 + paddd m1, m4 + paddd m0, m6 + paddd m1, m6 + psrad m0, 7 + psrad m1, 7 +%if cpuflag(sse4) + packusdw m0, m0 + packusdw m1, m1 +%else + packssdw m0, m0 + packssdw m1, m1 +%endif + punpcklwd m0, m1 + pminsw m0, m5 +%if notcpuflag(sse4) +%if ARCH_X86_64 + pmaxsw m0, m11 +%else + pxor m2, m2 + pmaxsw m0, m2 +%endif +%endif +%ifidn %1, avg + pavgw m0, [dstq] +%endif + mova [dstq], m0 + add dstq, dstrideq + dec hd + jg .loop + RET + +cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _12, 6, 6, %2, dst, dstride, src, sstride, h, filtery + mova m5, [pw_4095] + jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_h_ %+ %%px %+ _10 %+ SUFFIX).body +%endmacro + +INIT_XMM sse2 +filter_h_fn put +filter_h_fn avg +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +filter_h_fn put +filter_h_fn avg +%endif + +%macro filter_v4_fn 1-2 12 +%if ARCH_X86_64 +cglobal vp9_%1_8tap_1d_v_4_10, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3 +%else +cglobal vp9_%1_8tap_1d_v_4_10, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3 + mov filteryq, r5mp +%define hd r4mp +%endif + mova m5, [pw_1023] +.body: +%if notcpuflag(sse4) && ARCH_X86_64 + pxor m11, m11 +%endif + mova m6, [pd_64] + lea sstride3q, [sstrideq*3] + lea src4q, [srcq+sstrideq] + sub srcq, sstride3q + mova m7, [filteryq+ 0] +%if ARCH_X86_64 && mmsize > 8 + mova m8, [filteryq+ 32] + mova m9, [filteryq+ 64] + mova m10, [filteryq+ 96] +%endif +.loop: + ; FIXME maybe reuse loads from previous rows, or just + ; more generally unroll this to prevent multiple loads of + ; the same data? + movh m0, [srcq] + movh m1, [srcq+sstrideq] + movh m2, [srcq+sstrideq*2] + movh m3, [srcq+sstride3q] + add srcq, sstrideq + movh m4, [src4q] + punpcklwd m0, m1 + punpcklwd m2, m3 + pmaddwd m0, m7 +%if ARCH_X86_64 && mmsize > 8 + pmaddwd m2, m8 +%else + pmaddwd m2, [filteryq+ 32] +%endif + movh m1, [src4q+sstrideq] + movh m3, [src4q+sstrideq*2] + paddd m0, m2 + movh m2, [src4q+sstride3q] + add src4q, sstrideq + punpcklwd m4, m1 + punpcklwd m3, m2 +%if ARCH_X86_64 && mmsize > 8 + pmaddwd m4, m9 + pmaddwd m3, m10 +%else + pmaddwd m4, [filteryq+ 64] + pmaddwd m3, [filteryq+ 96] +%endif + paddd m0, m4 + paddd m0, m3 + paddd m0, m6 + psrad m0, 7 +%if cpuflag(sse4) + packusdw m0, m0 +%else + packssdw m0, m0 +%endif +%ifidn %1, avg + movh m1, [dstq] +%endif + pminsw m0, m5 +%if notcpuflag(sse4) +%if ARCH_X86_64 + pmaxsw m0, m11 +%else + pxor m2, m2 + pmaxsw m0, m2 +%endif +%endif +%ifidn %1, avg + pavgw m0, m1 +%endif + movh [dstq], m0 + add dstq, dstrideq + dec hd + jg .loop + RET + +%if ARCH_X86_64 +cglobal vp9_%1_8tap_1d_v_4_12, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3 +%else +cglobal vp9_%1_8tap_1d_v_4_12, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3 + mov filteryq, r5mp +%endif + mova m5, [pw_4095] + jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_v_4_10 %+ SUFFIX).body +%endmacro + +INIT_XMM sse2 +filter_v4_fn put +filter_v4_fn avg + +%macro filter_v_fn 1-2 13 +%assign %%px mmsize/2 +%if ARCH_X86_64 +cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _10, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3 +%else +cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _10, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3 + mov filteryq, r5mp +%define hd r4mp +%endif + mova m5, [pw_1023] +.body: +%if notcpuflag(sse4) && ARCH_X86_64 + pxor m12, m12 +%endif +%if ARCH_X86_64 + mova m11, [pd_64] +%endif + lea sstride3q, [sstrideq*3] + lea src4q, [srcq+sstrideq] + sub srcq, sstride3q + mova m7, [filteryq+ 0] +%if ARCH_X86_64 && mmsize > 8 + mova m8, [filteryq+ 32] + mova m9, [filteryq+ 64] + mova m10, [filteryq+ 96] +%endif +.loop: + ; FIXME maybe reuse loads from previous rows, or just + ; more generally unroll this to prevent multiple loads of + ; the same data? + movu m0, [srcq] + movu m1, [srcq+sstrideq] + movu m2, [srcq+sstrideq*2] + movu m3, [srcq+sstride3q] + add srcq, sstrideq + movu m4, [src4q] + SBUTTERFLY wd, 0, 1, 6 + SBUTTERFLY wd, 2, 3, 6 + pmaddwd m0, m7 + pmaddwd m1, m7 +%if ARCH_X86_64 && mmsize > 8 + pmaddwd m2, m8 + pmaddwd m3, m8 +%else + pmaddwd m2, [filteryq+ 32] + pmaddwd m3, [filteryq+ 32] +%endif + paddd m0, m2 + paddd m1, m3 + movu m2, [src4q+sstrideq] + movu m3, [src4q+sstrideq*2] + SBUTTERFLY wd, 4, 2, 6 +%if ARCH_X86_64 && mmsize > 8 + pmaddwd m4, m9 + pmaddwd m2, m9 +%else + pmaddwd m4, [filteryq+ 64] + pmaddwd m2, [filteryq+ 64] +%endif + paddd m0, m4 + paddd m1, m2 + movu m4, [src4q+sstride3q] + add src4q, sstrideq + SBUTTERFLY wd, 3, 4, 6 +%if ARCH_X86_64 && mmsize > 8 + pmaddwd m3, m10 + pmaddwd m4, m10 +%else + pmaddwd m3, [filteryq+ 96] + pmaddwd m4, [filteryq+ 96] +%endif + paddd m0, m3 + paddd m1, m4 +%if ARCH_X86_64 + paddd m0, m11 + paddd m1, m11 +%else + paddd m0, [pd_64] + paddd m1, [pd_64] +%endif + psrad m0, 7 + psrad m1, 7 +%if cpuflag(sse4) + packusdw m0, m1 +%else + packssdw m0, m1 +%endif + pminsw m0, m5 +%if notcpuflag(sse4) +%if ARCH_X86_64 + pmaxsw m0, m12 +%else + pxor m2, m2 + pmaxsw m0, m2 +%endif +%endif +%ifidn %1, avg + pavgw m0, [dstq] +%endif + mova [dstq], m0 + add dstq, dstrideq + dec hd + jg .loop + RET + +%if ARCH_X86_64 +cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _12, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3 +%else +cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _12, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3 + mov filteryq, r5mp +%endif + mova m5, [pw_4095] + jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_v_ %+ %%px %+ _10 %+ SUFFIX).body +%endmacro + +INIT_XMM sse2 +filter_v_fn put +filter_v_fn avg +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +filter_v_fn put +filter_v_fn avg +%endif diff --git a/libs/ffvpx/libavcodec/xiph.c b/libs/ffvpx/libavcodec/xiph.c new file mode 100644 index 000000000..d072224b4 --- /dev/null +++ b/libs/ffvpx/libavcodec/xiph.c @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2007 The FFmpeg Project + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/intreadwrite.h" +#include "xiph.h" + +int avpriv_split_xiph_headers(const uint8_t *extradata, int extradata_size, + int first_header_size, const uint8_t *header_start[3], + int header_len[3]) +{ + int i; + + if (extradata_size >= 6 && AV_RB16(extradata) == first_header_size) { + int overall_len = 6; + for (i=0; i<3; i++) { + header_len[i] = AV_RB16(extradata); + extradata += 2; + header_start[i] = extradata; + extradata += header_len[i]; + if (overall_len > extradata_size - header_len[i]) + return -1; + overall_len += header_len[i]; + } + } else if (extradata_size >= 3 && extradata_size < INT_MAX - 0x1ff && extradata[0] == 2) { + int overall_len = 3; + extradata++; + for (i=0; i<2; i++, extradata++) { + header_len[i] = 0; + for (; overall_len < extradata_size && *extradata==0xff; extradata++) { + header_len[i] += 0xff; + overall_len += 0xff + 1; + } + header_len[i] += *extradata; + overall_len += *extradata; + if (overall_len > extradata_size) + return -1; + } + header_len[2] = extradata_size - overall_len; + header_start[0] = extradata; + header_start[1] = header_start[0] + header_len[0]; + header_start[2] = header_start[1] + header_len[1]; + } else { + return -1; + } + return 0; +} diff --git a/libs/ffvpx/libavcodec/xiph.h b/libs/ffvpx/libavcodec/xiph.h new file mode 100644 index 000000000..1741a51b6 --- /dev/null +++ b/libs/ffvpx/libavcodec/xiph.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2007 The FFmpeg Project + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_XIPH_H +#define AVCODEC_XIPH_H + +#include "libavutil/common.h" + +/** + * Split a single extradata buffer into the three headers that most + * Xiph codecs use. (e.g. Theora and Vorbis) + * Works both with Matroska's packing and lavc's packing. + * + * @param[in] extradata The single chunk that combines all three headers + * @param[in] extradata_size The size of the extradata buffer + * @param[in] first_header_size The size of the first header, used to + * differentiate between the Matroska packing and lavc packing. + * @param[out] header_start Pointers to the start of the three separate headers. + * @param[out] header_len The sizes of each of the three headers. + * @return On error a negative value is returned, on success zero. + */ +int avpriv_split_xiph_headers(const uint8_t *extradata, int extradata_size, + int first_header_size, const uint8_t *header_start[3], + int header_len[3]); + +#endif /* AVCODEC_XIPH_H */ |