diff options
Diffstat (limited to 'libs/ffvpx/libavutil')
39 files changed, 853 insertions, 124 deletions
diff --git a/libs/ffvpx/libavutil/avassert.h b/libs/ffvpx/libavutil/avassert.h index 46f3fea58..9abeadea4 100644 --- a/libs/ffvpx/libavutil/avassert.h +++ b/libs/ffvpx/libavutil/avassert.h @@ -66,7 +66,7 @@ #endif /** - * Assert that floating point opperations can be executed. + * Assert that floating point operations can be executed. * * This will av_assert0() that the cpu is not in MMX state on X86 */ diff --git a/libs/ffvpx/libavutil/avstring.c b/libs/ffvpx/libavutil/avstring.c index f03dd2514..4c068f5bc 100644 --- a/libs/ffvpx/libavutil/avstring.c +++ b/libs/ffvpx/libavutil/avstring.c @@ -222,12 +222,13 @@ int av_strcasecmp(const char *a, const char *b) int av_strncasecmp(const char *a, const char *b, size_t n) { - const char *end = a + n; uint8_t c1, c2; + if (n <= 0) + return 0; do { c1 = av_tolower(*a++); c2 = av_tolower(*b++); - } while (a < end && c1 && c1 == c2); + } while (--n && c1 && c1 == c2); return c1 - c2; } diff --git a/libs/ffvpx/libavutil/avstring.h b/libs/ffvpx/libavutil/avstring.h index 04d269564..37dd4e2da 100644 --- a/libs/ffvpx/libavutil/avstring.h +++ b/libs/ffvpx/libavutil/avstring.h @@ -401,6 +401,12 @@ int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end, int av_match_list(const char *name, const char *list, char separator); /** + * See libc sscanf manual for more information. + * Locale-independent sscanf implementation. + */ +int av_sscanf(const char *string, const char *format, ...); + +/** * @} */ diff --git a/libs/ffvpx/libavutil/colorspace.h b/libs/ffvpx/libavutil/colorspace.h index b6dba2c95..ef6f6107d 100644 --- a/libs/ffvpx/libavutil/colorspace.h +++ b/libs/ffvpx/libavutil/colorspace.h @@ -107,4 +107,44 @@ static inline int C_JPEG_TO_CCIR(int y) { (((FIX(0.50000*224.0/255.0) * r1 - FIX(0.41869*224.0/255.0) * g1 - \ FIX(0.08131*224.0/255.0) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128) +#define RGB_TO_Y_JPEG(r, g, b) \ +(FFMIN((FIX(0.29900) * (r) + FIX(0.58700) * (g) + \ + FIX(0.11400) * (b) + (ONE_HALF)) >> SCALEBITS, 255)) + +#define RGB_TO_U_JPEG(r1, g1, b1)\ +(((- FIX(0.16874) * r1 - FIX(0.33126) * g1 + \ + FIX(0.50000) * b1 + (ONE_HALF) - 1) >> (SCALEBITS)) + 128) + +#define RGB_TO_V_JPEG(r1, g1, b1)\ +(((FIX(0.50000) * r1 - FIX(0.41869) * g1 - \ + FIX(0.08131) * b1 + (ONE_HALF) - 1) >> (SCALEBITS)) + 128) + +// Conversion macros for 8-bit RGB to YUV +// Derived from ITU-R BT.709-6 (06/2015) Item 3.5 +// https://www.itu.int/rec/R-REC-BT.709-6-201506-I/en + +#define RGB_TO_Y_BT709(r, g, b) \ +((FIX(0.21260*219.0/255.0) * (r) + FIX(0.71520*219.0/255.0) * (g) + \ + FIX(0.07220*219.0/255.0) * (b) + (ONE_HALF + (16 << SCALEBITS))) >> SCALEBITS) + +#define RGB_TO_U_BT709(r1, g1, b1, shift)\ +(((- FIX(0.11457*224.0/255.0) * r1 - FIX(0.38543*224.0/255.0) * g1 + \ + FIX(0.50000*224.0/255.0) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128) + +#define RGB_TO_V_BT709(r1, g1, b1, shift)\ +(((FIX(0.50000*224.0/255.0) * r1 - FIX(0.45415*224.0/255.0) * g1 - \ + FIX(0.04585*224.0/255.0) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128) + +#define RGB_TO_Y_BT709_FULL(r, g, b) \ +(FFMIN((FIX(0.21260) * (r) + FIX(0.71520) * (g) + \ + FIX(0.07220) * (b) + (ONE_HALF)) >> SCALEBITS, 255)) + +#define RGB_TO_U_BT709_FULL(r1, g1, b1)\ +(((- FIX(0.11457) * r1 - FIX(0.38543) * g1 + \ + FIX(0.50000) * b1 + (ONE_HALF) - 1) >> (SCALEBITS)) + 128) + +#define RGB_TO_V_BT709_FULL(r1, g1, b1)\ +(((FIX(0.50000) * r1 - FIX(0.45415) * g1 - \ + FIX(0.04585) * b1 + (ONE_HALF) - 1) >> (SCALEBITS)) + 128) + #endif /* AVUTIL_COLORSPACE_H */ diff --git a/libs/ffvpx/libavutil/common.h b/libs/ffvpx/libavutil/common.h index 8db029117..bad43e426 100644 --- a/libs/ffvpx/libavutil/common.h +++ b/libs/ffvpx/libavutil/common.h @@ -331,7 +331,7 @@ static av_always_inline av_const double av_clipd_c(double a, double amin, double */ static av_always_inline av_const int av_ceil_log2_c(int x) { - return av_log2((x - 1) << 1); + return av_log2((x - 1U) << 1); } /** diff --git a/libs/ffvpx/libavutil/dynarray.h b/libs/ffvpx/libavutil/dynarray.h index 034a9fedf..3a7e14642 100644 --- a/libs/ffvpx/libavutil/dynarray.h +++ b/libs/ffvpx/libavutil/dynarray.h @@ -23,7 +23,7 @@ #include "mem.h" /** - * Add an element of to a dynamic array. + * Add an element to a dynamic array. * * The array is reallocated when its number of elements reaches powers of 2. * Therefore, the amortized cost of adding an element is constant. diff --git a/libs/ffvpx/libavutil/eval.c b/libs/ffvpx/libavutil/eval.c index efed91b6e..046a45090 100644 --- a/libs/ffvpx/libavutil/eval.c +++ b/libs/ffvpx/libavutil/eval.c @@ -304,7 +304,7 @@ static double eval_expr(Parser *p, AVExpr *e) double d = eval_expr(p, e->param[0]); double d2 = eval_expr(p, e->param[1]); switch (e->type) { - case e_mod: return e->value * (d - floor((!CONFIG_FTRAPV || d2) ? d / d2 : d * INFINITY) * d2); + case e_mod: return e->value * (d - floor(d2 ? d / d2 : d * INFINITY) * d2); case e_gcd: return e->value * av_gcd(d,d2); case e_max: return e->value * (d > d2 ? d : d2); case e_min: return e->value * (d < d2 ? d : d2); diff --git a/libs/ffvpx/libavutil/fixed_dsp.c b/libs/ffvpx/libavutil/fixed_dsp.c index 8c018581d..f1b195f18 100644 --- a/libs/ffvpx/libavutil/fixed_dsp.c +++ b/libs/ffvpx/libavutil/fixed_dsp.c @@ -134,9 +134,10 @@ static int scalarproduct_fixed_c(const int *v1, const int *v2, int len) return (int)(p >> 31); } -static void butterflies_fixed_c(int *v1, int *v2, int len) +static void butterflies_fixed_c(int *v1s, int *v2, int len) { int i; + unsigned int *v1 = v1s; for (i = 0; i < len; i++){ int t = v1[i] - v2[i]; diff --git a/libs/ffvpx/libavutil/float_dsp.c b/libs/ffvpx/libavutil/float_dsp.c index 1d4911d81..6e28d71b5 100644 --- a/libs/ffvpx/libavutil/float_dsp.c +++ b/libs/ffvpx/libavutil/float_dsp.c @@ -32,6 +32,14 @@ static void vector_fmul_c(float *dst, const float *src0, const float *src1, dst[i] = src0[i] * src1[i]; } +static void vector_dmul_c(double *dst, const double *src0, const double *src1, + int len) +{ + int i; + for (i = 0; i < len; i++) + dst[i] = src0[i] * src1[i]; +} + static void vector_fmac_scalar_c(float *dst, const float *src, float mul, int len) { @@ -131,6 +139,7 @@ av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact) return NULL; fdsp->vector_fmul = vector_fmul_c; + fdsp->vector_dmul = vector_dmul_c; fdsp->vector_fmac_scalar = vector_fmac_scalar_c; fdsp->vector_fmul_scalar = vector_fmul_scalar_c; fdsp->vector_dmac_scalar = vector_dmac_scalar_c; diff --git a/libs/ffvpx/libavutil/float_dsp.h b/libs/ffvpx/libavutil/float_dsp.h index 2c24d9347..9c664592b 100644 --- a/libs/ffvpx/libavutil/float_dsp.h +++ b/libs/ffvpx/libavutil/float_dsp.h @@ -173,6 +173,22 @@ typedef struct AVFloatDSPContext { * @return sum of elementwise products */ float (*scalarproduct_float)(const float *v1, const float *v2, int len); + + /** + * Calculate the entry wise product of two vectors of doubles and store the result in + * a vector of doubles. + * + * @param dst output vector + * constraints: 32-byte aligned + * @param src0 first input vector + * constraints: 32-byte aligned + * @param src1 second input vector + * constraints: 32-byte aligned + * @param len number of elements in the input + * constraints: multiple of 16 + */ + void (*vector_dmul)(double *dst, const double *src0, const double *src1, + int len); } AVFloatDSPContext; /** diff --git a/libs/ffvpx/libavutil/frame.c b/libs/ffvpx/libavutil/frame.c index 00215ac29..dcf1fc3d1 100644 --- a/libs/ffvpx/libavutil/frame.c +++ b/libs/ffvpx/libavutil/frame.c @@ -211,7 +211,8 @@ void av_frame_free(AVFrame **frame) static int get_video_buffer(AVFrame *frame, int align) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); - int ret, i; + int ret, i, padded_height; + int plane_padding = FFMAX(16 + 16/*STRIDE_ALIGN*/, align); if (!desc) return AVERROR(EINVAL); @@ -236,23 +237,24 @@ static int get_video_buffer(AVFrame *frame, int align) frame->linesize[i] = FFALIGN(frame->linesize[i], align); } - for (i = 0; i < 4 && frame->linesize[i]; i++) { - int h = FFALIGN(frame->height, 32); - if (i == 1 || i == 2) - h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h); - - frame->buf[i] = av_buffer_alloc(frame->linesize[i] * h + 16 + 16/*STRIDE_ALIGN*/ - 1); - if (!frame->buf[i]) - goto fail; + padded_height = FFALIGN(frame->height, 32); + if ((ret = av_image_fill_pointers(frame->data, frame->format, padded_height, + NULL, frame->linesize)) < 0) + return ret; - frame->data[i] = frame->buf[i]->data; + frame->buf[0] = av_buffer_alloc(ret + 4*plane_padding); + if (!frame->buf[0]) { + ret = AVERROR(ENOMEM); + goto fail; } - if (desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags & FF_PSEUDOPAL) { - av_buffer_unref(&frame->buf[1]); - frame->buf[1] = av_buffer_alloc(AVPALETTE_SIZE); - if (!frame->buf[1]) - goto fail; - frame->data[1] = frame->buf[1]->data; + + if ((ret = av_image_fill_pointers(frame->data, frame->format, padded_height, + frame->buf[0]->data, frame->linesize)) < 0) + goto fail; + + for (i = 1; i < 4; i++) { + if (frame->data[i]) + frame->data[i] += i * plane_padding; } frame->extended_data = frame->data; @@ -260,7 +262,7 @@ static int get_video_buffer(AVFrame *frame, int align) return 0; fail: av_frame_unref(frame); - return AVERROR(ENOMEM); + return ret; } static int get_audio_buffer(AVFrame *frame, int align) @@ -819,7 +821,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type) switch(type) { case AV_FRAME_DATA_PANSCAN: return "AVPanScan"; case AV_FRAME_DATA_A53_CC: return "ATSC A53 Part 4 Closed Captions"; - case AV_FRAME_DATA_STEREO3D: return "Stereoscopic 3d metadata"; + case AV_FRAME_DATA_STEREO3D: return "Stereo 3D"; case AV_FRAME_DATA_MATRIXENCODING: return "AVMatrixEncoding"; case AV_FRAME_DATA_DOWNMIX_INFO: return "Metadata relevant to a downmix procedure"; case AV_FRAME_DATA_REPLAYGAIN: return "AVReplayGain"; @@ -831,9 +833,15 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type) case AV_FRAME_DATA_MASTERING_DISPLAY_METADATA: return "Mastering display metadata"; case AV_FRAME_DATA_CONTENT_LIGHT_LEVEL: return "Content light level metadata"; case AV_FRAME_DATA_GOP_TIMECODE: return "GOP timecode"; + case AV_FRAME_DATA_S12M_TIMECODE: return "SMPTE 12-1 timecode"; + case AV_FRAME_DATA_SPHERICAL: return "Spherical Mapping"; case AV_FRAME_DATA_ICC_PROFILE: return "ICC profile"; +#if FF_API_FRAME_QP case AV_FRAME_DATA_QP_TABLE_PROPERTIES: return "QP table properties"; case AV_FRAME_DATA_QP_TABLE_DATA: return "QP table data"; +#endif + case AV_FRAME_DATA_DYNAMIC_HDR_PLUS: return "HDR Dynamic Metadata SMPTE2094-40 (HDR10+)"; + case AV_FRAME_DATA_REGIONS_OF_INTEREST: return "Regions Of Interest"; } return NULL; } diff --git a/libs/ffvpx/libavutil/frame.h b/libs/ffvpx/libavutil/frame.h index 9d57d6ce6..5d3231e7b 100644 --- a/libs/ffvpx/libavutil/frame.h +++ b/libs/ffvpx/libavutil/frame.h @@ -158,6 +158,27 @@ enum AVFrameSideDataType { */ AV_FRAME_DATA_QP_TABLE_DATA, #endif + + /** + * Timecode which conforms to SMPTE ST 12-1. The data is an array of 4 uint32_t + * where the first uint32_t describes how many (1-3) of the other timecodes are used. + * The timecode format is described in the av_timecode_get_smpte_from_framenum() + * function in libavutil/timecode.c. + */ + AV_FRAME_DATA_S12M_TIMECODE, + + /** + * HDR dynamic metadata associated with a video frame. The payload is + * an AVDynamicHDRPlus type and contains information for color + * volume transform - application 4 of SMPTE 2094-40:2016 standard. + */ + AV_FRAME_DATA_DYNAMIC_HDR_PLUS, + + /** + * Regions Of Interest, the data is an array of AVRegionOfInterest type, the number of + * array element is implied by AVFrameSideData.size / AVRegionOfInterest.self_size. + */ + AV_FRAME_DATA_REGIONS_OF_INTEREST, }; enum AVActiveFormatDescription { @@ -186,6 +207,62 @@ typedef struct AVFrameSideData { } AVFrameSideData; /** + * Structure describing a single Region Of Interest. + * + * When multiple regions are defined in a single side-data block, they + * should be ordered from most to least important - some encoders are only + * capable of supporting a limited number of distinct regions, so will have + * to truncate the list. + * + * When overlapping regions are defined, the first region containing a given + * area of the frame applies. + */ +typedef struct AVRegionOfInterest { + /** + * Must be set to the size of this data structure (that is, + * sizeof(AVRegionOfInterest)). + */ + uint32_t self_size; + /** + * Distance in pixels from the top edge of the frame to the top and + * bottom edges and from the left edge of the frame to the left and + * right edges of the rectangle defining this region of interest. + * + * The constraints on a region are encoder dependent, so the region + * actually affected may be slightly larger for alignment or other + * reasons. + */ + int top; + int bottom; + int left; + int right; + /** + * Quantisation offset. + * + * Must be in the range -1 to +1. A value of zero indicates no quality + * change. A negative value asks for better quality (less quantisation), + * while a positive value asks for worse quality (greater quantisation). + * + * The range is calibrated so that the extreme values indicate the + * largest possible offset - if the rest of the frame is encoded with the + * worst possible quality, an offset of -1 indicates that this region + * should be encoded with the best possible quality anyway. Intermediate + * values are then interpolated in some codec-dependent way. + * + * For example, in 10-bit H.264 the quantisation parameter varies between + * -12 and 51. A typical qoffset value of -1/10 therefore indicates that + * this region should be encoded with a QP around one-tenth of the full + * range better than the rest of the frame. So, if most of the frame + * were to be encoded with a QP of around 30, this region would get a QP + * of around 24 (an offset of approximately -1/10 * (51 - -12) = -6.3). + * An extreme value of -1 would indicate that this region should be + * encoded with the best possible quality regardless of the treatment of + * the rest of the frame - that is, should be encoded at a QP of -12. + */ + AVRational qoffset; +} AVRegionOfInterest; + +/** * This structure describes decoded (raw) audio or video data. * * AVFrame must be allocated using av_frame_alloc(). Note that this only @@ -381,7 +458,6 @@ typedef struct AVFrame { * that time, * the decoder reorders values as needed and sets AVFrame.reordered_opaque * to exactly one of the values provided by the user through AVCodecContext.reordered_opaque - * @deprecated in favor of pkt_pts */ int64_t reordered_opaque; @@ -514,6 +590,8 @@ typedef struct AVFrame { int decode_error_flags; #define FF_DECODE_ERROR_INVALID_BITSTREAM 1 #define FF_DECODE_ERROR_MISSING_REFERENCE 2 +#define FF_DECODE_ERROR_CONCEALMENT_ACTIVE 4 +#define FF_DECODE_ERROR_DECODE_SLICES 8 /** * number of audio channels, only used for audio. diff --git a/libs/ffvpx/libavutil/hwcontext.c b/libs/ffvpx/libavutil/hwcontext.c index 70c556eca..f1e404ab2 100644 --- a/libs/ffvpx/libavutil/hwcontext.c +++ b/libs/ffvpx/libavutil/hwcontext.c @@ -646,6 +646,9 @@ int av_hwdevice_ctx_create_derived(AVBufferRef **dst_ref_ptr, ret = AVERROR(ENOMEM); goto fail; } + ret = av_hwdevice_ctx_init(dst_ref); + if (ret < 0) + goto fail; goto done; } if (ret != AVERROR(ENOSYS)) @@ -658,10 +661,6 @@ int av_hwdevice_ctx_create_derived(AVBufferRef **dst_ref_ptr, goto fail; done: - ret = av_hwdevice_ctx_init(dst_ref); - if (ret < 0) - goto fail; - *dst_ref_ptr = dst_ref; return 0; @@ -871,3 +870,10 @@ fail: av_buffer_unref(&dst_ref); return ret; } + +int ff_hwframe_map_replace(AVFrame *dst, const AVFrame *src) +{ + HWMapDescriptor *hwmap = (HWMapDescriptor*)dst->buf[0]->data; + av_frame_unref(hwmap->source); + return av_frame_ref(hwmap->source, src); +} diff --git a/libs/ffvpx/libavutil/hwcontext_internal.h b/libs/ffvpx/libavutil/hwcontext_internal.h index 332062dda..77dc47ddd 100644 --- a/libs/ffvpx/libavutil/hwcontext_internal.h +++ b/libs/ffvpx/libavutil/hwcontext_internal.h @@ -156,6 +156,11 @@ int ff_hwframe_map_create(AVBufferRef *hwframe_ref, HWMapDescriptor *hwmap), void *priv); +/** + * Replace the current hwmap of dst with the one from src, used for indirect + * mappings like VAAPI->(DRM)->OpenCL/Vulkan where a direct interop is missing + */ +int ff_hwframe_map_replace(AVFrame *dst, const AVFrame *src); extern const HWContextType ff_hwcontext_type_cuda; extern const HWContextType ff_hwcontext_type_d3d11va; diff --git a/libs/ffvpx/libavutil/imgutils.c b/libs/ffvpx/libavutil/imgutils.c index 4938a7ef6..c733cb5cf 100644 --- a/libs/ffvpx/libavutil/imgutils.c +++ b/libs/ffvpx/libavutil/imgutils.c @@ -311,8 +311,8 @@ static void image_copy_plane(uint8_t *dst, ptrdiff_t dst_linesize, { if (!dst || !src) return; - av_assert0(abs(src_linesize) >= bytewidth); - av_assert0(abs(dst_linesize) >= bytewidth); + av_assert0(FFABS(src_linesize) >= bytewidth); + av_assert0(FFABS(dst_linesize) >= bytewidth); for (;height > 0; height--) { memcpy(dst, src, bytewidth); dst += dst_linesize; @@ -501,7 +501,6 @@ int av_image_copy_to_buffer(uint8_t *dst, int dst_size, static void memset_bytes(uint8_t *dst, size_t dst_size, uint8_t *clear, size_t clear_size) { - size_t pos = 0; int same = 1; int i; @@ -521,28 +520,12 @@ static void memset_bytes(uint8_t *dst, size_t dst_size, uint8_t *clear, if (clear_size == 1) { memset(dst, clear[0], dst_size); dst_size = 0; - } else if (clear_size == 2) { - uint16_t val = AV_RN16(clear); - for (; dst_size >= 2; dst_size -= 2) { - AV_WN16(dst, val); - dst += 2; - } - } else if (clear_size == 4) { - uint32_t val = AV_RN32(clear); - for (; dst_size >= 4; dst_size -= 4) { - AV_WN32(dst, val); - dst += 4; - } - } else if (clear_size == 8) { - uint32_t val = AV_RN64(clear); - for (; dst_size >= 8; dst_size -= 8) { - AV_WN64(dst, val); - dst += 8; - } + } else { + if (clear_size > dst_size) + clear_size = dst_size; + memcpy(dst, clear, clear_size); + av_memcpy_backptr(dst + clear_size, clear_size, dst_size - clear_size); } - - for (; dst_size; dst_size--) - *dst++ = clear[pos++ % clear_size]; } // Maximum size in bytes of a plane element (usually a pixel, or multiple pixels diff --git a/libs/ffvpx/libavutil/integer.c b/libs/ffvpx/libavutil/integer.c index 890e314dc..78e252fbd 100644 --- a/libs/ffvpx/libavutil/integer.c +++ b/libs/ffvpx/libavutil/integer.c @@ -74,7 +74,7 @@ AVInteger av_mul_i(AVInteger a, AVInteger b){ if(a.v[i]) for(j=i; j<AV_INTEGER_SIZE && j-i<=nb; j++){ - carry= (carry>>16) + out.v[j] + a.v[i]*b.v[j-i]; + carry= (carry>>16) + out.v[j] + a.v[i]*(unsigned)b.v[j-i]; out.v[j]= carry; } } diff --git a/libs/ffvpx/libavutil/internal.h b/libs/ffvpx/libavutil/internal.h index 06bd561e8..3a5d1c7bc 100644 --- a/libs/ffvpx/libavutil/internal.h +++ b/libs/ffvpx/libavutil/internal.h @@ -52,7 +52,7 @@ #endif #ifndef emms_c -# define emms_c() while(0) +# define emms_c() do {} while(0) #endif #ifndef attribute_align_arg @@ -173,6 +173,8 @@ }\ } +#define FF_PTR_ADD(ptr, off) ((off) ? (ptr) + (off) : (ptr)) + #include "libm.h" /** diff --git a/libs/ffvpx/libavutil/intreadwrite.h b/libs/ffvpx/libavutil/intreadwrite.h index 67c763b13..4c8413a53 100644 --- a/libs/ffvpx/libavutil/intreadwrite.h +++ b/libs/ffvpx/libavutil/intreadwrite.h @@ -542,6 +542,21 @@ union unaligned_16 { uint16_t l; } __attribute__((packed)) av_alias; # define AV_WN64A(p, v) AV_WNA(64, p, v) #endif +#if AV_HAVE_BIGENDIAN +# define AV_RLA(s, p) av_bswap##s(AV_RN##s##A(p)) +# define AV_WLA(s, p, v) AV_WN##s##A(p, av_bswap##s(v)) +#else +# define AV_RLA(s, p) AV_RN##s##A(p) +# define AV_WLA(s, p, v) AV_WN##s##A(p, v) +#endif + +#ifndef AV_RL64A +# define AV_RL64A(p) AV_RLA(64, p) +#endif +#ifndef AV_WL64A +# define AV_WL64A(p, v) AV_WLA(64, p, v) +#endif + /* * The AV_COPYxxU macros are suitable for copying data to/from unaligned * memory locations. diff --git a/libs/ffvpx/libavutil/log.c b/libs/ffvpx/libavutil/log.c index 9b7d48487..93a156b8e 100644 --- a/libs/ffvpx/libavutil/log.c +++ b/libs/ffvpx/libavutil/log.c @@ -247,9 +247,9 @@ static void format_line(void *avcl, int level, const char *fmt, va_list vl, AVBPrint part[4], int *print_prefix, int type[2]) { AVClass* avc = avcl ? *(AVClass **) avcl : NULL; - av_bprint_init(part+0, 0, 1); - av_bprint_init(part+1, 0, 1); - av_bprint_init(part+2, 0, 1); + av_bprint_init(part+0, 0, AV_BPRINT_SIZE_AUTOMATIC); + av_bprint_init(part+1, 0, AV_BPRINT_SIZE_AUTOMATIC); + av_bprint_init(part+2, 0, AV_BPRINT_SIZE_AUTOMATIC); av_bprint_init(part+3, 0, 65536); if(type) type[0] = type[1] = AV_CLASS_CATEGORY_NA + 16; diff --git a/libs/ffvpx/libavutil/mathematics.h b/libs/ffvpx/libavutil/mathematics.h index 54901800b..64d4137a6 100644 --- a/libs/ffvpx/libavutil/mathematics.h +++ b/libs/ffvpx/libavutil/mathematics.h @@ -134,6 +134,7 @@ int64_t av_rescale(int64_t a, int64_t b, int64_t c) av_const; * * The operation is mathematically equivalent to `a * b / c`, but writing that * directly can overflow, and does not support different rounding methods. + * If the result is not representable then INT64_MIN is returned. * * @see av_rescale(), av_rescale_q(), av_rescale_q_rnd() */ diff --git a/libs/ffvpx/libavutil/mem.c b/libs/ffvpx/libavutil/mem.c index 6149755a6..88fe09b17 100644 --- a/libs/ffvpx/libavutil/mem.c +++ b/libs/ffvpx/libavutil/mem.c @@ -399,6 +399,18 @@ static void fill32(uint8_t *dst, int len) { uint32_t v = AV_RN32(dst - 4); +#if HAVE_FAST_64BIT + uint64_t v2= v + ((uint64_t)v<<32); + while (len >= 32) { + AV_WN64(dst , v2); + AV_WN64(dst+ 8, v2); + AV_WN64(dst+16, v2); + AV_WN64(dst+24, v2); + dst += 32; + len -= 32; + } +#endif + while (len >= 4) { AV_WN32(dst, v); dst += 4; diff --git a/libs/ffvpx/libavutil/mem.h b/libs/ffvpx/libavutil/mem.h index 7e0b12a8a..5fb1a02dd 100644 --- a/libs/ffvpx/libavutil/mem.h +++ b/libs/ffvpx/libavutil/mem.h @@ -339,7 +339,7 @@ av_alloc_size(2, 3) void *av_realloc_array(void *ptr, size_t nmemb, size_t size) * @warning Unlike av_malloc(), the allocated memory is not guaranteed to be * correctly aligned. */ -av_alloc_size(2, 3) int av_reallocp_array(void *ptr, size_t nmemb, size_t size); +int av_reallocp_array(void *ptr, size_t nmemb, size_t size); /** * Reallocate the given buffer if it is not large enough, otherwise do nothing. @@ -363,10 +363,10 @@ av_alloc_size(2, 3) int av_reallocp_array(void *ptr, size_t nmemb, size_t size); * @endcode * * @param[in,out] ptr Already allocated buffer, or `NULL` - * @param[in,out] size Pointer to current size of buffer `ptr`. `*size` is - * changed to `min_size` in case of success or 0 in - * case of failure - * @param[in] min_size New size of buffer `ptr` + * @param[in,out] size Pointer to the size of buffer `ptr`. `*size` is + * updated to the new allocated size, in particular 0 + * in case of failure. + * @param[in] min_size Desired minimal size of buffer `ptr` * @return `ptr` if the buffer is large enough, a pointer to newly reallocated * buffer if the buffer was not large enough, or `NULL` in case of * error @@ -397,10 +397,10 @@ void *av_fast_realloc(void *ptr, unsigned int *size, size_t min_size); * @param[in,out] ptr Pointer to pointer to an already allocated buffer. * `*ptr` will be overwritten with pointer to new * buffer on success or `NULL` on failure - * @param[in,out] size Pointer to current size of buffer `*ptr`. `*size` is - * changed to `min_size` in case of success or 0 in - * case of failure - * @param[in] min_size New size of buffer `*ptr` + * @param[in,out] size Pointer to the size of buffer `*ptr`. `*size` is + * updated to the new allocated size, in particular 0 + * in case of failure. + * @param[in] min_size Desired minimal size of buffer `*ptr` * @see av_realloc() * @see av_fast_mallocz() */ @@ -418,10 +418,10 @@ void av_fast_malloc(void *ptr, unsigned int *size, size_t min_size); * @param[in,out] ptr Pointer to pointer to an already allocated buffer. * `*ptr` will be overwritten with pointer to new * buffer on success or `NULL` on failure - * @param[in,out] size Pointer to current size of buffer `*ptr`. `*size` is - * changed to `min_size` in case of success or 0 in - * case of failure - * @param[in] min_size New size of buffer `*ptr` + * @param[in,out] size Pointer to the size of buffer `*ptr`. `*size` is + * updated to the new allocated size, in particular 0 + * in case of failure. + * @param[in] min_size Desired minimal size of buffer `*ptr` * @see av_fast_malloc() */ void av_fast_mallocz(void *ptr, unsigned int *size, size_t min_size); diff --git a/libs/ffvpx/libavutil/opt.c b/libs/ffvpx/libavutil/opt.c index 3b0aab4ee..93d6c26c1 100644 --- a/libs/ffvpx/libavutil/opt.c +++ b/libs/ffvpx/libavutil/opt.c @@ -463,6 +463,9 @@ int av_opt_set(void *obj, const char *name, const char *val, int search_flags) if (o->flags & AV_OPT_FLAG_READONLY) return AVERROR(EINVAL); + if (o->flags & AV_OPT_FLAG_DEPRECATED) + av_log(obj, AV_LOG_WARNING, "The \"%s\" option is deprecated: %s\n", name, o->help); + dst = ((uint8_t *)target_obj) + o->offset; switch (o->type) { case AV_OPT_TYPE_BOOL: @@ -493,15 +496,22 @@ int av_opt_set(void *obj, const char *name, const char *val, int search_flags) case AV_OPT_TYPE_SAMPLE_FMT: return set_string_sample_fmt(obj, o, val, dst); case AV_OPT_TYPE_DURATION: - if (!val) { - *(int64_t *)dst = 0; + { + int64_t usecs = 0; + if (val) { + if ((ret = av_parse_time(&usecs, val, 1)) < 0) { + av_log(obj, AV_LOG_ERROR, "Unable to parse option value \"%s\" as duration\n", val); + return ret; + } + } + if (usecs < o->min || usecs > o->max) { + av_log(obj, AV_LOG_ERROR, "Value %f for parameter '%s' out of range [%g - %g]\n", + usecs / 1000000.0, o->name, o->min / 1000000.0, o->max / 1000000.0); + return AVERROR(ERANGE); + } + *(int64_t *)dst = usecs; return 0; - } else { - if ((ret = av_parse_time(dst, val, 1)) < 0) - av_log(obj, AV_LOG_ERROR, "Unable to parse option value \"%s\" as duration\n", val); - return ret; } - break; case AV_OPT_TYPE_COLOR: return set_string_color(obj, o, val, dst); case AV_OPT_TYPE_CHANNEL_LAYOUT: @@ -759,6 +769,9 @@ int av_opt_get(void *obj, const char *name, int search_flags, uint8_t **out_val) if (!o || !target_obj || (o->offset<=0 && o->type != AV_OPT_TYPE_CONST)) return AVERROR_OPTION_NOT_FOUND; + if (o->flags & AV_OPT_FLAG_DEPRECATED) + av_log(obj, AV_LOG_WARNING, "The \"%s\" option is deprecated: %s\n", name, o->help); + dst = (uint8_t *)target_obj + o->offset; buf[0] = 0; diff --git a/libs/ffvpx/libavutil/opt.h b/libs/ffvpx/libavutil/opt.h index 07da68ea2..39f4a8dda 100644 --- a/libs/ffvpx/libavutil/opt.h +++ b/libs/ffvpx/libavutil/opt.h @@ -289,6 +289,7 @@ typedef struct AVOption { #define AV_OPT_FLAG_READONLY 128 #define AV_OPT_FLAG_BSF_PARAM (1<<8) ///< a generic parameter which can be set by the user for bit stream filtering #define AV_OPT_FLAG_FILTERING_PARAM (1<<16) ///< a generic parameter which can be set by the user for filtering +#define AV_OPT_FLAG_DEPRECATED (1<<17) ///< set if option is deprecated, users should refer to AVOption.help text for more information //FIXME think about enc-audio, ... style flags /** diff --git a/libs/ffvpx/libavutil/parseutils.c b/libs/ffvpx/libavutil/parseutils.c index 9de19d1c1..af90e5bf2 100644 --- a/libs/ffvpx/libavutil/parseutils.c +++ b/libs/ffvpx/libavutil/parseutils.c @@ -504,7 +504,7 @@ char *av_small_strptime(const char *p, const char *fmt, struct tm *dt) switch(c) { case 'H': case 'J': - val = date_get_num(&p, 0, c == 'H' ? 23 : INT_MAX, 2); + val = date_get_num(&p, 0, c == 'H' ? 23 : INT_MAX, c == 'H' ? 2 : 4); if (val == -1) return NULL; @@ -661,12 +661,15 @@ int av_parse_time(int64_t *timeval, const char *timestr, int duration) if (!q) { char *o; /* parse timestr as S+ */ - dt.tm_sec = strtol(p, &o, 10); + errno = 0; + t = strtoll(p, &o, 10); if (o == p) /* the parsing didn't succeed */ return AVERROR(EINVAL); - dt.tm_min = 0; - dt.tm_hour = 0; + if (errno == ERANGE) + return AVERROR(ERANGE); q = o; + } else { + t = dt.tm_hour * 3600 + dt.tm_min * 60 + dt.tm_sec; } } @@ -688,7 +691,6 @@ int av_parse_time(int64_t *timeval, const char *timestr, int duration) } if (duration) { - t = dt.tm_hour * 3600 + dt.tm_min * 60 + dt.tm_sec; if (q[0] == 'm' && q[1] == 's') { suffix = 1000; microseconds /= 1000; @@ -734,8 +736,14 @@ int av_parse_time(int64_t *timeval, const char *timestr, int duration) if (*q) return AVERROR(EINVAL); + if (INT64_MAX / suffix < t || t < INT64_MIN / suffix) + return AVERROR(ERANGE); t *= suffix; + if (INT64_MAX - microseconds < t) + return AVERROR(ERANGE); t += microseconds; + if (t == INT64_MIN && negative) + return AVERROR(ERANGE); *timeval = negative ? -t : t; return 0; } diff --git a/libs/ffvpx/libavutil/pixdesc.c b/libs/ffvpx/libavutil/pixdesc.c index 8ed52751c..b97b0665b 100644 --- a/libs/ffvpx/libavutil/pixdesc.c +++ b/libs/ffvpx/libavutil/pixdesc.c @@ -31,19 +31,22 @@ #include "intreadwrite.h" #include "version.h" -void av_read_image_line(uint16_t *dst, +void av_read_image_line2(void *dst, const uint8_t *data[4], const int linesize[4], const AVPixFmtDescriptor *desc, int x, int y, int c, int w, - int read_pal_component) + int read_pal_component, + int dst_element_size) { AVComponentDescriptor comp = desc->comp[c]; int plane = comp.plane; int depth = comp.depth; - int mask = (1 << depth) - 1; + unsigned mask = (1ULL << depth) - 1; int shift = comp.shift; int step = comp.step; int flags = desc->flags; + uint16_t *dst16 = dst; + uint32_t *dst32 = dst; if (flags & AV_PIX_FMT_FLAG_BITSTREAM) { int skip = x * step + comp.offset; @@ -57,38 +60,56 @@ void av_read_image_line(uint16_t *dst, shift -= step; p -= shift >> 3; shift &= 7; - *dst++ = val; + if (dst_element_size == 4) *dst32++ = val; + else *dst16++ = val; } } else { const uint8_t *p = data[plane] + y * linesize[plane] + x * step + comp.offset; int is_8bit = shift + depth <= 8; + int is_16bit= shift + depth <=16; if (is_8bit) p += !!(flags & AV_PIX_FMT_FLAG_BE); while (w--) { - int val = is_8bit ? *p : - flags & AV_PIX_FMT_FLAG_BE ? AV_RB16(p) : AV_RL16(p); + unsigned val; + if (is_8bit) val = *p; + else if(is_16bit) val = flags & AV_PIX_FMT_FLAG_BE ? AV_RB16(p) : AV_RL16(p); + else val = flags & AV_PIX_FMT_FLAG_BE ? AV_RB32(p) : AV_RL32(p); val = (val >> shift) & mask; if (read_pal_component) val = data[1][4 * val + c]; p += step; - *dst++ = val; + if (dst_element_size == 4) *dst32++ = val; + else *dst16++ = val; } } } -void av_write_image_line(const uint16_t *src, +void av_read_image_line(uint16_t *dst, + const uint8_t *data[4], const int linesize[4], + const AVPixFmtDescriptor *desc, + int x, int y, int c, int w, + int read_pal_component) +{ + av_read_image_line2(dst, data, linesize, desc,x, y, c, w, + read_pal_component, + 2); +} + +void av_write_image_line2(const void *src, uint8_t *data[4], const int linesize[4], const AVPixFmtDescriptor *desc, - int x, int y, int c, int w) + int x, int y, int c, int w, int src_element_size) { AVComponentDescriptor comp = desc->comp[c]; int plane = comp.plane; int depth = comp.depth; int step = comp.step; int flags = desc->flags; + const uint32_t *src32 = src; + const uint16_t *src16 = src; if (flags & AV_PIX_FMT_FLAG_BITSTREAM) { int skip = x * step + comp.offset; @@ -96,7 +117,7 @@ void av_write_image_line(const uint16_t *src, int shift = 8 - depth - (skip & 7); while (w--) { - *p |= *src++ << shift; + *p |= (src_element_size == 4 ? *src32++ : *src16++) << shift; shift -= step; p -= shift >> 3; shift &= 7; @@ -109,17 +130,28 @@ void av_write_image_line(const uint16_t *src, if (shift + depth <= 8) { p += !!(flags & AV_PIX_FMT_FLAG_BE); while (w--) { - *p |= (*src++ << shift); + *p |= ((src_element_size == 4 ? *src32++ : *src16++) << shift); p += step; } } else { while (w--) { - if (flags & AV_PIX_FMT_FLAG_BE) { - uint16_t val = AV_RB16(p) | (*src++ << shift); - AV_WB16(p, val); + unsigned s = (src_element_size == 4 ? *src32++ : *src16++); + if (shift + depth <= 16) { + if (flags & AV_PIX_FMT_FLAG_BE) { + uint16_t val = AV_RB16(p) | (s << shift); + AV_WB16(p, val); + } else { + uint16_t val = AV_RL16(p) | (s << shift); + AV_WL16(p, val); + } } else { - uint16_t val = AV_RL16(p) | (*src++ << shift); - AV_WL16(p, val); + if (flags & AV_PIX_FMT_FLAG_BE) { + uint32_t val = AV_RB32(p) | (s << shift); + AV_WB32(p, val); + } else { + uint32_t val = AV_RL32(p) | (s << shift); + AV_WL32(p, val); + } } p += step; } @@ -127,6 +159,14 @@ void av_write_image_line(const uint16_t *src, } } +void av_write_image_line(const uint16_t *src, + uint8_t *data[4], const int linesize[4], + const AVPixFmtDescriptor *desc, + int x, int y, int c, int w) +{ + av_write_image_line2(src, data, linesize, desc, x, y, c, w, 2); +} + #if FF_API_PLUS1_MINUS1 FF_DISABLE_DEPRECATION_WARNINGS #endif @@ -288,7 +328,7 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { .comp = { { 0, 1, 0, 0, 8, 0, 7, 1 }, }, - .flags = AV_PIX_FMT_FLAG_PAL, + .flags = AV_PIX_FMT_FLAG_PAL | AV_PIX_FMT_FLAG_ALPHA, }, [AV_PIX_FMT_YUVJ420P] = { .name = "yuvj420p", @@ -611,6 +651,27 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { }, .alias = "y12le", }, + [AV_PIX_FMT_GRAY14BE] = { + .name = "gray14be", + .nb_components = 1, + .log2_chroma_w = 0, + .log2_chroma_h = 0, + .comp = { + { 0, 2, 0, 0, 14, 1, 13, 1 }, /* Y */ + }, + .flags = AV_PIX_FMT_FLAG_BE, + .alias = "y14be", + }, + [AV_PIX_FMT_GRAY14LE] = { + .name = "gray14le", + .nb_components = 1, + .log2_chroma_w = 0, + .log2_chroma_h = 0, + .comp = { + { 0, 2, 0, 0, 14, 1, 13, 1 }, /* Y */ + }, + .alias = "y14le", + }, [AV_PIX_FMT_GRAY16BE] = { .name = "gray16be", .nb_components = 1, @@ -2185,6 +2246,104 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { .name = "opencl", .flags = AV_PIX_FMT_FLAG_HWACCEL, }, + [AV_PIX_FMT_GRAYF32BE] = { + .name = "grayf32be", + .nb_components = 1, + .log2_chroma_w = 0, + .log2_chroma_h = 0, + .comp = { + { 0, 4, 0, 0, 32, 3, 31, 1 }, /* Y */ + }, + .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_FLOAT, + .alias = "yf32be", + }, + [AV_PIX_FMT_GRAYF32LE] = { + .name = "grayf32le", + .nb_components = 1, + .log2_chroma_w = 0, + .log2_chroma_h = 0, + .comp = { + { 0, 4, 0, 0, 32, 3, 31, 1 }, /* Y */ + }, + .flags = AV_PIX_FMT_FLAG_FLOAT, + .alias = "yf32le", + }, + [AV_PIX_FMT_YUVA422P12BE] = { + .name = "yuva422p12be", + .nb_components = 4, + .log2_chroma_w = 1, + .log2_chroma_h = 0, + .comp = { + { 0, 2, 0, 0, 12, 1, 11, 1 }, /* Y */ + { 1, 2, 0, 0, 12, 1, 11, 1 }, /* U */ + { 2, 2, 0, 0, 12, 1, 11, 1 }, /* V */ + { 3, 2, 0, 0, 12, 1, 11, 1 }, /* A */ + }, + .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA, + }, + [AV_PIX_FMT_YUVA422P12LE] = { + .name = "yuva422p12le", + .nb_components = 4, + .log2_chroma_w = 1, + .log2_chroma_h = 0, + .comp = { + { 0, 2, 0, 0, 12, 1, 11, 1 }, /* Y */ + { 1, 2, 0, 0, 12, 1, 11, 1 }, /* U */ + { 2, 2, 0, 0, 12, 1, 11, 1 }, /* V */ + { 3, 2, 0, 0, 12, 1, 11, 1 }, /* A */ + }, + .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA, + }, + [AV_PIX_FMT_YUVA444P12BE] = { + .name = "yuva444p12be", + .nb_components = 4, + .log2_chroma_w = 0, + .log2_chroma_h = 0, + .comp = { + { 0, 2, 0, 0, 12, 1, 11, 1 }, /* Y */ + { 1, 2, 0, 0, 12, 1, 11, 1 }, /* U */ + { 2, 2, 0, 0, 12, 1, 11, 1 }, /* V */ + { 3, 2, 0, 0, 12, 1, 11, 1 }, /* A */ + }, + .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA, + }, + [AV_PIX_FMT_YUVA444P12LE] = { + .name = "yuva444p12le", + .nb_components = 4, + .log2_chroma_w = 0, + .log2_chroma_h = 0, + .comp = { + { 0, 2, 0, 0, 12, 1, 11, 1 }, /* Y */ + { 1, 2, 0, 0, 12, 1, 11, 1 }, /* U */ + { 2, 2, 0, 0, 12, 1, 11, 1 }, /* V */ + { 3, 2, 0, 0, 12, 1, 11, 1 }, /* A */ + }, + .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA, + }, + [AV_PIX_FMT_NV24] = { + .name = "nv24", + .nb_components = 3, + .log2_chroma_w = 0, + .log2_chroma_h = 0, + .comp = { + { 0, 1, 0, 0, 8, 0, 7, 1 }, /* Y */ + { 1, 2, 0, 0, 8, 1, 7, 1 }, /* U */ + { 1, 2, 1, 0, 8, 1, 7, 2 }, /* V */ + }, + .flags = AV_PIX_FMT_FLAG_PLANAR, + }, + [AV_PIX_FMT_NV42] = { + .name = "nv42", + .nb_components = 3, + .log2_chroma_w = 0, + .log2_chroma_h = 0, + .comp = { + { 0, 1, 0, 0, 8, 0, 7, 1 }, /* Y */ + { 1, 2, 1, 0, 8, 1, 7, 2 }, /* U */ + { 1, 2, 0, 0, 8, 1, 7, 1 }, /* V */ + }, + .flags = AV_PIX_FMT_FLAG_PLANAR, + }, }; #if FF_API_PLUS1_MINUS1 FF_ENABLE_DEPRECATION_WARNINGS @@ -2432,7 +2591,6 @@ void ff_check_pixfmt_descriptors(void){ av_assert0(d->log2_chroma_h <= 3); av_assert0(d->nb_components <= 4); av_assert0(d->name && d->name[0]); - av_assert0((d->nb_components==4 || d->nb_components==2) == !!(d->flags & AV_PIX_FMT_FLAG_ALPHA)); av_assert2(av_get_pix_fmt(d->name) == i); for (j=0; j<FF_ARRAY_ELEMS(d->comp); j++) { @@ -2483,7 +2641,7 @@ enum AVPixelFormat av_pix_fmt_swap_endianness(enum AVPixelFormat pix_fmt) #define FF_COLOR_XYZ 4 #define pixdesc_has_alpha(pixdesc) \ - ((pixdesc)->nb_components == 2 || (pixdesc)->nb_components == 4 || (pixdesc)->flags & AV_PIX_FMT_FLAG_PAL) + ((pixdesc)->flags & AV_PIX_FMT_FLAG_ALPHA) static int get_color_type(const AVPixFmtDescriptor *desc) { @@ -2717,7 +2875,12 @@ int av_color_primaries_from_name(const char *name) int i; for (i = 0; i < FF_ARRAY_ELEMS(color_primaries_names); i++) { - size_t len = strlen(color_primaries_names[i]); + size_t len; + + if (!color_primaries_names[i]) + continue; + + len = strlen(color_primaries_names[i]); if (!strncmp(color_primaries_names[i], name, len)) return i; } @@ -2736,7 +2899,12 @@ int av_color_transfer_from_name(const char *name) int i; for (i = 0; i < FF_ARRAY_ELEMS(color_transfer_names); i++) { - size_t len = strlen(color_transfer_names[i]); + size_t len; + + if (!color_transfer_names[i]) + continue; + + len = strlen(color_transfer_names[i]); if (!strncmp(color_transfer_names[i], name, len)) return i; } @@ -2755,7 +2923,12 @@ int av_color_space_from_name(const char *name) int i; for (i = 0; i < FF_ARRAY_ELEMS(color_space_names); i++) { - size_t len = strlen(color_space_names[i]); + size_t len; + + if (!color_space_names[i]) + continue; + + len = strlen(color_space_names[i]); if (!strncmp(color_space_names[i], name, len)) return i; } @@ -2774,7 +2947,12 @@ int av_chroma_location_from_name(const char *name) int i; for (i = 0; i < FF_ARRAY_ELEMS(chroma_location_names); i++) { - size_t len = strlen(chroma_location_names[i]); + size_t len; + + if (!chroma_location_names[i]) + continue; + + len = strlen(chroma_location_names[i]); if (!strncmp(chroma_location_names[i], name, len)) return i; } diff --git a/libs/ffvpx/libavutil/pixdesc.h b/libs/ffvpx/libavutil/pixdesc.h index 1ab372782..c055810ae 100644 --- a/libs/ffvpx/libavutil/pixdesc.h +++ b/libs/ffvpx/libavutil/pixdesc.h @@ -167,12 +167,8 @@ typedef struct AVPixFmtDescriptor { /** * The pixel format has an alpha channel. This is set on all formats that - * support alpha in some way. The exception is AV_PIX_FMT_PAL8, which can - * carry alpha as part of the palette. Details are explained in the - * AVPixelFormat enum, and are also encoded in the corresponding - * AVPixFmtDescriptor. - * - * The alpha is always straight, never pre-multiplied. + * support alpha in some way, including AV_PIX_FMT_PAL8. The alpha is always + * straight, never pre-multiplied. * * If a codec or a filter does not support alpha, it should set all alpha to * opaque, or use the equivalent pixel formats without alpha component, e.g. @@ -347,7 +343,13 @@ char *av_get_pix_fmt_string(char *buf, int buf_size, * format writes the values corresponding to the palette * component c in data[1] to dst, rather than the palette indexes in * data[0]. The behavior is undefined if the format is not paletted. + * @param dst_element_size size of elements in dst array (2 or 4 byte) */ +void av_read_image_line2(void *dst, const uint8_t *data[4], + const int linesize[4], const AVPixFmtDescriptor *desc, + int x, int y, int c, int w, int read_pal_component, + int dst_element_size); + void av_read_image_line(uint16_t *dst, const uint8_t *data[4], const int linesize[4], const AVPixFmtDescriptor *desc, int x, int y, int c, int w, int read_pal_component); @@ -365,7 +367,12 @@ void av_read_image_line(uint16_t *dst, const uint8_t *data[4], * @param y the vertical coordinate of the first pixel to write * @param w the width of the line to write, that is the number of * values to write to the image line + * @param src_element_size size of elements in src array (2 or 4 byte) */ +void av_write_image_line2(const void *src, uint8_t *data[4], + const int linesize[4], const AVPixFmtDescriptor *desc, + int x, int y, int c, int w, int src_element_size); + void av_write_image_line(const uint16_t *src, uint8_t *data[4], const int linesize[4], const AVPixFmtDescriptor *desc, int x, int y, int c, int w); diff --git a/libs/ffvpx/libavutil/pixelutils.c b/libs/ffvpx/libavutil/pixelutils.c index b663027b4..ebee3d6f9 100644 --- a/libs/ffvpx/libavutil/pixelutils.c +++ b/libs/ffvpx/libavutil/pixelutils.c @@ -51,12 +51,14 @@ DECLARE_BLOCK_FUNCTIONS(2) DECLARE_BLOCK_FUNCTIONS(4) DECLARE_BLOCK_FUNCTIONS(8) DECLARE_BLOCK_FUNCTIONS(16) +DECLARE_BLOCK_FUNCTIONS(32) static const av_pixelutils_sad_fn sad_c[] = { block_sad_2x2_c, block_sad_4x4_c, block_sad_8x8_c, block_sad_16x16_c, + block_sad_32x32_c, }; #endif /* CONFIG_PIXELUTILS */ diff --git a/libs/ffvpx/libavutil/pixfmt.h b/libs/ffvpx/libavutil/pixfmt.h index e184a5667..8b54c9415 100644 --- a/libs/ffvpx/libavutil/pixfmt.h +++ b/libs/ffvpx/libavutil/pixfmt.h @@ -42,6 +42,10 @@ * This is stored as BGRA on little-endian CPU architectures and ARGB on * big-endian CPUs. * + * @note + * If the resolution is not a multiple of the chroma subsampling factor + * then the chroma plane resolution must be rounded up. + * * @par * When the pixel format is palettized RGB32 (AV_PIX_FMT_PAL8), the palettized * image data is stored in AVFrame.data[0]. The palette is transported in @@ -330,6 +334,20 @@ enum AVPixelFormat { */ AV_PIX_FMT_OPENCL, + AV_PIX_FMT_GRAY14BE, ///< Y , 14bpp, big-endian + AV_PIX_FMT_GRAY14LE, ///< Y , 14bpp, little-endian + + AV_PIX_FMT_GRAYF32BE, ///< IEEE-754 single precision Y, 32bpp, big-endian + AV_PIX_FMT_GRAYF32LE, ///< IEEE-754 single precision Y, 32bpp, little-endian + + AV_PIX_FMT_YUVA422P12BE, ///< planar YUV 4:2:2,24bpp, (1 Cr & Cb sample per 2x1 Y samples), 12b alpha, big-endian + AV_PIX_FMT_YUVA422P12LE, ///< planar YUV 4:2:2,24bpp, (1 Cr & Cb sample per 2x1 Y samples), 12b alpha, little-endian + AV_PIX_FMT_YUVA444P12BE, ///< planar YUV 4:4:4,36bpp, (1 Cr & Cb sample per 1x1 Y samples), 12b alpha, big-endian + AV_PIX_FMT_YUVA444P12LE, ///< planar YUV 4:4:4,36bpp, (1 Cr & Cb sample per 1x1 Y samples), 12b alpha, little-endian + + AV_PIX_FMT_NV24, ///< planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V) + AV_PIX_FMT_NV42, ///< as above, but U and V bytes are swapped + AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions }; @@ -349,6 +367,7 @@ enum AVPixelFormat { #define AV_PIX_FMT_GRAY9 AV_PIX_FMT_NE(GRAY9BE, GRAY9LE) #define AV_PIX_FMT_GRAY10 AV_PIX_FMT_NE(GRAY10BE, GRAY10LE) #define AV_PIX_FMT_GRAY12 AV_PIX_FMT_NE(GRAY12BE, GRAY12LE) +#define AV_PIX_FMT_GRAY14 AV_PIX_FMT_NE(GRAY14BE, GRAY14LE) #define AV_PIX_FMT_GRAY16 AV_PIX_FMT_NE(GRAY16BE, GRAY16LE) #define AV_PIX_FMT_YA16 AV_PIX_FMT_NE(YA16BE, YA16LE) #define AV_PIX_FMT_RGB48 AV_PIX_FMT_NE(RGB48BE, RGB48LE) @@ -397,12 +416,16 @@ enum AVPixelFormat { #define AV_PIX_FMT_GBRPF32 AV_PIX_FMT_NE(GBRPF32BE, GBRPF32LE) #define AV_PIX_FMT_GBRAPF32 AV_PIX_FMT_NE(GBRAPF32BE, GBRAPF32LE) +#define AV_PIX_FMT_GRAYF32 AV_PIX_FMT_NE(GRAYF32BE, GRAYF32LE) + #define AV_PIX_FMT_YUVA420P9 AV_PIX_FMT_NE(YUVA420P9BE , YUVA420P9LE) #define AV_PIX_FMT_YUVA422P9 AV_PIX_FMT_NE(YUVA422P9BE , YUVA422P9LE) #define AV_PIX_FMT_YUVA444P9 AV_PIX_FMT_NE(YUVA444P9BE , YUVA444P9LE) #define AV_PIX_FMT_YUVA420P10 AV_PIX_FMT_NE(YUVA420P10BE, YUVA420P10LE) #define AV_PIX_FMT_YUVA422P10 AV_PIX_FMT_NE(YUVA422P10BE, YUVA422P10LE) #define AV_PIX_FMT_YUVA444P10 AV_PIX_FMT_NE(YUVA444P10BE, YUVA444P10LE) +#define AV_PIX_FMT_YUVA422P12 AV_PIX_FMT_NE(YUVA422P12BE, YUVA422P12LE) +#define AV_PIX_FMT_YUVA444P12 AV_PIX_FMT_NE(YUVA444P12BE, YUVA444P12LE) #define AV_PIX_FMT_YUVA420P16 AV_PIX_FMT_NE(YUVA420P16BE, YUVA420P16LE) #define AV_PIX_FMT_YUVA422P16 AV_PIX_FMT_NE(YUVA422P16BE, YUVA422P16LE) #define AV_PIX_FMT_YUVA444P16 AV_PIX_FMT_NE(YUVA444P16BE, YUVA444P16LE) diff --git a/libs/ffvpx/libavutil/threadmessage.c b/libs/ffvpx/libavutil/threadmessage.c index 872e9392b..764b7fb81 100644 --- a/libs/ffvpx/libavutil/threadmessage.c +++ b/libs/ffvpx/libavutil/threadmessage.c @@ -102,6 +102,19 @@ void av_thread_message_queue_free(AVThreadMessageQueue **mq) #endif } +int av_thread_message_queue_nb_elems(AVThreadMessageQueue *mq) +{ +#if HAVE_THREADS + int ret; + pthread_mutex_lock(&mq->lock); + ret = av_fifo_size(mq->fifo); + pthread_mutex_unlock(&mq->lock); + return ret / mq->elsize; +#else + return AVERROR(ENOSYS); +#endif +} + #if HAVE_THREADS static int av_thread_message_queue_send_locked(AVThreadMessageQueue *mq, diff --git a/libs/ffvpx/libavutil/threadmessage.h b/libs/ffvpx/libavutil/threadmessage.h index 8480a0a3d..42ce655f3 100644 --- a/libs/ffvpx/libavutil/threadmessage.h +++ b/libs/ffvpx/libavutil/threadmessage.h @@ -96,6 +96,14 @@ void av_thread_message_queue_set_free_func(AVThreadMessageQueue *mq, void (*free_func)(void *msg)); /** + * Return the current number of messages in the queue. + * + * @return the current number of messages or AVERROR(ENOSYS) if lavu was built + * without thread support + */ +int av_thread_message_queue_nb_elems(AVThreadMessageQueue *mq); + +/** * Flush the message queue * * This function is mostly equivalent to reading and free-ing every message diff --git a/libs/ffvpx/libavutil/time_internal.h b/libs/ffvpx/libavutil/time_internal.h index 612a75a04..d0f007ab1 100644 --- a/libs/ffvpx/libavutil/time_internal.h +++ b/libs/ffvpx/libavutil/time_internal.h @@ -23,7 +23,7 @@ #include "config.h" #if !HAVE_GMTIME_R && !defined(gmtime_r) -static inline struct tm *gmtime_r(const time_t* clock, struct tm *result) +static inline struct tm *ff_gmtime_r(const time_t* clock, struct tm *result) { struct tm *ptr = gmtime(clock); if (!ptr) @@ -31,10 +31,11 @@ static inline struct tm *gmtime_r(const time_t* clock, struct tm *result) *result = *ptr; return result; } +#define gmtime_r ff_gmtime_r #endif #if !HAVE_LOCALTIME_R && !defined(localtime_r) -static inline struct tm *localtime_r(const time_t* clock, struct tm *result) +static inline struct tm *ff_localtime_r(const time_t* clock, struct tm *result) { struct tm *ptr = localtime(clock); if (!ptr) @@ -42,6 +43,7 @@ static inline struct tm *localtime_r(const time_t* clock, struct tm *result) *result = *ptr; return result; } +#define localtime_r ff_localtime_r #endif #endif /* AVUTIL_TIME_INTERNAL_H */ diff --git a/libs/ffvpx/libavutil/timecode.c b/libs/ffvpx/libavutil/timecode.c index 60077ba0c..f029f2583 100644 --- a/libs/ffvpx/libavutil/timecode.c +++ b/libs/ffvpx/libavutil/timecode.c @@ -49,7 +49,7 @@ int av_timecode_adjust_ntsc_framenum2(int framenum, int fps) d = framenum / frames_per_10mins; m = framenum % frames_per_10mins; - return framenum + 9 * drop_frames * d + drop_frames * ((m - drop_frames) / (frames_per_10mins / 10)); + return framenum + 9U * drop_frames * d + drop_frames * ((m - drop_frames) / (frames_per_10mins / 10)); } uint32_t av_timecode_get_smpte_from_framenum(const AVTimecode *tc, int framenum) @@ -96,8 +96,8 @@ char *av_timecode_make_string(const AVTimecode *tc, char *buf, int framenum) } ff = framenum % fps; ss = framenum / fps % 60; - mm = framenum / (fps*60) % 60; - hh = framenum / (fps*3600); + mm = framenum / (fps*60LL) % 60; + hh = framenum / (fps*3600LL); if (tc->flags & AV_TIMECODE_FLAG_24HOURSMAX) hh = hh % 24; snprintf(buf, AV_TIMECODE_STR_SIZE, "%s%02d:%02d:%02d%c%02d", diff --git a/libs/ffvpx/libavutil/version.h b/libs/ffvpx/libavutil/version.h index 3a63e6355..24ca8ab7d 100644 --- a/libs/ffvpx/libavutil/version.h +++ b/libs/ffvpx/libavutil/version.h @@ -79,7 +79,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 56 -#define LIBAVUTIL_VERSION_MINOR 14 +#define LIBAVUTIL_VERSION_MINOR 31 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ diff --git a/libs/ffvpx/libavutil/x86/cpu.c b/libs/ffvpx/libavutil/x86/cpu.c index aca893174..bcd41a50a 100644 --- a/libs/ffvpx/libavutil/x86/cpu.c +++ b/libs/ffvpx/libavutil/x86/cpu.c @@ -127,7 +127,7 @@ int ff_get_cpu_flags_x86(void) rval |= AV_CPU_FLAG_SSE4; if (ecx & 0x00100000 ) rval |= AV_CPU_FLAG_SSE42; - if (ecx & 0x01000000 ) + if (ecx & 0x02000000 ) rval |= AV_CPU_FLAG_AESNI; #if HAVE_AVX /* Check OXSAVE and AVX bits */ diff --git a/libs/ffvpx/libavutil/x86/float_dsp.asm b/libs/ffvpx/libavutil/x86/float_dsp.asm index 06d2d2cfd..517fd6363 100644 --- a/libs/ffvpx/libavutil/x86/float_dsp.asm +++ b/libs/ffvpx/libavutil/x86/float_dsp.asm @@ -58,6 +58,39 @@ INIT_YMM avx VECTOR_FMUL %endif +;----------------------------------------------------------------------------- +; void vector_dmul(double *dst, const double *src0, const double *src1, int len) +;----------------------------------------------------------------------------- +%macro VECTOR_DMUL 0 +cglobal vector_dmul, 4,4,4, dst, src0, src1, len + lea lend, [lenq*8 - mmsize*4] +ALIGN 16 +.loop: + movaps m0, [src0q + lenq + 0*mmsize] + movaps m1, [src0q + lenq + 1*mmsize] + movaps m2, [src0q + lenq + 2*mmsize] + movaps m3, [src0q + lenq + 3*mmsize] + mulpd m0, m0, [src1q + lenq + 0*mmsize] + mulpd m1, m1, [src1q + lenq + 1*mmsize] + mulpd m2, m2, [src1q + lenq + 2*mmsize] + mulpd m3, m3, [src1q + lenq + 3*mmsize] + movaps [dstq + lenq + 0*mmsize], m0 + movaps [dstq + lenq + 1*mmsize], m1 + movaps [dstq + lenq + 2*mmsize], m2 + movaps [dstq + lenq + 3*mmsize], m3 + + sub lenq, mmsize*4 + jge .loop + RET +%endmacro + +INIT_XMM sse2 +VECTOR_DMUL +%if HAVE_AVX_EXTERNAL +INIT_YMM avx +VECTOR_DMUL +%endif + ;------------------------------------------------------------------------------ ; void ff_vector_fmac_scalar(float *dst, const float *src, float mul, int len) ;------------------------------------------------------------------------------ diff --git a/libs/ffvpx/libavutil/x86/float_dsp_init.c b/libs/ffvpx/libavutil/x86/float_dsp_init.c index 122087a19..8826e4e2c 100644 --- a/libs/ffvpx/libavutil/x86/float_dsp_init.c +++ b/libs/ffvpx/libavutil/x86/float_dsp_init.c @@ -29,6 +29,11 @@ void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1, void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1, int len); +void ff_vector_dmul_sse2(double *dst, const double *src0, const double *src1, + int len); +void ff_vector_dmul_avx(double *dst, const double *src0, const double *src1, + int len); + void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul, int len); void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul, @@ -92,11 +97,13 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) fdsp->butterflies_float = ff_butterflies_float_sse; } if (EXTERNAL_SSE2(cpu_flags)) { + fdsp->vector_dmul = ff_vector_dmul_sse2; fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_sse2; fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2; } if (EXTERNAL_AVX_FAST(cpu_flags)) { fdsp->vector_fmul = ff_vector_fmul_avx; + fdsp->vector_dmul = ff_vector_dmul_avx; fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx; fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx; fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_avx; diff --git a/libs/ffvpx/libavutil/x86/pixelutils.asm b/libs/ffvpx/libavutil/x86/pixelutils.asm index 7af3007d0..36c57c5f7 100644 --- a/libs/ffvpx/libavutil/x86/pixelutils.asm +++ b/libs/ffvpx/libavutil/x86/pixelutils.asm @@ -104,8 +104,8 @@ cglobal pixelutils_sad_16x16, 4,4,0, src1, stride1, src2, stride2 RET ;------------------------------------------------------------------------------- -; int ff_pixelutils_sad_16x16_sse(const uint8_t *src1, ptrdiff_t stride1, -; const uint8_t *src2, ptrdiff_t stride2); +; int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); ;------------------------------------------------------------------------------- INIT_XMM sse2 cglobal pixelutils_sad_16x16, 4,4,5, src1, stride1, src2, stride2 @@ -134,8 +134,8 @@ cglobal pixelutils_sad_16x16, 4,4,5, src1, stride1, src2, stride2 RET ;------------------------------------------------------------------------------- -; int ff_pixelutils_sad_[au]_16x16_sse(const uint8_t *src1, ptrdiff_t stride1, -; const uint8_t *src2, ptrdiff_t stride2); +; int ff_pixelutils_sad_[au]_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); ;------------------------------------------------------------------------------- %macro SAD_XMM_16x16 1 INIT_XMM sse2 @@ -163,3 +163,224 @@ cglobal pixelutils_sad_%1_16x16, 4,4,3, src1, stride1, src2, stride2 SAD_XMM_16x16 a SAD_XMM_16x16 u + + +%macro PROCESS_SAD_32x4_U 0 + movu m1, [r2] + movu m2, [r2 + 16] + movu m3, [r0] + movu m4, [r0 + 16] + psadbw m1, m3 + psadbw m2, m4 + paddd m1, m2 + paddd m0, m1 + lea r2, [r2 + r3] + lea r0, [r0 + r1] + + movu m1, [r2] + movu m2, [r2 + 16] + movu m3, [r0] + movu m4, [r0 + 16] + psadbw m1, m3 + psadbw m2, m4 + paddd m1, m2 + paddd m0, m1 + lea r2, [r2 + r3] + lea r0, [r0 + r1] + + movu m1, [r2] + movu m2, [r2 + 16] + movu m3, [r0] + movu m4, [r0 + 16] + psadbw m1, m3 + psadbw m2, m4 + paddd m1, m2 + paddd m0, m1 + lea r2, [r2 + r3] + lea r0, [r0 + r1] + + movu m1, [r2] + movu m2, [r2 + 16] + movu m3, [r0] + movu m4, [r0 + 16] + psadbw m1, m3 + psadbw m2, m4 + paddd m1, m2 + paddd m0, m1 + lea r2, [r2 + r3] + lea r0, [r0 + r1] +%endmacro + +%macro PROCESS_SAD_32x4 1 + mov%1 m1, [r2] + mov%1 m2, [r2 + 16] + psadbw m1, [r0] + psadbw m2, [r0 + 16] + paddd m1, m2 + paddd m0, m1 + lea r2, [r2 + r3] + lea r0, [r0 + r1] + + mov%1 m1, [r2] + mov%1 m2, [r2 + 16] + psadbw m1, [r0] + psadbw m2, [r0 + 16] + paddd m1, m2 + paddd m0, m1 + lea r2, [r2 + r3] + lea r0, [r0 + r1] + + mov%1 m1, [r2] + mov%1 m2, [r2 + 16] + psadbw m1, [r0] + psadbw m2, [r0 + 16] + paddd m1, m2 + paddd m0, m1 + lea r2, [r2 + r3] + lea r0, [r0 + r1] + + mov%1 m1, [r2] + mov%1 m2, [r2 + 16] + psadbw m1, [r0] + psadbw m2, [r0 + 16] + paddd m1, m2 + paddd m0, m1 + lea r2, [r2 + r3] + lea r0, [r0 + r1] +%endmacro + +;----------------------------------------------------------------------------- +; int ff_pixelutils_sad_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;----------------------------------------------------------------------------- +INIT_XMM sse2 +cglobal pixelutils_sad_32x32, 4,5,5, src1, stride1, src2, stride2 + pxor m0, m0 + mov r4d, 4 +.loop: + PROCESS_SAD_32x4_U + PROCESS_SAD_32x4_U + dec r4d + jnz .loop + + movhlps m1, m0 + paddd m0, m1 + movd eax, m0 + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_[au]_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +%macro SAD_XMM_32x32 1 +INIT_XMM sse2 +cglobal pixelutils_sad_%1_32x32, 4,5,3, src1, stride1, src2, stride2 + pxor m0, m0 + mov r4d, 4 +.loop: + PROCESS_SAD_32x4 %1 + PROCESS_SAD_32x4 %1 + dec r4d + jnz .loop + + movhlps m1, m0 + paddd m0, m1 + movd eax, m0 + RET +%endmacro + +SAD_XMM_32x32 a +SAD_XMM_32x32 u + +%if HAVE_AVX2_EXTERNAL +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_YMM avx2 +cglobal pixelutils_sad_32x32, 4,7,5, src1, stride1, src2, stride2 + pxor m0, m0 + mov r4d, 32/4 + lea r5, [stride1q * 3] + lea r6, [stride2q * 3] + +.loop: + movu m1, [src1q] ; row 0 of pix0 + movu m2, [src2q] ; row 0 of pix1 + movu m3, [src1q + stride1q] ; row 1 of pix0 + movu m4, [src2q + stride2q] ; row 1 of pix1 + + psadbw m1, m2 + psadbw m3, m4 + paddd m0, m1 + paddd m0, m3 + + movu m1, [src1q + 2 * stride1q] ; row 2 of pix0 + movu m2, [src2q + 2 * stride2q] ; row 2 of pix1 + movu m3, [src1q + r5] ; row 3 of pix0 + movu m4, [src2q + r6] ; row 3 of pix1 + + psadbw m1, m2 + psadbw m3, m4 + paddd m0, m1 + paddd m0, m3 + + lea src2q, [src2q + 4 * stride2q] + lea src1q, [src1q + 4 * stride1q] + + dec r4d + jnz .loop + + vextracti128 xm1, m0, 1 + paddd xm0, xm1 + pshufd xm1, xm0, 2 + paddd xm0, xm1 + movd eax, xm0 + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_[au]_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +%macro SAD_AVX2_32x32 1 +INIT_YMM avx2 +cglobal pixelutils_sad_%1_32x32, 4,7,3, src1, stride1, src2, stride2 + pxor m0, m0 + mov r4d, 32/4 + lea r5, [stride1q * 3] + lea r6, [stride2q * 3] + +.loop: + mov%1 m1, [src2q] ; row 0 of pix1 + psadbw m1, [src1q] + mov%1 m2, [src2q + stride2q] ; row 1 of pix1 + psadbw m2, [src1q + stride1q] + + paddd m0, m1 + paddd m0, m2 + + mov%1 m1, [src2q + 2 * stride2q] ; row 2 of pix1 + psadbw m1, [src1q + 2 * stride1q] + mov%1 m2, [src2q + r6] ; row 3 of pix1 + psadbw m2, [src1q + r5] + + paddd m0, m1 + paddd m0, m2 + + lea src2q, [src2q + 4 * stride2q] + lea src1q, [src1q + 4 * stride1q] + + dec r4d + jnz .loop + + vextracti128 xm1, m0, 1 + paddd xm0, xm1 + pshufd xm1, xm0, 2 + paddd xm0, xm1 + movd eax, xm0 + RET +%endmacro + +SAD_AVX2_32x32 a +SAD_AVX2_32x32 u +%endif diff --git a/libs/ffvpx/libavutil/x86/pixelutils_init.c b/libs/ffvpx/libavutil/x86/pixelutils_init.c index c24a533ae..184a3a4a9 100644 --- a/libs/ffvpx/libavutil/x86/pixelutils_init.c +++ b/libs/ffvpx/libavutil/x86/pixelutils_init.c @@ -35,6 +35,20 @@ int ff_pixelutils_sad_a_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, int ff_pixelutils_sad_u_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_a_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_u_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +int ff_pixelutils_sad_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_a_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_u_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned) { int cpu_flags = av_get_cpu_flags(); @@ -61,4 +75,20 @@ void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned) case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1 aligned, src2 aligned } } + + if (EXTERNAL_SSE2(cpu_flags)) { + switch (aligned) { + case 0: sad[4] = ff_pixelutils_sad_32x32_sse2; break; // src1 unaligned, src2 unaligned + case 1: sad[4] = ff_pixelutils_sad_u_32x32_sse2; break; // src1 aligned, src2 unaligned + case 2: sad[4] = ff_pixelutils_sad_a_32x32_sse2; break; // src1 aligned, src2 aligned + } + } + + if (EXTERNAL_AVX2_FAST(cpu_flags)) { + switch (aligned) { + case 0: sad[4] = ff_pixelutils_sad_32x32_avx2; break; // src1 unaligned, src2 unaligned + case 1: sad[4] = ff_pixelutils_sad_u_32x32_avx2; break; // src1 aligned, src2 unaligned + case 2: sad[4] = ff_pixelutils_sad_a_32x32_avx2; break; // src1 aligned, src2 aligned + } + } } |