From e573f05b10d23b9dda7ec8aadd7ced11b18d8691 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?=
Date: Fri, 22 Nov 2013 20:16:09 +0100
Subject: [PATCH] avcodec/vp9dsp: add DC only versions for idct/idct.

before:
./ffmpeg -v 0 -nostats -i ~/samples/vp9/etv.webm -f null - 16.29s user 0.02s system 99% cpu 16.323 total
./ffmpeg -v 0 -nostats -i ~/samples/vp9/etv.webm -f null - 16.32s user 0.01s system 99% cpu 16.351 total
./ffmpeg -v 0 -nostats -i ~/samples/vp9/etv.webm -f null - 16.27s user 0.05s system 99% cpu 16.335 total
after:
./ffmpeg -v 0 -nostats -i ~/samples/vp9/etv.webm -f null - 15.22s user 0.03s system 99% cpu 15.257 total
./ffmpeg -v 0 -nostats -i ~/samples/vp9/etv.webm -f null - 15.20s user 0.02s system 99% cpu 15.237 total
./ffmpeg -v 0 -nostats -i ~/samples/vp9/etv.webm -f null - 15.19s user 0.02s system 99% cpu 15.227 total
---
 libavcodec/vp9dsp.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/libavcodec/vp9dsp.c b/libavcodec/vp9dsp.c
index 6159616..2b111ad 100644
--- a/libavcodec/vp9dsp.c
+++ b/libavcodec/vp9dsp.c
@@ -853,6 +853,12 @@ static av_cold void vp9dsp_intrapred_init(VP9DSPContext *dsp)
 #undef init_intra_pred
 }
 
+#define has_dconly_idct_idct 1
+#define has_dconly_iadst_idct 0
+#define has_dconly_idct_iadst 0
+#define has_dconly_iadst_iadst 0
+#define has_dconly_iwht_iwht 0
+
 #define itxfm_wrapper(type_a, type_b, sz, bits) \
 static void type_a##_##type_b##_##sz##x##sz##_add_c(uint8_t *dst, \
                                                     ptrdiff_t stride, \
@@ -860,6 +866,22 @@ static void type_a##_##type_b##_##sz##x##sz##_add_c(uint8_t *dst, \
 { \
     int i, j; \
     int16_t tmp[sz * sz], out[sz]; \
+\
+    if (has_dconly_##type_a##_##type_b && eob == 1) { \
+        const int t = (((block[0] * 11585 + (1 << 13)) >> 14) \
+                                  * 11585 + (1 << 13)) >> 14; \
+        block[0] = 0; \
+        for (i = 0; i < sz; i++) { \
+            for (j = 0; j < sz; j++) \
+                dst[j * stride] = av_clip_uint8(dst[j * stride] + \
+                                                (bits ? \
+                                                 (t + (1 << (bits - 1))) >> bits : \
+                                                 t)); \
+            dst++; \
+        } \
+        return; \
+    } \
+\
     for (i = 0; i < sz; i++) \
         type_a##sz##_1d(block + i, sz, tmp + i * sz, 0); \
     memset(block, 0, sz * sz * sizeof(*block)); \
-- 
1.8.4.2