#include #include #include /********* generated code snippet *********/ #define N 8 static void fdct_1d(float *dst, const float *src, int stridea, int strideb) { int i; for (i = 0; i < N; i++) { const float x0_0 = src[0*stridea] + src[7*stridea]; const float x0_1 = src[1*stridea] + src[6*stridea]; const float x0_2 = src[2*stridea] + src[5*stridea]; const float x0_3 = src[3*stridea] + src[4*stridea]; const float x0_4 = src[0*stridea] - src[7*stridea]; const float x0_5 = src[1*stridea] - src[6*stridea]; const float x0_6 = src[2*stridea] - src[5*stridea]; const float x0_7 = src[3*stridea] - src[4*stridea]; const float x2_0 = x0_0 + x0_3; const float x2_1 = x0_1 + x0_2; const float x2_2 = x0_0 - x0_3; const float x2_3 = x0_1 - x0_2; const float x3_0 = x2_0 + x2_1; const float x3_1 = x2_0 - x2_1; const float x3_2 = 1.30656296487638*x2_2 + 0.541196100146197*x2_3; const float x3_3 = 0.541196100146197*x2_2 - 1.30656296487638*x2_3; const float x4_0 = 1.38703984532215*x0_4 + 0.275899379282943*x0_7; const float x4_1 = 1.17587560241936*x0_5 + 0.785694958387102*x0_6; const float x4_2 = -0.785694958387102*x0_5 + 1.17587560241936*x0_6; const float x4_3 = 0.275899379282943*x0_4 - 1.38703984532215*x0_7; const float x5_0 = x4_0 + x4_1; const float x5_1 = x4_0 - x4_1; const float x5_2 = x4_2 + x4_3; const float x5_3 = x4_2 - x4_3; const float x1_5 = 0.707106781186547*x5_1 - 0.707106781186547*x5_3; const float x1_6 = 0.707106781186547*x5_1 + 0.707106781186547*x5_3; dst[0*stridea] = 0.353553390593274*x3_0; dst[1*stridea] = 0.353553390593274*x5_0; dst[2*stridea] = 0.353553390593274*x3_2; dst[3*stridea] = 0.353553390593274*x1_5; dst[4*stridea] = 0.353553390593274*x3_1; dst[5*stridea] = 0.353553390593274*x1_6; dst[6*stridea] = 0.353553390593274*x3_3; dst[7*stridea] = 0.353553390593274*x5_2; dst += strideb; src += strideb; } } static void fdct(float *dst, const float *src) { float tmp[N*N]; fdct_1d(tmp, src, 1, N); fdct_1d(dst, tmp, N, 1); } static void idct_1d(float *dst, const float *src, int stridea, int strideb) { int i; for (i = 0; i < N; i++) { const float x6_0 = 1.4142135623731*src[0*stridea]; const float x6_1 = 1.38703984532215*src[1*stridea] + 0.275899379282943*src[7*stridea]; const float x6_2 = 1.30656296487638*src[2*stridea] + 0.541196100146197*src[6*stridea]; const float x6_3 = 1.17587560241936*src[3*stridea] + 0.785694958387102*src[5*stridea]; const float x6_4 = 1.4142135623731*src[4*stridea]; const float x6_5 = -0.785694958387102*src[3*stridea] + 1.17587560241936*src[5*stridea]; const float x6_6 = 0.541196100146197*src[2*stridea] - 1.30656296487638*src[6*stridea]; const float x6_7 = -0.275899379282943*src[1*stridea] + 1.38703984532215*src[7*stridea]; const float x8_0 = x6_0 + x6_4; const float x8_1 = x6_1 + x6_3; const float x8_2 = 1.4142135623731*x6_2; const float x8_3 = x6_0 - x6_4; const float x8_4 = x6_1 - x6_3; const float x9_0 = 0.5*x8_0 + 0.707106781186548*x8_1 + 0.5*x8_2; const float x9_1 = 0.707106781186548*x8_0 - 0.707106781186548*x8_2; const float x9_2 = 0.5*x8_0 - 0.707106781186548*x8_1 + 0.5*x8_2; const float x9_3 = 0.707106781186547*x8_3 + 0.707106781186547*x8_4; const float x9_4 = 0.707106781186547*x8_3 - 0.707106781186547*x8_4; const float xa_0 = 1.4142135623731*x6_6; const float xa_1 = x6_5 + x6_7; const float xa_2 = x6_5 - x6_7; const float xb_0 = 0.707106781186547*xa_0 + 0.707106781186547*xa_1; const float xb_1 = 0.707106781186547*xa_0 - 0.707106781186547*xa_1; const float x7_7 = -xb_1; dst[0*stridea] = 0.5*x9_0; dst[1*stridea] = 0.353553390593274*x9_3 - 0.353553390593274*x7_7; dst[2*stridea] = 0.353553390593274*x9_3 + 0.353553390593274*x7_7; dst[3*stridea] = 0.353553390593274*x9_1 + 0.353553390593274*xa_2; dst[4*stridea] = 0.353553390593274*x9_1 - 0.353553390593274*xa_2; dst[5*stridea] = 0.353553390593274*x9_4 - 0.353553390593274*xb_0; dst[6*stridea] = 0.353553390593274*x9_4 + 0.353553390593274*xb_0; dst[7*stridea] = 0.5*x9_2; dst += strideb; src += strideb; } } static void idct(float *dst, const float *src) { float tmp[N*N]; idct_1d(tmp, src, 1, N); idct_1d(dst, tmp, N, 1); } /********* slow reference dct code ********/ static float dct_matrix [N*N]; static float dct_trp_matrix[N*N]; void init_dct(void) { int i, j; // dct matrix for (i = 0; i < N; i++) for (j = 0; j < N; j++) if (i == 0) dct_matrix[i*N + j] = 1 / sqrt(N); else dct_matrix[i*N + j] = sqrt(2./N) * cos(((2*j+1)*i*M_PI) / (2*N)); // dct matrix transposed for (i = 0; i < N; i++) for (j = 0; j < N; j++) dct_trp_matrix[i*N + j] = dct_matrix[j*N + i]; } static void dct_1d_ref(float *dst, const float *src, int stridea, int strideb, const float *matrix) { int x; for (x = 0; x < N; x++) { int i, j; for (j = 0; j < N; j++) { float sum = 0.; for (i = 0; i < N; i++) sum += matrix[j*N + i] * src[i*stridea]; dst[j*stridea] = sum; } dst += strideb; src += strideb; } } static void fdct_ref(float *dst, const float *src) { float tmp[N*N]; dct_1d_ref(tmp, src, 1, N, dct_matrix); dct_1d_ref(dst, tmp, N, 1, dct_matrix); } static void idct_ref(float *dst, const float *src) { float tmp[N*N]; dct_1d_ref(tmp, src, 1, N, dct_trp_matrix); dct_1d_ref(dst, tmp, N, 1, dct_trp_matrix); } /********** test **************************/ static int check_output(const char *name, const float *ref, const float *out) { int i, ret = 0; for (i = 0; i < N*N; i++) { const int ok = fabs(ref[i] - out[i]) < 0.0005; if (!ok) { printf("%s ref:%9.3f out:%9.3f diff:%9.3f\n", name, ref[i], out[i], ref[i] - out[i]); ret = -1; } } return ret; } int main() { int i; float src[N*N]; float ref_fdct[N*N], ref_idct[N*N]; float out_fdct[N*N], out_idct[N*N]; for (i = 0; i < N*N; i++) src[i] = random() % 256; init_dct(); fdct_ref(ref_fdct, src); fdct(out_fdct, src); if (check_output("FDCT", ref_fdct, out_fdct) < 0) return 1; idct_ref(ref_idct, ref_fdct); idct(out_idct, out_fdct); if (check_output("IDCT", ref_idct, out_idct) < 0) return 1; return 0; }