/*
 * Self-test for a generated 32x32 fast DCT / inverse DCT.
 *
 * The fast fdct()/idct() below are compared against a slow matrix-based
 * reference (fdct_ref()/idct_ref()); main() exits with 0 when both agree
 * within the tolerance used by check_output(), and with 1 otherwise.
 */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/* M_PI is a POSIX/XSI extension and is not provided by strict ISO C modes. */
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

/********* generated code snippet *********/

#define N 32

/* The generated kernels below hard-code the indices 0..31. */
#if N != 32
#error "fdct_1d()/idct_1d() are generated for N == 32 only"
#endif

/*
 * Fast forward transform: runs N independent 32-point 1D passes.
 *
 * dst, src  output / input buffers (must not be the same pass line in
 *           place: all 32 inputs are read before any output is written,
 *           but callers here always use distinct buffers)
 * stridea   distance, in floats, between consecutive samples of one pass
 * strideb   distance, in floats, between the starts of consecutive passes
 *
 * Every output is scaled by 0.176776695296637 (about 1/sqrt(32)).
 * Constants are double literals, so each expression is evaluated in double
 * and rounded when stored into the float temporaries.
 */
static void fdct_1d(float *dst, const float *src,
                    int stridea, int strideb)
{
    int i;

    for (i = 0; i < N; i++) {
        /* stage 0: mirrored sums (x0_0..x0_f) and differences (x0_10..x0_1f) */
        const float x0_0 = src[ 0*stridea] + src[31*stridea];
        const float x0_1 = src[ 1*stridea] + src[30*stridea];
        const float x0_2 = src[ 2*stridea] + src[29*stridea];
        const float x0_3 = src[ 3*stridea] + src[28*stridea];
        const float x0_4 = src[ 4*stridea] + src[27*stridea];
        const float x0_5 = src[ 5*stridea] + src[26*stridea];
        const float x0_6 = src[ 6*stridea] + src[25*stridea];
        const float x0_7 = src[ 7*stridea] + src[24*stridea];
        const float x0_8 = src[ 8*stridea] + src[23*stridea];
        const float x0_9 = src[ 9*stridea] + src[22*stridea];
        const float x0_a = src[10*stridea] + src[21*stridea];
        const float x0_b = src[11*stridea] + src[20*stridea];
        const float x0_c = src[12*stridea] + src[19*stridea];
        const float x0_d = src[13*stridea] + src[18*stridea];
        const float x0_e = src[14*stridea] + src[17*stridea];
        const float x0_f = src[15*stridea] + src[16*stridea];
        const float x0_10 = src[ 0*stridea] - src[31*stridea];
        const float x0_11 = src[ 1*stridea] - src[30*stridea];
        const float x0_12 = src[ 2*stridea] - src[29*stridea];
        const float x0_13 = src[ 3*stridea] - src[28*stridea];
        const float x0_14 = src[ 4*stridea] - src[27*stridea];
        const float x0_15 = src[ 5*stridea] - src[26*stridea];
        const float x0_16 = src[ 6*stridea] - src[25*stridea];
        const float x0_17 = src[ 7*stridea] - src[24*stridea];
        const float x0_18 = src[ 8*stridea] - src[23*stridea];
        const float x0_19 = src[ 9*stridea] - src[22*stridea];
        const float x0_1a = src[10*stridea] - src[21*stridea];
        const float x0_1b = src[11*stridea] - src[20*stridea];
        const float x0_1c = src[12*stridea] - src[19*stridea];
        const float x0_1d = src[13*stridea] - src[18*stridea];
        const float x0_1e = src[14*stridea] - src[17*stridea];
        const float x0_1f = src[15*stridea] - src[16*stridea];

        /* the 16 sums are folded again the same way */
        const float x2_0 = x0_0 + x0_f;
        const float x2_1 = x0_1 + x0_e;
        const float x2_2 = x0_2 + x0_d;
        const float x2_3 = x0_3 + x0_c;
        const float x2_4 = x0_4 + x0_b;
        const float x2_5 = x0_5 + x0_a;
        const float x2_6 = x0_6 + x0_9;
        const float x2_7 = x0_7 + x0_8;
        const float x2_8 = x0_0 - x0_f;
        const float x2_9 = x0_1 - x0_e;
        const float x2_a = x0_2 - x0_d;
        const float x2_b = x0_3 - x0_c;
        const float x2_c = x0_4 - x0_b;
        const float x2_d = x0_5 - x0_a;
        const float x2_e = x0_6 - x0_9;
        const float x2_f = x0_7 - x0_8;

        const float x4_0 = x2_0 + x2_7;
        const float x4_1 = x2_1 + x2_6;
        const float x4_2 = x2_2 + x2_5;
        const float x4_3 = x2_3 + x2_4;
        const float x4_4 = x2_0 - x2_7;
        const float x4_5 = x2_1 - x2_6;
        const float x4_6 = x2_2 - x2_5;
        const float x4_7 = x2_3 - x2_4;

        const float x6_0 = x4_0 + x4_3;
        const float x6_1 = x4_1 + x4_2;
        const float x6_2 = x4_0 - x4_3;
        const float x6_3 = x4_1 - x4_2;

        const float x7_0 = x6_0 + x6_1;
        const float x7_1 = x6_0 - x6_1;
        const float x7_2 = 1.30656296487638*x6_2 + 0.541196100146197*x6_3;
        const float x7_3 = 0.541196100146197*x6_2 - 1.30656296487638*x6_3;

        const float x8_0 = 1.38703984532215*x4_4 + 0.275899379282943*x4_7;
        const float x8_1 = 1.17587560241936*x4_5 + 0.785694958387102*x4_6;
        const float x8_2 = -0.785694958387102*x4_5 + 1.17587560241936*x4_6;
        const float x8_3 = 0.275899379282943*x4_4 - 1.38703984532215*x4_7;

        const float x9_0 = x8_0 + x8_1;
        const float x9_1 = x8_0 - x8_1;
        const float x9_2 = x8_2 + x8_3;
        const float x9_3 = x8_2 - x8_3;

        const float x5_5 = 0.707106781186547*x9_1 - 0.707106781186547*x9_3;
        const float x5_6 = 0.707106781186547*x9_1 + 0.707106781186547*x9_3;

        const float xa_0 = 1.40740373752638*x2_8 + 0.138617169199091*x2_f;
        const float xa_1 = 1.35331800117435*x2_9 + 0.410524527522357*x2_e;
        const float xa_2 = 1.24722501298667*x2_a + 0.666655658477747*x2_d;
        const float xa_3 = 1.09320186700176*x2_b + 0.897167586342636*x2_c;
        const float xa_4 = -0.897167586342636*x2_b + 1.09320186700176*x2_c;
        const float xa_5 = 0.666655658477747*x2_a - 1.24722501298667*x2_d;
        const float xa_6 = -0.410524527522357*x2_9 + 1.35331800117435*x2_e;
        const float xa_7 = 0.138617169199091*x2_8 - 1.40740373752638*x2_f;

        const float xc_0 = xa_0 + xa_3;
        const float xc_1 = xa_1 + xa_2;
        const float xc_2 = xa_0 - xa_3;
        const float xc_3 = xa_1 - xa_2;

        const float xd_0 = xc_0 + xc_1;
        const float xd_1 = xc_0 - xc_1;
        const float xd_2 = 1.30656296487638*xc_2 + 0.541196100146197*xc_3;
        const float xd_3 = 0.541196100146197*xc_2 - 1.30656296487638*xc_3;

        const float xe_0 = xa_4 + xa_7;
        const float xe_1 = xa_5 + xa_6;
        const float xe_2 = xa_4 - xa_7;
        const float xe_3 = xa_5 - xa_6;

        const float xf_0 = xe_0 + xe_1;
        const float xf_1 = xe_0 - xe_1;
        const float xf_2 = 1.30656296487638*xe_2 + 0.541196100146197*xe_3;
        const float xf_3 = 0.541196100146197*xe_2 - 1.30656296487638*xe_3;

        const float x3_9 = 0.707106781186547*xd_2 - 0.707106781186547*xf_3;
        const float x3_a = 0.707106781186547*xd_2 + 0.707106781186547*xf_3;
        const float x3_b = 0.707106781186547*xd_1 + 0.707106781186547*xf_1;
        const float x3_c = 0.707106781186547*xd_1 - 0.707106781186547*xf_1;
        const float x3_d = 0.707106781186547*xd_3 - 0.707106781186547*xf_2;
        const float x3_e = 0.707106781186547*xd_3 + 0.707106781186547*xf_2;

        /* the 16 stage-0 differences feed the odd-indexed outputs */
        const float x10_0 = 1.41251008020198*x0_10 + 0.0693921705079407*x0_1f;
        const float x10_1 = 1.39890683597308*x0_11 + 0.207508226988212*x0_1e;
        const float x10_2 = 1.37183135419349*x0_12 + 0.34362586580705*x0_1d;
        const float x10_3 = 1.33154438655373*x0_13 + 0.476434199693161*x0_1c;
        const float x10_4 = 1.27843391857524*x0_14 + 0.604654211790801*x0_1b;
        const float x10_5 = 1.21301143309781*x0_15 + 0.72705107329128*x0_1a;
        const float x10_6 = 1.13590698442014*x0_16 + 0.842446035509419*x0_19;
        const float x10_7 = 1.04786313053259*x0_17 + 0.949727781877754*x0_18;
        const float x10_8 = -0.949727781877754*x0_17 + 1.04786313053259*x0_18;
        const float x10_9 = 0.842446035509419*x0_16 - 1.13590698442014*x0_19;
        const float x10_a = -0.72705107329128*x0_15 + 1.21301143309781*x0_1a;
        const float x10_b = 0.604654211790801*x0_14 - 1.27843391857524*x0_1b;
        const float x10_c = -0.476434199693161*x0_13 + 1.33154438655373*x0_1c;
        const float x10_d = 0.34362586580705*x0_12 - 1.37183135419349*x0_1d;
        const float x10_e = -0.207508226988212*x0_11 + 1.39890683597308*x0_1e;
        const float x10_f = 0.0693921705079407*x0_10 - 1.41251008020198*x0_1f;

        const float x12_0 = x10_0 + x10_7;
        const float x12_1 = x10_1 + x10_6;
        const float x12_2 = x10_2 + x10_5;
        const float x12_3 = x10_3 + x10_4;
        const float x12_4 = x10_0 - x10_7;
        const float x12_5 = x10_1 - x10_6;
        const float x12_6 = x10_2 - x10_5;
        const float x12_7 = x10_3 - x10_4;

        const float x14_0 = x12_0 + x12_3;
        const float x14_1 = x12_1 + x12_2;
        const float x14_2 = x12_0 - x12_3;
        const float x14_3 = x12_1 - x12_2;

        const float x15_0 = x14_0 + x14_1;
        const float x15_1 = x14_0 - x14_1;
        const float x15_2 = 1.30656296487638*x14_2 + 0.541196100146197*x14_3;
        const float x15_3 = 0.541196100146197*x14_2 - 1.30656296487638*x14_3;

        const float x16_0 = 1.38703984532215*x12_4 + 0.275899379282943*x12_7;
        const float x16_1 = 1.17587560241936*x12_5 + 0.785694958387102*x12_6;
        const float x16_2 = -0.785694958387102*x12_5 + 1.17587560241936*x12_6;
        const float x16_3 = 0.275899379282943*x12_4 - 1.38703984532215*x12_7;

        const float x17_0 = x16_0 + x16_1;
        const float x17_1 = x16_0 - x16_1;
        const float x17_2 = x16_2 + x16_3;
        const float x17_3 = x16_2 - x16_3;

        const float x13_5 = 0.707106781186547*x17_1 - 0.707106781186547*x17_3;
        const float x13_6 = 0.707106781186547*x17_1 + 0.707106781186547*x17_3;

        const float x18_0 = x10_8 + x10_f;
        const float x18_1 = x10_9 + x10_e;
        const float x18_2 = x10_a + x10_d;
        const float x18_3 = x10_b + x10_c;
        const float x18_4 = x10_8 - x10_f;
        const float x18_5 = x10_9 - x10_e;
        const float x18_6 = x10_a - x10_d;
        const float x18_7 = x10_b - x10_c;

        const float x1a_0 = x18_0 + x18_3;
        const float x1a_1 = x18_1 + x18_2;
        const float x1a_2 = x18_0 - x18_3;
        const float x1a_3 = x18_1 - x18_2;

        const float x1b_0 = x1a_0 + x1a_1;
        const float x1b_1 = x1a_0 - x1a_1;
        const float x1b_2 = 1.30656296487638*x1a_2 + 0.541196100146197*x1a_3;
        const float x1b_3 = 0.541196100146197*x1a_2 - 1.30656296487638*x1a_3;

        const float x1c_0 = 1.38703984532215*x18_4 + 0.275899379282943*x18_7;
        const float x1c_1 = 1.17587560241936*x18_5 + 0.785694958387102*x18_6;
        const float x1c_2 = -0.785694958387102*x18_5 + 1.17587560241936*x18_6;
        const float x1c_3 = 0.275899379282943*x18_4 - 1.38703984532215*x18_7;

        const float x1d_0 = x1c_0 + x1c_1;
        const float x1d_1 = x1c_0 - x1c_1;
        const float x1d_2 = x1c_2 + x1c_3;
        const float x1d_3 = x1c_2 - x1c_3;

        const float x19_5 = 0.707106781186547*x1d_1 - 0.707106781186547*x1d_3;
        const float x19_6 = 0.707106781186547*x1d_1 + 0.707106781186547*x1d_3;

        const float x1_11 = 0.707106781186547*x17_0 - 0.707106781186547*x1d_2;
        const float x1_12 = 0.707106781186547*x17_0 + 0.707106781186547*x1d_2;
        const float x1_13 = 0.707106781186547*x15_2 + 0.707106781186547*x1b_3;
        const float x1_14 = 0.707106781186547*x15_2 - 0.707106781186547*x1b_3;
        const float x1_15 = 0.707106781186547*x13_5 - 0.707106781186547*x19_6;
        const float x1_16 = 0.707106781186547*x13_5 + 0.707106781186547*x19_6;
        const float x1_17 = 0.707106781186547*x15_1 + 0.707106781186547*x1b_1;
        const float x1_18 = 0.707106781186547*x15_1 - 0.707106781186547*x1b_1;
        const float x1_19 = 0.707106781186547*x13_6 - 0.707106781186547*x19_5;
        const float x1_1a = 0.707106781186547*x13_6 + 0.707106781186547*x19_5;
        const float x1_1b = 0.707106781186547*x15_3 + 0.707106781186547*x1b_2;
        const float x1_1c = 0.707106781186547*x15_3 - 0.707106781186547*x1b_2;
        const float x1_1d = 0.707106781186547*x17_2 - 0.707106781186547*x1d_0;
        const float x1_1e = 0.707106781186547*x17_2 + 0.707106781186547*x1d_0;

        /* scaled outputs */
        dst[ 0*stridea] = 0.176776695296637*x7_0;
        dst[ 1*stridea] = 0.176776695296637*x15_0;
        dst[ 2*stridea] = 0.176776695296637*xd_0;
        dst[ 3*stridea] = 0.176776695296637*x1_11;
        dst[ 4*stridea] = 0.176776695296637*x9_0;
        dst[ 5*stridea] = 0.176776695296637*x1_12;
        dst[ 6*stridea] = 0.176776695296637*x3_9;
        dst[ 7*stridea] = 0.176776695296637*x1_13;
        dst[ 8*stridea] = 0.176776695296637*x7_2;
        dst[ 9*stridea] = 0.176776695296637*x1_14;
        dst[10*stridea] = 0.176776695296637*x3_a;
        dst[11*stridea] = 0.176776695296637*x1_15;
        dst[12*stridea] = 0.176776695296637*x5_5;
        dst[13*stridea] = 0.176776695296637*x1_16;
        dst[14*stridea] = 0.176776695296637*x3_b;
        dst[15*stridea] = 0.176776695296637*x1_17;
        dst[16*stridea] = 0.176776695296637*x7_1;
        dst[17*stridea] = 0.176776695296637*x1_18;
        dst[18*stridea] = 0.176776695296637*x3_c;
        dst[19*stridea] = 0.176776695296637*x1_19;
        dst[20*stridea] = 0.176776695296637*x5_6;
        dst[21*stridea] = 0.176776695296637*x1_1a;
        dst[22*stridea] = 0.176776695296637*x3_d;
        dst[23*stridea] = 0.176776695296637*x1_1b;
        dst[24*stridea] = 0.176776695296637*x7_3;
        dst[25*stridea] = 0.176776695296637*x1_1c;
        dst[26*stridea] = 0.176776695296637*x3_e;
        dst[27*stridea] = 0.176776695296637*x1_1d;
        dst[28*stridea] = 0.176776695296637*x9_2;
        dst[29*stridea] = 0.176776695296637*x1_1e;
        dst[30*stridea] = 0.176776695296637*xf_0;
        dst[31*stridea] = 0.176776695296637*x1b_0;

        /* advance to the next pass */
        dst += strideb;
        src += strideb;
    }
}

/*
 * Fast 2D forward transform of an N*N block: one fdct_1d() call with
 * (stridea, strideb) = (1, N) into a temporary, then one with (N, 1)
 * into dst.
 */
static void fdct(float *dst, const float *src)
{
    float tmp[N*N];

    fdct_1d(tmp, src, 1, N);
    fdct_1d(dst, tmp, N, 1);
}

/*
 * Fast inverse transform: runs N independent 32-point 1D passes.
 * Parameters have the same meaning as for fdct_1d(). main() compares the
 * 2D result against idct_ref().
 */
static void idct_1d(float *dst, const float *src,
                    int stridea, int strideb)
{
    int i;

    for (i = 0; i < N; i++) {
        /* input rotations: src[k] is paired with src[32-k] */
        const float x1e_0 = 1.4142135623731*src[ 0*stridea];
        const float x1e_1 = 1.41251008020198*src[ 1*stridea] + 0.0693921705079407*src[31*stridea];
        const float x1e_2 = 1.40740373752638*src[ 2*stridea] + 0.138617169199091*src[30*stridea];
        const float x1e_3 = 1.39890683597308*src[ 3*stridea] + 0.207508226988212*src[29*stridea];
        const float x1e_4 = 1.38703984532215*src[ 4*stridea] + 0.275899379282943*src[28*stridea];
        const float x1e_5 = 1.37183135419349*src[ 5*stridea] + 0.34362586580705*src[27*stridea];
        const float x1e_6 = 1.35331800117435*src[ 6*stridea] + 0.410524527522357*src[26*stridea];
        const float x1e_7 = 1.33154438655373*src[ 7*stridea] + 0.476434199693161*src[25*stridea];
        const float x1e_8 = 1.30656296487638*src[ 8*stridea] + 0.541196100146197*src[24*stridea];
        const float x1e_9 = 1.27843391857524*src[ 9*stridea] + 0.604654211790801*src[23*stridea];
        const float x1e_a = 1.24722501298667*src[10*stridea] + 0.666655658477747*src[22*stridea];
        const float x1e_b = 1.21301143309781*src[11*stridea] + 0.72705107329128*src[21*stridea];
        const float x1e_c = 1.17587560241936*src[12*stridea] + 0.785694958387102*src[20*stridea];
        const float x1e_d = 1.13590698442014*src[13*stridea] + 0.842446035509419*src[19*stridea];
        const float x1e_e = 1.09320186700176*src[14*stridea] + 0.897167586342636*src[18*stridea];
        const float x1e_f = 1.04786313053259*src[15*stridea] + 0.949727781877754*src[17*stridea];
        const float x1e_10 = 1.4142135623731*src[16*stridea];
        const float x1e_11 = -0.949727781877754*src[15*stridea] + 1.04786313053259*src[17*stridea];
        const float x1e_12 = 0.897167586342636*src[14*stridea] - 1.09320186700176*src[18*stridea];
        const float x1e_13 = -0.842446035509419*src[13*stridea] + 1.13590698442014*src[19*stridea];
        const float x1e_14 = 0.785694958387102*src[12*stridea] - 1.17587560241936*src[20*stridea];
        const float x1e_15 = -0.72705107329128*src[11*stridea] + 1.21301143309781*src[21*stridea];
        const float x1e_16 = 0.666655658477747*src[10*stridea] - 1.24722501298667*src[22*stridea];
        const float x1e_17 = -0.604654211790801*src[ 9*stridea] + 1.27843391857524*src[23*stridea];
        const float x1e_18 = 0.541196100146197*src[ 8*stridea] - 1.30656296487638*src[24*stridea];
        const float x1e_19 = -0.476434199693161*src[ 7*stridea] + 1.33154438655373*src[25*stridea];
        const float x1e_1a = 0.410524527522357*src[ 6*stridea] - 1.35331800117435*src[26*stridea];
        const float x1e_1b = -0.34362586580705*src[ 5*stridea] + 1.37183135419349*src[27*stridea];
        const float x1e_1c = 0.275899379282943*src[ 4*stridea] - 1.38703984532215*src[28*stridea];
        const float x1e_1d = -0.207508226988212*src[ 3*stridea] + 1.39890683597308*src[29*stridea];
        const float x1e_1e = 0.138617169199091*src[ 2*stridea] - 1.40740373752638*src[30*stridea];
        const float x1e_1f = -0.0693921705079407*src[ 1*stridea] + 1.41251008020198*src[31*stridea];

        /* first half: built from x1e_0..x1e_10 */
        const float x20_0 = x1e_0 + x1e_10;
        const float x20_1 = x1e_1 + x1e_f;
        const float x20_2 = x1e_2 + x1e_e;
        const float x20_3 = x1e_3 + x1e_d;
        const float x20_4 = x1e_4 + x1e_c;
        const float x20_5 = x1e_5 + x1e_b;
        const float x20_6 = x1e_6 + x1e_a;
        const float x20_7 = x1e_7 + x1e_9;
        const float x20_8 = 1.4142135623731*x1e_8;
        const float x20_9 = x1e_0 - x1e_10;
        const float x20_a = x1e_1 - x1e_f;
        const float x20_b = x1e_2 - x1e_e;
        const float x20_c = x1e_3 - x1e_d;
        const float x20_d = x1e_4 - x1e_c;
        const float x20_e = x1e_5 - x1e_b;
        const float x20_f = x1e_6 - x1e_a;
        const float x20_10 = x1e_7 - x1e_9;

        const float x22_0 = x20_0 + x20_8;
        const float x22_1 = x20_1 + x20_7;
        const float x22_2 = x20_2 + x20_6;
        const float x22_3 = x20_3 + x20_5;
        const float x22_4 = 1.4142135623731*x20_4;
        const float x22_5 = x20_0 - x20_8;
        const float x22_6 = x20_1 - x20_7;
        const float x22_7 = x20_2 - x20_6;
        const float x22_8 = x20_3 - x20_5;

        const float x24_0 = x22_0 + x22_4;
        const float x24_1 = x22_1 + x22_3;
        const float x24_2 = 1.4142135623731*x22_2;
        const float x24_3 = x22_0 - x22_4;
        const float x24_4 = x22_1 - x22_3;

        const float x25_0 = 0.5*x24_0 + 0.707106781186548*x24_1 + 0.5*x24_2;
        const float x25_1 = 0.707106781186548*x24_0 - 0.707106781186548*x24_2;
        const float x25_2 = 0.5*x24_0 - 0.707106781186548*x24_1 + 0.5*x24_2;
        const float x25_3 = 0.707106781186547*x24_3 + 0.707106781186547*x24_4;
        const float x25_4 = 0.707106781186547*x24_3 - 0.707106781186547*x24_4;

        const float x26_0 = 1.4142135623731*x22_5;
        const float x26_1 = 1.30656296487638*x22_6 + 0.541196100146197*x22_8;
        const float x26_2 = 1.4142135623731*x22_7;
        const float x26_3 = -0.541196100146197*x22_6 + 1.30656296487638*x22_8;

        const float x27_0 = 0.5*x26_0 + 0.707106781186548*x26_1 + 0.5*x26_2;
        const float x27_1 = 0.707106781186548*x26_0 - 0.707106781186548*x26_2;
        const float x27_2 = 0.5*x26_0 - 0.707106781186548*x26_1 + 0.5*x26_2;

        const float x23_6 = 0.707106781186547*x27_1 - 0.707106781186547*x26_3;
        const float x23_7 = 0.707106781186547*x27_1 + 0.707106781186547*x26_3;

        const float x28_0 = 1.4142135623731*x20_9;
        const float x28_1 = 0.275899379282943*x20_10 + 1.38703984532215*x20_a;
        const float x28_2 = 1.30656296487638*x20_b + 0.541196100146197*x20_f;
        const float x28_3 = 1.17587560241936*x20_c + 0.785694958387102*x20_e;
        const float x28_4 = 1.4142135623731*x20_d;
        const float x28_5 = -0.785694958387102*x20_c + 1.17587560241936*x20_e;
        const float x28_6 = 0.541196100146197*x20_b - 1.30656296487638*x20_f;
        const float x28_7 = 1.38703984532215*x20_10 - 0.275899379282943*x20_a;

        const float x2a_0 = x28_0 + x28_4;
        const float x2a_1 = x28_1 + x28_3;
        const float x2a_2 = 1.4142135623731*x28_2;
        const float x2a_3 = x28_0 - x28_4;
        const float x2a_4 = x28_1 - x28_3;

        const float x2b_0 = 0.5*x2a_0 + 0.707106781186548*x2a_1 + 0.5*x2a_2;
        const float x2b_1 = 0.707106781186548*x2a_0 - 0.707106781186548*x2a_2;
        const float x2b_2 = 0.5*x2a_0 - 0.707106781186548*x2a_1 + 0.5*x2a_2;
        const float x2b_3 = 0.707106781186547*x2a_3 + 0.707106781186547*x2a_4;
        const float x2b_4 = 0.707106781186547*x2a_3 - 0.707106781186547*x2a_4;

        const float x2c_0 = 1.4142135623731*x28_6;
        const float x2c_1 = x28_5 + x28_7;
        const float x2c_2 = x28_5 - x28_7;

        const float x2d_0 = 0.707106781186547*x2c_0 + 0.707106781186547*x2c_1;
        const float x2d_1 = 0.707106781186547*x2c_0 - 0.707106781186547*x2c_1;

        const float x29_7 = -x2d_1;

        const float x21_a = 0.707106781186547*x2b_3 - 0.707106781186547*x29_7;
        const float x21_b = 0.707106781186547*x2b_3 + 0.707106781186547*x29_7;
        const float x21_c = 0.707106781186547*x2b_1 + 0.707106781186547*x2c_2;
        const float x21_d = 0.707106781186547*x2b_1 - 0.707106781186547*x2c_2;
        const float x21_e = 0.707106781186547*x2b_4 - 0.707106781186547*x2d_0;
        const float x21_f = 0.707106781186547*x2b_4 + 0.707106781186547*x2d_0;

        /* second half: built from x1e_11..x1e_1f */
        const float x2e_0 = 1.4142135623731*x1e_18;
        const float x2e_1 = x1e_17 + x1e_19;
        const float x2e_2 = x1e_16 + x1e_1a;
        const float x2e_3 = x1e_15 + x1e_1b;
        const float x2e_4 = x1e_14 + x1e_1c;
        const float x2e_5 = x1e_13 + x1e_1d;
        const float x2e_6 = x1e_12 + x1e_1e;
        const float x2e_7 = x1e_11 + x1e_1f;
        const float x2e_8 = x1e_11 - x1e_1f;
        const float x2e_9 = x1e_12 - x1e_1e;
        const float x2e_a = x1e_13 - x1e_1d;
        const float x2e_b = x1e_14 - x1e_1c;
        const float x2e_c = x1e_15 - x1e_1b;
        const float x2e_d = x1e_16 - x1e_1a;
        const float x2e_e = x1e_17 - x1e_19;

        const float x30_0 = 1.4142135623731*x2e_0;
        const float x30_1 = 1.38703984532215*x2e_1 + 0.275899379282943*x2e_7;
        const float x30_2 = 1.30656296487638*x2e_2 + 0.541196100146197*x2e_6;
        const float x30_3 = 1.17587560241936*x2e_3 + 0.785694958387102*x2e_5;
        const float x30_4 = 1.4142135623731*x2e_4;
        const float x30_5 = -0.785694958387102*x2e_3 + 1.17587560241936*x2e_5;
        const float x30_6 = 0.541196100146197*x2e_2 - 1.30656296487638*x2e_6;
        const float x30_7 = -0.275899379282943*x2e_1 + 1.38703984532215*x2e_7;

        const float x32_0 = x30_0 + x30_4;
        const float x32_1 = x30_1 + x30_3;
        const float x32_2 = 1.4142135623731*x30_2;
        const float x32_3 = x30_0 - x30_4;
        const float x32_4 = x30_1 - x30_3;

        const float x33_0 = 0.5*x32_0 + 0.707106781186548*x32_1 + 0.5*x32_2;
        const float x33_1 = 0.707106781186548*x32_0 - 0.707106781186548*x32_2;
        const float x33_2 = 0.5*x32_0 - 0.707106781186548*x32_1 + 0.5*x32_2;
        const float x33_3 = 0.707106781186547*x32_3 + 0.707106781186547*x32_4;
        const float x33_4 = 0.707106781186547*x32_3 - 0.707106781186547*x32_4;

        const float x34_0 = 1.4142135623731*x30_6;
        const float x34_1 = x30_5 + x30_7;
        const float x34_2 = x30_5 - x30_7;

        const float x35_0 = 0.707106781186547*x34_0 + 0.707106781186547*x34_1;
        const float x35_1 = 0.707106781186547*x34_0 - 0.707106781186547*x34_1;

        const float x31_7 = -x35_1;

        const float x2f_1 = 0.707106781186547*x33_3 - 0.707106781186547*x31_7;
        const float x2f_2 = 0.707106781186547*x33_3 + 0.707106781186547*x31_7;
        const float x2f_3 = 0.707106781186547*x33_1 + 0.707106781186547*x34_2;
        const float x2f_4 = 0.707106781186547*x33_1 - 0.707106781186547*x34_2;
        const float x2f_5 = 0.707106781186547*x33_4 - 0.707106781186547*x35_0;
        const float x2f_6 = 0.707106781186547*x33_4 + 0.707106781186547*x35_0;

        const float x36_0 = 1.4142135623731*x2e_b;
        const float x36_1 = x2e_a + x2e_c;
        const float x36_2 = x2e_9 + x2e_d;
        const float x36_3 = x2e_8 + x2e_e;
        const float x36_4 = x2e_8 - x2e_e;
        const float x36_5 = x2e_9 - x2e_d;
        const float x36_6 = x2e_a - x2e_c;

        const float x38_0 = 1.4142135623731*x36_0;
        const float x38_1 = 1.30656296487638*x36_1 + 0.541196100146197*x36_3;
        const float x38_2 = 1.4142135623731*x36_2;
        const float x38_3 = -0.541196100146197*x36_1 + 1.30656296487638*x36_3;

        const float x39_0 = 0.5*x38_0 + 0.707106781186548*x38_1 + 0.5*x38_2;
        const float x39_1 = 0.707106781186548*x38_0 - 0.707106781186548*x38_2;
        const float x39_2 = 0.5*x38_0 - 0.707106781186548*x38_1 + 0.5*x38_2;

        const float x37_1 = 0.707106781186547*x39_1 - 0.707106781186547*x38_3;
        const float x37_2 = 0.707106781186547*x39_1 + 0.707106781186547*x38_3;

        const float x3a_0 = 1.4142135623731*x36_5;
        const float x3a_1 = x36_4 + x36_6;
        const float x3a_2 = x36_4 - x36_6;

        const float x3b_0 = 0.707106781186547*x3a_0 + 0.707106781186547*x3a_1;
        const float x3b_1 = 0.707106781186547*x3a_0 - 0.707106781186547*x3a_1;

        /* sign flips */
        const float x37_6 = -x3b_1;
        const float x2f_a = -x37_1;
        const float x2f_e = -x39_2;
        const float x1f_13 = -x2f_1;
        const float x1f_17 = -x2f_3;
        const float x1f_1b = -x2f_5;
        const float x1f_1f = -x33_2;

        /* scaled outputs: each adjacent pair is a sum/difference of two terms */
        dst[ 0*stridea] = 0.25*x25_0;
        dst[ 1*stridea] = -0.176776695296637*x1f_1f + 0.176776695296637*x2b_0;
        dst[ 2*stridea] = 0.176776695296637*x1f_1f + 0.176776695296637*x2b_0;
        dst[ 3*stridea] = 0.176776695296637*x2f_e + 0.176776695296637*x27_0;
        dst[ 4*stridea] = -0.176776695296637*x2f_e + 0.176776695296637*x27_0;
        dst[ 5*stridea] = -0.176776695296637*x2f_6 + 0.176776695296637*x21_a;
        dst[ 6*stridea] = 0.176776695296637*x2f_6 + 0.176776695296637*x21_a;
        dst[ 7*stridea] = 0.176776695296637*x37_6 + 0.176776695296637*x25_3;
        dst[ 8*stridea] = -0.176776695296637*x37_6 + 0.176776695296637*x25_3;
        dst[ 9*stridea] = -0.176776695296637*x1f_1b + 0.176776695296637*x21_b;
        dst[10*stridea] = 0.176776695296637*x1f_1b + 0.176776695296637*x21_b;
        dst[11*stridea] = 0.176776695296637*x37_2 + 0.176776695296637*x23_6;
        dst[12*stridea] = -0.176776695296637*x37_2 + 0.176776695296637*x23_6;
        dst[13*stridea] = -0.176776695296637*x2f_4 + 0.176776695296637*x21_c;
        dst[14*stridea] = 0.176776695296637*x2f_4 + 0.176776695296637*x21_c;
        dst[15*stridea] = 0.176776695296637*x3a_2 + 0.176776695296637*x25_1;
        dst[16*stridea] = -0.176776695296637*x3a_2 + 0.176776695296637*x25_1;
        dst[17*stridea] = -0.176776695296637*x1f_17 + 0.176776695296637*x21_d;
        dst[18*stridea] = 0.176776695296637*x1f_17 + 0.176776695296637*x21_d;
        dst[19*stridea] = 0.176776695296637*x2f_a + 0.176776695296637*x23_7;
        dst[20*stridea] = -0.176776695296637*x2f_a + 0.176776695296637*x23_7;
        dst[21*stridea] = -0.176776695296637*x2f_2 + 0.176776695296637*x21_e;
        dst[22*stridea] = 0.176776695296637*x2f_2 + 0.176776695296637*x21_e;
        dst[23*stridea] = 0.176776695296637*x3b_0 + 0.176776695296637*x25_4;
        dst[24*stridea] = -0.176776695296637*x3b_0 + 0.176776695296637*x25_4;
        dst[25*stridea] = -0.176776695296637*x1f_13 + 0.176776695296637*x21_f;
        dst[26*stridea] = 0.176776695296637*x1f_13 + 0.176776695296637*x21_f;
        dst[27*stridea] = 0.176776695296637*x39_0 + 0.176776695296637*x27_2;
        dst[28*stridea] = -0.176776695296637*x39_0 + 0.176776695296637*x27_2;
        dst[29*stridea] = -0.176776695296637*x33_0 + 0.176776695296637*x2b_2;
        dst[30*stridea] = 0.176776695296637*x33_0 + 0.176776695296637*x2b_2;
        dst[31*stridea] = 0.25*x25_2;

        /* advance to the next pass */
        dst += strideb;
        src += strideb;
    }
}

/*
 * Fast 2D inverse transform of an N*N block: one idct_1d() call with
 * (stridea, strideb) = (1, N) into a temporary, then one with (N, 1)
 * into dst.
 */
static void idct(float *dst, const float *src)
{
    float tmp[N*N];

    idct_1d(tmp, src, 1, N);
    idct_1d(dst, tmp, N, 1);
}

/********* slow reference dct code ********/

static float dct_matrix    [N*N];
static float dct_trp_matrix[N*N];

/*
 * Fill dct_matrix with the N x N transform coefficients and
 * dct_trp_matrix with its transpose. Must be called before fdct_ref()
 * or idct_ref().
 */
void init_dct(void)
{
    int i, j;

    /* dct matrix: row 0 is constant, row i > 0 is a scaled cosine */
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            if (i == 0)
                dct_matrix[i*N + j] = 1 / sqrt(N);
            else
                dct_matrix[i*N + j] = sqrt(2./N) * cos(((2*j+1)*i*M_PI) / (2*N));
        }
    }

    /* dct matrix transposed */
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++)
            dct_trp_matrix[i*N + j] = dct_matrix[j*N + i];
    }
}

/*
 * Reference 1D pass: for each of N lines, multiply the N x N `matrix` by
 * the N input samples. stridea/strideb have the same meaning as in
 * fdct_1d(). The accumulator is a float, as in the original code.
 */
static void dct_1d_ref(float *dst, const float *src,
                       int stridea, int strideb,
                       const float *matrix)
{
    int x;

    for (x = 0; x < N; x++) {
        int i, j;

        for (j = 0; j < N; j++) {
            float sum = 0.;

            for (i = 0; i < N; i++)
                sum += matrix[j*N + i] * src[i*stridea];
            dst[j*stridea] = sum;
        }
        dst += strideb;
        src += strideb;
    }
}

/* Reference 2D forward transform: two dct_1d_ref() passes with dct_matrix. */
static void fdct_ref(float *dst, const float *src)
{
    float tmp[N*N];

    dct_1d_ref(tmp, src, 1, N, dct_matrix);
    dct_1d_ref(dst, tmp, N, 1, dct_matrix);
}

/* Reference 2D inverse transform: two dct_1d_ref() passes with dct_trp_matrix. */
static void idct_ref(float *dst, const float *src)
{
    float tmp[N*N];

    dct_1d_ref(tmp, src, 1, N, dct_trp_matrix);
    dct_1d_ref(dst, tmp, N, 1, dct_trp_matrix);
}

/********** test **************************/

/*
 * Compare two N*N blocks element by element. Every element whose absolute
 * difference is not below 0.0005 is printed, prefixed with `name`.
 * Returns 0 when all elements match, -1 otherwise.
 */
static int check_output(const char *name, const float *ref, const float *out)
{
    int i, ret = 0;

    for (i = 0; i < N*N; i++) {
        const int ok = fabs(ref[i] - out[i]) < 0.0005;

        if (!ok) {
            printf("%s ref:%9.3f out:%9.3f diff:%9.3f\n",
                   name, ref[i], out[i], ref[i] - out[i]);
            ret = -1;
        }
    }
    return ret;
}

/*
 * Run the fast transforms against the reference on one pseudo-random
 * block of values in 0..255. Returns 0 on success, 1 on the first
 * mismatching stage.
 */
int main(void)
{
    int i;
    float src[N*N];
    float ref_fdct[N*N], ref_idct[N*N];
    float out_fdct[N*N], out_idct[N*N];

    /* rand() is ISO C; random() is only declared with POSIX feature macros */
    for (i = 0; i < N*N; i++)
        src[i] = rand() % 256;

    init_dct();

    fdct_ref(ref_fdct, src);
    fdct(out_fdct, src);
    if (check_output("FDCT", ref_fdct, out_fdct) < 0)
        return 1;

    /* each inverse is fed the matching forward result */
    idct_ref(ref_idct, ref_fdct);
    idct(out_idct, out_fdct);
    if (check_output("IDCT", ref_idct, out_idct) < 0)
        return 1;

    return 0;
}