/* Fast Inverse DCT implemented using Lee's algorithm */
/* Bjorn Wesen 1997 */

#include "mp3dec.h"

/*
 * The DCT matrix for N values is defined as:
 *
 *     D(i,j) = cos((2*j+1)*i*PI/(2*N))
 *
 * Lee's fast-DCT algorithm, as used here, needs an 8-value DCT matrix
 * and a 16-value DCT matrix.
 *
 * NOTE(review): mpfloat, PI and ISCALE are not defined in this file;
 * presumably they come from mp3dec.h - confirm.
 */

#ifdef USE_DATA

/* Precomputed tables (A8, B8, B16) are pulled in instead of being built. */
#include "fastsb_A8.h"
#include "fastsb_B8.h"
#include "fastsb_B16.h"

#else

/* Tables filled in by fast_idct_init(). */
static mpfloat A16[16][16];   /* 16-value DCT matrix                  */
static mpfloat A8[8][8];      /*  8-value DCT matrix                  */
static mpfloat G16[16][16];   /* 16-value output butterfly            */
static mpfloat G8[8][8];      /*  8-value output butterfly            */
static mpfloat H16[16][16];   /* 16-value scaling (diagonal)          */
static mpfloat H8[8][8];      /*  8-value scaling (diagonal)          */
static mpfloat B16[16][16];   /* B16 = G16 * A16 * H16                */
static mpfloat B8[8][8];      /* B8  = G8  * A8  * H8                 */

#if 0
static mpfloat A32[32][32];
#endif

void matrix_mul16(mpfloat in1[16][16], mpfloat in2[16][16], mpfloat out[16][16]);
void matrix_mul8(mpfloat in1[8][8], mpfloat in2[8][8], mpfloat out[8][8]);

/*
 * Build the lookup matrices used by fast_idct().
 *
 * For each size N' in {16, 8} this fills
 *   A  - the cosine matrix cos((2*col+1)*row*PI/(2*N')),
 *   G  - ones on the main diagonal and on the first super-diagonal,
 *   H  - a diagonal matrix holding 1/(2*cos((2*row+1)*PI/(4*N'))),
 * and then forms B = G * (A * H) with the matrix_mul helpers.
 *
 * When MAKE_DATA is defined the A8, B8 and B16 tables are additionally
 * handed to make_data_file_2d() under the names of the headers that the
 * USE_DATA build includes.
 */
void fast_idct_init()
{
    int row, col;
    mpfloat prod16[16][16], prod8[8][8];   /* hold A*H before G is applied */

#if 0
    for(row = 0; row < 32; row++)
        for(col = 0; col < 32; col++)
            A32[row][col] = cos((2*col+1)*row*PI/64);
#endif

    /* create the 16-value matrices */
    for(row = 0; row < 16; row++) {
        for(col = 0; col < 16; col++) {
            A16[row][col] = cos((2*col+1)*row*PI/32);

            G16[row][col] = (row == col || col == (row + 1)) ? 1.0f : 0.0f;

            if(row != col)
                H16[row][col] = 0.0;
            else
                H16[row][col] = 1.0f/(2*cos((2*row+1)*PI/64));
        }
    }

    /* create the 8-value matrices */
    for(row = 0; row < 8; row++) {
        for(col = 0; col < 8; col++) {
            A8[row][col] = cos((2*col+1)*row*PI/16);

            G8[row][col] = (row == col || col == (row + 1)) ? 1.0f : 0.0f;

            if(row != col)
                H8[row][col] = 0.0f;
            else
                H8[row][col] = 1.0f/(2*cos((2*row+1)*PI/32));
        }
    }

    /* generate the B matrices: first A*H, then G*(A*H) */
    matrix_mul16(A16, H16, prod16);
    matrix_mul16(G16, prod16, B16);

    matrix_mul8(A8, H8, prod8);
    matrix_mul8(G8, prod8, B8);

#ifdef MAKE_DATA
    make_data_file_2d("fastsb_A8.h", "A8", &A8[0][0], 8, 8);
    make_data_file_2d("fastsb_B8.h", "B8", &B8[0][0], 8, 8);
    make_data_file_2d("fastsb_B16.h", "B16", &B16[0][0], 16, 16);
#endif
}

#endif

/* This is a two-level implementation of Lee's fast-DCT algorithm */

/*
 * The 32 input values are split in two 16-value vectors using an even
 * butterfly and an odd butterfly.
 *
 * The odd values are taken through Lee's odd path using a 16x16 DCT
 * matrix (A16) and appropriate scaling (G16*A16*H16).
 *
 * The even values are further split into two 8-value vectors using even
 * and odd butterflies into ee and eo. The ee values are fed through an
 * 8x8 DCT matrix (A8) while the eo values are fed through the odd path
 * using G8*A8*H8.
 *
 * This two-level configuration uses 384 muls and 432 adds, compared to
 * the direct 32x32 DCT which uses 1024 muls and 992 adds.
 */

#ifndef USE_C3X_ASM

/*
 * Transform 32 input values into 64 output values.
 *
 *   in  - read at indices 0..31
 *   out - written at indices 0..63
 *
 * The 32 intermediate results are collected in a local vector and then
 * unfolded into the 64 outputs by index mirroring and sign changes.
 * Uses the A8, B8 and B16 tables, which come either from
 * fast_idct_init() or, with USE_DATA, from the included headers.
 */
void fast_idct(mpfloat *in, mpfloat *out)
{
    mpfloat even[16], odd[16], ee[8], eo[8];
    mpfloat acc_a, acc_b;
    mpfloat t[32];
    int k, n;

#if 0
    /* direct 32x32 idct */
    for(k = 0; k < 32; k++) {
        acc_a = 0.0;
        for(n = 0; n < 32; n++)
            acc_a += in[n] * A32[k][n];
        t[k] = acc_a;
    }
#endif

    /* input butterflies - level 1 */
    /* 32 adds */
    for(k = 0; k < 16; k++) {
        even[k] = in[k] + in[31-k];
        odd[k]  = in[k] - in[31-k];
    }

    /* input butterflies - level 2 */
    /* 16 adds */
    for(k = 0; k < 8; k++) {
        ee[k] = even[k] + even[15-k];
        eo[k] = even[k] - even[15-k];
    }

    /* multiply the even_even vector (ee) with the ee matrix (A8) */
    /* multiply the even_odd vector (eo) with the eo matrix (B8) */
    /* 128 muls, 128 adds */
    for(k = 0; k < 8; k++) {
        acc_a = 0.0;
        acc_b = 0.0;

        /* two columns per pass */
        for(n = 0; n < 8; n += 2) {
            acc_a += A8[k][n] * ee[n] + A8[k][n+1] * ee[n+1];
            acc_b += B8[k][n] * eo[n] + B8[k][n+1] * eo[n+1];
        }

        /* ee results land on t[0,4,8,...], eo results on t[2,6,10,...] */
        ISCALE(acc_a);
        t[k*4] = acc_a;
        ISCALE(acc_b);
        t[k*4+2] = acc_b;
    }

#if 0
    /* multiply the even vector (even) with the even matrix (A16) */
    /* JUST FOR TESTING if we only want to use a 1-level Lee */
    for(k = 0; k < 16; k++) {
        acc_a = 0.0;
        for(n = 0; n < 16; n++) {
            acc_a += A16[k][n] * even[n];
        }
        ISCALE(acc_a);
        t[k*2] = acc_a;
    }
#endif

    /* multiply the odd vector (odd) with the odd matrix (B16) */
    /* 256 muls, 256 adds */
    for(k = 0; k < 16; k++) {
        acc_a = 0.0;

        /* four columns per pass */
        for(n = 0; n < 16; n += 4) {
            acc_a += B16[k][n] * odd[n] +
                B16[k][n+1] * odd[n+1] +
                B16[k][n+2] * odd[n+2] +
                B16[k][n+3] * odd[n+3];
        }

        /* odd-path results fill the odd slots of t */
        ISCALE(acc_a);
        t[k*2+1] = acc_a;
    }

    /* the output vector t now is expanded to 64 values using the
       symmetric property of the cosine function */
    for(k = 0; k < 16; k++) {
        out[k]    =  t[k+16];
        out[k+17] = -t[31-k];
        out[k+32] = -t[16-k];   /* out[32] was already set to -t[16] above (k == 15) */
        out[k+48] = -t[k];
    }
    out[16] = 0.0;
}

#endif

#ifndef USE_DATA

/*
 * out = in1 * in2 for 16x16 matrices.
 *
 * Each element is accumulated directly in out and then passed through
 * ISCALE, so out should not overlap either input.
 */
void matrix_mul16(mpfloat in1[16][16], mpfloat in2[16][16], mpfloat out[16][16])
{
    int r, c, k;

    for(r = 0; r < 16; r++) {
        for(c = 0; c < 16; c++) {
            out[r][c] = 0.0;
            for(k = 0; k < 16; k++) {
                out[r][c] += in1[r][k] * in2[k][c];
            }
            ISCALE(out[r][c]);
        }
    }
}

/*
 * out = in1 * in2 for 8x8 matrices.
 *
 * Each element is accumulated directly in out and then passed through
 * ISCALE, so out should not overlap either input.
 */
void matrix_mul8(mpfloat in1[8][8], mpfloat in2[8][8], mpfloat out[8][8])
{
    int r, c, k;

    for(r = 0; r < 8; r++) {
        for(c = 0; c < 8; c++) {
            out[r][c] = 0.0;
            for(k = 0; k < 8; k++) {
                out[r][c] += in1[r][k] * in2[k][c];
            }
            ISCALE(out[r][c]);
        }
    }
}

#endif