93 #if CONFIG_MPEG4_DECODER
111 #define AANSCALE_BITS 12
114 #define NB_ITS_SPEED 50000
127 for (
i = 0;
i < 64;
i++)
131 for (
i = 0;
i < 64;
i++)
137 for (
i = 0;
i < j;
i++) {
161 for (
i = 0;
i < 64;
i++)
162 dst[(
i & 0x38) | ((
i & 6) >> 1) | ((
i & 1) << 2)] =
src[
i];
165 for (
i = 0;
i < 64;
i++)
166 dst[(
i & 0x24) | ((
i & 3) << 3) | ((
i >> 3) & 3)] =
src[
i];
169 for (
i = 0;
i < 64;
i++)
170 dst[(
i>>3) | ((
i<<3)&0x38)] =
src[
i];
173 for (
i = 0;
i < 64;
i++)
184 int64_t err2, ti, ti1, it1, err_sum = 0;
185 int64_t sysErr[64], sysErrMax = 0;
186 int64_t err2_matrix[64], err2_max = 0;
188 int blockSumErrMax = 0, blockSumErr;
190 const int vals=1<<
bits;
198 for (
i = 0;
i < 64;
i++)
199 err2_matrix[
i] = sysErr[
i] = 0;
200 for (it = 0; it <
NB_ITS; it++) {
207 if (!strcmp(
dct->name,
"IJG-AAN-INT")) {
208 for (
i = 0;
i < 64;
i++) {
215 if (!strcmp(
dct->name,
"PR-SSE2"))
216 for (
i = 0;
i < 64;
i++)
220 for (
i = 0;
i < 64;
i++) {
233 if (blockSumErrMax < blockSumErr)
234 blockSumErrMax = blockSumErr;
236 for (
i = 0;
i < 64;
i++) {
238 err2_max =
FFMAX(err2_max ,
FFABS(err2_matrix[
i]));
241 for (
i = 0;
i < 64;
i++) {
244 printf(
"%7d ", (
int) sysErr[
i]);
248 omse = (double) err2 /
NB_ITS / 64;
249 ome = (double) err_sum /
NB_ITS / 64;
251 spec_err = is_idct && (err_inf > 1 || omse > 0.02 ||
fabs(ome) > 0.0015);
253 spec_err = is_idct && ((double) err2_max /
NB_ITS > 0.06 || (
double) sysErrMax /
NB_ITS > 0.015);
255 printf(
"%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
256 is_idct ?
"IDCT" :
"DCT",
dct->name, err_inf,
257 omse, ome, (
double) sysErrMax /
NB_ITS,
258 maxout, blockSumErrMax);
260 if (spec_err && !
dct->nonspec) {
283 }
while (ti1 < 1000000);
285 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT",
dct->name,
286 (
double) it1 * 1000.0 / (
double) ti1);
297 static double c8[8][8];
298 static double c4[4][4];
299 double block1[64], block2[64], block3[64];
306 for (
i = 0;
i < 8;
i++) {
308 for (j = 0; j < 8; j++) {
309 s = (
i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
310 c8[
i][j] =
s * cos(
M_PI *
i * (j + 0.5) / 8.0);
311 sum += c8[
i][j] * c8[
i][j];
315 for (
i = 0;
i < 4;
i++) {
317 for (j = 0; j < 4; j++) {
318 s = (
i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
319 c4[
i][j] =
s * cos(
M_PI *
i * (j + 0.5) / 4.0);
320 sum += c4[
i][j] * c4[
i][j];
327 for (
i = 0;
i < 4;
i++) {
328 for (j = 0; j < 8; j++) {
337 for (
i = 0;
i < 8;
i++) {
338 for (j = 0; j < 8; j++) {
340 for (k = 0; k < 8; k++)
341 sum += c8[k][j] *
block1[8 *
i + k];
342 block2[8 *
i + j] = sum;
347 for (
i = 0;
i < 8;
i++) {
348 for (j = 0; j < 4; j++) {
351 for (k = 0; k < 4; k++)
352 sum += c4[k][j] * block2[8 * (2 * k) +
i];
353 block3[8 * (2 * j) +
i] = sum;
357 for (k = 0; k < 4; k++)
358 sum += c4[k][j] * block2[8 * (2 * k + 1) +
i];
359 block3[8 * (2 * j + 1) +
i] = sum;
364 for (
i = 0;
i < 8;
i++) {
365 for (j = 0; j < 8; j++) {
366 v = block3[8 *
i + j];
368 else if (v > 255) v = 255;
369 dest[
i * linesize + j] = (
int)
rint(v);
375 void (*idct248_put)(
uint8_t *dest,
380 int it,
i, it1, ti, ti1, err_max, v;
388 for (it = 0; it <
NB_ITS; it++) {
390 for (
i = 0;
i < 64;
i++)
394 for (
i = 0;
i < 64;
i++)
398 for (
i = 0;
i < 64;
i++)
402 for (
i = 0;
i < 64;
i++) {
429 printf(
"%s %s: err_inf=%d\n", 1 ?
"IDCT248" :
"DCT248",
name, err_max);
438 for (
i = 0;
i < 64;
i++)
445 }
while (ti1 < 1000000);
447 printf(
"%s %s: %0.1f kdct/s\n", 1 ?
"IDCT248" :
"DCT248",
name,
448 (
double) it1 * 1000.0 / (
double) ti1);
453 printf(
"dct-test [-i] [<test-number>] [<bits>]\n"
454 "test-number 0 -> test with random matrixes\n"
455 " 1 -> test with random sparse matrixes\n"
456 " 2 -> do 3. test from MPEG-4 std\n"
457 "bits Number of time domain bits to use, 8 is default\n"
458 "-i test IDCT implementations\n"
459 "-4 test IDCT248 implementations\n"
467 int main(
int argc,
char **argv)
469 int test_idct = 0, test_248_dct = 0;
479 c =
getopt(argc, argv,
"ih4t");
503 printf(
"ffmpeg DCT/IDCT test\n");
530 printf(
"Error: %d.\n", err);
const uint16_t ff_aanscales[64]
AAN (Arai, Agui and Nakajima) (I)DCT tables.
static void dct(AudioRNNContext *s, float *out, const float *in)
static av_cold int init(AVCodecContext *avctx)
common internal and external API header
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
static atomic_int cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static __device__ float fabs(float a)
void ff_j_rev_dct(int16_t *data)
void ff_fdct_ifast(int16_t *data)
void ff_jpeg_fdct_islow_8(int16_t *data)
av_cold void ff_ref_dct_init(void)
Initialize the double precision discrete cosine transform functions fdct & idct.
void ff_ref_idct(short *block)
Transform 8x8 block of data with a double precision inverse DCT This is a reference implementation.
void ff_ref_fdct(short *block)
Transform 8x8 block of data with a double precision forward DCT This is a reference implementation.
void ff_faandct(int16_t *data)
void ff_faanidct(int16_t block[64])
static int getopt(int argc, char *argv[], char *opts)
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
av_cold void av_lfg_init(AVLFG *c, unsigned int seed)
static unsigned int av_lfg_get(AVLFG *c)
Get the next random unsigned 32-bit number using an ALFG.
common internal API header
#define LOCAL_ALIGNED(a, t, v,...)
static void test(const char *pattern, const char *host)
typedef void(RENAME(mix_any_func_type))
void ff_prores_idct_10(int16_t *block, const int16_t *qmat)
Special version of ff_simple_idct_int16_10bit() which does dequantization and scales by a factor of 2...
void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
void ff_simple_idct_int16_8bit(int16_t *block)
void ff_simple_idct_int16_12bit(int16_t *block)
void ff_simple_idct_int16_10bit(int16_t *block)
#define FF_ARRAY_ELEMS(a)
Context structure for the Lagged Fibonacci PRNG.
enum idct_permutation_type perm_type
void(* func)(int16_t *block)
static void idct248_error(const char *name, void(*idct248_put)(uint8_t *dest, ptrdiff_t line_size, int16_t *block), int speed)
static const struct algo fdct_tab[]
static void idct248_ref(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
static uint8_t img_dest1[64]
int main(int argc, char **argv)
static const struct algo fdct_tab_arch[]
static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)
static void permute(int16_t dst[64], const int16_t src[64], enum idct_permutation_type perm_type)
static void ff_prores_idct_wrap(int16_t *dst)
static const struct algo idct_tab[]
static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
static const struct algo idct_tab_arch[]
static int16_t block1[64]
static uint8_t img_dest[64]
static int ref[MAX_W *MAX_W]
static int permute_x86(int16_t dst[64], const int16_t src[64], enum idct_permutation_type perm_type)
int64_t av_gettime_relative(void)
Get the current time in microseconds since some unspecified starting point.
void ff_xvid_idct(int16_t *const in)