src/3rdparty/libjpeg/jcdctmgr.c
changeset 30 5dc02b23752f
parent 0 1918ee327afb
equal deleted inserted replaced
29:b72c6db6890b 30:5dc02b23752f
    21 
    21 
    22 typedef struct {
    22 typedef struct {
    23   struct jpeg_forward_dct pub;	/* public fields */
    23   struct jpeg_forward_dct pub;	/* public fields */
    24 
    24 
    25   /* Pointer to the DCT routine actually in use */
    25   /* Pointer to the DCT routine actually in use */
    26   forward_DCT_method_ptr do_dct;
    26   forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
    27 
    27 
    28   /* The actual post-DCT divisors --- not identical to the quant table
    28   /* The actual post-DCT divisors --- not identical to the quant table
    29    * entries, because of scaling (especially for an unnormalized DCT).
    29    * entries, because of scaling (especially for an unnormalized DCT).
    30    * Each table is given in normal array order.
    30    * Each table is given in normal array order.
    31    */
    31    */
    32   DCTELEM * divisors[NUM_QUANT_TBLS];
    32   DCTELEM * divisors[NUM_QUANT_TBLS];
    33 
    33 
    34 #ifdef DCT_FLOAT_SUPPORTED
    34 #ifdef DCT_FLOAT_SUPPORTED
    35   /* Same as above for the floating-point case. */
    35   /* Same as above for the floating-point case. */
    36   float_DCT_method_ptr do_float_dct;
    36   float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
    37   FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
    37   FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
    38 #endif
    38 #endif
    39 } my_fdct_controller;
    39 } my_fdct_controller;
    40 
    40 
    41 typedef my_fdct_controller * my_fdct_ptr;
    41 typedef my_fdct_controller * my_fdct_ptr;
       
    42 
       
    43 
       
    44 /* The current scaled-DCT routines require ISLOW-style divisor tables,
       
    45  * so be sure to compile that code if either ISLOW or SCALING is requested.
       
    46  */
       
    47 #ifdef DCT_ISLOW_SUPPORTED
       
    48 #define PROVIDE_ISLOW_TABLES
       
    49 #else
       
    50 #ifdef DCT_SCALING_SUPPORTED
       
    51 #define PROVIDE_ISLOW_TABLES
       
    52 #endif
       
    53 #endif
       
    54 
       
    55 
       
    56 /*
       
    57  * Perform forward DCT on one or more blocks of a component.
       
    58  *
       
    59  * The input samples are taken from the sample_data[] array starting at
       
    60  * position start_row/start_col, and moving to the right for any additional
       
    61  * blocks. The quantized coefficients are returned in coef_blocks[].
       
    62  */
       
    63 
       
    64 METHODDEF(void)
       
    65 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
       
    66 	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
       
    67 	     JDIMENSION start_row, JDIMENSION start_col,
       
    68 	     JDIMENSION num_blocks)
       
    69 /* This version is used for integer DCT implementations. */
       
    70 {
       
    71   /* This routine is heavily used, so it's worth coding it tightly. */
       
    72   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
       
    73   forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
       
    74   DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
       
    75   DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
       
    76   JDIMENSION bi;
       
    77 
       
    78   sample_data += start_row;	/* fold in the vertical offset once */
       
    79 
       
    80   for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
       
    81     /* Perform the DCT */
       
    82     (*do_dct) (workspace, sample_data, start_col);
       
    83 
       
    84     /* Quantize/descale the coefficients, and store into coef_blocks[] */
       
    85     { register DCTELEM temp, qval;
       
    86       register int i;
       
    87       register JCOEFPTR output_ptr = coef_blocks[bi];
       
    88 
       
    89       for (i = 0; i < DCTSIZE2; i++) {
       
    90 	qval = divisors[i];
       
    91 	temp = workspace[i];
       
    92 	/* Divide the coefficient value by qval, ensuring proper rounding.
       
    93 	 * Since C does not specify the direction of rounding for negative
       
    94 	 * quotients, we have to force the dividend positive for portability.
       
    95 	 *
       
    96 	 * In most files, at least half of the output values will be zero
       
    97 	 * (at default quantization settings, more like three-quarters...)
       
    98 	 * so we should ensure that this case is fast.  On many machines,
       
    99 	 * a comparison is enough cheaper than a divide to make a special test
       
   100 	 * a win.  Since both inputs will be nonnegative, we need only test
       
   101 	 * for a < b to discover whether a/b is 0.
       
   102 	 * If your machine's division is fast enough, define FAST_DIVIDE.
       
   103 	 */
       
   104 #ifdef FAST_DIVIDE
       
   105 #define DIVIDE_BY(a,b)	a /= b
       
   106 #else
       
   107 #define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0
       
   108 #endif
       
   109 	if (temp < 0) {
       
   110 	  temp = -temp;
       
   111 	  temp += qval>>1;	/* for rounding */
       
   112 	  DIVIDE_BY(temp, qval);
       
   113 	  temp = -temp;
       
   114 	} else {
       
   115 	  temp += qval>>1;	/* for rounding */
       
   116 	  DIVIDE_BY(temp, qval);
       
   117 	}
       
   118 	output_ptr[i] = (JCOEF) temp;
       
   119       }
       
   120     }
       
   121   }
       
   122 }
       
   123 
       
   124 
       
   125 #ifdef DCT_FLOAT_SUPPORTED
       
   126 
       
   127 METHODDEF(void)
       
   128 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
       
   129 		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
       
   130 		   JDIMENSION start_row, JDIMENSION start_col,
       
   131 		   JDIMENSION num_blocks)
       
   132 /* This version is used for floating-point DCT implementations. */
       
   133 {
       
   134   /* This routine is heavily used, so it's worth coding it tightly. */
       
   135   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
       
   136   float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
       
   137   FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
       
   138   FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
       
   139   JDIMENSION bi;
       
   140 
       
   141   sample_data += start_row;	/* fold in the vertical offset once */
       
   142 
       
   143   for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
       
   144     /* Perform the DCT */
       
   145     (*do_dct) (workspace, sample_data, start_col);
       
   146 
       
   147     /* Quantize/descale the coefficients, and store into coef_blocks[] */
       
   148     { register FAST_FLOAT temp;
       
   149       register int i;
       
   150       register JCOEFPTR output_ptr = coef_blocks[bi];
       
   151 
       
   152       for (i = 0; i < DCTSIZE2; i++) {
       
   153 	/* Apply the quantization and scaling factor */
       
   154 	temp = workspace[i] * divisors[i];
       
   155 	/* Round to nearest integer.
       
   156 	 * Since C does not specify the direction of rounding for negative
       
   157 	 * quotients, we have to force the dividend positive for portability.
       
   158 	 * The maximum coefficient size is +-16K (for 12-bit data), so this
       
   159 	 * code should work for either 16-bit or 32-bit ints.
       
   160 	 */
       
   161 	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
       
   162       }
       
   163     }
       
   164   }
       
   165 }
       
   166 
       
   167 #endif /* DCT_FLOAT_SUPPORTED */
    42 
   168 
    43 
   169 
    44 /*
   170 /*
    45  * Initialize for a processing pass.
   171  * Initialize for a processing pass.
    46  * Verify that all referenced Q-tables are present, and set up
   172  * Verify that all referenced Q-tables are present, and set up
    54 start_pass_fdctmgr (j_compress_ptr cinfo)
   180 start_pass_fdctmgr (j_compress_ptr cinfo)
    55 {
   181 {
    56   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
   182   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
    57   int ci, qtblno, i;
   183   int ci, qtblno, i;
    58   jpeg_component_info *compptr;
   184   jpeg_component_info *compptr;
       
   185   int method = 0;
    59   JQUANT_TBL * qtbl;
   186   JQUANT_TBL * qtbl;
    60   DCTELEM * dtbl;
   187   DCTELEM * dtbl;
    61 
   188 
    62   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
   189   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
    63        ci++, compptr++) {
   190        ci++, compptr++) {
       
   191     /* Select the proper DCT routine for this component's scaling */
       
   192     switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
       
   193 #ifdef DCT_SCALING_SUPPORTED
       
   194     case ((1 << 8) + 1):
       
   195       fdct->do_dct[ci] = jpeg_fdct_1x1;
       
   196       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   197       break;
       
   198     case ((2 << 8) + 2):
       
   199       fdct->do_dct[ci] = jpeg_fdct_2x2;
       
   200       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   201       break;
       
   202     case ((3 << 8) + 3):
       
   203       fdct->do_dct[ci] = jpeg_fdct_3x3;
       
   204       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   205       break;
       
   206     case ((4 << 8) + 4):
       
   207       fdct->do_dct[ci] = jpeg_fdct_4x4;
       
   208       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   209       break;
       
   210     case ((5 << 8) + 5):
       
   211       fdct->do_dct[ci] = jpeg_fdct_5x5;
       
   212       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   213       break;
       
   214     case ((6 << 8) + 6):
       
   215       fdct->do_dct[ci] = jpeg_fdct_6x6;
       
   216       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   217       break;
       
   218     case ((7 << 8) + 7):
       
   219       fdct->do_dct[ci] = jpeg_fdct_7x7;
       
   220       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   221       break;
       
   222     case ((9 << 8) + 9):
       
   223       fdct->do_dct[ci] = jpeg_fdct_9x9;
       
   224       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   225       break;
       
   226     case ((10 << 8) + 10):
       
   227       fdct->do_dct[ci] = jpeg_fdct_10x10;
       
   228       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   229       break;
       
   230     case ((11 << 8) + 11):
       
   231       fdct->do_dct[ci] = jpeg_fdct_11x11;
       
   232       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   233       break;
       
   234     case ((12 << 8) + 12):
       
   235       fdct->do_dct[ci] = jpeg_fdct_12x12;
       
   236       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   237       break;
       
   238     case ((13 << 8) + 13):
       
   239       fdct->do_dct[ci] = jpeg_fdct_13x13;
       
   240       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   241       break;
       
   242     case ((14 << 8) + 14):
       
   243       fdct->do_dct[ci] = jpeg_fdct_14x14;
       
   244       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   245       break;
       
   246     case ((15 << 8) + 15):
       
   247       fdct->do_dct[ci] = jpeg_fdct_15x15;
       
   248       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   249       break;
       
   250     case ((16 << 8) + 16):
       
   251       fdct->do_dct[ci] = jpeg_fdct_16x16;
       
   252       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   253       break;
       
   254     case ((16 << 8) + 8):
       
   255       fdct->do_dct[ci] = jpeg_fdct_16x8;
       
   256       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   257       break;
       
   258     case ((14 << 8) + 7):
       
   259       fdct->do_dct[ci] = jpeg_fdct_14x7;
       
   260       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   261       break;
       
   262     case ((12 << 8) + 6):
       
   263       fdct->do_dct[ci] = jpeg_fdct_12x6;
       
   264       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   265       break;
       
   266     case ((10 << 8) + 5):
       
   267       fdct->do_dct[ci] = jpeg_fdct_10x5;
       
   268       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   269       break;
       
   270     case ((8 << 8) + 4):
       
   271       fdct->do_dct[ci] = jpeg_fdct_8x4;
       
   272       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   273       break;
       
   274     case ((6 << 8) + 3):
       
   275       fdct->do_dct[ci] = jpeg_fdct_6x3;
       
   276       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   277       break;
       
   278     case ((4 << 8) + 2):
       
   279       fdct->do_dct[ci] = jpeg_fdct_4x2;
       
   280       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   281       break;
       
   282     case ((2 << 8) + 1):
       
   283       fdct->do_dct[ci] = jpeg_fdct_2x1;
       
   284       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   285       break;
       
   286     case ((8 << 8) + 16):
       
   287       fdct->do_dct[ci] = jpeg_fdct_8x16;
       
   288       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   289       break;
       
   290     case ((7 << 8) + 14):
       
   291       fdct->do_dct[ci] = jpeg_fdct_7x14;
       
   292       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   293       break;
       
   294     case ((6 << 8) + 12):
       
   295       fdct->do_dct[ci] = jpeg_fdct_6x12;
       
   296       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   297       break;
       
   298     case ((5 << 8) + 10):
       
   299       fdct->do_dct[ci] = jpeg_fdct_5x10;
       
   300       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   301       break;
       
   302     case ((4 << 8) + 8):
       
   303       fdct->do_dct[ci] = jpeg_fdct_4x8;
       
   304       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   305       break;
       
   306     case ((3 << 8) + 6):
       
   307       fdct->do_dct[ci] = jpeg_fdct_3x6;
       
   308       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   309       break;
       
   310     case ((2 << 8) + 4):
       
   311       fdct->do_dct[ci] = jpeg_fdct_2x4;
       
   312       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   313       break;
       
   314     case ((1 << 8) + 2):
       
   315       fdct->do_dct[ci] = jpeg_fdct_1x2;
       
   316       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
       
   317       break;
       
   318 #endif
       
   319     case ((DCTSIZE << 8) + DCTSIZE):
       
   320       switch (cinfo->dct_method) {
       
   321 #ifdef DCT_ISLOW_SUPPORTED
       
   322       case JDCT_ISLOW:
       
   323 	fdct->do_dct[ci] = jpeg_fdct_islow;
       
   324 	method = JDCT_ISLOW;
       
   325 	break;
       
   326 #endif
       
   327 #ifdef DCT_IFAST_SUPPORTED
       
   328       case JDCT_IFAST:
       
   329 	fdct->do_dct[ci] = jpeg_fdct_ifast;
       
   330 	method = JDCT_IFAST;
       
   331 	break;
       
   332 #endif
       
   333 #ifdef DCT_FLOAT_SUPPORTED
       
   334       case JDCT_FLOAT:
       
   335 	fdct->do_float_dct[ci] = jpeg_fdct_float;
       
   336 	method = JDCT_FLOAT;
       
   337 	break;
       
   338 #endif
       
   339       default:
       
   340 	ERREXIT(cinfo, JERR_NOT_COMPILED);
       
   341 	break;
       
   342       }
       
   343       break;
       
   344     default:
       
   345       ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
       
   346 	       compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
       
   347       break;
       
   348     }
    64     qtblno = compptr->quant_tbl_no;
   349     qtblno = compptr->quant_tbl_no;
    65     /* Make sure specified quantization table is present */
   350     /* Make sure specified quantization table is present */
    66     if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
   351     if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
    67 	cinfo->quant_tbl_ptrs[qtblno] == NULL)
   352 	cinfo->quant_tbl_ptrs[qtblno] == NULL)
    68       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
   353       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
    69     qtbl = cinfo->quant_tbl_ptrs[qtblno];
   354     qtbl = cinfo->quant_tbl_ptrs[qtblno];
    70     /* Compute divisors for this quant table */
   355     /* Compute divisors for this quant table */
    71     /* We may do this more than once for same table, but it's not a big deal */
   356     /* We may do this more than once for same table, but it's not a big deal */
    72     switch (cinfo->dct_method) {
   357     switch (method) {
    73 #ifdef DCT_ISLOW_SUPPORTED
   358 #ifdef PROVIDE_ISLOW_TABLES
    74     case JDCT_ISLOW:
   359     case JDCT_ISLOW:
    75       /* For LL&M IDCT method, divisors are equal to raw quantization
   360       /* For LL&M IDCT method, divisors are equal to raw quantization
    76        * coefficients multiplied by 8 (to counteract scaling).
   361        * coefficients multiplied by 8 (to counteract scaling).
    77        */
   362        */
    78       if (fdct->divisors[qtblno] == NULL) {
   363       if (fdct->divisors[qtblno] == NULL) {
    82       }
   367       }
    83       dtbl = fdct->divisors[qtblno];
   368       dtbl = fdct->divisors[qtblno];
    84       for (i = 0; i < DCTSIZE2; i++) {
   369       for (i = 0; i < DCTSIZE2; i++) {
    85 	dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
   370 	dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
    86       }
   371       }
       
   372       fdct->pub.forward_DCT[ci] = forward_DCT;
    87       break;
   373       break;
    88 #endif
   374 #endif
    89 #ifdef DCT_IFAST_SUPPORTED
   375 #ifdef DCT_IFAST_SUPPORTED
    90     case JDCT_IFAST:
   376     case JDCT_IFAST:
    91       {
   377       {
   120 	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
   406 	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
   121 				  (INT32) aanscales[i]),
   407 				  (INT32) aanscales[i]),
   122 		    CONST_BITS-3);
   408 		    CONST_BITS-3);
   123 	}
   409 	}
   124       }
   410       }
       
   411       fdct->pub.forward_DCT[ci] = forward_DCT;
   125       break;
   412       break;
   126 #endif
   413 #endif
   127 #ifdef DCT_FLOAT_SUPPORTED
   414 #ifdef DCT_FLOAT_SUPPORTED
   128     case JDCT_FLOAT:
   415     case JDCT_FLOAT:
   129       {
   416       {
   156 		       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
   443 		       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
   157 	    i++;
   444 	    i++;
   158 	  }
   445 	  }
   159 	}
   446 	}
   160       }
   447       }
       
   448       fdct->pub.forward_DCT[ci] = forward_DCT_float;
   161       break;
   449       break;
   162 #endif
   450 #endif
   163     default:
   451     default:
   164       ERREXIT(cinfo, JERR_NOT_COMPILED);
   452       ERREXIT(cinfo, JERR_NOT_COMPILED);
   165       break;
   453       break;
   166     }
   454     }
   167   }
   455   }
   168 }
   456 }
   169 
       
   170 
       
   171 /*
       
   172  * Perform forward DCT on one or more blocks of a component.
       
   173  *
       
   174  * The input samples are taken from the sample_data[] array starting at
       
   175  * position start_row/start_col, and moving to the right for any additional
       
   176  * blocks. The quantized coefficients are returned in coef_blocks[].
       
   177  */
       
   178 
       
   179 METHODDEF(void)
       
   180 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
       
   181 	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
       
   182 	     JDIMENSION start_row, JDIMENSION start_col,
       
   183 	     JDIMENSION num_blocks)
       
   184 /* This version is used for integer DCT implementations. */
       
   185 {
       
   186   /* This routine is heavily used, so it's worth coding it tightly. */
       
   187   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
       
   188   forward_DCT_method_ptr do_dct = fdct->do_dct;
       
   189   DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
       
   190   DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
       
   191   JDIMENSION bi;
       
   192 
       
   193   sample_data += start_row;	/* fold in the vertical offset once */
       
   194 
       
   195   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
       
   196     /* Load data into workspace, applying unsigned->signed conversion */
       
   197     { register DCTELEM *workspaceptr;
       
   198       register JSAMPROW elemptr;
       
   199       register int elemr;
       
   200 
       
   201       workspaceptr = workspace;
       
   202       for (elemr = 0; elemr < DCTSIZE; elemr++) {
       
   203 	elemptr = sample_data[elemr] + start_col;
       
   204 #if DCTSIZE == 8		/* unroll the inner loop */
       
   205 	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
       
   206 	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
       
   207 	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
       
   208 	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
       
   209 	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
       
   210 	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
       
   211 	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
       
   212 	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
       
   213 #else
       
   214 	{ register int elemc;
       
   215 	  for (elemc = DCTSIZE; elemc > 0; elemc--) {
       
   216 	    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
       
   217 	  }
       
   218 	}
       
   219 #endif
       
   220       }
       
   221     }
       
   222 
       
   223     /* Perform the DCT */
       
   224     (*do_dct) (workspace);
       
   225 
       
   226     /* Quantize/descale the coefficients, and store into coef_blocks[] */
       
   227     { register DCTELEM temp, qval;
       
   228       register int i;
       
   229       register JCOEFPTR output_ptr = coef_blocks[bi];
       
   230 
       
   231       for (i = 0; i < DCTSIZE2; i++) {
       
   232 	qval = divisors[i];
       
   233 	temp = workspace[i];
       
   234 	/* Divide the coefficient value by qval, ensuring proper rounding.
       
   235 	 * Since C does not specify the direction of rounding for negative
       
   236 	 * quotients, we have to force the dividend positive for portability.
       
   237 	 *
       
   238 	 * In most files, at least half of the output values will be zero
       
   239 	 * (at default quantization settings, more like three-quarters...)
       
   240 	 * so we should ensure that this case is fast.  On many machines,
       
   241 	 * a comparison is enough cheaper than a divide to make a special test
       
   242 	 * a win.  Since both inputs will be nonnegative, we need only test
       
   243 	 * for a < b to discover whether a/b is 0.
       
   244 	 * If your machine's division is fast enough, define FAST_DIVIDE.
       
   245 	 */
       
   246 #ifdef FAST_DIVIDE
       
   247 #define DIVIDE_BY(a,b)	a /= b
       
   248 #else
       
   249 #define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0
       
   250 #endif
       
   251 	if (temp < 0) {
       
   252 	  temp = -temp;
       
   253 	  temp += qval>>1;	/* for rounding */
       
   254 	  DIVIDE_BY(temp, qval);
       
   255 	  temp = -temp;
       
   256 	} else {
       
   257 	  temp += qval>>1;	/* for rounding */
       
   258 	  DIVIDE_BY(temp, qval);
       
   259 	}
       
   260 	output_ptr[i] = (JCOEF) temp;
       
   261       }
       
   262     }
       
   263   }
       
   264 }
       
   265 
       
   266 
       
   267 #ifdef DCT_FLOAT_SUPPORTED
       
   268 
       
   269 METHODDEF(void)
       
   270 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
       
   271 		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
       
   272 		   JDIMENSION start_row, JDIMENSION start_col,
       
   273 		   JDIMENSION num_blocks)
       
   274 /* This version is used for floating-point DCT implementations. */
       
   275 {
       
   276   /* This routine is heavily used, so it's worth coding it tightly. */
       
   277   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
       
   278   float_DCT_method_ptr do_dct = fdct->do_float_dct;
       
   279   FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
       
   280   FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
       
   281   JDIMENSION bi;
       
   282 
       
   283   sample_data += start_row;	/* fold in the vertical offset once */
       
   284 
       
   285   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
       
   286     /* Load data into workspace, applying unsigned->signed conversion */
       
   287     { register FAST_FLOAT *workspaceptr;
       
   288       register JSAMPROW elemptr;
       
   289       register int elemr;
       
   290 
       
   291       workspaceptr = workspace;
       
   292       for (elemr = 0; elemr < DCTSIZE; elemr++) {
       
   293 	elemptr = sample_data[elemr] + start_col;
       
   294 #if DCTSIZE == 8		/* unroll the inner loop */
       
   295 	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
       
   296 	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
       
   297 	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
       
   298 	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
       
   299 	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
       
   300 	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
       
   301 	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
       
   302 	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
       
   303 #else
       
   304 	{ register int elemc;
       
   305 	  for (elemc = DCTSIZE; elemc > 0; elemc--) {
       
   306 	    *workspaceptr++ = (FAST_FLOAT)
       
   307 	      (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
       
   308 	  }
       
   309 	}
       
   310 #endif
       
   311       }
       
   312     }
       
   313 
       
   314     /* Perform the DCT */
       
   315     (*do_dct) (workspace);
       
   316 
       
   317     /* Quantize/descale the coefficients, and store into coef_blocks[] */
       
   318     { register FAST_FLOAT temp;
       
   319       register int i;
       
   320       register JCOEFPTR output_ptr = coef_blocks[bi];
       
   321 
       
   322       for (i = 0; i < DCTSIZE2; i++) {
       
   323 	/* Apply the quantization and scaling factor */
       
   324 	temp = workspace[i] * divisors[i];
       
   325 	/* Round to nearest integer.
       
   326 	 * Since C does not specify the direction of rounding for negative
       
   327 	 * quotients, we have to force the dividend positive for portability.
       
   328 	 * The maximum coefficient size is +-16K (for 12-bit data), so this
       
   329 	 * code should work for either 16-bit or 32-bit ints.
       
   330 	 */
       
   331 	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
       
   332       }
       
   333     }
       
   334   }
       
   335 }
       
   336 
       
   337 #endif /* DCT_FLOAT_SUPPORTED */
       
   338 
   457 
   339 
   458 
   340 /*
   459 /*
   341  * Initialize FDCT manager.
   460  * Initialize FDCT manager.
   342  */
   461  */
   351     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
   470     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
   352 				SIZEOF(my_fdct_controller));
   471 				SIZEOF(my_fdct_controller));
   353   cinfo->fdct = (struct jpeg_forward_dct *) fdct;
   472   cinfo->fdct = (struct jpeg_forward_dct *) fdct;
   354   fdct->pub.start_pass = start_pass_fdctmgr;
   473   fdct->pub.start_pass = start_pass_fdctmgr;
   355 
   474 
   356   switch (cinfo->dct_method) {
       
   357 #ifdef DCT_ISLOW_SUPPORTED
       
   358   case JDCT_ISLOW:
       
   359     fdct->pub.forward_DCT = forward_DCT;
       
   360     fdct->do_dct = jpeg_fdct_islow;
       
   361     break;
       
   362 #endif
       
   363 #ifdef DCT_IFAST_SUPPORTED
       
   364   case JDCT_IFAST:
       
   365     fdct->pub.forward_DCT = forward_DCT;
       
   366     fdct->do_dct = jpeg_fdct_ifast;
       
   367     break;
       
   368 #endif
       
   369 #ifdef DCT_FLOAT_SUPPORTED
       
   370   case JDCT_FLOAT:
       
   371     fdct->pub.forward_DCT = forward_DCT_float;
       
   372     fdct->do_float_dct = jpeg_fdct_float;
       
   373     break;
       
   374 #endif
       
   375   default:
       
   376     ERREXIT(cinfo, JERR_NOT_COMPILED);
       
   377     break;
       
   378   }
       
   379 
       
   380   /* Mark divisor tables unallocated */
   475   /* Mark divisor tables unallocated */
   381   for (i = 0; i < NUM_QUANT_TBLS; i++) {
   476   for (i = 0; i < NUM_QUANT_TBLS; i++) {
   382     fdct->divisors[i] = NULL;
   477     fdct->divisors[i] = NULL;
   383 #ifdef DCT_FLOAT_SUPPORTED
   478 #ifdef DCT_FLOAT_SUPPORTED
   384     fdct->float_divisors[i] = NULL;
   479     fdct->float_divisors[i] = NULL;