149 JCOEFPTR coef_block, |
172 JCOEFPTR coef_block, |
150 JSAMPARRAY output_buf, JDIMENSION output_col) |
173 JSAMPARRAY output_buf, JDIMENSION output_col) |
151 { |
174 { |
152 INT32 tmp0, tmp1, tmp2, tmp3; |
175 INT32 tmp0, tmp1, tmp2, tmp3; |
153 INT32 tmp10, tmp11, tmp12, tmp13; |
176 INT32 tmp10, tmp11, tmp12, tmp13; |
|
177 INT32 z1, z2, z3; |
|
178 JCOEFPTR inptr; |
|
179 ISLOW_MULT_TYPE * quantptr; |
|
180 int * wsptr; |
|
181 JSAMPROW outptr; |
|
182 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
183 int ctr; |
|
184 int workspace[DCTSIZE2]; /* buffers data between passes */ |
|
185 SHIFT_TEMPS |
|
186 |
|
187 /* Pass 1: process columns from input, store into work array. */ |
|
188 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ |
|
189 /* furthermore, we scale the results by 2**PASS1_BITS. */ |
|
190 |
|
191 inptr = coef_block; |
|
192 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
193 wsptr = workspace; |
|
194 for (ctr = DCTSIZE; ctr > 0; ctr--) { |
|
195 /* Due to quantization, we will usually find that many of the input |
|
196 * coefficients are zero, especially the AC terms. We can exploit this |
|
197 * by short-circuiting the IDCT calculation for any column in which all |
|
198 * the AC terms are zero. In that case each output is equal to the |
|
199 * DC coefficient (with scale factor as needed). |
|
200 * With typical images and quantization tables, half or more of the |
|
201 * column DCT calculations can be simplified this way. |
|
202 */ |
|
203 |
|
204 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && |
|
205 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && |
|
206 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && |
|
207 inptr[DCTSIZE*7] == 0) { |
|
208 /* AC terms all zero */ |
|
209 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; |
|
210 |
|
211 wsptr[DCTSIZE*0] = dcval; |
|
212 wsptr[DCTSIZE*1] = dcval; |
|
213 wsptr[DCTSIZE*2] = dcval; |
|
214 wsptr[DCTSIZE*3] = dcval; |
|
215 wsptr[DCTSIZE*4] = dcval; |
|
216 wsptr[DCTSIZE*5] = dcval; |
|
217 wsptr[DCTSIZE*6] = dcval; |
|
218 wsptr[DCTSIZE*7] = dcval; |
|
219 |
|
220 inptr++; /* advance pointers to next column */ |
|
221 quantptr++; |
|
222 wsptr++; |
|
223 continue; |
|
224 } |
|
225 |
|
226 /* Even part: reverse the even part of the forward DCT. */ |
|
227 /* The rotator is sqrt(2)*c(-6). */ |
|
228 |
|
229 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
230 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
|
231 |
|
232 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); |
|
233 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); |
|
234 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); |
|
235 |
|
236 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
237 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
238 z2 <<= CONST_BITS; |
|
239 z3 <<= CONST_BITS; |
|
240 /* Add fudge factor here for final descale. */ |
|
241 z2 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
242 |
|
243 tmp0 = z2 + z3; |
|
244 tmp1 = z2 - z3; |
|
245 |
|
246 tmp10 = tmp0 + tmp2; |
|
247 tmp13 = tmp0 - tmp2; |
|
248 tmp11 = tmp1 + tmp3; |
|
249 tmp12 = tmp1 - tmp3; |
|
250 |
|
251 /* Odd part per figure 8; the matrix is unitary and hence its |
|
252 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
|
253 */ |
|
254 |
|
255 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
|
256 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
257 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
258 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
259 |
|
260 z2 = tmp0 + tmp2; |
|
261 z3 = tmp1 + tmp3; |
|
262 |
|
263 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ |
|
264 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ |
|
265 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ |
|
266 z2 += z1; |
|
267 z3 += z1; |
|
268 |
|
269 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ |
|
270 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ |
|
271 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ |
|
272 tmp0 += z1 + z2; |
|
273 tmp3 += z1 + z3; |
|
274 |
|
275 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ |
|
276 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ |
|
277 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ |
|
278 tmp1 += z1 + z3; |
|
279 tmp2 += z1 + z2; |
|
280 |
|
281 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ |
|
282 |
|
283 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS); |
|
284 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS); |
|
285 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS); |
|
286 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS); |
|
287 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS); |
|
288 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS); |
|
289 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS); |
|
290 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS); |
|
291 |
|
292 inptr++; /* advance pointers to next column */ |
|
293 quantptr++; |
|
294 wsptr++; |
|
295 } |
|
296 |
|
297 /* Pass 2: process rows from work array, store into output array. */ |
|
298 /* Note that we must descale the results by a factor of 8 == 2**3, */ |
|
299 /* and also undo the PASS1_BITS scaling. */ |
|
300 |
|
301 wsptr = workspace; |
|
302 for (ctr = 0; ctr < DCTSIZE; ctr++) { |
|
303 outptr = output_buf[ctr] + output_col; |
|
304 /* Rows of zeroes can be exploited in the same way as we did with columns. |
|
305 * However, the column calculation has created many nonzero AC terms, so |
|
306 * the simplification applies less often (typically 5% to 10% of the time). |
|
307 * On machines with very fast multiplication, it's possible that the |
|
308 * test takes more time than it's worth. In that case this section |
|
309 * may be commented out. |
|
310 */ |
|
311 |
|
312 #ifndef NO_ZERO_ROW_TEST |
|
313 if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && |
|
314 wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { |
|
315 /* AC terms all zero */ |
|
316 JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) |
|
317 & RANGE_MASK]; |
|
318 |
|
319 outptr[0] = dcval; |
|
320 outptr[1] = dcval; |
|
321 outptr[2] = dcval; |
|
322 outptr[3] = dcval; |
|
323 outptr[4] = dcval; |
|
324 outptr[5] = dcval; |
|
325 outptr[6] = dcval; |
|
326 outptr[7] = dcval; |
|
327 |
|
328 wsptr += DCTSIZE; /* advance pointer to next row */ |
|
329 continue; |
|
330 } |
|
331 #endif |
|
332 |
|
333 /* Even part: reverse the even part of the forward DCT. */ |
|
334 /* The rotator is sqrt(2)*c(-6). */ |
|
335 |
|
336 z2 = (INT32) wsptr[2]; |
|
337 z3 = (INT32) wsptr[6]; |
|
338 |
|
339 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); |
|
340 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); |
|
341 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); |
|
342 |
|
343 /* Add fudge factor here for final descale. */ |
|
344 z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
345 z3 = (INT32) wsptr[4]; |
|
346 |
|
347 tmp0 = (z2 + z3) << CONST_BITS; |
|
348 tmp1 = (z2 - z3) << CONST_BITS; |
|
349 |
|
350 tmp10 = tmp0 + tmp2; |
|
351 tmp13 = tmp0 - tmp2; |
|
352 tmp11 = tmp1 + tmp3; |
|
353 tmp12 = tmp1 - tmp3; |
|
354 |
|
355 /* Odd part per figure 8; the matrix is unitary and hence its |
|
356 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
|
357 */ |
|
358 |
|
359 tmp0 = (INT32) wsptr[7]; |
|
360 tmp1 = (INT32) wsptr[5]; |
|
361 tmp2 = (INT32) wsptr[3]; |
|
362 tmp3 = (INT32) wsptr[1]; |
|
363 |
|
364 z2 = tmp0 + tmp2; |
|
365 z3 = tmp1 + tmp3; |
|
366 |
|
367 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ |
|
368 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ |
|
369 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ |
|
370 z2 += z1; |
|
371 z3 += z1; |
|
372 |
|
373 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ |
|
374 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ |
|
375 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ |
|
376 tmp0 += z1 + z2; |
|
377 tmp3 += z1 + z3; |
|
378 |
|
379 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ |
|
380 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ |
|
381 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ |
|
382 tmp1 += z1 + z3; |
|
383 tmp2 += z1 + z2; |
|
384 |
|
385 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ |
|
386 |
|
387 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3, |
|
388 CONST_BITS+PASS1_BITS+3) |
|
389 & RANGE_MASK]; |
|
390 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3, |
|
391 CONST_BITS+PASS1_BITS+3) |
|
392 & RANGE_MASK]; |
|
393 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2, |
|
394 CONST_BITS+PASS1_BITS+3) |
|
395 & RANGE_MASK]; |
|
396 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2, |
|
397 CONST_BITS+PASS1_BITS+3) |
|
398 & RANGE_MASK]; |
|
399 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1, |
|
400 CONST_BITS+PASS1_BITS+3) |
|
401 & RANGE_MASK]; |
|
402 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1, |
|
403 CONST_BITS+PASS1_BITS+3) |
|
404 & RANGE_MASK]; |
|
405 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0, |
|
406 CONST_BITS+PASS1_BITS+3) |
|
407 & RANGE_MASK]; |
|
408 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0, |
|
409 CONST_BITS+PASS1_BITS+3) |
|
410 & RANGE_MASK]; |
|
411 |
|
412 wsptr += DCTSIZE; /* advance pointer to next row */ |
|
413 } |
|
414 } |
|
415 |
|
416 #ifdef IDCT_SCALING_SUPPORTED |
|
417 |
|
418 |
|
419 /* |
|
420 * Perform dequantization and inverse DCT on one block of coefficients, |
|
421 * producing a 7x7 output block. |
|
422 * |
|
423 * Optimized algorithm with 12 multiplications in the 1-D kernel. |
|
424 * cK represents sqrt(2) * cos(K*pi/14). |
|
425 */ |
|
426 |
|
427 GLOBAL(void) |
|
428 jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
429 JCOEFPTR coef_block, |
|
430 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
431 { |
|
432 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13; |
|
433 INT32 z1, z2, z3; |
|
434 JCOEFPTR inptr; |
|
435 ISLOW_MULT_TYPE * quantptr; |
|
436 int * wsptr; |
|
437 JSAMPROW outptr; |
|
438 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
439 int ctr; |
|
440 int workspace[7*7]; /* buffers data between passes */ |
|
441 SHIFT_TEMPS |
|
442 |
|
443 /* Pass 1: process columns from input, store into work array. */ |
|
444 |
|
445 inptr = coef_block; |
|
446 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
447 wsptr = workspace; |
|
448 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { |
|
449 /* Even part */ |
|
450 |
|
451 tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
452 tmp13 <<= CONST_BITS; |
|
453 /* Add fudge factor here for final descale. */ |
|
454 tmp13 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
455 |
|
456 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
457 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
458 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
|
459 |
|
460 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ |
|
461 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ |
|
462 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ |
|
463 tmp0 = z1 + z3; |
|
464 z2 -= tmp0; |
|
465 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ |
|
466 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ |
|
467 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ |
|
468 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ |
|
469 |
|
470 /* Odd part */ |
|
471 |
|
472 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
473 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
474 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
475 |
|
476 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ |
|
477 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ |
|
478 tmp0 = tmp1 - tmp2; |
|
479 tmp1 += tmp2; |
|
480 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ |
|
481 tmp1 += tmp2; |
|
482 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ |
|
483 tmp0 += z2; |
|
484 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ |
|
485 |
|
486 /* Final output stage */ |
|
487 |
|
488 wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); |
|
489 wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); |
|
490 wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); |
|
491 wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); |
|
492 wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); |
|
493 wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); |
|
494 wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS); |
|
495 } |
|
496 |
|
497 /* Pass 2: process 7 rows from work array, store into output array. */ |
|
498 |
|
499 wsptr = workspace; |
|
500 for (ctr = 0; ctr < 7; ctr++) { |
|
501 outptr = output_buf[ctr] + output_col; |
|
502 |
|
503 /* Even part */ |
|
504 |
|
505 /* Add fudge factor here for final descale. */ |
|
506 tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
507 tmp13 <<= CONST_BITS; |
|
508 |
|
509 z1 = (INT32) wsptr[2]; |
|
510 z2 = (INT32) wsptr[4]; |
|
511 z3 = (INT32) wsptr[6]; |
|
512 |
|
513 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ |
|
514 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ |
|
515 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ |
|
516 tmp0 = z1 + z3; |
|
517 z2 -= tmp0; |
|
518 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ |
|
519 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ |
|
520 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ |
|
521 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ |
|
522 |
|
523 /* Odd part */ |
|
524 |
|
525 z1 = (INT32) wsptr[1]; |
|
526 z2 = (INT32) wsptr[3]; |
|
527 z3 = (INT32) wsptr[5]; |
|
528 |
|
529 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ |
|
530 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ |
|
531 tmp0 = tmp1 - tmp2; |
|
532 tmp1 += tmp2; |
|
533 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ |
|
534 tmp1 += tmp2; |
|
535 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ |
|
536 tmp0 += z2; |
|
537 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ |
|
538 |
|
539 /* Final output stage */ |
|
540 |
|
541 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, |
|
542 CONST_BITS+PASS1_BITS+3) |
|
543 & RANGE_MASK]; |
|
544 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, |
|
545 CONST_BITS+PASS1_BITS+3) |
|
546 & RANGE_MASK]; |
|
547 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, |
|
548 CONST_BITS+PASS1_BITS+3) |
|
549 & RANGE_MASK]; |
|
550 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, |
|
551 CONST_BITS+PASS1_BITS+3) |
|
552 & RANGE_MASK]; |
|
553 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, |
|
554 CONST_BITS+PASS1_BITS+3) |
|
555 & RANGE_MASK]; |
|
556 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, |
|
557 CONST_BITS+PASS1_BITS+3) |
|
558 & RANGE_MASK]; |
|
559 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13, |
|
560 CONST_BITS+PASS1_BITS+3) |
|
561 & RANGE_MASK]; |
|
562 |
|
563 wsptr += 7; /* advance pointer to next row */ |
|
564 } |
|
565 } |
|
566 |
|
567 |
|
568 /* |
|
569 * Perform dequantization and inverse DCT on one block of coefficients, |
|
570 * producing a reduced-size 6x6 output block. |
|
571 * |
|
572 * Optimized algorithm with 3 multiplications in the 1-D kernel. |
|
573 * cK represents sqrt(2) * cos(K*pi/12). |
|
574 */ |
|
575 |
|
576 GLOBAL(void) |
|
577 jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
578 JCOEFPTR coef_block, |
|
579 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
580 { |
|
581 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; |
|
582 INT32 z1, z2, z3; |
|
583 JCOEFPTR inptr; |
|
584 ISLOW_MULT_TYPE * quantptr; |
|
585 int * wsptr; |
|
586 JSAMPROW outptr; |
|
587 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
588 int ctr; |
|
589 int workspace[6*6]; /* buffers data between passes */ |
|
590 SHIFT_TEMPS |
|
591 |
|
592 /* Pass 1: process columns from input, store into work array. */ |
|
593 |
|
594 inptr = coef_block; |
|
595 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
596 wsptr = workspace; |
|
597 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { |
|
598 /* Even part */ |
|
599 |
|
600 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
601 tmp0 <<= CONST_BITS; |
|
602 /* Add fudge factor here for final descale. */ |
|
603 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
604 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
605 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ |
|
606 tmp1 = tmp0 + tmp10; |
|
607 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); |
|
608 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
609 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ |
|
610 tmp10 = tmp1 + tmp0; |
|
611 tmp12 = tmp1 - tmp0; |
|
612 |
|
613 /* Odd part */ |
|
614 |
|
615 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
616 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
617 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
618 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ |
|
619 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); |
|
620 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); |
|
621 tmp1 = (z1 - z2 - z3) << PASS1_BITS; |
|
622 |
|
623 /* Final output stage */ |
|
624 |
|
625 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); |
|
626 wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); |
|
627 wsptr[6*1] = (int) (tmp11 + tmp1); |
|
628 wsptr[6*4] = (int) (tmp11 - tmp1); |
|
629 wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); |
|
630 wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); |
|
631 } |
|
632 |
|
633 /* Pass 2: process 6 rows from work array, store into output array. */ |
|
634 |
|
635 wsptr = workspace; |
|
636 for (ctr = 0; ctr < 6; ctr++) { |
|
637 outptr = output_buf[ctr] + output_col; |
|
638 |
|
639 /* Even part */ |
|
640 |
|
641 /* Add fudge factor here for final descale. */ |
|
642 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
643 tmp0 <<= CONST_BITS; |
|
644 tmp2 = (INT32) wsptr[4]; |
|
645 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ |
|
646 tmp1 = tmp0 + tmp10; |
|
647 tmp11 = tmp0 - tmp10 - tmp10; |
|
648 tmp10 = (INT32) wsptr[2]; |
|
649 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ |
|
650 tmp10 = tmp1 + tmp0; |
|
651 tmp12 = tmp1 - tmp0; |
|
652 |
|
653 /* Odd part */ |
|
654 |
|
655 z1 = (INT32) wsptr[1]; |
|
656 z2 = (INT32) wsptr[3]; |
|
657 z3 = (INT32) wsptr[5]; |
|
658 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ |
|
659 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); |
|
660 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); |
|
661 tmp1 = (z1 - z2 - z3) << CONST_BITS; |
|
662 |
|
663 /* Final output stage */ |
|
664 |
|
665 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, |
|
666 CONST_BITS+PASS1_BITS+3) |
|
667 & RANGE_MASK]; |
|
668 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, |
|
669 CONST_BITS+PASS1_BITS+3) |
|
670 & RANGE_MASK]; |
|
671 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, |
|
672 CONST_BITS+PASS1_BITS+3) |
|
673 & RANGE_MASK]; |
|
674 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, |
|
675 CONST_BITS+PASS1_BITS+3) |
|
676 & RANGE_MASK]; |
|
677 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, |
|
678 CONST_BITS+PASS1_BITS+3) |
|
679 & RANGE_MASK]; |
|
680 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, |
|
681 CONST_BITS+PASS1_BITS+3) |
|
682 & RANGE_MASK]; |
|
683 |
|
684 wsptr += 6; /* advance pointer to next row */ |
|
685 } |
|
686 } |
|
687 |
|
688 |
|
689 /* |
|
690 * Perform dequantization and inverse DCT on one block of coefficients, |
|
691 * producing a reduced-size 5x5 output block. |
|
692 * |
|
693 * Optimized algorithm with 5 multiplications in the 1-D kernel. |
|
694 * cK represents sqrt(2) * cos(K*pi/10). |
|
695 */ |
|
696 |
|
697 GLOBAL(void) |
|
698 jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
699 JCOEFPTR coef_block, |
|
700 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
701 { |
|
702 INT32 tmp0, tmp1, tmp10, tmp11, tmp12; |
|
703 INT32 z1, z2, z3; |
|
704 JCOEFPTR inptr; |
|
705 ISLOW_MULT_TYPE * quantptr; |
|
706 int * wsptr; |
|
707 JSAMPROW outptr; |
|
708 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
709 int ctr; |
|
710 int workspace[5*5]; /* buffers data between passes */ |
|
711 SHIFT_TEMPS |
|
712 |
|
713 /* Pass 1: process columns from input, store into work array. */ |
|
714 |
|
715 inptr = coef_block; |
|
716 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
717 wsptr = workspace; |
|
718 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { |
|
719 /* Even part */ |
|
720 |
|
721 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
722 tmp12 <<= CONST_BITS; |
|
723 /* Add fudge factor here for final descale. */ |
|
724 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
725 tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
726 tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
727 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ |
|
728 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ |
|
729 z3 = tmp12 + z2; |
|
730 tmp10 = z3 + z1; |
|
731 tmp11 = z3 - z1; |
|
732 tmp12 -= z2 << 2; |
|
733 |
|
734 /* Odd part */ |
|
735 |
|
736 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
737 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
738 |
|
739 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ |
|
740 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ |
|
741 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ |
|
742 |
|
743 /* Final output stage */ |
|
744 |
|
745 wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); |
|
746 wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); |
|
747 wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); |
|
748 wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); |
|
749 wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); |
|
750 } |
|
751 |
|
752 /* Pass 2: process 5 rows from work array, store into output array. */ |
|
753 |
|
754 wsptr = workspace; |
|
755 for (ctr = 0; ctr < 5; ctr++) { |
|
756 outptr = output_buf[ctr] + output_col; |
|
757 |
|
758 /* Even part */ |
|
759 |
|
760 /* Add fudge factor here for final descale. */ |
|
761 tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
762 tmp12 <<= CONST_BITS; |
|
763 tmp0 = (INT32) wsptr[2]; |
|
764 tmp1 = (INT32) wsptr[4]; |
|
765 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ |
|
766 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ |
|
767 z3 = tmp12 + z2; |
|
768 tmp10 = z3 + z1; |
|
769 tmp11 = z3 - z1; |
|
770 tmp12 -= z2 << 2; |
|
771 |
|
772 /* Odd part */ |
|
773 |
|
774 z2 = (INT32) wsptr[1]; |
|
775 z3 = (INT32) wsptr[3]; |
|
776 |
|
777 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ |
|
778 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ |
|
779 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ |
|
780 |
|
781 /* Final output stage */ |
|
782 |
|
783 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, |
|
784 CONST_BITS+PASS1_BITS+3) |
|
785 & RANGE_MASK]; |
|
786 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, |
|
787 CONST_BITS+PASS1_BITS+3) |
|
788 & RANGE_MASK]; |
|
789 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, |
|
790 CONST_BITS+PASS1_BITS+3) |
|
791 & RANGE_MASK]; |
|
792 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, |
|
793 CONST_BITS+PASS1_BITS+3) |
|
794 & RANGE_MASK]; |
|
795 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, |
|
796 CONST_BITS+PASS1_BITS+3) |
|
797 & RANGE_MASK]; |
|
798 |
|
799 wsptr += 5; /* advance pointer to next row */ |
|
800 } |
|
801 } |
|
802 |
|
803 |
|
804 /* |
|
805 * Perform dequantization and inverse DCT on one block of coefficients, |
|
806 * producing a reduced-size 4x4 output block. |
|
807 * |
|
808 * Optimized algorithm with 3 multiplications in the 1-D kernel. |
|
809 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. |
|
810 */ |
|
811 |
|
812 GLOBAL(void) |
|
813 jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
814 JCOEFPTR coef_block, |
|
815 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
816 { |
|
817 INT32 tmp0, tmp2, tmp10, tmp12; |
|
818 INT32 z1, z2, z3; |
|
819 JCOEFPTR inptr; |
|
820 ISLOW_MULT_TYPE * quantptr; |
|
821 int * wsptr; |
|
822 JSAMPROW outptr; |
|
823 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
824 int ctr; |
|
825 int workspace[4*4]; /* buffers data between passes */ |
|
826 SHIFT_TEMPS |
|
827 |
|
828 /* Pass 1: process columns from input, store into work array. */ |
|
829 |
|
830 inptr = coef_block; |
|
831 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
832 wsptr = workspace; |
|
833 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) { |
|
834 /* Even part */ |
|
835 |
|
836 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
837 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
838 |
|
839 tmp10 = (tmp0 + tmp2) << PASS1_BITS; |
|
840 tmp12 = (tmp0 - tmp2) << PASS1_BITS; |
|
841 |
|
842 /* Odd part */ |
|
843 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ |
|
844 |
|
845 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
846 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
847 |
|
848 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ |
|
849 /* Add fudge factor here for final descale. */ |
|
850 z1 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
851 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */ |
|
852 CONST_BITS-PASS1_BITS); |
|
853 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */ |
|
854 CONST_BITS-PASS1_BITS); |
|
855 |
|
856 /* Final output stage */ |
|
857 |
|
858 wsptr[4*0] = (int) (tmp10 + tmp0); |
|
859 wsptr[4*3] = (int) (tmp10 - tmp0); |
|
860 wsptr[4*1] = (int) (tmp12 + tmp2); |
|
861 wsptr[4*2] = (int) (tmp12 - tmp2); |
|
862 } |
|
863 |
|
864 /* Pass 2: process 4 rows from work array, store into output array. */ |
|
865 |
|
866 wsptr = workspace; |
|
867 for (ctr = 0; ctr < 4; ctr++) { |
|
868 outptr = output_buf[ctr] + output_col; |
|
869 |
|
870 /* Even part */ |
|
871 |
|
872 /* Add fudge factor here for final descale. */ |
|
873 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
874 tmp2 = (INT32) wsptr[2]; |
|
875 |
|
876 tmp10 = (tmp0 + tmp2) << CONST_BITS; |
|
877 tmp12 = (tmp0 - tmp2) << CONST_BITS; |
|
878 |
|
879 /* Odd part */ |
|
880 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ |
|
881 |
|
882 z2 = (INT32) wsptr[1]; |
|
883 z3 = (INT32) wsptr[3]; |
|
884 |
|
885 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ |
|
886 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ |
|
887 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ |
|
888 |
|
889 /* Final output stage */ |
|
890 |
|
891 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, |
|
892 CONST_BITS+PASS1_BITS+3) |
|
893 & RANGE_MASK]; |
|
894 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, |
|
895 CONST_BITS+PASS1_BITS+3) |
|
896 & RANGE_MASK]; |
|
897 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, |
|
898 CONST_BITS+PASS1_BITS+3) |
|
899 & RANGE_MASK]; |
|
900 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, |
|
901 CONST_BITS+PASS1_BITS+3) |
|
902 & RANGE_MASK]; |
|
903 |
|
904 wsptr += 4; /* advance pointer to next row */ |
|
905 } |
|
906 } |
|
907 |
|
908 |
|
909 /* |
|
910 * Perform dequantization and inverse DCT on one block of coefficients, |
|
911 * producing a reduced-size 3x3 output block. |
|
912 * |
|
913 * Optimized algorithm with 2 multiplications in the 1-D kernel. |
|
914 * cK represents sqrt(2) * cos(K*pi/6). |
|
915 */ |
|
916 |
|
917 GLOBAL(void) |
|
918 jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
919 JCOEFPTR coef_block, |
|
920 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
921 { |
|
922 INT32 tmp0, tmp2, tmp10, tmp12; |
|
923 JCOEFPTR inptr; |
|
924 ISLOW_MULT_TYPE * quantptr; |
|
925 int * wsptr; |
|
926 JSAMPROW outptr; |
|
927 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
928 int ctr; |
|
929 int workspace[3*3]; /* buffers data between passes */ |
|
930 SHIFT_TEMPS |
|
931 |
|
932 /* Pass 1: process columns from input, store into work array. */ |
|
933 |
|
934 inptr = coef_block; |
|
935 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
936 wsptr = workspace; |
|
937 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { |
|
938 /* Even part */ |
|
939 |
|
940 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
941 tmp0 <<= CONST_BITS; |
|
942 /* Add fudge factor here for final descale. */ |
|
943 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
944 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
945 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ |
|
946 tmp10 = tmp0 + tmp12; |
|
947 tmp2 = tmp0 - tmp12 - tmp12; |
|
948 |
|
949 /* Odd part */ |
|
950 |
|
951 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
952 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ |
|
953 |
|
954 /* Final output stage */ |
|
955 |
|
956 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); |
|
957 wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); |
|
958 wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); |
|
959 } |
|
960 |
|
961 /* Pass 2: process 3 rows from work array, store into output array. */ |
|
962 |
|
963 wsptr = workspace; |
|
964 for (ctr = 0; ctr < 3; ctr++) { |
|
965 outptr = output_buf[ctr] + output_col; |
|
966 |
|
967 /* Even part */ |
|
968 |
|
969 /* Add fudge factor here for final descale. */ |
|
970 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
971 tmp0 <<= CONST_BITS; |
|
972 tmp2 = (INT32) wsptr[2]; |
|
973 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ |
|
974 tmp10 = tmp0 + tmp12; |
|
975 tmp2 = tmp0 - tmp12 - tmp12; |
|
976 |
|
977 /* Odd part */ |
|
978 |
|
979 tmp12 = (INT32) wsptr[1]; |
|
980 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ |
|
981 |
|
982 /* Final output stage */ |
|
983 |
|
984 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, |
|
985 CONST_BITS+PASS1_BITS+3) |
|
986 & RANGE_MASK]; |
|
987 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, |
|
988 CONST_BITS+PASS1_BITS+3) |
|
989 & RANGE_MASK]; |
|
990 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, |
|
991 CONST_BITS+PASS1_BITS+3) |
|
992 & RANGE_MASK]; |
|
993 |
|
994 wsptr += 3; /* advance pointer to next row */ |
|
995 } |
|
996 } |
|
997 |
|
998 |
|
999 /* |
|
1000 * Perform dequantization and inverse DCT on one block of coefficients, |
|
1001 * producing a reduced-size 2x2 output block. |
|
1002 * |
|
1003 * Multiplication-less algorithm. |
|
1004 */ |
|
1005 |
|
1006 GLOBAL(void) |
|
1007 jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
1008 JCOEFPTR coef_block, |
|
1009 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
1010 { |
|
1011 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; |
|
1012 ISLOW_MULT_TYPE * quantptr; |
|
1013 JSAMPROW outptr; |
|
1014 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
1015 SHIFT_TEMPS |
|
1016 |
|
1017 /* Pass 1: process columns from input. */ |
|
1018 |
|
1019 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
1020 |
|
1021 /* Column 0 */ |
|
1022 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
1023 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
1024 /* Add fudge factor here for final descale. */ |
|
1025 tmp4 += ONE << 2; |
|
1026 |
|
1027 tmp0 = tmp4 + tmp5; |
|
1028 tmp2 = tmp4 - tmp5; |
|
1029 |
|
1030 /* Column 1 */ |
|
1031 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]); |
|
1032 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]); |
|
1033 |
|
1034 tmp1 = tmp4 + tmp5; |
|
1035 tmp3 = tmp4 - tmp5; |
|
1036 |
|
1037 /* Pass 2: process 2 rows, store into output array. */ |
|
1038 |
|
1039 /* Row 0 */ |
|
1040 outptr = output_buf[0] + output_col; |
|
1041 |
|
1042 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; |
|
1043 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; |
|
1044 |
|
1045 /* Row 1 */ |
|
1046 outptr = output_buf[1] + output_col; |
|
1047 |
|
1048 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK]; |
|
1049 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK]; |
|
1050 } |
|
1051 |
|
1052 |
|
1053 /* |
|
1054 * Perform dequantization and inverse DCT on one block of coefficients, |
|
1055 * producing a reduced-size 1x1 output block. |
|
1056 * |
|
1057 * We hardly need an inverse DCT routine for this: just take the |
|
1058 * average pixel value, which is one-eighth of the DC coefficient. |
|
1059 */ |
|
1060 |
|
1061 GLOBAL(void) |
|
1062 jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
1063 JCOEFPTR coef_block, |
|
1064 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
1065 { |
|
1066 int dcval; |
|
1067 ISLOW_MULT_TYPE * quantptr; |
|
1068 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
1069 SHIFT_TEMPS |
|
1070 |
|
1071 /* 1x1 is trivial: just take the DC coefficient divided by 8. */ |
|
1072 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
1073 dcval = DEQUANTIZE(coef_block[0], quantptr[0]); |
|
1074 dcval = (int) DESCALE((INT32) dcval, 3); |
|
1075 |
|
1076 output_buf[0][output_col] = range_limit[dcval & RANGE_MASK]; |
|
1077 } |
|
1078 |
|
1079 |
|
1080 /* |
|
1081 * Perform dequantization and inverse DCT on one block of coefficients, |
|
1082 * producing a 9x9 output block. |
|
1083 * |
|
1084 * Optimized algorithm with 10 multiplications in the 1-D kernel. |
|
1085 * cK represents sqrt(2) * cos(K*pi/18). |
|
1086 */ |
|
1087 |
|
1088 GLOBAL(void) |
|
1089 jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
1090 JCOEFPTR coef_block, |
|
1091 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
1092 { |
|
1093 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14; |
|
1094 INT32 z1, z2, z3, z4; |
|
1095 JCOEFPTR inptr; |
|
1096 ISLOW_MULT_TYPE * quantptr; |
|
1097 int * wsptr; |
|
1098 JSAMPROW outptr; |
|
1099 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
1100 int ctr; |
|
1101 int workspace[8*9]; /* buffers data between passes */ |
|
1102 SHIFT_TEMPS |
|
1103 |
|
1104 /* Pass 1: process columns from input, store into work array. */ |
|
1105 |
|
1106 inptr = coef_block; |
|
1107 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
1108 wsptr = workspace; |
|
1109 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
|
1110 /* Even part */ |
|
1111 |
|
1112 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
1113 tmp0 <<= CONST_BITS; |
|
1114 /* Add fudge factor here for final descale. */ |
|
1115 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
1116 |
|
1117 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
1118 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
1119 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
|
1120 |
|
1121 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ |
|
1122 tmp1 = tmp0 + tmp3; |
|
1123 tmp2 = tmp0 - tmp3 - tmp3; |
|
1124 |
|
1125 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ |
|
1126 tmp11 = tmp2 + tmp0; |
|
1127 tmp14 = tmp2 - tmp0 - tmp0; |
|
1128 |
|
1129 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ |
|
1130 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ |
|
1131 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ |
|
1132 |
|
1133 tmp10 = tmp1 + tmp0 - tmp3; |
|
1134 tmp12 = tmp1 - tmp0 + tmp2; |
|
1135 tmp13 = tmp1 - tmp2 + tmp3; |
|
1136 |
|
1137 /* Odd part */ |
|
1138 |
|
1139 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
1140 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
1141 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
1142 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
|
1143 |
|
1144 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ |
|
1145 |
|
1146 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ |
|
1147 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ |
|
1148 tmp0 = tmp2 + tmp3 - z2; |
|
1149 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ |
|
1150 tmp2 += z2 - tmp1; |
|
1151 tmp3 += z2 + tmp1; |
|
1152 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ |
|
1153 |
|
1154 /* Final output stage */ |
|
1155 |
|
1156 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); |
|
1157 wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); |
|
1158 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); |
|
1159 wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); |
|
1160 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); |
|
1161 wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); |
|
1162 wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS); |
|
1163 wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS); |
|
1164 wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS); |
|
1165 } |
|
1166 |
|
1167 /* Pass 2: process 9 rows from work array, store into output array. */ |
|
1168 |
|
1169 wsptr = workspace; |
|
1170 for (ctr = 0; ctr < 9; ctr++) { |
|
1171 outptr = output_buf[ctr] + output_col; |
|
1172 |
|
1173 /* Even part */ |
|
1174 |
|
1175 /* Add fudge factor here for final descale. */ |
|
1176 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
1177 tmp0 <<= CONST_BITS; |
|
1178 |
|
1179 z1 = (INT32) wsptr[2]; |
|
1180 z2 = (INT32) wsptr[4]; |
|
1181 z3 = (INT32) wsptr[6]; |
|
1182 |
|
1183 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ |
|
1184 tmp1 = tmp0 + tmp3; |
|
1185 tmp2 = tmp0 - tmp3 - tmp3; |
|
1186 |
|
1187 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ |
|
1188 tmp11 = tmp2 + tmp0; |
|
1189 tmp14 = tmp2 - tmp0 - tmp0; |
|
1190 |
|
1191 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ |
|
1192 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ |
|
1193 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ |
|
1194 |
|
1195 tmp10 = tmp1 + tmp0 - tmp3; |
|
1196 tmp12 = tmp1 - tmp0 + tmp2; |
|
1197 tmp13 = tmp1 - tmp2 + tmp3; |
|
1198 |
|
1199 /* Odd part */ |
|
1200 |
|
1201 z1 = (INT32) wsptr[1]; |
|
1202 z2 = (INT32) wsptr[3]; |
|
1203 z3 = (INT32) wsptr[5]; |
|
1204 z4 = (INT32) wsptr[7]; |
|
1205 |
|
1206 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ |
|
1207 |
|
1208 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ |
|
1209 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ |
|
1210 tmp0 = tmp2 + tmp3 - z2; |
|
1211 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ |
|
1212 tmp2 += z2 - tmp1; |
|
1213 tmp3 += z2 + tmp1; |
|
1214 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ |
|
1215 |
|
1216 /* Final output stage */ |
|
1217 |
|
1218 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, |
|
1219 CONST_BITS+PASS1_BITS+3) |
|
1220 & RANGE_MASK]; |
|
1221 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, |
|
1222 CONST_BITS+PASS1_BITS+3) |
|
1223 & RANGE_MASK]; |
|
1224 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, |
|
1225 CONST_BITS+PASS1_BITS+3) |
|
1226 & RANGE_MASK]; |
|
1227 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, |
|
1228 CONST_BITS+PASS1_BITS+3) |
|
1229 & RANGE_MASK]; |
|
1230 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, |
|
1231 CONST_BITS+PASS1_BITS+3) |
|
1232 & RANGE_MASK]; |
|
1233 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, |
|
1234 CONST_BITS+PASS1_BITS+3) |
|
1235 & RANGE_MASK]; |
|
1236 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3, |
|
1237 CONST_BITS+PASS1_BITS+3) |
|
1238 & RANGE_MASK]; |
|
1239 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3, |
|
1240 CONST_BITS+PASS1_BITS+3) |
|
1241 & RANGE_MASK]; |
|
1242 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14, |
|
1243 CONST_BITS+PASS1_BITS+3) |
|
1244 & RANGE_MASK]; |
|
1245 |
|
1246 wsptr += 8; /* advance pointer to next row */ |
|
1247 } |
|
1248 } |
|
1249 |
|
1250 |
|
1251 /* |
|
1252 * Perform dequantization and inverse DCT on one block of coefficients, |
|
1253 * producing a 10x10 output block. |
|
1254 * |
|
1255 * Optimized algorithm with 12 multiplications in the 1-D kernel. |
|
1256 * cK represents sqrt(2) * cos(K*pi/20). |
|
1257 */ |
|
1258 |
|
1259 GLOBAL(void) |
|
1260 jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
1261 JCOEFPTR coef_block, |
|
1262 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
1263 { |
|
1264 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; |
|
1265 INT32 tmp20, tmp21, tmp22, tmp23, tmp24; |
154 INT32 z1, z2, z3, z4, z5; |
1266 INT32 z1, z2, z3, z4, z5; |
155 JCOEFPTR inptr; |
1267 JCOEFPTR inptr; |
156 ISLOW_MULT_TYPE * quantptr; |
1268 ISLOW_MULT_TYPE * quantptr; |
157 int * wsptr; |
1269 int * wsptr; |
158 JSAMPROW outptr; |
1270 JSAMPROW outptr; |
159 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
1271 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
160 int ctr; |
1272 int ctr; |
161 int workspace[DCTSIZE2]; /* buffers data between passes */ |
1273 int workspace[8*10]; /* buffers data between passes */ |
|
1274 SHIFT_TEMPS |
|
1275 |
|
1276 /* Pass 1: process columns from input, store into work array. */ |
|
1277 |
|
1278 inptr = coef_block; |
|
1279 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
1280 wsptr = workspace; |
|
1281 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
|
1282 /* Even part */ |
|
1283 |
|
1284 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
1285 z3 <<= CONST_BITS; |
|
1286 /* Add fudge factor here for final descale. */ |
|
1287 z3 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
1288 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
1289 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ |
|
1290 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ |
|
1291 tmp10 = z3 + z1; |
|
1292 tmp11 = z3 - z2; |
|
1293 |
|
1294 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */ |
|
1295 CONST_BITS-PASS1_BITS); |
|
1296 |
|
1297 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
1298 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
|
1299 |
|
1300 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ |
|
1301 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ |
|
1302 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ |
|
1303 |
|
1304 tmp20 = tmp10 + tmp12; |
|
1305 tmp24 = tmp10 - tmp12; |
|
1306 tmp21 = tmp11 + tmp13; |
|
1307 tmp23 = tmp11 - tmp13; |
|
1308 |
|
1309 /* Odd part */ |
|
1310 |
|
1311 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
1312 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
1313 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
1314 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
|
1315 |
|
1316 tmp11 = z2 + z4; |
|
1317 tmp13 = z2 - z4; |
|
1318 |
|
1319 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ |
|
1320 z5 = z3 << CONST_BITS; |
|
1321 |
|
1322 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ |
|
1323 z4 = z5 + tmp12; |
|
1324 |
|
1325 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ |
|
1326 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ |
|
1327 |
|
1328 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ |
|
1329 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1)); |
|
1330 |
|
1331 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS; |
|
1332 |
|
1333 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ |
|
1334 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ |
|
1335 |
|
1336 /* Final output stage */ |
|
1337 |
|
1338 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |
|
1339 wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |
|
1340 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |
|
1341 wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |
|
1342 wsptr[8*2] = (int) (tmp22 + tmp12); |
|
1343 wsptr[8*7] = (int) (tmp22 - tmp12); |
|
1344 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); |
|
1345 wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); |
|
1346 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); |
|
1347 wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); |
|
1348 } |
|
1349 |
|
1350 /* Pass 2: process 10 rows from work array, store into output array. */ |
|
1351 |
|
1352 wsptr = workspace; |
|
1353 for (ctr = 0; ctr < 10; ctr++) { |
|
1354 outptr = output_buf[ctr] + output_col; |
|
1355 |
|
1356 /* Even part */ |
|
1357 |
|
1358 /* Add fudge factor here for final descale. */ |
|
1359 z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
1360 z3 <<= CONST_BITS; |
|
1361 z4 = (INT32) wsptr[4]; |
|
1362 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ |
|
1363 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ |
|
1364 tmp10 = z3 + z1; |
|
1365 tmp11 = z3 - z2; |
|
1366 |
|
1367 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */ |
|
1368 |
|
1369 z2 = (INT32) wsptr[2]; |
|
1370 z3 = (INT32) wsptr[6]; |
|
1371 |
|
1372 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ |
|
1373 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ |
|
1374 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ |
|
1375 |
|
1376 tmp20 = tmp10 + tmp12; |
|
1377 tmp24 = tmp10 - tmp12; |
|
1378 tmp21 = tmp11 + tmp13; |
|
1379 tmp23 = tmp11 - tmp13; |
|
1380 |
|
1381 /* Odd part */ |
|
1382 |
|
1383 z1 = (INT32) wsptr[1]; |
|
1384 z2 = (INT32) wsptr[3]; |
|
1385 z3 = (INT32) wsptr[5]; |
|
1386 z3 <<= CONST_BITS; |
|
1387 z4 = (INT32) wsptr[7]; |
|
1388 |
|
1389 tmp11 = z2 + z4; |
|
1390 tmp13 = z2 - z4; |
|
1391 |
|
1392 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ |
|
1393 |
|
1394 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ |
|
1395 z4 = z3 + tmp12; |
|
1396 |
|
1397 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ |
|
1398 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ |
|
1399 |
|
1400 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ |
|
1401 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1)); |
|
1402 |
|
1403 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3; |
|
1404 |
|
1405 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ |
|
1406 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ |
|
1407 |
|
1408 /* Final output stage */ |
|
1409 |
|
1410 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |
|
1411 CONST_BITS+PASS1_BITS+3) |
|
1412 & RANGE_MASK]; |
|
1413 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |
|
1414 CONST_BITS+PASS1_BITS+3) |
|
1415 & RANGE_MASK]; |
|
1416 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |
|
1417 CONST_BITS+PASS1_BITS+3) |
|
1418 & RANGE_MASK]; |
|
1419 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |
|
1420 CONST_BITS+PASS1_BITS+3) |
|
1421 & RANGE_MASK]; |
|
1422 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |
|
1423 CONST_BITS+PASS1_BITS+3) |
|
1424 & RANGE_MASK]; |
|
1425 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |
|
1426 CONST_BITS+PASS1_BITS+3) |
|
1427 & RANGE_MASK]; |
|
1428 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, |
|
1429 CONST_BITS+PASS1_BITS+3) |
|
1430 & RANGE_MASK]; |
|
1431 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, |
|
1432 CONST_BITS+PASS1_BITS+3) |
|
1433 & RANGE_MASK]; |
|
1434 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, |
|
1435 CONST_BITS+PASS1_BITS+3) |
|
1436 & RANGE_MASK]; |
|
1437 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, |
|
1438 CONST_BITS+PASS1_BITS+3) |
|
1439 & RANGE_MASK]; |
|
1440 |
|
1441 wsptr += 8; /* advance pointer to next row */ |
|
1442 } |
|
1443 } |
|
1444 |
|
1445 |
|
1446 /* |
|
1447 * Perform dequantization and inverse DCT on one block of coefficients, |
|
1448 * producing a 11x11 output block. |
|
1449 * |
|
1450 * Optimized algorithm with 24 multiplications in the 1-D kernel. |
|
1451 * cK represents sqrt(2) * cos(K*pi/22). |
|
1452 */ |
|
1453 |
|
1454 GLOBAL(void) |
|
1455 jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
1456 JCOEFPTR coef_block, |
|
1457 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
1458 { |
|
1459 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; |
|
1460 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; |
|
1461 INT32 z1, z2, z3, z4; |
|
1462 JCOEFPTR inptr; |
|
1463 ISLOW_MULT_TYPE * quantptr; |
|
1464 int * wsptr; |
|
1465 JSAMPROW outptr; |
|
1466 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
1467 int ctr; |
|
1468 int workspace[8*11]; /* buffers data between passes */ |
|
1469 SHIFT_TEMPS |
|
1470 |
|
1471 /* Pass 1: process columns from input, store into work array. */ |
|
1472 |
|
1473 inptr = coef_block; |
|
1474 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
1475 wsptr = workspace; |
|
1476 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
|
1477 /* Even part */ |
|
1478 |
|
1479 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
1480 tmp10 <<= CONST_BITS; |
|
1481 /* Add fudge factor here for final descale. */ |
|
1482 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
1483 |
|
1484 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
1485 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
1486 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
|
1487 |
|
1488 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ |
|
1489 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ |
|
1490 z4 = z1 + z3; |
|
1491 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ |
|
1492 z4 -= z2; |
|
1493 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ |
|
1494 tmp21 = tmp20 + tmp23 + tmp25 - |
|
1495 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ |
|
1496 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ |
|
1497 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ |
|
1498 tmp24 += tmp25; |
|
1499 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ |
|
1500 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ |
|
1501 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ |
|
1502 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ |
|
1503 |
|
1504 /* Odd part */ |
|
1505 |
|
1506 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
1507 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
1508 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
1509 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
|
1510 |
|
1511 tmp11 = z1 + z2; |
|
1512 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ |
|
1513 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ |
|
1514 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ |
|
1515 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ |
|
1516 tmp10 = tmp11 + tmp12 + tmp13 - |
|
1517 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ |
|
1518 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ |
|
1519 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ |
|
1520 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ |
|
1521 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ |
|
1522 tmp11 += z1; |
|
1523 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ |
|
1524 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ |
|
1525 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ |
|
1526 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ |
|
1527 |
|
1528 /* Final output stage */ |
|
1529 |
|
1530 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |
|
1531 wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |
|
1532 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |
|
1533 wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |
|
1534 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); |
|
1535 wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); |
|
1536 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); |
|
1537 wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); |
|
1538 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); |
|
1539 wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); |
|
1540 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS); |
|
1541 } |
|
1542 |
|
1543 /* Pass 2: process 11 rows from work array, store into output array. */ |
|
1544 |
|
1545 wsptr = workspace; |
|
1546 for (ctr = 0; ctr < 11; ctr++) { |
|
1547 outptr = output_buf[ctr] + output_col; |
|
1548 |
|
1549 /* Even part */ |
|
1550 |
|
1551 /* Add fudge factor here for final descale. */ |
|
1552 tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
1553 tmp10 <<= CONST_BITS; |
|
1554 |
|
1555 z1 = (INT32) wsptr[2]; |
|
1556 z2 = (INT32) wsptr[4]; |
|
1557 z3 = (INT32) wsptr[6]; |
|
1558 |
|
1559 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ |
|
1560 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ |
|
1561 z4 = z1 + z3; |
|
1562 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ |
|
1563 z4 -= z2; |
|
1564 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ |
|
1565 tmp21 = tmp20 + tmp23 + tmp25 - |
|
1566 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ |
|
1567 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ |
|
1568 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ |
|
1569 tmp24 += tmp25; |
|
1570 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ |
|
1571 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ |
|
1572 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ |
|
1573 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ |
|
1574 |
|
1575 /* Odd part */ |
|
1576 |
|
1577 z1 = (INT32) wsptr[1]; |
|
1578 z2 = (INT32) wsptr[3]; |
|
1579 z3 = (INT32) wsptr[5]; |
|
1580 z4 = (INT32) wsptr[7]; |
|
1581 |
|
1582 tmp11 = z1 + z2; |
|
1583 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ |
|
1584 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ |
|
1585 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ |
|
1586 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ |
|
1587 tmp10 = tmp11 + tmp12 + tmp13 - |
|
1588 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ |
|
1589 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ |
|
1590 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ |
|
1591 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ |
|
1592 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ |
|
1593 tmp11 += z1; |
|
1594 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ |
|
1595 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ |
|
1596 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ |
|
1597 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ |
|
1598 |
|
1599 /* Final output stage */ |
|
1600 |
|
1601 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |
|
1602 CONST_BITS+PASS1_BITS+3) |
|
1603 & RANGE_MASK]; |
|
1604 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |
|
1605 CONST_BITS+PASS1_BITS+3) |
|
1606 & RANGE_MASK]; |
|
1607 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |
|
1608 CONST_BITS+PASS1_BITS+3) |
|
1609 & RANGE_MASK]; |
|
1610 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |
|
1611 CONST_BITS+PASS1_BITS+3) |
|
1612 & RANGE_MASK]; |
|
1613 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |
|
1614 CONST_BITS+PASS1_BITS+3) |
|
1615 & RANGE_MASK]; |
|
1616 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |
|
1617 CONST_BITS+PASS1_BITS+3) |
|
1618 & RANGE_MASK]; |
|
1619 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, |
|
1620 CONST_BITS+PASS1_BITS+3) |
|
1621 & RANGE_MASK]; |
|
1622 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, |
|
1623 CONST_BITS+PASS1_BITS+3) |
|
1624 & RANGE_MASK]; |
|
1625 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, |
|
1626 CONST_BITS+PASS1_BITS+3) |
|
1627 & RANGE_MASK]; |
|
1628 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, |
|
1629 CONST_BITS+PASS1_BITS+3) |
|
1630 & RANGE_MASK]; |
|
1631 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25, |
|
1632 CONST_BITS+PASS1_BITS+3) |
|
1633 & RANGE_MASK]; |
|
1634 |
|
1635 wsptr += 8; /* advance pointer to next row */ |
|
1636 } |
|
1637 } |
|
1638 |
|
1639 |
|
1640 /* |
|
1641 * Perform dequantization and inverse DCT on one block of coefficients, |
|
1642 * producing a 12x12 output block. |
|
1643 * |
|
1644 * Optimized algorithm with 15 multiplications in the 1-D kernel. |
|
1645 * cK represents sqrt(2) * cos(K*pi/24). |
|
1646 */ |
|
1647 |
|
1648 GLOBAL(void) |
|
1649 jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
1650 JCOEFPTR coef_block, |
|
1651 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
1652 { |
|
1653 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; |
|
1654 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; |
|
1655 INT32 z1, z2, z3, z4; |
|
1656 JCOEFPTR inptr; |
|
1657 ISLOW_MULT_TYPE * quantptr; |
|
1658 int * wsptr; |
|
1659 JSAMPROW outptr; |
|
1660 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
1661 int ctr; |
|
1662 int workspace[8*12]; /* buffers data between passes */ |
|
1663 SHIFT_TEMPS |
|
1664 |
|
1665 /* Pass 1: process columns from input, store into work array. */ |
|
1666 |
|
1667 inptr = coef_block; |
|
1668 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
1669 wsptr = workspace; |
|
1670 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
|
1671 /* Even part */ |
|
1672 |
|
1673 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
1674 z3 <<= CONST_BITS; |
|
1675 /* Add fudge factor here for final descale. */ |
|
1676 z3 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
1677 |
|
1678 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
1679 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ |
|
1680 |
|
1681 tmp10 = z3 + z4; |
|
1682 tmp11 = z3 - z4; |
|
1683 |
|
1684 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
1685 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ |
|
1686 z1 <<= CONST_BITS; |
|
1687 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
|
1688 z2 <<= CONST_BITS; |
|
1689 |
|
1690 tmp12 = z1 - z2; |
|
1691 |
|
1692 tmp21 = z3 + tmp12; |
|
1693 tmp24 = z3 - tmp12; |
|
1694 |
|
1695 tmp12 = z4 + z2; |
|
1696 |
|
1697 tmp20 = tmp10 + tmp12; |
|
1698 tmp25 = tmp10 - tmp12; |
|
1699 |
|
1700 tmp12 = z4 - z1 - z2; |
|
1701 |
|
1702 tmp22 = tmp11 + tmp12; |
|
1703 tmp23 = tmp11 - tmp12; |
|
1704 |
|
1705 /* Odd part */ |
|
1706 |
|
1707 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
1708 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
1709 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
1710 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
|
1711 |
|
1712 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ |
|
1713 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ |
|
1714 |
|
1715 tmp10 = z1 + z3; |
|
1716 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ |
|
1717 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ |
|
1718 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ |
|
1719 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ |
|
1720 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ |
|
1721 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ |
|
1722 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ |
|
1723 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ |
|
1724 |
|
1725 z1 -= z4; |
|
1726 z2 -= z3; |
|
1727 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ |
|
1728 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ |
|
1729 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ |
|
1730 |
|
1731 /* Final output stage */ |
|
1732 |
|
1733 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |
|
1734 wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |
|
1735 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |
|
1736 wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |
|
1737 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); |
|
1738 wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); |
|
1739 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); |
|
1740 wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); |
|
1741 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); |
|
1742 wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); |
|
1743 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); |
|
1744 wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); |
|
1745 } |
|
1746 |
|
1747 /* Pass 2: process 12 rows from work array, store into output array. */ |
|
1748 |
|
1749 wsptr = workspace; |
|
1750 for (ctr = 0; ctr < 12; ctr++) { |
|
1751 outptr = output_buf[ctr] + output_col; |
|
1752 |
|
1753 /* Even part */ |
|
1754 |
|
1755 /* Add fudge factor here for final descale. */ |
|
1756 z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
1757 z3 <<= CONST_BITS; |
|
1758 |
|
1759 z4 = (INT32) wsptr[4]; |
|
1760 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ |
|
1761 |
|
1762 tmp10 = z3 + z4; |
|
1763 tmp11 = z3 - z4; |
|
1764 |
|
1765 z1 = (INT32) wsptr[2]; |
|
1766 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ |
|
1767 z1 <<= CONST_BITS; |
|
1768 z2 = (INT32) wsptr[6]; |
|
1769 z2 <<= CONST_BITS; |
|
1770 |
|
1771 tmp12 = z1 - z2; |
|
1772 |
|
1773 tmp21 = z3 + tmp12; |
|
1774 tmp24 = z3 - tmp12; |
|
1775 |
|
1776 tmp12 = z4 + z2; |
|
1777 |
|
1778 tmp20 = tmp10 + tmp12; |
|
1779 tmp25 = tmp10 - tmp12; |
|
1780 |
|
1781 tmp12 = z4 - z1 - z2; |
|
1782 |
|
1783 tmp22 = tmp11 + tmp12; |
|
1784 tmp23 = tmp11 - tmp12; |
|
1785 |
|
1786 /* Odd part */ |
|
1787 |
|
1788 z1 = (INT32) wsptr[1]; |
|
1789 z2 = (INT32) wsptr[3]; |
|
1790 z3 = (INT32) wsptr[5]; |
|
1791 z4 = (INT32) wsptr[7]; |
|
1792 |
|
1793 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ |
|
1794 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ |
|
1795 |
|
1796 tmp10 = z1 + z3; |
|
1797 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ |
|
1798 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ |
|
1799 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ |
|
1800 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ |
|
1801 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ |
|
1802 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ |
|
1803 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ |
|
1804 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ |
|
1805 |
|
1806 z1 -= z4; |
|
1807 z2 -= z3; |
|
1808 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ |
|
1809 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ |
|
1810 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ |
|
1811 |
|
1812 /* Final output stage */ |
|
1813 |
|
1814 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |
|
1815 CONST_BITS+PASS1_BITS+3) |
|
1816 & RANGE_MASK]; |
|
1817 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |
|
1818 CONST_BITS+PASS1_BITS+3) |
|
1819 & RANGE_MASK]; |
|
1820 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |
|
1821 CONST_BITS+PASS1_BITS+3) |
|
1822 & RANGE_MASK]; |
|
1823 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |
|
1824 CONST_BITS+PASS1_BITS+3) |
|
1825 & RANGE_MASK]; |
|
1826 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |
|
1827 CONST_BITS+PASS1_BITS+3) |
|
1828 & RANGE_MASK]; |
|
1829 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |
|
1830 CONST_BITS+PASS1_BITS+3) |
|
1831 & RANGE_MASK]; |
|
1832 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, |
|
1833 CONST_BITS+PASS1_BITS+3) |
|
1834 & RANGE_MASK]; |
|
1835 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, |
|
1836 CONST_BITS+PASS1_BITS+3) |
|
1837 & RANGE_MASK]; |
|
1838 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, |
|
1839 CONST_BITS+PASS1_BITS+3) |
|
1840 & RANGE_MASK]; |
|
1841 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, |
|
1842 CONST_BITS+PASS1_BITS+3) |
|
1843 & RANGE_MASK]; |
|
1844 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, |
|
1845 CONST_BITS+PASS1_BITS+3) |
|
1846 & RANGE_MASK]; |
|
1847 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, |
|
1848 CONST_BITS+PASS1_BITS+3) |
|
1849 & RANGE_MASK]; |
|
1850 |
|
1851 wsptr += 8; /* advance pointer to next row */ |
|
1852 } |
|
1853 } |
|
1854 |
|
1855 |
|
1856 /* |

1857 * Perform dequantization and inverse DCT on one block of coefficients, |

1858 * producing a 13x13 output block. |

1859 * |

1860 * Optimized algorithm with 29 multiplications in the 1-D kernel. |

1861 * cK represents sqrt(2) * cos(K*pi/26). |

 *
 * Data flow, as implemented below:
 *   - coef_block and compptr->dct_table are walked as eight columns; for
 *     each column the entries at DCTSIZE*0 .. DCTSIZE*7 are dequantized.
 *   - Pass 1 expands every column to 13 values kept in a local workspace,
 *     descaled by CONST_BITS-PASS1_BITS bits.
 *   - Pass 2 expands each of the 13 workspace rows (8 values per row) to
 *     13 samples, descaled by CONST_BITS+PASS1_BITS+3 bits, looked up
 *     through range_limit and stored in output_buf[0..12] starting at
 *     column output_col.
 * The function returns nothing; its only visible outputs are those stores.

1862 */ |

1863 |

1864 GLOBAL(void) |

1865 jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |

1866 JCOEFPTR coef_block, |

1867 JSAMPARRAY output_buf, JDIMENSION output_col) |

1868 { |

1869 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; |

1870 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; |

1871 INT32 z1, z2, z3, z4; |

1872 JCOEFPTR inptr; |

1873 ISLOW_MULT_TYPE * quantptr; |

1874 int * wsptr; |

1875 JSAMPROW outptr; |

1876 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |

1877 int ctr; |

1878 int workspace[8*13]; /* buffers data between passes: 13 rows of 8 ints, written column-wise by pass 1 and read row-wise by pass 2 */ |

1879 SHIFT_TEMPS |

1880 |

1881 /* Pass 1: process columns from input, store into work array.
 * Each of the 8 iterations steps inptr, quantptr and wsptr by one element,
 * so one input column produces one workspace column: 13 values stored at
 * wsptr[8*0] .. wsptr[8*12]. */ |

1882 |

1883 inptr = coef_block; |

1884 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |

1885 wsptr = workspace; |

1886 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |

1887 /* Even part: built from coefficients 0, 2, 4 and 6 of the column. */ |

1888 |

1889 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |

/* NOTE(review): z1 is a signed INT32; if the dequantized term can be
 * negative, left-shifting it is undefined in ISO C -- confirm that the
 * supported compilers give the intended two's-complement result. */

1890 z1 <<= CONST_BITS; |

1891 /* Add fudge factor here for final descale: this is half of
 * 2**(CONST_BITS-PASS1_BITS), the divisor applied by the shifts in this
 * pass's output stage, so (assuming RIGHT_SHIFT is an arithmetic right
 * shift) those shifts round to nearest instead of truncating. */ |

1892 z1 += ONE << (CONST_BITS-PASS1_BITS-1); |

1893 |

1894 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |

1895 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |

1896 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |

1897 |

1898 tmp10 = z3 + z4; |

1899 tmp11 = z3 - z4; |

1900 |

/* tmp12/tmp13 are scratch values recomputed for each pair of even terms. */

1901 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ |

1902 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ |

1903 |

1904 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ |

1905 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ |

1906 |

1907 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ |

1908 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ |

1909 |

1910 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ |

1911 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ |

1912 |

1913 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ |

/* Here tmp13 carries -z1; both uses below subtract tmp13, so tmp23 and
 * tmp24 still receive +z1 like the other even terms. */

1914 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ |

1915 |

1916 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ |

1917 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ |

1918 |

1919 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ |

1920 |

1921 /* Odd part: built from coefficients 1, 3, 5 and 7; z1..z4 are reloaded. */ |

1922 |

1923 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |

1924 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |

1925 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |

1926 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |

1927 |

1928 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ |

1929 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ |

1930 tmp15 = z1 + z4; |

1931 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ |

1932 tmp10 = tmp11 + tmp12 + tmp13 - |

1933 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ |

/* tmp14 is a shared product: it is reassigned three times and each value
 * is folded into two of tmp11..tmp13 before being overwritten. */

1934 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ |

1935 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ |

1936 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ |

1937 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ |

1938 tmp11 += tmp14; |

1939 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ |

1940 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ |

1941 tmp12 += tmp14; |

1942 tmp13 += tmp14; |

1943 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ |

1944 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ |

1945 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ |

/* z1 is no longer needed as coefficient 1; it is reused for a product that
 * is added to both tmp14 and tmp15. */

1946 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ |

1947 tmp14 += z1; |

1948 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ |

1949 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ |

1950 |

1951 /* Final output stage: workspace rows k and 12-k receive the sum and the
 * difference of even term tmp2k and odd term tmp1k (k = 0..5); row 6 is
 * the even term tmp26 alone. */ |

1952 |

1953 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |

1954 wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |

1955 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |

1956 wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |

1957 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); |

1958 wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); |

1959 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); |

1960 wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); |

1961 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); |

1962 wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); |

1963 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); |

1964 wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); |

1965 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS); |

1966 } |

1967 |

1968 /* Pass 2: process 13 rows from work array, store into output array.
 * The arithmetic repeats pass 1 with wsptr[0..7] as the eight inputs; only
 * the rounding term and the final shift amount differ. */ |

1969 |

1970 wsptr = workspace; |

1971 for (ctr = 0; ctr < 13; ctr++) { |

1972 outptr = output_buf[ctr] + output_col; |

1973 |

1974 /* Even part */ |

1975 |

1976 /* Add fudge factor here for final descale: after the shift on the next
 * line the added term equals 2**(CONST_BITS+PASS1_BITS+2), i.e. half of
 * the 2**(CONST_BITS+PASS1_BITS+3) divisor used in the output stage. */ |

1977 z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |

1978 z1 <<= CONST_BITS; |

1979 |

1980 z2 = (INT32) wsptr[2]; |

1981 z3 = (INT32) wsptr[4]; |

1982 z4 = (INT32) wsptr[6]; |

1983 |

1984 tmp10 = z3 + z4; |

1985 tmp11 = z3 - z4; |

1986 |

1987 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ |

1988 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ |

1989 |

1990 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ |

1991 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ |

1992 |

1993 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ |

1994 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ |

1995 |

1996 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ |

1997 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ |

1998 |

1999 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ |

2000 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ |

2001 |

2002 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ |

2003 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ |

2004 |

2005 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ |

2006 |

2007 /* Odd part */ |

2008 |

2009 z1 = (INT32) wsptr[1]; |

2010 z2 = (INT32) wsptr[3]; |

2011 z3 = (INT32) wsptr[5]; |

2012 z4 = (INT32) wsptr[7]; |

2013 |

2014 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ |

2015 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ |

2016 tmp15 = z1 + z4; |

2017 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ |

2018 tmp10 = tmp11 + tmp12 + tmp13 - |

2019 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ |

2020 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ |

2021 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ |

2022 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ |

2023 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ |

2024 tmp11 += tmp14; |

2025 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ |

2026 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ |

2027 tmp12 += tmp14; |

2028 tmp13 += tmp14; |

2029 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ |

2030 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ |

2031 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ |

2032 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ |

2033 tmp14 += z1; |

2034 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ |

2035 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ |

2036 |

2037 /* Final output stage: outptr[k] and outptr[12-k] take even+odd and
 * even-odd (k = 0..5), outptr[6] takes tmp26 alone.  Each value is shifted
 * right by CONST_BITS+PASS1_BITS+3 bits, masked with RANGE_MASK and used
 * as an index into range_limit. */ |

2038 |

2039 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |

2040 CONST_BITS+PASS1_BITS+3) |

2041 & RANGE_MASK]; |

2042 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |

2043 CONST_BITS+PASS1_BITS+3) |

2044 & RANGE_MASK]; |

2045 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |

2046 CONST_BITS+PASS1_BITS+3) |

2047 & RANGE_MASK]; |

2048 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |

2049 CONST_BITS+PASS1_BITS+3) |

2050 & RANGE_MASK]; |

2051 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |

2052 CONST_BITS+PASS1_BITS+3) |

2053 & RANGE_MASK]; |

2054 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |

2055 CONST_BITS+PASS1_BITS+3) |

2056 & RANGE_MASK]; |

2057 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, |

2058 CONST_BITS+PASS1_BITS+3) |

2059 & RANGE_MASK]; |

2060 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, |

2061 CONST_BITS+PASS1_BITS+3) |

2062 & RANGE_MASK]; |

2063 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, |

2064 CONST_BITS+PASS1_BITS+3) |

2065 & RANGE_MASK]; |

2066 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, |

2067 CONST_BITS+PASS1_BITS+3) |

2068 & RANGE_MASK]; |

2069 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, |

2070 CONST_BITS+PASS1_BITS+3) |

2071 & RANGE_MASK]; |

2072 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, |

2073 CONST_BITS+PASS1_BITS+3) |

2074 & RANGE_MASK]; |

2075 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26, |

2076 CONST_BITS+PASS1_BITS+3) |

2077 & RANGE_MASK]; |

2078 |

2079 wsptr += 8; /* advance pointer to next row */ |

2080 } |

2081 } |
|
2082 |
|
2083 |
|
2084 /* |

2085 * Perform dequantization and inverse DCT on one block of coefficients, |

2086 * producing a 14x14 output block. |

2087 * |

2088 * Optimized algorithm with 20 multiplications in the 1-D kernel. |

2089 * cK represents sqrt(2) * cos(K*pi/28). |

 *
 * Data flow, as implemented below:
 *   - coef_block and compptr->dct_table are walked as eight columns; for
 *     each column the entries at DCTSIZE*0 .. DCTSIZE*7 are dequantized.
 *   - Pass 1 expands every column to 14 values kept in a local workspace,
 *     descaled by CONST_BITS-PASS1_BITS bits.
 *   - Pass 2 expands each of the 14 workspace rows (8 values per row) to
 *     14 samples, descaled by CONST_BITS+PASS1_BITS+3 bits, looked up
 *     through range_limit and stored in output_buf[0..13] starting at
 *     column output_col.
 * The function returns nothing; its only visible outputs are those stores.

2090 */ |

2091 |

2092 GLOBAL(void) |

2093 jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |

2094 JCOEFPTR coef_block, |

2095 JSAMPARRAY output_buf, JDIMENSION output_col) |

2096 { |

2097 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; |

2098 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; |

2099 INT32 z1, z2, z3, z4; |

2100 JCOEFPTR inptr; |

2101 ISLOW_MULT_TYPE * quantptr; |

2102 int * wsptr; |

2103 JSAMPROW outptr; |

2104 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |

2105 int ctr; |

2106 int workspace[8*14]; /* buffers data between passes: 14 rows of 8 ints, written column-wise by pass 1 and read row-wise by pass 2 */ |

2107 SHIFT_TEMPS |

2108 |

2109 /* Pass 1: process columns from input, store into work array.
 * Each of the 8 iterations steps inptr, quantptr and wsptr by one element,
 * so one input column produces one workspace column: 14 values stored at
 * wsptr[8*0] .. wsptr[8*13]. */ |

2110 |

2111 inptr = coef_block; |

2112 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |

2113 wsptr = workspace; |

2114 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |

2115 /* Even part: built from coefficients 0, 4, then 2 and 6 of the column. */ |

2116 |

2117 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |

/* NOTE(review): z1 is a signed INT32; if the dequantized term can be
 * negative, left-shifting it is undefined in ISO C -- confirm that the
 * supported compilers give the intended two's-complement result. */

2118 z1 <<= CONST_BITS; |

2119 /* Add fudge factor here for final descale: this is half of
 * 2**(CONST_BITS-PASS1_BITS), the divisor applied by the shifts in this
 * pass, so (assuming RIGHT_SHIFT is an arithmetic right shift) those
 * shifts round to nearest instead of truncating. */ |

2120 z1 += ONE << (CONST_BITS-PASS1_BITS-1); |

/* z4 first holds coefficient 4, then is overwritten by its c8 product
 * once the c4 and c12 products have been taken from it. */

2121 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |

2122 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ |

2123 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ |

2124 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ |

2125 |

2126 tmp10 = z1 + z2; |

2127 tmp11 = z1 + z3; |

2128 tmp12 = z1 - z4; |

2129 |

/* Unlike the other even terms, tmp23 is descaled right here rather than in
 * the output stage; its odd-part partner tmp13 is produced directly at
 * PASS1_BITS scale further down, so rows 3 and 10 are stored unshifted. */

2130 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */ |

2131 CONST_BITS-PASS1_BITS); |

2132 |

2133 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |

2134 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |

2135 |

2136 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ |

2137 |

2138 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ |

2139 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ |

2140 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ |

2141 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ |

2142 |

2143 tmp20 = tmp10 + tmp13; |

2144 tmp26 = tmp10 - tmp13; |

2145 tmp21 = tmp11 + tmp14; |

2146 tmp25 = tmp11 - tmp14; |

2147 tmp22 = tmp12 + tmp15; |

2148 tmp24 = tmp12 - tmp15; |

2149 |

2150 /* Odd part: built from coefficients 1, 3, 5 and 7; z1..z4 are reloaded. */ |

2151 |

2152 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |

2153 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |

2154 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |

2155 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |

/* Coefficient 7 scaled up by CONST_BITS bits, presumably so it can be
 * combined with the MULTIPLY products below at the same fixed-point
 * scale -- confirm against the FIX/MULTIPLY definitions. */

2156 tmp13 = z4 << CONST_BITS; |

2157 |

2158 tmp14 = z1 + z3; |

2159 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ |

2160 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ |

2161 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ |

2162 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ |

2163 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ |

2164 z1 -= z2; |

2165 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ |

2166 tmp16 += tmp15; |

/* z4 is still the unscaled coefficient 7 here, so z1 becomes
 * coef1 - coef3 + coef7; z4 is reused as a scratch product right after. */

2167 z1 += z4; |

2168 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ |

2169 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ |

2170 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ |

2171 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ |

2172 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ |

2173 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ |

2174 |

/* tmp13 is now (coef1 - coef3 - coef5 + coef7) scaled by PASS1_BITS only,
 * matching the already-descaled tmp23. */

2175 tmp13 = (z1 - z3) << PASS1_BITS; |

2176 |

2177 /* Final output stage: workspace rows k and 13-k receive the sum and the
 * difference of even term tmp2k and odd term tmp1k (k = 0..6).  Rows 3
 * and 10 are stored without a shift because tmp23 and tmp13 are already
 * at workspace scale. */ |

2178 |

2179 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |

2180 wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |

2181 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |

2182 wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |

2183 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); |

2184 wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); |

2185 wsptr[8*3] = (int) (tmp23 + tmp13); |

2186 wsptr[8*10] = (int) (tmp23 - tmp13); |

2187 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); |

2188 wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); |

2189 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); |

2190 wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); |

2191 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); |

2192 wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); |

2193 } |

2194 |

2195 /* Pass 2: process 14 rows from work array, store into output array.
 * The inputs are wsptr[0..7]; here every output, including columns 3 and
 * 10, goes through the same final shift. */ |

2196 |

2197 wsptr = workspace; |

2198 for (ctr = 0; ctr < 14; ctr++) { |

2199 outptr = output_buf[ctr] + output_col; |

2200 |

2201 /* Even part */ |

2202 |

2203 /* Add fudge factor here for final descale: after the shift on the next
 * line the added term equals 2**(CONST_BITS+PASS1_BITS+2), i.e. half of
 * the 2**(CONST_BITS+PASS1_BITS+3) divisor used in the output stage. */ |

2204 z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |

2205 z1 <<= CONST_BITS; |

2206 z4 = (INT32) wsptr[4]; |

2207 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ |

2208 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ |

2209 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ |

2210 |

2211 tmp10 = z1 + z2; |

2212 tmp11 = z1 + z3; |

2213 tmp12 = z1 - z4; |

2214 |

/* No early descale in this pass: tmp23 stays at full scale and is shifted
 * in the output stage with the other terms. */

2215 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */ |

2216 |

2217 z1 = (INT32) wsptr[2]; |

2218 z2 = (INT32) wsptr[6]; |

2219 |

2220 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ |

2221 |

2222 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ |

2223 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ |

2224 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ |

2225 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ |

2226 |

2227 tmp20 = tmp10 + tmp13; |

2228 tmp26 = tmp10 - tmp13; |

2229 tmp21 = tmp11 + tmp14; |

2230 tmp25 = tmp11 - tmp14; |

2231 tmp22 = tmp12 + tmp15; |

2232 tmp24 = tmp12 - tmp15; |

2233 |

2234 /* Odd part */ |

2235 |

2236 z1 = (INT32) wsptr[1]; |

2237 z2 = (INT32) wsptr[3]; |

2238 z3 = (INT32) wsptr[5]; |

2239 z4 = (INT32) wsptr[7]; |

/* In this pass z4 itself is scaled by CONST_BITS and stays that way for
 * the rest of the odd part; tmp13 serves as the scratch product instead. */

2240 z4 <<= CONST_BITS; |

2241 |

2242 tmp14 = z1 + z3; |

2243 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ |

2244 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ |

2245 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ |

2246 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ |

2247 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ |

2248 z1 -= z2; |

2249 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ |

2250 tmp16 += tmp15; |

2251 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ |

2252 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ |

2253 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ |

2254 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ |

2255 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ |

2256 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ |

2257 |

/* z1 is wsptr[1] - wsptr[3] at this point, so tmp13 is
 * (ws1 - ws3 - ws5 + ws7) scaled by CONST_BITS. */

2258 tmp13 = ((z1 - z3) << CONST_BITS) + z4; |

2259 |

2260 /* Final output stage: outptr[k] and outptr[13-k] take even+odd and
 * even-odd (k = 0..6).  Each value is shifted right by
 * CONST_BITS+PASS1_BITS+3 bits, masked with RANGE_MASK and used as an
 * index into range_limit. */ |

2261 |

2262 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |

2263 CONST_BITS+PASS1_BITS+3) |

2264 & RANGE_MASK]; |

2265 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |

2266 CONST_BITS+PASS1_BITS+3) |

2267 & RANGE_MASK]; |

2268 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |

2269 CONST_BITS+PASS1_BITS+3) |

2270 & RANGE_MASK]; |

2271 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |

2272 CONST_BITS+PASS1_BITS+3) |

2273 & RANGE_MASK]; |

2274 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |

2275 CONST_BITS+PASS1_BITS+3) |

2276 & RANGE_MASK]; |

2277 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |

2278 CONST_BITS+PASS1_BITS+3) |

2279 & RANGE_MASK]; |

2280 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, |

2281 CONST_BITS+PASS1_BITS+3) |

2282 & RANGE_MASK]; |

2283 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, |

2284 CONST_BITS+PASS1_BITS+3) |

2285 & RANGE_MASK]; |

2286 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, |

2287 CONST_BITS+PASS1_BITS+3) |

2288 & RANGE_MASK]; |

2289 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, |

2290 CONST_BITS+PASS1_BITS+3) |

2291 & RANGE_MASK]; |

2292 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, |

2293 CONST_BITS+PASS1_BITS+3) |

2294 & RANGE_MASK]; |

2295 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, |

2296 CONST_BITS+PASS1_BITS+3) |

2297 & RANGE_MASK]; |

2298 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, |

2299 CONST_BITS+PASS1_BITS+3) |

2300 & RANGE_MASK]; |

2301 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, |

2302 CONST_BITS+PASS1_BITS+3) |

2303 & RANGE_MASK]; |

2304 |

2305 wsptr += 8; /* advance pointer to next row */ |

2306 } |

2307 } |
|
2308 |
|
2309 |
|
2310 /* |

2311 * Perform dequantization and inverse DCT on one block of coefficients, |

2312 * producing a 15x15 output block. |

2313 * |

2314 * Optimized algorithm with 22 multiplications in the 1-D kernel. |

2315 * cK represents sqrt(2) * cos(K*pi/30). |

 *
 * Data flow, as implemented below:
 *   - coef_block and compptr->dct_table are walked as eight columns; for
 *     each column the entries at DCTSIZE*0 .. DCTSIZE*7 are dequantized.
 *   - Pass 1 expands every column to 15 values kept in a local workspace,
 *     descaled by CONST_BITS-PASS1_BITS bits.
 *   - Pass 2 expands each of the 15 workspace rows (8 values per row) to
 *     15 samples, descaled by CONST_BITS+PASS1_BITS+3 bits, looked up
 *     through range_limit and stored in output_buf[0..14] starting at
 *     column output_col.
 * The function returns nothing; its only visible outputs are those stores.

2316 */ |

2317 |

2318 GLOBAL(void) |

2319 jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |

2320 JCOEFPTR coef_block, |

2321 JSAMPARRAY output_buf, JDIMENSION output_col) |

2322 { |

2323 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; |

2324 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; |

2325 INT32 z1, z2, z3, z4; |

2326 JCOEFPTR inptr; |

2327 ISLOW_MULT_TYPE * quantptr; |

2328 int * wsptr; |

2329 JSAMPROW outptr; |

2330 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |

2331 int ctr; |

2332 int workspace[8*15]; /* buffers data between passes: 15 rows of 8 ints, written column-wise by pass 1 and read row-wise by pass 2 */ |

2333 SHIFT_TEMPS |

2334 |

2335 /* Pass 1: process columns from input, store into work array.
 * Each of the 8 iterations steps inptr, quantptr and wsptr by one element,
 * so one input column produces one workspace column: 15 values stored at
 * wsptr[8*0] .. wsptr[8*14]. */ |

2336 |

2337 inptr = coef_block; |

2338 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |

2339 wsptr = workspace; |

2340 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |

2341 /* Even part: built from coefficients 0, 2, 4 and 6 of the column. */ |

2342 |

2343 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |

/* NOTE(review): z1 is a signed INT32; if the dequantized term can be
 * negative, left-shifting it is undefined in ISO C -- confirm that the
 * supported compilers give the intended two's-complement result. */

2344 z1 <<= CONST_BITS; |

2345 /* Add fudge factor here for final descale: this is half of
 * 2**(CONST_BITS-PASS1_BITS), the divisor applied by the shifts in this
 * pass's output stage, so (assuming RIGHT_SHIFT is an arithmetic right
 * shift) those shifts round to nearest instead of truncating. */ |

2346 z1 += ONE << (CONST_BITS-PASS1_BITS-1); |

2347 |

2348 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |

2349 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |

2350 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |

2351 |

2352 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ |

2353 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ |

2354 |

/* Three variants of the coefficient-0 term combined with coefficient 6:
 * tmp12, tmp13 and the updated z1 each feed a different group of the
 * tmp2x outputs below. */

2355 tmp12 = z1 - tmp10; |

2356 tmp13 = z1 + tmp11; |

2357 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ |

2358 |

/* z4 and z3 become the difference and the sum of coefficients 2 and 4;
 * z2 keeps coefficient 2 until it is replaced by its product. */

2359 z4 = z2 - z3; |

2360 z3 += z2; |

2361 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ |

2362 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ |

2363 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ |

2364 |

2365 tmp20 = tmp13 + tmp10 + tmp11; |

2366 tmp23 = tmp12 - tmp10 + tmp11 + z2; |

2367 |

2368 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ |

2369 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ |

2370 |

2371 tmp25 = tmp13 - tmp10 - tmp11; |

2372 tmp26 = tmp12 + tmp10 - tmp11 - z2; |

2373 |

2374 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ |

2375 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ |

2376 |

2377 tmp21 = tmp12 + tmp10 + tmp11; |

2378 tmp24 = tmp13 - tmp10 + tmp11; |

/* Doubling turns the (c6-c12)/2 product into the c6-c12 weight used by
 * the next two lines. */

2379 tmp11 += tmp11; |

2380 tmp22 = z1 + tmp11; /* c10 = c6-c12 */ |

2381 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ |

2382 |

2383 /* Odd part: built from coefficients 1, 3, 5 and 7.  z4 briefly holds
 * coefficient 5, which is only needed as the product z3, and is then
 * reloaded with coefficient 7. */ |

2384 |

2385 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |

2386 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |

2387 z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |

2388 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ |

2389 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |

2390 |

/* tmp13 and tmp15 are scratch values here; both are reassigned a few
 * lines below before they reach the output stage. */

2391 tmp13 = z2 - z4; |

2392 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ |

2393 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ |

2394 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ |

2395 |

2396 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ |

2397 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ |

/* Coefficient 3 is fully consumed by the two products above, so z2 is
 * reused for coef1 - coef7 and, later, for the c11 product. */

2398 z2 = z1 - z4; |

2399 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ |

2400 |

2401 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ |

2402 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ |

2403 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ |

2404 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ |

2405 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ |

2406 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ |

2407 |

2408 /* Final output stage: workspace rows k and 14-k receive the sum and the
 * difference of even term tmp2k and odd term tmp1k (k = 0..6); row 7 is
 * the even term tmp27 alone. */ |

2409 |

2410 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |

2411 wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |

2412 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |

2413 wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |

2414 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); |

2415 wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); |

2416 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); |

2417 wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); |

2418 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); |

2419 wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); |

2420 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); |

2421 wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); |

2422 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); |

2423 wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); |

2424 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS); |

2425 } |

2426 |

2427 /* Pass 2: process 15 rows from work array, store into output array.
 * The arithmetic repeats pass 1 with wsptr[0..7] as the eight inputs; only
 * the rounding term and the final shift amount differ. */ |

2428 |

2429 wsptr = workspace; |

2430 for (ctr = 0; ctr < 15; ctr++) { |

2431 outptr = output_buf[ctr] + output_col; |

2432 |

2433 /* Even part */ |

2434 |

2435 /* Add fudge factor here for final descale: after the shift on the next
 * line the added term equals 2**(CONST_BITS+PASS1_BITS+2), i.e. half of
 * the 2**(CONST_BITS+PASS1_BITS+3) divisor used in the output stage. */ |

2436 z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |

2437 z1 <<= CONST_BITS; |

2438 |

2439 z2 = (INT32) wsptr[2]; |

2440 z3 = (INT32) wsptr[4]; |

2441 z4 = (INT32) wsptr[6]; |

2442 |

2443 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ |

2444 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ |

2445 |

2446 tmp12 = z1 - tmp10; |

2447 tmp13 = z1 + tmp11; |

2448 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ |

2449 |

2450 z4 = z2 - z3; |

2451 z3 += z2; |

2452 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ |

2453 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ |

2454 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ |

2455 |

2456 tmp20 = tmp13 + tmp10 + tmp11; |

2457 tmp23 = tmp12 - tmp10 + tmp11 + z2; |

2458 |

2459 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ |

2460 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ |

2461 |

2462 tmp25 = tmp13 - tmp10 - tmp11; |

2463 tmp26 = tmp12 + tmp10 - tmp11 - z2; |

2464 |

2465 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ |

2466 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ |

2467 |

2468 tmp21 = tmp12 + tmp10 + tmp11; |

2469 tmp24 = tmp13 - tmp10 + tmp11; |

2470 tmp11 += tmp11; |

2471 tmp22 = z1 + tmp11; /* c10 = c6-c12 */ |

2472 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ |

2473 |

2474 /* Odd part: z4 again holds input 5 only long enough to form z3, then is
 * reloaded with input 7. */ |

2475 |

2476 z1 = (INT32) wsptr[1]; |

2477 z2 = (INT32) wsptr[3]; |

2478 z4 = (INT32) wsptr[5]; |

2479 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ |

2480 z4 = (INT32) wsptr[7]; |

2481 |

2482 tmp13 = z2 - z4; |

2483 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ |

2484 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ |

2485 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ |

2486 |

2487 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ |

2488 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ |

2489 z2 = z1 - z4; |

2490 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ |

2491 |

2492 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ |

2493 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ |

2494 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ |

2495 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ |

2496 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ |

2497 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ |

2498 |

2499 /* Final output stage: outptr[k] and outptr[14-k] take even+odd and
 * even-odd (k = 0..6), outptr[7] takes tmp27 alone.  Each value is shifted
 * right by CONST_BITS+PASS1_BITS+3 bits, masked with RANGE_MASK and used
 * as an index into range_limit. */ |

2500 |

2501 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |

2502 CONST_BITS+PASS1_BITS+3) |

2503 & RANGE_MASK]; |

2504 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |

2505 CONST_BITS+PASS1_BITS+3) |

2506 & RANGE_MASK]; |

2507 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |

2508 CONST_BITS+PASS1_BITS+3) |

2509 & RANGE_MASK]; |

2510 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |

2511 CONST_BITS+PASS1_BITS+3) |

2512 & RANGE_MASK]; |

2513 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |

2514 CONST_BITS+PASS1_BITS+3) |

2515 & RANGE_MASK]; |

2516 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |

2517 CONST_BITS+PASS1_BITS+3) |

2518 & RANGE_MASK]; |

2519 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, |

2520 CONST_BITS+PASS1_BITS+3) |

2521 & RANGE_MASK]; |

2522 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, |

2523 CONST_BITS+PASS1_BITS+3) |

2524 & RANGE_MASK]; |

2525 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, |

2526 CONST_BITS+PASS1_BITS+3) |

2527 & RANGE_MASK]; |

2528 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, |

2529 CONST_BITS+PASS1_BITS+3) |

2530 & RANGE_MASK]; |

2531 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, |

2532 CONST_BITS+PASS1_BITS+3) |

2533 & RANGE_MASK]; |

2534 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, |

2535 CONST_BITS+PASS1_BITS+3) |

2536 & RANGE_MASK]; |

2537 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, |

2538 CONST_BITS+PASS1_BITS+3) |

2539 & RANGE_MASK]; |

2540 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, |

2541 CONST_BITS+PASS1_BITS+3) |

2542 & RANGE_MASK]; |

2543 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27, |

2544 CONST_BITS+PASS1_BITS+3) |

2545 & RANGE_MASK]; |

2546 |

2547 wsptr += 8; /* advance pointer to next row */ |

2548 } |

2549 } |
|
2550 |
|
2551 |
|
2552 /* |
|
2553 * Perform dequantization and inverse DCT on one block of coefficients, |
|
2554 * producing a 16x16 output block. |
|
2555 * |
|
2556 * Optimized algorithm with 28 multiplications in the 1-D kernel. |
|
2557 * cK represents sqrt(2) * cos(K*pi/32). |
|
2558 */ |
|
2559 |
|
2560 GLOBAL(void) |
|
2561 jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
2562 JCOEFPTR coef_block, |
|
2563 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
2564 { |
|
2565 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; |
|
2566 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; |
|
2567 INT32 z1, z2, z3, z4; |
|
2568 JCOEFPTR inptr; |
|
2569 ISLOW_MULT_TYPE * quantptr; |
|
2570 int * wsptr; |
|
2571 JSAMPROW outptr; |
|
2572 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
2573 int ctr; |
|
2574 int workspace[8*16]; /* buffers data between passes */ |
|
2575 SHIFT_TEMPS |
|
2576 |
|
2577 /* Pass 1: process columns from input, store into work array. */ |
|
2578 |
|
2579 inptr = coef_block; |
|
2580 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
2581 wsptr = workspace; |
|
2582 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
|
2583 /* Even part */ |
|
2584 |
|
2585 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
2586 tmp0 <<= CONST_BITS; |
|
2587 /* Add fudge factor here for final descale. */ |
|
2588 tmp0 += 1 << (CONST_BITS-PASS1_BITS-1); |
|
2589 |
|
2590 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
2591 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ |
|
2592 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ |
|
2593 |
|
2594 tmp10 = tmp0 + tmp1; |
|
2595 tmp11 = tmp0 - tmp1; |
|
2596 tmp12 = tmp0 + tmp2; |
|
2597 tmp13 = tmp0 - tmp2; |
|
2598 |
|
2599 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
2600 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
|
2601 z3 = z1 - z2; |
|
2602 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ |
|
2603 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ |
|
2604 |
|
2605 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ |
|
2606 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ |
|
2607 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ |
|
2608 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ |
|
2609 |
|
2610 tmp20 = tmp10 + tmp0; |
|
2611 tmp27 = tmp10 - tmp0; |
|
2612 tmp21 = tmp12 + tmp1; |
|
2613 tmp26 = tmp12 - tmp1; |
|
2614 tmp22 = tmp13 + tmp2; |
|
2615 tmp25 = tmp13 - tmp2; |
|
2616 tmp23 = tmp11 + tmp3; |
|
2617 tmp24 = tmp11 - tmp3; |
|
2618 |
|
2619 /* Odd part */ |
|
2620 |
|
2621 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
2622 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
2623 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
2624 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
|
2625 |
|
2626 tmp11 = z1 + z3; |
|
2627 |
|
2628 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ |
|
2629 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ |
|
2630 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ |
|
2631 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ |
|
2632 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ |
|
2633 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ |
|
2634 tmp0 = tmp1 + tmp2 + tmp3 - |
|
2635 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ |
|
2636 tmp13 = tmp10 + tmp11 + tmp12 - |
|
2637 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ |
|
2638 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ |
|
2639 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ |
|
2640 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ |
|
2641 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ |
|
2642 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ |
|
2643 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ |
|
2644 z2 += z4; |
|
2645 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ |
|
2646 tmp1 += z1; |
|
2647 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ |
|
2648 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ |
|
2649 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ |
|
2650 tmp12 += z2; |
|
2651 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ |
|
2652 tmp2 += z2; |
|
2653 tmp3 += z2; |
|
2654 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ |
|
2655 tmp10 += z2; |
|
2656 tmp11 += z2; |
|
2657 |
|
2658 /* Final output stage */ |
|
2659 |
|
2660 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); |
|
2661 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); |
|
2662 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); |
|
2663 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); |
|
2664 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); |
|
2665 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); |
|
2666 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); |
|
2667 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); |
|
2668 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); |
|
2669 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); |
|
2670 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); |
|
2671 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); |
|
2672 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); |
|
2673 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); |
|
2674 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); |
|
2675 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); |
|
2676 } |
|
2677 |
|
2678 /* Pass 2: process 16 rows from work array, store into output array. */ |
|
2679 |
|
2680 wsptr = workspace; |
|
2681 for (ctr = 0; ctr < 16; ctr++) { |
|
2682 outptr = output_buf[ctr] + output_col; |
|
2683 |
|
2684 /* Even part */ |
|
2685 |
|
2686 /* Add fudge factor here for final descale. */ |
|
2687 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
2688 tmp0 <<= CONST_BITS; |
|
2689 |
|
2690 z1 = (INT32) wsptr[4]; |
|
2691 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ |
|
2692 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ |
|
2693 |
|
2694 tmp10 = tmp0 + tmp1; |
|
2695 tmp11 = tmp0 - tmp1; |
|
2696 tmp12 = tmp0 + tmp2; |
|
2697 tmp13 = tmp0 - tmp2; |
|
2698 |
|
2699 z1 = (INT32) wsptr[2]; |
|
2700 z2 = (INT32) wsptr[6]; |
|
2701 z3 = z1 - z2; |
|
2702 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ |
|
2703 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ |
|
2704 |
|
2705 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ |
|
2706 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ |
|
2707 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ |
|
2708 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ |
|
2709 |
|
2710 tmp20 = tmp10 + tmp0; |
|
2711 tmp27 = tmp10 - tmp0; |
|
2712 tmp21 = tmp12 + tmp1; |
|
2713 tmp26 = tmp12 - tmp1; |
|
2714 tmp22 = tmp13 + tmp2; |
|
2715 tmp25 = tmp13 - tmp2; |
|
2716 tmp23 = tmp11 + tmp3; |
|
2717 tmp24 = tmp11 - tmp3; |
|
2718 |
|
2719 /* Odd part */ |
|
2720 |
|
2721 z1 = (INT32) wsptr[1]; |
|
2722 z2 = (INT32) wsptr[3]; |
|
2723 z3 = (INT32) wsptr[5]; |
|
2724 z4 = (INT32) wsptr[7]; |
|
2725 |
|
2726 tmp11 = z1 + z3; |
|
2727 |
|
2728 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ |
|
2729 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ |
|
2730 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ |
|
2731 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ |
|
2732 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ |
|
2733 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ |
|
2734 tmp0 = tmp1 + tmp2 + tmp3 - |
|
2735 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ |
|
2736 tmp13 = tmp10 + tmp11 + tmp12 - |
|
2737 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ |
|
2738 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ |
|
2739 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ |
|
2740 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ |
|
2741 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ |
|
2742 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ |
|
2743 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ |
|
2744 z2 += z4; |
|
2745 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ |
|
2746 tmp1 += z1; |
|
2747 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ |
|
2748 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ |
|
2749 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ |
|
2750 tmp12 += z2; |
|
2751 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ |
|
2752 tmp2 += z2; |
|
2753 tmp3 += z2; |
|
2754 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ |
|
2755 tmp10 += z2; |
|
2756 tmp11 += z2; |
|
2757 |
|
2758 /* Final output stage */ |
|
2759 |
|
2760 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, |
|
2761 CONST_BITS+PASS1_BITS+3) |
|
2762 & RANGE_MASK]; |
|
2763 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, |
|
2764 CONST_BITS+PASS1_BITS+3) |
|
2765 & RANGE_MASK]; |
|
2766 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, |
|
2767 CONST_BITS+PASS1_BITS+3) |
|
2768 & RANGE_MASK]; |
|
2769 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1, |
|
2770 CONST_BITS+PASS1_BITS+3) |
|
2771 & RANGE_MASK]; |
|
2772 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, |
|
2773 CONST_BITS+PASS1_BITS+3) |
|
2774 & RANGE_MASK]; |
|
2775 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, |
|
2776 CONST_BITS+PASS1_BITS+3) |
|
2777 & RANGE_MASK]; |
|
2778 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, |
|
2779 CONST_BITS+PASS1_BITS+3) |
|
2780 & RANGE_MASK]; |
|
2781 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, |
|
2782 CONST_BITS+PASS1_BITS+3) |
|
2783 & RANGE_MASK]; |
|
2784 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, |
|
2785 CONST_BITS+PASS1_BITS+3) |
|
2786 & RANGE_MASK]; |
|
2787 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, |
|
2788 CONST_BITS+PASS1_BITS+3) |
|
2789 & RANGE_MASK]; |
|
2790 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, |
|
2791 CONST_BITS+PASS1_BITS+3) |
|
2792 & RANGE_MASK]; |
|
2793 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, |
|
2794 CONST_BITS+PASS1_BITS+3) |
|
2795 & RANGE_MASK]; |
|
2796 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, |
|
2797 CONST_BITS+PASS1_BITS+3) |
|
2798 & RANGE_MASK]; |
|
2799 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, |
|
2800 CONST_BITS+PASS1_BITS+3) |
|
2801 & RANGE_MASK]; |
|
2802 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, |
|
2803 CONST_BITS+PASS1_BITS+3) |
|
2804 & RANGE_MASK]; |
|
2805 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, |
|
2806 CONST_BITS+PASS1_BITS+3) |
|
2807 & RANGE_MASK]; |
|
2808 |
|
2809 wsptr += 8; /* advance pointer to next row */ |
|
2810 } |
|
2811 } |
|
2812 |
|
2813 |
|
2814 /* |
|
2815 * Perform dequantization and inverse DCT on one block of coefficients, |
|
2816 * producing a 16x8 output block. |
|
2817 * |
|
2818 * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows). |
|
2819 */ |
|
2820 |
|
2821 GLOBAL(void) |
|
2822 jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
2823 JCOEFPTR coef_block, |
|
2824 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
2825 { |
|
2826 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; |
|
2827 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; |
|
2828 INT32 z1, z2, z3, z4; |
|
2829 JCOEFPTR inptr; |
|
2830 ISLOW_MULT_TYPE * quantptr; |
|
2831 int * wsptr; |
|
2832 JSAMPROW outptr; |
|
2833 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
2834 int ctr; |
|
2835 int workspace[8*8]; /* buffers data between passes */ |
162 SHIFT_TEMPS |
2836 SHIFT_TEMPS |
163 |
2837 |
164 /* Pass 1: process columns from input, store into work array. */ |
2838 /* Pass 1: process columns from input, store into work array. */ |
165 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ |
2839 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ |
166 /* furthermore, we scale the results by 2**PASS1_BITS. */ |
2840 /* furthermore, we scale the results by 2**PASS1_BITS. */ |
205 |
2879 |
206 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
2880 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
207 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
2881 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
208 |
2882 |
209 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); |
2883 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); |
210 tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); |
2884 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); |
211 tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); |
2885 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); |
212 |
2886 |
213 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
2887 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
214 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
2888 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
215 |
2889 z2 <<= CONST_BITS; |
216 tmp0 = (z2 + z3) << CONST_BITS; |
2890 z3 <<= CONST_BITS; |
217 tmp1 = (z2 - z3) << CONST_BITS; |
2891 /* Add fudge factor here for final descale. */ |
|
2892 z2 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
2893 |
|
2894 tmp0 = z2 + z3; |
|
2895 tmp1 = z2 - z3; |
218 |
2896 |
219 tmp10 = tmp0 + tmp3; |
2897 tmp10 = tmp0 + tmp2; |
220 tmp13 = tmp0 - tmp3; |
2898 tmp13 = tmp0 - tmp2; |
221 tmp11 = tmp1 + tmp2; |
2899 tmp11 = tmp1 + tmp3; |
222 tmp12 = tmp1 - tmp2; |
2900 tmp12 = tmp1 - tmp3; |
223 |
2901 |
224 /* Odd part per figure 8; the matrix is unitary and hence its |
2902 /* Odd part per figure 8; the matrix is unitary and hence its |
225 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
2903 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
226 */ |
2904 */ |
227 |
2905 |
228 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
2906 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
229 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
2907 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
230 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
2908 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
231 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
2909 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
232 |
2910 |
233 z1 = tmp0 + tmp3; |
2911 z2 = tmp0 + tmp2; |
234 z2 = tmp1 + tmp2; |
2912 z3 = tmp1 + tmp3; |
235 z3 = tmp0 + tmp2; |
2913 |
236 z4 = tmp1 + tmp3; |
2914 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ |
237 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ |
2915 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ |
238 |
2916 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ |
|
2917 z2 += z1; |
|
2918 z3 += z1; |
|
2919 |
|
2920 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ |
239 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ |
2921 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ |
|
2922 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ |
|
2923 tmp0 += z1 + z2; |
|
2924 tmp3 += z1 + z3; |
|
2925 |
|
2926 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ |
240 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ |
2927 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ |
241 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ |
2928 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ |
242 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ |
2929 tmp1 += z1 + z3; |
243 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ |
2930 tmp2 += z1 + z2; |
244 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ |
|
245 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ |
|
246 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ |
|
247 |
|
248 z3 += z5; |
|
249 z4 += z5; |
|
250 |
|
251 tmp0 += z1 + z3; |
|
252 tmp1 += z2 + z4; |
|
253 tmp2 += z2 + z3; |
|
254 tmp3 += z1 + z4; |
|
255 |
2931 |
256 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ |
2932 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ |
257 |
2933 |
258 wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); |
2934 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS); |
259 wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); |
2935 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS); |
260 wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); |
2936 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS); |
261 wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); |
2937 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS); |
262 wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); |
2938 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS); |
263 wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); |
2939 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS); |
264 wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); |
2940 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS); |
265 wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); |
2941 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS); |
266 |
2942 |
267 inptr++; /* advance pointers to next column */ |
2943 inptr++; /* advance pointers to next column */ |
268 quantptr++; |
2944 quantptr++; |
269 wsptr++; |
2945 wsptr++; |
270 } |
2946 } |
|
2947 |
|
2948 /* Pass 2: process 8 rows from work array, store into output array. |
|
2949 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32). |
|
2950 */ |
|
2951 wsptr = workspace; |
|
2952 for (ctr = 0; ctr < 8; ctr++) { |
|
2953 outptr = output_buf[ctr] + output_col; |
|
2954 |
|
2955 /* Even part */ |
|
2956 |
|
2957 /* Add fudge factor here for final descale. */ |
|
2958 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
2959 tmp0 <<= CONST_BITS; |
|
2960 |
|
2961 z1 = (INT32) wsptr[4]; |
|
2962 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ |
|
2963 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ |
|
2964 |
|
2965 tmp10 = tmp0 + tmp1; |
|
2966 tmp11 = tmp0 - tmp1; |
|
2967 tmp12 = tmp0 + tmp2; |
|
2968 tmp13 = tmp0 - tmp2; |
|
2969 |
|
2970 z1 = (INT32) wsptr[2]; |
|
2971 z2 = (INT32) wsptr[6]; |
|
2972 z3 = z1 - z2; |
|
2973 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ |
|
2974 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ |
|
2975 |
|
2976 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ |
|
2977 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ |
|
2978 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ |
|
2979 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ |
|
2980 |
|
2981 tmp20 = tmp10 + tmp0; |
|
2982 tmp27 = tmp10 - tmp0; |
|
2983 tmp21 = tmp12 + tmp1; |
|
2984 tmp26 = tmp12 - tmp1; |
|
2985 tmp22 = tmp13 + tmp2; |
|
2986 tmp25 = tmp13 - tmp2; |
|
2987 tmp23 = tmp11 + tmp3; |
|
2988 tmp24 = tmp11 - tmp3; |
|
2989 |
|
2990 /* Odd part */ |
|
2991 |
|
2992 z1 = (INT32) wsptr[1]; |
|
2993 z2 = (INT32) wsptr[3]; |
|
2994 z3 = (INT32) wsptr[5]; |
|
2995 z4 = (INT32) wsptr[7]; |
|
2996 |
|
2997 tmp11 = z1 + z3; |
|
2998 |
|
2999 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ |
|
3000 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ |
|
3001 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ |
|
3002 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ |
|
3003 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ |
|
3004 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ |
|
3005 tmp0 = tmp1 + tmp2 + tmp3 - |
|
3006 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ |
|
3007 tmp13 = tmp10 + tmp11 + tmp12 - |
|
3008 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ |
|
3009 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ |
|
3010 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ |
|
3011 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ |
|
3012 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ |
|
3013 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ |
|
3014 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ |
|
3015 z2 += z4; |
|
3016 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ |
|
3017 tmp1 += z1; |
|
3018 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ |
|
3019 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ |
|
3020 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ |
|
3021 tmp12 += z2; |
|
3022 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ |
|
3023 tmp2 += z2; |
|
3024 tmp3 += z2; |
|
3025 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ |
|
3026 tmp10 += z2; |
|
3027 tmp11 += z2; |
|
3028 |
|
3029 /* Final output stage */ |
|
3030 |
|
3031 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, |
|
3032 CONST_BITS+PASS1_BITS+3) |
|
3033 & RANGE_MASK]; |
|
3034 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, |
|
3035 CONST_BITS+PASS1_BITS+3) |
|
3036 & RANGE_MASK]; |
|
3037 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, |
|
3038 CONST_BITS+PASS1_BITS+3) |
|
3039 & RANGE_MASK]; |
|
3040 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1, |
|
3041 CONST_BITS+PASS1_BITS+3) |
|
3042 & RANGE_MASK]; |
|
3043 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, |
|
3044 CONST_BITS+PASS1_BITS+3) |
|
3045 & RANGE_MASK]; |
|
3046 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, |
|
3047 CONST_BITS+PASS1_BITS+3) |
|
3048 & RANGE_MASK]; |
|
3049 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, |
|
3050 CONST_BITS+PASS1_BITS+3) |
|
3051 & RANGE_MASK]; |
|
3052 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, |
|
3053 CONST_BITS+PASS1_BITS+3) |
|
3054 & RANGE_MASK]; |
|
3055 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, |
|
3056 CONST_BITS+PASS1_BITS+3) |
|
3057 & RANGE_MASK]; |
|
3058 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, |
|
3059 CONST_BITS+PASS1_BITS+3) |
|
3060 & RANGE_MASK]; |
|
3061 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, |
|
3062 CONST_BITS+PASS1_BITS+3) |
|
3063 & RANGE_MASK]; |
|
3064 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, |
|
3065 CONST_BITS+PASS1_BITS+3) |
|
3066 & RANGE_MASK]; |
|
3067 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, |
|
3068 CONST_BITS+PASS1_BITS+3) |
|
3069 & RANGE_MASK]; |
|
3070 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, |
|
3071 CONST_BITS+PASS1_BITS+3) |
|
3072 & RANGE_MASK]; |
|
3073 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, |
|
3074 CONST_BITS+PASS1_BITS+3) |
|
3075 & RANGE_MASK]; |
|
3076 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, |
|
3077 CONST_BITS+PASS1_BITS+3) |
|
3078 & RANGE_MASK]; |
|
3079 |
|
3080 wsptr += 8; /* advance pointer to next row */ |
|
3081 } |
|
3082 } |
|
3083 |
|
3084 |
|
3085 /* |
|
3086 * Perform dequantization and inverse DCT on one block of coefficients, |
|
3087 * producing a 14x7 output block. |
|
3088 * |
|
3089 * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows). |
|
3090 */ |
|
3091 |
|
3092 GLOBAL(void) |
|
3093 jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
3094 JCOEFPTR coef_block, |
|
3095 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
3096 { |
|
3097 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; |
|
3098 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; |
|
3099 INT32 z1, z2, z3, z4; |
|
3100 JCOEFPTR inptr; |
|
3101 ISLOW_MULT_TYPE * quantptr; |
|
3102 int * wsptr; |
|
3103 JSAMPROW outptr; |
|
3104 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
3105 int ctr; |
|
3106 int workspace[8*7]; /* buffers data between passes */ |
|
3107 SHIFT_TEMPS |
|
3108 |
|
3109 /* Pass 1: process columns from input, store into work array. |
|
3110 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14). |
|
3111 */ |
|
3112 inptr = coef_block; |
|
3113 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
3114 wsptr = workspace; |
|
3115 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
|
3116 /* Even part */ |
|
3117 |
|
3118 tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
3119 tmp23 <<= CONST_BITS; |
|
3120 /* Add fudge factor here for final descale. */ |
|
3121 tmp23 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
3122 |
|
3123 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
3124 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
3125 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
|
3126 |
|
3127 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ |
|
3128 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ |
|
3129 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ |
|
3130 tmp10 = z1 + z3; |
|
3131 z2 -= tmp10; |
|
3132 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */ |
|
3133 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ |
|
3134 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ |
|
3135 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ |
|
3136 |
|
3137 /* Odd part */ |
|
3138 |
|
3139 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
3140 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
3141 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
3142 |
|
3143 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ |
|
3144 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ |
|
3145 tmp10 = tmp11 - tmp12; |
|
3146 tmp11 += tmp12; |
|
3147 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ |
|
3148 tmp11 += tmp12; |
|
3149 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ |
|
3150 tmp10 += z2; |
|
3151 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ |
|
3152 |
|
3153 /* Final output stage */ |
|
3154 |
|
3155 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |
|
3156 wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |
|
3157 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |
|
3158 wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |
|
3159 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); |
|
3160 wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); |
|
3161 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS); |
|
3162 } |
|
3163 |
|
3164 /* Pass 2: process 7 rows from work array, store into output array. |
|
3165 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28). |
|
3166 */ |
|
3167 wsptr = workspace; |
|
3168 for (ctr = 0; ctr < 7; ctr++) { |
|
3169 outptr = output_buf[ctr] + output_col; |
|
3170 |
|
3171 /* Even part */ |
|
3172 |
|
3173 /* Add fudge factor here for final descale. */ |
|
3174 z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
3175 z1 <<= CONST_BITS; |
|
3176 z4 = (INT32) wsptr[4]; |
|
3177 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ |
|
3178 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ |
|
3179 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ |
|
3180 |
|
3181 tmp10 = z1 + z2; |
|
3182 tmp11 = z1 + z3; |
|
3183 tmp12 = z1 - z4; |
|
3184 |
|
3185 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */ |
|
3186 |
|
3187 z1 = (INT32) wsptr[2]; |
|
3188 z2 = (INT32) wsptr[6]; |
|
3189 |
|
3190 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ |
|
3191 |
|
3192 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ |
|
3193 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ |
|
3194 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ |
|
3195 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ |
|
3196 |
|
3197 tmp20 = tmp10 + tmp13; |
|
3198 tmp26 = tmp10 - tmp13; |
|
3199 tmp21 = tmp11 + tmp14; |
|
3200 tmp25 = tmp11 - tmp14; |
|
3201 tmp22 = tmp12 + tmp15; |
|
3202 tmp24 = tmp12 - tmp15; |
|
3203 |
|
3204 /* Odd part */ |
|
3205 |
|
3206 z1 = (INT32) wsptr[1]; |
|
3207 z2 = (INT32) wsptr[3]; |
|
3208 z3 = (INT32) wsptr[5]; |
|
3209 z4 = (INT32) wsptr[7]; |
|
3210 z4 <<= CONST_BITS; |
|
3211 |
|
3212 tmp14 = z1 + z3; |
|
3213 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ |
|
3214 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ |
|
3215 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ |
|
3216 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ |
|
3217 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ |
|
3218 z1 -= z2; |
|
3219 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ |
|
3220 tmp16 += tmp15; |
|
3221 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ |
|
3222 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ |
|
3223 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ |
|
3224 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ |
|
3225 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ |
|
3226 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ |
|
3227 |
|
3228 tmp13 = ((z1 - z3) << CONST_BITS) + z4; |
|
3229 |
|
3230 /* Final output stage */ |
|
3231 |
|
3232 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |
|
3233 CONST_BITS+PASS1_BITS+3) |
|
3234 & RANGE_MASK]; |
|
3235 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |
|
3236 CONST_BITS+PASS1_BITS+3) |
|
3237 & RANGE_MASK]; |
|
3238 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |
|
3239 CONST_BITS+PASS1_BITS+3) |
|
3240 & RANGE_MASK]; |
|
3241 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |
|
3242 CONST_BITS+PASS1_BITS+3) |
|
3243 & RANGE_MASK]; |
|
3244 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |
|
3245 CONST_BITS+PASS1_BITS+3) |
|
3246 & RANGE_MASK]; |
|
3247 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |
|
3248 CONST_BITS+PASS1_BITS+3) |
|
3249 & RANGE_MASK]; |
|
3250 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, |
|
3251 CONST_BITS+PASS1_BITS+3) |
|
3252 & RANGE_MASK]; |
|
3253 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, |
|
3254 CONST_BITS+PASS1_BITS+3) |
|
3255 & RANGE_MASK]; |
|
3256 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, |
|
3257 CONST_BITS+PASS1_BITS+3) |
|
3258 & RANGE_MASK]; |
|
3259 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, |
|
3260 CONST_BITS+PASS1_BITS+3) |
|
3261 & RANGE_MASK]; |
|
3262 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, |
|
3263 CONST_BITS+PASS1_BITS+3) |
|
3264 & RANGE_MASK]; |
|
3265 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, |
|
3266 CONST_BITS+PASS1_BITS+3) |
|
3267 & RANGE_MASK]; |
|
3268 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, |
|
3269 CONST_BITS+PASS1_BITS+3) |
|
3270 & RANGE_MASK]; |
|
3271 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, |
|
3272 CONST_BITS+PASS1_BITS+3) |
|
3273 & RANGE_MASK]; |
|
3274 |
|
3275 wsptr += 8; /* advance pointer to next row */ |
|
3276 } |
|
3277 } |
|
3278 |
|
3279 |
|
3280 /* |
|
3281 * Perform dequantization and inverse DCT on one block of coefficients, |
|
3282 * producing a 12x6 output block. |
|
3283 * |
|
3284 * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows). |
|
3285 */ |
|
3286 |
|
3287 GLOBAL(void) |
|
3288 jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
3289 JCOEFPTR coef_block, |
|
3290 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
3291 { |
|
3292 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; |
|
3293 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; |
|
3294 INT32 z1, z2, z3, z4; |
|
3295 JCOEFPTR inptr; |
|
3296 ISLOW_MULT_TYPE * quantptr; |
|
3297 int * wsptr; |
|
3298 JSAMPROW outptr; |
|
3299 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
3300 int ctr; |
|
3301 int workspace[8*6]; /* buffers data between passes */ |
|
3302 SHIFT_TEMPS |
|
3303 |
|
3304 /* Pass 1: process columns from input, store into work array. |
|
3305 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). |
|
3306 */ |
|
3307 inptr = coef_block; |
|
3308 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
3309 wsptr = workspace; |
|
3310 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
|
3311 /* Even part */ |
|
3312 |
|
3313 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
3314 tmp10 <<= CONST_BITS; |
|
3315 /* Add fudge factor here for final descale. */ |
|
3316 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
3317 tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
3318 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */ |
|
3319 tmp11 = tmp10 + tmp20; |
|
3320 tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS); |
|
3321 tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
3322 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */ |
|
3323 tmp20 = tmp11 + tmp10; |
|
3324 tmp22 = tmp11 - tmp10; |
|
3325 |
|
3326 /* Odd part */ |
|
3327 |
|
3328 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
3329 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
3330 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
3331 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ |
|
3332 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS); |
|
3333 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS); |
|
3334 tmp11 = (z1 - z2 - z3) << PASS1_BITS; |
|
3335 |
|
3336 /* Final output stage */ |
|
3337 |
|
3338 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |
|
3339 wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |
|
3340 wsptr[8*1] = (int) (tmp21 + tmp11); |
|
3341 wsptr[8*4] = (int) (tmp21 - tmp11); |
|
3342 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); |
|
3343 wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); |
|
3344 } |
|
3345 |
|
3346 /* Pass 2: process 6 rows from work array, store into output array. |
|
3347 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24). |
|
3348 */ |
|
3349 wsptr = workspace; |
|
3350 for (ctr = 0; ctr < 6; ctr++) { |
|
3351 outptr = output_buf[ctr] + output_col; |
|
3352 |
|
3353 /* Even part */ |
|
3354 |
|
3355 /* Add fudge factor here for final descale. */ |
|
3356 z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
3357 z3 <<= CONST_BITS; |
|
3358 |
|
3359 z4 = (INT32) wsptr[4]; |
|
3360 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ |
|
3361 |
|
3362 tmp10 = z3 + z4; |
|
3363 tmp11 = z3 - z4; |
|
3364 |
|
3365 z1 = (INT32) wsptr[2]; |
|
3366 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ |
|
3367 z1 <<= CONST_BITS; |
|
3368 z2 = (INT32) wsptr[6]; |
|
3369 z2 <<= CONST_BITS; |
|
3370 |
|
3371 tmp12 = z1 - z2; |
|
3372 |
|
3373 tmp21 = z3 + tmp12; |
|
3374 tmp24 = z3 - tmp12; |
|
3375 |
|
3376 tmp12 = z4 + z2; |
|
3377 |
|
3378 tmp20 = tmp10 + tmp12; |
|
3379 tmp25 = tmp10 - tmp12; |
|
3380 |
|
3381 tmp12 = z4 - z1 - z2; |
|
3382 |
|
3383 tmp22 = tmp11 + tmp12; |
|
3384 tmp23 = tmp11 - tmp12; |
|
3385 |
|
3386 /* Odd part */ |
|
3387 |
|
3388 z1 = (INT32) wsptr[1]; |
|
3389 z2 = (INT32) wsptr[3]; |
|
3390 z3 = (INT32) wsptr[5]; |
|
3391 z4 = (INT32) wsptr[7]; |
|
3392 |
|
3393 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ |
|
3394 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ |
|
3395 |
|
3396 tmp10 = z1 + z3; |
|
3397 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ |
|
3398 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ |
|
3399 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ |
|
3400 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ |
|
3401 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ |
|
3402 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ |
|
3403 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ |
|
3404 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ |
|
3405 |
|
3406 z1 -= z4; |
|
3407 z2 -= z3; |
|
3408 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ |
|
3409 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ |
|
3410 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ |
|
3411 |
|
3412 /* Final output stage */ |
|
3413 |
|
3414 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |
|
3415 CONST_BITS+PASS1_BITS+3) |
|
3416 & RANGE_MASK]; |
|
3417 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |
|
3418 CONST_BITS+PASS1_BITS+3) |
|
3419 & RANGE_MASK]; |
|
3420 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |
|
3421 CONST_BITS+PASS1_BITS+3) |
|
3422 & RANGE_MASK]; |
|
3423 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |
|
3424 CONST_BITS+PASS1_BITS+3) |
|
3425 & RANGE_MASK]; |
|
3426 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |
|
3427 CONST_BITS+PASS1_BITS+3) |
|
3428 & RANGE_MASK]; |
|
3429 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |
|
3430 CONST_BITS+PASS1_BITS+3) |
|
3431 & RANGE_MASK]; |
|
3432 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, |
|
3433 CONST_BITS+PASS1_BITS+3) |
|
3434 & RANGE_MASK]; |
|
3435 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, |
|
3436 CONST_BITS+PASS1_BITS+3) |
|
3437 & RANGE_MASK]; |
|
3438 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, |
|
3439 CONST_BITS+PASS1_BITS+3) |
|
3440 & RANGE_MASK]; |
|
3441 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, |
|
3442 CONST_BITS+PASS1_BITS+3) |
|
3443 & RANGE_MASK]; |
|
3444 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, |
|
3445 CONST_BITS+PASS1_BITS+3) |
|
3446 & RANGE_MASK]; |
|
3447 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, |
|
3448 CONST_BITS+PASS1_BITS+3) |
|
3449 & RANGE_MASK]; |
|
3450 |
|
3451 wsptr += 8; /* advance pointer to next row */ |
|
3452 } |
|
3453 } |
|
3454 |
|
3455 |
|
3456 /* |
|
3457 * Perform dequantization and inverse DCT on one block of coefficients, |
|
3458 * producing a 10x5 output block. |
|
3459 * |
|
3460 * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows). |
|
3461 */ |
|
3462 |
|
3463 GLOBAL(void) |
|
3464 jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
3465 JCOEFPTR coef_block, |
|
3466 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
3467 { |
|
3468 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; |
|
3469 INT32 tmp20, tmp21, tmp22, tmp23, tmp24; |
|
3470 INT32 z1, z2, z3, z4; |
|
3471 JCOEFPTR inptr; |
|
3472 ISLOW_MULT_TYPE * quantptr; |
|
3473 int * wsptr; |
|
3474 JSAMPROW outptr; |
|
3475 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
3476 int ctr; |
|
3477 int workspace[8*5]; /* buffers data between passes */ |
|
3478 SHIFT_TEMPS |
|
3479 |
|
3480 /* Pass 1: process columns from input, store into work array. |
|
3481 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10). |
|
3482 */ |
|
3483 inptr = coef_block; |
|
3484 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
3485 wsptr = workspace; |
|
3486 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
|
3487 /* Even part */ |
|
3488 |
|
3489 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
3490 tmp12 <<= CONST_BITS; |
|
3491 /* Add fudge factor here for final descale. */ |
|
3492 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
3493 tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
3494 tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
3495 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */ |
|
3496 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */ |
|
3497 z3 = tmp12 + z2; |
|
3498 tmp10 = z3 + z1; |
|
3499 tmp11 = z3 - z1; |
|
3500 tmp12 -= z2 << 2; |
|
3501 |
|
3502 /* Odd part */ |
|
3503 |
|
3504 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
3505 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
3506 |
|
3507 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ |
|
3508 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ |
|
3509 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ |
|
3510 |
|
3511 /* Final output stage */ |
|
3512 |
|
3513 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS); |
|
3514 wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS); |
|
3515 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS); |
|
3516 wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS); |
|
3517 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); |
|
3518 } |
|
3519 |
|
3520 /* Pass 2: process 5 rows from work array, store into output array. |
|
3521 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20). |
|
3522 */ |
|
3523 wsptr = workspace; |
|
3524 for (ctr = 0; ctr < 5; ctr++) { |
|
3525 outptr = output_buf[ctr] + output_col; |
|
3526 |
|
3527 /* Even part */ |
|
3528 |
|
3529 /* Add fudge factor here for final descale. */ |
|
3530 z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
3531 z3 <<= CONST_BITS; |
|
3532 z4 = (INT32) wsptr[4]; |
|
3533 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ |
|
3534 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ |
|
3535 tmp10 = z3 + z1; |
|
3536 tmp11 = z3 - z2; |
|
3537 |
|
3538 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */ |
|
3539 |
|
3540 z2 = (INT32) wsptr[2]; |
|
3541 z3 = (INT32) wsptr[6]; |
|
3542 |
|
3543 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ |
|
3544 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ |
|
3545 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ |
|
3546 |
|
3547 tmp20 = tmp10 + tmp12; |
|
3548 tmp24 = tmp10 - tmp12; |
|
3549 tmp21 = tmp11 + tmp13; |
|
3550 tmp23 = tmp11 - tmp13; |
|
3551 |
|
3552 /* Odd part */ |
|
3553 |
|
3554 z1 = (INT32) wsptr[1]; |
|
3555 z2 = (INT32) wsptr[3]; |
|
3556 z3 = (INT32) wsptr[5]; |
|
3557 z3 <<= CONST_BITS; |
|
3558 z4 = (INT32) wsptr[7]; |
|
3559 |
|
3560 tmp11 = z2 + z4; |
|
3561 tmp13 = z2 - z4; |
|
3562 |
|
3563 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ |
|
3564 |
|
3565 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ |
|
3566 z4 = z3 + tmp12; |
|
3567 |
|
3568 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ |
|
3569 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ |
|
3570 |
|
3571 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ |
|
3572 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1)); |
|
3573 |
|
3574 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3; |
|
3575 |
|
3576 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ |
|
3577 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ |
|
3578 |
|
3579 /* Final output stage */ |
|
3580 |
|
3581 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |
|
3582 CONST_BITS+PASS1_BITS+3) |
|
3583 & RANGE_MASK]; |
|
3584 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |
|
3585 CONST_BITS+PASS1_BITS+3) |
|
3586 & RANGE_MASK]; |
|
3587 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |
|
3588 CONST_BITS+PASS1_BITS+3) |
|
3589 & RANGE_MASK]; |
|
3590 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |
|
3591 CONST_BITS+PASS1_BITS+3) |
|
3592 & RANGE_MASK]; |
|
3593 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |
|
3594 CONST_BITS+PASS1_BITS+3) |
|
3595 & RANGE_MASK]; |
|
3596 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |
|
3597 CONST_BITS+PASS1_BITS+3) |
|
3598 & RANGE_MASK]; |
|
3599 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, |
|
3600 CONST_BITS+PASS1_BITS+3) |
|
3601 & RANGE_MASK]; |
|
3602 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, |
|
3603 CONST_BITS+PASS1_BITS+3) |
|
3604 & RANGE_MASK]; |
|
3605 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, |
|
3606 CONST_BITS+PASS1_BITS+3) |
|
3607 & RANGE_MASK]; |
|
3608 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, |
|
3609 CONST_BITS+PASS1_BITS+3) |
|
3610 & RANGE_MASK]; |
|
3611 |
|
3612 wsptr += 8; /* advance pointer to next row */ |
|
3613 } |
|
3614 } |
|
3615 |
|
3616 |
|
3617 /* |
|
3618 * Perform dequantization and inverse DCT on one block of coefficients, |
|
3619 * producing a 8x4 output block. |
|
3620 * |
|
3621 * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows). |
|
3622 */ |
|
3623 |
|
3624 GLOBAL(void) |
|
3625 jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
3626 JCOEFPTR coef_block, |
|
3627 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
3628 { |
|
3629 INT32 tmp0, tmp1, tmp2, tmp3; |
|
3630 INT32 tmp10, tmp11, tmp12, tmp13; |
|
3631 INT32 z1, z2, z3; |
|
3632 JCOEFPTR inptr; |
|
3633 ISLOW_MULT_TYPE * quantptr; |
|
3634 int * wsptr; |
|
3635 JSAMPROW outptr; |
|
3636 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
3637 int ctr; |
|
3638 int workspace[8*4]; /* buffers data between passes */ |
|
3639 SHIFT_TEMPS |
|
3640 |
|
3641 /* Pass 1: process columns from input, store into work array. |
|
3642 * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). |
|
3643 */ |
|
3644 inptr = coef_block; |
|
3645 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
3646 wsptr = workspace; |
|
3647 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
|
3648 /* Even part */ |
|
3649 |
|
3650 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
3651 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
3652 |
|
3653 tmp10 = (tmp0 + tmp2) << PASS1_BITS; |
|
3654 tmp12 = (tmp0 - tmp2) << PASS1_BITS; |
|
3655 |
|
3656 /* Odd part */ |
|
3657 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ |
|
3658 |
|
3659 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
3660 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
3661 |
|
3662 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ |
|
3663 /* Add fudge factor here for final descale. */ |
|
3664 z1 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
3665 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */ |
|
3666 CONST_BITS-PASS1_BITS); |
|
3667 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */ |
|
3668 CONST_BITS-PASS1_BITS); |
|
3669 |
|
3670 /* Final output stage */ |
|
3671 |
|
3672 wsptr[8*0] = (int) (tmp10 + tmp0); |
|
3673 wsptr[8*3] = (int) (tmp10 - tmp0); |
|
3674 wsptr[8*1] = (int) (tmp12 + tmp2); |
|
3675 wsptr[8*2] = (int) (tmp12 - tmp2); |
|
3676 } |
|
3677 |
|
3678 /* Pass 2: process rows from work array, store into output array. */ |
|
3679 /* Note that we must descale the results by a factor of 8 == 2**3, */ |
|
3680 /* and also undo the PASS1_BITS scaling. */ |
|
3681 |
|
3682 wsptr = workspace; |
|
3683 for (ctr = 0; ctr < 4; ctr++) { |
|
3684 outptr = output_buf[ctr] + output_col; |
|
3685 |
|
3686 /* Even part: reverse the even part of the forward DCT. */ |
|
3687 /* The rotator is sqrt(2)*c(-6). */ |
|
3688 |
|
3689 z2 = (INT32) wsptr[2]; |
|
3690 z3 = (INT32) wsptr[6]; |
|
3691 |
|
3692 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); |
|
3693 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); |
|
3694 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); |
|
3695 |
|
3696 /* Add fudge factor here for final descale. */ |
|
3697 z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
3698 z3 = (INT32) wsptr[4]; |
|
3699 |
|
3700 tmp0 = (z2 + z3) << CONST_BITS; |
|
3701 tmp1 = (z2 - z3) << CONST_BITS; |
|
3702 |
|
3703 tmp10 = tmp0 + tmp2; |
|
3704 tmp13 = tmp0 - tmp2; |
|
3705 tmp11 = tmp1 + tmp3; |
|
3706 tmp12 = tmp1 - tmp3; |
|
3707 |
|
3708 /* Odd part per figure 8; the matrix is unitary and hence its |
|
3709 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
|
3710 */ |
|
3711 |
|
3712 tmp0 = (INT32) wsptr[7]; |
|
3713 tmp1 = (INT32) wsptr[5]; |
|
3714 tmp2 = (INT32) wsptr[3]; |
|
3715 tmp3 = (INT32) wsptr[1]; |
|
3716 |
|
3717 z2 = tmp0 + tmp2; |
|
3718 z3 = tmp1 + tmp3; |
|
3719 |
|
3720 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ |
|
3721 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ |
|
3722 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ |
|
3723 z2 += z1; |
|
3724 z3 += z1; |
|
3725 |
|
3726 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ |
|
3727 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ |
|
3728 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ |
|
3729 tmp0 += z1 + z2; |
|
3730 tmp3 += z1 + z3; |
|
3731 |
|
3732 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ |
|
3733 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ |
|
3734 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ |
|
3735 tmp1 += z1 + z3; |
|
3736 tmp2 += z1 + z2; |
|
3737 |
|
3738 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ |
|
3739 |
|
3740 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3, |
|
3741 CONST_BITS+PASS1_BITS+3) |
|
3742 & RANGE_MASK]; |
|
3743 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3, |
|
3744 CONST_BITS+PASS1_BITS+3) |
|
3745 & RANGE_MASK]; |
|
3746 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2, |
|
3747 CONST_BITS+PASS1_BITS+3) |
|
3748 & RANGE_MASK]; |
|
3749 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2, |
|
3750 CONST_BITS+PASS1_BITS+3) |
|
3751 & RANGE_MASK]; |
|
3752 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1, |
|
3753 CONST_BITS+PASS1_BITS+3) |
|
3754 & RANGE_MASK]; |
|
3755 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1, |
|
3756 CONST_BITS+PASS1_BITS+3) |
|
3757 & RANGE_MASK]; |
|
3758 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0, |
|
3759 CONST_BITS+PASS1_BITS+3) |
|
3760 & RANGE_MASK]; |
|
3761 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0, |
|
3762 CONST_BITS+PASS1_BITS+3) |
|
3763 & RANGE_MASK]; |
|
3764 |
|
3765 wsptr += DCTSIZE; /* advance pointer to next row */ |
|
3766 } |
|
3767 } |
|
3768 |
|
3769 |
|
3770 /* |
|
3771 * Perform dequantization and inverse DCT on one block of coefficients, |
|
3772 * producing a reduced-size 6x3 output block. |
|
3773 * |
|
3774 * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows). |
|
3775 */ |
|
3776 |
|
3777 GLOBAL(void) |
|
3778 jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
3779 JCOEFPTR coef_block, |
|
3780 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
3781 { |
|
3782 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; |
|
3783 INT32 z1, z2, z3; |
|
3784 JCOEFPTR inptr; |
|
3785 ISLOW_MULT_TYPE * quantptr; |
|
3786 int * wsptr; |
|
3787 JSAMPROW outptr; |
|
3788 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
3789 int ctr; |
|
3790 int workspace[6*3]; /* buffers data between passes */ |
|
3791 SHIFT_TEMPS |
|
3792 |
|
3793 /* Pass 1: process columns from input, store into work array. |
|
3794 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6). |
|
3795 */ |
|
3796 inptr = coef_block; |
|
3797 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
3798 wsptr = workspace; |
|
3799 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { |
|
3800 /* Even part */ |
|
3801 |
|
3802 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
3803 tmp0 <<= CONST_BITS; |
|
3804 /* Add fudge factor here for final descale. */ |
|
3805 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
3806 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
3807 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ |
|
3808 tmp10 = tmp0 + tmp12; |
|
3809 tmp2 = tmp0 - tmp12 - tmp12; |
|
3810 |
|
3811 /* Odd part */ |
|
3812 |
|
3813 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
3814 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ |
|
3815 |
|
3816 /* Final output stage */ |
|
3817 |
|
3818 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); |
|
3819 wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); |
|
3820 wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); |
|
3821 } |
|
3822 |
|
3823 /* Pass 2: process 3 rows from work array, store into output array. |
|
3824 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). |
|
3825 */ |
|
3826 wsptr = workspace; |
|
3827 for (ctr = 0; ctr < 3; ctr++) { |
|
3828 outptr = output_buf[ctr] + output_col; |
|
3829 |
|
3830 /* Even part */ |
|
3831 |
|
3832 /* Add fudge factor here for final descale. */ |
|
3833 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
3834 tmp0 <<= CONST_BITS; |
|
3835 tmp2 = (INT32) wsptr[4]; |
|
3836 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ |
|
3837 tmp1 = tmp0 + tmp10; |
|
3838 tmp11 = tmp0 - tmp10 - tmp10; |
|
3839 tmp10 = (INT32) wsptr[2]; |
|
3840 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ |
|
3841 tmp10 = tmp1 + tmp0; |
|
3842 tmp12 = tmp1 - tmp0; |
|
3843 |
|
3844 /* Odd part */ |
|
3845 |
|
3846 z1 = (INT32) wsptr[1]; |
|
3847 z2 = (INT32) wsptr[3]; |
|
3848 z3 = (INT32) wsptr[5]; |
|
3849 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ |
|
3850 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); |
|
3851 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); |
|
3852 tmp1 = (z1 - z2 - z3) << CONST_BITS; |
|
3853 |
|
3854 /* Final output stage */ |
|
3855 |
|
3856 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, |
|
3857 CONST_BITS+PASS1_BITS+3) |
|
3858 & RANGE_MASK]; |
|
3859 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, |
|
3860 CONST_BITS+PASS1_BITS+3) |
|
3861 & RANGE_MASK]; |
|
3862 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, |
|
3863 CONST_BITS+PASS1_BITS+3) |
|
3864 & RANGE_MASK]; |
|
3865 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, |
|
3866 CONST_BITS+PASS1_BITS+3) |
|
3867 & RANGE_MASK]; |
|
3868 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, |
|
3869 CONST_BITS+PASS1_BITS+3) |
|
3870 & RANGE_MASK]; |
|
3871 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, |
|
3872 CONST_BITS+PASS1_BITS+3) |
|
3873 & RANGE_MASK]; |
|
3874 |
|
3875 wsptr += 6; /* advance pointer to next row */ |
|
3876 } |
|
3877 } |
|
3878 |
|
3879 |
|
3880 /* |
|
3881 * Perform dequantization and inverse DCT on one block of coefficients, |
|
3882 * producing a 4x2 output block. |
|
3883 * |
|
3884 * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows). |
|
3885 */ |
|
3886 |
|
3887 GLOBAL(void) |
|
3888 jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
3889 JCOEFPTR coef_block, |
|
3890 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
3891 { |
|
3892 INT32 tmp0, tmp2, tmp10, tmp12; |
|
3893 INT32 z1, z2, z3; |
|
3894 JCOEFPTR inptr; |
|
3895 ISLOW_MULT_TYPE * quantptr; |
|
3896 INT32 * wsptr; |
|
3897 JSAMPROW outptr; |
|
3898 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
3899 int ctr; |
|
3900 INT32 workspace[4*2]; /* buffers data between passes */ |
|
3901 SHIFT_TEMPS |
|
3902 |
|
3903 /* Pass 1: process columns from input, store into work array. */ |
|
3904 |
|
3905 inptr = coef_block; |
|
3906 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
3907 wsptr = workspace; |
|
3908 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) { |
|
3909 /* Even part */ |
|
3910 |
|
3911 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
3912 |
|
3913 /* Odd part */ |
|
3914 |
|
3915 tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
3916 |
|
3917 /* Final output stage */ |
|
3918 |
|
3919 wsptr[4*0] = tmp10 + tmp0; |
|
3920 wsptr[4*1] = tmp10 - tmp0; |
|
3921 } |
|
3922 |
|
3923 /* Pass 2: process 2 rows from work array, store into output array. |
|
3924 * 4-point IDCT kernel, |
|
3925 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. |
|
3926 */ |
|
3927 wsptr = workspace; |
|
3928 for (ctr = 0; ctr < 2; ctr++) { |
|
3929 outptr = output_buf[ctr] + output_col; |
|
3930 |
|
3931 /* Even part */ |
|
3932 |
|
3933 /* Add fudge factor here for final descale. */ |
|
3934 tmp0 = wsptr[0] + (ONE << 2); |
|
3935 tmp2 = wsptr[2]; |
|
3936 |
|
3937 tmp10 = (tmp0 + tmp2) << CONST_BITS; |
|
3938 tmp12 = (tmp0 - tmp2) << CONST_BITS; |
|
3939 |
|
3940 /* Odd part */ |
|
3941 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ |
|
3942 |
|
3943 z2 = wsptr[1]; |
|
3944 z3 = wsptr[3]; |
|
3945 |
|
3946 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ |
|
3947 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ |
|
3948 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ |
|
3949 |
|
3950 /* Final output stage */ |
|
3951 |
|
3952 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, |
|
3953 CONST_BITS+3) |
|
3954 & RANGE_MASK]; |
|
3955 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, |
|
3956 CONST_BITS+3) |
|
3957 & RANGE_MASK]; |
|
3958 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, |
|
3959 CONST_BITS+3) |
|
3960 & RANGE_MASK]; |
|
3961 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, |
|
3962 CONST_BITS+3) |
|
3963 & RANGE_MASK]; |
|
3964 |
|
3965 wsptr += 4; /* advance pointer to next row */ |
|
3966 } |
|
3967 } |
|
3968 |
|
3969 |
|
3970 /* |
|
3971 * Perform dequantization and inverse DCT on one block of coefficients, |
|
3972 * producing a 2x1 output block. |
|
3973 * |
|
3974 * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows). |
|
3975 */ |
|
3976 |
|
3977 GLOBAL(void) |
|
3978 jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
3979 JCOEFPTR coef_block, |
|
3980 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
3981 { |
|
3982 INT32 tmp0, tmp10; |
|
3983 ISLOW_MULT_TYPE * quantptr; |
|
3984 JSAMPROW outptr; |
|
3985 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
3986 SHIFT_TEMPS |
|
3987 |
|
3988 /* Pass 1: empty. */ |
|
3989 |
|
3990 /* Pass 2: process 1 row from input, store into output array. */ |
|
3991 |
|
3992 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
3993 outptr = output_buf[0] + output_col; |
|
3994 |
|
3995 /* Even part */ |
|
3996 |
|
3997 tmp10 = DEQUANTIZE(coef_block[0], quantptr[0]); |
|
3998 /* Add fudge factor here for final descale. */ |
|
3999 tmp10 += ONE << 2; |
|
4000 |
|
4001 /* Odd part */ |
|
4002 |
|
4003 tmp0 = DEQUANTIZE(coef_block[1], quantptr[1]); |
|
4004 |
|
4005 /* Final output stage */ |
|
4006 |
|
4007 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3) & RANGE_MASK]; |
|
4008 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3) & RANGE_MASK]; |
|
4009 } |
|
4010 |
|
4011 |
|
4012 /* |
|
4013 * Perform dequantization and inverse DCT on one block of coefficients, |
|
4014 * producing a 8x16 output block. |
|
4015 * |
|
4016 * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows). |
|
4017 */ |
|
4018 |
|
4019 GLOBAL(void) |
|
4020 jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
4021 JCOEFPTR coef_block, |
|
4022 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
4023 { |
|
4024 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; |
|
4025 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; |
|
4026 INT32 z1, z2, z3, z4; |
|
4027 JCOEFPTR inptr; |
|
4028 ISLOW_MULT_TYPE * quantptr; |
|
4029 int * wsptr; |
|
4030 JSAMPROW outptr; |
|
4031 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
4032 int ctr; |
|
4033 int workspace[8*16]; /* buffers data between passes */ |
|
4034 SHIFT_TEMPS |
|
4035 |
|
4036 /* Pass 1: process columns from input, store into work array. |
|
4037 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32). |
|
4038 */ |
|
4039 inptr = coef_block; |
|
4040 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
4041 wsptr = workspace; |
|
4042 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
|
4043 /* Even part */ |
|
4044 |
|
4045 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
4046 tmp0 <<= CONST_BITS; |
|
4047 /* Add fudge factor here for final descale. */ |
|
4048 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
4049 |
|
4050 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
4051 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ |
|
4052 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ |
|
4053 |
|
4054 tmp10 = tmp0 + tmp1; |
|
4055 tmp11 = tmp0 - tmp1; |
|
4056 tmp12 = tmp0 + tmp2; |
|
4057 tmp13 = tmp0 - tmp2; |
|
4058 |
|
4059 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
4060 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
|
4061 z3 = z1 - z2; |
|
4062 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ |
|
4063 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ |
|
4064 |
|
4065 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ |
|
4066 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ |
|
4067 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ |
|
4068 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ |
|
4069 |
|
4070 tmp20 = tmp10 + tmp0; |
|
4071 tmp27 = tmp10 - tmp0; |
|
4072 tmp21 = tmp12 + tmp1; |
|
4073 tmp26 = tmp12 - tmp1; |
|
4074 tmp22 = tmp13 + tmp2; |
|
4075 tmp25 = tmp13 - tmp2; |
|
4076 tmp23 = tmp11 + tmp3; |
|
4077 tmp24 = tmp11 - tmp3; |
|
4078 |
|
4079 /* Odd part */ |
|
4080 |
|
4081 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
4082 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
4083 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
4084 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
|
4085 |
|
4086 tmp11 = z1 + z3; |
|
4087 |
|
4088 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ |
|
4089 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ |
|
4090 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ |
|
4091 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ |
|
4092 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ |
|
4093 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ |
|
4094 tmp0 = tmp1 + tmp2 + tmp3 - |
|
4095 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ |
|
4096 tmp13 = tmp10 + tmp11 + tmp12 - |
|
4097 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ |
|
4098 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ |
|
4099 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ |
|
4100 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ |
|
4101 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ |
|
4102 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ |
|
4103 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ |
|
4104 z2 += z4; |
|
4105 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ |
|
4106 tmp1 += z1; |
|
4107 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ |
|
4108 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ |
|
4109 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ |
|
4110 tmp12 += z2; |
|
4111 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ |
|
4112 tmp2 += z2; |
|
4113 tmp3 += z2; |
|
4114 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ |
|
4115 tmp10 += z2; |
|
4116 tmp11 += z2; |
|
4117 |
|
4118 /* Final output stage */ |
|
4119 |
|
4120 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); |
|
4121 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); |
|
4122 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); |
|
4123 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); |
|
4124 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); |
|
4125 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); |
|
4126 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); |
|
4127 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); |
|
4128 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); |
|
4129 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); |
|
4130 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); |
|
4131 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); |
|
4132 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); |
|
4133 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); |
|
4134 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); |
|
4135 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); |
|
4136 } |
271 |
4137 |
272 /* Pass 2: process rows from work array, store into output array. */ |
4138 /* Pass 2: process rows from work array, store into output array. */ |
273 /* Note that we must descale the results by a factor of 8 == 2**3, */ |
4139 /* Note that we must descale the results by a factor of 8 == 2**3, */ |
274 /* and also undo the PASS1_BITS scaling. */ |
4140 /* and also undo the PASS1_BITS scaling. */ |
275 |
4141 |
276 wsptr = workspace; |
4142 wsptr = workspace; |
277 for (ctr = 0; ctr < DCTSIZE; ctr++) { |
4143 for (ctr = 0; ctr < 16; ctr++) { |
278 outptr = output_buf[ctr] + output_col; |
4144 outptr = output_buf[ctr] + output_col; |
279 /* Rows of zeroes can be exploited in the same way as we did with columns. |
|
280 * However, the column calculation has created many nonzero AC terms, so |
|
281 * the simplification applies less often (typically 5% to 10% of the time). |
|
282 * On machines with very fast multiplication, it's possible that the |
|
283 * test takes more time than it's worth. In that case this section |
|
284 * may be commented out. |
|
285 */ |
|
286 |
|
287 #ifndef NO_ZERO_ROW_TEST |
|
288 if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && |
|
289 wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { |
|
290 /* AC terms all zero */ |
|
291 JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) |
|
292 & RANGE_MASK]; |
|
293 |
|
294 outptr[0] = dcval; |
|
295 outptr[1] = dcval; |
|
296 outptr[2] = dcval; |
|
297 outptr[3] = dcval; |
|
298 outptr[4] = dcval; |
|
299 outptr[5] = dcval; |
|
300 outptr[6] = dcval; |
|
301 outptr[7] = dcval; |
|
302 |
|
303 wsptr += DCTSIZE; /* advance pointer to next row */ |
|
304 continue; |
|
305 } |
|
306 #endif |
|
307 |
4145 |
308 /* Even part: reverse the even part of the forward DCT. */ |
4146 /* Even part: reverse the even part of the forward DCT. */ |
309 /* The rotator is sqrt(2)*c(-6). */ |
4147 /* The rotator is sqrt(2)*c(-6). */ |
310 |
4148 |
311 z2 = (INT32) wsptr[2]; |
4149 z2 = (INT32) wsptr[2]; |
312 z3 = (INT32) wsptr[6]; |
4150 z3 = (INT32) wsptr[6]; |
313 |
4151 |
314 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); |
4152 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); |
315 tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); |
4153 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); |
316 tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); |
4154 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); |
317 |
4155 |
318 tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS; |
4156 /* Add fudge factor here for final descale. */ |
319 tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS; |
4157 z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
4158 z3 = (INT32) wsptr[4]; |
320 |
4159 |
321 tmp10 = tmp0 + tmp3; |
4160 tmp0 = (z2 + z3) << CONST_BITS; |
322 tmp13 = tmp0 - tmp3; |
4161 tmp1 = (z2 - z3) << CONST_BITS; |
323 tmp11 = tmp1 + tmp2; |
4162 |
324 tmp12 = tmp1 - tmp2; |
4163 tmp10 = tmp0 + tmp2; |
|
4164 tmp13 = tmp0 - tmp2; |
|
4165 tmp11 = tmp1 + tmp3; |
|
4166 tmp12 = tmp1 - tmp3; |
325 |
4167 |
326 /* Odd part per figure 8; the matrix is unitary and hence its |
4168 /* Odd part per figure 8; the matrix is unitary and hence its |
327 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
4169 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
328 */ |
4170 */ |
329 |
4171 |
330 tmp0 = (INT32) wsptr[7]; |
4172 tmp0 = (INT32) wsptr[7]; |
331 tmp1 = (INT32) wsptr[5]; |
4173 tmp1 = (INT32) wsptr[5]; |
332 tmp2 = (INT32) wsptr[3]; |
4174 tmp2 = (INT32) wsptr[3]; |
333 tmp3 = (INT32) wsptr[1]; |
4175 tmp3 = (INT32) wsptr[1]; |
334 |
4176 |
335 z1 = tmp0 + tmp3; |
4177 z2 = tmp0 + tmp2; |
336 z2 = tmp1 + tmp2; |
4178 z3 = tmp1 + tmp3; |
337 z3 = tmp0 + tmp2; |
4179 |
338 z4 = tmp1 + tmp3; |
4180 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ |
339 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ |
4181 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ |
340 |
4182 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ |
|
4183 z2 += z1; |
|
4184 z3 += z1; |
|
4185 |
|
4186 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ |
341 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ |
4187 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ |
|
4188 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ |
|
4189 tmp0 += z1 + z2; |
|
4190 tmp3 += z1 + z3; |
|
4191 |
|
4192 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ |
342 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ |
4193 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ |
343 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ |
4194 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ |
344 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ |
4195 tmp1 += z1 + z3; |
345 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ |
4196 tmp2 += z1 + z2; |
346 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ |
|
347 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ |
|
348 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ |
|
349 |
|
350 z3 += z5; |
|
351 z4 += z5; |
|
352 |
|
353 tmp0 += z1 + z3; |
|
354 tmp1 += z2 + z4; |
|
355 tmp2 += z2 + z3; |
|
356 tmp3 += z1 + z4; |
|
357 |
4197 |
358 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ |
4198 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ |
359 |
4199 |
360 outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3, |
4200 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3, |
361 CONST_BITS+PASS1_BITS+3) |
4201 CONST_BITS+PASS1_BITS+3) |
362 & RANGE_MASK]; |
4202 & RANGE_MASK]; |
363 outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3, |
4203 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3, |
364 CONST_BITS+PASS1_BITS+3) |
4204 CONST_BITS+PASS1_BITS+3) |
365 & RANGE_MASK]; |
4205 & RANGE_MASK]; |
366 outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2, |
4206 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2, |
367 CONST_BITS+PASS1_BITS+3) |
4207 CONST_BITS+PASS1_BITS+3) |
368 & RANGE_MASK]; |
4208 & RANGE_MASK]; |
369 outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2, |
4209 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2, |
370 CONST_BITS+PASS1_BITS+3) |
4210 CONST_BITS+PASS1_BITS+3) |
371 & RANGE_MASK]; |
4211 & RANGE_MASK]; |
372 outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1, |
4212 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1, |
373 CONST_BITS+PASS1_BITS+3) |
4213 CONST_BITS+PASS1_BITS+3) |
374 & RANGE_MASK]; |
4214 & RANGE_MASK]; |
375 outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1, |
4215 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1, |
376 CONST_BITS+PASS1_BITS+3) |
4216 CONST_BITS+PASS1_BITS+3) |
377 & RANGE_MASK]; |
4217 & RANGE_MASK]; |
378 outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0, |
4218 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0, |
379 CONST_BITS+PASS1_BITS+3) |
4219 CONST_BITS+PASS1_BITS+3) |
380 & RANGE_MASK]; |
4220 & RANGE_MASK]; |
381 outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0, |
4221 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0, |
382 CONST_BITS+PASS1_BITS+3) |
4222 CONST_BITS+PASS1_BITS+3) |
383 & RANGE_MASK]; |
4223 & RANGE_MASK]; |
384 |
4224 |
385 wsptr += DCTSIZE; /* advance pointer to next row */ |
4225 wsptr += DCTSIZE; /* advance pointer to next row */ |
386 } |
4226 } |
387 } |
4227 } |
388 |
4228 |
|
4229 |
|
4230 /* |
|
4231 * Perform dequantization and inverse DCT on one block of coefficients, |
|
4232 * producing a 7x14 output block. |
|
4233 * |
|
4234 * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows). |
|
4235 */ |
|
4236 |
|
4237 GLOBAL(void) |
|
4238 jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
4239 JCOEFPTR coef_block, |
|
4240 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
4241 { |
|
4242 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; |
|
4243 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; |
|
4244 INT32 z1, z2, z3, z4; |
|
4245 JCOEFPTR inptr; |
|
4246 ISLOW_MULT_TYPE * quantptr; |
|
4247 int * wsptr; |
|
4248 JSAMPROW outptr; |
|
4249 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
4250 int ctr; |
|
4251 int workspace[7*14]; /* buffers data between passes */ |
|
4252 SHIFT_TEMPS |
|
4253 |
|
4254 /* Pass 1: process columns from input, store into work array. |
|
4255 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28). |
|
4256 */ |
|
4257 inptr = coef_block; |
|
4258 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
4259 wsptr = workspace; |
|
4260 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { |
|
4261 /* Even part */ |
|
4262 |
|
4263 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
4264 z1 <<= CONST_BITS; |
|
4265 /* Add fudge factor here for final descale. */ |
|
4266 z1 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
4267 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
4268 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ |
|
4269 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ |
|
4270 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ |
|
4271 |
|
4272 tmp10 = z1 + z2; |
|
4273 tmp11 = z1 + z3; |
|
4274 tmp12 = z1 - z4; |
|
4275 |
|
4276 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */ |
|
4277 CONST_BITS-PASS1_BITS); |
|
4278 |
|
4279 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
4280 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
|
4281 |
|
4282 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ |
|
4283 |
|
4284 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ |
|
4285 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ |
|
4286 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ |
|
4287 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ |
|
4288 |
|
4289 tmp20 = tmp10 + tmp13; |
|
4290 tmp26 = tmp10 - tmp13; |
|
4291 tmp21 = tmp11 + tmp14; |
|
4292 tmp25 = tmp11 - tmp14; |
|
4293 tmp22 = tmp12 + tmp15; |
|
4294 tmp24 = tmp12 - tmp15; |
|
4295 |
|
4296 /* Odd part */ |
|
4297 |
|
4298 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
4299 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
4300 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
4301 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
|
4302 tmp13 = z4 << CONST_BITS; |
|
4303 |
|
4304 tmp14 = z1 + z3; |
|
4305 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ |
|
4306 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ |
|
4307 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ |
|
4308 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ |
|
4309 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ |
|
4310 z1 -= z2; |
|
4311 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ |
|
4312 tmp16 += tmp15; |
|
4313 z1 += z4; |
|
4314 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ |
|
4315 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ |
|
4316 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ |
|
4317 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ |
|
4318 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ |
|
4319 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ |
|
4320 |
|
4321 tmp13 = (z1 - z3) << PASS1_BITS; |
|
4322 |
|
4323 /* Final output stage */ |
|
4324 |
|
4325 wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |
|
4326 wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |
|
4327 wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |
|
4328 wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |
|
4329 wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); |
|
4330 wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); |
|
4331 wsptr[7*3] = (int) (tmp23 + tmp13); |
|
4332 wsptr[7*10] = (int) (tmp23 - tmp13); |
|
4333 wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); |
|
4334 wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); |
|
4335 wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); |
|
4336 wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); |
|
4337 wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); |
|
4338 wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); |
|
4339 } |
|
4340 |
|
4341 /* Pass 2: process 14 rows from work array, store into output array. |
|
4342 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14). |
|
4343 */ |
|
4344 wsptr = workspace; |
|
4345 for (ctr = 0; ctr < 14; ctr++) { |
|
4346 outptr = output_buf[ctr] + output_col; |
|
4347 |
|
4348 /* Even part */ |
|
4349 |
|
4350 /* Add fudge factor here for final descale. */ |
|
4351 tmp23 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
4352 tmp23 <<= CONST_BITS; |
|
4353 |
|
4354 z1 = (INT32) wsptr[2]; |
|
4355 z2 = (INT32) wsptr[4]; |
|
4356 z3 = (INT32) wsptr[6]; |
|
4357 |
|
4358 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ |
|
4359 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ |
|
4360 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ |
|
4361 tmp10 = z1 + z3; |
|
4362 z2 -= tmp10; |
|
4363 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */ |
|
4364 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ |
|
4365 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ |
|
4366 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ |
|
4367 |
|
4368 /* Odd part */ |
|
4369 |
|
4370 z1 = (INT32) wsptr[1]; |
|
4371 z2 = (INT32) wsptr[3]; |
|
4372 z3 = (INT32) wsptr[5]; |
|
4373 |
|
4374 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ |
|
4375 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ |
|
4376 tmp10 = tmp11 - tmp12; |
|
4377 tmp11 += tmp12; |
|
4378 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ |
|
4379 tmp11 += tmp12; |
|
4380 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ |
|
4381 tmp10 += z2; |
|
4382 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ |
|
4383 |
|
4384 /* Final output stage */ |
|
4385 |
|
4386 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |
|
4387 CONST_BITS+PASS1_BITS+3) |
|
4388 & RANGE_MASK]; |
|
4389 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |
|
4390 CONST_BITS+PASS1_BITS+3) |
|
4391 & RANGE_MASK]; |
|
4392 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |
|
4393 CONST_BITS+PASS1_BITS+3) |
|
4394 & RANGE_MASK]; |
|
4395 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |
|
4396 CONST_BITS+PASS1_BITS+3) |
|
4397 & RANGE_MASK]; |
|
4398 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |
|
4399 CONST_BITS+PASS1_BITS+3) |
|
4400 & RANGE_MASK]; |
|
4401 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |
|
4402 CONST_BITS+PASS1_BITS+3) |
|
4403 & RANGE_MASK]; |
|
4404 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23, |
|
4405 CONST_BITS+PASS1_BITS+3) |
|
4406 & RANGE_MASK]; |
|
4407 |
|
4408 wsptr += 7; /* advance pointer to next row */ |
|
4409 } |
|
4410 } |
|
4411 |
|
4412 |
|
4413 /* |
|
4414 * Perform dequantization and inverse DCT on one block of coefficients, |
|
4415 * producing a 6x12 output block. |
|
4416 * |
|
4417 * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows). |
|
4418 */ |
|
4419 |
|
4420 GLOBAL(void) |
|
4421 jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
4422 JCOEFPTR coef_block, |
|
4423 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
4424 { |
|
4425 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; |
|
4426 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; |
|
4427 INT32 z1, z2, z3, z4; |
|
4428 JCOEFPTR inptr; |
|
4429 ISLOW_MULT_TYPE * quantptr; |
|
4430 int * wsptr; |
|
4431 JSAMPROW outptr; |
|
4432 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
4433 int ctr; |
|
4434 int workspace[6*12]; /* buffers data between passes */ |
|
4435 SHIFT_TEMPS |
|
4436 |
|
4437 /* Pass 1: process columns from input, store into work array. |
|
4438 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24). |
|
4439 */ |
|
4440 inptr = coef_block; |
|
4441 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
4442 wsptr = workspace; |
|
4443 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { |
|
4444 /* Even part */ |
|
4445 |
|
4446 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
4447 z3 <<= CONST_BITS; |
|
4448 /* Add fudge factor here for final descale. */ |
|
4449 z3 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
4450 |
|
4451 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
4452 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ |
|
4453 |
|
4454 tmp10 = z3 + z4; |
|
4455 tmp11 = z3 - z4; |
|
4456 |
|
4457 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
4458 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ |
|
4459 z1 <<= CONST_BITS; |
|
4460 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
|
4461 z2 <<= CONST_BITS; |
|
4462 |
|
4463 tmp12 = z1 - z2; |
|
4464 |
|
4465 tmp21 = z3 + tmp12; |
|
4466 tmp24 = z3 - tmp12; |
|
4467 |
|
4468 tmp12 = z4 + z2; |
|
4469 |
|
4470 tmp20 = tmp10 + tmp12; |
|
4471 tmp25 = tmp10 - tmp12; |
|
4472 |
|
4473 tmp12 = z4 - z1 - z2; |
|
4474 |
|
4475 tmp22 = tmp11 + tmp12; |
|
4476 tmp23 = tmp11 - tmp12; |
|
4477 |
|
4478 /* Odd part */ |
|
4479 |
|
4480 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
4481 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
4482 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
4483 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
|
4484 |
|
4485 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ |
|
4486 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ |
|
4487 |
|
4488 tmp10 = z1 + z3; |
|
4489 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ |
|
4490 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ |
|
4491 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ |
|
4492 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ |
|
4493 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ |
|
4494 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ |
|
4495 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ |
|
4496 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ |
|
4497 |
|
4498 z1 -= z4; |
|
4499 z2 -= z3; |
|
4500 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ |
|
4501 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ |
|
4502 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ |
|
4503 |
|
4504 /* Final output stage */ |
|
4505 |
|
4506 wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |
|
4507 wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |
|
4508 wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |
|
4509 wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |
|
4510 wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); |
|
4511 wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); |
|
4512 wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); |
|
4513 wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); |
|
4514 wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); |
|
4515 wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); |
|
4516 wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); |
|
4517 wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); |
|
4518 } |
|
4519 |
|
4520 /* Pass 2: process 12 rows from work array, store into output array. |
|
4521 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). |
|
4522 */ |
|
4523 wsptr = workspace; |
|
4524 for (ctr = 0; ctr < 12; ctr++) { |
|
4525 outptr = output_buf[ctr] + output_col; |
|
4526 |
|
4527 /* Even part */ |
|
4528 |
|
4529 /* Add fudge factor here for final descale. */ |
|
4530 tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
4531 tmp10 <<= CONST_BITS; |
|
4532 tmp12 = (INT32) wsptr[4]; |
|
4533 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */ |
|
4534 tmp11 = tmp10 + tmp20; |
|
4535 tmp21 = tmp10 - tmp20 - tmp20; |
|
4536 tmp20 = (INT32) wsptr[2]; |
|
4537 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */ |
|
4538 tmp20 = tmp11 + tmp10; |
|
4539 tmp22 = tmp11 - tmp10; |
|
4540 |
|
4541 /* Odd part */ |
|
4542 |
|
4543 z1 = (INT32) wsptr[1]; |
|
4544 z2 = (INT32) wsptr[3]; |
|
4545 z3 = (INT32) wsptr[5]; |
|
4546 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ |
|
4547 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS); |
|
4548 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS); |
|
4549 tmp11 = (z1 - z2 - z3) << CONST_BITS; |
|
4550 |
|
4551 /* Final output stage */ |
|
4552 |
|
4553 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |
|
4554 CONST_BITS+PASS1_BITS+3) |
|
4555 & RANGE_MASK]; |
|
4556 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |
|
4557 CONST_BITS+PASS1_BITS+3) |
|
4558 & RANGE_MASK]; |
|
4559 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |
|
4560 CONST_BITS+PASS1_BITS+3) |
|
4561 & RANGE_MASK]; |
|
4562 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |
|
4563 CONST_BITS+PASS1_BITS+3) |
|
4564 & RANGE_MASK]; |
|
4565 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |
|
4566 CONST_BITS+PASS1_BITS+3) |
|
4567 & RANGE_MASK]; |
|
4568 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |
|
4569 CONST_BITS+PASS1_BITS+3) |
|
4570 & RANGE_MASK]; |
|
4571 |
|
4572 wsptr += 6; /* advance pointer to next row */ |
|
4573 } |
|
4574 } |
|
4575 |
|
4576 |
|
4577 /* |
|
4578 * Perform dequantization and inverse DCT on one block of coefficients, |
|
4579 * producing a 5x10 output block. |
|
4580 * |
|
4581 * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows). |
|
4582 */ |
|
4583 |
|
4584 GLOBAL(void) |
|
4585 jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
4586 JCOEFPTR coef_block, |
|
4587 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
4588 { |
|
4589 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; |
|
4590 INT32 tmp20, tmp21, tmp22, tmp23, tmp24; |
|
4591 INT32 z1, z2, z3, z4, z5; |
|
4592 JCOEFPTR inptr; |
|
4593 ISLOW_MULT_TYPE * quantptr; |
|
4594 int * wsptr; |
|
4595 JSAMPROW outptr; |
|
4596 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
4597 int ctr; |
|
4598 int workspace[5*10]; /* buffers data between passes */ |
|
4599 SHIFT_TEMPS |
|
4600 |
|
4601 /* Pass 1: process columns from input, store into work array. |
|
4602 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20). |
|
4603 */ |
|
4604 inptr = coef_block; |
|
4605 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
4606 wsptr = workspace; |
|
4607 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { |
|
4608 /* Even part */ |
|
4609 |
|
4610 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
4611 z3 <<= CONST_BITS; |
|
4612 /* Add fudge factor here for final descale. */ |
|
4613 z3 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
4614 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
4615 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ |
|
4616 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ |
|
4617 tmp10 = z3 + z1; |
|
4618 tmp11 = z3 - z2; |
|
4619 |
|
4620 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */ |
|
4621 CONST_BITS-PASS1_BITS); |
|
4622 |
|
4623 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
4624 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
|
4625 |
|
4626 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ |
|
4627 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ |
|
4628 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ |
|
4629 |
|
4630 tmp20 = tmp10 + tmp12; |
|
4631 tmp24 = tmp10 - tmp12; |
|
4632 tmp21 = tmp11 + tmp13; |
|
4633 tmp23 = tmp11 - tmp13; |
|
4634 |
|
4635 /* Odd part */ |
|
4636 |
|
4637 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
4638 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
4639 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
4640 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
|
4641 |
|
4642 tmp11 = z2 + z4; |
|
4643 tmp13 = z2 - z4; |
|
4644 |
|
4645 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ |
|
4646 z5 = z3 << CONST_BITS; |
|
4647 |
|
4648 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ |
|
4649 z4 = z5 + tmp12; |
|
4650 |
|
4651 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ |
|
4652 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ |
|
4653 |
|
4654 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ |
|
4655 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1)); |
|
4656 |
|
4657 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS; |
|
4658 |
|
4659 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ |
|
4660 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ |
|
4661 |
|
4662 /* Final output stage */ |
|
4663 |
|
4664 wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |
|
4665 wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |
|
4666 wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |
|
4667 wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |
|
4668 wsptr[5*2] = (int) (tmp22 + tmp12); |
|
4669 wsptr[5*7] = (int) (tmp22 - tmp12); |
|
4670 wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); |
|
4671 wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); |
|
4672 wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); |
|
4673 wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); |
|
4674 } |
|
4675 |
|
4676 /* Pass 2: process 10 rows from work array, store into output array. |
|
4677 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10). |
|
4678 */ |
|
4679 wsptr = workspace; |
|
4680 for (ctr = 0; ctr < 10; ctr++) { |
|
4681 outptr = output_buf[ctr] + output_col; |
|
4682 |
|
4683 /* Even part */ |
|
4684 |
|
4685 /* Add fudge factor here for final descale. */ |
|
4686 tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
4687 tmp12 <<= CONST_BITS; |
|
4688 tmp13 = (INT32) wsptr[2]; |
|
4689 tmp14 = (INT32) wsptr[4]; |
|
4690 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */ |
|
4691 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */ |
|
4692 z3 = tmp12 + z2; |
|
4693 tmp10 = z3 + z1; |
|
4694 tmp11 = z3 - z1; |
|
4695 tmp12 -= z2 << 2; |
|
4696 |
|
4697 /* Odd part */ |
|
4698 |
|
4699 z2 = (INT32) wsptr[1]; |
|
4700 z3 = (INT32) wsptr[3]; |
|
4701 |
|
4702 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ |
|
4703 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ |
|
4704 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ |
|
4705 |
|
4706 /* Final output stage */ |
|
4707 |
|
4708 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13, |
|
4709 CONST_BITS+PASS1_BITS+3) |
|
4710 & RANGE_MASK]; |
|
4711 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13, |
|
4712 CONST_BITS+PASS1_BITS+3) |
|
4713 & RANGE_MASK]; |
|
4714 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14, |
|
4715 CONST_BITS+PASS1_BITS+3) |
|
4716 & RANGE_MASK]; |
|
4717 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14, |
|
4718 CONST_BITS+PASS1_BITS+3) |
|
4719 & RANGE_MASK]; |
|
4720 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, |
|
4721 CONST_BITS+PASS1_BITS+3) |
|
4722 & RANGE_MASK]; |
|
4723 |
|
4724 wsptr += 5; /* advance pointer to next row */ |
|
4725 } |
|
4726 } |
|
4727 |
|
4728 |
|
4729 /* |
|
4730 * Perform dequantization and inverse DCT on one block of coefficients, |
|
4731 * producing a 4x8 output block. |
|
4732 * |
|
4733 * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows). |
|
4734 */ |
|
4735 |
|
4736 GLOBAL(void) |
|
4737 jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
4738 JCOEFPTR coef_block, |
|
4739 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
4740 { |
|
4741 INT32 tmp0, tmp1, tmp2, tmp3; |
|
4742 INT32 tmp10, tmp11, tmp12, tmp13; |
|
4743 INT32 z1, z2, z3; |
|
4744 JCOEFPTR inptr; |
|
4745 ISLOW_MULT_TYPE * quantptr; |
|
4746 int * wsptr; |
|
4747 JSAMPROW outptr; |
|
4748 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
4749 int ctr; |
|
4750 int workspace[4*8]; /* buffers data between passes */ |
|
4751 SHIFT_TEMPS |
|
4752 |
|
4753 /* Pass 1: process columns from input, store into work array. */ |
|
4754 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ |
|
4755 /* furthermore, we scale the results by 2**PASS1_BITS. */ |
|
4756 |
|
4757 inptr = coef_block; |
|
4758 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
4759 wsptr = workspace; |
|
4760 for (ctr = 4; ctr > 0; ctr--) { |
|
4761 /* Due to quantization, we will usually find that many of the input |
|
4762 * coefficients are zero, especially the AC terms. We can exploit this |
|
4763 * by short-circuiting the IDCT calculation for any column in which all |
|
4764 * the AC terms are zero. In that case each output is equal to the |
|
4765 * DC coefficient (with scale factor as needed). |
|
4766 * With typical images and quantization tables, half or more of the |
|
4767 * column DCT calculations can be simplified this way. |
|
4768 */ |
|
4769 |
|
4770 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && |
|
4771 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && |
|
4772 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && |
|
4773 inptr[DCTSIZE*7] == 0) { |
|
4774 /* AC terms all zero */ |
|
4775 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; |
|
4776 |
|
4777 wsptr[4*0] = dcval; |
|
4778 wsptr[4*1] = dcval; |
|
4779 wsptr[4*2] = dcval; |
|
4780 wsptr[4*3] = dcval; |
|
4781 wsptr[4*4] = dcval; |
|
4782 wsptr[4*5] = dcval; |
|
4783 wsptr[4*6] = dcval; |
|
4784 wsptr[4*7] = dcval; |
|
4785 |
|
4786 inptr++; /* advance pointers to next column */ |
|
4787 quantptr++; |
|
4788 wsptr++; |
|
4789 continue; |
|
4790 } |
|
4791 |
|
4792 /* Even part: reverse the even part of the forward DCT. */ |
|
4793 /* The rotator is sqrt(2)*c(-6). */ |
|
4794 |
|
4795 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
4796 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
|
4797 |
|
4798 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); |
|
4799 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); |
|
4800 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); |
|
4801 |
|
4802 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
4803 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
4804 z2 <<= CONST_BITS; |
|
4805 z3 <<= CONST_BITS; |
|
4806 /* Add fudge factor here for final descale. */ |
|
4807 z2 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
4808 |
|
4809 tmp0 = z2 + z3; |
|
4810 tmp1 = z2 - z3; |
|
4811 |
|
4812 tmp10 = tmp0 + tmp2; |
|
4813 tmp13 = tmp0 - tmp2; |
|
4814 tmp11 = tmp1 + tmp3; |
|
4815 tmp12 = tmp1 - tmp3; |
|
4816 |
|
4817 /* Odd part per figure 8; the matrix is unitary and hence its |
|
4818 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
|
4819 */ |
|
4820 |
|
4821 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
|
4822 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
4823 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
4824 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
4825 |
|
4826 z2 = tmp0 + tmp2; |
|
4827 z3 = tmp1 + tmp3; |
|
4828 |
|
4829 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ |
|
4830 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ |
|
4831 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ |
|
4832 z2 += z1; |
|
4833 z3 += z1; |
|
4834 |
|
4835 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ |
|
4836 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ |
|
4837 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ |
|
4838 tmp0 += z1 + z2; |
|
4839 tmp3 += z1 + z3; |
|
4840 |
|
4841 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ |
|
4842 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ |
|
4843 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ |
|
4844 tmp1 += z1 + z3; |
|
4845 tmp2 += z1 + z2; |
|
4846 |
|
4847 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ |
|
4848 |
|
4849 wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS); |
|
4850 wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS); |
|
4851 wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS); |
|
4852 wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS); |
|
4853 wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS); |
|
4854 wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS); |
|
4855 wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS); |
|
4856 wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS); |
|
4857 |
|
4858 inptr++; /* advance pointers to next column */ |
|
4859 quantptr++; |
|
4860 wsptr++; |
|
4861 } |
|
4862 |
|
4863 /* Pass 2: process 8 rows from work array, store into output array. |
|
4864 * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). |
|
4865 */ |
|
4866 wsptr = workspace; |
|
4867 for (ctr = 0; ctr < 8; ctr++) { |
|
4868 outptr = output_buf[ctr] + output_col; |
|
4869 |
|
4870 /* Even part */ |
|
4871 |
|
4872 /* Add fudge factor here for final descale. */ |
|
4873 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
4874 tmp2 = (INT32) wsptr[2]; |
|
4875 |
|
4876 tmp10 = (tmp0 + tmp2) << CONST_BITS; |
|
4877 tmp12 = (tmp0 - tmp2) << CONST_BITS; |
|
4878 |
|
4879 /* Odd part */ |
|
4880 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ |
|
4881 |
|
4882 z2 = (INT32) wsptr[1]; |
|
4883 z3 = (INT32) wsptr[3]; |
|
4884 |
|
4885 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ |
|
4886 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ |
|
4887 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ |
|
4888 |
|
4889 /* Final output stage */ |
|
4890 |
|
4891 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, |
|
4892 CONST_BITS+PASS1_BITS+3) |
|
4893 & RANGE_MASK]; |
|
4894 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, |
|
4895 CONST_BITS+PASS1_BITS+3) |
|
4896 & RANGE_MASK]; |
|
4897 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, |
|
4898 CONST_BITS+PASS1_BITS+3) |
|
4899 & RANGE_MASK]; |
|
4900 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, |
|
4901 CONST_BITS+PASS1_BITS+3) |
|
4902 & RANGE_MASK]; |
|
4903 |
|
4904 wsptr += 4; /* advance pointer to next row */ |
|
4905 } |
|
4906 } |
|
4907 |
|
4908 |
|
4909 /* |
|
4910 * Perform dequantization and inverse DCT on one block of coefficients, |
|
4911 * producing a reduced-size 3x6 output block. |
|
4912 * |
|
4913 * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows). |
|
4914 */ |
|
4915 |
|
4916 GLOBAL(void) |
|
4917 jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
4918 JCOEFPTR coef_block, |
|
4919 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
4920 { |
|
4921 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; |
|
4922 INT32 z1, z2, z3; |
|
4923 JCOEFPTR inptr; |
|
4924 ISLOW_MULT_TYPE * quantptr; |
|
4925 int * wsptr; |
|
4926 JSAMPROW outptr; |
|
4927 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
4928 int ctr; |
|
4929 int workspace[3*6]; /* buffers data between passes */ |
|
4930 SHIFT_TEMPS |
|
4931 |
|
4932 /* Pass 1: process columns from input, store into work array. |
|
4933 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). |
|
4934 */ |
|
4935 inptr = coef_block; |
|
4936 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
4937 wsptr = workspace; |
|
4938 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { |
|
4939 /* Even part */ |
|
4940 |
|
4941 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
4942 tmp0 <<= CONST_BITS; |
|
4943 /* Add fudge factor here for final descale. */ |
|
4944 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); |
|
4945 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
|
4946 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ |
|
4947 tmp1 = tmp0 + tmp10; |
|
4948 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); |
|
4949 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
4950 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ |
|
4951 tmp10 = tmp1 + tmp0; |
|
4952 tmp12 = tmp1 - tmp0; |
|
4953 |
|
4954 /* Odd part */ |
|
4955 |
|
4956 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
4957 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
4958 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
|
4959 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ |
|
4960 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); |
|
4961 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); |
|
4962 tmp1 = (z1 - z2 - z3) << PASS1_BITS; |
|
4963 |
|
4964 /* Final output stage */ |
|
4965 |
|
4966 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); |
|
4967 wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); |
|
4968 wsptr[3*1] = (int) (tmp11 + tmp1); |
|
4969 wsptr[3*4] = (int) (tmp11 - tmp1); |
|
4970 wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); |
|
4971 wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); |
|
4972 } |
|
4973 |
|
4974 /* Pass 2: process 6 rows from work array, store into output array. |
|
4975 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6). |
|
4976 */ |
|
4977 wsptr = workspace; |
|
4978 for (ctr = 0; ctr < 6; ctr++) { |
|
4979 outptr = output_buf[ctr] + output_col; |
|
4980 |
|
4981 /* Even part */ |
|
4982 |
|
4983 /* Add fudge factor here for final descale. */ |
|
4984 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
|
4985 tmp0 <<= CONST_BITS; |
|
4986 tmp2 = (INT32) wsptr[2]; |
|
4987 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ |
|
4988 tmp10 = tmp0 + tmp12; |
|
4989 tmp2 = tmp0 - tmp12 - tmp12; |
|
4990 |
|
4991 /* Odd part */ |
|
4992 |
|
4993 tmp12 = (INT32) wsptr[1]; |
|
4994 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ |
|
4995 |
|
4996 /* Final output stage */ |
|
4997 |
|
4998 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, |
|
4999 CONST_BITS+PASS1_BITS+3) |
|
5000 & RANGE_MASK]; |
|
5001 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, |
|
5002 CONST_BITS+PASS1_BITS+3) |
|
5003 & RANGE_MASK]; |
|
5004 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, |
|
5005 CONST_BITS+PASS1_BITS+3) |
|
5006 & RANGE_MASK]; |
|
5007 |
|
5008 wsptr += 3; /* advance pointer to next row */ |
|
5009 } |
|
5010 } |
|
5011 |
|
5012 |
|
5013 /* |
|
5014 * Perform dequantization and inverse DCT on one block of coefficients, |
|
5015 * producing a 2x4 output block. |
|
5016 * |
|
5017 * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows). |
|
5018 */ |
|
5019 |
|
5020 GLOBAL(void) |
|
5021 jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
5022 JCOEFPTR coef_block, |
|
5023 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
5024 { |
|
5025 INT32 tmp0, tmp2, tmp10, tmp12; |
|
5026 INT32 z1, z2, z3; |
|
5027 JCOEFPTR inptr; |
|
5028 ISLOW_MULT_TYPE * quantptr; |
|
5029 INT32 * wsptr; |
|
5030 JSAMPROW outptr; |
|
5031 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
5032 int ctr; |
|
5033 INT32 workspace[2*4]; /* buffers data between passes */ |
|
5034 SHIFT_TEMPS |
|
5035 |
|
5036 /* Pass 1: process columns from input, store into work array. |
|
5037 * 4-point IDCT kernel, |
|
5038 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. |
|
5039 */ |
|
5040 inptr = coef_block; |
|
5041 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
5042 wsptr = workspace; |
|
5043 for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) { |
|
5044 /* Even part */ |
|
5045 |
|
5046 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
5047 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
|
5048 |
|
5049 tmp10 = (tmp0 + tmp2) << CONST_BITS; |
|
5050 tmp12 = (tmp0 - tmp2) << CONST_BITS; |
|
5051 |
|
5052 /* Odd part */ |
|
5053 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ |
|
5054 |
|
5055 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
5056 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
|
5057 |
|
5058 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ |
|
5059 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ |
|
5060 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ |
|
5061 |
|
5062 /* Final output stage */ |
|
5063 |
|
5064 wsptr[2*0] = tmp10 + tmp0; |
|
5065 wsptr[2*3] = tmp10 - tmp0; |
|
5066 wsptr[2*1] = tmp12 + tmp2; |
|
5067 wsptr[2*2] = tmp12 - tmp2; |
|
5068 } |
|
5069 |
|
5070 /* Pass 2: process 4 rows from work array, store into output array. */ |
|
5071 |
|
5072 wsptr = workspace; |
|
5073 for (ctr = 0; ctr < 4; ctr++) { |
|
5074 outptr = output_buf[ctr] + output_col; |
|
5075 |
|
5076 /* Even part */ |
|
5077 |
|
5078 /* Add fudge factor here for final descale. */ |
|
5079 tmp10 = wsptr[0] + (ONE << (CONST_BITS+2)); |
|
5080 |
|
5081 /* Odd part */ |
|
5082 |
|
5083 tmp0 = wsptr[1]; |
|
5084 |
|
5085 /* Final output stage */ |
|
5086 |
|
5087 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3) |
|
5088 & RANGE_MASK]; |
|
5089 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3) |
|
5090 & RANGE_MASK]; |
|
5091 |
|
5092 wsptr += 2; /* advance pointer to next row */ |
|
5093 } |
|
5094 } |
|
5095 |
|
5096 |
|
5097 /* |
|
5098 * Perform dequantization and inverse DCT on one block of coefficients, |
|
5099 * producing a 1x2 output block. |
|
5100 * |
|
5101 * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows). |
|
5102 */ |
|
5103 |
|
5104 GLOBAL(void) |
|
5105 jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
5106 JCOEFPTR coef_block, |
|
5107 JSAMPARRAY output_buf, JDIMENSION output_col) |
|
5108 { |
|
5109 INT32 tmp0, tmp10; |
|
5110 ISLOW_MULT_TYPE * quantptr; |
|
5111 JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
|
5112 SHIFT_TEMPS |
|
5113 |
|
5114 /* Process 1 column from input, store into output array. */ |
|
5115 |
|
5116 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
|
5117 |
|
5118 /* Even part */ |
|
5119 |
|
5120 tmp10 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); |
|
5121 /* Add fudge factor here for final descale. */ |
|
5122 tmp10 += ONE << 2; |
|
5123 |
|
5124 /* Odd part */ |
|
5125 |
|
5126 tmp0 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]); |
|
5127 |
|
5128 /* Final output stage */ |
|
5129 |
|
5130 output_buf[0][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3) |
|
5131 & RANGE_MASK]; |
|
5132 output_buf[1][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3) |
|
5133 & RANGE_MASK]; |
|
5134 } |
|
5135 |
|
5136 #endif /* IDCT_SCALING_SUPPORTED */ |
389 #endif /* DCT_ISLOW_SUPPORTED */ |
5137 #endif /* DCT_ISLOW_SUPPORTED */ |