1 /* |
|
2 * Copyright (c) 2010 Ixonos Plc. |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of the "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - Initial contribution |
|
11 * |
|
12 * Contributors: |
|
13 * Ixonos Plc |
|
14 * |
|
15 * Description: |
|
16 * Symbian OS -specific IDCT routines. |
|
17 * |
|
18 */ |
|
19 |
|
20 |
|
21 |
|
22 /* |
|
23 |
|
24 1 ABSTRACT |
|
25 |
|
26 |
|
27 1.1 Module Type |
|
28 |
|
29 type subroutine |
|
30 |
|
31 |
|
32 1.2 Functional Description |
|
33 |
|
34 Fixed point arithmetic for fast calculation of IDCT for 8x8 size image |
|
35 blocks. The full calculation of the IDCT takes 208 multiplications and |
|
36 400 additions. |
|
37 |
|
38 The routine optionally checks if AC coefficients are all zero and in that |
|
39 case makes a shortcut in IDCT, thus making the calculation faster. This |
|
40 feature is activated by defining CHECK_ZERO_AC_COEFFICIENTS_0, |
|
41 CHECK_ZERO_AC_COEFFICIENTS_1 or CHECK_ZERO_AC_COEFFICIENTS_2 in envdef.h. |
|
42 |
|
43 |
|
44 1.3 Specification/Design Reference |
|
45 |
|
46 The algorithm used is the fast algorithm introduced in W.-H. Chen, C. H. |
|
47 Smith, and S. C. Fralick, "A fast computational algorithm for the |
|
48 Discrete Cosine Transform," IEEE Transactions on Communications, vol. |
|
49 COM-25, pp. 1004-1009, 1977. |
|
50 |
|
51 This IDCT routine conforms to the accuracy requirements specified in the |
|
52 H.261 and H.263 recommendations. |
|
53 |
|
54 NRC documentation: Moments: H.263 Decoder - Functional Definition |
|
55 Specification |
|
56 |
|
57 NRC documentation: Moments: H.263 Decoder - Implementation Design |
|
58 Specification |
|
59 |
|
60 |
|
61 1.4 Module Test Specification Reference |
|
62 |
|
63 TRABANT:h263_test_spec.BASE-TEST |
|
64 |
|
65 |
|
66 1.5 Compilation Information |
|
67 |
|
68 Compiler: RISC OS ARM C compiler (Acorn Computers Ltd.) |
|
69 Version: 5.05 |
|
70 Activation: cc -DBUILD_TARGET=ACORNDEC idcti.c |
|
71 |
|
72 |
|
73 1.6 Notes |
|
74 |
|
75 This source code can be used in both 16-bit and 32-bit applications. |
|
76 |
|
77 PREC defines the precision for the fixed point numbers. The best value |
|
78 for it depends on several things: You should always have enough room for |
|
79 the integer part of the number, and in 386s/486s smaller PREC values are |
|
80 faster, but the smaller it is, the poorer is the accuracy. |
|
81 |
|
82 The TMPPRECDEC is another adjustable constant. It tells how many bits are |
|
83 ripped off the number for the temporary storage. This way the accuracy |
|
84 for the multiplications can be better. TMPPRECDEC can be zero. This speeds |
|
85 the code up a bit. |
|
86 |
|
87 To determine the maximum values for PREC and TMPPRECDEC, count bits you |
|
88 need for the integer part anywhere during the calculation, subtract that |
|
89 from 32, and divide the remaining number by two. This number should be |
|
90 >= (2*PREC-TMPPRECDEC)/2, otherwise the results may be corrupted due to |
|
91 lost bits. |
|
92 |
|
93 For example, if you know that your data will vary from -2048 - 2047, you |
|
94 need twelve bits for the integer part. 32-12 = 20, 20 / 2 = 10, so good |
|
95 example values for PREC and TMPPRECDEC would be 12 and 4. Also 11 and 2 |
|
96 would be legal, as would 11 and 3, but 12 and 3 would not ((2*12-3)/2 = |
|
97 10.5 > 10). |
|
98 |
|
99 NOTE: Several PREC and TMPPRECDEC values were tried in order to meet |
|
100 the accuracy requirements. |
|
101 The requirements could not be met. PREC = 13, TMPPRECDEC = 5 was |
|
102 the closest combination to meet the requirements violating only |
|
103 the overall mean square error requirement. |
|
104 |
|
105 Both the input and output tables are assumed to be normal C ints. Thus, |
|
106 in the 16-bit version they are 16-bit integers and in the 32-bit version |
|
107 32-bit ones. |
|
108 |
|
109 |
|
110 Define CHECK_ZERO_AC_COEFFICIENTS_0, CHECK_ZERO_AC_COEFFICIENTS_1 and |
|
111 CHECK_ZERO_AC_COEFFICIENTS_2 in envdef.h if zero AC coefficients checking |
|
112 for the whole block, for current row or for current column is desired, |
|
113 respectively. |
|
114 |
|
115 |
|
116 */ |
|
117 |
|
118 |
|
119 /* 2 CONTENTS |
|
120 |
|
121 |
|
122 1 ABSTRACT |
|
123 |
|
124 2 CONTENTS |
|
125 |
|
126 3 GLOSSARY |
|
127 |
|
128 4 EXTERNAL RESOURCES |
|
129 4.1 Include Files |
|
130 4.2 External Data Structures |
|
131 4.3 External Function Prototypes |
|
132 |
|
133 5 LOCAL CONSTANTS AND MACROS |
|
134 |
|
135 6 MODULE DATA STRUCTURES |
|
136 6.1 Local Data Structures |
|
137 6.2 Local Function Prototypes |
|
138 |
|
139 7 MODULE CODE |
|
140 7.1 idct |
|
141 7.2 firstPass |
|
142 7.3 secondPass |
|
143 |
|
144 */ |
|
145 |
|
146 |
|
147 /* 3 GLOSSARY |
|
148 |
|
149 IDCT Inverse discrete cosine transform |
|
150 |
|
151 */ |
|
152 |
|
153 |
|
154 /* 4 EXTERNAL RESOURCES */ |
|
155 |
|
156 |
|
157 /* 4.1 Include Files */ |
|
158 |
|
159 #include "h263dconfig.h" |
|
160 |
|
161 /* 4.2 External Data Structures */ |
|
162 |
|
163 /* None */ |
|
164 |
|
165 |
|
166 /* 4.3 External Function Prototypes */ |
|
167 |
|
168 /* None */ |
|
169 |
|
170 |
|
171 /* 5 LOCAL CONSTANTS AND MACROS */ |
|
172 |
|
#define PREC 13         /* Fixed point precision (fractional bits) */
#define TMPPRECDEC 5    /* Temporary precision decrease (bits dropped from
                           the first pass results before they are stored) */

/* See note about PREC and TMPPRECDEC above. */

#define TMPPREC ( PREC - TMPPRECDEC )   /* Fractional bits of the stored
                                            first pass results */
#define CDIV ( 1 << ( 16 - PREC ))      /* Scales the 16-bit fractions below
                                            down to PREC bits */
#define ROUNDER ( 1 << ( PREC - 1 ))    /* One half in PREC-bit fixed point */

/*
 * Cosine multipliers. The hexadecimal literals are 16-bit fractions
 * (value * 65536); dividing by CDIV leaves PREC fractional bits. Each
 * expansion is fully parenthesized so that it behaves as a single operand
 * in any expression.
 */
#define f0 ((int32)(0xb504 / CDIV))   /* .7071068 = cos( pi / 4 ) */
#define f1 ((int32)(0x7d8a / CDIV))   /* .4903926 = 0.5 * cos( pi / 16 ) */
#define f2 ((int32)(0x7641 / CDIV))   /* .4619398 = 0.5 * cos( 2pi / 16 ) */
#define f3 ((int32)(0x6a6d / CDIV))   /* .4157348 = 0.5 * cos( 3pi / 16 ) */
#define f4 ((int32)(0x5a82 / CDIV))   /* .3535534 = 0.5 * cos( 4pi / 16 ) */
#define f5 ((int32)(0x471c / CDIV))   /* .2777851 = 0.5 * cos( 5pi / 16 ) */
#define f6 ((int32)(0x30fb / CDIV))   /* .1913417 = 0.5 * cos( 6pi / 16 ) */
#define f7 ((int32)(0x18f8 / CDIV))   /* .0975452 = 0.5 * cos( 7pi / 16 ) */

/* cos( pi / 4 ) with TMPPREC fractional bits */
#define f0TMP ((int32)(0xb504 / (1 << (16 - TMPPREC))))
|
192 |
|
193 |
|
194 |
|
195 /* 6 MODULE DATA STRUCTURES */ |
|
196 |
|
197 |
|
198 /* 6.1 Local Data Structures */ |
|
199 |
|
#ifdef _WIN32_EXPLICIT /* EPOC32_PORT static data */

/*
 * Zig-zag positioning of the transform coefficients, listed in raster
 * order: entry (8 * row + column) is the index of that coefficient in the
 * zig-zag ordered input block.
 */
static const int idctZigzag[64] =
{
    0,  1,  5,  6, 14, 15, 27, 28,
    2,  4,  7, 13, 16, 26, 29, 42,
    3,  8, 12, 17, 25, 30, 41, 43,
    9, 11, 18, 24, 31, 40, 44, 53,
   10, 19, 23, 32, 39, 45, 52, 54,
   20, 22, 33, 38, 46, 51, 55, 60,
   21, 34, 37, 47, 50, 56, 59, 61,
   35, 36, 48, 49, 57, 58, 62, 63
};

/* Temporary storage of the transform results that idct passes from
   firstPass to secondPass. */
static int32 idctTmpbuf1[64];

#endif
|
215 |
|
216 /* 6.2 Local Function Prototypes */ |
|
217 |
|
218 static void firstPass (int *buffer, |
|
219 int32 *tmpbuf); |
|
220 |
|
221 static void secondPass (int32 *tmpbuf, |
|
222 int *dest); |
|
223 |
|
224 |
|
225 |
|
226 /* 7 MODULE CODE */ |
|
227 |
|
228 /* |
|
229 ============================================================================= |
|
230 */ |
|
231 |
|
232 /* 7.1 */ |
|
233 |
|
234 void idct |
|
235 (int *block) |
|
236 |
|
237 { |
|
238 |
|
239 |
|
240 /* Functional Description |
|
241 |
|
242 Fixed point arithmetic for fast calculation of IDCT for 8x8 size image |
|
243 blocks. |
|
244 |
|
245 |
|
246 Activation |
|
247 |
|
248 by function call |
|
249 |
|
250 Reentrancy: no |
|
251 |
|
252 |
|
253 Inputs |
|
254 |
|
255 Parameters: |
|
256 |
|
257 *block: 8x8 source block of zigzagged cosine transform |
|
258 coefficients |
|
259 |
|
260 Externals: |
|
261 |
|
262 None |
|
263 |
|
264 |
|
265 Outputs |
|
266 |
|
267 Parameters: |
|
268 |
|
269 *block: 8x8 destination block of pixel values |
|
270 |
|
271 Externals: |
|
272 |
|
273 None |
|
274 |
|
275 Return Values: |
|
276 |
|
277 None |
|
278 |
|
279 |
|
280 Exceptional Conditions |
|
281 |
|
282 None |
|
283 |
|
284 ----------------------------------------------------------------------------- |
|
285 */ |
|
286 |
|
287 |
|
288 |
|
289 /* Pseudocode |
|
290 |
|
291 Calculate 1D-IDCT by rows. |
|
292 Calculate 1D-IDCT by columns. |
|
293 |
|
294 */ |
|
295 |
|
296 |
|
297 |
|
298 /* Data Structures */ |
|
299 |
|
300 |
|
301 /* These are only needed if checking the AC coefficients of the whole block |
|
302 is desired. */ |
|
303 |
|
304 int i = 1; /* Loop variable */ |
|
305 int result; /* Calculation result */ |
|
306 |
|
307 |
|
308 #ifndef _WIN32_EXPLICIT /* EPOC32_PORT static data */ |
|
309 int32 idctTmpbuf1[64]; |
|
310 #endif |
|
311 |
|
312 /* Code */ |
|
313 |
|
314 |
|
315 /* |
|
316 * Check if the AC coefficients of the whole block are all zero. |
|
317 * In that case the inverse transform is equal to the DC |
|
318 * coefficient with a scale factor. |
|
319 */ |
|
320 |
|
321 while (i < 64 && !block[i++]) {} |
|
322 if (i == 64) { |
|
323 int *blk = block; |
|
324 result = (block[0] + 4) >> 3; |
|
325 i = 8; |
|
326 while ( i-- ) |
|
327 { |
|
328 blk[0] = result; blk[1] = result; blk[2] = result; blk[3] = result; |
|
329 blk[4] = result; blk[5] = result; blk[6] = result; blk[7] = result; |
|
330 blk += 8; |
|
331 } |
|
332 /* |
|
333 for (i = 0; i < 64; i++) |
|
334 { |
|
335 block[i] = result; |
|
336 } |
|
337 */ |
|
338 } |
|
339 else |
|
340 { |
|
341 firstPass(block, idctTmpbuf1); |
|
342 secondPass(idctTmpbuf1, block); |
|
343 } |
|
344 |
|
345 |
|
346 } |
|
347 |
|
348 /* |
|
349 ============================================================================= |
|
350 */ |
|
351 |
|
352 |
|
353 |
|
354 /* 7.2 */ |
|
355 |
|
356 static void firstPass |
|
357 (int *buffer, |
|
358 int32 *tmpbuf) |
|
359 |
|
360 { |
|
361 |
|
362 |
|
363 /* Functional Description |
|
364 |
|
365 Local function: Calculate 1D-IDCT for the rows of the 8x8 block. |
|
366 |
|
367 |
|
368 Activation |
|
369 |
|
370 by function call |
|
371 |
|
372 Reentrancy: no |
|
373 |
|
374 |
|
375 Inputs |
|
376 |
|
377 Parameters: |
|
378 |
|
379 *block: 8x8 block of cosine transform coefficients |
|
380 |
|
381 Externals: |
|
382 |
|
383 None |
|
384 |
|
385 |
|
386 Outputs |
|
387 |
|
388 Parameters: |
|
389 |
|
390 *tmpbuf Temporary storage for the results of the first pass. |
|
391 |
|
392 Externals: |
|
393 |
|
394 None |
|
395 |
|
396 Return Values: |
|
397 |
|
398 None |
|
399 |
|
400 |
|
401 Exceptional Conditions |
|
402 |
|
403 None |
|
404 |
|
405 ----------------------------------------------------------------------------- |
|
406 */ |
|
407 |
|
408 |
|
409 |
|
410 /* Pseudocode |
|
411 |
|
412 Calculate 1D-IDCT by rows. |
|
413 |
|
414 */ |
|
415 |
|
416 |
|
417 |
|
418 /* Data Structures */ |
|
419 |
|
420 int row; /* Loop variable */ |
|
421 int32 e, f, g, h; /* Temporary storage */ |
|
422 int32 t0, t1, t2, t3, t5, t6; /* Temporary storage */ |
|
423 int32 bd2, bd3; /* Temporary storage */ |
|
424 |
|
425 #ifndef _WIN32_EXPLICIT /* EPOC32_PORT static data */ |
|
426 static const int idctZigzag[64] = |
|
427 { 0, 1, 5, 6, 14, 15, 27, 28, |
|
428 2, 4, 7, 13, 16, 26, 29, 42, |
|
429 3, 8, 12, 17, 25, 30, 41, 43, |
|
430 9, 11, 18, 24, 31, 40, 44, 53, |
|
431 10, 19, 23, 32, 39, 45, 52, 54, |
|
432 20, 22, 33, 38, 46, 51, 55, 60, |
|
433 21, 34, 37, 47, 50, 56, 59, 61, |
|
434 35, 36, 48, 49, 57, 58, 62, 63 }; |
|
435 |
|
436 const int *zz = idctZigzag; |
|
437 #else |
|
438 int *zz = idctZigzag; |
|
439 #endif |
|
440 |
|
441 |
|
442 /* Code */ |
|
443 |
|
444 #define ZZ(x) ((int32)buffer[zz[x]]) |
|
445 |
|
446 for( row = 0; row < 8; row++ ) |
|
447 { |
|
448 |
|
449 |
|
450 /* |
|
451 * Check if the AC coefficients on the current row are all zero. |
|
452 * In that case the inverse transform is equal to the DC |
|
453 * coefficient with a scale factor. |
|
454 */ |
|
455 |
|
456 if ((ZZ(1) | ZZ(2) | ZZ(3) | ZZ(4) | ZZ(5) | ZZ(6) | ZZ(7)) == 0) |
|
457 { |
|
458 tmpbuf[7] = |
|
459 tmpbuf[6] = |
|
460 tmpbuf[5] = |
|
461 tmpbuf[4] = |
|
462 tmpbuf[3] = |
|
463 tmpbuf[2] = |
|
464 tmpbuf[1] = |
|
465 tmpbuf[0] = (ZZ(0) * f4) >> TMPPRECDEC; |
|
466 |
|
467 tmpbuf += 8; |
|
468 zz += 8; |
|
469 continue; |
|
470 } |
|
471 |
|
472 t0 = t3 = (ZZ(0) + ZZ(4)) * f4; |
|
473 bd3 = ZZ(6) * f6 + ZZ(2) * f2; |
|
474 t0 += bd3; |
|
475 t3 -= bd3; |
|
476 |
|
477 t1 = t2 = (ZZ(0) - ZZ(4)) * f4; |
|
478 bd2 = ZZ(2) * f6 - ZZ(6) * f2; |
|
479 t1 += bd2; |
|
480 t2 -= bd2; |
|
481 |
|
482 e = h = (ZZ(1) + ZZ(7)) * f7; |
|
483 h += ZZ(1) * ( -f7+f1 ); |
|
484 f = g = (ZZ(5) + ZZ(3)) * f3; |
|
485 g += ZZ(5) * ( -f3+f5 ); |
|
486 |
|
487 tmpbuf[0] = ( t0 + ( h + g )) >> TMPPRECDEC; |
|
488 tmpbuf[7] = ( t0 - ( h + g )) >> TMPPRECDEC; |
|
489 |
|
490 f += ZZ(3) * ( -f3-f5 ); |
|
491 e += ZZ(7) * ( -f7-f1 ); |
|
492 |
|
493 tmpbuf[3] = ( t3 + ( e + f )) >> TMPPRECDEC; |
|
494 tmpbuf[4] = ( t3 - ( e + f )) >> TMPPRECDEC; |
|
495 |
|
496 t6 = ( h - g + e - f ) * f0TMP >> TMPPREC; |
|
497 t5 = ( h - g - e + f ) * f0TMP >> TMPPREC; |
|
498 |
|
499 tmpbuf[1] = ( t1 + t6 ) >> TMPPRECDEC; |
|
500 tmpbuf[6] = ( t1 - t6 ) >> TMPPRECDEC; |
|
501 tmpbuf[2] = ( t2 + t5 ) >> TMPPRECDEC; |
|
502 tmpbuf[5] = ( t2 - t5 ) >> TMPPRECDEC; |
|
503 |
|
504 tmpbuf += 8; |
|
505 zz += 8; |
|
506 } |
|
507 } |
|
508 |
|
509 #undef ZZ |
|
510 |
|
511 |
|
512 /* |
|
513 ============================================================================= |
|
514 */ |
|
515 |
|
516 |
|
517 |
|
518 /* 7.3 */ |
|
519 |
|
520 static void secondPass |
|
521 (int32 *tmpbuf, |
|
522 int *dest) |
|
523 |
|
524 { |
|
525 |
|
526 |
|
527 /* Functional Description |
|
528 |
|
529 Local function: Calculate 1D-IDCT for the columns of the 8x8 block. |
|
530 |
|
531 |
|
532 Activation |
|
533 |
|
534 by function call |
|
535 |
|
536 Reentrancy: no |
|
537 |
|
538 |
|
539 Inputs |
|
540 |
|
541 Parameters: |
|
542 |
|
543 *tmpbuf Temporary storage for the results of the first pass. |
|
544 |
|
545 Externals: |
|
546 |
|
547 None |
|
548 |
|
549 |
|
550 Outputs |
|
551 |
|
552 Parameters: |
|
553 |
|
554 *block: 8x8 block of pixel values |
|
555 |
|
556 Externals: |
|
557 |
|
558 None |
|
559 |
|
560 Return Values: |
|
561 |
|
562 None |
|
563 |
|
564 |
|
565 Exceptional Conditions |
|
566 |
|
567 None |
|
568 |
|
569 ----------------------------------------------------------------------------- |
|
570 */ |
|
571 |
|
572 |
|
573 /* Pseudocode |
|
574 |
|
575 Calculate 1D-IDCT by columns. |
|
576 |
|
577 */ |
|
578 |
|
579 |
|
580 /* Data Structures */ |
|
581 |
|
582 int col; /* Loop variable */ |
|
583 int32 e, f, g, h; /* Temporary storage */ |
|
584 int32 t0, t1, t2, t3, t5, t6; /* Temporary storage */ |
|
585 int32 bd2, bd3; /* Temporary storage */ |
|
586 |
|
587 |
|
588 /* Code */ |
|
589 |
|
590 #define ZZ(x) tmpbuf[x * 8] |
|
591 |
|
592 for( col = 0; col < 8; col++ ) |
|
593 { |
|
594 |
|
595 t0 = t3 = ((ZZ(0) + ZZ(4)) * f4 ) >> TMPPREC; |
|
596 bd3 = ( ZZ(6) * f6 + ZZ(2) * f2 ) >> TMPPREC; |
|
597 t0 += bd3; |
|
598 t3 -= bd3; |
|
599 |
|
600 t1 = t2 = ((ZZ(0) - ZZ(4)) * f4 ) >> TMPPREC; |
|
601 bd2 = ( ZZ(2) * f6 - ZZ(6) * f2 ) >> TMPPREC; |
|
602 t1 += bd2; |
|
603 t2 -= bd2; |
|
604 |
|
605 e = h = (ZZ(1) + ZZ(7)) * f7; |
|
606 h += (ZZ(1) * ( -f7+f1 )); |
|
607 h >>= TMPPREC; |
|
608 f = g = (ZZ(5) + ZZ(3)) * f3; |
|
609 g += (ZZ(5) * ( -f3+f5 )); |
|
610 g >>= TMPPREC; |
|
611 |
|
612 dest[0*8] = (int) (( t0 + ( h + g ) + ROUNDER ) >> PREC); |
|
613 dest[7*8] = (int) (( t0 - ( h + g ) + ROUNDER ) >> PREC); |
|
614 |
|
615 f += ZZ(3) * ( -f3-f5 ); |
|
616 f >>= TMPPREC; |
|
617 e += ZZ(7) * ( -f7-f1 ); |
|
618 e >>= TMPPREC; |
|
619 |
|
620 dest[3*8] = (int) (( t3 + ( e + f ) + ROUNDER ) >> PREC); |
|
621 dest[4*8] = (int) (( t3 - ( e + f ) + ROUNDER ) >> PREC); |
|
622 |
|
623 t6 = ( h - g + e - f ) * f0TMP >> TMPPREC; |
|
624 t5 = ( h - g - e + f ) * f0TMP >> TMPPREC; |
|
625 |
|
626 dest[1*8] = (int) (( t1 + t6 + ROUNDER ) >> PREC); |
|
627 dest[6*8] = (int) (( t1 - t6 + ROUNDER ) >> PREC); |
|
628 dest[2*8] = (int) (( t2 + t5 + ROUNDER ) >> PREC); |
|
629 dest[5*8] = (int) (( t2 - t5 + ROUNDER ) >> PREC); |
|
630 |
|
631 tmpbuf++; |
|
632 dest++; |
|
633 } |
|
634 } |
|
635 |
|
636 |
|
637 /* |
|
638 ============================================================================= |
|
639 */ |
|
640 |
|
641 // End of File |
|