|
1 /* |
|
2 * Copyright (c) 2003 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of the License "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: Internal math function implementations |
|
15 * |
|
16 */ |
|
17 |
|
18 |
|
19 /*! |
|
20 * \internal |
|
21 * \file |
|
22 * \brief Internal math function implementations |
|
23 * |
|
24 */ |
|
25 |
|
26 #ifndef M3G_CORE_INCLUDE |
|
27 # error included by m3g_core.c; do not compile separately. |
|
28 #endif |
|
29 |
|
30 #include "m3g_defs.h" |
|
31 #include "m3g_memory.h" |
|
32 |
|
33 #if defined(M3G_SOFT_FLOAT) |
|
34 #include <math.h> |
|
35 #endif |
|
36 |
|
37 /*---------------------------------------------------------------------- |
|
38 * Private types and definitions |
|
39 *--------------------------------------------------------------------*/ |
|
40 |
|
41 /* Enumerated common matrix classifications */ |
|
42 #define MC_IDENTITY 0x40100401 // 01000000 00010000 00000100 00000001 |
|
43 #define MC_FRUSTUM 0x30BF0C03 // 00110000 10111111 00001100 00000011 |
|
44 #define MC_PERSPECTIVE 0x30B00C03 // 00110000 10110000 00001100 00000011 |
|
45 #define MC_ORTHO 0x7F300C03 // 01111111 00110000 00001100 00000011 |
|
46 #define MC_PARALLEL 0x70300C03 // 01110000 00110000 00001100 00000011 |
|
47 #define MC_SCALING_ROTATION 0x403F3F3F // 01000000 00111111 00111111 00111111 |
|
48 #define MC_SCALING 0x40300C03 // 01000000 00110000 00001100 00000011 |
|
49 #define MC_TRANSLATION 0x7F100401 // 01111111 00010000 00000100 00000001 |
|
50 #define MC_X_ROTATION 0x403C3C01 // 01000000 00111100 00111100 00000001 |
|
51 #define MC_Y_ROTATION 0x40330433 // 01000000 00110011 00000100 00110011 |
|
52 #define MC_Z_ROTATION 0x40100F0F // 01000000 00010000 00001111 00001111 |
|
53 #define MC_W_UNITY 0x7F3F3F3F // 01111111 00111111 00111111 00111111 |
|
54 #define MC_GENERIC 0xFFFFFFFF |
|
55 |
|
56 /* Partial masks for individual matrix components */ |
|
57 #define MC_TRANSLATION_PART 0x3F000000 // 00111111 00000000 00000000 00000000 |
|
58 #define MC_SCALE_PART 0x00300C03 // 00000000 00110000 00001100 00000011 |
|
59 #define MC_SCALE_ROTATION_PART 0x003F3F3F // 00000000 00111111 00111111 00111111 |
|
60 |
|
61 /* Matrix element classification masks */ |
|
62 #define ELEM_ZERO 0x00 |
|
63 #define ELEM_ONE 0x01 |
|
64 #define ELEM_MINUS_ONE 0x02 |
|
65 #define ELEM_ANY 0x03 |
|
66 |
|
67 /*! |
|
68 * \internal |
|
69 * \brief calculates the offset of a 4x4 matrix element in a linear |
|
70 * array |
|
71 * |
|
72 * \notes The current convention is column-major, as in OpenGL ES |
|
73 */ |
|
74 #define MELEM(row, col) ((row) + ((col) << 2)) |
|
75 |
|
76 /*! |
|
77 * \internal |
|
78 * \brief Macro for accessing 4x4 float matrix elements |
|
79 * |
|
80 * \param mtx pointer to the first element of the matrix |
|
81 * \param row matrix row |
|
82 * \param col matrix column |
|
83 */ |
|
84 #define M44F(mtx, row, col) ((mtx)->elem[MELEM((row), (col))]) |
|
85 |
|
86 /*--------------------------------------------------------------------*/ |
|
87 |
|
88 /*---------------------------------------------------------------------- |
|
89 * Private functions |
|
90 *--------------------------------------------------------------------*/ |
|
91 |
|
92 |
|
93 /*! |
|
94 * \internal |
|
95 * \brief ARM VFPv2 implementation of 4x4 matrix multiplication. |
|
96 * |
|
97 * \param dst multiplication result |
|
98 * \param left left-hand matrix |
|
99 * \param right right-hand matrix |
|
100 */ |
|
101 #if defined(M3G_HW_FLOAT_VFPV2) |
|
102 |
|
103 __asm void _m3gGenericMatrixProduct(Matrix *dst, |
|
104 const Matrix *left, const Matrix *right) |
|
105 { |
|
106 // r0 = *dst |
|
107 // r1 = *left |
|
108 // r2 = *right |
|
109 |
|
110 CODE32 |
|
111 |
|
112 // save the VFP registers and set the vector STRIDE = 1, LENGTH = 4 |
|
113 |
|
114 FSTMFDD sp!, {d8-d15} |
|
115 |
|
116 FMRX r3, FPSCR |
|
117 BIC r12, r3, #0x00370000 |
|
118 ORR r12, #0x00030000 |
|
119 FMXR FPSCR, r12 |
|
120 |
|
121 // left = [a0 a1 a2 a3 right = [b0 b1 b2 b3 |
|
122 // a4 a5 a6 a7 b4 b5 b6 b7 |
|
123 // a8 a9 a10 a11 b8 b9 b10 b11 |
|
124 // a12 a13 a14 a15] b12 b13 b14 b15] |
|
125 // |
|
126 // dst = [a0*b0+a4*b1+a8*b2+a12*b3 a1*b0+a5*b1+a9*b2+a13*b3 .. |
|
127 // a0*b4+a4*b5+a8*b6+a12*b7 a1*b4+a5*b5+a9*b6+a13*b7 .. |
|
128 // . . |
|
129 // . . |
|
130 |
|
131 FLDMIAS r1!, {s8-s23} // load the left matrix [a0-a15] to registers s8-s23 |
|
132 FLDMIAS r2!, {s0-s7} // load [b0-b7] of right matrix to registers s0-s7 |
|
133 FMULS s24, s8, s0 // [s24-s27] = [a0*b0 a1*b0 a2*b0 a3*b0] |
|
134 FMULS s28, s8, s4 // [s28-s31] = [a0*b4 a1*b4 a2*b4 a3*b4] |
|
135 FMACS s24, s12, s1 // [s24-s27] += [a4*b1 a5*b1 a6*b1 a7*b1] |
|
136 FMACS s28, s12, s5 // [s28-s31] += [a4*b5 a5*b5 a6*b5 a7*b5] |
|
137 FMACS s24, s16, s2 // [s24-s27] += [a8*b2 a9*b2 a10*b2 a11*b2] |
|
138 FMACS s28, s16, s6 // [s28-s31] += [a8*b6 a9*b6 a10*b6 a11*b6] |
|
139 FMACS s24, s20, s3 // [s24-s27] += [a12*b3 a13*b3 a14*b3 a15*b3] |
|
140 FMACS s28, s20, s7 // [s28-s31] += [a12*b7 a13*b37a14*b7 a15*b7] |
|
141 FLDMIAS r2!, {s0-s7} // load [b8-b15] |
|
142 FSTMIAS r0!, {s24-s31} // write [dst0-dst7] |
|
143 FMULS s24, s8, s0 |
|
144 FMULS s28, s8, s4 |
|
145 FMACS s24, s12, s1 |
|
146 FMACS s28, s12, s5 |
|
147 FMACS s24, s16, s2 |
|
148 FMACS s28, s16, s6 |
|
149 FMACS s24, s20, s3 |
|
150 FMACS s28, s20, s7 |
|
151 FSTMIAS r0!, {s24-s31} |
|
152 |
|
153 // Restore the VFP registers and return. |
|
154 |
|
155 FMXR FPSCR, r3 |
|
156 |
|
157 FLDMFDD sp!, {d8-d15} |
|
158 |
|
159 BX lr |
|
160 |
|
161 } |
|
162 #endif /* #if defined(M3G_HW_FLOAT_VFPV2) */ |
|
163 |
|
164 |
|
165 /*------------------ Elementary float ------------------*/ |
|
166 |
|
167 #if defined(M3G_SOFT_FLOAT) |
|
168 |
|
169 #if defined (M3G_BUILD_ARM) |
|
170 |
|
171 /*! |
|
172 * \internal |
|
173 * \brief Floating point multiplication implementation for ARM. |
|
174 */ |
|
175 __asm M3Gfloat m3gMul(const M3Gfloat a, |
|
176 const M3Gfloat b) |
|
177 { |
|
178 /** |
|
179 * Extract the exponents of the multiplicands and add them |
|
180 * together. Flush to zero if either exponent or their sum |
|
181 * is zero. |
|
182 */ |
|
183 |
|
184 mov r12, #0xff; |
|
185 ands r2, r0, r12, lsl #23; // exit if e1 == 0 |
|
186 andnes r3, r1, r12, lsl #23; // exit if e2 == 0 |
|
187 subne r2, r2, #(127 << 23); |
|
188 addnes r12, r2, r3; // exit if e1+e2-127 <= 0 |
|
189 movle r0, #0; |
|
190 bxle lr; |
|
191 |
|
192 /** |
|
193 * Determine the sign of the result. Note that the exponent |
|
194 * may have overflowed to the sign bit, and thus the result |
|
195 * may be an arbitrary negative value when it really should |
|
196 * be +Inf or -Inf. |
|
197 */ |
|
198 |
|
199 teq r0, r1; |
|
200 orrmi r12, r12, #0x80000000; |
|
201 |
|
202 /** |
|
203 * Multiply the mantissas. First shift the mantissas up to |
|
204 * unsigned 1.31 fixed point, adding the leading "1" bit at |
|
205 * the same time, and finally do a 32x32 -> 64 bit unsigned |
|
206 * multiplication. The result is in unsigned 2.62 fixed point, |
|
207 * representing the interval [1.0, 4.0). |
|
208 */ |
|
209 |
|
210 mov r2, #0x80000000; |
|
211 orr r0, r2, r0, lsl #8; |
|
212 orr r1, r2, r1, lsl #8; |
|
213 umulls r2, r3, r0, r1; |
|
214 |
|
215 /** |
|
216 * If the highest bit of the 64-bit result is set, then the |
|
217 * mantissa lies in [2.0, 4.0) and needs to be renormalized. |
|
218 * That is, the mantissa is shifted one bit to the right and |
|
219 * the exponent correspondingly increased by 1. Note that we |
|
220 * lose the leading "1" bit from the mantissa by adding it up |
|
221 * with the exponent. |
|
222 */ |
|
223 |
|
224 subpl r12, r12, #(1 << 23); // no overflow: exponent -= 1 |
|
225 addpl r0, r12, r3, lsr #7; // no overflow: exponent += 1 |
|
226 addmi r0, r12, r3, lsr #8; // overflow: exponent += 1 |
|
227 bx lr; |
|
228 } |
|
229 |
|
230 /*! |
|
231 * \internal |
|
232 * \brief Floating point addition implementation for ARM. |
|
233 */ |
|
234 __asm M3Gfloat m3gAdd(const M3Gfloat a, |
|
235 const M3Gfloat b) |
|
236 { |
|
237 /** |
|
238 * If the operands have opposite signs then this is not really |
|
239 * an addition but a subtraction. Subtraction is much slower, |
|
240 * so we have a separate code path for it, rather than trying |
|
241 * to save space by handling both in the same place. |
|
242 */ |
|
243 |
|
244 teq r0, r1; |
|
245 bmi _m3gSub; |
|
246 |
|
247 /** |
|
248 * Sort the operands such that the larger operand is in R0 and |
|
249 * the smaller in R1. The sign bits do not affect the ordering, |
|
250 * since they are known to be equal. |
|
251 */ |
|
252 |
|
253 subs r2, r0, r1; |
|
254 submi r0, r0, r2; |
|
255 addmi r1, r1, r2; |
|
256 |
|
257 /** |
|
258 * Extract the exponent of the smaller operand into R2 and compute |
|
259 * the difference between the larger and smaller exponent into R3. |
|
260 * (Note that the sign bits cancel out in the subtraction.) The |
|
261 * exponent delta tells how many bits the mantissa of the smaller |
|
262 * operand must be shifted to the right in order to bring the |
|
263 * operands into equal scale. |
|
264 */ |
|
265 |
|
266 mov r2, r1, lsr #23; |
|
267 rsb r3, r2, r0, lsr #23; |
|
268 |
|
269 /** |
|
270 * Check if the exponent delta is bigger than 23 bits, or if the |
|
271 * smaller exponent is zero. In either case, exit the routine and |
|
272 * return the larger operand (which is already in R0). Note that |
|
273 * this means that subnormals are treated as zero. |
|
274 */ |
|
275 |
|
276 rsbs r12, r3, #23; // N set, V clear if R3 > 23 |
|
277 tstpl r2, #0xff; // execute only if R3 <= 23 |
|
278 bxle lr; // exit if Z set or N != V |
|
279 |
|
280 /** |
|
281 * Extract the mantissas and shift them up to unsigned 1.31 fixed |
|
282 * point, inserting the implied leading "1" bit at the same time. |
|
283 * Finally, align the decimal points and add up the mantissas. |
|
284 */ |
|
285 |
|
286 mov r12, #0x80000000; |
|
287 orr r0, r12, r0, lsl #8; |
|
288 orr r1, r12, r1, lsl #8; |
|
289 adds r0, r0, r1, lsr r3; |
|
290 |
|
291 /** |
|
292 * Compute the final exponent by adding up the smaller exponent |
|
293 * (R2), the exponent delta (R3), and the possible overflow bit |
|
294 * (carry flag). Note that in case of overflow, the leading "1" |
|
295 * has ended up in the carry flag and thus needs not be explicitly |
|
296 * discarded. Finally, put the mantissa together with the sign and |
|
297 * exponent. |
|
298 */ |
|
299 |
|
300 adc r2, r2, r3; // r2 = smallExp + deltaExp + overflow |
|
301 movcc r0, r0, lsl #1; // no overflow: discard leading 1 |
|
302 mov r0, r0, lsr #9; |
|
303 orr r0, r0, r2, lsl #23; |
|
304 bx lr; |
|
305 |
|
306 _m3gSub |
|
307 |
|
308 /** |
|
309 * Sort the operands such that the one with larger magnitude is |
|
310 * in R0 and has the correct sign (the sign of the final result), |
|
311 * while the smaller operand is in R1 with an inverted sign bit. |
|
312 */ |
|
313 |
|
314 eor r1, r1, #0x80000000; |
|
315 subs r2, r0, r1; |
|
316 eormi r2, r2, #0x80000000; |
|
317 submi r0, r0, r2; |
|
318 addmi r1, r1, r2; |
|
319 |
|
320 /** |
|
321 * Extract the exponent of the smaller operand into R2 and compute |
|
322 * the difference between the larger and smaller exponent into R3. |
|
323 * (Note that the sign bits cancel out in the subtraction.) The |
|
324 * exponent delta tells how many bits the mantissa of the smaller |
|
325 * operand must be shifted to the right in order to bring the |
|
326 * operands into equal scale. |
|
327 */ |
|
328 |
|
329 mov r2, r1, lsr #23; |
|
330 rsbs r3, r2, r0, lsr #23; |
|
331 |
|
332 /** |
|
333 * Check if the exponent delta is bigger than 31 bits, or if the |
|
334 * smaller exponent is zero. In either case, exit the routine and |
|
335 * return the larger operand (which is already in R0). Note that |
|
336 * this means that subnormals are treated as zero. |
|
337 */ |
|
338 |
|
339 rsbs r12, r3, #31; // N set, V clear if R3 > 31 |
|
340 tstpl r2, #0xff; // execute only if R3 <= 31 |
|
341 bxle lr; // exit if Z set or N != V |
|
342 |
|
343 /** |
|
344 * Extract the mantissas and shift them up to unsigned 1.31 fixed |
|
345 * point, inserting the implied leading "1" bit at the same time. |
|
346 * Then align the decimal points and subtract the smaller mantissa |
|
347 * from the larger one. |
|
348 */ |
|
349 |
|
350 mov r12, #0x80000000; |
|
351 orr r0, r12, r0, lsl #8; |
|
352 orr r1, r12, r1, lsl #8; |
|
353 subs r0, r0, r1, lsr r3; |
|
354 |
|
355 /** |
|
356 * We split the range of possible results into three categories: |
|
357 * |
|
358 * 1. [1.0, 2.0) ==> no renormalization needed (highest bit set) |
|
359 * 2. [0.5, 1.0) ==> only one left-shift needed |
|
360 * 3. (0.0, 0.5) ==> multiple left-shifts needed |
|
361 * 4. zero ==> just return |
|
362 * |
|
363 * Cases 1 and 2 are handled in the main code path. Cases 3 and 4 |
|
364 * are less common by far, so we branch to a separate code fragment |
|
365 * for those. |
|
366 */ |
|
367 |
|
368 movpls r0, r0, lsl #1; // Cases 2,3,4: shift left |
|
369 bpl _m3gSubRenormalize; // Cases 3 & 4: branch out |
|
370 |
|
371 /** |
|
372 * Now we have normalized the mantissa such that the highest bit |
|
373 * is set. Here we only need to adjust the exponent, if necessary, |
|
374 * and put the pieces together. Note that we lose the leading "1" |
|
375 * bit from the mantissa by adding it up with the exponent. We can |
|
376 * also do proper rounding (towards nearest) instead of truncation |
|
377 * (towards zero) at no extra cost! |
|
378 */ |
|
379 |
|
380 sbc r3, r3, #1; // deltaExp -= 1 (Case 1) or 2 (Case 2) |
|
381 add r2, r2, r3; // resultExp = smallExp + deltaExp |
|
382 movs r0, r0, lsr #8; // shift mantissa, keep leading "1" |
|
383 adc r0, r0, r2, lsl #23; // resultExp += 1, mantissa += carry |
|
384 bx lr; |
|
385 |
|
386 /** |
|
387 * Separate code path for cases 3 and 4 (see above). The mantissa |
|
388 * has already been shifted up by one, but the exponent has not |
|
389 * been correspondingly decreased. We also know that the highest |
|
390 * bit is still not set, and that the carry flag is clear. |
|
391 */ |
|
392 |
|
393 _m3gSubRenormalize |
|
394 bxeq lr; |
|
395 subcc r3, r3, #2; |
|
396 movccs r0, r0, lsl #2; |
|
397 |
|
398 /** |
|
399 * If the carry flag is still not set, i.e. there were more than |
|
400 * two leading zeros in the mantissa initially, loop until we |
|
401 * find the highest set bit. |
|
402 */ |
|
403 |
|
404 _m3gSubRenormalizeLoop |
|
405 subcc r3, r3, #1; |
|
406 movccs r0, r0, lsl #1; |
|
407 bcc _m3gSubRenormalizeLoop; |
|
408 |
|
409 /** |
|
410 * Now the leading "1" is in the carry flag, so we can just add up |
|
411 * the exponent and mantissa as usual, doing proper rounding at |
|
412 * the same time. However, cases where the exponent goes negative |
|
413 * (that is, underflows) must be detected and flushed to zero. |
|
414 */ |
|
415 |
|
416 add r3, r2, r3; |
|
417 movs r0, r0, lsr #9; |
|
418 adc r0, r0, r3, lsl #23; |
|
419 teq r0, r2, lsl #23; |
|
420 movmi r0, #0; |
|
421 bx lr; |
|
422 } |
|
423 |
|
424 #else /* M3G_BUILD_ARM */ |
|
425 |
|
426 /*! |
|
427 * \internal |
|
428 * \brief Floating point addition implementation |
|
429 * |
|
430 */ |
|
431 static M3G_INLINE M3Gfloat m3gFloatAdd(const M3Guint aBits, |
|
432 const M3Guint bBits) |
|
433 { |
|
434 M3Guint large, small, signMask; |
|
435 |
|
436 /* Early exits for underflow cases */ |
|
437 |
|
438 large = (M3Gint)(aBits & ~SIGN_MASK); |
|
439 if (large <= 0x00800000) { |
|
440 return INT_AS_FLOAT(bBits); |
|
441 } |
|
442 small = (M3Gint)(bBits & ~SIGN_MASK); |
|
443 if (small <= 0x00800000) { |
|
444 return INT_AS_FLOAT(aBits); |
|
445 } |
|
446 |
|
447 /* Swap the numbers so that "large" really is larger; the unsigned |
|
448 * (or de-signed) bitmasks for floats are nicely monotonous, so we |
|
449 * can compare directly. Also store the sign of the larger number |
|
450 * for future reference. */ |
|
451 |
|
452 if (small > large) { |
|
453 M3Gint temp = small; |
|
454 small = large; |
|
455 large = temp; |
|
456 signMask = (bBits & SIGN_MASK); |
|
457 } |
|
458 else { |
|
459 signMask = (aBits & SIGN_MASK); |
|
460 } |
|
461 |
|
462 { |
|
463 M3Guint res, overflow; |
|
464 M3Guint resExp, expDelta; |
|
465 |
|
466 /* Store the larger exponent as our candidate result exponent, |
|
467 * and compute the difference between the exponents */ |
|
468 |
|
469 resExp = (large >> 23); |
|
470 expDelta = resExp - (small >> 23); |
|
471 |
|
472 /* Take an early exit if the change would be insignificant; |
|
473 * this also guards against odd results from shifting by more |
|
474 * than 31 (undefined in C) */ |
|
475 |
|
476 if (expDelta >= 24) { |
|
477 res = large | signMask; |
|
478 return INT_AS_FLOAT(res); |
|
479 } |
|
480 |
|
481 /* Convert the mantissas into shifted integers, and shift the |
|
482 * smaller number to the same scale with the larger one. */ |
|
483 |
|
484 large = (large & MANTISSA_MASK) | LEADING_ONE; |
|
485 small = (small & MANTISSA_MASK) | LEADING_ONE; |
|
486 small >>= expDelta; |
|
487 M3G_ASSERT(large >= small); |
|
488 |
|
489 /* Check whether we're really adding or subtracting the |
|
490 * smaller number, and branch to slightly different routines |
|
491 * respectively */ |
|
492 |
|
493 if (((aBits ^ bBits) & SIGN_MASK) == 0) { |
|
494 |
|
495 /* Matching signs; just add the numbers and check for |
|
496 * overflow, shifting to compensate as necessary. */ |
|
497 |
|
498 res = large + small; |
|
499 |
|
500 overflow = (res >> 24); |
|
501 res >>= overflow; |
|
502 resExp += overflow; |
|
503 } |
|
504 else { |
|
505 |
|
506 /* Different signs, so let's subtract the smaller value; |
|
507 * also check that we're not subtracting a number from |
|
508 * itself (so we don't have to normalize a zero below) */ |
|
509 |
|
510 if (small == large) { |
|
511 return 0.0f; /* x - x = 0 */ |
|
512 } |
|
513 |
|
514 res = (large << 8) - (small << 8); |
|
515 |
|
516 /* Renormalize the number by shifting until we've got the |
|
517 * high bit in place */ |
|
518 |
|
519 while ((res >> 24) == 0) { |
|
520 res <<= 8; |
|
521 resExp -= 8; |
|
522 } |
|
523 while ((res >> 31) == 0) { |
|
524 res <<= 1; |
|
525 --resExp; |
|
526 } |
|
527 res >>= 8; |
|
528 } |
|
529 |
|
530 /* Flush to zero in case of over/underflow of the exponent */ |
|
531 |
|
532 if (resExp >= 255) { |
|
533 return 0.0f; |
|
534 } |
|
535 |
|
536 /* Compose the final number into "res"; note that we pull in |
|
537 * the sign of the original larger number, which should still |
|
538 * be valid */ |
|
539 |
|
540 res &= MANTISSA_MASK; |
|
541 res |= (resExp << 23); |
|
542 res |= signMask; |
|
543 |
|
544 return INT_AS_FLOAT(res); |
|
545 } |
|
546 } |
|
547 |
|
548 /*! |
|
549 * \internal |
|
550 * \brief Floating point multiplication implementation |
|
551 */ |
|
552 static M3G_INLINE M3Gfloat m3gFloatMul(const M3Guint aBits, |
|
553 const M3Guint bBits) |
|
554 { |
|
555 M3Guint a, b; |
|
556 |
|
557 /* Early exits for underflow and multiplication by zero */ |
|
558 |
|
559 a = (aBits & ~SIGN_MASK); |
|
560 if (a <= 0x00800000) { |
|
561 return 0.0f; |
|
562 } |
|
563 |
|
564 b = (bBits & ~SIGN_MASK); |
|
565 if (b <= 0x00800000) { |
|
566 return 0.0f; |
|
567 } |
|
568 |
|
569 { |
|
570 M3Guint res, exponent, overflow; |
|
571 |
|
572 /* Compute the exponent of the result, assuming the mantissas |
|
573 * don't overflow; then mask out the original exponents */ |
|
574 |
|
575 exponent = (a >> 23) + (b >> 23) - 127; |
|
576 a &= MANTISSA_MASK; |
|
577 b &= MANTISSA_MASK; |
|
578 |
|
579 /* Compute the new mantissa from: |
|
580 * |
|
581 * (1 + a)(1 + b) = ab + a + b + 1 |
|
582 * |
|
583 * First shift the mantissas from 0.23 down to 0.16 for the |
|
584 * multiplication, then shift back to 0.23 for adding in the |
|
585 * "a + b + 1" part of the equation. */ |
|
586 |
|
587 res = (a >> 7) * (b >> 7); /* "ab" at 0.32 */ |
|
588 res = (res >> 9) + a + b + LEADING_ONE; |
|
589 |
|
590 /* Add the leading one, then normalize the result by checking |
|
591 * the overflow bit and dividing by two if necessary */ |
|
592 |
|
593 overflow = (res >> 24); |
|
594 res >>= overflow; |
|
595 exponent += overflow; |
|
596 |
|
597 /* Flush to zero in case of over/underflow of the exponent */ |
|
598 |
|
599 if (exponent >= 255) { |
|
600 return 0.0f; |
|
601 } |
|
602 |
|
603 /* Compose the final number into "res" */ |
|
604 |
|
605 res &= MANTISSA_MASK; |
|
606 res |= (exponent << 23); |
|
607 res |= (aBits ^ bBits) & SIGN_MASK; |
|
608 |
|
609 return INT_AS_FLOAT(res); |
|
610 } |
|
611 } |
|
612 |
|
613 #endif /* M3G_BUILD_ARM */ |
|
614 |
|
615 /*! |
|
616 * \internal |
|
617 * \brief Computes the signed fractional part of a floating point |
|
618 * number |
|
619 * |
|
620 * \param x floating point value |
|
621 * \return x signed fraction of x in ]-1, 1[ |
|
622 */ |
|
623 static M3Gfloat m3gFrac(M3Gfloat x) |
|
624 { |
|
625 /* Quick exit for -1 < x < 1 */ |
|
626 |
|
627 if (m3gAbs(x) < 1.0f) { |
|
628 return x; |
|
629 } |
|
630 |
|
631 /* Shift the mantissa to the proper place, mask out the integer |
|
632 * part, and renormalize */ |
|
633 { |
|
634 M3Guint ix = FLOAT_AS_UINT(x); |
|
635 M3Gint expo = ((ix >> 23) & 0xFF) - 127; |
|
636 M3G_ASSERT(expo >= 0); |
|
637 |
|
638 /* The decimal part will always be zero for large values with |
|
639 * exponents over 24 */ |
|
640 |
|
641 if (expo >= 24) { |
|
642 return 0.f; |
|
643 } |
|
644 else { |
|
645 |
|
646 /* Shift the integer part out of the mantissa and see what |
|
647 * we have left */ |
|
648 |
|
649 M3Guint base = (ix & MANTISSA_MASK) | LEADING_ONE; |
|
650 base = (base << expo) & MANTISSA_MASK; |
|
651 |
|
652 /* Quick exit (and guard against infinite looping) for |
|
653 * zero */ |
|
654 |
|
655 if (base == 0) { |
|
656 return 0.f; |
|
657 } |
|
658 |
|
659 /* We now have an exponent of 0 (i.e. no shifting), but |
|
660 * must renormalize to get a set bit in place of the |
|
661 * hidden (implicit one) bit */ |
|
662 |
|
663 expo = 0; |
|
664 |
|
665 while ((base >> 19) == 0) { |
|
666 base <<= 4; |
|
667 expo -= 4; |
|
668 } |
|
669 while ((base >> 23) == 0) { |
|
670 base <<= 1; |
|
671 --expo; |
|
672 } |
|
673 |
|
674 /* Compose the final number */ |
|
675 |
|
676 ix = |
|
677 (base & MANTISSA_MASK) | |
|
678 ((expo + 127) << 23) | |
|
679 (ix & SIGN_MASK); |
|
680 return INT_AS_FLOAT(ix); |
|
681 } |
|
682 } |
|
683 } |
|
684 |
|
685 #endif /* M3G_SOFT_FLOAT */ |
|
686 |
|
687 #if defined(M3G_DEBUG) |
|
688 /*! |
|
689 * \internal |
|
690 * \brief Checks for NaN or infinity in a floating point input |
|
691 */ |
|
692 static void m3gValidateFloats(int n, float *p) |
|
693 { |
|
694 while (n-- > 0) { |
|
695 M3G_ASSERT(EXPONENT(*p) < 120); |
|
696 ++p; |
|
697 } |
|
698 } |
|
699 #else |
|
700 # define m3gValidateFloats(n, p) |
|
701 #endif |
|
702 |
|
703 /*------------------ Trigonometry and exp ----------*/ |
|
704 |
|
705 |
|
706 #if defined(M3G_SOFT_FLOAT) |
|
707 /*! |
|
708 * \internal |
|
709 * \brief Sine for the first quadrant |
|
710 * |
|
711 * \param x floating point value in [0, PI/2] |
|
712 * \return sine of \c x |
|
713 */ |
|
714 static M3Gfloat m3gSinFirstQuadrant(M3Gfloat x) |
|
715 { |
|
716 M3Guint bits = FLOAT_AS_UINT(x); |
|
717 |
|
718 if (bits <= 0x3ba3d70au) /* 0.005f */ |
|
719 return x; |
|
720 else { |
|
721 static const M3Gfloat sinTermLut[4] = { |
|
722 -1.0f / (2*3), |
|
723 -1.0f / (4*5), |
|
724 -1.0f / (6*7), |
|
725 -1.0f / (8*9) |
|
726 }; |
|
727 |
|
728 M3Gfloat xx = m3gSquare(x); |
|
729 M3Gfloat sinx = x; |
|
730 int i; |
|
731 |
|
732 for (i = 0; i < 4; ++i) { |
|
733 x = m3gMul(x, m3gMul(xx, sinTermLut[i])); |
|
734 sinx = m3gAdd(sinx, x); |
|
735 } |
|
736 |
|
737 return sinx; |
|
738 } |
|
739 } |
|
740 #endif /* M3G_SOFT_FLOAT */ |
|
741 |
|
742 #if defined(M3G_SOFT_FLOAT) |
|
743 /*! |
|
744 * \internal |
|
745 * \brief Computes sine for the first period |
|
746 * |
|
747 * \param x floating point value in [0, 2PI] |
|
748 * \return sine of x |
|
749 */ |
|
750 static M3Gfloat m3gSinFirstPeriod(const M3Gfloat x) |
|
751 { |
|
752 M3G_ASSERT(x >= 0 && x <= TWO_PI); |
|
753 |
|
754 if (x >= PI) { |
|
755 return m3gNegate(m3gSinFirstQuadrant(x >= ONE_AND_HALF_PI ? |
|
756 m3gSub(TWO_PI, x) : |
|
757 m3gSub(x, PI))); |
|
758 } |
|
759 return m3gSinFirstQuadrant((x >= HALF_PI) ? m3gSub(PI, x) : x); |
|
760 } |
|
761 #endif /* M3G_SOFT_FLOAT */ |
|
762 |
|
763 /*------------- Float vs. int conversion helpers -------------*/ |
|
764 |
|
765 /*! |
|
766 * \internal |
|
767 * \brief Scales and clamps a float to unsigned byte range |
|
768 */ |
|
769 static M3G_INLINE M3Gint m3gFloatToByte(const M3Gfloat a) |
|
770 { |
|
771 return m3gRoundToInt(m3gMul(255.f, m3gClampFloat(a, 0.f, 1.f))); |
|
772 } |
|
773 |
|
774 /*------------------ Vector helpers ------------------*/ |
|
775 |
|
776 /*! |
|
777 * \internal |
|
778 * \brief Computes the norm of a floating-point 3-vector |
|
779 */ |
|
780 static M3G_INLINE M3Gfloat m3gNorm3(const M3Gfloat *v) |
|
781 { |
|
782 return m3gAdd(m3gAdd(m3gSquare(v[0]), m3gSquare(v[1])), |
|
783 m3gSquare(v[2])); |
|
784 } |
|
785 |
|
786 /*! |
|
787 * \internal |
|
788 * \brief Computes the norm of a floating-point 4-vector |
|
789 */ |
|
790 static M3G_INLINE M3Gfloat m3gNorm4(const M3Gfloat *v) |
|
791 { |
|
792 return m3gAdd(m3gAdd(m3gSquare(v[0]), m3gSquare(v[1])), |
|
793 m3gAdd(m3gSquare(v[2]), m3gSquare(v[3]))); |
|
794 } |
|
795 |
|
796 /*! |
|
797 * \internal |
|
798 * \brief Scales a floating-point 3-vector |
|
799 */ |
|
800 static M3G_INLINE void m3gScale3(M3Gfloat *v, M3Gfloat s) |
|
801 { |
|
802 v[0] = m3gMul(v[0], s); |
|
803 v[1] = m3gMul(v[1], s); |
|
804 v[2] = m3gMul(v[2], s); |
|
805 } |
|
806 |
|
807 /*! |
|
808 * \internal |
|
809 * \brief Scales a floating-point 4-vector |
|
810 */ |
|
811 static M3G_INLINE void m3gScale4(M3Gfloat *v, M3Gfloat s) |
|
812 { |
|
813 v[0] = m3gMul(v[0], s); |
|
814 v[1] = m3gMul(v[1], s); |
|
815 v[2] = m3gMul(v[2], s); |
|
816 v[3] = m3gMul(v[3], s); |
|
817 } |
|
818 |
|
819 |
|
820 /*------------------ Matrices ------------------*/ |
|
821 |
|
822 /*! |
|
823 * \internal |
|
824 */ |
|
825 static M3G_INLINE M3Gbool m3gIsClassified(const Matrix *mtx) |
|
826 { |
|
827 M3G_ASSERT(mtx != NULL); |
|
828 return (M3Gbool) mtx->classified; |
|
829 } |
|
830 |
|
831 /*! |
|
832 * \internal |
|
833 * \brief Returns a classification for a single floating point number |
|
834 */ |
|
835 static M3G_INLINE M3Guint m3gElementClass(const M3Gfloat x) |
|
836 { |
|
837 if (IS_ZERO(x)) { |
|
838 return ELEM_ZERO; |
|
839 } |
|
840 else if (IS_ONE(x)) { |
|
841 return ELEM_ONE; |
|
842 } |
|
843 else if (IS_MINUS_ONE(x)) { |
|
844 return ELEM_MINUS_ONE; |
|
845 } |
|
846 return ELEM_ANY; |
|
847 } |
|
848 |
|
849 /*! |
|
850 * \internal |
|
851 * \brief Computes the classification mask of a matrix |
|
852 * |
|
853 * The mask is constructed from two bits per elements, with the lowest |
|
854 * two bits corresponding to the first element in the \c elem array of |
|
855 * the matrix. |
|
856 */ |
|
857 static void m3gClassify(Matrix *mtx) |
|
858 { |
|
859 M3Guint mask = 0; |
|
860 const M3Gfloat *p; |
|
861 int i; |
|
862 |
|
863 M3G_ASSERT(mtx != NULL); |
|
864 M3G_ASSERT(!m3gIsClassified(mtx)); |
|
865 |
|
866 p = mtx->elem; |
|
867 for (i = 0; i < 16; ++i) { |
|
868 M3Gfloat elem = *p++; |
|
869 mask |= (m3gElementClass(elem) << (i*2)); |
|
870 } |
|
871 mtx->mask = mask; |
|
872 mtx->classified = M3G_TRUE; |
|
873 } |
|
874 |
|
875 /*! |
|
876 * \internal |
|
877 * \brief Sets matrix classification directly |
|
878 */ |
|
879 static M3G_INLINE void m3gClassifyAs(M3Guint mask, Matrix *mtx) |
|
880 { |
|
881 M3G_ASSERT(mtx != NULL); |
|
882 mtx->mask = mask; |
|
883 mtx->classified = M3G_TRUE; |
|
884 mtx->complete = M3G_FALSE; |
|
885 } |
|
886 |
|
887 /*! |
|
888 * \internal |
|
889 * \brief Attempts to classify a matrix more precisely |
|
890 * |
|
891 * Tries to classify all "free" elements of a matrix into one of the |
|
892 * predefined constants to improve precision and performance in |
|
893 * subsequent calculations. |
|
894 */ |
|
895 static void m3gSubClassify(Matrix *mtx) |
|
896 { |
|
897 M3G_ASSERT_PTR(mtx); |
|
898 M3G_ASSERT(m3gIsClassified(mtx)); |
|
899 { |
|
900 const M3Gfloat *p = mtx->elem; |
|
901 M3Guint inMask = mtx->mask; |
|
902 M3Guint outMask = inMask; |
|
903 int i; |
|
904 |
|
905 for (i = 0; i < 16; ++i, inMask >>= 2) { |
|
906 M3Gfloat elem = *p++; |
|
907 if ((inMask & 0x03) == ELEM_ANY) { |
|
908 outMask &= ~(0x03u << (i*2)); |
|
909 outMask |= (m3gElementClass(elem) << (i*2)); |
|
910 } |
|
911 } |
|
912 mtx->mask = outMask; |
|
913 } |
|
914 } |
|
915 |
|
916 /*! |
|
917 * \internal |
|
918 * \brief Fills in the implicit values for a classified matrix |
|
919 */ |
|
920 static void m3gFillClassifiedMatrix(Matrix *mtx) |
|
921 { |
|
922 int i; |
|
923 M3Guint mask; |
|
924 M3Gfloat *p; |
|
925 |
|
926 M3G_ASSERT(mtx != NULL); |
|
927 M3G_ASSERT(mtx->classified); |
|
928 M3G_ASSERT(!mtx->complete); |
|
929 |
|
930 mask = mtx->mask; |
|
931 p = mtx->elem; |
|
932 |
|
933 for (i = 0; i < 16; ++i, mask >>= 2) { |
|
934 unsigned elem = (mask & 0x03); |
|
935 switch (elem) { |
|
936 case ELEM_ZERO: *p++ = 0.0f; break; |
|
937 case ELEM_ONE: *p++ = 1.0f; break; |
|
938 case ELEM_MINUS_ONE: *p++ = -1.0f; break; |
|
939 default: ++p; |
|
940 } |
|
941 } |
|
942 mtx->complete = M3G_TRUE; |
|
943 } |
|
944 |
|
945 |
|
946 #if !defined(M3G_HW_FLOAT) |
|
947 /*! |
|
948 * \internal |
|
949 * \brief Performs one multiply-add of classified matrix elements |
|
950 * |
|
951 * \param amask element class of the first multiplicand |
|
952 * \param a float value of the first multiplicand |
|
953 * \param bmask element class of the second multiplicand |
|
954 * \param b float value of the second multiplicand |
|
955 * \param c float value to add |
|
956 * \return a * b + c |
|
957 * |
|
958 * \notes inline, as only called from the matrix product function |
|
959 */ |
|
960 static M3G_INLINE M3Gfloat m3gClassifiedMadd( |
|
961 const M3Gbitmask amask, const M3Gfloat *pa, |
|
962 const M3Gbitmask bmask, const M3Gfloat *pb, |
|
963 const M3Gfloat c) |
|
964 { |
|
965 /* Check for zero product to reduce the switch cases below */ |
|
966 |
|
967 if (amask == ELEM_ZERO || bmask == ELEM_ZERO) { |
|
968 return c; |
|
969 } |
|
970 |
|
971 /* Branch based on the classification of a */ |
|
972 |
|
973 switch (amask) { |
|
974 |
|
975 case ELEM_ANY: |
|
976 if (bmask == ELEM_ONE) { |
|
977 return m3gAdd(*pa, c); /* a * 1 + c = a + c */ |
|
978 } |
|
979 if (bmask == ELEM_MINUS_ONE) { |
|
980 return m3gSub(c, *pa); /* a * -1 + c = -a + c */ |
|
981 } |
|
982 return m3gMadd(*pa, *pb, c); /* a * b + c */ |
|
983 |
|
984 case ELEM_ONE: |
|
985 if (bmask == ELEM_ONE) { |
|
986 return m3gAdd(c, 1.f); /* 1 * 1 + c = 1 + c */ |
|
987 } |
|
988 if (bmask == ELEM_MINUS_ONE) { |
|
989 return m3gSub(c, 1.f); /* 1 * -1 + c = -1 + c */ |
|
990 } |
|
991 return m3gAdd(*pb, c); /* 1 * b + c = b + c */ |
|
992 |
|
993 case ELEM_MINUS_ONE: |
|
994 if (bmask == ELEM_ONE) { |
|
995 return m3gSub(c, 1.f); /* -1 * 1 + c = -1 + c */ |
|
996 } |
|
997 if (bmask == ELEM_MINUS_ONE) { |
|
998 return m3gAdd(c, 1.f); /* -1 * -1 + c = 1 + c */ |
|
999 } |
|
1000 return m3gSub(c, *pb); /* -1 * b + c = -b + c */ |
|
1001 |
|
1002 default: |
|
1003 M3G_ASSERT(M3G_FALSE); |
|
1004 return 0.0f; |
|
1005 } |
|
1006 } |
|
1007 #endif /*!defined(M3G_HW_FLOAT)*/ |
|
1008 |
|
1009 /*! |
|
1010 * \internal |
|
1011 * \brief Computes a generic 4x4 matrix product |
|
1012 */ |
|
1013 static void m3gGenericMatrixProduct(Matrix *dst, |
|
1014 const Matrix *left, const Matrix *right) |
|
1015 { |
|
1016 M3G_ASSERT(dst != NULL && left != NULL && right != NULL); |
|
1017 |
|
1018 { |
|
1019 |
|
1020 # if defined(M3G_HW_FLOAT) |
|
1021 if (!left->complete) { |
|
1022 m3gFillClassifiedMatrix((Matrix*)left); |
|
1023 } |
|
1024 if (!right->complete) { |
|
1025 m3gFillClassifiedMatrix((Matrix*)right); |
|
1026 } |
|
1027 # else |
|
1028 int row; |
|
1029 const unsigned lmask = left->mask; |
|
1030 const unsigned rmask = right->mask; |
|
1031 # endif |
|
1032 |
|
1033 #if defined(M3G_HW_FLOAT_VFPV2) |
|
1034 _m3gGenericMatrixProduct(dst, left, right); |
|
1035 #else |
|
1036 for (row = 0; row < 4; ++row) { |
|
1037 int col; |
|
1038 for (col = 0; col < 4; ++col) { |
|
1039 int k; |
|
1040 M3Gfloat a = 0; |
|
1041 for (k = 0; k < 4; ++k) { |
|
1042 M3Gint lidx = MELEM(row, k); |
|
1043 M3Gint ridx = MELEM(k, col); |
|
1044 # if defined(M3G_HW_FLOAT) |
|
1045 a = m3gMadd(left->elem[lidx], right->elem[ridx], a); |
|
1046 # else |
|
1047 a = m3gClassifiedMadd((lmask >> (2 * lidx)) & 3, |
|
1048 &left->elem[lidx], |
|
1049 (rmask >> (2 * ridx)) & 3, |
|
1050 &right->elem[ridx], |
|
1051 a); |
|
1052 # endif /*!M3G_HW_FLOAT*/ |
|
1053 } |
|
1054 M44F(dst, row, col) = a; |
|
1055 } |
|
1056 } |
|
1057 #endif /*!M3G_HW_FLOAT_VFPV2*/ |
|
1058 } |
|
1059 dst->complete = M3G_TRUE; |
|
1060 dst->classified = M3G_FALSE; |
|
1061 } |
|
1062 |
|
1063 /*! |
|
1064 * \internal |
|
1065 * \brief Converts a float vector to 16-bit integers |
|
1066 * |
|
1067 * \param size vector length |
|
1068 * \param vec pointer to float vector |
|
1069 * \param scale scale of output values, as the number of bits to |
|
1070 * shift left to get actual values |
|
1071 * \param outVec output value vector |
|
1072 */ |
|
1073 static void m3gFloatVecToShort(M3Gint size, const M3Gfloat *vec, |
|
1074 M3Gint scale, M3Gshort *outVec) |
|
1075 { |
|
1076 const M3Guint *vecInt = (const M3Guint*) vec; |
|
1077 M3Gint i; |
|
1078 |
|
1079 for (i = 0; i < size; ++i) { |
|
1080 M3Guint a = vecInt[i]; |
|
1081 if ((a & ~SIGN_MASK) < (1 << 23)) { |
|
1082 *outVec++ = 0; |
|
1083 } |
|
1084 else { |
|
1085 M3Gint shift = |
|
1086 scale - ((M3Gint)((vecInt[i] >> 23) & 0xFFu) - (127 + 23)); |
|
1087 M3G_ASSERT(shift > 8); /* or the high bits will overflow */ |
|
1088 |
|
1089 if (shift > 23) { |
|
1090 *outVec++ = 0; |
|
1091 } |
|
1092 else { |
|
1093 M3Gint out = |
|
1094 (M3Gint) (((a & MANTISSA_MASK) | LEADING_ONE) >> shift); |
|
1095 if (a >> 31) { |
|
1096 out = -out; |
|
1097 } |
|
1098 M3G_ASSERT(m3gInRange(out, -32767, 32767)); |
|
1099 *outVec++ = (M3Gshort) out; |
|
1100 } |
|
1101 } |
|
1102 } |
|
1103 } |
|
1104 |
|
1105 /*---------------------------------------------------------------------- |
|
1106 * Internal functions |
|
1107 *--------------------------------------------------------------------*/ |
|
1108 |
|
1109 /*--------------------------------------------------------------------*/ |
|
1110 |
|
1111 #if defined(M3G_SOFT_FLOAT) |
|
1112 |
|
1113 #if !defined (M3G_BUILD_ARM) |
|
1114 |
|
1115 static M3Gfloat m3gAdd(const M3Gfloat a, const M3Gfloat b) |
|
1116 { |
|
1117 return m3gFloatAdd(FLOAT_AS_UINT(a), FLOAT_AS_UINT(b)); |
|
1118 } |
|
1119 |
|
1120 static M3Gfloat m3gMul(const M3Gfloat a, const M3Gfloat b) |
|
1121 { |
|
1122 return m3gFloatMul(FLOAT_AS_UINT(a), FLOAT_AS_UINT(b)); |
|
1123 } |
|
1124 |
|
1125 #endif /* M3G_BUILD_ARM */ |
|
1126 |
|
1127 /*! |
|
1128 * \internal |
|
1129 * \brief Computes the reciprocal of the square root |
|
1130 * |
|
1131 * \param x a floating point value |
|
1132 * \return 1 / square root of \c x |
|
1133 */ |
|
1134 static M3Gfloat m3gRcpSqrt(const M3Gfloat x) |
|
1135 { |
|
1136 /* Approximation followed by Newton-Raphson iteration a'la |
|
1137 * "Floating-point tricks" by J. Blinn, but we iterate several |
|
1138 * times to improve precision */ |
|
1139 |
|
1140 M3Gint i = (M3G_FLOAT_ONE + (M3G_FLOAT_ONE >> 1)) |
|
1141 - (FLOAT_AS_UINT(x) >> 1); |
|
1142 M3Gfloat y = INT_AS_FLOAT(i); |
|
1143 for (i = 0; i < 3; ++i) { |
|
1144 y = m3gMul(y, m3gSub(1.5f, m3gHalf(m3gMul(x, m3gSquare(y))))); |
|
1145 } |
|
1146 return y; |
|
1147 } |
|
1148 |
|
1149 /*! |
|
1150 * \internal |
|
1151 * \brief Computes the square root |
|
1152 * |
|
1153 * \param x a floating point value |
|
1154 * \return square root of \c x |
|
1155 */ |
|
1156 static M3Gfloat m3gSqrt(const M3Gfloat x) |
|
1157 { |
|
1158 /* Approximation followed by Newton-Raphson iteration a'la |
|
1159 * "Floating-point tricks" by J. Blinn, but we iterate several |
|
1160 * times to improve precision */ |
|
1161 |
|
1162 M3Gint i = (FLOAT_AS_UINT(x) >> 1) + (M3G_FLOAT_ONE >> 1); |
|
1163 M3Gfloat y = INT_AS_FLOAT(i); |
|
1164 for (i = 0; i < 2; ++i) { |
|
1165 y = m3gDiv(m3gAdd(m3gSquare(y), x), m3gDouble(y)); |
|
1166 } |
|
1167 return y; |
|
1168 } |
|
1169 |
|
1170 #endif /* M3G_SOFT_FLOAT */ |
|
1171 |
|
1172 /*--------------------------------------------------------------------*/ |
|
1173 |
|
1174 #if defined(M3G_SOFT_FLOAT) |
|
1175 /*! |
|
1176 * \internal |
|
1177 */ |
|
1178 static M3Gfloat m3gArcCos(const M3Gfloat x) |
|
1179 { |
|
1180 return (M3Gfloat) acos(x); |
|
1181 } |
|
1182 |
|
1183 /*! |
|
1184 * \internal |
|
1185 */ |
|
1186 static M3Gfloat m3gArcTan(const M3Gfloat y, const M3Gfloat x) |
|
1187 { |
|
1188 return (M3Gfloat) atan2(y, x); |
|
1189 } |
|
1190 |
|
1191 /*! |
|
1192 * \internal |
|
1193 */ |
|
1194 static M3Gfloat m3gCos(const M3Gfloat x) |
|
1195 { |
|
1196 return m3gSin(m3gAdd(x, HALF_PI)); |
|
1197 } |
|
1198 |
|
1199 /*! |
|
1200 * \internal |
|
1201 * \brief |
|
1202 */ |
|
1203 static M3Gfloat m3gSin(const M3Gfloat x) |
|
1204 { |
|
1205 M3Gfloat f = x; |
|
1206 |
|
1207 /* If x is greater than two pi, do a modulo operation to bring it |
|
1208 * back in range for the internal sine function */ |
|
1209 |
|
1210 if (m3gAbs(f) >= TWO_PI) { |
|
1211 f = m3gMul (f, (1.f / TWO_PI)); |
|
1212 f = m3gFrac(f); |
|
1213 f = m3gMul (f, TWO_PI); |
|
1214 } |
|
1215 |
|
1216 /* Compute the result, negating both the input value and the |
|
1217 * result if x was negative */ |
|
1218 { |
|
1219 M3Guint i = FLOAT_AS_UINT(f); |
|
1220 M3Guint neg = (i & SIGN_MASK); |
|
1221 i ^= neg; |
|
1222 f = m3gSinFirstPeriod(INT_AS_FLOAT(i)); |
|
1223 i = FLOAT_AS_UINT(f) ^ neg; |
|
1224 return INT_AS_FLOAT(i); |
|
1225 } |
|
1226 } |
|
1227 |
|
1228 /*! |
|
1229 * \internal |
|
1230 */ |
|
1231 static M3Gfloat m3gTan(const M3Gfloat x) |
|
1232 { |
|
1233 return (M3Gfloat) tan(x); |
|
1234 } |
|
1235 |
|
1236 /*! |
|
1237 * \internal |
|
1238 */ |
|
1239 static M3Gfloat m3gExp(const M3Gfloat a) |
|
1240 { |
|
1241 return (M3Gfloat) exp(a); |
|
1242 } |
|
1243 #endif /* M3G_SOFT_FLOAT */ |
|
1244 |
|
1245 /*! |
|
1246 * \brief Checks whether the bottom row of a matrix is 0 0 0 1 |
|
1247 */ |
|
1248 static M3Gbool m3gIsWUnity(const Matrix *mtx) |
|
1249 { |
|
1250 M3G_ASSERT_PTR(mtx); |
|
1251 |
|
1252 if (!m3gIsClassified(mtx)) { |
|
1253 return (IS_ZERO(M44F(mtx, 3, 0)) && |
|
1254 IS_ZERO(M44F(mtx, 3, 1)) && |
|
1255 IS_ZERO(M44F(mtx, 3, 2)) && |
|
1256 IS_ONE (M44F(mtx, 3, 3))); |
|
1257 } |
|
1258 else { |
|
1259 return ((mtx->mask & 0xC0C0C0C0) == (ELEM_ONE << 30)); |
|
1260 } |
|
1261 } |
|
1262 |
|
1263 /*! |
|
1264 * \brief Makes a quaternion by exponentiating a quaternion logarithm |
|
1265 */ |
|
1266 static void m3gExpQuat(Quat *quat, const Vec3 *qExp) |
|
1267 { |
|
1268 M3Gfloat theta; |
|
1269 |
|
1270 M3G_ASSERT_PTR(quat); |
|
1271 M3G_ASSERT_PTR(qExp); |
|
1272 |
|
1273 theta = m3gSqrt(m3gAdd(m3gAdd(m3gSquare(qExp->x), |
|
1274 m3gSquare(qExp->y)), |
|
1275 m3gSquare(qExp->z))); |
|
1276 |
|
1277 if (theta > EPSILON) { |
|
1278 M3Gfloat s = m3gMul(m3gSin(theta), m3gRcp(theta)); |
|
1279 quat->x = m3gMul(qExp->x, s); |
|
1280 quat->y = m3gMul(qExp->y, s); |
|
1281 quat->z = m3gMul(qExp->z, s); |
|
1282 quat->w = m3gCos(theta); |
|
1283 } |
|
1284 else { |
|
1285 quat->x = quat->y = quat->z = 0.0f; |
|
1286 quat->w = 1.0f; |
|
1287 } |
|
1288 } |
|
1289 |
|
1290 /*! |
|
1291 * \brief Natural logarithm of a unit quaternion. |
|
1292 */ |
|
1293 static void m3gLogQuat(Vec3 *qLog, const Quat *quat) |
|
1294 { |
|
1295 M3Gfloat sinTheta = m3gSqrt(m3gNorm3((const float *) quat)); |
|
1296 |
|
1297 if (sinTheta > EPSILON) { |
|
1298 M3Gfloat s = m3gArcTan(sinTheta, quat->w) / sinTheta; |
|
1299 qLog->x = m3gMul(s, quat->x); |
|
1300 qLog->y = m3gMul(s, quat->y); |
|
1301 qLog->z = m3gMul(s, quat->z); |
|
1302 } |
|
1303 else { |
|
1304 qLog->x = qLog->y = qLog->z = 0.0f; |
|
1305 } |
|
1306 } |
|
1307 |
|
1308 /*! |
|
1309 * \brief Make quaternion the "logarithmic difference" between two |
|
1310 * other quaternions. |
|
1311 */ |
|
1312 static void m3gLogDiffQuat(Vec3 *logDiff, |
|
1313 const Quat *from, const Quat *to) |
|
1314 { |
|
1315 Quat temp; |
|
1316 temp.x = m3gNegate(from->x); |
|
1317 temp.y = m3gNegate(from->y); |
|
1318 temp.z = m3gNegate(from->z); |
|
1319 temp.w = from->w; |
|
1320 m3gMulQuat(&temp, to); |
|
1321 m3gLogQuat(logDiff, &temp); |
|
1322 } |
|
1323 |
|
1324 /*! |
|
1325 * \brief Rounds a float to the nearest integer |
|
1326 * |
|
1327 * Overflows are clamped to the maximum or minimum representable |
|
1328 * value. |
|
1329 */ |
|
1330 static M3Gint m3gRoundToInt(const M3Gfloat a) |
|
1331 { |
|
1332 M3Guint base = FLOAT_AS_UINT(a); |
|
1333 M3Gint signMask, expo; |
|
1334 |
|
1335 /* Decompose the number into sign, exponent, and base number */ |
|
1336 |
|
1337 signMask = ((M3Gint) base >> 31); /* -> 0 or 0xFFFFFFFF */ |
|
1338 expo = (M3Gint)((base >> 23) & 0xFF) - 127; |
|
1339 |
|
1340 /* First check for large values and return either the negative or |
|
1341 * the positive maximum integer in case of overflow. The overflow |
|
1342 * check can be made on the exponent alone, as large floats are |
|
1343 * spaced several integer values apart so that nothing will |
|
1344 * overflow because of rounding later on */ |
|
1345 |
|
1346 if (expo >= 31) { |
|
1347 return (M3Gint)((1U << 31) - 1 + (((M3Guint) signMask) & 1)); |
|
1348 } |
|
1349 |
|
1350 /* Also check for underflow to avoid problems with shifting by |
|
1351 * more than 31 */ |
|
1352 |
|
1353 if (expo < -1) { |
|
1354 return 0; |
|
1355 } |
|
1356 |
|
1357 /* Mask out the sign and exponent bits, shift the base number so |
|
1358 * that the lowest bit corresponds to one half, then add one |
|
1359 * (half) and shift to round to the closest integer. */ |
|
1360 |
|
1361 base = (base | LEADING_ONE) << 8; /* shift mantissa to 1.31 */ |
|
1362 base = base >> (30 - expo); /* integer value as 31.1 */ |
|
1363 base = (base + 1) >> 1; /* round to nearest 32.0 */ |
|
1364 |
|
1365 /* Factor in the sign (negate if originally negative) and |
|
1366 * return */ |
|
1367 |
|
1368 return ((M3Gint) base ^ signMask) - signMask; |
|
1369 } |
|
1370 |
|
1371 /*! |
|
1372 * \brief Calculates ray-triangle intersection. |
|
1373 * |
|
1374 * http://www.acm.org/jgt/papers/MollerTrumbore97 |
|
1375 */ |
|
1376 static M3Gbool m3gIntersectTriangle(const Vec3 *orig, const Vec3 *dir, |
|
1377 const Vec3 *vert0, const Vec3 *vert1, const Vec3 *vert2, |
|
1378 Vec3 *tuv, M3Gint cullMode) |
|
1379 { |
|
1380 Vec3 edge1, edge2, tvec, pvec, qvec; |
|
1381 M3Gfloat det,inv_det; |
|
1382 |
|
1383 /* find vectors for two edges sharing vert0 */ |
|
1384 edge1 = *vert1; |
|
1385 edge2 = *vert2; |
|
1386 m3gSubVec3(&edge1, vert0); |
|
1387 m3gSubVec3(&edge2, vert0); |
|
1388 |
|
1389 /* begin calculating determinant - also used to calculate U parameter */ |
|
1390 m3gCross(&pvec, dir, &edge2); |
|
1391 |
|
1392 /* if determinant is near zero, ray lies in plane of triangle */ |
|
1393 det = m3gDot3(&edge1, &pvec); |
|
1394 |
|
1395 if (cullMode == 0 && det <= 0) return M3G_FALSE; |
|
1396 if (cullMode == 1 && det >= 0) return M3G_FALSE; |
|
1397 |
|
1398 if (det > -EPSILON && det < EPSILON) |
|
1399 return M3G_FALSE; |
|
1400 inv_det = m3gRcp(det); |
|
1401 |
|
1402 /* calculate distance from vert0 to ray origin */ |
|
1403 tvec = *orig; |
|
1404 m3gSubVec3(&tvec, vert0); |
|
1405 |
|
1406 /* calculate U parameter and test bounds */ |
|
1407 tuv->y = m3gMul(m3gDot3(&tvec, &pvec), inv_det); |
|
1408 if (tuv->y < 0.0f || tuv->y > 1.0f) |
|
1409 return M3G_FALSE; |
|
1410 |
|
1411 /* prepare to test V parameter */ |
|
1412 m3gCross(&qvec, &tvec, &edge1); |
|
1413 |
|
1414 /* calculate V parameter and test bounds */ |
|
1415 tuv->z = m3gMul(m3gDot3(dir, &qvec), inv_det); |
|
1416 if (tuv->z < 0.0f || m3gAdd(tuv->y, tuv->z) > 1.0f) |
|
1417 return M3G_FALSE; |
|
1418 |
|
1419 /* calculate t, ray intersects triangle */ |
|
1420 tuv->x = m3gMul(m3gDot3(&edge2, &qvec), inv_det); |
|
1421 |
|
1422 return M3G_TRUE; |
|
1423 } |
|
1424 |
|
1425 /*! |
|
1426 * \brief Calculates ray-box intersection. |
|
1427 * |
|
1428 * http://www.siggraph.org/education/materials/HyperGraph/raytrace/rtinter3.htm |
|
1429 */ |
|
1430 |
|
1431 #define XO orig->x |
|
1432 #define YO orig->y |
|
1433 #define ZO orig->z |
|
1434 #define XD dir->x |
|
1435 #define YD dir->y |
|
1436 #define ZD dir->z |
|
1437 |
|
1438 #define XL box->min[0] |
|
1439 #define YL box->min[1] |
|
1440 #define ZL box->min[2] |
|
1441 #define XH box->max[0] |
|
1442 #define YH box->max[1] |
|
1443 #define ZH box->max[2] |
|
1444 |
|
1445 /*! |
|
1446 * \internal |
|
1447 * \brief Ray - bounding box intersection |
|
1448 * |
|
1449 */ |
|
1450 static M3Gbool m3gIntersectBox(const Vec3 *orig, const Vec3 *dir, const AABB *box) |
|
1451 { |
|
1452 M3Gfloat tnear = M3G_MIN_NEGATIVE_FLOAT; |
|
1453 M3Gfloat tfar = M3G_MAX_POSITIVE_FLOAT; |
|
1454 M3Gfloat t1, t2, temp; |
|
1455 |
|
1456 /* X slab */ |
|
1457 if(XD != 0) { |
|
1458 t1 = m3gSub(XL, XO) / XD; |
|
1459 t2 = m3gSub(XH, XO) / XD; |
|
1460 |
|
1461 if(t1 > t2) { |
|
1462 temp = t1; |
|
1463 t1 = t2; |
|
1464 t2 = temp; |
|
1465 } |
|
1466 |
|
1467 if(t1 > tnear) tnear = t1; |
|
1468 if(t2 < tfar) tfar = t2; |
|
1469 |
|
1470 if(tnear > tfar) return M3G_FALSE; |
|
1471 if(tfar < 0) return M3G_FALSE; |
|
1472 } |
|
1473 else { |
|
1474 if(XO > XH || XO < XL) return M3G_FALSE; |
|
1475 } |
|
1476 |
|
1477 /* Y slab */ |
|
1478 if(YD != 0) { |
|
1479 t1 = m3gSub(YL, YO) / YD; |
|
1480 t2 = m3gSub(YH, YO) / YD; |
|
1481 |
|
1482 if(t1 > t2) { |
|
1483 temp = t1; |
|
1484 t1 = t2; |
|
1485 t2 = temp; |
|
1486 } |
|
1487 |
|
1488 if(t1 > tnear) tnear = t1; |
|
1489 if(t2 < tfar) tfar = t2; |
|
1490 |
|
1491 if(tnear > tfar) return M3G_FALSE; |
|
1492 if(tfar < 0) return M3G_FALSE; |
|
1493 } |
|
1494 else { |
|
1495 if(YO > YH || YO < YL) return M3G_FALSE; |
|
1496 } |
|
1497 |
|
1498 /* Z slab */ |
|
1499 if(ZD != 0) { |
|
1500 t1 = m3gSub(ZL, ZO) / ZD; |
|
1501 t2 = m3gSub(ZH, ZO) / ZD; |
|
1502 |
|
1503 if(t1 > t2) { |
|
1504 temp = t1; |
|
1505 t1 = t2; |
|
1506 t2 = temp; |
|
1507 } |
|
1508 |
|
1509 if(t1 > tnear) tnear = t1; |
|
1510 if(t2 < tfar) tfar = t2; |
|
1511 |
|
1512 if(tnear > tfar) return M3G_FALSE; |
|
1513 if(tfar < 0) return M3G_FALSE; |
|
1514 } |
|
1515 else { |
|
1516 if(ZO > ZH || ZO < ZL) return M3G_FALSE; |
|
1517 } |
|
1518 |
|
1519 return M3G_TRUE; |
|
1520 } |
|
1521 |
|
1522 /*! |
|
1523 * \brief Calculates the intersection of two rectangles. Always fills |
|
1524 * the intersection result. |
|
1525 * |
|
1526 * \param dst result of the intersection |
|
1527 * \param r1 rectangle 1 |
|
1528 * \param r2 rectangle 2 |
|
1529 */ |
|
1530 static M3Gbool m3gIntersectRectangle(M3GRectangle *dst, M3GRectangle *r1, M3GRectangle *r2) |
|
1531 { |
|
1532 M3Gbool intersects = M3G_TRUE; |
|
1533 M3Gint min, max; |
|
1534 |
|
1535 max = (r1->x) >= (r2->x) ? (r1->x) : (r2->x); |
|
1536 min = (r1->x + r1->width) <= (r2->x + r2->width) ? (r1->x + r1->width) : (r2->x + r2->width); |
|
1537 if ((min - max) < 0) intersects = M3G_FALSE; |
|
1538 dst->x = max; |
|
1539 dst->width = min - max; |
|
1540 |
|
1541 max = (r1->y) >= (r2->y) ? (r1->y) : (r2->y); |
|
1542 min = (r1->y + r1->height) <= (r2->y + r2->height) ? (r1->y + r1->height) : (r2->y + r2->height); |
|
1543 if ((min - max) < 0) intersects = M3G_FALSE; |
|
1544 dst->y = max; |
|
1545 dst->height = min - max; |
|
1546 |
|
1547 return intersects; |
|
1548 } |
|
1549 |
|
1550 /*-------- float-to-int color conversions --------*/ |
|
1551 |
|
1552 static M3Guint m3gAlpha1f(M3Gfloat a) |
|
1553 { |
|
1554 M3Guint alpha = (M3Guint) m3gFloatToByte(a); |
|
1555 return (alpha << 24) | M3G_RGB_MASK; |
|
1556 } |
|
1557 |
|
1558 static M3Guint m3gColor3f(M3Gfloat r, M3Gfloat g, M3Gfloat b) |
|
1559 { |
|
1560 return ((M3Guint) m3gFloatToByte(r) << 16) |
|
1561 | ((M3Guint) m3gFloatToByte(g) << 8) |
|
1562 | (M3Guint) m3gFloatToByte(b) |
|
1563 | M3G_ALPHA_MASK; |
|
1564 } |
|
1565 |
|
1566 static M3Guint m3gColor4f(M3Gfloat r, M3Gfloat g, M3Gfloat b, M3Gfloat a) |
|
1567 { |
|
1568 return ((M3Guint) m3gFloatToByte(r) << 16) |
|
1569 | ((M3Guint) m3gFloatToByte(g) << 8) |
|
1570 | (M3Guint) m3gFloatToByte(b) |
|
1571 | ((M3Guint) m3gFloatToByte(a) << 24); |
|
1572 } |
|
1573 |
|
1574 static void m3gFloatColor(M3Gint argb, M3Gfloat intensity, M3Gfloat *rgba) |
|
1575 { |
|
1576 /* NOTE we intentionally aim a bit high here -- some GL |
|
1577 * implementations may round down instead of closest */ |
|
1578 |
|
1579 const M3Gfloat oneOver255 = (M3Gfloat)(1.0001 / 255.0); |
|
1580 |
|
1581 rgba[0] = (M3Gfloat)((argb >> 16) & 0xFF); |
|
1582 rgba[1] = (M3Gfloat)((argb >> 8) & 0xFF); |
|
1583 rgba[2] = (M3Gfloat)((argb ) & 0xFF); |
|
1584 rgba[3] = (M3Gfloat)((argb >> 24) & 0xFF); |
|
1585 |
|
1586 m3gScale4(rgba, m3gMul(oneOver255, intensity)); |
|
1587 } |
|
1588 |
|
1589 /*! |
|
1590 * \brief Converts the 3x3 submatrix of a matrix into fixed point |
|
1591 * |
|
1592 * The input matrix must be an affine one, i.e. the bottom row must be |
|
1593 * 0 0 0 1. The output matrix is guaranteed to be such that it can be |
|
1594 * multiplied with a 16-bit 3-vector without overflowing, while using |
|
1595 * the 32-bit range optimally. |
|
1596 * |
|
1597 * \param mtx the original matrix |
|
1598 * \param elem 9 shorts to hold the fixed point base numbers |
|
1599 * \return floating point exponent for the values in \c elem |
|
1600 * (number of bits to shift left for actual values) |
|
1601 */ |
|
1602 static M3Gint m3gGetFixedPoint3x3Basis(const Matrix *mtx, M3Gshort *elem) |
|
1603 { |
|
1604 M3Gint outExp; |
|
1605 M3Gint row, col; |
|
1606 const M3Guint *m; |
|
1607 |
|
1608 if (!mtx->complete) { |
|
1609 m3gFillClassifiedMatrix((Matrix*) mtx); |
|
1610 } |
|
1611 m = (const M3Guint*) mtx->elem; |
|
1612 |
|
1613 /* First, find the maximum exponent value in the whole matrix */ |
|
1614 |
|
1615 outExp = 0; |
|
1616 for (col = 0; col < 3; ++col) { |
|
1617 for (row = 0; row < 3; ++row) { |
|
1618 M3Gint element = (M3Gint)(m[MELEM(row, col)] & ~SIGN_MASK); |
|
1619 outExp = M3G_MAX(outExp, element); |
|
1620 } |
|
1621 } |
|
1622 outExp >>= 23; |
|
1623 |
|
1624 /* Our candidate exponent is the maximum found plus 9, which is |
|
1625 * guaranteed to shift the maximum unsigned 24-bit mantissa (which |
|
1626 * always will have the high bit set) down to the signed 16-bit |
|
1627 * range */ |
|
1628 |
|
1629 outExp += 9; |
|
1630 |
|
1631 /* Now proceed to sum each row and see what's the actual smallest |
|
1632 * exponent we can safely use without overflowing in a 16+16 |
|
1633 * matrix-vector multiplication; this will win us one bit |
|
1634 * (doubling the precision) compared to the conservative approach |
|
1635 * of just shifting everything down by 10 bits */ |
|
1636 |
|
1637 for (row = 0; row < 3; ++row) { |
|
1638 |
|
1639 /* Sum the absolute values on this row */ |
|
1640 |
|
1641 M3Gint rowSum = 0; |
|
1642 for (col = 0; col < 3; ++col) { |
|
1643 M3Gint a = (M3Gint)(m[MELEM(row, col)] & ~SIGN_MASK); |
|
1644 M3Gint shift = outExp - (a >> 23); |
|
1645 M3G_ASSERT(shift < 265); |
|
1646 |
|
1647 if (shift < 24) { |
|
1648 rowSum += ((a & MANTISSA_MASK) | LEADING_ONE) >> shift; |
|
1649 } |
|
1650 } |
|
1651 |
|
1652 /* Now we have a 26-bit sum of the absolute values on this |
|
1653 * row, and shift that down until we fit the target range of |
|
1654 * [0, 65535]. Note that this still leaves *exactly* enough |
|
1655 * space for adding in an arbitrary 16-bit translation vector |
|
1656 * after multiplying with the matrix! */ |
|
1657 |
|
1658 while (rowSum >= (1 << 16)) { |
|
1659 rowSum >>= 1; |
|
1660 ++outExp; |
|
1661 } |
|
1662 } |
|
1663 |
|
1664 /* De-bias the exponent, but add in an extra 23 to account for the |
|
1665 * decimal bits in the floating point mantissa values we started |
|
1666 * with (we're returning the exponent as "bits to shift left to |
|
1667 * get integers", so we're off by 23 from IEEE notation) */ |
|
1668 |
|
1669 outExp = (outExp - 127) - 23; |
|
1670 |
|
1671 /* Finally, shift all the matrix elements to our final output |
|
1672 * precision */ |
|
1673 |
|
1674 for (col = 0; col < 3; ++col) { |
|
1675 m3gFloatVecToShort(3, mtx->elem + MELEM(0, col), outExp, elem); |
|
1676 elem += 3; |
|
1677 } |
|
1678 return outExp; |
|
1679 } |
|
1680 |
|
1681 /*! |
|
1682 * \brief Gets the translation component of a matrix as fixed point |
|
1683 * |
|
1684 * \param mtx the matrix |
|
1685 * \param elem 3 shorts to write the vector into |
|
1686 * \return floating point exponent for the values in \c elem |
|
1687 * (number of bits to shift left for actual values) |
|
1688 */ |
|
1689 static M3Gint m3gGetFixedPointTranslation(const Matrix *mtx, M3Gshort *elem) |
|
1690 { |
|
1691 const M3Guint *m; |
|
1692 |
|
1693 M3G_ASSERT(m3gIsWUnity(mtx)); |
|
1694 if (!mtx->complete) { |
|
1695 m3gFillClassifiedMatrix((Matrix*) mtx); |
|
1696 } |
|
1697 m = (const M3Guint*) &mtx->elem[MELEM(0, 3)]; |
|
1698 |
|
1699 /* Find the maximum exponent, then scale down by 9 bits from that |
|
1700 * to shift the unsigned 24-bit mantissas to the signed 16-bit |
|
1701 * range */ |
|
1702 { |
|
1703 M3Gint outExp; |
|
1704 M3Guint maxElem = m[0] & ~SIGN_MASK; |
|
1705 maxElem = M3G_MAX(maxElem, m[1] & ~SIGN_MASK); |
|
1706 maxElem = M3G_MAX(maxElem, m[2] & ~SIGN_MASK); |
|
1707 |
|
1708 outExp = (M3Gint)(maxElem >> 23) - (127 + 23) + 9; |
|
1709 m3gFloatVecToShort(3, mtx->elem + MELEM(0, 3), outExp, elem); |
|
1710 return outExp; |
|
1711 } |
|
1712 } |
|
1713 |
|
1714 /*! |
|
1715 * \internal |
|
1716 * \brief Compute a bounding box enclosing two other boxes |
|
1717 * |
|
1718 * \param box box to fit |
|
1719 * \param a first box to enclose or NULL |
|
1720 * \param b second box to enclose or NULL |
|
1721 * |
|
1722 * \note If both input boxes are NULL, the box is not modified. |
|
1723 */ |
|
1724 static void m3gFitAABB(AABB *box, const AABB *a, const AABB *b) |
|
1725 { |
|
1726 int i; |
|
1727 |
|
1728 M3G_ASSERT_PTR(box); |
|
1729 |
|
1730 if (a) { |
|
1731 m3gValidateAABB(a); |
|
1732 } |
|
1733 if (b) { |
|
1734 m3gValidateAABB(b); |
|
1735 } |
|
1736 |
|
1737 if (a && b) { |
|
1738 for (i = 0; i < 3; ++i) { |
|
1739 box->min[i] = M3G_MIN(a->min[i], b->min[i]); |
|
1740 box->max[i] = M3G_MAX(a->max[i], b->max[i]); |
|
1741 } |
|
1742 } |
|
1743 else if (a) { |
|
1744 *box = *a; |
|
1745 } |
|
1746 else if (b) { |
|
1747 *box = *b; |
|
1748 } |
|
1749 } |
|
1750 |
|
1751 /* |
|
1752 * \internal |
|
1753 * \brief Transform an axis-aligned bounding box with a matrix |
|
1754 * |
|
1755 * This results in a box that encloses the transformed original box. |
|
1756 * |
|
1757 * Based on "Transforming Axis-Aligned Bounding Boxes" by Jim Arvo |
|
1758 * from Graphics Gems. |
|
1759 * |
|
1760 * \note The bottom row of the matrix is ignored in the transformation. |
|
1761 */ |
|
1762 static void m3gTransformAABB(AABB *box, const Matrix *mtx) |
|
1763 { |
|
1764 M3Gfloat boxMin[3], boxMax[3]; |
|
1765 M3Gfloat newMin[3], newMax[3]; |
|
1766 |
|
1767 m3gValidateAABB(box); |
|
1768 |
|
1769 if (!mtx->complete) { |
|
1770 m3gFillClassifiedMatrix((Matrix*) mtx); |
|
1771 } |
|
1772 |
|
1773 /* Get the original minimum and maximum extents as floats, and add |
|
1774 * the translation as the base for the transformed box */ |
|
1775 { |
|
1776 int i; |
|
1777 for (i = 0; i < 3; ++i) { |
|
1778 boxMin[i] = box->min[i]; |
|
1779 boxMax[i] = box->max[i]; |
|
1780 newMin[i] = newMax[i] = M44F(mtx, i, 3); |
|
1781 } |
|
1782 } |
|
1783 |
|
1784 /* Transform into the new minimum and maximum coordinates using |
|
1785 * the upper left 3x3 part of the matrix */ |
|
1786 { |
|
1787 M3Gint row, col; |
|
1788 |
|
1789 for (row = 0; row < 3; ++row) { |
|
1790 for (col = 0; col < 3; ++col) { |
|
1791 M3Gfloat a = m3gMul(M44F(mtx, row, col), boxMin[col]); |
|
1792 M3Gfloat b = m3gMul(M44F(mtx, row, col), boxMax[col]); |
|
1793 |
|
1794 if (a < b) { |
|
1795 newMin[row] = m3gAdd(newMin[row], a); |
|
1796 newMax[row] = m3gAdd(newMax[row], b); |
|
1797 } |
|
1798 else { |
|
1799 newMin[row] = m3gAdd(newMin[row], b); |
|
1800 newMax[row] = m3gAdd(newMax[row], a); |
|
1801 } |
|
1802 } |
|
1803 } |
|
1804 } |
|
1805 |
|
1806 /* Write back into the bounding box */ |
|
1807 { |
|
1808 int i; |
|
1809 for (i = 0; i < 3; ++i) { |
|
1810 box->min[i] = newMin[i]; |
|
1811 box->max[i] = newMax[i]; |
|
1812 } |
|
1813 } |
|
1814 |
|
1815 m3gValidateAABB(box); |
|
1816 } |
|
1817 |
|
1818 #if defined(M3G_DEBUG) |
|
1819 /*! |
|
1820 * \brief |
|
1821 */ |
|
1822 static void m3gValidateAABB(const AABB *aabb) |
|
1823 { |
|
1824 m3gValidateFloats(6, (float*) aabb); |
|
1825 } |
|
1826 #endif |
|
1827 |
|
1828 /*---------------------------------------------------------------------- |
|
1829 * Public functions |
|
1830 *--------------------------------------------------------------------*/ |
|
1831 |
|
1832 /*! |
|
1833 * \brief Linear interpolation of vectors |
|
1834 * |
|
1835 * \param size number of components |
|
1836 * \param vec output vector |
|
1837 * \param s interpolation factor |
|
1838 * \param start initial value |
|
1839 * \param end target value |
|
1840 */ |
|
1841 #if defined(M3G_HW_FLOAT_VFPV2) |
|
1842 |
|
1843 M3G_API __asm void m3gLerp(M3Gint size, |
|
1844 M3Gfloat *vec, |
|
1845 M3Gfloat s, |
|
1846 const M3Gfloat *start, const M3Gfloat *end) |
|
1847 { |
|
1848 // r0 = size |
|
1849 // r1 = *vec |
|
1850 // r2 = s |
|
1851 // r3 = *start |
|
1852 // sp[0] = *end |
|
1853 |
|
1854 CODE32 |
|
1855 /* |
|
1856 M3Gfloat sCompl = 1.0 - s; |
|
1857 for (i = 0; i < size; ++i) { |
|
1858 vec[i] = sCompl*start[i] + s*end[i]; |
|
1859 } |
|
1860 */ |
|
1861 // |
|
1862 // if size = 0, return |
|
1863 // |
|
1864 CMP r0, #0 |
|
1865 BXEQ lr |
|
1866 |
|
1867 FMSR s0, r2 |
|
1868 MOV r2, r3 |
|
1869 LDR r3, [sp] |
|
1870 |
|
1871 FLDS s1,=1.0 |
|
1872 STMFD sp!, {r4-r5} |
|
1873 FSUBS s2, s1, s0 // sCompl = 1 - s |
|
1874 |
|
1875 FMRX r4, FPSCR |
|
1876 CMP r0, #4 |
|
1877 BGT _m3gLerp_over4Loop |
|
1878 |
|
1879 // |
|
1880 // if 1 < size <= 4 |
|
1881 // |
|
1882 // set vector STRIDE = 1, LENGTH = 4 |
|
1883 BIC r12, r4, #0x00370000 |
|
1884 ORR r12, #(3<<16) |
|
1885 FMXR FPSCR, r12 |
|
1886 |
|
1887 FLDMIAS r2!, {s4-s7} // load the start[i] values |
|
1888 FLDMIAS r3!, {s8-s11} // load the end[i] values |
|
1889 FMULS s12, s4, s2 // [s12-s15] = [sCompl*start[0] .. sCompl*start[3]] |
|
1890 FMACS s12, s8, s0 // [s12-s15] += [ s*end[0] .. s*end[3]] |
|
1891 CMP r0, #1 |
|
1892 FSTS s12, [r1] |
|
1893 FSTSGT s13, [r1, #4] |
|
1894 CMP r0, #3 |
|
1895 FSTSGE s14, [r1, #8] |
|
1896 FSTSGT s15, [r1, #12] |
|
1897 |
|
1898 FMXR FPSCR, r4 |
|
1899 |
|
1900 LDMFD sp!, {r4-r5} |
|
1901 |
|
1902 BX lr |
|
1903 // |
|
1904 // if size > 4, interpolate 8 values in one loop |
|
1905 // |
|
1906 _m3gLerp_over4Loop |
|
1907 |
|
1908 FSTMFDD sp!, {d8-d9} |
|
1909 MOVS r5, r0, ASR #3 // size/8 |
|
1910 SUB r0, r0, r5, LSL #3 // tail length |
|
1911 |
|
1912 // set vector STRIDE = 1, LENGTH = 8 |
|
1913 BIC r12, r4, #0x00370000 |
|
1914 ORR r12, #(7<<16) |
|
1915 FMXR FPSCR, r12 |
|
1916 |
|
1917 |
|
1918 _m3gLerp_alignedLoop |
|
1919 |
|
1920 FLDMIASNE r2!, {s8-s15} // load the start[i] values |
|
1921 FLDMIASNE r3!, {s16-s23} // load the end[i] values |
|
1922 FMULSNE s24, s8, s2 // [s16-s23] = [ sCompl*start[0] sCompl*start[1] .. sCompl*start[7]] |
|
1923 FMACSNE s24, s16, s0 // [s16-s23] += [ s*end[0] s*end[1] .. s*end[7]] |
|
1924 FSTMIASNE r1!, {s24-s31} |
|
1925 SUBSNE r5, #1 |
|
1926 |
|
1927 BNE _m3gLerp_alignedLoop |
|
1928 |
|
1929 // process the 0-7 remaining values in the tail |
|
1930 |
|
1931 CMP r0, #1 |
|
1932 FLDMIASGE r2!, {s8-s15} |
|
1933 FLDMIASGE r3!, {s16-s23} |
|
1934 FMULSGE s24, s8, s2 |
|
1935 FMACSGE s24, s16, s0 |
|
1936 FSTSGE s24, [r1] |
|
1937 FSTSGT s25, [r1, #4] |
|
1938 CMP r0, #3 |
|
1939 FSTSGE s26, [r1, #8] |
|
1940 FSTSGT s27, [r1, #12] |
|
1941 CMP r0, #5 |
|
1942 FSTSGE s28, [r1, #16] |
|
1943 FSTSGT s29, [r1, #20] |
|
1944 CMP r0, #7 |
|
1945 FSTSEQ s30, [r1, #24] |
|
1946 |
|
1947 FMXR FPSCR, r4 |
|
1948 |
|
1949 FLDMFDD sp!, {d8-d9} |
|
1950 LDMFD sp!, {r4-r5} |
|
1951 |
|
1952 BX lr |
|
1953 |
|
1954 } |
|
1955 #else /* #if defined(M3G_HW_FLOAT_VFPV2) */ |
|
1956 |
|
1957 M3G_API void m3gLerp(M3Gint size, |
|
1958 M3Gfloat *vec, |
|
1959 M3Gfloat s, |
|
1960 const M3Gfloat *start, const M3Gfloat *end) |
|
1961 { |
|
1962 int i; |
|
1963 |
|
1964 M3Gfloat sCompl = m3gSub(1.f, s); |
|
1965 for (i = 0; i < size; ++i) { |
|
1966 vec[i] = m3gAdd(m3gMul(sCompl, start[i]), m3gMul(s, end[i])); |
|
1967 } |
|
1968 } |
|
1969 #endif /* #if defined(M3G_HW_FLOAT_VFPV2) */ |
|
1970 |
|
1971 /*! |
|
1972 * \brief Hermite spline interpolation of vectors |
|
1973 * |
|
1974 * \param size number of components |
|
1975 * \param vec output vector |
|
1976 * \param s interpolation factor |
|
1977 * \param start start value vector |
|
1978 * \param end end value vector |
|
1979 * \param tStart start tangent vector |
|
1980 * \param tEnd end tangent vector |
|
1981 */ |
|
1982 M3G_API void m3gHermite(M3Gint size, |
|
1983 M3Gfloat *vec, |
|
1984 M3Gfloat s, |
|
1985 const M3Gfloat *start, const M3Gfloat *end, |
|
1986 const M3Gfloat *tStart, const M3Gfloat *tEnd) |
|
1987 { |
|
1988 M3Gfloat s2 = m3gSquare(s); |
|
1989 M3Gfloat s3 = m3gMul(s2, s); |
|
1990 int i; |
|
1991 |
|
1992 for (i = 0; i < size; ++i) { |
|
1993 vec[i] = |
|
1994 m3gMadd(start[i], |
|
1995 m3gAdd(m3gSub(m3gDouble(s3), m3gMul(3.f, s2)), 1.f), |
|
1996 m3gMadd(end[i], |
|
1997 m3gSub(m3gMul(3.f, s2), m3gDouble(s3)), |
|
1998 m3gMadd(tStart[i], |
|
1999 m3gAdd(m3gSub(s3, m3gDouble(s2)), s), |
|
2000 m3gMul(tEnd[i], |
|
2001 m3gSub(s3, s2))))); |
|
2002 |
|
2003 } |
|
2004 |
|
2005 /* vec = ( 2*s3 - 3*s2 + 1) * start |
|
2006 + (-2*s3 + 3*s2 ) * end |
|
2007 + ( s3 - 2*s2 + s) * tStart |
|
2008 + ( s3 - s2 ) * tEnd; */ |
|
2009 } |
|
2010 |
|
2011 /*--------------------------------------------------------------------*/ |
|
2012 |
|
2013 /*! |
|
2014 * \brief Sets a matrix to a copy of another matrix |
|
2015 */ |
|
2016 M3G_API void m3gCopyMatrix(Matrix *dst, const Matrix *src) |
|
2017 { |
|
2018 M3G_ASSERT(dst != NULL && src != NULL); |
|
2019 *dst = *src; |
|
2020 } |
|
2021 |
|
2022 /*! |
|
2023 * \brief Vector addition |
|
2024 */ |
|
2025 M3G_API void m3gAddVec3(Vec3 *vec, const Vec3 *other) |
|
2026 { |
|
2027 vec->x = m3gAdd(vec->x, other->x); |
|
2028 vec->y = m3gAdd(vec->y, other->y); |
|
2029 vec->z = m3gAdd(vec->z, other->z); |
|
2030 } |
|
2031 |
|
2032 /*! |
|
2033 * \brief Vector addition |
|
2034 */ |
|
2035 M3G_API void m3gAddVec4(Vec4 *vec, const Vec4 *other) |
|
2036 { |
|
2037 vec->x = m3gAdd(vec->x, other->x); |
|
2038 vec->y = m3gAdd(vec->y, other->y); |
|
2039 vec->z = m3gAdd(vec->z, other->z); |
|
2040 vec->w = m3gAdd(vec->w, other->w); |
|
2041 } |
|
2042 |
|
2043 /*! |
|
2044 * \brief Cross product of two 3D vectors expressed as 4D vectors |
|
2045 */ |
|
2046 M3G_API void m3gCross(Vec3 *dst, const Vec3 *a, const Vec3 *b) |
|
2047 { |
|
2048 dst->x = m3gSub(m3gMul(a->y, b->z), m3gMul(a->z, b->y)); |
|
2049 dst->y = m3gSub(m3gMul(a->z, b->x), m3gMul(a->x, b->z)); |
|
2050 dst->z = m3gSub(m3gMul(a->x, b->y), m3gMul(a->y, b->x)); |
|
2051 } |
|
2052 |
|
2053 /*! |
|
2054 * \brief Dot product of two vectors |
|
2055 */ |
|
2056 M3G_API M3Gfloat m3gDot3(const Vec3 *a, const Vec3 *b) |
|
2057 { |
|
2058 M3Gfloat d; |
|
2059 d = m3gMul(a->x, b->x); |
|
2060 d = m3gMadd(a->y, b->y, d); |
|
2061 d = m3gMadd(a->z, b->z, d); |
|
2062 return d; |
|
2063 } |
|
2064 |
|
2065 /*! |
|
2066 * \brief Dot product of two vectors |
|
2067 */ |
|
2068 M3G_API M3Gfloat m3gDot4(const Vec4 *a, const Vec4 *b) |
|
2069 { |
|
2070 M3Gfloat d; |
|
2071 d = m3gMul(a->x, b->x); |
|
2072 d = m3gMadd(a->y, b->y, d); |
|
2073 d = m3gMadd(a->z, b->z, d); |
|
2074 d = m3gMadd(a->w, b->w, d); |
|
2075 return d; |
|
2076 } |
|
2077 |
|
2078 /*! |
|
2079 * \brief |
|
2080 */ |
|
2081 M3G_API void m3gSetVec3(Vec3 *v, M3Gfloat x, M3Gfloat y, M3Gfloat z) |
|
2082 { |
|
2083 M3G_ASSERT_PTR(v); |
|
2084 v->x = x; |
|
2085 v->y = y; |
|
2086 v->z = z; |
|
2087 } |
|
2088 |
|
2089 /*! |
|
2090 * \brief |
|
2091 */ |
|
2092 M3G_API void m3gSetVec4(Vec4 *v, M3Gfloat x, M3Gfloat y, M3Gfloat z, M3Gfloat w) |
|
2093 { |
|
2094 M3G_ASSERT_PTR(v); |
|
2095 v->x = x; |
|
2096 v->y = y; |
|
2097 v->z = z; |
|
2098 v->w = w; |
|
2099 } |
|
2100 |
|
2101 /*! |
|
2102 * \brief Vector subtraction |
|
2103 */ |
|
2104 M3G_API void m3gSubVec3(Vec3 *vec, const Vec3 *other) |
|
2105 { |
|
2106 vec->x = m3gSub(vec->x, other->x); |
|
2107 vec->y = m3gSub(vec->y, other->y); |
|
2108 vec->z = m3gSub(vec->z, other->z); |
|
2109 } |
|
2110 |
|
2111 /*! |
|
2112 * \brief Vector subtraction |
|
2113 */ |
|
2114 M3G_API void m3gSubVec4(Vec4 *vec, const Vec4 *other) |
|
2115 { |
|
2116 vec->x = m3gSub(vec->x, other->x); |
|
2117 vec->y = m3gSub(vec->y, other->y); |
|
2118 vec->z = m3gSub(vec->z, other->z); |
|
2119 vec->w = m3gSub(vec->w, other->w); |
|
2120 } |
|
2121 |
|
2122 /*! |
|
2123 * \brief Vector length |
|
2124 */ |
|
2125 M3G_API M3Gfloat m3gLengthVec3(const Vec3 *vec) |
|
2126 { |
|
2127 return m3gSqrt(m3gAdd(m3gAdd(m3gSquare(vec->x), |
|
2128 m3gSquare(vec->y)), |
|
2129 m3gSquare(vec->z))); |
|
2130 } |
|
2131 |
|
2132 /*! |
|
2133 * \brief Vector scaling |
|
2134 */ |
|
2135 M3G_API void m3gScaleVec3(Vec3 *vec, const M3Gfloat s) |
|
2136 { |
|
2137 vec->x = m3gMul(vec->x, s); |
|
2138 vec->y = m3gMul(vec->y, s); |
|
2139 vec->z = m3gMul(vec->z, s); |
|
2140 } |
|
2141 |
|
2142 /*! |
|
2143 * \brief Vector scaling |
|
2144 */ |
|
2145 M3G_API void m3gScaleVec4(Vec4 *vec, const M3Gfloat s) |
|
2146 { |
|
2147 vec->x = m3gMul(vec->x, s); |
|
2148 vec->y = m3gMul(vec->y, s); |
|
2149 vec->z = m3gMul(vec->z, s); |
|
2150 vec->w = m3gMul(vec->w, s); |
|
2151 } |
|
2152 |
|
2153 /*! |
|
2154 * \brief Returns an angle-axis representation for a quaternion |
|
2155 * |
|
2156 * \note There are many, and this is not guaranteed to return a |
|
2157 * particular one |
|
2158 */ |
|
2159 M3G_API void m3gGetAngleAxis(const Quat *quat, M3Gfloat *angle, Vec3 *axis) |
|
2160 { |
|
2161 M3Gfloat x, y, z, sinTheta; |
|
2162 |
|
2163 M3G_ASSERT_PTR(quat); |
|
2164 M3G_ASSERT_PTR(angle); |
|
2165 M3G_ASSERT_PTR(axis); |
|
2166 |
|
2167 x = quat->x; |
|
2168 y = quat->y; |
|
2169 z = quat->z; |
|
2170 |
|
2171 sinTheta = m3gSqrt(m3gAdd(m3gAdd(m3gSquare(x), m3gSquare(y)), |
|
2172 m3gSquare(z))); |
|
2173 |
|
2174 if (sinTheta > EPSILON) { |
|
2175 M3Gfloat ooSinTheta = m3gRcp(sinTheta); |
|
2176 axis->x = m3gMul(x, ooSinTheta); |
|
2177 axis->y = m3gMul(y, ooSinTheta); |
|
2178 axis->z = m3gMul(z, ooSinTheta); |
|
2179 } |
|
2180 else { |
|
2181 /* return a valid axis even for no rotation */ |
|
2182 axis->x = axis->y = 0.0f; |
|
2183 axis->z = 1.0f; |
|
2184 } |
|
2185 *angle = m3gMul(2.0f * RAD2DEG, m3gArcCos(quat->w)); |
|
2186 } |
|
2187 |
|
2188 /*! |
|
2189 * \brief Gets a single matrix column |
|
2190 */ |
|
2191 M3G_API void m3gGetMatrixColumn(const Matrix *mtx, M3Gint col, Vec4 *dst) |
|
2192 { |
|
2193 if ((col & ~3) == 0) { |
|
2194 if (!mtx->complete) { |
|
2195 m3gFillClassifiedMatrix((Matrix*)mtx); |
|
2196 } |
|
2197 dst->x = M44F(mtx, 0, col); |
|
2198 dst->y = M44F(mtx, 1, col); |
|
2199 dst->z = M44F(mtx, 2, col); |
|
2200 dst->w = M44F(mtx, 3, col); |
|
2201 } |
|
2202 else { |
|
2203 M3G_ASSERT(M3G_FALSE); |
|
2204 } |
|
2205 } |
|
2206 |
|
2207 /*! |
|
2208 * \brief Returns the floating point values of a matrix as consecutive |
|
2209 * columns |
|
2210 */ |
|
2211 M3G_API void m3gGetMatrixColumns(const Matrix *mtx, M3Gfloat *dst) |
|
2212 { |
|
2213 M3G_ASSERT(mtx != NULL && dst != NULL); |
|
2214 |
|
2215 if (!mtx->complete) { |
|
2216 m3gFillClassifiedMatrix((Matrix*)mtx); |
|
2217 } |
|
2218 m3gCopy(dst, mtx->elem, sizeof(mtx->elem)); |
|
2219 } |
|
2220 |
|
2221 /*! |
|
2222 * \brief Gets a single matrix row |
|
2223 */ |
|
2224 M3G_API void m3gGetMatrixRow(const Matrix *mtx, M3Gint row, Vec4 *dst) |
|
2225 { |
|
2226 if ((row & ~3) == 0) { |
|
2227 if (!mtx->complete) { |
|
2228 m3gFillClassifiedMatrix((Matrix*)mtx); |
|
2229 } |
|
2230 dst->x = M44F(mtx, row, 0); |
|
2231 dst->y = M44F(mtx, row, 1); |
|
2232 dst->z = M44F(mtx, row, 2); |
|
2233 dst->w = M44F(mtx, row, 3); |
|
2234 } |
|
2235 else { |
|
2236 M3G_ASSERT(M3G_FALSE); |
|
2237 } |
|
2238 } |
|
2239 |
|
2240 /*! |
|
2241 * \brief Returns the floating point values of a matrix as consecutive |
|
2242 * rows |
|
2243 */ |
|
2244 M3G_API void m3gGetMatrixRows(const Matrix *mtx, M3Gfloat *dst) |
|
2245 { |
|
2246 M3G_ASSERT(mtx != NULL && dst != NULL); |
|
2247 |
|
2248 if (!mtx->complete) { |
|
2249 m3gFillClassifiedMatrix((Matrix*)mtx); |
|
2250 } |
|
2251 { |
|
2252 int row; |
|
2253 for (row = 0; row < 4; ++row) { |
|
2254 *dst++ = mtx->elem[ 0 + row]; |
|
2255 *dst++ = mtx->elem[ 4 + row]; |
|
2256 *dst++ = mtx->elem[ 8 + row]; |
|
2257 *dst++ = mtx->elem[12 + row]; |
|
2258 } |
|
2259 } |
|
2260 } |
|
2261 |
|
2262 /*! |
|
2263 * \brief Sets a matrix to identity |
|
2264 */ |
|
2265 M3G_API void m3gIdentityMatrix(Matrix *mtx) |
|
2266 { |
|
2267 M3G_ASSERT(mtx != NULL); |
|
2268 m3gClassifyAs(MC_IDENTITY, mtx); |
|
2269 } |
|
2270 |
|
2271 /*! |
|
2272 * \brief Sets a quaternion to identity |
|
2273 */ |
|
2274 M3G_API void m3gIdentityQuat(Quat *quat) |
|
2275 { |
|
2276 M3G_ASSERT(quat != NULL); |
|
2277 quat->x = quat->y = quat->z = 0.0f; |
|
2278 quat->w = 1.0f; |
|
2279 } |
|
2280 |
|
2281 /*! |
|
2282 * \brief Inverts a matrix |
|
2283 */ |
|
2284 M3G_API M3Gbool m3gInvertMatrix(Matrix *mtx) |
|
2285 { |
|
2286 M3Gfloat *matrix; |
|
2287 M3Gint i; |
|
2288 M3Gfloat tmp[12]; |
|
2289 M3Gfloat src[16]; |
|
2290 M3Gfloat det; |
|
2291 |
|
2292 M3G_ASSERT(mtx != NULL); |
|
2293 |
|
2294 if (!m3gIsClassified(mtx)) { |
|
2295 m3gClassify(mtx); |
|
2296 } |
|
2297 |
|
2298 /* Quick exit for identity */ |
|
2299 |
|
2300 if (mtx->mask == MC_IDENTITY) { |
|
2301 return M3G_TRUE; |
|
2302 } |
|
2303 |
|
2304 /* Look for other common cases; these require that we have valid |
|
2305 * values in all the elements, so fill in the values first */ |
|
2306 { |
|
2307 M3Guint mask = mtx->mask; |
|
2308 |
|
2309 if (!mtx->complete) { |
|
2310 m3gFillClassifiedMatrix(mtx); |
|
2311 } |
|
2312 |
|
2313 if ((mask | (0x3F << 24)) == MC_TRANSLATION) { |
|
2314 M44F(mtx, 0, 3) = m3gNegate(M44F(mtx, 0, 3)); |
|
2315 M44F(mtx, 1, 3) = m3gNegate(M44F(mtx, 1, 3)); |
|
2316 M44F(mtx, 2, 3) = m3gNegate(M44F(mtx, 2, 3)); |
|
2317 mtx->mask = MC_TRANSLATION; |
|
2318 return M3G_TRUE; |
|
2319 } |
|
2320 if ((mask | 0x300C03) == MC_SCALING) { |
|
2321 if ((mask & 3 ) == 0 || |
|
2322 (mask & (3 << 10)) == 0 || |
|
2323 (mask & (3 << 20)) == 0) { |
|
2324 return M3G_FALSE; /* zero scale for at least one axis */ |
|
2325 } |
|
2326 M44F(mtx, 0, 0) = m3gRcp(M44F(mtx, 0, 0)); |
|
2327 M44F(mtx, 1, 1) = m3gRcp(M44F(mtx, 1, 1)); |
|
2328 M44F(mtx, 2, 2) = m3gRcp(M44F(mtx, 2, 2)); |
|
2329 return M3G_TRUE; |
|
2330 } |
|
2331 } |
|
2332 |
|
2333 /* Do a full 4x4 inversion as a last resort */ |
|
2334 |
|
2335 matrix = mtx->elem; |
|
2336 |
|
2337 /* transpose matrix */ |
|
2338 for (i = 0; i < 4; i++) { |
|
2339 src[i] = matrix[i*4]; |
|
2340 src[i+4] = matrix[i*4+1]; |
|
2341 src[i+8] = matrix[i*4+2]; |
|
2342 src[i+12] = matrix[i*4+3]; |
|
2343 } |
|
2344 |
|
2345 /* calculate pairs for first 8 elements (cofactors) */ |
|
2346 tmp[0] = src[10]*src[15]; |
|
2347 tmp[1] = src[11]*src[14]; |
|
2348 tmp[2] = src[9]*src[15]; |
|
2349 tmp[3] = src[11]*src[13]; |
|
2350 tmp[4] = src[9]*src[14]; |
|
2351 tmp[5] = src[10]*src[13]; |
|
2352 tmp[6] = src[8]*src[15]; |
|
2353 tmp[7] = src[11]*src[12]; |
|
2354 tmp[8] = src[8]*src[14]; |
|
2355 tmp[9] = src[10]*src[12]; |
|
2356 tmp[10] = src[8]*src[13]; |
|
2357 tmp[11] = src[9]*src[12]; |
|
2358 |
|
2359 /* calculate first 8 elements (cofactors) */ |
|
2360 matrix[0] = tmp[0]*src[5] + tmp[3]*src[6] + tmp[4]*src[7]; |
|
2361 matrix[0] -= tmp[1]*src[5] + tmp[2]*src[6] + tmp[5]*src[7]; |
|
2362 matrix[1] = tmp[1]*src[4] + tmp[6]*src[6] + tmp[9]*src[7]; |
|
2363 matrix[1] -= tmp[0]*src[4] + tmp[7]*src[6] + tmp[8]*src[7]; |
|
2364 matrix[2] = tmp[2]*src[4] + tmp[7]*src[5] + tmp[10]*src[7]; |
|
2365 matrix[2] -= tmp[3]*src[4] + tmp[6]*src[5] + tmp[11]*src[7]; |
|
2366 matrix[3] = tmp[5]*src[4] + tmp[8]*src[5] + tmp[11]*src[6]; |
|
2367 matrix[3] -= tmp[4]*src[4] + tmp[9]*src[5] + tmp[10]*src[6]; |
|
2368 matrix[4] = tmp[1]*src[1] + tmp[2]*src[2] + tmp[5]*src[3]; |
|
2369 matrix[4] -= tmp[0]*src[1] + tmp[3]*src[2] + tmp[4]*src[3]; |
|
2370 matrix[5] = tmp[0]*src[0] + tmp[7]*src[2] + tmp[8]*src[3]; |
|
2371 matrix[5] -= tmp[1]*src[0] + tmp[6]*src[2] + tmp[9]*src[3]; |
|
2372 matrix[6] = tmp[3]*src[0] + tmp[6]*src[1] + tmp[11]*src[3]; |
|
2373 matrix[6] -= tmp[2]*src[0] + tmp[7]*src[1] + tmp[10]*src[3]; |
|
2374 matrix[7] = tmp[4]*src[0] + tmp[9]*src[1] + tmp[10]*src[2]; |
|
2375 matrix[7] -= tmp[5]*src[0] + tmp[8]*src[1] + tmp[11]*src[2]; |
|
2376 |
|
2377 /* calculate pairs for second 8 elements (cofactors) */ |
|
2378 tmp[0] = src[2]*src[7]; |
|
2379 tmp[1] = src[3]*src[6]; |
|
2380 tmp[2] = src[1]*src[7]; |
|
2381 tmp[3] = src[3]*src[5]; |
|
2382 tmp[4] = src[1]*src[6]; |
|
2383 tmp[5] = src[2]*src[5]; |
|
2384 tmp[6] = src[0]*src[7]; |
|
2385 tmp[7] = src[3]*src[4]; |
|
2386 tmp[8] = src[0]*src[6]; |
|
2387 tmp[9] = src[2]*src[4]; |
|
2388 tmp[10] = src[0]*src[5]; |
|
2389 tmp[11] = src[1]*src[4]; |
|
2390 |
|
2391 /* calculate second 8 elements (cofactors) */ |
|
2392 matrix[8] = tmp[0]*src[13] + tmp[3]*src[14] + tmp[4]*src[15]; |
|
2393 matrix[8] -= tmp[1]*src[13] + tmp[2]*src[14] + tmp[5]*src[15]; |
|
2394 matrix[9] = tmp[1]*src[12] + tmp[6]*src[14] + tmp[9]*src[15]; |
|
2395 matrix[9] -= tmp[0]*src[12] + tmp[7]*src[14] + tmp[8]*src[15]; |
|
2396 matrix[10] = tmp[2]*src[12] + tmp[7]*src[13] + tmp[10]*src[15]; |
|
2397 matrix[10] -= tmp[3]*src[12] + tmp[6]*src[13] + tmp[11]*src[15]; |
|
2398 matrix[11] = tmp[5]*src[12] + tmp[8]*src[13] + tmp[11]*src[14]; |
|
2399 matrix[11] -= tmp[4]*src[12] + tmp[9]*src[13] + tmp[10]*src[14]; |
|
2400 matrix[12] = tmp[2]*src[10] + tmp[5]*src[11] + tmp[1]*src[9]; |
|
2401 matrix[12] -= tmp[4]*src[11] + tmp[0]*src[9] + tmp[3]*src[10]; |
|
2402 matrix[13] = tmp[8]*src[11] + tmp[0]*src[8] + tmp[7]*src[10]; |
|
2403 matrix[13] -= tmp[6]*src[10] + tmp[9]*src[11] + tmp[1]*src[8]; |
|
2404 matrix[14] = tmp[6]*src[9] + tmp[11]*src[11] + tmp[3]*src[8]; |
|
2405 matrix[14] -= tmp[10]*src[11] + tmp[2]*src[8] + tmp[7]*src[9]; |
|
2406 matrix[15] = tmp[10]*src[10] + tmp[4]*src[8] + tmp[9]*src[9]; |
|
2407 matrix[15] -= tmp[8]*src[9] + tmp[11]*src[10] + tmp[5]*src[8]; |
|
2408 |
|
2409 /* calculate determinant */ |
|
2410 det = src[0]*matrix[0]+src[1]*matrix[1]+src[2]*matrix[2]+src[3]*matrix[3]; |
|
2411 |
|
2412 /* matrix has no inverse */ |
|
2413 if (det == 0.0f) { |
|
2414 return M3G_FALSE; |
|
2415 } |
|
2416 |
|
2417 /* calculate matrix inverse */ |
|
2418 det = 1/det; |
|
2419 for (i = 0; i < 16; i++) { |
|
2420 matrix[i] *= det; |
|
2421 } |
|
2422 |
|
2423 mtx->classified = M3G_FALSE; |
|
2424 return M3G_TRUE; |
|
2425 } |
|
2426 |
|
2427 /*! |
|
2428 * \brief Sets a matrix to the inverse of another matrix |
|
2429 */ |
|
2430 M3G_API M3Gbool m3gMatrixInverse(Matrix *mtx, const Matrix *other) |
|
2431 { |
|
2432 M3G_ASSERT(mtx != NULL && other != NULL); |
|
2433 |
|
2434 if (!m3gIsClassified(other)) { |
|
2435 m3gClassify((Matrix*)other); |
|
2436 } |
|
2437 |
|
2438 m3gCopyMatrix(mtx, other); |
|
2439 return m3gInvertMatrix(mtx); |
|
2440 } |
|
2441 |
|
2442 /*! |
|
2443 * \brief Sets a matrix to the transpose of another matrix |
|
2444 */ |
|
2445 M3G_API void m3gMatrixTranspose(Matrix *mtx, const Matrix *other) |
|
2446 { |
|
2447 M3Gbyte i; |
|
2448 M3G_ASSERT(mtx != NULL && other != NULL); |
|
2449 |
|
2450 if (!other->complete) { |
|
2451 m3gFillClassifiedMatrix((Matrix *)other); |
|
2452 } |
|
2453 |
|
2454 for (i = 0; i < 4; i++) { |
|
2455 mtx->elem[i] = other->elem[i*4]; |
|
2456 mtx->elem[i+4] = other->elem[i*4+1]; |
|
2457 mtx->elem[i+8] = other->elem[i*4+2]; |
|
2458 mtx->elem[i+12] = other->elem[i*4+3]; |
|
2459 } |
|
2460 mtx->classified = M3G_FALSE; |
|
2461 mtx->complete = M3G_TRUE; |
|
2462 } |
|
2463 |
|
2464 M3G_API M3Gbool m3gInverseTranspose(Matrix *mtx, const Matrix *other) |
|
2465 { |
|
2466 Matrix temp; |
|
2467 if (!m3gMatrixInverse(&temp, other)) { |
|
2468 return M3G_FALSE; |
|
2469 } |
|
2470 m3gMatrixTranspose(mtx, &temp); |
|
2471 return M3G_TRUE; |
|
2472 } |
|
2473 |
|
2474 /*! |
|
2475 * \brief Sets a matrix to the product of two other matrices |
|
2476 * |
|
2477 * \note \c dst can not be either of \c left or \c right; if it is, |
|
2478 * the results are undefined |
|
2479 */ |
|
2480 M3G_API void m3gMatrixProduct(Matrix *dst, const Matrix *left, const Matrix *right) |
|
2481 { |
|
2482 M3G_ASSERT_PTR(dst); |
|
2483 M3G_ASSERT_PTR(left); |
|
2484 M3G_ASSERT_PTR(right); |
|
2485 M3G_ASSERT(dst != left && dst != right); |
|
2486 |
|
2487 /* Classify input matrices and take early exits for identities */ |
|
2488 |
|
2489 if (!m3gIsClassified(left)) { |
|
2490 m3gClassify((Matrix*)left); |
|
2491 } |
|
2492 if (left->mask == MC_IDENTITY) { |
|
2493 m3gCopyMatrix(dst, right); |
|
2494 return; |
|
2495 } |
|
2496 |
|
2497 if (!m3gIsClassified(right)) { |
|
2498 m3gClassify((Matrix*)right); |
|
2499 } |
|
2500 if (right->mask == MC_IDENTITY) { |
|
2501 m3gCopyMatrix(dst, left); |
|
2502 return; |
|
2503 } |
|
2504 |
|
2505 /* Special quick paths for 3x4 matrices */ |
|
2506 |
|
2507 if (m3gIsWUnity(left) && m3gIsWUnity(right)) { |
|
2508 |
|
2509 /* Translation? */ |
|
2510 |
|
2511 if ((left->mask & ~MC_TRANSLATION_PART) == MC_IDENTITY) { |
|
2512 |
|
2513 if (left->mask != MC_TRANSLATION && !left->complete) { |
|
2514 m3gFillClassifiedMatrix((Matrix*)left); |
|
2515 } |
|
2516 if (right->mask != MC_TRANSLATION && !right->complete) { |
|
2517 m3gFillClassifiedMatrix((Matrix*)right); |
|
2518 } |
|
2519 |
|
2520 m3gCopyMatrix(dst, right); |
|
2521 |
|
2522 M44F(dst, 0, 3) = m3gAdd(M44F(left, 0, 3), M44F(dst, 0, 3)); |
|
2523 M44F(dst, 1, 3) = m3gAdd(M44F(left, 1, 3), M44F(dst, 1, 3)); |
|
2524 M44F(dst, 2, 3) = m3gAdd(M44F(left, 2, 3), M44F(dst, 2, 3)); |
|
2525 |
|
2526 dst->mask |= MC_TRANSLATION_PART; |
|
2527 return; |
|
2528 } |
|
2529 |
|
2530 if ((right->mask & ~MC_TRANSLATION_PART) == MC_IDENTITY) { |
|
2531 Vec4 tvec; |
|
2532 |
|
2533 if (left->mask != MC_TRANSLATION && !left->complete) { |
|
2534 m3gFillClassifiedMatrix((Matrix*)left); |
|
2535 } |
|
2536 if (right->mask != MC_TRANSLATION && !right->complete) { |
|
2537 m3gFillClassifiedMatrix((Matrix*)right); |
|
2538 } |
|
2539 |
|
2540 m3gCopyMatrix(dst, left); |
|
2541 |
|
2542 m3gGetMatrixColumn(right, 3, &tvec); |
|
2543 m3gTransformVec4(dst, &tvec); |
|
2544 |
|
2545 M44F(dst, 0, 3) = tvec.x; |
|
2546 M44F(dst, 1, 3) = tvec.y; |
|
2547 M44F(dst, 2, 3) = tvec.z; |
|
2548 |
|
2549 dst->mask |= MC_TRANSLATION_PART; |
|
2550 return; |
|
2551 } |
|
2552 |
|
2553 } |
|
2554 |
|
2555 /* Compute product and set output classification */ |
|
2556 |
|
2557 m3gGenericMatrixProduct(dst, left, right); |
|
2558 } |
|
2559 |
|
2560 /*! |
|
2561 * \brief Normalizes a quaternion |
|
2562 */ |
|
2563 M3G_API void m3gNormalizeQuat(Quat *q) |
|
2564 { |
|
2565 M3Gfloat norm; |
|
2566 M3G_ASSERT_PTR(q); |
|
2567 |
|
2568 norm = m3gNorm4(&q->x); |
|
2569 |
|
2570 if (norm > EPSILON) { |
|
2571 norm = m3gRcpSqrt(norm); |
|
2572 m3gScale4(&q->x, norm); |
|
2573 } |
|
2574 else { |
|
2575 m3gIdentityQuat(q); |
|
2576 } |
|
2577 } |
|
2578 |
|
2579 /*! |
|
2580 * \brief Normalizes a three-vector |
|
2581 */ |
|
2582 M3G_API void m3gNormalizeVec3(Vec3 *v) |
|
2583 { |
|
2584 M3Gfloat norm; |
|
2585 M3G_ASSERT_PTR(v); |
|
2586 |
|
2587 norm = m3gNorm3(&v->x); |
|
2588 |
|
2589 if (norm > EPSILON) { |
|
2590 norm = m3gRcpSqrt(norm); |
|
2591 m3gScale3(&v->x, norm); |
|
2592 } |
|
2593 else { |
|
2594 m3gZero(v, sizeof(Vec3)); |
|
2595 } |
|
2596 } |
|
2597 |
|
2598 /*! |
|
2599 * \brief Normalizes a four-vector |
|
2600 */ |
|
2601 M3G_API void m3gNormalizeVec4(Vec4 *v) |
|
2602 { |
|
2603 M3Gfloat norm; |
|
2604 M3G_ASSERT_PTR(v); |
|
2605 |
|
2606 norm = m3gNorm4(&v->x); |
|
2607 |
|
2608 if (norm > EPSILON) { |
|
2609 norm = m3gRcpSqrt(norm); |
|
2610 m3gScale4(&v->x, norm); |
|
2611 } |
|
2612 else { |
|
2613 m3gZero(v, sizeof(Vec4)); |
|
2614 } |
|
2615 } |
|
2616 |
|
2617 /*! |
|
2618 * \brief Multiplies a matrix from the right with another matrix |
|
2619 */ |
|
2620 M3G_API void m3gPostMultiplyMatrix(Matrix *mtx, const Matrix *other) |
|
2621 { |
|
2622 Matrix temp; |
|
2623 M3G_ASSERT_PTR(mtx); |
|
2624 M3G_ASSERT_PTR(other); |
|
2625 |
|
2626 m3gCopyMatrix(&temp, mtx); |
|
2627 m3gMatrixProduct(mtx, &temp, other); |
|
2628 } |
|
2629 |
|
2630 /*! |
|
2631 * \brief Multiplies a matrix from the left with another matrix |
|
2632 */ |
|
2633 M3G_API void m3gPreMultiplyMatrix(Matrix *mtx, const Matrix *other) |
|
2634 { |
|
2635 Matrix temp; |
|
2636 M3G_ASSERT_PTR(mtx); |
|
2637 M3G_ASSERT_PTR(other); |
|
2638 |
|
2639 m3gCopyMatrix(&temp, mtx); |
|
2640 m3gMatrixProduct(mtx, other, &temp); |
|
2641 } |
|
2642 |
|
2643 /*! |
|
2644 * \brief Multiplies a matrix with a rotation matrix. |
|
2645 */ |
|
2646 M3G_API void m3gPostRotateMatrix(Matrix *mtx, |
|
2647 M3Gfloat angle, |
|
2648 M3Gfloat ax, M3Gfloat ay, M3Gfloat az) |
|
2649 { |
|
2650 Quat q; |
|
2651 m3gSetAngleAxis(&q, angle, ax, ay, az); |
|
2652 m3gPostRotateMatrixQuat(mtx, &q); |
|
2653 } |
|
2654 |
|
2655 /*! |
|
2656 * \brief Multiplies a matrix with a translation matrix. |
|
2657 */ |
|
2658 M3G_API void m3gPostRotateMatrixQuat(Matrix *mtx, const Quat *quat) |
|
2659 { |
|
2660 Matrix temp; |
|
2661 m3gQuatMatrix(&temp, quat); |
|
2662 m3gPostMultiplyMatrix(mtx, &temp); |
|
2663 } |
|
2664 |
|
2665 /*! |
|
2666 * \brief Multiplies a matrix with a scale matrix. |
|
2667 */ |
|
2668 M3G_API void m3gPostScaleMatrix(Matrix *mtx, M3Gfloat sx, M3Gfloat sy, M3Gfloat sz) |
|
2669 { |
|
2670 Matrix temp; |
|
2671 m3gScalingMatrix(&temp, sx, sy, sz); |
|
2672 m3gPostMultiplyMatrix(mtx, &temp); |
|
2673 } |
|
2674 |
|
2675 /*! |
|
2676 * \brief Multiplies a matrix with a translation (matrix). |
|
2677 */ |
|
2678 M3G_API void m3gPostTranslateMatrix(Matrix *mtx, |
|
2679 M3Gfloat tx, M3Gfloat ty, M3Gfloat tz) |
|
2680 { |
|
2681 Matrix temp; |
|
2682 m3gTranslationMatrix(&temp, tx, ty, tz); |
|
2683 m3gPostMultiplyMatrix(mtx, &temp); |
|
2684 } |
|
2685 |
|
2686 /*! |
|
2687 * \brief Multiplies a matrix with a rotation matrix |
|
2688 */ |
|
2689 M3G_API void m3gPreRotateMatrix(Matrix *mtx, |
|
2690 M3Gfloat angle, |
|
2691 M3Gfloat ax, M3Gfloat ay, M3Gfloat az) |
|
2692 { |
|
2693 Quat q; |
|
2694 m3gSetAngleAxis(&q, angle, ax, ay, az); |
|
2695 m3gPreRotateMatrixQuat(mtx, &q); |
|
2696 } |
|
2697 |
|
2698 /*! |
|
2699 * \brief Multiplies a matrix with a quaternion rotation |
|
2700 */ |
|
2701 M3G_API void m3gPreRotateMatrixQuat(Matrix *mtx, const Quat *quat) |
|
2702 { |
|
2703 Matrix temp; |
|
2704 m3gQuatMatrix(&temp, quat); |
|
2705 m3gPreMultiplyMatrix(mtx, &temp); |
|
2706 } |
|
2707 |
|
2708 /*! |
|
2709 * \brief Multiplies a matrix with a scale matrix. |
|
2710 */ |
|
2711 M3G_API void m3gPreScaleMatrix(Matrix *mtx, M3Gfloat sx, M3Gfloat sy, M3Gfloat sz) |
|
2712 { |
|
2713 Matrix temp; |
|
2714 m3gScalingMatrix(&temp, sx, sy, sz); |
|
2715 m3gPreMultiplyMatrix(mtx, &temp); |
|
2716 } |
|
2717 |
|
2718 /*! |
|
2719 * \brief Multiplies a matrix with a translation (matrix). |
|
2720 */ |
|
2721 M3G_API void m3gPreTranslateMatrix(Matrix *mtx, |
|
2722 M3Gfloat tx, M3Gfloat ty, M3Gfloat tz) |
|
2723 { |
|
2724 Matrix temp; |
|
2725 m3gTranslationMatrix(&temp, tx, ty, tz); |
|
2726 m3gPreMultiplyMatrix(mtx, &temp); |
|
2727 } |
|
2728 |
|
2729 /*! |
|
2730 * \brief Converts a quaternion into a matrix |
|
2731 * |
|
2732 * The output is a matrix effecting the same rotation as the |
|
2733 * quaternion passed as input |
|
2734 */ |
|
2735 M3G_API void m3gQuatMatrix(Matrix *mtx, const Quat *quat) |
|
2736 { |
|
2737 M3Gfloat qx = quat->x; |
|
2738 M3Gfloat qy = quat->y; |
|
2739 M3Gfloat qz = quat->z; |
|
2740 M3Gfloat qw = quat->w; |
|
2741 |
|
2742 /* Quick exit for identity rotations */ |
|
2743 |
|
2744 if (IS_ZERO(qx) && IS_ZERO(qy) && IS_ZERO(qz)) { |
|
2745 m3gIdentityMatrix(mtx); |
|
2746 return; |
|
2747 } |
|
2748 |
|
2749 { |
|
2750 /* Determine the rough type of the output matrix */ |
|
2751 |
|
2752 M3Guint type = MC_SCALING_ROTATION; |
|
2753 if (IS_ZERO(qz) && IS_ZERO(qy)) { |
|
2754 type = MC_X_ROTATION; |
|
2755 } |
|
2756 else if (IS_ZERO(qz) && IS_ZERO(qx)) { |
|
2757 type = MC_Y_ROTATION; |
|
2758 } |
|
2759 else if (IS_ZERO(qx) && IS_ZERO(qy)) { |
|
2760 type = MC_Z_ROTATION; |
|
2761 } |
|
2762 m3gClassifyAs(type, mtx); |
|
2763 |
|
2764 /* Generate the non-constant parts of the matrix */ |
|
2765 { |
|
2766 M3Gfloat wx, wy, wz, xx, yy, yz, xy, xz, zz; |
|
2767 |
|
2768 xx = m3gMul(qx, qx); |
|
2769 xy = m3gMul(qx, qy); |
|
2770 xz = m3gMul(qx, qz); |
|
2771 yy = m3gMul(qy, qy); |
|
2772 yz = m3gMul(qy, qz); |
|
2773 zz = m3gMul(qz, qz); |
|
2774 wx = m3gMul(qw, qx); |
|
2775 wy = m3gMul(qw, qy); |
|
2776 wz = m3gMul(qw, qz); |
|
2777 |
|
2778 if (type != MC_X_ROTATION) { |
|
2779 M44F(mtx, 0, 0) = m3gSub(1.f, m3gDouble(m3gAdd(yy, zz))); |
|
2780 M44F(mtx, 0, 1) = m3gDouble(m3gSub(xy, wz)); |
|
2781 M44F(mtx, 0, 2) = m3gDouble(m3gAdd(xz, wy)); |
|
2782 } |
|
2783 |
|
2784 if (type != MC_Y_ROTATION) { |
|
2785 M44F(mtx, 1, 0) = m3gDouble(m3gAdd(xy, wz)); |
|
2786 M44F(mtx, 1, 1) = m3gSub(1.f, m3gDouble(m3gAdd(xx, zz))); |
|
2787 M44F(mtx, 1, 2) = m3gDouble(m3gSub(yz, wx)); |
|
2788 } |
|
2789 |
|
2790 if (type != MC_Z_ROTATION) { |
|
2791 M44F(mtx, 2, 0) = m3gDouble(m3gSub(xz, wy)); |
|
2792 M44F(mtx, 2, 1) = m3gDouble(m3gAdd(yz, wx)); |
|
2793 M44F(mtx, 2, 2) = m3gSub(1.f, m3gDouble(m3gAdd(xx, yy))); |
|
2794 } |
|
2795 } |
|
2796 m3gSubClassify(mtx); |
|
2797 } |
|
2798 } |
|
2799 |
|
2800 /*! |
|
2801 * \brief Generates a scaling matrix |
|
2802 */ |
|
2803 M3G_API void m3gScalingMatrix( |
|
2804 Matrix *mtx, |
|
2805 const M3Gfloat sx, const M3Gfloat sy, const M3Gfloat sz) |
|
2806 { |
|
2807 M3G_ASSERT_PTR(mtx); |
|
2808 M44F(mtx, 0, 0) = sx; |
|
2809 M44F(mtx, 1, 1) = sy; |
|
2810 M44F(mtx, 2, 2) = sz; |
|
2811 m3gClassifyAs(MC_SCALING, mtx); |
|
2812 m3gSubClassify(mtx); |
|
2813 } |
|
2814 |
|
2815 /*! |
|
2816 * \brief Sets a quaternion to represent an angle-axis rotation |
|
2817 */ |
|
2818 M3G_API void m3gSetAngleAxis(Quat *quat, |
|
2819 M3Gfloat angle, |
|
2820 M3Gfloat ax, M3Gfloat ay, M3Gfloat az) |
|
2821 { |
|
2822 m3gSetAngleAxisRad(quat, m3gMul(angle, M3G_DEG2RAD), ax, ay, az); |
|
2823 } |
|
2824 |
|
2825 /*! |
|
2826 * \brief Sets a quaternion to represent an angle-axis rotation |
|
2827 */ |
|
2828 M3G_API void m3gSetAngleAxisRad(Quat *quat, |
|
2829 M3Gfloat angleRad, |
|
2830 M3Gfloat ax, M3Gfloat ay, M3Gfloat az) |
|
2831 { |
|
2832 M3G_ASSERT_PTR(quat); |
|
2833 |
|
2834 if (!IS_ZERO(angleRad)) { |
|
2835 M3Gfloat s; |
|
2836 M3Gfloat halfAngle = m3gHalf(angleRad); |
|
2837 |
|
2838 s = m3gSin(halfAngle); |
|
2839 |
|
2840 { |
|
2841 M3Gfloat sqrNorm = m3gMadd(ax, ax, m3gMadd(ay, ay, m3gMul(az, az))); |
|
2842 if (sqrNorm < 0.995f || sqrNorm > 1.005f) { |
|
2843 if (sqrNorm > EPSILON) { |
|
2844 M3Gfloat ooNorm = m3gRcpSqrt(sqrNorm); |
|
2845 ax = m3gMul(ax, ooNorm); |
|
2846 ay = m3gMul(ay, ooNorm); |
|
2847 az = m3gMul(az, ooNorm); |
|
2848 } |
|
2849 else { |
|
2850 ax = ay = az = 0.0f; |
|
2851 } |
|
2852 } |
|
2853 } |
|
2854 |
|
2855 quat->x = m3gMul(s, ax); |
|
2856 quat->y = m3gMul(s, ay); |
|
2857 quat->z = m3gMul(s, az); |
|
2858 quat->w = m3gCos(halfAngle); |
|
2859 } |
|
2860 else { |
|
2861 m3gIdentityQuat(quat); |
|
2862 } |
|
2863 } |
|
2864 |
|
2865 /*! |
|
2866 * \brief Quaternion multiplication. |
|
2867 */ |
|
2868 M3G_API void m3gMulQuat(Quat *quat, const Quat *other) |
|
2869 { |
|
2870 Quat q; |
|
2871 q = *quat; |
|
2872 |
|
2873 quat->w = m3gMul(q.w, other->w) - m3gMul(q.x, other->x) - m3gMul(q.y, other->y) - m3gMul(q.z, other->z); |
|
2874 quat->x = m3gMul(q.w, other->x) + m3gMul(q.x, other->w) + m3gMul(q.y, other->z) - m3gMul(q.z, other->y); |
|
2875 quat->y = m3gMul(q.w, other->y) - m3gMul(q.x, other->z) + m3gMul(q.y, other->w) + m3gMul(q.z, other->x); |
|
2876 quat->z = m3gMul(q.w, other->z) + m3gMul(q.x, other->y) - m3gMul(q.y, other->x) + m3gMul(q.z, other->w); |
|
2877 } |
|
2878 |
|
2879 /*! |
|
2880 * \brief Makes this quaternion represent the rotation from one 3D |
|
2881 * vector to another |
|
2882 */ |
|
2883 M3G_API void m3gSetQuatRotation(Quat *quat, |
|
2884 const Vec3 *from, const Vec3 *to) |
|
2885 { |
|
2886 M3Gfloat cosAngle; |
|
2887 |
|
2888 M3G_ASSERT_PTR(quat); |
|
2889 M3G_ASSERT_PTR(from); |
|
2890 M3G_ASSERT_PTR(to); |
|
2891 |
|
2892 cosAngle = m3gDot3(from, to); |
|
2893 |
|
2894 if (cosAngle > (1.0f - EPSILON)) { /* zero angle */ |
|
2895 m3gIdentityQuat(quat); |
|
2896 return; |
|
2897 } |
|
2898 else if (cosAngle > (1.0e-3f - 1.0f)) { /* normal case */ |
|
2899 Vec3 axis; |
|
2900 m3gCross(&axis, from, to); |
|
2901 m3gSetAngleAxisRad(quat, m3gArcCos(cosAngle), axis.x, axis.y, axis.z); |
|
2902 } |
|
2903 else { |
|
2904 |
|
2905 /* Opposite vectors; must generate an arbitrary perpendicular |
|
2906 * vector and use that as the rotation axis. Here, we try the |
|
2907 * Z axis first, and if that seems too parallel to the |
|
2908 * vectors, project the Y axis instead: Z is the only good |
|
2909 * choice for Z-constrained rotations, and Y by definition |
|
2910 * must be perpendicular to that. */ |
|
2911 |
|
2912 Vec3 axis, temp; |
|
2913 M3Gfloat s; |
|
2914 |
|
2915 axis.x = axis.y = axis.z = 0.0f; |
|
2916 if (m3gAbs(from->z) < (1.0f - EPSILON)) { |
|
2917 axis.z = 1.0f; |
|
2918 } |
|
2919 else { |
|
2920 axis.y = 1.0f; |
|
2921 } |
|
2922 |
|
2923 s = m3gDot3(&axis, from); |
|
2924 temp = *from; |
|
2925 m3gScaleVec3(&temp, s); |
|
2926 m3gSubVec3(&axis, &temp); |
|
2927 |
|
2928 m3gSetAngleAxis(quat, 180.f, axis.x, axis.y, axis.z); |
|
2929 } |
|
2930 } |
|
2931 |
|
2932 /*! |
|
2933 * \brief Sets the values of a matrix |
|
2934 * |
|
2935 */ |
|
2936 M3G_API void m3gSetMatrixColumns(Matrix *mtx, const M3Gfloat *src) |
|
2937 { |
|
2938 M3G_ASSERT(mtx != NULL && src != NULL); |
|
2939 |
|
2940 m3gCopy(mtx->elem, src, sizeof(mtx->elem)); |
|
2941 mtx->classified = M3G_FALSE; |
|
2942 mtx->complete = M3G_TRUE; |
|
2943 } |
|
2944 |
|
2945 /*! |
|
2946 * \brief Sets the values of a matrix |
|
2947 * |
|
2948 */ |
|
2949 M3G_API void m3gSetMatrixRows(Matrix *mtx, const M3Gfloat *src) |
|
2950 { |
|
2951 M3G_ASSERT(mtx != NULL && src != NULL); |
|
2952 { |
|
2953 int row; |
|
2954 for (row = 0; row < 4; ++row) { |
|
2955 mtx->elem[ 0 + row] = *src++; |
|
2956 mtx->elem[ 4 + row] = *src++; |
|
2957 mtx->elem[ 8 + row] = *src++; |
|
2958 mtx->elem[12 + row] = *src++; |
|
2959 } |
|
2960 } |
|
2961 mtx->classified = M3G_FALSE; |
|
2962 mtx->complete = M3G_TRUE; |
|
2963 } |
|
2964 |
|
2965 /*! |
|
2966 * \brief Transforms a 4-vector with a matrix |
|
2967 */ |
|
2968 #if defined(M3G_HW_FLOAT_VFPV2) |
|
2969 |
|
2970 __asm void _m3gTransformVec4(const Matrix *mtx, Vec4 *vec, M3Gint n) |
|
2971 { |
|
2972 |
|
2973 CODE32 |
|
2974 |
|
2975 FSTMFDD sp!, {d8-d11} |
|
2976 |
|
2977 FMRX r3, FPSCR |
|
2978 BIC r12, r3, #0x00370000 |
|
2979 ORR r12, #(3<<16) |
|
2980 FMXR FPSCR, r12 |
|
2981 CMP r2, #4 |
|
2982 |
|
2983 FLDMIAS r0, {s4-s19} // [mtx0 mtx1 ..] |
|
2984 FLDMIAS r1, {s0-s3} // [vec.x vec.y vec.z vec.w] |
|
2985 FMULS s20, s4, s0 // [s20-s23] = [v.x*mtx0 v.x*mtx1 v.x*mtx2 v.x*mtx3 ] |
|
2986 FMACS s20, s8, s1 // [s20-s23] += [v.y*mtx4 v.y*mtx5 v.y*mtx6 v.y*mtx7 ] |
|
2987 FMACS s20, s12, s2 // [s20-s23] += [v.z*mtx8 v.z*mtx9 v.z*mtx10 v.z*mtx11] |
|
2988 FMACS s20, s16, s3 // [s20-s23] += [v.w*mtx12 v.w*mtx13 v.w*mtx14 v.w*mtx15] |
|
2989 FSTMIAS r1!, {s20-s22} |
|
2990 FSTMIASEQ r1, {s23} |
|
2991 |
|
2992 FMXR FPSCR, r3 |
|
2993 FLDMFDD sp!, {d8-d11} |
|
2994 |
|
2995 BX lr |
|
2996 } |
|
2997 #endif /* #if defined(M3G_HW_FLOAT_VFPV2) */ |
|
2998 |
|
2999 M3G_API void m3gTransformVec4(const Matrix *mtx, Vec4 *vec) |
|
3000 { |
|
3001 M3Guint type; |
|
3002 M3G_ASSERT(mtx != NULL && vec != NULL); |
|
3003 |
|
3004 if (!m3gIsClassified(mtx)) { |
|
3005 m3gClassify((Matrix*)mtx); |
|
3006 } |
|
3007 |
|
3008 type = mtx->mask; |
|
3009 |
|
3010 if (type == MC_IDENTITY) { |
|
3011 return; |
|
3012 } |
|
3013 else { |
|
3014 Vec4 v = *vec; |
|
3015 int i; |
|
3016 int n = m3gIsWUnity(mtx) ? 3 : 4; |
|
3017 |
|
3018 if (!mtx->complete) { |
|
3019 m3gFillClassifiedMatrix((Matrix*)mtx); |
|
3020 } |
|
3021 #if defined(M3G_HW_FLOAT_VFPV2) |
|
3022 _m3gTransformVec4(mtx, vec, n); |
|
3023 #else |
|
3024 for (i = 0; i < n; ++i) { |
|
3025 M3Gfloat d = m3gMul(M44F(mtx, i, 0), v.x); |
|
3026 d = m3gMadd(M44F(mtx, i, 1), v.y, d); |
|
3027 d = m3gMadd(M44F(mtx, i, 2), v.z, d); |
|
3028 d = m3gMadd(M44F(mtx, i, 3), v.w, d); |
|
3029 (&vec->x)[i] = d; |
|
3030 } |
|
3031 #endif |
|
3032 } |
|
3033 } |
|
3034 |
|
3035 /*! |
|
3036 * \brief Generates a translation matrix |
|
3037 */ |
|
3038 M3G_API void m3gTranslationMatrix( |
|
3039 Matrix *mtx, |
|
3040 const M3Gfloat tx, const M3Gfloat ty, const M3Gfloat tz) |
|
3041 { |
|
3042 M3G_ASSERT_PTR(mtx); |
|
3043 M44F(mtx, 0, 3) = tx; |
|
3044 M44F(mtx, 1, 3) = ty; |
|
3045 M44F(mtx, 2, 3) = tz; |
|
3046 m3gClassifyAs(MC_TRANSLATION, mtx); |
|
3047 m3gSubClassify(mtx); |
|
3048 } |
|
3049 |
|
3050 /*! |
|
3051 * \brief Float vector assignment. |
|
3052 */ |
|
3053 M3G_API void m3gSetQuat(Quat *quat, const M3Gfloat *vec) |
|
3054 { |
|
3055 quat->x = vec[0]; |
|
3056 quat->y = vec[1]; |
|
3057 quat->z = vec[2]; |
|
3058 quat->w = vec[3]; |
|
3059 } |
|
3060 |
|
3061 /*! |
|
3062 * \brief Slerp between quaternions q0 and q1, storing the result in quat. |
|
3063 */ |
|
3064 M3G_API void m3gSlerpQuat(Quat *quat, |
|
3065 M3Gfloat s, |
|
3066 const Quat *q0, const Quat *q1) |
|
3067 { |
|
3068 M3Gfloat s0, s1; |
|
3069 M3Gfloat cosTheta = m3gDot4((const Vec4 *)q0, (const Vec4 *)q1); |
|
3070 M3Gfloat oneMinusS = m3gSub(1.0f, s); |
|
3071 |
|
3072 if (cosTheta > EPSILON - 1.0f) { |
|
3073 if (cosTheta < 1.0f - EPSILON) { |
|
3074 M3Gfloat theta = m3gArcCos(cosTheta); |
|
3075 M3Gfloat sinTheta = m3gSin(theta); |
|
3076 s0 = m3gSin(m3gMul(oneMinusS, theta)) / sinTheta; |
|
3077 s1 = m3gSin(m3gMul(s, theta)) / sinTheta; |
|
3078 } |
|
3079 else { |
|
3080 /* For quaternions very close to each other, use plain |
|
3081 linear interpolation for numerical stability. */ |
|
3082 s0 = oneMinusS; |
|
3083 s1 = s; |
|
3084 } |
|
3085 quat->x = m3gMadd(s0, q0->x, m3gMul(s1, q1->x)); |
|
3086 quat->y = m3gMadd(s0, q0->y, m3gMul(s1, q1->y)); |
|
3087 quat->z = m3gMadd(s0, q0->z, m3gMul(s1, q1->z)); |
|
3088 quat->w = m3gMadd(s0, q0->w, m3gMul(s1, q1->w)); |
|
3089 } |
|
3090 else { |
|
3091 /* Slerp is undefined if the two quaternions are (nearly) |
|
3092 opposite, so we just pick an arbitrary plane of |
|
3093 rotation here. */ |
|
3094 |
|
3095 quat->x = -q0->y; |
|
3096 quat->y = q0->x; |
|
3097 quat->z = -q0->w; |
|
3098 quat->w = q0->z; |
|
3099 |
|
3100 s0 = m3gSin(m3gMul(oneMinusS, HALF_PI)); |
|
3101 s1 = m3gSin(m3gMul(s, HALF_PI)); |
|
3102 |
|
3103 quat->x = m3gMadd(s0, q0->x, m3gMul(s1, quat->x)); |
|
3104 quat->y = m3gMadd(s0, q0->y, m3gMul(s1, quat->y)); |
|
3105 quat->z = m3gMadd(s0, q0->z, m3gMul(s1, quat->z)); |
|
3106 } |
|
3107 } |
|
3108 |
|
3109 /*! |
|
3110 * \brief Interpolate quaternions using spline, or "squad" interpolation. |
|
3111 */ |
|
3112 M3G_API void m3gSquadQuat(Quat *quat, |
|
3113 M3Gfloat s, |
|
3114 const Quat *q0, const Quat *a, const Quat *b, const Quat *q1) |
|
3115 { |
|
3116 |
|
3117 Quat temp0; |
|
3118 Quat temp1; |
|
3119 m3gSlerpQuat(&temp0, s, q0, q1); |
|
3120 m3gSlerpQuat(&temp1, s, a, b); |
|
3121 |
|
3122 m3gSlerpQuat(quat, m3gDouble(m3gMul(s, m3gSub(1.0f, s))), &temp0, &temp1); |
|
3123 } |
|
3124 |
|
3125 |
|
3126 |