/*
* Copyright (c) 2003 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of the License "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description: Internal math function implementations
*
*/
/*!
* \internal
* \file
* \brief Internal math function implementations
*
*/
#ifndef M3G_CORE_INCLUDE
# error included by m3g_core.c; do not compile separately.
#endif
#include "m3g_defs.h"
#include "m3g_memory.h"
#if defined(M3G_SOFT_FLOAT)
#include <math.h>
#endif
/*----------------------------------------------------------------------
* Private types and definitions
*--------------------------------------------------------------------*/
/* Enumerated common matrix classifications */
#define MC_IDENTITY 0x40100401 // 01000000 00010000 00000100 00000001
#define MC_FRUSTUM 0x30BF0C03 // 00110000 10111111 00001100 00000011
#define MC_PERSPECTIVE 0x30B00C03 // 00110000 10110000 00001100 00000011
#define MC_ORTHO 0x7F300C03 // 01111111 00110000 00001100 00000011
#define MC_PARALLEL 0x70300C03 // 01110000 00110000 00001100 00000011
#define MC_SCALING_ROTATION 0x403F3F3F // 01000000 00111111 00111111 00111111
#define MC_SCALING 0x40300C03 // 01000000 00110000 00001100 00000011
#define MC_TRANSLATION 0x7F100401 // 01111111 00010000 00000100 00000001
#define MC_X_ROTATION 0x403C3C01 // 01000000 00111100 00111100 00000001
#define MC_Y_ROTATION 0x40330433 // 01000000 00110011 00000100 00110011
#define MC_Z_ROTATION 0x40100F0F // 01000000 00010000 00001111 00001111
#define MC_W_UNITY 0x7F3F3F3F // 01111111 00111111 00111111 00111111
#define MC_GENERIC 0xFFFFFFFF
/* Partial masks for individual matrix components */
#define MC_TRANSLATION_PART 0x3F000000 // 00111111 00000000 00000000 00000000
#define MC_SCALE_PART 0x00300C03 // 00000000 00110000 00001100 00000011
#define MC_SCALE_ROTATION_PART 0x003F3F3F // 00000000 00111111 00111111 00111111
/* Matrix element classification masks */
#define ELEM_ZERO 0x00
#define ELEM_ONE 0x01
#define ELEM_MINUS_ONE 0x02
#define ELEM_ANY 0x03
/*!
* \internal
* \brief calculates the offset of a 4x4 matrix element in a linear
* array
*
* \notes The current convention is column-major, as in OpenGL ES
*/
#define MELEM(row, col) ((row) + ((col) << 2))
/*!
* \internal
* \brief Macro for accessing 4x4 float matrix elements
*
* \param mtx pointer to the first element of the matrix
* \param row matrix row
* \param col matrix column
*/
#define M44F(mtx, row, col) ((mtx)->elem[MELEM((row), (col))])
/*--------------------------------------------------------------------*/
/*----------------------------------------------------------------------
* Private functions
*--------------------------------------------------------------------*/
/*!
* \internal
* \brief ARM VFPv2 implementation of 4x4 matrix multiplication.
*
* \param dst multiplication result
* \param left left-hand matrix
* \param right right-hand matrix
*/
#if defined(M3G_HW_FLOAT_VFPV2)
__asm void _m3gGenericMatrixProduct(Matrix *dst,
const Matrix *left, const Matrix *right)
{
// r0 = *dst
// r1 = *left
// r2 = *right
CODE32
// save the VFP registers and set the vector STRIDE = 1, LENGTH = 4
FSTMFDD sp!, {d8-d15}
FMRX r3, FPSCR
BIC r12, r3, #0x00370000
ORR r12, #0x00030000
FMXR FPSCR, r12
// left = [a0 a1 a2 a3 right = [b0 b1 b2 b3
// a4 a5 a6 a7 b4 b5 b6 b7
// a8 a9 a10 a11 b8 b9 b10 b11
// a12 a13 a14 a15] b12 b13 b14 b15]
//
// dst = [a0*b0+a4*b1+a8*b2+a12*b3 a1*b0+a5*b1+a9*b2+a13*b3 ..
// a0*b4+a4*b5+a8*b6+a12*b7 a1*b4+a5*b5+a9*b6+a13*b7 ..
// . .
// . .
FLDMIAS r1!, {s8-s23} // load the left matrix [a0-a15] to registers s8-s23
FLDMIAS r2!, {s0-s7} // load [b0-b7] of right matrix to registers s0-s7
FMULS s24, s8, s0 // [s24-s27] = [a0*b0 a1*b0 a2*b0 a3*b0]
FMULS s28, s8, s4 // [s28-s31] = [a0*b4 a1*b4 a2*b4 a3*b4]
FMACS s24, s12, s1 // [s24-s27] += [a4*b1 a5*b1 a6*b1 a7*b1]
FMACS s28, s12, s5 // [s28-s31] += [a4*b5 a5*b5 a6*b5 a7*b5]
FMACS s24, s16, s2 // [s24-s27] += [a8*b2 a9*b2 a10*b2 a11*b2]
FMACS s28, s16, s6 // [s28-s31] += [a8*b6 a9*b6 a10*b6 a11*b6]
FMACS s24, s20, s3 // [s24-s27] += [a12*b3 a13*b3 a14*b3 a15*b3]
FMACS s28, s20, s7 // [s28-s31] += [a12*b7 a13*b37a14*b7 a15*b7]
FLDMIAS r2!, {s0-s7} // load [b8-b15]
FSTMIAS r0!, {s24-s31} // write [dst0-dst7]
FMULS s24, s8, s0
FMULS s28, s8, s4
FMACS s24, s12, s1
FMACS s28, s12, s5
FMACS s24, s16, s2
FMACS s28, s16, s6
FMACS s24, s20, s3
FMACS s28, s20, s7
FSTMIAS r0!, {s24-s31}
// Restore the VFP registers and return.
FMXR FPSCR, r3
FLDMFDD sp!, {d8-d15}
BX lr
}
#endif /* #if defined(M3G_HW_FLOAT_VFPV2) */
/*------------------ Elementary float ------------------*/
#if defined(M3G_SOFT_FLOAT)
#if defined (M3G_BUILD_ARM)
/*!
* \internal
* \brief Floating point multiplication implementation for ARM.
*/
__asm M3Gfloat m3gMul(const M3Gfloat a,
const M3Gfloat b)
{
/**
* Extract the exponents of the multiplicands and add them
* together. Flush to zero if either exponent or their sum
* is zero.
*/
mov r12, #0xff;
ands r2, r0, r12, lsl #23; // exit if e1 == 0
andnes r3, r1, r12, lsl #23; // exit if e2 == 0
subne r2, r2, #(127 << 23);
addnes r12, r2, r3; // exit if e1+e2-127 <= 0
movle r0, #0;
bxle lr;
/**
* Determine the sign of the result. Note that the exponent
* may have overflowed to the sign bit, and thus the result
* may be an arbitrary negative value when it really should
* be +Inf or -Inf.
*/
teq r0, r1;
orrmi r12, r12, #0x80000000;
/**
* Multiply the mantissas. First shift the mantissas up to
* unsigned 1.31 fixed point, adding the leading "1" bit at
* the same time, and finally do a 32x32 -> 64 bit unsigned
* multiplication. The result is in unsigned 2.62 fixed point,
* representing the interval [1.0, 4.0).
*/
mov r2, #0x80000000;
orr r0, r2, r0, lsl #8;
orr r1, r2, r1, lsl #8;
umulls r2, r3, r0, r1;
/**
* If the highest bit of the 64-bit result is set, then the
* mantissa lies in [2.0, 4.0) and needs to be renormalized.
* That is, the mantissa is shifted one bit to the right and
* the exponent correspondingly increased by 1. Note that we
* lose the leading "1" bit from the mantissa by adding it up
* with the exponent.
*/
subpl r12, r12, #(1 << 23); // no overflow: exponent -= 1
addpl r0, r12, r3, lsr #7; // no overflow: exponent += 1
addmi r0, r12, r3, lsr #8; // overflow: exponent += 1
bx lr;
}
/*!
* \internal
* \brief Floating point addition implementation for ARM.
*/
__asm M3Gfloat m3gAdd(const M3Gfloat a,
const M3Gfloat b)
{
/**
* If the operands have opposite signs then this is not really
* an addition but a subtraction. Subtraction is much slower,
* so we have a separate code path for it, rather than trying
* to save space by handling both in the same place.
*/
teq r0, r1;
bmi _m3gSub;
/**
* Sort the operands such that the larger operand is in R0 and
* the smaller in R1. The sign bits do not affect the ordering,
* since they are known to be equal.
*/
subs r2, r0, r1;
submi r0, r0, r2;
addmi r1, r1, r2;
/**
* Extract the exponent of the smaller operand into R2 and compute
* the difference between the larger and smaller exponent into R3.
* (Note that the sign bits cancel out in the subtraction.) The
* exponent delta tells how many bits the mantissa of the smaller
* operand must be shifted to the right in order to bring the
* operands into equal scale.
*/
mov r2, r1, lsr #23;
rsb r3, r2, r0, lsr #23;
/**
* Check if the exponent delta is bigger than 23 bits, or if the
* smaller exponent is zero. In either case, exit the routine and
* return the larger operand (which is already in R0). Note that
* this means that subnormals are treated as zero.
*/
rsbs r12, r3, #23; // N set, V clear if R3 > 23
tstpl r2, #0xff; // execute only if R3 <= 23
bxle lr; // exit if Z set or N != V
/**
* Extract the mantissas and shift them up to unsigned 1.31 fixed
* point, inserting the implied leading "1" bit at the same time.
* Finally, align the decimal points and add up the mantissas.
*/
mov r12, #0x80000000;
orr r0, r12, r0, lsl #8;
orr r1, r12, r1, lsl #8;
adds r0, r0, r1, lsr r3;
/**
* Compute the final exponent by adding up the smaller exponent
* (R2), the exponent delta (R3), and the possible overflow bit
* (carry flag). Note that in case of overflow, the leading "1"
* has ended up in the carry flag and thus needs not be explicitly
* discarded. Finally, put the mantissa together with the sign and
* exponent.
*/
adc r2, r2, r3; // r2 = smallExp + deltaExp + overflow
movcc r0, r0, lsl #1; // no overflow: discard leading 1
mov r0, r0, lsr #9;
orr r0, r0, r2, lsl #23;
bx lr;
_m3gSub
/**
* Sort the operands such that the one with larger magnitude is
* in R0 and has the correct sign (the sign of the final result),
* while the smaller operand is in R1 with an inverted sign bit.
*/
eor r1, r1, #0x80000000;
subs r2, r0, r1;
eormi r2, r2, #0x80000000;
submi r0, r0, r2;
addmi r1, r1, r2;
/**
* Extract the exponent of the smaller operand into R2 and compute
* the difference between the larger and smaller exponent into R3.
* (Note that the sign bits cancel out in the subtraction.) The
* exponent delta tells how many bits the mantissa of the smaller
* operand must be shifted to the right in order to bring the
* operands into equal scale.
*/
mov r2, r1, lsr #23;
rsbs r3, r2, r0, lsr #23;
/**
* Check if the exponent delta is bigger than 31 bits, or if the
* smaller exponent is zero. In either case, exit the routine and
* return the larger operand (which is already in R0). Note that
* this means that subnormals are treated as zero.
*/
rsbs r12, r3, #31; // N set, V clear if R3 > 31
tstpl r2, #0xff; // execute only if R3 <= 31
bxle lr; // exit if Z set or N != V
/**
* Extract the mantissas and shift them up to unsigned 1.31 fixed
* point, inserting the implied leading "1" bit at the same time.
* Then align the decimal points and subtract the smaller mantissa
* from the larger one.
*/
mov r12, #0x80000000;
orr r0, r12, r0, lsl #8;
orr r1, r12, r1, lsl #8;
subs r0, r0, r1, lsr r3;
/**
* We split the range of possible results into three categories:
*
* 1. [1.0, 2.0) ==> no renormalization needed (highest bit set)
* 2. [0.5, 1.0) ==> only one left-shift needed
* 3. (0.0, 0.5) ==> multiple left-shifts needed
* 4. zero ==> just return
*
* Cases 1 and 2 are handled in the main code path. Cases 3 and 4
* are less common by far, so we branch to a separate code fragment
* for those.
*/
movpls r0, r0, lsl #1; // Cases 2,3,4: shift left
bpl _m3gSubRenormalize; // Cases 3 & 4: branch out
/**
* Now we have normalized the mantissa such that the highest bit
* is set. Here we only need to adjust the exponent, if necessary,
* and put the pieces together. Note that we lose the leading "1"
* bit from the mantissa by adding it up with the exponent. We can
* also do proper rounding (towards nearest) instead of truncation
* (towards zero) at no extra cost!
*/
sbc r3, r3, #1; // deltaExp -= 1 (Case 1) or 2 (Case 2)
add r2, r2, r3; // resultExp = smallExp + deltaExp
movs r0, r0, lsr #8; // shift mantissa, keep leading "1"
adc r0, r0, r2, lsl #23; // resultExp += 1, mantissa += carry
bx lr;
/**
* Separate code path for cases 3 and 4 (see above). The mantissa
* has already been shifted up by one, but the exponent has not
* been correspondingly decreased. We also know that the highest
* bit is still not set, and that the carry flag is clear.
*/
_m3gSubRenormalize
bxeq lr;
subcc r3, r3, #2;
movccs r0, r0, lsl #2;
/**
* If the carry flag is still not set, i.e. there were more than
* two leading zeros in the mantissa initially, loop until we
* find the highest set bit.
*/
_m3gSubRenormalizeLoop
subcc r3, r3, #1;
movccs r0, r0, lsl #1;
bcc _m3gSubRenormalizeLoop;
/**
* Now the leading "1" is in the carry flag, so we can just add up
* the exponent and mantissa as usual, doing proper rounding at
* the same time. However, cases where the exponent goes negative
* (that is, underflows) must be detected and flushed to zero.
*/
add r3, r2, r3;
movs r0, r0, lsr #9;
adc r0, r0, r3, lsl #23;
teq r0, r2, lsl #23;
movmi r0, #0;
bx lr;
}
#else /* M3G_BUILD_ARM */
/*!
* \internal
* \brief Floating point addition implementation
*
*/
static M3G_INLINE M3Gfloat m3gFloatAdd(const M3Guint aBits,
const M3Guint bBits)
{
M3Guint large, small, signMask;
/* Early exits for underflow cases */
large = (M3Gint)(aBits & ~SIGN_MASK);
if (large <= 0x00800000) {
return INT_AS_FLOAT(bBits);
}
small = (M3Gint)(bBits & ~SIGN_MASK);
if (small <= 0x00800000) {
return INT_AS_FLOAT(aBits);
}
/* Swap the numbers so that "large" really is larger; the unsigned
* (or de-signed) bitmasks for floats are nicely monotonous, so we
* can compare directly. Also store the sign of the larger number
* for future reference. */
if (small > large) {
M3Gint temp = small;
small = large;
large = temp;
signMask = (bBits & SIGN_MASK);
}
else {
signMask = (aBits & SIGN_MASK);
}
{
M3Guint res, overflow;
M3Guint resExp, expDelta;
/* Store the larger exponent as our candidate result exponent,
* and compute the difference between the exponents */
resExp = (large >> 23);
expDelta = resExp - (small >> 23);
/* Take an early exit if the change would be insignificant;
* this also guards against odd results from shifting by more
* than 31 (undefined in C) */
if (expDelta >= 24) {
res = large | signMask;
return INT_AS_FLOAT(res);
}
/* Convert the mantissas into shifted integers, and shift the
* smaller number to the same scale with the larger one. */
large = (large & MANTISSA_MASK) | LEADING_ONE;
small = (small & MANTISSA_MASK) | LEADING_ONE;
small >>= expDelta;
M3G_ASSERT(large >= small);
/* Check whether we're really adding or subtracting the
* smaller number, and branch to slightly different routines
* respectively */
if (((aBits ^ bBits) & SIGN_MASK) == 0) {
/* Matching signs; just add the numbers and check for
* overflow, shifting to compensate as necessary. */
res = large + small;
overflow = (res >> 24);
res >>= overflow;
resExp += overflow;
}
else {
/* Different signs, so let's subtract the smaller value;
* also check that we're not subtracting a number from
* itself (so we don't have to normalize a zero below) */
if (small == large) {
return 0.0f; /* x - x = 0 */
}
res = (large << 8) - (small << 8);
/* Renormalize the number by shifting until we've got the
* high bit in place */
while ((res >> 24) == 0) {
res <<= 8;
resExp -= 8;
}
while ((res >> 31) == 0) {
res <<= 1;
--resExp;
}
res >>= 8;
}
/* Flush to zero in case of over/underflow of the exponent */
if (resExp >= 255) {
return 0.0f;
}
/* Compose the final number into "res"; note that we pull in
* the sign of the original larger number, which should still
* be valid */
res &= MANTISSA_MASK;
res |= (resExp << 23);
res |= signMask;
return INT_AS_FLOAT(res);
}
}
/*!
* \internal
* \brief Floating point multiplication implementation
*/
static M3G_INLINE M3Gfloat m3gFloatMul(const M3Guint aBits,
const M3Guint bBits)
{
M3Guint a, b;
/* Early exits for underflow and multiplication by zero */
a = (aBits & ~SIGN_MASK);
if (a <= 0x00800000) {
return 0.0f;
}
b = (bBits & ~SIGN_MASK);
if (b <= 0x00800000) {
return 0.0f;
}
{
M3Guint res, exponent, overflow;
/* Compute the exponent of the result, assuming the mantissas
* don't overflow; then mask out the original exponents */
exponent = (a >> 23) + (b >> 23) - 127;
a &= MANTISSA_MASK;
b &= MANTISSA_MASK;
/* Compute the new mantissa from:
*
* (1 + a)(1 + b) = ab + a + b + 1
*
* First shift the mantissas from 0.23 down to 0.16 for the
* multiplication, then shift back to 0.23 for adding in the
* "a + b + 1" part of the equation. */
res = (a >> 7) * (b >> 7); /* "ab" at 0.32 */
res = (res >> 9) + a + b + LEADING_ONE;
/* Add the leading one, then normalize the result by checking
* the overflow bit and dividing by two if necessary */
overflow = (res >> 24);
res >>= overflow;
exponent += overflow;
/* Flush to zero in case of over/underflow of the exponent */
if (exponent >= 255) {
return 0.0f;
}
/* Compose the final number into "res" */
res &= MANTISSA_MASK;
res |= (exponent << 23);
res |= (aBits ^ bBits) & SIGN_MASK;
return INT_AS_FLOAT(res);
}
}
#endif /* M3G_BUILD_ARM */
/*!
* \internal
* \brief Computes the signed fractional part of a floating point
* number
*
* \param x floating point value
* \return x signed fraction of x in ]-1, 1[
*/
static M3Gfloat m3gFrac(M3Gfloat x)
{
/* Quick exit for -1 < x < 1 */
if (m3gAbs(x) < 1.0f) {
return x;
}
/* Shift the mantissa to the proper place, mask out the integer
* part, and renormalize */
{
M3Guint ix = FLOAT_AS_UINT(x);
M3Gint expo = ((ix >> 23) & 0xFF) - 127;
M3G_ASSERT(expo >= 0);
/* The decimal part will always be zero for large values with
* exponents over 24 */
if (expo >= 24) {
return 0.f;
}
else {
/* Shift the integer part out of the mantissa and see what
* we have left */
M3Guint base = (ix & MANTISSA_MASK) | LEADING_ONE;
base = (base << expo) & MANTISSA_MASK;
/* Quick exit (and guard against infinite looping) for
* zero */
if (base == 0) {
return 0.f;
}
/* We now have an exponent of 0 (i.e. no shifting), but
* must renormalize to get a set bit in place of the
* hidden (implicit one) bit */
expo = 0;
while ((base >> 19) == 0) {
base <<= 4;
expo -= 4;
}
while ((base >> 23) == 0) {
base <<= 1;
--expo;
}
/* Compose the final number */
ix =
(base & MANTISSA_MASK) |
((expo + 127) << 23) |
(ix & SIGN_MASK);
return INT_AS_FLOAT(ix);
}
}
}
#endif /* M3G_SOFT_FLOAT */
#if defined(M3G_DEBUG)
/*!
* \internal
* \brief Checks for NaN or infinity in a floating point input
*/
static void m3gValidateFloats(int n, float *p)
{
while (n-- > 0) {
M3G_ASSERT(EXPONENT(*p) < 120);
++p;
}
}
#else
# define m3gValidateFloats(n, p)
#endif
/*------------------ Trigonometry and exp ----------*/
#if defined(M3G_SOFT_FLOAT)
/*!
* \internal
* \brief Sine for the first quadrant
*
* \param x floating point value in [0, PI/2]
* \return sine of \c x
*/
static M3Gfloat m3gSinFirstQuadrant(M3Gfloat x)
{
M3Guint bits = FLOAT_AS_UINT(x);
if (bits <= 0x3ba3d70au) /* 0.005f */
return x;
else {
static const M3Gfloat sinTermLut[4] = {
-1.0f / (2*3),
-1.0f / (4*5),
-1.0f / (6*7),
-1.0f / (8*9)
};
M3Gfloat xx = m3gSquare(x);
M3Gfloat sinx = x;
int i;
for (i = 0; i < 4; ++i) {
x = m3gMul(x, m3gMul(xx, sinTermLut[i]));
sinx = m3gAdd(sinx, x);
}
return sinx;
}
}
#endif /* M3G_SOFT_FLOAT */
#if defined(M3G_SOFT_FLOAT)
/*!
* \internal
* \brief Computes sine for the first period
*
* \param x floating point value in [0, 2PI]
* \return sine of x
*/
static M3Gfloat m3gSinFirstPeriod(const M3Gfloat x)
{
M3G_ASSERT(x >= 0 && x <= TWO_PI);
if (x >= PI) {
return m3gNegate(m3gSinFirstQuadrant(x >= ONE_AND_HALF_PI ?
m3gSub(TWO_PI, x) :
m3gSub(x, PI)));
}
return m3gSinFirstQuadrant((x >= HALF_PI) ? m3gSub(PI, x) : x);
}
#endif /* M3G_SOFT_FLOAT */
/*------------- Float vs. int conversion helpers -------------*/
/*!
* \internal
* \brief Scales and clamps a float to unsigned byte range
*/
static M3G_INLINE M3Gint m3gFloatToByte(const M3Gfloat a)
{
return m3gRoundToInt(m3gMul(255.f, m3gClampFloat(a, 0.f, 1.f)));
}
/*------------------ Vector helpers ------------------*/
/*!
* \internal
* \brief Computes the norm of a floating-point 3-vector
*/
static M3G_INLINE M3Gfloat m3gNorm3(const M3Gfloat *v)
{
return m3gAdd(m3gAdd(m3gSquare(v[0]), m3gSquare(v[1])),
m3gSquare(v[2]));
}
/*!
* \internal
* \brief Computes the norm of a floating-point 4-vector
*/
static M3G_INLINE M3Gfloat m3gNorm4(const M3Gfloat *v)
{
return m3gAdd(m3gAdd(m3gSquare(v[0]), m3gSquare(v[1])),
m3gAdd(m3gSquare(v[2]), m3gSquare(v[3])));
}
/*!
* \internal
* \brief Scales a floating-point 3-vector
*/
static M3G_INLINE void m3gScale3(M3Gfloat *v, M3Gfloat s)
{
v[0] = m3gMul(v[0], s);
v[1] = m3gMul(v[1], s);
v[2] = m3gMul(v[2], s);
}
/*!
* \internal
* \brief Scales a floating-point 4-vector
*/
static M3G_INLINE void m3gScale4(M3Gfloat *v, M3Gfloat s)
{
v[0] = m3gMul(v[0], s);
v[1] = m3gMul(v[1], s);
v[2] = m3gMul(v[2], s);
v[3] = m3gMul(v[3], s);
}
/*------------------ Matrices ------------------*/
/*!
* \internal
*/
static M3G_INLINE M3Gbool m3gIsClassified(const Matrix *mtx)
{
M3G_ASSERT(mtx != NULL);
return (M3Gbool) mtx->classified;
}
/*!
* \internal
* \brief Returns a classification for a single floating point number
*/
static M3G_INLINE M3Guint m3gElementClass(const M3Gfloat x)
{
if (IS_ZERO(x)) {
return ELEM_ZERO;
}
else if (IS_ONE(x)) {
return ELEM_ONE;
}
else if (IS_MINUS_ONE(x)) {
return ELEM_MINUS_ONE;
}
return ELEM_ANY;
}
/*!
* \internal
* \brief Computes the classification mask of a matrix
*
* The mask is constructed from two bits per elements, with the lowest
* two bits corresponding to the first element in the \c elem array of
* the matrix.
*/
static void m3gClassify(Matrix *mtx)
{
M3Guint mask = 0;
const M3Gfloat *p;
int i;
M3G_ASSERT(mtx != NULL);
M3G_ASSERT(!m3gIsClassified(mtx));
p = mtx->elem;
for (i = 0; i < 16; ++i) {
M3Gfloat elem = *p++;
mask |= (m3gElementClass(elem) << (i*2));
}
mtx->mask = mask;
mtx->classified = M3G_TRUE;
}
/*!
* \internal
* \brief Sets matrix classification directly
*/
static M3G_INLINE void m3gClassifyAs(M3Guint mask, Matrix *mtx)
{
M3G_ASSERT(mtx != NULL);
mtx->mask = mask;
mtx->classified = M3G_TRUE;
mtx->complete = M3G_FALSE;
}
/*!
* \internal
* \brief Attempts to classify a matrix more precisely
*
* Tries to classify all "free" elements of a matrix into one of the
* predefined constants to improve precision and performance in
* subsequent calculations.
*/
static void m3gSubClassify(Matrix *mtx)
{
M3G_ASSERT_PTR(mtx);
M3G_ASSERT(m3gIsClassified(mtx));
{
const M3Gfloat *p = mtx->elem;
M3Guint inMask = mtx->mask;
M3Guint outMask = inMask;
int i;
for (i = 0; i < 16; ++i, inMask >>= 2) {
M3Gfloat elem = *p++;
if ((inMask & 0x03) == ELEM_ANY) {
outMask &= ~(0x03u << (i*2));
outMask |= (m3gElementClass(elem) << (i*2));
}
}
mtx->mask = outMask;
}
}
/*!
* \internal
* \brief Fills in the implicit values for a classified matrix
*/
static void m3gFillClassifiedMatrix(Matrix *mtx)
{
int i;
M3Guint mask;
M3Gfloat *p;
M3G_ASSERT(mtx != NULL);
M3G_ASSERT(mtx->classified);
M3G_ASSERT(!mtx->complete);
mask = mtx->mask;
p = mtx->elem;
for (i = 0; i < 16; ++i, mask >>= 2) {
unsigned elem = (mask & 0x03);
switch (elem) {
case ELEM_ZERO: *p++ = 0.0f; break;
case ELEM_ONE: *p++ = 1.0f; break;
case ELEM_MINUS_ONE: *p++ = -1.0f; break;
default: ++p;
}
}
mtx->complete = M3G_TRUE;
}
#if !defined(M3G_HW_FLOAT)
/*!
* \internal
* \brief Performs one multiply-add of classified matrix elements
*
* \param amask element class of the first multiplicand
* \param a float value of the first multiplicand
* \param bmask element class of the second multiplicand
* \param b float value of the second multiplicand
* \param c float value to add
* \return a * b + c
*
* \notes inline, as only called from the matrix product function
*/
static M3G_INLINE M3Gfloat m3gClassifiedMadd(
const M3Gbitmask amask, const M3Gfloat *pa,
const M3Gbitmask bmask, const M3Gfloat *pb,
const M3Gfloat c)
{
/* Check for zero product to reduce the switch cases below */
if (amask == ELEM_ZERO || bmask == ELEM_ZERO) {
return c;
}
/* Branch based on the classification of a */
switch (amask) {
case ELEM_ANY:
if (bmask == ELEM_ONE) {
return m3gAdd(*pa, c); /* a * 1 + c = a + c */
}
if (bmask == ELEM_MINUS_ONE) {
return m3gSub(c, *pa); /* a * -1 + c = -a + c */
}
return m3gMadd(*pa, *pb, c); /* a * b + c */
case ELEM_ONE:
if (bmask == ELEM_ONE) {
return m3gAdd(c, 1.f); /* 1 * 1 + c = 1 + c */
}
if (bmask == ELEM_MINUS_ONE) {
return m3gSub(c, 1.f); /* 1 * -1 + c = -1 + c */
}
return m3gAdd(*pb, c); /* 1 * b + c = b + c */
case ELEM_MINUS_ONE:
if (bmask == ELEM_ONE) {
return m3gSub(c, 1.f); /* -1 * 1 + c = -1 + c */
}
if (bmask == ELEM_MINUS_ONE) {
return m3gAdd(c, 1.f); /* -1 * -1 + c = 1 + c */
}
return m3gSub(c, *pb); /* -1 * b + c = -b + c */
default:
M3G_ASSERT(M3G_FALSE);
return 0.0f;
}
}
#endif /*!defined(M3G_HW_FLOAT)*/
/*!
* \internal
* \brief Computes a generic 4x4 matrix product
*/
static void m3gGenericMatrixProduct(Matrix *dst,
const Matrix *left, const Matrix *right)
{
M3G_ASSERT(dst != NULL && left != NULL && right != NULL);
{
# if defined(M3G_HW_FLOAT)
if (!left->complete) {
m3gFillClassifiedMatrix((Matrix*)left);
}
if (!right->complete) {
m3gFillClassifiedMatrix((Matrix*)right);
}
# else
const unsigned lmask = left->mask;
const unsigned rmask = right->mask;
# endif
#if defined(M3G_HW_FLOAT_VFPV2)
_m3gGenericMatrixProduct(dst, left, right);
#else
{
int row;
for (row = 0; row < 4; ++row) {
int col;
for (col = 0; col < 4; ++col) {
int k;
M3Gfloat a = 0;
for (k = 0; k < 4; ++k) {
M3Gint lidx = MELEM(row, k);
M3Gint ridx = MELEM(k, col);
# if defined(M3G_HW_FLOAT)
a = m3gMadd(left->elem[lidx], right->elem[ridx], a);
# else
a = m3gClassifiedMadd((lmask >> (2 * lidx)) & 3,
&left->elem[lidx],
(rmask >> (2 * ridx)) & 3,
&right->elem[ridx],
a);
# endif /*!M3G_HW_FLOAT*/
}
M44F(dst, row, col) = a;
}
}
}
#endif /*!M3G_HW_FLOAT_VFPV2*/
}
dst->complete = M3G_TRUE;
dst->classified = M3G_FALSE;
}
/*!
* \internal
* \brief Converts a float vector to 16-bit integers
*
* \param size vector length
* \param vec pointer to float vector
* \param scale scale of output values, as the number of bits to
* shift left to get actual values
* \param outVec output value vector
*/
static void m3gFloatVecToShort(M3Gint size, const M3Gfloat *vec,
M3Gint scale, M3Gshort *outVec)
{
const M3Guint *vecInt = (const M3Guint*) vec;
M3Gint i;
for (i = 0; i < size; ++i) {
M3Guint a = vecInt[i];
if ((a & ~SIGN_MASK) < (1 << 23)) {
*outVec++ = 0;
}
else {
M3Gint shift =
scale - ((M3Gint)((vecInt[i] >> 23) & 0xFFu) - (127 + 23));
M3G_ASSERT(shift > 8); /* or the high bits will overflow */
if (shift > 23) {
*outVec++ = 0;
}
else {
M3Gint out =
(M3Gint) (((a & MANTISSA_MASK) | LEADING_ONE) >> shift);
if (a >> 31) {
out = -out;
}
M3G_ASSERT(m3gInRange(out, -32767, 32767));
*outVec++ = (M3Gshort) out;
}
}
}
}
/*----------------------------------------------------------------------
* Internal functions
*--------------------------------------------------------------------*/
/*--------------------------------------------------------------------*/
#if defined(M3G_SOFT_FLOAT)
#if !defined (M3G_BUILD_ARM)
static M3Gfloat m3gAdd(const M3Gfloat a, const M3Gfloat b)
{
return m3gFloatAdd(FLOAT_AS_UINT(a), FLOAT_AS_UINT(b));
}
static M3Gfloat m3gMul(const M3Gfloat a, const M3Gfloat b)
{
return m3gFloatMul(FLOAT_AS_UINT(a), FLOAT_AS_UINT(b));
}
#endif /* M3G_BUILD_ARM */
/*!
* \internal
* \brief Computes the reciprocal of the square root
*
* \param x a floating point value
* \return 1 / square root of \c x
*/
static M3Gfloat m3gRcpSqrt(const M3Gfloat x)
{
/* Approximation followed by Newton-Raphson iteration a'la
* "Floating-point tricks" by J. Blinn, but we iterate several
* times to improve precision */
M3Gint i = (M3G_FLOAT_ONE + (M3G_FLOAT_ONE >> 1))
- (FLOAT_AS_UINT(x) >> 1);
M3Gfloat y = INT_AS_FLOAT(i);
for (i = 0; i < 3; ++i) {
y = m3gMul(y, m3gSub(1.5f, m3gHalf(m3gMul(x, m3gSquare(y)))));
}
return y;
}
/*!
* \internal
* \brief Computes the square root
*
* \param x a floating point value
* \return square root of \c x
*/
static M3Gfloat m3gSqrt(const M3Gfloat x)
{
/* Approximation followed by Newton-Raphson iteration a'la
* "Floating-point tricks" by J. Blinn, but we iterate several
* times to improve precision */
M3Gint i = (FLOAT_AS_UINT(x) >> 1) + (M3G_FLOAT_ONE >> 1);
M3Gfloat y = INT_AS_FLOAT(i);
for (i = 0; i < 2; ++i) {
y = m3gDiv(m3gAdd(m3gSquare(y), x), m3gDouble(y));
}
return y;
}
#endif /* M3G_SOFT_FLOAT */
/*--------------------------------------------------------------------*/
#if defined(M3G_SOFT_FLOAT)
/*!
* \internal
*/
static M3Gfloat m3gArcCos(const M3Gfloat x)
{
return (M3Gfloat) acos(x);
}
/*!
* \internal
*/
static M3Gfloat m3gArcTan(const M3Gfloat y, const M3Gfloat x)
{
return (M3Gfloat) atan2(y, x);
}
/*!
* \internal
*/
static M3Gfloat m3gCos(const M3Gfloat x)
{
return m3gSin(m3gAdd(x, HALF_PI));
}
/*!
* \internal
* \brief
*/
static M3Gfloat m3gSin(const M3Gfloat x)
{
M3Gfloat f = x;
/* If x is greater than two pi, do a modulo operation to bring it
* back in range for the internal sine function */
if (m3gAbs(f) >= TWO_PI) {
f = m3gMul (f, (1.f / TWO_PI));
f = m3gFrac(f);
f = m3gMul (f, TWO_PI);
}
/* Compute the result, negating both the input value and the
* result if x was negative */
{
M3Guint i = FLOAT_AS_UINT(f);
M3Guint neg = (i & SIGN_MASK);
i ^= neg;
f = m3gSinFirstPeriod(INT_AS_FLOAT(i));
i = FLOAT_AS_UINT(f) ^ neg;
return INT_AS_FLOAT(i);
}
}
/*!
* \internal
*/
static M3Gfloat m3gTan(const M3Gfloat x)
{
return (M3Gfloat) tan(x);
}
/*!
* \internal
*/
static M3Gfloat m3gExp(const M3Gfloat a)
{
return (M3Gfloat) exp(a);
}
#endif /* M3G_SOFT_FLOAT */
/*!
* \brief Checks whether the bottom row of a matrix is 0 0 0 1
*/
static M3Gbool m3gIsWUnity(const Matrix *mtx)
{
M3G_ASSERT_PTR(mtx);
if (!m3gIsClassified(mtx)) {
return (IS_ZERO(M44F(mtx, 3, 0)) &&
IS_ZERO(M44F(mtx, 3, 1)) &&
IS_ZERO(M44F(mtx, 3, 2)) &&
IS_ONE (M44F(mtx, 3, 3)));
}
else {
return ((mtx->mask & 0xC0C0C0C0) == (ELEM_ONE << 30));
}
}
/*!
* \brief Makes a quaternion by exponentiating a quaternion logarithm
*/
static void m3gExpQuat(Quat *quat, const Vec3 *qExp)
{
M3Gfloat theta;
M3G_ASSERT_PTR(quat);
M3G_ASSERT_PTR(qExp);
theta = m3gSqrt(m3gAdd(m3gAdd(m3gSquare(qExp->x),
m3gSquare(qExp->y)),
m3gSquare(qExp->z)));
if (theta > EPSILON) {
M3Gfloat s = m3gMul(m3gSin(theta), m3gRcp(theta));
quat->x = m3gMul(qExp->x, s);
quat->y = m3gMul(qExp->y, s);
quat->z = m3gMul(qExp->z, s);
quat->w = m3gCos(theta);
}
else {
quat->x = quat->y = quat->z = 0.0f;
quat->w = 1.0f;
}
}
/*!
* \brief Natural logarithm of a unit quaternion.
*/
static void m3gLogQuat(Vec3 *qLog, const Quat *quat)
{
M3Gfloat sinTheta = m3gSqrt(m3gNorm3((const float *) quat));
if (sinTheta > EPSILON) {
M3Gfloat s = m3gArcTan(sinTheta, quat->w) / sinTheta;
qLog->x = m3gMul(s, quat->x);
qLog->y = m3gMul(s, quat->y);
qLog->z = m3gMul(s, quat->z);
}
else {
qLog->x = qLog->y = qLog->z = 0.0f;
}
}
/*!
* \brief Make quaternion the "logarithmic difference" between two
* other quaternions.
*/
static void m3gLogDiffQuat(Vec3 *logDiff,
const Quat *from, const Quat *to)
{
Quat temp;
temp.x = m3gNegate(from->x);
temp.y = m3gNegate(from->y);
temp.z = m3gNegate(from->z);
temp.w = from->w;
m3gMulQuat(&temp, to);
m3gLogQuat(logDiff, &temp);
}
/*!
* \brief Rounds a float to the nearest integer
*
* Overflows are clamped to the maximum or minimum representable
* value.
*/
static M3Gint m3gRoundToInt(const M3Gfloat a)
{
M3Guint base = FLOAT_AS_UINT(a);
M3Gint signMask, expo;
/* Decompose the number into sign, exponent, and base number */
signMask = ((M3Gint) base >> 31); /* -> 0 or 0xFFFFFFFF */
expo = (M3Gint)((base >> 23) & 0xFF) - 127;
/* First check for large values and return either the negative or
* the positive maximum integer in case of overflow. The overflow
* check can be made on the exponent alone, as large floats are
* spaced several integer values apart so that nothing will
* overflow because of rounding later on */
if (expo >= 31) {
return (M3Gint)((1U << 31) - 1 + (((M3Guint) signMask) & 1));
}
/* Also check for underflow to avoid problems with shifting by
* more than 31 */
if (expo < -1) {
return 0;
}
/* Mask out the sign and exponent bits, shift the base number so
* that the lowest bit corresponds to one half, then add one
* (half) and shift to round to the closest integer. */
base = (base | LEADING_ONE) << 8; /* shift mantissa to 1.31 */
base = base >> (30 - expo); /* integer value as 31.1 */
base = (base + 1) >> 1; /* round to nearest 32.0 */
/* Factor in the sign (negate if originally negative) and
* return */
return ((M3Gint) base ^ signMask) - signMask;
}
/*!
* \brief Calculates ray-triangle intersection.
*
* http://www.acm.org/jgt/papers/MollerTrumbore97
*/
static M3Gbool m3gIntersectTriangle(const Vec3 *orig, const Vec3 *dir,
const Vec3 *vert0, const Vec3 *vert1, const Vec3 *vert2,
Vec3 *tuv, M3Gint cullMode)
{
Vec3 edge1, edge2, tvec, pvec, qvec;
M3Gfloat det,inv_det;
/* find vectors for two edges sharing vert0 */
edge1 = *vert1;
edge2 = *vert2;
m3gSubVec3(&edge1, vert0);
m3gSubVec3(&edge2, vert0);
/* begin calculating determinant - also used to calculate U parameter */
m3gCross(&pvec, dir, &edge2);
/* if determinant is near zero, ray lies in plane of triangle */
det = m3gDot3(&edge1, &pvec);
if (cullMode == 0 && det <= 0) return M3G_FALSE;
if (cullMode == 1 && det >= 0) return M3G_FALSE;
if (det > -EPSILON && det < EPSILON)
return M3G_FALSE;
inv_det = m3gRcp(det);
/* calculate distance from vert0 to ray origin */
tvec = *orig;
m3gSubVec3(&tvec, vert0);
/* calculate U parameter and test bounds */
tuv->y = m3gMul(m3gDot3(&tvec, &pvec), inv_det);
if (tuv->y < 0.0f || tuv->y > 1.0f)
return M3G_FALSE;
/* prepare to test V parameter */
m3gCross(&qvec, &tvec, &edge1);
/* calculate V parameter and test bounds */
tuv->z = m3gMul(m3gDot3(dir, &qvec), inv_det);
if (tuv->z < 0.0f || m3gAdd(tuv->y, tuv->z) > 1.0f)
return M3G_FALSE;
/* calculate t, ray intersects triangle */
tuv->x = m3gMul(m3gDot3(&edge2, &qvec), inv_det);
return M3G_TRUE;
}
/*!
* \brief Calculates ray-box intersection.
*
* http://www.siggraph.org/education/materials/HyperGraph/raytrace/rtinter3.htm
*/
#define XO orig->x
#define YO orig->y
#define ZO orig->z
#define XD dir->x
#define YD dir->y
#define ZD dir->z
#define XL box->min[0]
#define YL box->min[1]
#define ZL box->min[2]
#define XH box->max[0]
#define YH box->max[1]
#define ZH box->max[2]
/*!
* \internal
* \brief Ray - bounding box intersection
*
*/
static M3Gbool m3gIntersectBox(const Vec3 *orig, const Vec3 *dir, const AABB *box)
{
M3Gfloat tnear = M3G_MIN_NEGATIVE_FLOAT;
M3Gfloat tfar = M3G_MAX_POSITIVE_FLOAT;
M3Gfloat t1, t2, temp;
/* X slab */
if(XD != 0) {
t1 = m3gSub(XL, XO) / XD;
t2 = m3gSub(XH, XO) / XD;
if(t1 > t2) {
temp = t1;
t1 = t2;
t2 = temp;
}
if(t1 > tnear) tnear = t1;
if(t2 < tfar) tfar = t2;
if(tnear > tfar) return M3G_FALSE;
if(tfar < 0) return M3G_FALSE;
}
else {
if(XO > XH || XO < XL) return M3G_FALSE;
}
/* Y slab */
if(YD != 0) {
t1 = m3gSub(YL, YO) / YD;
t2 = m3gSub(YH, YO) / YD;
if(t1 > t2) {
temp = t1;
t1 = t2;
t2 = temp;
}
if(t1 > tnear) tnear = t1;
if(t2 < tfar) tfar = t2;
if(tnear > tfar) return M3G_FALSE;
if(tfar < 0) return M3G_FALSE;
}
else {
if(YO > YH || YO < YL) return M3G_FALSE;
}
/* Z slab */
if(ZD != 0) {
t1 = m3gSub(ZL, ZO) / ZD;
t2 = m3gSub(ZH, ZO) / ZD;
if(t1 > t2) {
temp = t1;
t1 = t2;
t2 = temp;
}
if(t1 > tnear) tnear = t1;
if(t2 < tfar) tfar = t2;
if(tnear > tfar) return M3G_FALSE;
if(tfar < 0) return M3G_FALSE;
}
else {
if(ZO > ZH || ZO < ZL) return M3G_FALSE;
}
return M3G_TRUE;
}
/*!
* \brief Calculates the intersection of two rectangles. Always fills
* the intersection result.
*
* \param dst result of the intersection
* \param r1 rectangle 1
* \param r2 rectangle 2
*/
static M3Gbool m3gIntersectRectangle(M3GRectangle *dst, M3GRectangle *r1, M3GRectangle *r2)
{
M3Gbool intersects = M3G_TRUE;
M3Gint min, max;
max = (r1->x) >= (r2->x) ? (r1->x) : (r2->x);
min = (r1->x + r1->width) <= (r2->x + r2->width) ? (r1->x + r1->width) : (r2->x + r2->width);
if ((min - max) < 0) intersects = M3G_FALSE;
dst->x = max;
dst->width = min - max;
max = (r1->y) >= (r2->y) ? (r1->y) : (r2->y);
min = (r1->y + r1->height) <= (r2->y + r2->height) ? (r1->y + r1->height) : (r2->y + r2->height);
if ((min - max) < 0) intersects = M3G_FALSE;
dst->y = max;
dst->height = min - max;
return intersects;
}
/*-------- float-to-int color conversions --------*/
static M3Guint m3gAlpha1f(M3Gfloat a)
{
M3Guint alpha = (M3Guint) m3gFloatToByte(a);
return (alpha << 24) | M3G_RGB_MASK;
}
static M3Guint m3gColor3f(M3Gfloat r, M3Gfloat g, M3Gfloat b)
{
return ((M3Guint) m3gFloatToByte(r) << 16)
| ((M3Guint) m3gFloatToByte(g) << 8)
| (M3Guint) m3gFloatToByte(b)
| M3G_ALPHA_MASK;
}
static M3Guint m3gColor4f(M3Gfloat r, M3Gfloat g, M3Gfloat b, M3Gfloat a)
{
return ((M3Guint) m3gFloatToByte(r) << 16)
| ((M3Guint) m3gFloatToByte(g) << 8)
| (M3Guint) m3gFloatToByte(b)
| ((M3Guint) m3gFloatToByte(a) << 24);
}
static void m3gFloatColor(M3Gint argb, M3Gfloat intensity, M3Gfloat *rgba)
{
/* NOTE we intentionally aim a bit high here -- some GL
* implementations may round down instead of closest */
const M3Gfloat oneOver255 = (M3Gfloat)(1.0001 / 255.0);
rgba[0] = (M3Gfloat)((argb >> 16) & 0xFF);
rgba[1] = (M3Gfloat)((argb >> 8) & 0xFF);
rgba[2] = (M3Gfloat)((argb ) & 0xFF);
rgba[3] = (M3Gfloat)((argb >> 24) & 0xFF);
m3gScale4(rgba, m3gMul(oneOver255, intensity));
}
/*!
* \brief Converts the 3x3 submatrix of a matrix into fixed point
*
* The input matrix must be an affine one, i.e. the bottom row must be
* 0 0 0 1. The output matrix is guaranteed to be such that it can be
* multiplied with a 16-bit 3-vector without overflowing, while using
* the 32-bit range optimally.
*
* \param mtx the original matrix
* \param elem 9 shorts to hold the fixed point base numbers
* \return floating point exponent for the values in \c elem
* (number of bits to shift left for actual values)
*/
static M3Gint m3gGetFixedPoint3x3Basis(const Matrix *mtx, M3Gshort *elem)
{
M3Gint outExp;
M3Gint row, col;
const M3Guint *m;
if (!mtx->complete) {
m3gFillClassifiedMatrix((Matrix*) mtx);
}
m = (const M3Guint*) mtx->elem;
/* First, find the maximum exponent value in the whole matrix */
outExp = 0;
for (col = 0; col < 3; ++col) {
for (row = 0; row < 3; ++row) {
M3Gint element = (M3Gint)(m[MELEM(row, col)] & ~SIGN_MASK);
outExp = M3G_MAX(outExp, element);
}
}
outExp >>= 23;
/* Our candidate exponent is the maximum found plus 9, which is
* guaranteed to shift the maximum unsigned 24-bit mantissa (which
* always will have the high bit set) down to the signed 16-bit
* range */
outExp += 9;
/* Now proceed to sum each row and see what's the actual smallest
* exponent we can safely use without overflowing in a 16+16
* matrix-vector multiplication; this will win us one bit
* (doubling the precision) compared to the conservative approach
* of just shifting everything down by 10 bits */
for (row = 0; row < 3; ++row) {
/* Sum the absolute values on this row */
M3Gint rowSum = 0;
for (col = 0; col < 3; ++col) {
M3Gint a = (M3Gint)(m[MELEM(row, col)] & ~SIGN_MASK);
M3Gint shift = outExp - (a >> 23);
M3G_ASSERT(shift < 265);
if (shift < 24) {
rowSum += ((a & MANTISSA_MASK) | LEADING_ONE) >> shift;
}
}
/* Now we have a 26-bit sum of the absolute values on this
* row, and shift that down until we fit the target range of
* [0, 65535]. Note that this still leaves *exactly* enough
* space for adding in an arbitrary 16-bit translation vector
* after multiplying with the matrix! */
while (rowSum >= (1 << 16)) {
rowSum >>= 1;
++outExp;
}
}
/* De-bias the exponent, but add in an extra 23 to account for the
* decimal bits in the floating point mantissa values we started
* with (we're returning the exponent as "bits to shift left to
* get integers", so we're off by 23 from IEEE notation) */
outExp = (outExp - 127) - 23;
/* Finally, shift all the matrix elements to our final output
* precision */
for (col = 0; col < 3; ++col) {
m3gFloatVecToShort(3, mtx->elem + MELEM(0, col), outExp, elem);
elem += 3;
}
return outExp;
}
/*!
* \brief Gets the translation component of a matrix as fixed point
*
* \param mtx the matrix
* \param elem 3 shorts to write the vector into
* \return floating point exponent for the values in \c elem
* (number of bits to shift left for actual values)
*/
static M3Gint m3gGetFixedPointTranslation(const Matrix *mtx, M3Gshort *elem)
{
const M3Guint *m;
M3G_ASSERT(m3gIsWUnity(mtx));
if (!mtx->complete) {
m3gFillClassifiedMatrix((Matrix*) mtx);
}
m = (const M3Guint*) &mtx->elem[MELEM(0, 3)];
/* Find the maximum exponent, then scale down by 9 bits from that
* to shift the unsigned 24-bit mantissas to the signed 16-bit
* range */
{
M3Gint outExp;
M3Guint maxElem = m[0] & ~SIGN_MASK;
maxElem = M3G_MAX(maxElem, m[1] & ~SIGN_MASK);
maxElem = M3G_MAX(maxElem, m[2] & ~SIGN_MASK);
outExp = (M3Gint)(maxElem >> 23) - (127 + 23) + 9;
m3gFloatVecToShort(3, mtx->elem + MELEM(0, 3), outExp, elem);
return outExp;
}
}
/*!
* \internal
* \brief Compute a bounding box enclosing two other boxes
*
* \param box box to fit
* \param a first box to enclose or NULL
* \param b second box to enclose or NULL
*
* \note If both input boxes are NULL, the box is not modified.
*/
static void m3gFitAABB(AABB *box, const AABB *a, const AABB *b)
{
int i;
M3G_ASSERT_PTR(box);
if (a) {
m3gValidateAABB(a);
}
if (b) {
m3gValidateAABB(b);
}
if (a && b) {
for (i = 0; i < 3; ++i) {
box->min[i] = M3G_MIN(a->min[i], b->min[i]);
box->max[i] = M3G_MAX(a->max[i], b->max[i]);
}
}
else if (a) {
*box = *a;
}
else if (b) {
*box = *b;
}
}
/*
* \internal
* \brief Transform an axis-aligned bounding box with a matrix
*
* This results in a box that encloses the transformed original box.
*
* Based on "Transforming Axis-Aligned Bounding Boxes" by Jim Arvo
* from Graphics Gems.
*
* \note The bottom row of the matrix is ignored in the transformation.
*/
static void m3gTransformAABB(AABB *box, const Matrix *mtx)
{
M3Gfloat boxMin[3], boxMax[3];
M3Gfloat newMin[3], newMax[3];
m3gValidateAABB(box);
if (!mtx->complete) {
m3gFillClassifiedMatrix((Matrix*) mtx);
}
/* Get the original minimum and maximum extents as floats, and add
* the translation as the base for the transformed box */
{
int i;
for (i = 0; i < 3; ++i) {
boxMin[i] = box->min[i];
boxMax[i] = box->max[i];
newMin[i] = newMax[i] = M44F(mtx, i, 3);
}
}
/* Transform into the new minimum and maximum coordinates using
* the upper left 3x3 part of the matrix */
{
M3Gint row, col;
for (row = 0; row < 3; ++row) {
for (col = 0; col < 3; ++col) {
M3Gfloat a = m3gMul(M44F(mtx, row, col), boxMin[col]);
M3Gfloat b = m3gMul(M44F(mtx, row, col), boxMax[col]);
if (a < b) {
newMin[row] = m3gAdd(newMin[row], a);
newMax[row] = m3gAdd(newMax[row], b);
}
else {
newMin[row] = m3gAdd(newMin[row], b);
newMax[row] = m3gAdd(newMax[row], a);
}
}
}
}
/* Write back into the bounding box */
{
int i;
for (i = 0; i < 3; ++i) {
box->min[i] = newMin[i];
box->max[i] = newMax[i];
}
}
m3gValidateAABB(box);
}
#if defined(M3G_DEBUG)
/*!
* \brief
*/
static void m3gValidateAABB(const AABB *aabb)
{
m3gValidateFloats(6, (float*) aabb);
}
#endif
/*----------------------------------------------------------------------
* Public functions
*--------------------------------------------------------------------*/
/*!
* \brief Linear interpolation of vectors
*
* \param size number of components
* \param vec output vector
* \param s interpolation factor
* \param start initial value
* \param end target value
*/
#if defined(M3G_HW_FLOAT_VFPV2)
__weak __asm void m3gLerp(M3Gint size,
M3Gfloat *vec,
M3Gfloat s,
const M3Gfloat *start, const M3Gfloat *end)
{
// r0 = size
// r1 = *vec
// r2 = s
// r3 = *start
// sp[0] = *end
EXPORT m3gLerp[DYNAMIC]
CODE32
/*
M3Gfloat sCompl = 1.0 - s;
for (i = 0; i < size; ++i) {
vec[i] = sCompl*start[i] + s*end[i];
}
*/
//
// if size = 0, return
//
CMP r0, #0
BXEQ lr
FMSR s0, r2
MOV r2, r3
LDR r3, [sp]
FLDS s1,=1.0
STMFD sp!, {r4-r5}
FSUBS s2, s1, s0 // sCompl = 1 - s
FMRX r4, FPSCR
CMP r0, #4
BGT _m3gLerp_over4Loop
//
// if 1 < size <= 4
//
// set vector STRIDE = 1, LENGTH = 4
BIC r12, r4, #0x00370000
ORR r12, #(3<<16)
FMXR FPSCR, r12
FLDMIAS r2!, {s4-s7} // load the start[i] values
FLDMIAS r3!, {s8-s11} // load the end[i] values
FMULS s12, s4, s2 // [s12-s15] = [sCompl*start[0] .. sCompl*start[3]]
FMACS s12, s8, s0 // [s12-s15] += [ s*end[0] .. s*end[3]]
CMP r0, #1
FSTS s12, [r1]
FSTSGT s13, [r1, #4]
CMP r0, #3
FSTSGE s14, [r1, #8]
FSTSGT s15, [r1, #12]
FMXR FPSCR, r4
LDMFD sp!, {r4-r5}
BX lr
//
// if size > 4, interpolate 8 values in one loop
//
_m3gLerp_over4Loop
FSTMFDD sp!, {d8-d9}
MOVS r5, r0, ASR #3 // size/8
SUB r0, r0, r5, LSL #3 // tail length
// set vector STRIDE = 1, LENGTH = 8
BIC r12, r4, #0x00370000
ORR r12, #(7<<16)
FMXR FPSCR, r12
_m3gLerp_alignedLoop
FLDMIASNE r2!, {s8-s15} // load the start[i] values
FLDMIASNE r3!, {s16-s23} // load the end[i] values
FMULSNE s24, s8, s2 // [s16-s23] = [ sCompl*start[0] sCompl*start[1] .. sCompl*start[7]]
FMACSNE s24, s16, s0 // [s16-s23] += [ s*end[0] s*end[1] .. s*end[7]]
FSTMIASNE r1!, {s24-s31}
SUBSNE r5, #1
BNE _m3gLerp_alignedLoop
// process the 0-7 remaining values in the tail
CMP r0, #1
FLDMIASGE r2!, {s8-s15}
FLDMIASGE r3!, {s16-s23}
FMULSGE s24, s8, s2
FMACSGE s24, s16, s0
FSTSGE s24, [r1]
FSTSGT s25, [r1, #4]
CMP r0, #3
FSTSGE s26, [r1, #8]
FSTSGT s27, [r1, #12]
CMP r0, #5
FSTSGE s28, [r1, #16]
FSTSGT s29, [r1, #20]
CMP r0, #7
FSTSEQ s30, [r1, #24]
FMXR FPSCR, r4
FLDMFDD sp!, {d8-d9}
LDMFD sp!, {r4-r5}
BX lr
}
#else /* #if defined(M3G_HW_FLOAT_VFPV2) */
M3G_API void m3gLerp(M3Gint size,
M3Gfloat *vec,
M3Gfloat s,
const M3Gfloat *start, const M3Gfloat *end)
{
int i;
M3Gfloat sCompl = m3gSub(1.f, s);
for (i = 0; i < size; ++i) {
vec[i] = m3gAdd(m3gMul(sCompl, start[i]), m3gMul(s, end[i]));
}
}
#endif /* #if defined(M3G_HW_FLOAT_VFPV2) */
/*!
* \brief Hermite spline interpolation of vectors
*
* \param size number of components
* \param vec output vector
* \param s interpolation factor
* \param start start value vector
* \param end end value vector
* \param tStart start tangent vector
* \param tEnd end tangent vector
*/
M3G_API void m3gHermite(M3Gint size,
M3Gfloat *vec,
M3Gfloat s,
const M3Gfloat *start, const M3Gfloat *end,
const M3Gfloat *tStart, const M3Gfloat *tEnd)
{
M3Gfloat s2 = m3gSquare(s);
M3Gfloat s3 = m3gMul(s2, s);
int i;
for (i = 0; i < size; ++i) {
vec[i] =
m3gMadd(start[i],
m3gAdd(m3gSub(m3gDouble(s3), m3gMul(3.f, s2)), 1.f),
m3gMadd(end[i],
m3gSub(m3gMul(3.f, s2), m3gDouble(s3)),
m3gMadd(tStart[i],
m3gAdd(m3gSub(s3, m3gDouble(s2)), s),
m3gMul(tEnd[i],
m3gSub(s3, s2)))));
}
/* vec = ( 2*s3 - 3*s2 + 1) * start
+ (-2*s3 + 3*s2 ) * end
+ ( s3 - 2*s2 + s) * tStart
+ ( s3 - s2 ) * tEnd; */
}
/*--------------------------------------------------------------------*/
/*!
* \brief Sets a matrix to a copy of another matrix
*/
M3G_API void m3gCopyMatrix(Matrix *dst, const Matrix *src)
{
M3G_ASSERT(dst != NULL && src != NULL);
*dst = *src;
}
/*!
* \brief Vector addition
*/
M3G_API void m3gAddVec3(Vec3 *vec, const Vec3 *other)
{
vec->x = m3gAdd(vec->x, other->x);
vec->y = m3gAdd(vec->y, other->y);
vec->z = m3gAdd(vec->z, other->z);
}
/*!
* \brief Vector addition
*/
M3G_API void m3gAddVec4(Vec4 *vec, const Vec4 *other)
{
vec->x = m3gAdd(vec->x, other->x);
vec->y = m3gAdd(vec->y, other->y);
vec->z = m3gAdd(vec->z, other->z);
vec->w = m3gAdd(vec->w, other->w);
}
/*!
* \brief Cross product of two 3D vectors expressed as 4D vectors
*/
M3G_API void m3gCross(Vec3 *dst, const Vec3 *a, const Vec3 *b)
{
dst->x = m3gSub(m3gMul(a->y, b->z), m3gMul(a->z, b->y));
dst->y = m3gSub(m3gMul(a->z, b->x), m3gMul(a->x, b->z));
dst->z = m3gSub(m3gMul(a->x, b->y), m3gMul(a->y, b->x));
}
/*!
* \brief Dot product of two vectors
*/
M3G_API M3Gfloat m3gDot3(const Vec3 *a, const Vec3 *b)
{
M3Gfloat d;
d = m3gMul(a->x, b->x);
d = m3gMadd(a->y, b->y, d);
d = m3gMadd(a->z, b->z, d);
return d;
}
/*!
* \brief Dot product of two vectors
*/
M3G_API M3Gfloat m3gDot4(const Vec4 *a, const Vec4 *b)
{
M3Gfloat d;
d = m3gMul(a->x, b->x);
d = m3gMadd(a->y, b->y, d);
d = m3gMadd(a->z, b->z, d);
d = m3gMadd(a->w, b->w, d);
return d;
}
/*!
* \brief
*/
M3G_API void m3gSetVec3(Vec3 *v, M3Gfloat x, M3Gfloat y, M3Gfloat z)
{
M3G_ASSERT_PTR(v);
v->x = x;
v->y = y;
v->z = z;
}
/*!
* \brief
*/
M3G_API void m3gSetVec4(Vec4 *v, M3Gfloat x, M3Gfloat y, M3Gfloat z, M3Gfloat w)
{
M3G_ASSERT_PTR(v);
v->x = x;
v->y = y;
v->z = z;
v->w = w;
}
/*!
* \brief Vector subtraction
*/
M3G_API void m3gSubVec3(Vec3 *vec, const Vec3 *other)
{
vec->x = m3gSub(vec->x, other->x);
vec->y = m3gSub(vec->y, other->y);
vec->z = m3gSub(vec->z, other->z);
}
/*!
* \brief Vector subtraction
*/
M3G_API void m3gSubVec4(Vec4 *vec, const Vec4 *other)
{
vec->x = m3gSub(vec->x, other->x);
vec->y = m3gSub(vec->y, other->y);
vec->z = m3gSub(vec->z, other->z);
vec->w = m3gSub(vec->w, other->w);
}
/*!
* \brief Vector length
*/
M3G_API M3Gfloat m3gLengthVec3(const Vec3 *vec)
{
return m3gSqrt(m3gAdd(m3gAdd(m3gSquare(vec->x),
m3gSquare(vec->y)),
m3gSquare(vec->z)));
}
/*!
* \brief Vector scaling
*/
M3G_API void m3gScaleVec3(Vec3 *vec, const M3Gfloat s)
{
vec->x = m3gMul(vec->x, s);
vec->y = m3gMul(vec->y, s);
vec->z = m3gMul(vec->z, s);
}
/*!
* \brief Vector scaling
*/
M3G_API void m3gScaleVec4(Vec4 *vec, const M3Gfloat s)
{
vec->x = m3gMul(vec->x, s);
vec->y = m3gMul(vec->y, s);
vec->z = m3gMul(vec->z, s);
vec->w = m3gMul(vec->w, s);
}
/*!
* \brief Returns an angle-axis representation for a quaternion
*
* \note There are many, and this is not guaranteed to return a
* particular one
*/
M3G_API void m3gGetAngleAxis(const Quat *quat, M3Gfloat *angle, Vec3 *axis)
{
M3Gfloat x, y, z, sinTheta;
M3G_ASSERT_PTR(quat);
M3G_ASSERT_PTR(angle);
M3G_ASSERT_PTR(axis);
x = quat->x;
y = quat->y;
z = quat->z;
sinTheta = m3gSqrt(m3gAdd(m3gAdd(m3gSquare(x), m3gSquare(y)),
m3gSquare(z)));
if (sinTheta > EPSILON) {
M3Gfloat ooSinTheta = m3gRcp(sinTheta);
axis->x = m3gMul(x, ooSinTheta);
axis->y = m3gMul(y, ooSinTheta);
axis->z = m3gMul(z, ooSinTheta);
}
else {
/* return a valid axis even for no rotation */
axis->x = axis->y = 0.0f;
axis->z = 1.0f;
}
*angle = m3gMul(2.0f * RAD2DEG, m3gArcCos(quat->w));
}
/*!
* \brief Gets a single matrix column
*/
M3G_API void m3gGetMatrixColumn(const Matrix *mtx, M3Gint col, Vec4 *dst)
{
if ((col & ~3) == 0) {
if (!mtx->complete) {
m3gFillClassifiedMatrix((Matrix*)mtx);
}
dst->x = M44F(mtx, 0, col);
dst->y = M44F(mtx, 1, col);
dst->z = M44F(mtx, 2, col);
dst->w = M44F(mtx, 3, col);
}
else {
M3G_ASSERT(M3G_FALSE);
}
}
/*!
* \brief Returns the floating point values of a matrix as consecutive
* columns
*/
M3G_API void m3gGetMatrixColumns(const Matrix *mtx, M3Gfloat *dst)
{
M3G_ASSERT(mtx != NULL && dst != NULL);
if (!mtx->complete) {
m3gFillClassifiedMatrix((Matrix*)mtx);
}
m3gCopy(dst, mtx->elem, sizeof(mtx->elem));
}
/*!
* \brief Gets a single matrix row
*/
M3G_API void m3gGetMatrixRow(const Matrix *mtx, M3Gint row, Vec4 *dst)
{
if ((row & ~3) == 0) {
if (!mtx->complete) {
m3gFillClassifiedMatrix((Matrix*)mtx);
}
dst->x = M44F(mtx, row, 0);
dst->y = M44F(mtx, row, 1);
dst->z = M44F(mtx, row, 2);
dst->w = M44F(mtx, row, 3);
}
else {
M3G_ASSERT(M3G_FALSE);
}
}
/*!
* \brief Returns the floating point values of a matrix as consecutive
* rows
*/
M3G_API void m3gGetMatrixRows(const Matrix *mtx, M3Gfloat *dst)
{
M3G_ASSERT(mtx != NULL && dst != NULL);
if (!mtx->complete) {
m3gFillClassifiedMatrix((Matrix*)mtx);
}
{
int row;
for (row = 0; row < 4; ++row) {
*dst++ = mtx->elem[ 0 + row];
*dst++ = mtx->elem[ 4 + row];
*dst++ = mtx->elem[ 8 + row];
*dst++ = mtx->elem[12 + row];
}
}
}
/*!
* \brief Sets a matrix to identity
*/
M3G_API void m3gIdentityMatrix(Matrix *mtx)
{
M3G_ASSERT(mtx != NULL);
m3gClassifyAs(MC_IDENTITY, mtx);
}
/*!
* \brief Sets a quaternion to identity
*/
M3G_API void m3gIdentityQuat(Quat *quat)
{
M3G_ASSERT(quat != NULL);
quat->x = quat->y = quat->z = 0.0f;
quat->w = 1.0f;
}
/*!
* \brief Inverts a matrix
*/
M3G_API M3Gbool m3gInvertMatrix(Matrix *mtx)
{
M3Gfloat *matrix;
M3Gint i;
M3Gfloat tmp[12];
M3Gfloat src[16];
M3Gfloat det;
M3G_ASSERT(mtx != NULL);
if (!m3gIsClassified(mtx)) {
m3gClassify(mtx);
}
/* Quick exit for identity */
if (mtx->mask == MC_IDENTITY) {
return M3G_TRUE;
}
/* Look for other common cases; these require that we have valid
* values in all the elements, so fill in the values first */
{
M3Guint mask = mtx->mask;
if (!mtx->complete) {
m3gFillClassifiedMatrix(mtx);
}
if ((mask | (0x3F << 24)) == MC_TRANSLATION) {
M44F(mtx, 0, 3) = m3gNegate(M44F(mtx, 0, 3));
M44F(mtx, 1, 3) = m3gNegate(M44F(mtx, 1, 3));
M44F(mtx, 2, 3) = m3gNegate(M44F(mtx, 2, 3));
mtx->mask = MC_TRANSLATION;
return M3G_TRUE;
}
if ((mask | 0x300C03) == MC_SCALING) {
if ((mask & 3 ) == 0 ||
(mask & (3 << 10)) == 0 ||
(mask & (3 << 20)) == 0) {
return M3G_FALSE; /* zero scale for at least one axis */
}
M44F(mtx, 0, 0) = m3gRcp(M44F(mtx, 0, 0));
M44F(mtx, 1, 1) = m3gRcp(M44F(mtx, 1, 1));
M44F(mtx, 2, 2) = m3gRcp(M44F(mtx, 2, 2));
return M3G_TRUE;
}
}
/* Do a full 4x4 inversion as a last resort */
matrix = mtx->elem;
/* transpose matrix */
for (i = 0; i < 4; i++) {
src[i] = matrix[i*4];
src[i+4] = matrix[i*4+1];
src[i+8] = matrix[i*4+2];
src[i+12] = matrix[i*4+3];
}
/* calculate pairs for first 8 elements (cofactors) */
tmp[0] = src[10]*src[15];
tmp[1] = src[11]*src[14];
tmp[2] = src[9]*src[15];
tmp[3] = src[11]*src[13];
tmp[4] = src[9]*src[14];
tmp[5] = src[10]*src[13];
tmp[6] = src[8]*src[15];
tmp[7] = src[11]*src[12];
tmp[8] = src[8]*src[14];
tmp[9] = src[10]*src[12];
tmp[10] = src[8]*src[13];
tmp[11] = src[9]*src[12];
/* calculate first 8 elements (cofactors) */
matrix[0] = tmp[0]*src[5] + tmp[3]*src[6] + tmp[4]*src[7];
matrix[0] -= tmp[1]*src[5] + tmp[2]*src[6] + tmp[5]*src[7];
matrix[1] = tmp[1]*src[4] + tmp[6]*src[6] + tmp[9]*src[7];
matrix[1] -= tmp[0]*src[4] + tmp[7]*src[6] + tmp[8]*src[7];
matrix[2] = tmp[2]*src[4] + tmp[7]*src[5] + tmp[10]*src[7];
matrix[2] -= tmp[3]*src[4] + tmp[6]*src[5] + tmp[11]*src[7];
matrix[3] = tmp[5]*src[4] + tmp[8]*src[5] + tmp[11]*src[6];
matrix[3] -= tmp[4]*src[4] + tmp[9]*src[5] + tmp[10]*src[6];
matrix[4] = tmp[1]*src[1] + tmp[2]*src[2] + tmp[5]*src[3];
matrix[4] -= tmp[0]*src[1] + tmp[3]*src[2] + tmp[4]*src[3];
matrix[5] = tmp[0]*src[0] + tmp[7]*src[2] + tmp[8]*src[3];
matrix[5] -= tmp[1]*src[0] + tmp[6]*src[2] + tmp[9]*src[3];
matrix[6] = tmp[3]*src[0] + tmp[6]*src[1] + tmp[11]*src[3];
matrix[6] -= tmp[2]*src[0] + tmp[7]*src[1] + tmp[10]*src[3];
matrix[7] = tmp[4]*src[0] + tmp[9]*src[1] + tmp[10]*src[2];
matrix[7] -= tmp[5]*src[0] + tmp[8]*src[1] + tmp[11]*src[2];
/* calculate pairs for second 8 elements (cofactors) */
tmp[0] = src[2]*src[7];
tmp[1] = src[3]*src[6];
tmp[2] = src[1]*src[7];
tmp[3] = src[3]*src[5];
tmp[4] = src[1]*src[6];
tmp[5] = src[2]*src[5];
tmp[6] = src[0]*src[7];
tmp[7] = src[3]*src[4];
tmp[8] = src[0]*src[6];
tmp[9] = src[2]*src[4];
tmp[10] = src[0]*src[5];
tmp[11] = src[1]*src[4];
/* calculate second 8 elements (cofactors) */
matrix[8] = tmp[0]*src[13] + tmp[3]*src[14] + tmp[4]*src[15];
matrix[8] -= tmp[1]*src[13] + tmp[2]*src[14] + tmp[5]*src[15];
matrix[9] = tmp[1]*src[12] + tmp[6]*src[14] + tmp[9]*src[15];
matrix[9] -= tmp[0]*src[12] + tmp[7]*src[14] + tmp[8]*src[15];
matrix[10] = tmp[2]*src[12] + tmp[7]*src[13] + tmp[10]*src[15];
matrix[10] -= tmp[3]*src[12] + tmp[6]*src[13] + tmp[11]*src[15];
matrix[11] = tmp[5]*src[12] + tmp[8]*src[13] + tmp[11]*src[14];
matrix[11] -= tmp[4]*src[12] + tmp[9]*src[13] + tmp[10]*src[14];
matrix[12] = tmp[2]*src[10] + tmp[5]*src[11] + tmp[1]*src[9];
matrix[12] -= tmp[4]*src[11] + tmp[0]*src[9] + tmp[3]*src[10];
matrix[13] = tmp[8]*src[11] + tmp[0]*src[8] + tmp[7]*src[10];
matrix[13] -= tmp[6]*src[10] + tmp[9]*src[11] + tmp[1]*src[8];
matrix[14] = tmp[6]*src[9] + tmp[11]*src[11] + tmp[3]*src[8];
matrix[14] -= tmp[10]*src[11] + tmp[2]*src[8] + tmp[7]*src[9];
matrix[15] = tmp[10]*src[10] + tmp[4]*src[8] + tmp[9]*src[9];
matrix[15] -= tmp[8]*src[9] + tmp[11]*src[10] + tmp[5]*src[8];
/* calculate determinant */
det = src[0]*matrix[0]+src[1]*matrix[1]+src[2]*matrix[2]+src[3]*matrix[3];
/* matrix has no inverse */
if (det == 0.0f) {
return M3G_FALSE;
}
/* calculate matrix inverse */
det = 1/det;
for (i = 0; i < 16; i++) {
matrix[i] *= det;
}
mtx->classified = M3G_FALSE;
return M3G_TRUE;
}
/*!
* \brief Sets a matrix to the inverse of another matrix
*/
M3G_API M3Gbool m3gMatrixInverse(Matrix *mtx, const Matrix *other)
{
M3G_ASSERT(mtx != NULL && other != NULL);
if (!m3gIsClassified(other)) {
m3gClassify((Matrix*)other);
}
m3gCopyMatrix(mtx, other);
return m3gInvertMatrix(mtx);
}
/*!
* \brief Sets a matrix to the transpose of another matrix
*/
M3G_API void m3gMatrixTranspose(Matrix *mtx, const Matrix *other)
{
M3Gbyte i;
M3G_ASSERT(mtx != NULL && other != NULL);
if (!other->complete) {
m3gFillClassifiedMatrix((Matrix *)other);
}
for (i = 0; i < 4; i++) {
mtx->elem[i] = other->elem[i*4];
mtx->elem[i+4] = other->elem[i*4+1];
mtx->elem[i+8] = other->elem[i*4+2];
mtx->elem[i+12] = other->elem[i*4+3];
}
mtx->classified = M3G_FALSE;
mtx->complete = M3G_TRUE;
}
M3G_API M3Gbool m3gInverseTranspose(Matrix *mtx, const Matrix *other)
{
Matrix temp;
if (!m3gMatrixInverse(&temp, other)) {
return M3G_FALSE;
}
m3gMatrixTranspose(mtx, &temp);
return M3G_TRUE;
}
/*!
* \brief Sets a matrix to the product of two other matrices
*
* \note \c dst can not be either of \c left or \c right; if it is,
* the results are undefined
*/
M3G_API void m3gMatrixProduct(Matrix *dst, const Matrix *left, const Matrix *right)
{
M3G_ASSERT_PTR(dst);
M3G_ASSERT_PTR(left);
M3G_ASSERT_PTR(right);
M3G_ASSERT(dst != left && dst != right);
/* Classify input matrices and take early exits for identities */
if (!m3gIsClassified(left)) {
m3gClassify((Matrix*)left);
}
if (left->mask == MC_IDENTITY) {
m3gCopyMatrix(dst, right);
return;
}
if (!m3gIsClassified(right)) {
m3gClassify((Matrix*)right);
}
if (right->mask == MC_IDENTITY) {
m3gCopyMatrix(dst, left);
return;
}
/* Special quick paths for 3x4 matrices */
if (m3gIsWUnity(left) && m3gIsWUnity(right)) {
/* Translation? */
if ((left->mask & ~MC_TRANSLATION_PART) == MC_IDENTITY) {
if (left->mask != MC_TRANSLATION && !left->complete) {
m3gFillClassifiedMatrix((Matrix*)left);
}
if (right->mask != MC_TRANSLATION && !right->complete) {
m3gFillClassifiedMatrix((Matrix*)right);
}
m3gCopyMatrix(dst, right);
M44F(dst, 0, 3) = m3gAdd(M44F(left, 0, 3), M44F(dst, 0, 3));
M44F(dst, 1, 3) = m3gAdd(M44F(left, 1, 3), M44F(dst, 1, 3));
M44F(dst, 2, 3) = m3gAdd(M44F(left, 2, 3), M44F(dst, 2, 3));
dst->mask |= MC_TRANSLATION_PART;
return;
}
if ((right->mask & ~MC_TRANSLATION_PART) == MC_IDENTITY) {
Vec4 tvec;
if (left->mask != MC_TRANSLATION && !left->complete) {
m3gFillClassifiedMatrix((Matrix*)left);
}
if (right->mask != MC_TRANSLATION && !right->complete) {
m3gFillClassifiedMatrix((Matrix*)right);
}
m3gCopyMatrix(dst, left);
m3gGetMatrixColumn(right, 3, &tvec);
m3gTransformVec4(dst, &tvec);
M44F(dst, 0, 3) = tvec.x;
M44F(dst, 1, 3) = tvec.y;
M44F(dst, 2, 3) = tvec.z;
dst->mask |= MC_TRANSLATION_PART;
return;
}
}
/* Compute product and set output classification */
m3gGenericMatrixProduct(dst, left, right);
}
/*!
* \brief Normalizes a quaternion
*/
M3G_API void m3gNormalizeQuat(Quat *q)
{
M3Gfloat norm;
M3G_ASSERT_PTR(q);
norm = m3gNorm4(&q->x);
if (norm > EPSILON) {
norm = m3gRcpSqrt(norm);
m3gScale4(&q->x, norm);
}
else {
m3gIdentityQuat(q);
}
}
/*!
* \brief Normalizes a three-vector
*/
M3G_API void m3gNormalizeVec3(Vec3 *v)
{
M3Gfloat norm;
M3G_ASSERT_PTR(v);
norm = m3gNorm3(&v->x);
if (norm > EPSILON) {
norm = m3gRcpSqrt(norm);
m3gScale3(&v->x, norm);
}
else {
m3gZero(v, sizeof(Vec3));
}
}
/*!
* \brief Normalizes a four-vector
*/
M3G_API void m3gNormalizeVec4(Vec4 *v)
{
M3Gfloat norm;
M3G_ASSERT_PTR(v);
norm = m3gNorm4(&v->x);
if (norm > EPSILON) {
norm = m3gRcpSqrt(norm);
m3gScale4(&v->x, norm);
}
else {
m3gZero(v, sizeof(Vec4));
}
}
/*!
* \brief Multiplies a matrix from the right with another matrix
*/
M3G_API void m3gPostMultiplyMatrix(Matrix *mtx, const Matrix *other)
{
Matrix temp;
M3G_ASSERT_PTR(mtx);
M3G_ASSERT_PTR(other);
m3gCopyMatrix(&temp, mtx);
m3gMatrixProduct(mtx, &temp, other);
}
/*!
* \brief Multiplies a matrix from the left with another matrix
*/
M3G_API void m3gPreMultiplyMatrix(Matrix *mtx, const Matrix *other)
{
Matrix temp;
M3G_ASSERT_PTR(mtx);
M3G_ASSERT_PTR(other);
m3gCopyMatrix(&temp, mtx);
m3gMatrixProduct(mtx, other, &temp);
}
/*!
* \brief Multiplies a matrix with a rotation matrix.
*/
M3G_API void m3gPostRotateMatrix(Matrix *mtx,
M3Gfloat angle,
M3Gfloat ax, M3Gfloat ay, M3Gfloat az)
{
Quat q;
m3gSetAngleAxis(&q, angle, ax, ay, az);
m3gPostRotateMatrixQuat(mtx, &q);
}
/*!
* \brief Multiplies a matrix with a translation matrix.
*/
M3G_API void m3gPostRotateMatrixQuat(Matrix *mtx, const Quat *quat)
{
Matrix temp;
m3gQuatMatrix(&temp, quat);
m3gPostMultiplyMatrix(mtx, &temp);
}
/*!
* \brief Multiplies a matrix with a scale matrix.
*/
M3G_API void m3gPostScaleMatrix(Matrix *mtx, M3Gfloat sx, M3Gfloat sy, M3Gfloat sz)
{
Matrix temp;
m3gScalingMatrix(&temp, sx, sy, sz);
m3gPostMultiplyMatrix(mtx, &temp);
}
/*!
* \brief Multiplies a matrix with a translation (matrix).
*/
M3G_API void m3gPostTranslateMatrix(Matrix *mtx,
M3Gfloat tx, M3Gfloat ty, M3Gfloat tz)
{
Matrix temp;
m3gTranslationMatrix(&temp, tx, ty, tz);
m3gPostMultiplyMatrix(mtx, &temp);
}
/*!
* \brief Multiplies a matrix with a rotation matrix
*/
M3G_API void m3gPreRotateMatrix(Matrix *mtx,
M3Gfloat angle,
M3Gfloat ax, M3Gfloat ay, M3Gfloat az)
{
Quat q;
m3gSetAngleAxis(&q, angle, ax, ay, az);
m3gPreRotateMatrixQuat(mtx, &q);
}
/*!
* \brief Multiplies a matrix with a quaternion rotation
*/
M3G_API void m3gPreRotateMatrixQuat(Matrix *mtx, const Quat *quat)
{
Matrix temp;
m3gQuatMatrix(&temp, quat);
m3gPreMultiplyMatrix(mtx, &temp);
}
/*!
* \brief Multiplies a matrix with a scale matrix.
*/
M3G_API void m3gPreScaleMatrix(Matrix *mtx, M3Gfloat sx, M3Gfloat sy, M3Gfloat sz)
{
Matrix temp;
m3gScalingMatrix(&temp, sx, sy, sz);
m3gPreMultiplyMatrix(mtx, &temp);
}
/*!
* \brief Multiplies a matrix with a translation (matrix).
*/
M3G_API void m3gPreTranslateMatrix(Matrix *mtx,
M3Gfloat tx, M3Gfloat ty, M3Gfloat tz)
{
Matrix temp;
m3gTranslationMatrix(&temp, tx, ty, tz);
m3gPreMultiplyMatrix(mtx, &temp);
}
/*!
* \brief Converts a quaternion into a matrix
*
* The output is a matrix effecting the same rotation as the
* quaternion passed as input
*/
M3G_API void m3gQuatMatrix(Matrix *mtx, const Quat *quat)
{
M3Gfloat qx = quat->x;
M3Gfloat qy = quat->y;
M3Gfloat qz = quat->z;
M3Gfloat qw = quat->w;
/* Quick exit for identity rotations */
if (IS_ZERO(qx) && IS_ZERO(qy) && IS_ZERO(qz)) {
m3gIdentityMatrix(mtx);
return;
}
{
/* Determine the rough type of the output matrix */
M3Guint type = MC_SCALING_ROTATION;
if (IS_ZERO(qz) && IS_ZERO(qy)) {
type = MC_X_ROTATION;
}
else if (IS_ZERO(qz) && IS_ZERO(qx)) {
type = MC_Y_ROTATION;
}
else if (IS_ZERO(qx) && IS_ZERO(qy)) {
type = MC_Z_ROTATION;
}
m3gClassifyAs(type, mtx);
/* Generate the non-constant parts of the matrix */
{
M3Gfloat wx, wy, wz, xx, yy, yz, xy, xz, zz;
xx = m3gMul(qx, qx);
xy = m3gMul(qx, qy);
xz = m3gMul(qx, qz);
yy = m3gMul(qy, qy);
yz = m3gMul(qy, qz);
zz = m3gMul(qz, qz);
wx = m3gMul(qw, qx);
wy = m3gMul(qw, qy);
wz = m3gMul(qw, qz);
if (type != MC_X_ROTATION) {
M44F(mtx, 0, 0) = m3gSub(1.f, m3gDouble(m3gAdd(yy, zz)));
M44F(mtx, 0, 1) = m3gDouble(m3gSub(xy, wz));
M44F(mtx, 0, 2) = m3gDouble(m3gAdd(xz, wy));
}
if (type != MC_Y_ROTATION) {
M44F(mtx, 1, 0) = m3gDouble(m3gAdd(xy, wz));
M44F(mtx, 1, 1) = m3gSub(1.f, m3gDouble(m3gAdd(xx, zz)));
M44F(mtx, 1, 2) = m3gDouble(m3gSub(yz, wx));
}
if (type != MC_Z_ROTATION) {
M44F(mtx, 2, 0) = m3gDouble(m3gSub(xz, wy));
M44F(mtx, 2, 1) = m3gDouble(m3gAdd(yz, wx));
M44F(mtx, 2, 2) = m3gSub(1.f, m3gDouble(m3gAdd(xx, yy)));
}
}
m3gSubClassify(mtx);
}
}
/*!
* \brief Generates a scaling matrix
*/
M3G_API void m3gScalingMatrix(
Matrix *mtx,
const M3Gfloat sx, const M3Gfloat sy, const M3Gfloat sz)
{
M3G_ASSERT_PTR(mtx);
M44F(mtx, 0, 0) = sx;
M44F(mtx, 1, 1) = sy;
M44F(mtx, 2, 2) = sz;
m3gClassifyAs(MC_SCALING, mtx);
m3gSubClassify(mtx);
}
/*!
* \brief Sets a quaternion to represent an angle-axis rotation
*/
M3G_API void m3gSetAngleAxis(Quat *quat,
M3Gfloat angle,
M3Gfloat ax, M3Gfloat ay, M3Gfloat az)
{
m3gSetAngleAxisRad(quat, m3gMul(angle, M3G_DEG2RAD), ax, ay, az);
}
/*!
* \brief Sets a quaternion to represent an angle-axis rotation
*/
M3G_API void m3gSetAngleAxisRad(Quat *quat,
M3Gfloat angleRad,
M3Gfloat ax, M3Gfloat ay, M3Gfloat az)
{
M3G_ASSERT_PTR(quat);
if (!IS_ZERO(angleRad)) {
M3Gfloat s;
M3Gfloat halfAngle = m3gHalf(angleRad);
s = m3gSin(halfAngle);
{
M3Gfloat sqrNorm = m3gMadd(ax, ax, m3gMadd(ay, ay, m3gMul(az, az)));
if (sqrNorm < 0.995f || sqrNorm > 1.005f) {
if (sqrNorm > EPSILON) {
M3Gfloat ooNorm = m3gRcpSqrt(sqrNorm);
ax = m3gMul(ax, ooNorm);
ay = m3gMul(ay, ooNorm);
az = m3gMul(az, ooNorm);
}
else {
ax = ay = az = 0.0f;
}
}
}
quat->x = m3gMul(s, ax);
quat->y = m3gMul(s, ay);
quat->z = m3gMul(s, az);
quat->w = m3gCos(halfAngle);
}
else {
m3gIdentityQuat(quat);
}
}
/*!
* \brief Quaternion multiplication.
*/
M3G_API void m3gMulQuat(Quat *quat, const Quat *other)
{
Quat q;
q = *quat;
quat->w = m3gMul(q.w, other->w) - m3gMul(q.x, other->x) - m3gMul(q.y, other->y) - m3gMul(q.z, other->z);
quat->x = m3gMul(q.w, other->x) + m3gMul(q.x, other->w) + m3gMul(q.y, other->z) - m3gMul(q.z, other->y);
quat->y = m3gMul(q.w, other->y) - m3gMul(q.x, other->z) + m3gMul(q.y, other->w) + m3gMul(q.z, other->x);
quat->z = m3gMul(q.w, other->z) + m3gMul(q.x, other->y) - m3gMul(q.y, other->x) + m3gMul(q.z, other->w);
}
/*!
* \brief Makes this quaternion represent the rotation from one 3D
* vector to another
*/
M3G_API void m3gSetQuatRotation(Quat *quat,
const Vec3 *from, const Vec3 *to)
{
M3Gfloat cosAngle;
M3G_ASSERT_PTR(quat);
M3G_ASSERT_PTR(from);
M3G_ASSERT_PTR(to);
cosAngle = m3gDot3(from, to);
if (cosAngle > (1.0f - EPSILON)) { /* zero angle */
m3gIdentityQuat(quat);
return;
}
else if (cosAngle > (1.0e-3f - 1.0f)) { /* normal case */
Vec3 axis;
m3gCross(&axis, from, to);
m3gSetAngleAxisRad(quat, m3gArcCos(cosAngle), axis.x, axis.y, axis.z);
}
else {
/* Opposite vectors; must generate an arbitrary perpendicular
* vector and use that as the rotation axis. Here, we try the
* Z axis first, and if that seems too parallel to the
* vectors, project the Y axis instead: Z is the only good
* choice for Z-constrained rotations, and Y by definition
* must be perpendicular to that. */
Vec3 axis, temp;
M3Gfloat s;
axis.x = axis.y = axis.z = 0.0f;
if (m3gAbs(from->z) < (1.0f - EPSILON)) {
axis.z = 1.0f;
}
else {
axis.y = 1.0f;
}
s = m3gDot3(&axis, from);
temp = *from;
m3gScaleVec3(&temp, s);
m3gSubVec3(&axis, &temp);
m3gSetAngleAxis(quat, 180.f, axis.x, axis.y, axis.z);
}
}
/*!
* \brief Sets the values of a matrix
*
*/
M3G_API void m3gSetMatrixColumns(Matrix *mtx, const M3Gfloat *src)
{
M3G_ASSERT(mtx != NULL && src != NULL);
m3gCopy(mtx->elem, src, sizeof(mtx->elem));
mtx->classified = M3G_FALSE;
mtx->complete = M3G_TRUE;
}
/*!
* \brief Sets the values of a matrix
*
*/
M3G_API void m3gSetMatrixRows(Matrix *mtx, const M3Gfloat *src)
{
M3G_ASSERT(mtx != NULL && src != NULL);
{
int row;
for (row = 0; row < 4; ++row) {
mtx->elem[ 0 + row] = *src++;
mtx->elem[ 4 + row] = *src++;
mtx->elem[ 8 + row] = *src++;
mtx->elem[12 + row] = *src++;
}
}
mtx->classified = M3G_FALSE;
mtx->complete = M3G_TRUE;
}
/*!
* \brief Transforms a 4-vector with a matrix
*/
#if defined(M3G_HW_FLOAT_VFPV2)
__asm void _m3gTransformVec4(const Matrix *mtx, Vec4 *vec, M3Gint n)
{
CODE32
FSTMFDD sp!, {d8-d11}
FMRX r3, FPSCR
BIC r12, r3, #0x00370000
ORR r12, #(3<<16)
FMXR FPSCR, r12
CMP r2, #4
FLDMIAS r0, {s4-s19} // [mtx0 mtx1 ..]
FLDMIAS r1, {s0-s3} // [vec.x vec.y vec.z vec.w]
FMULS s20, s4, s0 // [s20-s23] = [v.x*mtx0 v.x*mtx1 v.x*mtx2 v.x*mtx3 ]
FMACS s20, s8, s1 // [s20-s23] += [v.y*mtx4 v.y*mtx5 v.y*mtx6 v.y*mtx7 ]
FMACS s20, s12, s2 // [s20-s23] += [v.z*mtx8 v.z*mtx9 v.z*mtx10 v.z*mtx11]
FMACS s20, s16, s3 // [s20-s23] += [v.w*mtx12 v.w*mtx13 v.w*mtx14 v.w*mtx15]
FSTMIAS r1!, {s20-s22}
FSTMIASEQ r1, {s23}
FMXR FPSCR, r3
FLDMFDD sp!, {d8-d11}
BX lr
}
#endif /* #if defined(M3G_HW_FLOAT_VFPV2) */
M3G_API void m3gTransformVec4(const Matrix *mtx, Vec4 *vec)
{
M3Guint type;
M3G_ASSERT(mtx != NULL && vec != NULL);
if (!m3gIsClassified(mtx)) {
m3gClassify((Matrix*)mtx);
}
type = mtx->mask;
if (type == MC_IDENTITY) {
return;
}
else {
int n = m3gIsWUnity(mtx) ? 3 : 4;
if (!mtx->complete) {
m3gFillClassifiedMatrix((Matrix*)mtx);
}
#if defined(M3G_HW_FLOAT_VFPV2)
_m3gTransformVec4(mtx, vec, n);
#else
{
Vec4 v = *vec;
int i;
for (i = 0; i < n; ++i) {
M3Gfloat d = m3gMul(M44F(mtx, i, 0), v.x);
d = m3gMadd(M44F(mtx, i, 1), v.y, d);
d = m3gMadd(M44F(mtx, i, 2), v.z, d);
d = m3gMadd(M44F(mtx, i, 3), v.w, d);
(&vec->x)[i] = d;
}
}
#endif
}
}
/*!
* \brief Generates a translation matrix
*/
M3G_API void m3gTranslationMatrix(
Matrix *mtx,
const M3Gfloat tx, const M3Gfloat ty, const M3Gfloat tz)
{
M3G_ASSERT_PTR(mtx);
M44F(mtx, 0, 3) = tx;
M44F(mtx, 1, 3) = ty;
M44F(mtx, 2, 3) = tz;
m3gClassifyAs(MC_TRANSLATION, mtx);
m3gSubClassify(mtx);
}
/*!
* \brief Float vector assignment.
*/
M3G_API void m3gSetQuat(Quat *quat, const M3Gfloat *vec)
{
quat->x = vec[0];
quat->y = vec[1];
quat->z = vec[2];
quat->w = vec[3];
}
/*!
* \brief Slerp between quaternions q0 and q1, storing the result in quat.
*/
M3G_API void m3gSlerpQuat(Quat *quat,
M3Gfloat s,
const Quat *q0, const Quat *q1)
{
M3Gfloat s0, s1;
M3Gfloat cosTheta = m3gDot4((const Vec4 *)q0, (const Vec4 *)q1);
M3Gfloat oneMinusS = m3gSub(1.0f, s);
if (cosTheta > EPSILON - 1.0f) {
if (cosTheta < 1.0f - EPSILON) {
M3Gfloat theta = m3gArcCos(cosTheta);
M3Gfloat sinTheta = m3gSin(theta);
s0 = m3gSin(m3gMul(oneMinusS, theta)) / sinTheta;
s1 = m3gSin(m3gMul(s, theta)) / sinTheta;
}
else {
/* For quaternions very close to each other, use plain
linear interpolation for numerical stability. */
s0 = oneMinusS;
s1 = s;
}
quat->x = m3gMadd(s0, q0->x, m3gMul(s1, q1->x));
quat->y = m3gMadd(s0, q0->y, m3gMul(s1, q1->y));
quat->z = m3gMadd(s0, q0->z, m3gMul(s1, q1->z));
quat->w = m3gMadd(s0, q0->w, m3gMul(s1, q1->w));
}
else {
/* Slerp is undefined if the two quaternions are (nearly)
opposite, so we just pick an arbitrary plane of
rotation here. */
quat->x = -q0->y;
quat->y = q0->x;
quat->z = -q0->w;
quat->w = q0->z;
s0 = m3gSin(m3gMul(oneMinusS, HALF_PI));
s1 = m3gSin(m3gMul(s, HALF_PI));
quat->x = m3gMadd(s0, q0->x, m3gMul(s1, quat->x));
quat->y = m3gMadd(s0, q0->y, m3gMul(s1, quat->y));
quat->z = m3gMadd(s0, q0->z, m3gMul(s1, quat->z));
}
}
/*!
* \brief Interpolate quaternions using spline, or "squad" interpolation.
*/
M3G_API void m3gSquadQuat(Quat *quat,
M3Gfloat s,
const Quat *q0, const Quat *a, const Quat *b, const Quat *q1)
{
Quat temp0;
Quat temp1;
m3gSlerpQuat(&temp0, s, q0, q1);
m3gSlerpQuat(&temp1, s, a, b);
m3gSlerpQuat(quat, m3gDouble(m3gMul(s, m3gSub(1.0f, s))), &temp0, &temp1);
}