ode/src/stepfast.cpp
changeset 0 2f259fa3e83a
equal deleted inserted replaced
-1:000000000000 0:2f259fa3e83a
       
     1 /*************************************************************************
       
     2  *                                                                       *
       
     3  * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
       
     4  * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
       
     5  *                                                                       *
       
     6  * Fast iterative solver, David Whittaker. Email: david@csworkbench.com  *
       
     7  *                                                                       *
       
     8  * This library is free software; you can redistribute it and/or         *
       
     9  * modify it under the terms of EITHER:                                  *
       
    10  *   (1) The GNU Lesser General Public License as published by the Free  *
       
    11  *       Software Foundation; either version 2.1 of the License, or (at  *
       
    12  *       your option) any later version. The text of the GNU Lesser      *
       
    13  *       General Public License is included with this library in the     *
       
    14  *       file LICENSE.TXT.                                               *
       
    15  *   (2) The BSD-style license that is included with this library in     *
       
    16  *       the file LICENSE-BSD.TXT.                                       *
       
    17  *                                                                       *
       
    18  * This library is distributed in the hope that it will be useful,       *
       
    19  * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
       
    20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
       
    21  * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
       
    22  *                                                                       *
       
    23  *************************************************************************/
       
    24 
       
    25 // This is the StepFast code by David Whittaker. This code is faster, but
       
    26 // sometimes less stable than, the original "big matrix" code.
       
    27 // Refer to the user's manual for more information.
       
    28 // Note that this source file duplicates a lot of stuff from step.cpp,
       
    29 // eventually we should move the common code to a third file.
       
    30 
       
    31 #include "object.h"
       
    32 #include "joint.h"
       
    33 #include <ode/config.h>
       
    34 #include <ode/objects.h>
       
    35 #include <ode/odemath.h>
       
    36 #include <ode/rotation.h>
       
    37 #include <ode/timer.h>
       
    38 #include <ode/error.h>
       
    39 #include <ode/matrix.h>
       
    40 #include <ode/misc.h>
       
    41 #include "lcp.h"
       
    42 #include "step.h"
       
    43 #include "util.h"
       
    44 #include <ode/lookup_tables.h>
       
    45 #include <ode/ode.h>
       
    46 
       
    47 
       
    48 // misc defines
       
    49 
       
    50 #define ALLOCA dALLOCA16
       
    51 
       
    52 #define RANDOM_JOINT_ORDER
       
    53 #define SLOW_LCP      //use the old LCP solver
       
    54 
       
    55 EXPORT_C void dWorldSetAutoEnableDepthSF1 (dxWorld */*world*/, int autodepth)
       
    56 {
       
    57 	if (autodepth > 0)
       
    58 		GetGlobalData()->autoEnableDepth = autodepth;
       
    59 	else
       
    60 		GetGlobalData()->autoEnableDepth = 0;
       
    61 }
       
    62 
       
    63 EXPORT_C int dWorldGetAutoEnableDepthSF1 (dxWorld */*world*/)
       
    64 {
       
    65 	return GetGlobalData()->autoEnableDepth;
       
    66 }
       
    67 
       
    68 //little bit of math.... the _sym_ functions assume the return matrix will be symmetric
       
    69 static void
       
    70 Multiply2_sym_p8p (dReal * A, dReal * B, dReal * C, int p, int Askip)
       
    71 {
       
    72 	int i, j;
       
    73 	dReal sum, *aa, *ad, *bb, *cc;
       
    74 	bb = B;
       
    75 	for (i = 0; i < p; i++)
       
    76 	{
       
    77 		//aa is going accross the matrix, ad down
       
    78 		aa = ad = A;
       
    79 		cc = C;
       
    80 		for (j = i; j < p; j++)
       
    81 		{
       
    82 			sum = dMUL(bb[0],cc[0]);
       
    83 			sum += dMUL(bb[1],cc[1]);
       
    84 			sum += dMUL(bb[2],cc[2]);
       
    85 			sum += dMUL(bb[4],cc[4]);
       
    86 			sum += dMUL(bb[5],cc[5]);
       
    87 			sum += dMUL(bb[6],cc[6]);
       
    88 			*(aa++) = *ad = sum;
       
    89 			ad += Askip;
       
    90 			cc += 8;
       
    91 		}
       
    92 		bb += 8;
       
    93 		A += Askip + 1;
       
    94 		C += 8;
       
    95 	}
       
    96 }
       
    97 
       
    98 static void
       
    99 MultiplyAdd2_sym_p8p (dReal * A, dReal * B, dReal * C, int p, int Askip)
       
   100 {
       
   101 	int i, j;
       
   102 	dReal sum, *aa, *ad, *bb, *cc;
       
   103 	bb = B;
       
   104 	for (i = 0; i < p; i++)
       
   105 	{
       
   106 		//aa is going accross the matrix, ad down
       
   107 		aa = ad = A;
       
   108 		cc = C;
       
   109 		for (j = i; j < p; j++)
       
   110 		{
       
   111 			sum = dMUL(bb[0],cc[0]);
       
   112 			sum += dMUL(bb[1],cc[1]);
       
   113 			sum += dMUL(bb[2],cc[2]);
       
   114 			sum += dMUL(bb[4],cc[4]);
       
   115 			sum += dMUL(bb[5],cc[5]);
       
   116 			sum += dMUL(bb[6],cc[6]);
       
   117 			*(aa++) += sum;
       
   118 			*ad += sum;
       
   119 			ad += Askip;
       
   120 			cc += 8;
       
   121 		}
       
   122 		bb += 8;
       
   123 		A += Askip + 1;
       
   124 		C += 8;
       
   125 	}
       
   126 }
       
   127 
       
   128 
       
   129 // this assumes the 4th and 8th rows of B are zero.
       
   130 
       
   131 static void
       
   132 Multiply0_p81 (dReal * A, dReal * B, dReal * C, int p)
       
   133 {
       
   134 	int i;
       
   135 	dReal sum;
       
   136 	for (i = p; i; i--)
       
   137 	{
       
   138 		sum = dMUL(B[0],C[0]);
       
   139 		sum += dMUL(B[1],C[1]);
       
   140 		sum += dMUL(B[2],C[2]);
       
   141 		sum += dMUL(B[4],C[4]);
       
   142 		sum += dMUL(B[5],C[5]);
       
   143 		sum += dMUL(B[6],C[6]);
       
   144 		*(A++) = sum;
       
   145 		B += 8;
       
   146 	}
       
   147 }
       
   148 
       
   149 
       
   150 // this assumes the 4th and 8th rows of B are zero.
       
   151 
       
   152 static void
       
   153 MultiplyAdd0_p81 (dReal * A, dReal * B, dReal * C, int p)
       
   154 {
       
   155 	int i;
       
   156 	dReal sum;
       
   157 	for (i = p; i; i--)
       
   158 	{
       
   159 		sum = dMUL(B[0],C[0]);
       
   160 		sum += dMUL(B[1],C[1]);
       
   161 		sum += dMUL(B[2],C[2]);
       
   162 		sum += dMUL(B[4],C[4]);
       
   163 		sum += dMUL(B[5],C[5]);
       
   164 		sum += dMUL(B[6],C[6]);
       
   165 		*(A++) += sum;
       
   166 		B += 8;
       
   167 	}
       
   168 }
       
   169 
       
   170 
       
   171 // this assumes the 4th and 8th rows of B are zero.
       
   172 
       
   173 static void
       
   174 Multiply1_8q1 (dReal * A, dReal * B, dReal * C, int q)
       
   175 {
       
   176 	int k;
       
   177 	dReal sum;
       
   178 	sum = 0;
       
   179 	for (k = 0; k < q; k++)
       
   180 		sum += dMUL(B[k * 8],C[k]);
       
   181 	A[0] = sum;
       
   182 	sum = 0;
       
   183 	for (k = 0; k < q; k++)
       
   184 		sum += dMUL(B[1 + k * 8],C[k]);
       
   185 	A[1] = sum;
       
   186 	sum = 0;
       
   187 	for (k = 0; k < q; k++)
       
   188 		sum += dMUL(B[2 + k * 8],C[k]);
       
   189 	A[2] = sum;
       
   190 	sum = 0;
       
   191 	for (k = 0; k < q; k++)
       
   192 		sum += dMUL(B[4 + k * 8],C[k]);
       
   193 	A[4] = sum;
       
   194 	sum = 0;
       
   195 	for (k = 0; k < q; k++)
       
   196 		sum += dMUL(B[5 + k * 8],C[k]);
       
   197 	A[5] = sum;
       
   198 	sum = 0;
       
   199 	for (k = 0; k < q; k++)
       
   200 		sum += dMUL(B[6 + k * 8],C[k]);
       
   201 	A[6] = sum;
       
   202 }
       
   203 
       
   204 //****************************************************************************
       
   205 // body rotation
       
   206 
       
   207 // return sin(x)/x. this has a singularity at 0 so special handling is needed
       
   208 // for small arguments.
       
   209 
       
   210 static inline dReal
       
   211 sinc (dReal x)
       
   212 {
       
   213 	// if |x| < 1e-4 then use a taylor series expansion. this two term expansion
       
   214 	// is actually accurate to one LS bit within this range if double precision
       
   215 	// is being used - so don't worry!
       
   216 	if (dFabs (x) < REAL(1.0e-4))
       
   217 		return REAL (1.0) - dMUL(dMUL(x,x),REAL (0.166666666666666666667));
       
   218 	else
       
   219 		return dDIV(dSin (x),x);
       
   220 }
       
   221 
       
   222 
       
   223 // given a body b, apply its linear and angular rotation over the time
       
   224 // interval h, thereby adjusting its position and orientation.
       
   225 
       
   226 static inline void
       
   227 moveAndRotateBody (dxBody * b, dReal h)
       
   228 {
       
   229 	int j;
       
   230 
       
   231 	// handle linear velocity
       
   232 	for (j = 0; j < 3; j++)
       
   233 		b->posr.pos[j] += dMUL(h,b->lvel[j]);
       
   234 
       
   235 	if (b->flags & dxBodyFlagFiniteRotation)
       
   236 	{
       
   237 		dVector3 irv;			// infitesimal rotation vector
       
   238 		dQuaternion q;			// quaternion for finite rotation
       
   239 
       
   240 		if (b->flags & dxBodyFlagFiniteRotationAxis)
       
   241 		{
       
   242 			// split the angular velocity vector into a component along the finite
       
   243 			// rotation axis, and a component orthogonal to it.
       
   244 			dVector3 frv;	// finite rotation vector
       
   245 			dReal k = dDOT (b->finite_rot_axis, b->avel);
       
   246 			frv[0] = dMUL(b->finite_rot_axis[0],k);
       
   247 			frv[1] = dMUL(b->finite_rot_axis[1],k);
       
   248 			frv[2] = dMUL(b->finite_rot_axis[2],k);
       
   249 			irv[0] = b->avel[0] - frv[0];
       
   250 			irv[1] = b->avel[1] - frv[1];
       
   251 			irv[2] = b->avel[2] - frv[2];
       
   252 
       
   253 			// make a rotation quaternion q that corresponds to frv * h.
       
   254 			// compare this with the full-finite-rotation case below.
       
   255 			h = dMUL(h,REAL (0.5));
       
   256 			dReal theta = dMUL(k,h);
       
   257 			q[0] = dCos (theta);
       
   258 			dReal s = dMUL(sinc (theta),h);
       
   259 			q[1] = dMUL(frv[0],s);
       
   260 			q[2] = dMUL(frv[1],s);
       
   261 			q[3] = dMUL(frv[2],s);
       
   262 		}
       
   263 		else
       
   264 		{
       
   265 			// make a rotation quaternion q that corresponds to w * h
       
   266 			dReal wlen = dSqrt (dMUL(b->avel[0],b->avel[0]) + dMUL(b->avel[1],b->avel[1]) + dMUL(b->avel[2],b->avel[2]));
       
   267 			h = dMUL(h,REAL (0.5));
       
   268 			dReal theta = dMUL(wlen,h);
       
   269 			q[0] = dCos (theta);
       
   270 			dReal s = dMUL(sinc (theta),h);
       
   271 			q[1] = dMUL(b->avel[0],s);
       
   272 			q[2] = dMUL(b->avel[1],s);
       
   273 			q[3] = dMUL(b->avel[2],s);
       
   274 		}
       
   275 
       
   276 		// do the finite rotation
       
   277 		dQuaternion q2;
       
   278 		dQMultiply0 (q2, q, b->q);
       
   279 		for (j = 0; j < 4; j++)
       
   280 			b->q[j] = q2[j];
       
   281 
       
   282 		// do the infitesimal rotation if required
       
   283 		if (b->flags & dxBodyFlagFiniteRotationAxis)
       
   284 		{
       
   285 			dReal dq[4];
       
   286 			dWtoDQ (irv, b->q, dq);
       
   287 			for (j = 0; j < 4; j++)
       
   288 				b->q[j] += dMUL(h,dq[j]);
       
   289 		}
       
   290 	}
       
   291 	else
       
   292 	{
       
   293 		// the normal way - do an infitesimal rotation
       
   294 		dReal dq[4];
       
   295 		dWtoDQ (b->avel, b->q, dq);
       
   296 		for (j = 0; j < 4; j++)
       
   297 			b->q[j] += dMUL(h,dq[j]);
       
   298 	}
       
   299 
       
   300 	// normalize the quaternion and convert it to a rotation matrix
       
   301 	dNormalize4 (b->q);
       
   302 	dQtoR (b->q, b->posr.R);
       
   303 
       
   304 	// notify all attached geoms that this body has moved
       
   305 	for (dxGeom * geom = b->geom; geom; geom = dGeomGetBodyNext (geom))
       
   306 		dGeomMoved (geom);
       
   307 }
       
   308 
       
   309 //****************************************************************************
       
   310 //This is an implementation of the iterated/relaxation algorithm.
       
   311 //Here is a quick overview of the algorithm per Sergi Valverde's posts to the
       
   312 //mailing list:
       
   313 //
       
   314 //  for i=0..N-1 do
       
   315 //      for c = 0..C-1 do
       
   316 //          Solve constraint c-th
       
   317 //          Apply forces to constraint bodies
       
   318 //      next
       
   319 //  next
       
   320 //  Integrate bodies
       
   321 
       
   322 void
       
   323 dInternalStepFast (dxWorld * /*world*/, dxBody * body[2], dReal * /*GI*/[2], dReal * GinvI[2], dxJoint * joint, dxJoint::Info1 info, dxJoint::Info2 Jinfo, dReal stepsize)
       
   324 {
       
   325 	int i, j, k;
       
   326 
       
   327 	dReal stepsize1 = dRecip (stepsize);
       
   328 
       
   329 	int m = info.m;
       
   330 	// nothing to do if no constraints.
       
   331 	if (m <= 0)
       
   332 		return;
       
   333 
       
   334 	int nub = 0;
       
   335 	if (info.nub == info.m)
       
   336 		nub = m;
       
   337 
       
   338 	// compute A = J*invM*J'. first compute JinvM = J*invM. this has the same
       
   339 	// format as J so we just go through the constraints in J multiplying by
       
   340 	// the appropriate scalars and matrices.
       
   341 	dReal JinvM[2 * 6 * 8];
       
   342 	//dSetZero (JinvM, 2 * m * 8);
       
   343 
       
   344 	dReal *Jsrc = Jinfo.J1l;
       
   345 	dReal *Jdst = JinvM;
       
   346 	if (body[0])
       
   347 	{
       
   348 		for (j = m - 1; j >= 0; j--)
       
   349 		{
       
   350 			for (k = 0; k < 3; k++)
       
   351 				Jdst[k] = dMUL(Jsrc[k],body[0]->invMass);
       
   352 			dMULTIPLY0_133 (Jdst + 4, Jsrc + 4, GinvI[0]);
       
   353 			Jsrc += 8;
       
   354 			Jdst += 8;
       
   355 		}
       
   356 	}
       
   357 	if (body[1])
       
   358 	{
       
   359 		Jsrc = Jinfo.J2l;
       
   360 		Jdst = JinvM + 8 * m;
       
   361 		for (j = m - 1; j >= 0; j--)
       
   362 		{
       
   363 			for (k = 0; k < 3; k++)
       
   364 				Jdst[k] = dMUL(Jsrc[k],body[1]->invMass);
       
   365 			dMULTIPLY0_133 (Jdst + 4, Jsrc + 4, GinvI[1]);
       
   366 			Jsrc += 8;
       
   367 			Jdst += 8;
       
   368 		}
       
   369 	}
       
   370 
       
   371 
       
   372 	// now compute A = JinvM * J'.
       
   373 	int mskip = dPAD (m);
       
   374 	dReal A[6 * 8];
       
   375 	//dSetZero (A, 6 * 8);
       
   376 
       
   377 	if (body[0]) {
       
   378 		Multiply2_sym_p8p (A, JinvM, Jinfo.J1l, m, mskip);
       
   379 		if (body[1])
       
   380 			MultiplyAdd2_sym_p8p (A, JinvM + 8 * m, Jinfo.J2l,
       
   381                                               m, mskip);
       
   382 	} else {
       
   383 		if (body[1])
       
   384 			Multiply2_sym_p8p (A, JinvM + 8 * m, Jinfo.J2l,
       
   385                                            m, mskip);
       
   386 	}
       
   387 
       
   388 	// add cfm to the diagonal of A
       
   389 	for (i = 0; i < m; i++)
       
   390 		A[i * mskip + i] += dMUL(Jinfo.cfm[i],stepsize1);
       
   391 
       
   392 	// compute the right hand side `rhs'
       
   393 	dReal tmp1[16];
       
   394 	//dSetZero (tmp1, 16);
       
   395 	// put v/h + invM*fe into tmp1
       
   396 	for (i = 0; i < 2; i++)
       
   397 	{
       
   398 		if (!body[i])
       
   399 			continue;
       
   400 		for (j = 0; j < 3; j++)
       
   401 			tmp1[i * 8 + j] = dMUL(body[i]->facc[j],body[i]->invMass) + dMUL(body[i]->lvel[j],stepsize1);
       
   402 		dMULTIPLY0_331 (tmp1 + i * 8 + 4, GinvI[i], body[i]->tacc);
       
   403 		for (j = 0; j < 3; j++)
       
   404 			tmp1[i * 8 + 4 + j] += dMUL(body[i]->avel[j],stepsize1);
       
   405 	}
       
   406 	// put J*tmp1 into rhs
       
   407 	dReal rhs[6];
       
   408 	//dSetZero (rhs, 6);
       
   409 
       
   410 	if (body[0]) {
       
   411 		Multiply0_p81 (rhs, Jinfo.J1l, tmp1, m);
       
   412 		if (body[1])
       
   413 			MultiplyAdd0_p81 (rhs, Jinfo.J2l, tmp1 + 8, m);
       
   414 	} else {
       
   415 		if (body[1])
       
   416 			Multiply0_p81 (rhs, Jinfo.J2l, tmp1 + 8, m);
       
   417 	}
       
   418 
       
   419 	// complete rhs
       
   420 	for (i = 0; i < m; i++)
       
   421 		rhs[i] = dMUL(Jinfo.c[i],stepsize1) - rhs[i];
       
   422 
       
   423 #ifdef SLOW_LCP
       
   424 	// solve the LCP problem and get lambda.
       
   425 	// this will destroy A but that's okay
       
   426 	dReal *lambda = (dReal *) ALLOCA (m * sizeof (dReal));
       
   427 	dReal *residual = (dReal *) ALLOCA (m * sizeof (dReal));
       
   428 	dReal lo[6], hi[6];
       
   429 	memcpy (lo, Jinfo.lo, m * sizeof (dReal));
       
   430 	memcpy (hi, Jinfo.hi, m * sizeof (dReal));
       
   431 	dSolveLCP (m, A, lambda, rhs, residual, nub, lo, hi, Jinfo.findex);
       
   432 #endif
       
   433 
       
   434 
       
   435 	// compute the constraint force `cforce'
       
   436 	// compute cforce = J'*lambda
       
   437 	dJointFeedback *fb = joint->feedback;
       
   438 	dReal cforce[16];
       
   439 	//dSetZero (cforce, 16);
       
   440 
       
   441 	if (fb)
       
   442 	{
       
   443 		// the user has requested feedback on the amount of force that this
       
   444 		// joint is applying to the bodies. we use a slightly slower
       
   445 		// computation that splits out the force components and puts them
       
   446 		// in the feedback structure.
       
   447 		dReal data1[8], data2[8];
       
   448 		if (body[0])
       
   449 		{
       
   450 			Multiply1_8q1 (data1, Jinfo.J1l, lambda, m);
       
   451 			dReal *cf1 = cforce;
       
   452 			cf1[0] = (fb->f1[0] = data1[0]);
       
   453 			cf1[1] = (fb->f1[1] = data1[1]);
       
   454 			cf1[2] = (fb->f1[2] = data1[2]);
       
   455 			cf1[4] = (fb->t1[0] = data1[4]);
       
   456 			cf1[5] = (fb->t1[1] = data1[5]);
       
   457 			cf1[6] = (fb->t1[2] = data1[6]);
       
   458 		}
       
   459 		if (body[1])
       
   460 		{
       
   461 			Multiply1_8q1 (data2, Jinfo.J2l, lambda, m);
       
   462 			dReal *cf2 = cforce + 8;
       
   463 			cf2[0] = (fb->f2[0] = data2[0]);
       
   464 			cf2[1] = (fb->f2[1] = data2[1]);
       
   465 			cf2[2] = (fb->f2[2] = data2[2]);
       
   466 			cf2[4] = (fb->t2[0] = data2[4]);
       
   467 			cf2[5] = (fb->t2[1] = data2[5]);
       
   468 			cf2[6] = (fb->t2[2] = data2[6]);
       
   469 		}
       
   470 	}
       
   471 	else
       
   472 	{
       
   473 		// no feedback is required, let's compute cforce the faster way
       
   474 		if (body[0])
       
   475 			Multiply1_8q1 (cforce, Jinfo.J1l, lambda, m);
       
   476 		if (body[1])
       
   477 			Multiply1_8q1 (cforce + 8, Jinfo.J2l, lambda, m);
       
   478 	}
       
   479 
       
   480 	for (i = 0; i < 2; i++)
       
   481 	{
       
   482 		if (!body[i])
       
   483 			continue;
       
   484 		for (j = 0; j < 3; j++)
       
   485 		{
       
   486 			body[i]->facc[j] += cforce[i * 8 + j];
       
   487 			body[i]->tacc[j] += cforce[i * 8 + 4 + j];
       
   488 		}
       
   489 	}
       
   490 }
       
   491 
       
   492 void
       
   493 dInternalStepIslandFast (dxWorld * world, dxBody * const *bodies, int nb, dxJoint * const *_joints, int nj, dReal stepsize, int maxiterations)
       
   494 {
       
   495 	dxBody *bodyPair[2], *body;
       
   496 	dReal *GIPair[2], *GinvIPair[2];
       
   497 	dxJoint *joint;
       
   498 	int iter, b, j, i;
       
   499 	dReal ministep = stepsize / maxiterations;
       
   500 
       
   501 	// make a local copy of the joint array, because we might want to modify it.
       
   502 	// (the "dxJoint *const*" declaration says we're allowed to modify the joints
       
   503 	// but not the joint array, because the caller might need it unchanged).
       
   504 	dxJoint **joints = (dxJoint **) ALLOCA (nj * sizeof (dxJoint *));
       
   505 	memcpy (joints, _joints, nj * sizeof (dxJoint *));
       
   506 
       
   507 	// get m = total constraint dimension, nub = number of unbounded variables.
       
   508 	// create constraint offset array and number-of-rows array for all joints.
       
   509 	// the constraints are re-ordered as follows: the purely unbounded
       
   510 	// constraints, the mixed unbounded + LCP constraints, and last the purely
       
   511 	// LCP constraints. this assists the LCP solver to put all unbounded
       
   512 	// variables at the start for a quick factorization.
       
   513 	//
       
   514 	// joints with m=0 are inactive and are removed from the joints array
       
   515 	// entirely, so that the code that follows does not consider them.
       
   516 	// also number all active joints in the joint list (set their tag values).
       
   517 	// inactive joints receive a tag value of -1.
       
   518 
       
   519 	int m = 0;
       
   520 	dxJoint::Info1 * info = (dxJoint::Info1 *) ALLOCA (nj * sizeof (dxJoint::Info1));
       
   521 	int *ofs = (int *) ALLOCA (nj * sizeof (int));
       
   522 	for (i = 0, j = 0; j < nj; j++)
       
   523 	{	// i=dest, j=src
       
   524 		joints[j]->vtable->getInfo1 (joints[j], info + i);
       
   525 		if (info[i].m > 0)
       
   526 		{
       
   527 			joints[i] = joints[j];
       
   528 			joints[i]->tag = i;
       
   529 			i++;
       
   530 		}
       
   531 		else
       
   532 		{
       
   533 			joints[j]->tag = -1;
       
   534 		}
       
   535 	}
       
   536 	nj = i;
       
   537 
       
   538 	// the purely unbounded constraints
       
   539 	for (i = 0; i < nj; i++)
       
   540 	{
       
   541 		ofs[i] = m;
       
   542 		m += info[i].m;
       
   543 	}
       
   544 	dReal *c = NULL;
       
   545 	dReal *cfm = NULL;
       
   546 	dReal *lo = NULL;
       
   547 	dReal *hi = NULL;
       
   548 	int *findex = NULL;
       
   549 
       
   550 	dReal *J = NULL;
       
   551 	dxJoint::Info2 * Jinfo = NULL;
       
   552 
       
   553 	if (m)
       
   554 	{
       
   555 	// create a constraint equation right hand side vector `c', a constraint
       
   556 	// force mixing vector `cfm', and LCP low and high bound vectors, and an
       
   557 	// 'findex' vector.
       
   558 		c = (dReal *) ALLOCA (m * sizeof (dReal));
       
   559 		cfm = (dReal *) ALLOCA (m * sizeof (dReal));
       
   560 		lo = (dReal *) ALLOCA (m * sizeof (dReal));
       
   561 		hi = (dReal *) ALLOCA (m * sizeof (dReal));
       
   562 		findex = (int *) ALLOCA (m * sizeof (int));
       
   563 	dSetZero (c, m);
       
   564 	dSetValue (cfm, m, world->global_cfm);
       
   565 	dSetValue (lo, m, -dInfinity);
       
   566 	dSetValue (hi, m, dInfinity);
       
   567 	for (i = 0; i < m; i++)
       
   568 		findex[i] = -1;
       
   569 
       
   570 	// get jacobian data from constraints. a (2*m)x8 matrix will be created
       
   571 	// to store the two jacobian blocks from each constraint. it has this
       
   572 	// format:
       
   573 	//
       
   574 	//   l l l 0 a a a 0  \    .
       
   575 	//   l l l 0 a a a 0   }-- jacobian body 1 block for joint 0 (3 rows)
       
   576 	//   l l l 0 a a a 0  /
       
   577 	//   l l l 0 a a a 0  \    .
       
   578 	//   l l l 0 a a a 0   }-- jacobian body 2 block for joint 0 (3 rows)
       
   579 	//   l l l 0 a a a 0  /
       
   580 	//   l l l 0 a a a 0  }--- jacobian body 1 block for joint 1 (1 row)
       
   581 	//   l l l 0 a a a 0  }--- jacobian body 2 block for joint 1 (1 row)
       
   582 	//   etc...
       
   583 	//
       
   584 	//   (lll) = linear jacobian data
       
   585 	//   (aaa) = angular jacobian data
       
   586 	//
       
   587 		J = (dReal *) ALLOCA (2 * m * 8 * sizeof (dReal));
       
   588 		dSetZero (J, 2 * m * 8);
       
   589 		Jinfo = (dxJoint::Info2 *) ALLOCA (nj * sizeof (dxJoint::Info2));
       
   590 	for (i = 0; i < nj; i++)
       
   591 	{
       
   592 		Jinfo[i].rowskip = 8;
       
   593 		Jinfo[i].fps = dRecip (stepsize);
       
   594 		Jinfo[i].erp = world->global_erp;
       
   595 		Jinfo[i].J1l = J + 2 * 8 * ofs[i];
       
   596 		Jinfo[i].J1a = Jinfo[i].J1l + 4;
       
   597 		Jinfo[i].J2l = Jinfo[i].J1l + 8 * info[i].m;
       
   598 		Jinfo[i].J2a = Jinfo[i].J2l + 4;
       
   599 		Jinfo[i].c = c + ofs[i];
       
   600 		Jinfo[i].cfm = cfm + ofs[i];
       
   601 		Jinfo[i].lo = lo + ofs[i];
       
   602 		Jinfo[i].hi = hi + ofs[i];
       
   603 		Jinfo[i].findex = findex + ofs[i];
       
   604 		//joints[i]->vtable->getInfo2 (joints[i], Jinfo+i);
       
   605 	}
       
   606 
       
   607 	}
       
   608 
       
   609 	dReal *saveFacc = (dReal *) ALLOCA (nb * 4 * sizeof (dReal));
       
   610 	dReal *saveTacc = (dReal *) ALLOCA (nb * 4 * sizeof (dReal));
       
   611 	dReal *globalI = (dReal *) ALLOCA (nb * 12 * sizeof (dReal));
       
   612 	dReal *globalInvI = (dReal *) ALLOCA (nb * 12 * sizeof (dReal));
       
   613 	for (b = 0; b < nb; b++)
       
   614 	{
       
   615 		for (i = 0; i < 4; i++)
       
   616 		{
       
   617 			saveFacc[b * 4 + i] = bodies[b]->facc[i];
       
   618 			saveTacc[b * 4 + i] = bodies[b]->tacc[i];
       
   619 		}
       
   620                 bodies[b]->tag = b;
       
   621 	}
       
   622 
       
   623 	for (iter = 0; iter < maxiterations; iter++)
       
   624 	{
       
   625 		dReal tmp[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
       
   626 
       
   627 		for (b = 0; b < nb; b++)
       
   628 		{
       
   629 			body = bodies[b];
       
   630 
       
   631 			// for all bodies, compute the inertia tensor and its inverse in the global
       
   632 			// frame, and compute the rotational force and add it to the torque
       
   633 			// accumulator. I and invI are vertically stacked 3x4 matrices, one per body.
       
   634 			// @@@ check computation of rotational force.
       
   635 
       
   636 			// compute inertia tensor in global frame
       
   637 			dMULTIPLY2_333 (tmp, body->mass.I, body->posr.R);
       
   638 			dMULTIPLY0_333 (globalI + b * 12, body->posr.R, tmp);
       
   639 			// compute inverse inertia tensor in global frame
       
   640 			dMULTIPLY2_333 (tmp, body->invI, body->posr.R);
       
   641 			dMULTIPLY0_333 (globalInvI + b * 12, body->posr.R, tmp);
       
   642 
       
   643 			for (i = 0; i < 4; i++)
       
   644 				body->tacc[i] = saveTacc[b * 4 + i];
       
   645 
       
   646 			// add the gravity force to all bodies
       
   647 			if ((body->flags & dxBodyNoGravity) == 0)
       
   648 			{
       
   649 				body->facc[0] = saveFacc[b * 4 + 0] + dMUL(body->mass.mass,world->gravity[0]);
       
   650 				body->facc[1] = saveFacc[b * 4 + 1] + dMUL(body->mass.mass,world->gravity[1]);
       
   651 				body->facc[2] = saveFacc[b * 4 + 2] + dMUL(body->mass.mass,world->gravity[2]);
       
   652 				body->facc[3] = 0;
       
   653 			} else {
       
   654                                 body->facc[0] = saveFacc[b * 4 + 0];
       
   655                                 body->facc[1] = saveFacc[b * 4 + 1];
       
   656                                 body->facc[2] = saveFacc[b * 4 + 2];
       
   657 				body->facc[3] = 0;
       
   658                         }
       
   659 
       
   660 		}
       
   661 
       
   662 #ifdef RANDOM_JOINT_ORDER
       
   663 		//randomize the order of the joints by looping through the array
       
   664 		//and swapping the current joint pointer with a random one before it.
       
   665 		for (j = 0; j < nj; j++)
       
   666 		{
       
   667 			joint = joints[j];
       
   668 			dxJoint::Info1 i1 = info[j];
       
   669 			dxJoint::Info2 i2 = Jinfo[j];
       
   670                         const int r = dRandInt(j+1);
       
   671 			joints[j] = joints[r];
       
   672 			info[j] = info[r];
       
   673 			Jinfo[j] = Jinfo[r];
       
   674 			joints[r] = joint;
       
   675 			info[r] = i1;
       
   676 			Jinfo[r] = i2;
       
   677 		}
       
   678 #endif
       
   679 
       
   680 		//now iterate through the random ordered joint array we created.
       
   681 		for (j = 0; j < nj; j++)
       
   682 		{
       
   683 			joint = joints[j];
       
   684 			bodyPair[0] = joint->node[0].body;
       
   685 			bodyPair[1] = joint->node[1].body;
       
   686 
       
   687 			if (bodyPair[0] && (bodyPair[0]->flags & dxBodyDisabled))
       
   688 				bodyPair[0] = 0;
       
   689 			if (bodyPair[1] && (bodyPair[1]->flags & dxBodyDisabled))
       
   690 				bodyPair[1] = 0;
       
   691 			
       
   692 			//if this joint is not connected to any enabled bodies, skip it.
       
   693 			if (!bodyPair[0] && !bodyPair[1])
       
   694 				continue;
       
   695 			
       
   696 			if (bodyPair[0])
       
   697 			{
       
   698 				GIPair[0] = globalI + bodyPair[0]->tag * 12;
       
   699 				GinvIPair[0] = globalInvI + bodyPair[0]->tag * 12;
       
   700 			}
       
   701 			if (bodyPair[1])
       
   702 			{
       
   703 				GIPair[1] = globalI + bodyPair[1]->tag * 12;
       
   704 				GinvIPair[1] = globalInvI + bodyPair[1]->tag * 12;
       
   705 			}
       
   706 
       
   707 			joints[j]->vtable->getInfo2 (joints[j], Jinfo + j);
       
   708 
       
   709 			//dInternalStepIslandFast is an exact copy of the old routine with one
       
   710 			//modification: the calculated forces are added back to the facc and tacc
       
   711 			//vectors instead of applying them to the bodies and moving them.
       
   712 			if (info[j].m > 0)
       
   713 			{
       
   714 			dInternalStepFast (world, bodyPair, GIPair, GinvIPair, joint, info[j], Jinfo[j], ministep);
       
   715 			}		
       
   716 		}
       
   717 		//  }
       
   718 		//Now we can simulate all the free floating bodies, and move them.
       
   719 		for (b = 0; b < nb; b++)
       
   720 		{
       
   721 			body = bodies[b];
       
   722 
       
   723 			for (i = 0; i < 4; i++)
       
   724 			{
       
   725 				body->facc[i] = dMUL(body->facc[i],ministep);
       
   726 				body->tacc[i] = dMUL(body->tacc[i],ministep);
       
   727 			}
       
   728 
       
   729 			//apply torque
       
   730 			dMULTIPLYADD0_331 (body->avel, globalInvI + b * 12, body->tacc);
       
   731 
       
   732 			//apply force
       
   733 			for (i = 0; i < 3; i++)
       
   734 				body->lvel[i] += dMUL(body->invMass,body->facc[i]);
       
   735 
       
   736 			//move It!
       
   737 			moveAndRotateBody (body, ministep);
       
   738 		}
       
   739 	}
       
   740 	for (b = 0; b < nb; b++)
       
   741 		for (j = 0; j < 4; j++)
       
   742 			bodies[b]->facc[j] = bodies[b]->tacc[j] = 0;
       
   743 }
       
   744 
       
   745 
       
   746 //****************************************************************************
       
   747 // island processing
       
   748 
       
   749 // this groups all joints and bodies in a world into islands. all objects
       
   750 // in an island are reachable by going through connected bodies and joints.
       
   751 // each island can be simulated separately.
       
   752 // note that joints that are not attached to anything will not be included
       
   753 // in any island, an so they do not affect the simulation.
       
   754 //
       
   755 // this function starts new island from unvisited bodies. however, it will
       
   756 // never start a new islands from a disabled body. thus islands of disabled
       
   757 // bodies will not be included in the simulation. disabled bodies are
       
   758 // re-enabled if they are found to be part of an active island.
       
   759 
       
   760 static void
       
   761 processIslandsFast (dxWorld * world, dReal stepsize, int maxiterations)
       
   762 {
       
   763 	dxBody *b, *bb, **body;
       
   764 	dxJoint *j, **joint;
       
   765 
       
   766 	// nothing to do if no bodies
       
   767 	if (world->nb <= 0)
       
   768 		return;
       
   769 
       
   770 	dInternalHandleAutoDisabling (world,stepsize);
       
   771 
       
   772 	// make arrays for body and joint lists (for a single island) to go into
       
   773 	body = (dxBody **) ALLOCA (world->nb * sizeof (dxBody *));
       
   774 	joint = (dxJoint **) ALLOCA (world->nj * sizeof (dxJoint *));
       
   775 	int bcount = 0;				// number of bodies in `body'
       
   776 	int jcount = 0;				// number of joints in `joint'
       
   777 	int tbcount = 0;
       
   778 	int tjcount = 0;
       
   779 	
       
   780 	// set all body/joint tags to 0
       
   781 	for (b = world->firstbody; b; b = (dxBody *) b->next)
       
   782 		b->tag = 0;
       
   783 	for (j = world->firstjoint; j; j = (dxJoint *) j->next)
       
   784 		j->tag = 0;
       
   785 
       
   786 	// allocate a stack of unvisited bodies in the island. the maximum size of
       
   787 	// the stack can be the lesser of the number of bodies or joints, because
       
   788 	// new bodies are only ever added to the stack by going through untagged
       
   789 	// joints. all the bodies in the stack must be tagged!
       
   790 	int stackalloc = (world->nj < world->nb) ? world->nj : world->nb;
       
   791 	dxBody **stack = (dxBody **) ALLOCA (stackalloc * sizeof (dxBody *));
       
   792 	int *autostack = (int *) ALLOCA (stackalloc * sizeof (int));
       
   793 
       
   794 	for (bb = world->firstbody; bb; bb = (dxBody *) bb->next)
       
   795 	{
       
   796 		// get bb = the next enabled, untagged body, and tag it
       
   797 		if (bb->tag || (bb->flags & dxBodyDisabled))
       
   798 			continue;
       
   799 		bb->tag = 1;
       
   800 
       
   801 		// tag all bodies and joints starting from bb.
       
   802 		
       
   803 		int stacksize = 0;
       
   804 		int firsttime = 1;
       
   805 		
       
   806 		int autoDepth = GetGlobalData()->autoEnableDepth;
       
   807 		b = bb;
       
   808 		body[0] = bb;
       
   809 		bcount = 1;
       
   810 		jcount = 0;
       
   811 		
       
   812 		while (stacksize > 0 || firsttime)
       
   813 		{
       
   814 		    if (!firsttime)
       
   815 		    {
       
   816 			    b = stack[--stacksize];	// pop body off stack
       
   817 			    autoDepth = autostack[stacksize];
       
   818 			    body[bcount++] = b;	// put body on body list
       
   819 		    }
       
   820 		    else
       
   821 		    {
       
   822 		        firsttime = 0;
       
   823 		    }
       
   824 		 
       
   825 			// traverse and tag all body's joints, add untagged connected bodies
       
   826 			// to stack
       
   827 			for (dxJointNode * n = b->firstjoint; n; n = n->next)
       
   828 			{
       
   829 				if (!n->joint->tag)
       
   830 				{
       
   831 					int thisDepth = GetGlobalData()->autoEnableDepth;
       
   832 					n->joint->tag = 1;
       
   833 					joint[jcount++] = n->joint;
       
   834 					if (n->body && !n->body->tag)
       
   835 					{
       
   836 						if (n->body->flags & dxBodyDisabled)
       
   837 							thisDepth = autoDepth - 1;
       
   838 						if (thisDepth < 0)
       
   839 							continue;
       
   840 						n->body->flags &= ~dxBodyDisabled;
       
   841 						n->body->tag = 1;
       
   842 						autostack[stacksize] = thisDepth;
       
   843 						stack[stacksize++] = n->body;
       
   844 					}
       
   845 				}
       
   846 			}
       
   847 		}
       
   848 
       
   849 		// now do something with body and joint lists
       
   850 		dInternalStepIslandFast (world, body, bcount, joint, jcount, stepsize, maxiterations);
       
   851 
       
   852 		// what we've just done may have altered the body/joint tag values.
       
   853 		// we must make sure that these tags are nonzero.
       
   854 		// also make sure all bodies are in the enabled state.
       
   855 		int i;
       
   856 		for (i = 0; i < bcount; i++)
       
   857 		{
       
   858 			body[i]->tag = 1;
       
   859 			body[i]->flags &= ~dxBodyDisabled;
       
   860 		}
       
   861 		for (i = 0; i < jcount; i++)
       
   862 			joint[i]->tag = 1;
       
   863 		
       
   864 		tbcount += bcount;
       
   865 		tjcount += jcount;
       
   866 	}
       
   867 	
       
   868 
       
   869 }
       
   870 
       
   871 
       
   872 EXPORT_C void dWorldStepFast1 (dWorldID w, dReal stepsize, int maxiterations)
       
   873 {
       
   874 
       
   875 	processIslandsFast (w, stepsize, maxiterations);
       
   876 }