|
1 /************************************************************************* |
|
2 * * |
|
3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. * |
|
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org * |
|
5 * * |
|
6 * This library is free software; you can redistribute it and/or * |
|
7 * modify it under the terms of EITHER: * |
|
8 * (1) The GNU Lesser General Public License as published by the Free * |
|
9 * Software Foundation; either version 2.1 of the License, or (at * |
|
10 * your option) any later version. The text of the GNU Lesser * |
|
11 * General Public License is included with this library in the * |
|
12 * file LICENSE.TXT. * |
|
13 * (2) The BSD-style license that is included with this library in * |
|
14 * the file LICENSE-BSD.TXT. * |
|
15 * * |
|
16 * This library is distributed in the hope that it will be useful, * |
|
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
|
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files * |
|
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. * |
|
20 * * |
|
21 *************************************************************************/ |
|
22 |
|
23 /* |
|
24 |
|
25 |
|
26 THE ALGORITHM |
|
27 ------------- |
|
28 |
|
29 solve A*x = b+w, with x and w subject to certain LCP conditions. |
|
30 each x(i),w(i) must lie on one of the three line segments in the following |
|
31 diagram. each line segment corresponds to one index set : |
|
32 |
|
33 w(i) |
|
34 /|\ | : |
|
35 | | : |
|
36 | |i in N : |
|
37 w>0 | |state[i]=0 : |
|
38 | | : |
|
39 | | : i in C |
|
40 w=0 + +-----------------------+ |
|
41 | : | |
|
42 | : | |
|
43 w<0 | : |i in N |
|
44 | : |state[i]=1 |
|
45 | : | |
|
46 | : | |
|
47 +-------|-----------|-----------|----------> x(i) |
|
48 lo 0 hi |
|
49 |
|
50 the Dantzig algorithm proceeds as follows: |
|
51 for i=1:n |
|
52 * if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or |
|
53 negative towards the line. as this is done, the other (x(j),w(j)) |
|
54 for j<i are constrained to be on the line. if any (x,w) reaches the |
|
55 end of a line segment then it is switched between index sets. |
|
56 * i is added to the appropriate index set depending on what line segment |
|
57 it hits. |
|
58 |
|
59 we restrict lo(i) <= 0 and hi(i) >= 0. this makes the algorithm a bit |
|
60 simpler, because the starting point for x(i),w(i) is always on the dotted |
|
61 line x=0 and x will only ever increase in one direction, so it can only hit |
|
62 two out of the three line segments. |
|
63 |
|
64 |
|
65 NOTES |
|
66 ----- |
|
67 |
|
68 this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m". |
|
69 the implementation is split into an LCP problem object (dLCP) and an LCP |
|
70 driver function. most optimization occurs in the dLCP object. |
|
71 |
|
72 a naive implementation of the algorithm requires either a lot of data motion |
|
73 or a lot of permutation-array lookup, because we are constantly re-ordering |
|
74 rows and columns. to avoid this and make a more optimized algorithm, a |
|
75 non-trivial data structure is used to represent the matrix A (this is |
|
76 implemented in the fast version of the dLCP object). |
|
77 |
|
78 during execution of this algorithm, some indexes in A are clamped (set C), |
|
79 some are non-clamped (set N), and some are "don't care" (where x=0). |
|
80 A,x,b,w (and other problem vectors) are permuted such that the clamped |
|
81 indexes are first, the unclamped indexes are next, and the don't-care |
|
82 indexes are last. this permutation is recorded in the array `p'. |
|
83 initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped, |
|
84 the corresponding elements of p are swapped. |
|
85 |
|
86 because the C and N elements are grouped together in the rows of A, we can do |
|
87 lots of work with a fast dot product function. if A,x,etc were not permuted |
|
88 and we only had a permutation array, then those dot products would be much |
|
89 slower as we would have a permutation array lookup in some inner loops. |
|
90 |
|
91 A is accessed through an array of row pointers, so that element (i,j) of the |
|
92 permuted matrix is A[i][j]. this makes row swapping fast. for column swapping |
|
93 we still have to actually move the data. |
|
94 |
|
95 during execution of this algorithm we maintain an L*D*L' factorization of |
|
96 the clamped submatrix of A (call it `AC') which is the top left nC*nC |
|
97 submatrix of A. there are two ways we could arrange the rows/columns in AC. |
|
98 |
|
99 (1) AC is always permuted such that L*D*L' = AC. this causes a problem |
|
100 when a row/column is removed from C, because then all the rows/columns of A |
|
101 between the deleted index and the end of C need to be rotated downward. |
|
102 this results in a lot of data motion and slows things down. |
|
103 (2) L*D*L' is actually a factorization of a *permutation* of AC (which is |
|
104 itself a permutation of the underlying A). this is what we do - the |
|
105 permutation is recorded in the vector C. call this permutation A[C,C]. |
|
106 when a row/column is removed from C, all we have to do is swap two |
|
107 rows/columns and manipulate C. |
|
108 |
|
109 */ |
|
110 |
|
111 #include <ode/common.h> |
|
112 #include "lcp.h" |
|
113 #include <ode/matrix.h> |
|
114 #include <ode/misc.h> |
|
115 #include "mat.h" // for testing |
|
116 #include <ode/timer.h> // for testing |
|
117 |
|
118 //*************************************************************************** |
|
119 // code generation parameters |
|
120 |
|
121 #define dLCP_FAST // use fast dLCP object |
|
122 #define dUSE_MALLOC_FOR_ALLOCA |
|
123 |
|
124 // option 1 : matrix row pointers (less data copying) |
|
125 #define ROWPTRS |
|
126 #define ATYPE dReal ** |
|
127 #define AROW(i) (A[i]) |
|
128 |
|
129 // option 2 : no matrix row pointers (slightly faster inner loops) |
|
130 //#define NOROWPTRS |
|
131 //#define ATYPE dReal * |
|
132 //#define AROW(i) (A+(i)*nskip) |
|
133 |
|
134 // use protected, non-stack memory allocation system |
|
135 |
|
136 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
137 extern unsigned int dMemoryFlag; |
|
138 |
|
139 #define ALLOCA(t,v,s) t* v = (t*) malloc(s) |
|
140 #define UNALLOCA(t) free(t) |
|
141 |
|
142 #else |
|
143 |
|
144 #define ALLOCA(t,v,s) t* v =(t*)dALLOCA16(s) |
|
145 #define UNALLOCA(t) /* nothing */ |
|
146 |
|
147 #endif |
|
148 |
|
149 #define NUB_OPTIMIZATIONS |
|
150 |
|
151 //*************************************************************************** |
|
152 |
|
153 // swap row/column i1 with i2 in the n*n matrix A. the leading dimension of |
|
154 // A is nskip. this only references and swaps the lower triangle. |
|
155 // if `do_fast_row_swaps' is nonzero and row pointers are being used, then |
|
156 // rows will be swapped by exchanging row pointers. otherwise the data will |
|
157 // be copied. |
|
158 |
|
159 static void swapRowsAndCols (ATYPE A, int n, int i1, int i2, int /*nskip*/, |
|
160 int do_fast_row_swaps) |
|
161 { |
|
162 int i; |
|
163 |
|
164 |
|
165 # ifdef ROWPTRS |
|
166 for (i=i1+1; i<i2; i++) A[i1][i] = A[i][i1]; |
|
167 for (i=i1+1; i<i2; i++) A[i][i1] = A[i2][i]; |
|
168 A[i1][i2] = A[i1][i1]; |
|
169 A[i1][i1] = A[i2][i1]; |
|
170 A[i2][i1] = A[i2][i2]; |
|
171 // swap rows, by swapping row pointers |
|
172 if (do_fast_row_swaps) { |
|
173 dReal *tmpp; |
|
174 tmpp = A[i1]; |
|
175 A[i1] = A[i2]; |
|
176 A[i2] = tmpp; |
|
177 } |
|
178 else { |
|
179 ALLOCA (dReal,tmprow,n * sizeof(dReal)); |
|
180 |
|
181 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
182 if (tmprow == NULL) { |
|
183 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
184 return; |
|
185 } |
|
186 #endif |
|
187 |
|
188 memcpy (tmprow,A[i1],n * sizeof(dReal)); |
|
189 memcpy (A[i1],A[i2],n * sizeof(dReal)); |
|
190 memcpy (A[i2],tmprow,n * sizeof(dReal)); |
|
191 UNALLOCA(tmprow); |
|
192 } |
|
193 // swap columns the hard way |
|
194 for (i=i2+1; i<n; i++) { |
|
195 dReal tmp = A[i][i1]; |
|
196 A[i][i1] = A[i][i2]; |
|
197 A[i][i2] = tmp; |
|
198 } |
|
199 # else |
|
200 dReal tmp; |
|
201 ALLOCA (dReal,tmprow,n * sizeof(dReal)); |
|
202 |
|
203 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
204 if (tmprow == NULL) { |
|
205 return; |
|
206 } |
|
207 #endif |
|
208 |
|
209 if (i1 > 0) { |
|
210 memcpy (tmprow,A+i1*nskip,i1*sizeof(dReal)); |
|
211 memcpy (A+i1*nskip,A+i2*nskip,i1*sizeof(dReal)); |
|
212 memcpy (A+i2*nskip,tmprow,i1*sizeof(dReal)); |
|
213 } |
|
214 for (i=i1+1; i<i2; i++) { |
|
215 tmp = A[i2*nskip+i]; |
|
216 A[i2*nskip+i] = A[i*nskip+i1]; |
|
217 A[i*nskip+i1] = tmp; |
|
218 } |
|
219 tmp = A[i1*nskip+i1]; |
|
220 A[i1*nskip+i1] = A[i2*nskip+i2]; |
|
221 A[i2*nskip+i2] = tmp; |
|
222 for (i=i2+1; i<n; i++) { |
|
223 tmp = A[i*nskip+i1]; |
|
224 A[i*nskip+i1] = A[i*nskip+i2]; |
|
225 A[i*nskip+i2] = tmp; |
|
226 } |
|
227 UNALLOCA(tmprow); |
|
228 # endif |
|
229 |
|
230 } |
|
231 |
|
232 |
|
233 // swap two indexes in the n*n LCP problem. i1 must be <= i2. |
|
234 |
|
235 static void swapProblem (ATYPE A, dReal *x, dReal *b, dReal *w, dReal *lo, |
|
236 dReal *hi, int *p, int *state, int *findex, |
|
237 int n, int i1, int i2, int nskip, |
|
238 int do_fast_row_swaps) |
|
239 { |
|
240 dReal tmp; |
|
241 int tmpi; |
|
242 |
|
243 if (i1==i2) return; |
|
244 swapRowsAndCols (A,n,i1,i2,nskip,do_fast_row_swaps); |
|
245 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
246 if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) |
|
247 return; |
|
248 #endif |
|
249 tmp = x[i1]; |
|
250 x[i1] = x[i2]; |
|
251 x[i2] = tmp; |
|
252 tmp = b[i1]; |
|
253 b[i1] = b[i2]; |
|
254 b[i2] = tmp; |
|
255 tmp = w[i1]; |
|
256 w[i1] = w[i2]; |
|
257 w[i2] = tmp; |
|
258 tmp = lo[i1]; |
|
259 lo[i1] = lo[i2]; |
|
260 lo[i2] = tmp; |
|
261 tmp = hi[i1]; |
|
262 hi[i1] = hi[i2]; |
|
263 hi[i2] = tmp; |
|
264 tmpi = p[i1]; |
|
265 p[i1] = p[i2]; |
|
266 p[i2] = tmpi; |
|
267 tmpi = state[i1]; |
|
268 state[i1] = state[i2]; |
|
269 state[i2] = tmpi; |
|
270 if (findex) { |
|
271 tmpi = findex[i1]; |
|
272 findex[i1] = findex[i2]; |
|
273 findex[i2] = tmpi; |
|
274 } |
|
275 } |
|
276 |
|
277 |
|
278 //*************************************************************************** |
|
279 // dLCP manipulator object. this represents an n*n LCP problem. |
|
280 // |
|
281 // two index sets C and N are kept. each set holds a subset of |
|
282 // the variable indexes 0..n-1. an index can only be in one set. |
|
283 // initially both sets are empty. |
|
284 // |
|
285 // the index set C is special: solutions to A(C,C)\A(C,i) can be generated. |
|
286 //*************************************************************************** |
|
287 |
|
288 //*************************************************************************** |
|
289 // fast implementation of dLCP. see the above definition of dLCP for |
|
290 // interface comments. |
|
291 // |
|
292 // `p' records the permutation of A,x,b,w,etc. p is initially 1:n and is |
|
293 // permuted as the other vectors/matrices are permuted. |
|
294 // |
|
295 // A,x,b,w,lo,hi,state,findex,p,c are permuted such that sets C,N have |
|
296 // contiguous indexes. the don't-care indexes follow N. |
|
297 // |
|
298 // an L*D*L' factorization is maintained of A(C,C), and whenever indexes are |
|
299 // added or removed from the set C the factorization is updated. |
|
300 // thus L*D*L'=A[C,C], i.e. a permuted top left nC*nC submatrix of A. |
|
301 // the leading dimension of the matrix L is always `nskip'. |
|
302 // |
|
303 // at the start there may be other indexes that are unbounded but are not |
|
304 // included in `nub'. dLCP will permute the matrix so that absolutely all |
|
305 // unbounded vectors are at the start. thus there may be some initial |
|
306 // permutation. |
|
307 // |
|
308 // the algorithms here assume certain patterns, particularly with respect to |
|
309 // index transfer. |
|
310 |
|
311 #ifdef dLCP_FAST |
|
312 |
|
313 struct dLCP { |
|
314 int n,nskip,nub; |
|
315 ATYPE A; // A rows |
|
316 dReal *Adata,*x,*b,*w,*lo,*hi; // permuted LCP problem data |
|
317 dReal *L,*d; // L*D*L' factorization of set C |
|
318 dReal *Dell,*ell,*tmp; |
|
319 int *state,*findex,*p,*C; |
|
320 int nC,nN; // size of each index set |
|
321 |
|
322 dLCP (int _n, int _nub, dReal *_Adata, dReal *_x, dReal *_b, dReal *_w, |
|
323 dReal *_lo, dReal *_hi, dReal *_L, dReal *_d, |
|
324 dReal *_Dell, dReal *_ell, dReal *_tmp, |
|
325 int *_state, int *_findex, int *_p, int *_C, dReal **Arows); |
|
326 int getNub() { return nub; } |
|
327 void transfer_i_to_C (int i); |
|
328 void transfer_i_to_N (int /*i*/) |
|
329 { nN++; } // because we can assume C and N span 1:i-1 |
|
330 void transfer_i_from_N_to_C (int i); |
|
331 void transfer_i_from_C_to_N (int i); |
|
332 int numC() { return nC; } |
|
333 int numN() { return nN; } |
|
334 int indexC (int i) { return i; } |
|
335 int indexN (int i) { return i+nC; } |
|
336 dReal Aii (int i) { return AROW(i)[i]; } |
|
337 dReal AiC_times_qC (int i, dReal *q) { return dDot (AROW(i),q,nC); } |
|
338 dReal AiN_times_qN (int i, dReal *q) { return dDot (AROW(i)+nC,q+nC,nN); } |
|
339 void pN_equals_ANC_times_qC (dReal *p, dReal *q); |
|
340 void pN_plusequals_ANi (dReal *p, int i, int sign=1); |
|
341 void pC_plusequals_s_times_qC (dReal *p, dReal s, dReal *q) |
|
342 { for (int i=0; i<nC; i++) p[i] += dMUL(s,q[i]); } |
|
343 void pN_plusequals_s_times_qN (dReal *p, dReal s, dReal *q) |
|
344 { for (int i=0; i<nN; i++) p[i+nC] += dMUL(s,q[i+nC]); } |
|
345 void solve1 (dReal *a, int i, int dir=1, int only_transfer=0); |
|
346 void unpermute(); |
|
347 }; |
|
348 |
|
349 |
|
350 dLCP::dLCP (int _n, int _nub, dReal *_Adata, dReal *_x, dReal *_b, dReal *_w, |
|
351 dReal *_lo, dReal *_hi, dReal *_L, dReal *_d, |
|
352 dReal *_Dell, dReal *_ell, dReal *_tmp, |
|
353 int *_state, int *_findex, int *_p, int *_C, dReal **Arows) |
|
354 { |
|
355 n = _n; |
|
356 nub = _nub; |
|
357 Adata = _Adata; |
|
358 A = 0; |
|
359 x = _x; |
|
360 b = _b; |
|
361 w = _w; |
|
362 lo = _lo; |
|
363 hi = _hi; |
|
364 L = _L; |
|
365 d = _d; |
|
366 Dell = _Dell; |
|
367 ell = _ell; |
|
368 tmp = _tmp; |
|
369 state = _state; |
|
370 findex = _findex; |
|
371 p = _p; |
|
372 C = _C; |
|
373 nskip = dPAD(n); |
|
374 dSetZero (x,n); |
|
375 |
|
376 int k; |
|
377 |
|
378 # ifdef ROWPTRS |
|
379 // make matrix row pointers |
|
380 A = Arows; |
|
381 for (k=0; k<n; k++) A[k] = Adata + k*nskip; |
|
382 # else |
|
383 A = Adata; |
|
384 # endif |
|
385 |
|
386 nC = 0; |
|
387 nN = 0; |
|
388 for (k=0; k<n; k++) p[k]=k; // initially unpermuted |
|
389 |
|
390 /* |
|
391 // for testing, we can do some random swaps in the area i > nub |
|
392 if (nub < n) { |
|
393 for (k=0; k<100; k++) { |
|
394 int i1,i2; |
|
395 do { |
|
396 i1 = dRandInt(n-nub)+nub; |
|
397 i2 = dRandInt(n-nub)+nub; |
|
398 } |
|
399 while (i1 > i2); |
|
400 //printf ("--> %d %d\n",i1,i2); |
|
401 swapProblem (A,x,b,w,lo,hi,p,state,findex,n,i1,i2,nskip,0); |
|
402 } |
|
403 } |
|
404 */ |
|
405 |
|
406 // permute the problem so that *all* the unbounded variables are at the |
|
407 // start, i.e. look for unbounded variables not included in `nub'. we can |
|
408 // potentially push up `nub' this way and get a bigger initial factorization. |
|
409 // note that when we swap rows/cols here we must not just swap row pointers, |
|
410 // as the initial factorization relies on the data being all in one chunk. |
|
411 // variables that have findex >= 0 are *not* considered to be unbounded even |
|
412 // if lo=-inf and hi=inf - this is because these limits may change during the |
|
413 // solution process. |
|
414 |
|
415 for (k=nub; k<n; k++) { |
|
416 if (findex && findex[k] >= 0) continue; |
|
417 if (lo[k]==-dInfinity && hi[k]==dInfinity) { |
|
418 swapProblem (A,x,b,w,lo,hi,p,state,findex,n,nub,k,nskip,0); |
|
419 nub++; |
|
420 } |
|
421 } |
|
422 |
|
423 // if there are unbounded variables at the start, factorize A up to that |
|
424 // point and solve for x. this puts all indexes 0..nub-1 into C. |
|
425 if (nub > 0) { |
|
426 for (k=0; k<nub; k++) memcpy (L+k*nskip,AROW(k),(k+1)*sizeof(dReal)); |
|
427 dFactorLDLT (L,d,nub,nskip); |
|
428 memcpy (x,b,nub*sizeof(dReal)); |
|
429 dSolveLDLT (L,d,x,nub,nskip); |
|
430 dSetZero (w,nub); |
|
431 for (k=0; k<nub; k++) C[k] = k; |
|
432 nC = nub; |
|
433 } |
|
434 |
|
435 // permute the indexes > nub such that all findex variables are at the end |
|
436 if (findex) { |
|
437 int num_at_end = 0; |
|
438 for (k=n-1; k >= nub; k--) { |
|
439 if (findex[k] >= 0) { |
|
440 swapProblem (A,x,b,w,lo,hi,p,state,findex,n,k,n-1-num_at_end,nskip,1); |
|
441 num_at_end++; |
|
442 } |
|
443 } |
|
444 } |
|
445 |
|
446 // print info about indexes |
|
447 /* |
|
448 for (k=0; k<n; k++) { |
|
449 if (k<nub) printf ("C"); |
|
450 else if (lo[k]==-dInfinity && hi[k]==dInfinity) printf ("c"); |
|
451 else printf ("."); |
|
452 } |
|
453 printf ("\n"); |
|
454 */ |
|
455 } |
|
456 |
|
457 |
|
458 void dLCP::transfer_i_to_C (int i) |
|
459 { |
|
460 int j; |
|
461 if (nC > 0) { |
|
462 // ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C)) |
|
463 for (j=0; j<nC; j++) L[nC*nskip+j] = ell[j]; |
|
464 d[nC] = dRecip (AROW(i)[i] - dDot(ell,Dell,nC)); |
|
465 } |
|
466 else { |
|
467 d[0] = dRecip (AROW(i)[i]); |
|
468 } |
|
469 swapProblem (A,x,b,w,lo,hi,p,state,findex,n,nC,i,nskip,1); |
|
470 C[nC] = nC; |
|
471 nC++; |
|
472 |
|
473 } |
|
474 |
|
475 |
|
476 void dLCP::transfer_i_from_N_to_C (int i) |
|
477 { |
|
478 int j; |
|
479 if (nC > 0) { |
|
480 dReal *aptr = AROW(i); |
|
481 # ifdef NUB_OPTIMIZATIONS |
|
482 // if nub>0, initial part of aptr unpermuted |
|
483 for (j=0; j<nub; j++) Dell[j] = aptr[j]; |
|
484 for (j=nub; j<nC; j++) Dell[j] = aptr[C[j]]; |
|
485 # else |
|
486 for (j=0; j<nC; j++) Dell[j] = aptr[C[j]]; |
|
487 # endif |
|
488 dSolveL1 (L,Dell,nC,nskip); |
|
489 for (j=0; j<nC; j++) ell[j] = dMUL(Dell[j],d[j]); |
|
490 for (j=0; j<nC; j++) L[nC*nskip+j] = ell[j]; |
|
491 d[nC] = dRecip (AROW(i)[i] - dDot(ell,Dell,nC)); |
|
492 } |
|
493 else { |
|
494 d[0] = dRecip (AROW(i)[i]); |
|
495 } |
|
496 swapProblem (A,x,b,w,lo,hi,p,state,findex,n,nC,i,nskip,1); |
|
497 C[nC] = nC; |
|
498 nN--; |
|
499 nC++; |
|
500 |
|
501 // @@@ TO DO LATER |
|
502 // if we just finish here then we'll go back and re-solve for |
|
503 // delta_x. but actually we can be more efficient and incrementally |
|
504 // update delta_x here. but if we do this, we wont have ell and Dell |
|
505 // to use in updating the factorization later. |
|
506 |
|
507 } |
|
508 |
|
509 |
|
510 void dLCP::transfer_i_from_C_to_N (int i) |
|
511 { |
|
512 // remove a row/column from the factorization, and adjust the |
|
513 // indexes (black magic!) |
|
514 int j,k; |
|
515 for (j=0; j<nC; j++) if (C[j]==i) { |
|
516 dLDLTRemove (A,C,L,d,n,nC,j,nskip); |
|
517 for (k=0; k<nC; k++) if (C[k]==nC-1) { |
|
518 C[k] = C[j]; |
|
519 if (j < (nC-1)) memmove (C+j,C+j+1,(nC-j-1)*sizeof(int)); |
|
520 break; |
|
521 } |
|
522 |
|
523 break; |
|
524 } |
|
525 |
|
526 swapProblem (A,x,b,w,lo,hi,p,state,findex,n,i,nC-1,nskip,1); |
|
527 nC--; |
|
528 nN++; |
|
529 |
|
530 } |
|
531 |
|
532 |
|
533 void dLCP::pN_equals_ANC_times_qC (dReal *p, dReal *q) |
|
534 { |
|
535 // we could try to make this matrix-vector multiplication faster using |
|
536 // outer product matrix tricks, e.g. with the dMultidotX() functions. |
|
537 // but i tried it and it actually made things slower on random 100x100 |
|
538 // problems because of the overhead involved. so we'll stick with the |
|
539 // simple method for now. |
|
540 for (int i=0; i<nN; i++) p[i+nC] = dDot (AROW(i+nC),q,nC); |
|
541 } |
|
542 |
|
543 |
|
544 void dLCP::pN_plusequals_ANi (dReal *p, int i, int sign) |
|
545 { |
|
546 dReal *aptr = AROW(i)+nC; |
|
547 if (sign > 0) { |
|
548 for (int i=0; i<nN; i++) p[i+nC] += aptr[i]; |
|
549 } |
|
550 else { |
|
551 for (int i=0; i<nN; i++) p[i+nC] -= aptr[i]; |
|
552 } |
|
553 } |
|
554 |
|
555 |
|
556 void dLCP::solve1 (dReal *a, int i, int dir, int only_transfer) |
|
557 { |
|
558 // the `Dell' and `ell' that are computed here are saved. if index i is |
|
559 // later added to the factorization then they can be reused. |
|
560 // |
|
561 // @@@ question: do we need to solve for entire delta_x??? yes, but |
|
562 // only if an x goes below 0 during the step. |
|
563 |
|
564 int j; |
|
565 if (nC > 0) { |
|
566 dReal *aptr = AROW(i); |
|
567 # ifdef NUB_OPTIMIZATIONS |
|
568 // if nub>0, initial part of aptr[] is guaranteed unpermuted |
|
569 for (j=0; j<nub; j++) Dell[j] = aptr[j]; |
|
570 for (j=nub; j<nC; j++) Dell[j] = aptr[C[j]]; |
|
571 # else |
|
572 for (j=0; j<nC; j++) Dell[j] = aptr[C[j]]; |
|
573 # endif |
|
574 dSolveL1 (L,Dell,nC,nskip); |
|
575 for (j=0; j<nC; j++) ell[j] = dMUL(Dell[j],d[j]); |
|
576 |
|
577 if (!only_transfer) { |
|
578 for (j=0; j<nC; j++) tmp[j] = ell[j]; |
|
579 dSolveL1T (L,tmp,nC,nskip); |
|
580 if (dir > 0) { |
|
581 for (j=0; j<nC; j++) a[C[j]] = -tmp[j]; |
|
582 } |
|
583 else { |
|
584 for (j=0; j<nC; j++) a[C[j]] = tmp[j]; |
|
585 } |
|
586 } |
|
587 } |
|
588 } |
|
589 |
|
590 |
|
591 void dLCP::unpermute() |
|
592 { |
|
593 // now we have to un-permute x and w |
|
594 int j; |
|
595 ALLOCA (dReal,tmp,n*sizeof(dReal)); |
|
596 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
597 if (tmp == NULL) { |
|
598 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
599 return; |
|
600 } |
|
601 #endif |
|
602 memcpy (tmp,x,n*sizeof(dReal)); |
|
603 for (j=0; j<n; j++) x[p[j]] = tmp[j]; |
|
604 memcpy (tmp,w,n*sizeof(dReal)); |
|
605 for (j=0; j<n; j++) w[p[j]] = tmp[j]; |
|
606 |
|
607 UNALLOCA (tmp); |
|
608 } |
|
609 |
|
610 #endif // dLCP_FAST |
|
611 |
|
612 //*************************************************************************** |
|
613 // an unoptimized Dantzig LCP driver routine for the basic LCP problem. |
|
614 // must have lo=0, hi=dInfinity, and nub=0. |
|
615 |
|
616 void dSolveLCPBasic (int n, dReal *A, dReal *x, dReal *b, |
|
617 dReal *w, int /*nub*/, dReal */*lo*/, dReal */*hi*/) |
|
618 { |
|
619 int i,k; |
|
620 int nskip = dPAD(n); |
|
621 ALLOCA (dReal,L,n*nskip*sizeof(dReal)); |
|
622 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
623 if (L == NULL) { |
|
624 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
625 return; |
|
626 } |
|
627 #endif |
|
628 ALLOCA (dReal,d,n*sizeof(dReal)); |
|
629 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
630 if (d == NULL) { |
|
631 UNALLOCA(L); |
|
632 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
633 return; |
|
634 } |
|
635 #endif |
|
636 ALLOCA (dReal,delta_x,n*sizeof(dReal)); |
|
637 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
638 if (delta_x == NULL) { |
|
639 UNALLOCA(d); |
|
640 UNALLOCA(L); |
|
641 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
642 return; |
|
643 } |
|
644 #endif |
|
645 ALLOCA (dReal,delta_w,n*sizeof(dReal)); |
|
646 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
647 if (delta_w == NULL) { |
|
648 UNALLOCA(delta_x); |
|
649 UNALLOCA(d); |
|
650 UNALLOCA(L); |
|
651 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
652 return; |
|
653 } |
|
654 #endif |
|
655 ALLOCA (dReal,Dell,n*sizeof(dReal)); |
|
656 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
657 if (Dell == NULL) { |
|
658 UNALLOCA(delta_w); |
|
659 UNALLOCA(delta_x); |
|
660 UNALLOCA(d); |
|
661 UNALLOCA(L); |
|
662 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
663 return; |
|
664 } |
|
665 #endif |
|
666 ALLOCA (dReal,ell,n*sizeof(dReal)); |
|
667 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
668 if (ell == NULL) { |
|
669 UNALLOCA(Dell); |
|
670 UNALLOCA(delta_w); |
|
671 UNALLOCA(delta_x); |
|
672 UNALLOCA(d); |
|
673 UNALLOCA(L); |
|
674 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
675 return; |
|
676 } |
|
677 #endif |
|
678 ALLOCA (dReal,tmp,n*sizeof(dReal)); |
|
679 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
680 if (tmp == NULL) { |
|
681 UNALLOCA(ell); |
|
682 UNALLOCA(Dell); |
|
683 UNALLOCA(delta_w); |
|
684 UNALLOCA(delta_x); |
|
685 UNALLOCA(d); |
|
686 UNALLOCA(L); |
|
687 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
688 return; |
|
689 } |
|
690 #endif |
|
691 ALLOCA (dReal*,Arows,n*sizeof(dReal*)); |
|
692 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
693 if (Arows == NULL) { |
|
694 UNALLOCA(tmp); |
|
695 UNALLOCA(ell); |
|
696 UNALLOCA(Dell); |
|
697 UNALLOCA(delta_w); |
|
698 UNALLOCA(delta_x); |
|
699 UNALLOCA(d); |
|
700 UNALLOCA(L); |
|
701 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
702 return; |
|
703 } |
|
704 #endif |
|
705 ALLOCA (int,p,n*sizeof(int)); |
|
706 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
707 if (p == NULL) { |
|
708 UNALLOCA(Arows); |
|
709 UNALLOCA(tmp); |
|
710 UNALLOCA(ell); |
|
711 UNALLOCA(Dell); |
|
712 UNALLOCA(delta_w); |
|
713 UNALLOCA(delta_x); |
|
714 UNALLOCA(d); |
|
715 UNALLOCA(L); |
|
716 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
717 return; |
|
718 } |
|
719 #endif |
|
720 ALLOCA (int,C,n*sizeof(int)); |
|
721 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
722 if (C == NULL) { |
|
723 UNALLOCA(p); |
|
724 UNALLOCA(Arows); |
|
725 UNALLOCA(tmp); |
|
726 UNALLOCA(ell); |
|
727 UNALLOCA(Dell); |
|
728 UNALLOCA(delta_w); |
|
729 UNALLOCA(delta_x); |
|
730 UNALLOCA(d); |
|
731 UNALLOCA(L); |
|
732 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
733 return; |
|
734 } |
|
735 #endif |
|
736 ALLOCA (int,dummy,n*sizeof(int)); |
|
737 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
738 if (dummy == NULL) { |
|
739 UNALLOCA(C); |
|
740 UNALLOCA(p); |
|
741 UNALLOCA(Arows); |
|
742 UNALLOCA(tmp); |
|
743 UNALLOCA(ell); |
|
744 UNALLOCA(Dell); |
|
745 UNALLOCA(delta_w); |
|
746 UNALLOCA(delta_x); |
|
747 UNALLOCA(d); |
|
748 UNALLOCA(L); |
|
749 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
750 return; |
|
751 } |
|
752 #endif |
|
753 |
|
754 |
|
755 dLCP lcp (n,0,A,x,b,w,tmp,tmp,L,d,Dell,ell,tmp,dummy,dummy,p,C,Arows); |
|
756 lcp.getNub(); |
|
757 |
|
758 for (i=0; i<n; i++) { |
|
759 w[i] = lcp.AiC_times_qC (i,x) - b[i]; |
|
760 if (w[i] >= 0) { |
|
761 lcp.transfer_i_to_N (i); |
|
762 } |
|
763 else { |
|
764 for (;;) { |
|
765 // compute: delta_x(C) = -A(C,C)\A(C,i) |
|
766 dSetZero (delta_x,n); |
|
767 lcp.solve1 (delta_x,i); |
|
768 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
769 if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) { |
|
770 UNALLOCA(dummy); |
|
771 UNALLOCA(C); |
|
772 UNALLOCA(p); |
|
773 UNALLOCA(Arows); |
|
774 UNALLOCA(tmp); |
|
775 UNALLOCA(ell); |
|
776 UNALLOCA(Dell); |
|
777 UNALLOCA(delta_w); |
|
778 UNALLOCA(delta_x); |
|
779 UNALLOCA(d); |
|
780 UNALLOCA(L); |
|
781 return; |
|
782 } |
|
783 #endif |
|
784 delta_x[i] = REAL(1.0); |
|
785 |
|
786 // compute: delta_w = A*delta_x |
|
787 dSetZero (delta_w,n); |
|
788 lcp.pN_equals_ANC_times_qC (delta_w,delta_x); |
|
789 lcp.pN_plusequals_ANi (delta_w,i); |
|
790 delta_w[i] = lcp.AiC_times_qC (i,delta_x) + lcp.Aii(i); |
|
791 |
|
792 // find index to switch |
|
793 int si = i; // si = switch index |
|
794 int si_in_N = 0; // set to 1 if si in N |
|
795 dReal s = -dDIV(w[i],delta_w[i]); |
|
796 |
|
797 if (s <= 0) { |
|
798 if (i < (n-1)) { |
|
799 dSetZero (x+i,n-i); |
|
800 dSetZero (w+i,n-i); |
|
801 } |
|
802 goto done; |
|
803 } |
|
804 |
|
805 for (k=0; k < lcp.numN(); k++) { |
|
806 if (delta_w[lcp.indexN(k)] < 0) { |
|
807 dReal s2 = -dDIV(w[lcp.indexN(k)],delta_w[lcp.indexN(k)]); |
|
808 if (s2 < s) { |
|
809 s = s2; |
|
810 si = lcp.indexN(k); |
|
811 si_in_N = 1; |
|
812 } |
|
813 } |
|
814 } |
|
815 for (k=0; k < lcp.numC(); k++) { |
|
816 if (delta_x[lcp.indexC(k)] < 0) { |
|
817 dReal s2 = -dDIV(x[lcp.indexC(k)],delta_x[lcp.indexC(k)]); |
|
818 if (s2 < s) { |
|
819 s = s2; |
|
820 si = lcp.indexC(k); |
|
821 si_in_N = 0; |
|
822 } |
|
823 } |
|
824 } |
|
825 |
|
826 // apply x = x + s * delta_x |
|
827 lcp.pC_plusequals_s_times_qC (x,s,delta_x); |
|
828 x[i] += s; |
|
829 lcp.pN_plusequals_s_times_qN (w,s,delta_w); |
|
830 w[i] += dMUL(s,delta_w[i]); |
|
831 |
|
832 // switch indexes between sets if necessary |
|
833 if (si==i) { |
|
834 w[i] = 0; |
|
835 lcp.transfer_i_to_C (i); |
|
836 break; |
|
837 } |
|
838 if (si_in_N) { |
|
839 w[si] = 0; |
|
840 lcp.transfer_i_from_N_to_C (si); |
|
841 } |
|
842 else { |
|
843 x[si] = 0; |
|
844 lcp.transfer_i_from_C_to_N (si); |
|
845 } |
|
846 } |
|
847 } |
|
848 } |
|
849 |
|
850 done: |
|
851 lcp.unpermute(); |
|
852 |
|
853 UNALLOCA (L); |
|
854 UNALLOCA (d); |
|
855 UNALLOCA (delta_x); |
|
856 UNALLOCA (delta_w); |
|
857 UNALLOCA (Dell); |
|
858 UNALLOCA (ell); |
|
859 UNALLOCA (tmp); |
|
860 UNALLOCA (Arows); |
|
861 UNALLOCA (p); |
|
862 UNALLOCA (C); |
|
863 UNALLOCA (dummy); |
|
864 } |
|
865 |
|
866 //*************************************************************************** |
|
867 // an optimized Dantzig LCP driver routine for the lo-hi LCP problem. |
|
868 |
|
869 void dSolveLCP (int n, dReal *A, dReal *x, dReal *b, |
|
870 dReal *w, int nub, dReal *lo, dReal *hi, int *findex) |
|
871 { |
|
872 |
|
873 int i,k,hit_first_friction_index = 0; |
|
874 int nskip = dPAD(n); |
|
875 |
|
876 // if all the variables are unbounded then we can just factor, solve, |
|
877 // and return |
|
878 if (nub >= n) { |
|
879 dFactorLDLT (A,w,n,nskip); // use w for d |
|
880 dSolveLDLT (A,w,b,n,nskip); |
|
881 memcpy (x,b,n*sizeof(dReal)); |
|
882 dSetZero (w,n); |
|
883 |
|
884 return; |
|
885 } |
|
886 ALLOCA (dReal,L,n*nskip*sizeof(dReal)); |
|
887 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
888 if (L == NULL) { |
|
889 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
890 return; |
|
891 } |
|
892 #endif |
|
893 ALLOCA (dReal,d,n*sizeof(dReal)); |
|
894 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
895 if (d == NULL) { |
|
896 UNALLOCA(L); |
|
897 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
898 return; |
|
899 } |
|
900 #endif |
|
901 ALLOCA (dReal,delta_x,n*sizeof(dReal)); |
|
902 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
903 if (delta_x == NULL) { |
|
904 UNALLOCA(d); |
|
905 UNALLOCA(L); |
|
906 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
907 return; |
|
908 } |
|
909 #endif |
|
910 ALLOCA (dReal,delta_w,n*sizeof(dReal)); |
|
911 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
912 if (delta_w == NULL) { |
|
913 UNALLOCA(delta_x); |
|
914 UNALLOCA(d); |
|
915 UNALLOCA(L); |
|
916 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
917 return; |
|
918 } |
|
919 #endif |
|
920 ALLOCA (dReal,Dell,n*sizeof(dReal)); |
|
921 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
922 if (Dell == NULL) { |
|
923 UNALLOCA(delta_w); |
|
924 UNALLOCA(delta_x); |
|
925 UNALLOCA(d); |
|
926 UNALLOCA(L); |
|
927 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
928 return; |
|
929 } |
|
930 #endif |
|
931 ALLOCA (dReal,ell,n*sizeof(dReal)); |
|
932 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
933 if (ell == NULL) { |
|
934 UNALLOCA(Dell); |
|
935 UNALLOCA(delta_w); |
|
936 UNALLOCA(delta_x); |
|
937 UNALLOCA(d); |
|
938 UNALLOCA(L); |
|
939 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
940 return; |
|
941 } |
|
942 #endif |
|
943 ALLOCA (dReal*,Arows,n*sizeof(dReal*)); |
|
944 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
945 if (Arows == NULL) { |
|
946 UNALLOCA(ell); |
|
947 UNALLOCA(Dell); |
|
948 UNALLOCA(delta_w); |
|
949 UNALLOCA(delta_x); |
|
950 UNALLOCA(d); |
|
951 UNALLOCA(L); |
|
952 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
953 return; |
|
954 } |
|
955 #endif |
|
956 ALLOCA (int,p,n*sizeof(int)); |
|
957 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
958 if (p == NULL) { |
|
959 UNALLOCA(Arows); |
|
960 UNALLOCA(ell); |
|
961 UNALLOCA(Dell); |
|
962 UNALLOCA(delta_w); |
|
963 UNALLOCA(delta_x); |
|
964 UNALLOCA(d); |
|
965 UNALLOCA(L); |
|
966 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
967 return; |
|
968 } |
|
969 #endif |
|
970 ALLOCA (int,C,n*sizeof(int)); |
|
971 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
972 if (C == NULL) { |
|
973 UNALLOCA(p); |
|
974 UNALLOCA(Arows); |
|
975 UNALLOCA(ell); |
|
976 UNALLOCA(Dell); |
|
977 UNALLOCA(delta_w); |
|
978 UNALLOCA(delta_x); |
|
979 UNALLOCA(d); |
|
980 UNALLOCA(L); |
|
981 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
982 return; |
|
983 } |
|
984 #endif |
|
985 |
|
986 int dir; |
|
987 dReal dirf; |
|
988 |
|
989 // for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i) |
|
990 ALLOCA (int,state,n*sizeof(int)); |
|
991 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
992 if (state == NULL) { |
|
993 UNALLOCA(C); |
|
994 UNALLOCA(p); |
|
995 UNALLOCA(Arows); |
|
996 UNALLOCA(ell); |
|
997 UNALLOCA(Dell); |
|
998 UNALLOCA(delta_w); |
|
999 UNALLOCA(delta_x); |
|
1000 UNALLOCA(d); |
|
1001 UNALLOCA(L); |
|
1002 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; |
|
1003 return; |
|
1004 } |
|
1005 #endif |
|
1006 |
|
1007 // create LCP object. note that tmp is set to delta_w to save space, this |
|
1008 // optimization relies on knowledge of how tmp is used, so be careful! |
|
1009 dLCP *lcp=new dLCP(n,nub,A,x,b,w,lo,hi,L,d,Dell,ell,delta_w,state,findex,p,C,Arows); |
|
1010 nub = lcp->getNub(); |
|
1011 |
|
1012 // loop over all indexes nub..n-1. for index i, if x(i),w(i) satisfy the |
|
1013 // LCP conditions then i is added to the appropriate index set. otherwise |
|
1014 // x(i),w(i) is driven either +ve or -ve to force it to the valid region. |
|
1015 // as we drive x(i), x(C) is also adjusted to keep w(C) at zero. |
|
1016 // while driving x(i) we maintain the LCP conditions on the other variables |
|
1017 // 0..i-1. we do this by watching out for other x(i),w(i) values going |
|
1018 // outside the valid region, and then switching them between index sets |
|
1019 // when that happens. |
|
1020 |
|
1021 for (i=nub; i<n; i++) { |
|
1022 // the index i is the driving index and indexes i+1..n-1 are "dont care", |
|
1023 // i.e. when we make changes to the system those x's will be zero and we |
|
1024 // don't care what happens to those w's. in other words, we only consider |
|
1025 // an (i+1)*(i+1) sub-problem of A*x=b+w. |
|
1026 |
|
1027 // if we've hit the first friction index, we have to compute the lo and |
|
1028 // hi values based on the values of x already computed. we have been |
|
1029 // permuting the indexes, so the values stored in the findex vector are |
|
1030 // no longer valid. thus we have to temporarily unpermute the x vector. |
|
1031 // for the purposes of this computation, 0*infinity = 0 ... so if the |
|
1032 // contact constraint's normal force is 0, there should be no tangential |
|
1033 // force applied. |
|
1034 |
|
1035 if (hit_first_friction_index == 0 && findex && findex[i] >= 0) { |
|
1036 // un-permute x into delta_w, which is not being used at the moment |
|
1037 for (k=0; k<n; k++) delta_w[p[k]] = x[k]; |
|
1038 |
|
1039 // set lo and hi values |
|
1040 for (k=i; k<n; k++) { |
|
1041 dReal wfk = delta_w[findex[k]]; |
|
1042 if (wfk == 0) { |
|
1043 hi[k] = 0; |
|
1044 lo[k] = 0; |
|
1045 } |
|
1046 else { |
|
1047 hi[k] = dFabs (dMUL(hi[k],wfk)); |
|
1048 lo[k] = -hi[k]; |
|
1049 } |
|
1050 } |
|
1051 hit_first_friction_index = 1; |
|
1052 } |
|
1053 |
|
1054 // thus far we have not even been computing the w values for indexes |
|
1055 // greater than i, so compute w[i] now. |
|
1056 w[i] = lcp->AiC_times_qC (i,x) + lcp->AiN_times_qN (i,x) - b[i]; |
|
1057 |
|
1058 // if lo=hi=0 (which can happen for tangential friction when normals are |
|
1059 // 0) then the index will be assigned to set N with some state. however, |
|
1060 // set C's line has zero size, so the index will always remain in set N. |
|
1061 // with the "normal" switching logic, if w changed sign then the index |
|
1062 // would have to switch to set C and then back to set N with an inverted |
|
1063 // state. this is pointless, and also computationally expensive. to |
|
1064 // prevent this from happening, we use the rule that indexes with lo=hi=0 |
|
1065 // will never be checked for set changes. this means that the state for |
|
1066 // these indexes may be incorrect, but that doesn't matter. |
|
1067 |
|
1068 // see if x(i),w(i) is in a valid region |
|
1069 if (lo[i]==0 && w[i] >= 0) { |
|
1070 lcp->transfer_i_to_N (i); |
|
1071 state[i] = 0; |
|
1072 } |
|
1073 else if (hi[i]==0 && w[i] <= 0) { |
|
1074 lcp->transfer_i_to_N (i); |
|
1075 state[i] = 1; |
|
1076 } |
|
1077 else if (w[i]==0) { |
|
1078 // this is a degenerate case. by the time we get to this test we know |
|
1079 // that lo != 0, which means that lo < 0 as lo is not allowed to be +ve, |
|
1080 // and similarly that hi > 0. this means that the line segment |
|
1081 // corresponding to set C is at least finite in extent, and we are on it. |
|
1082 // NOTE: we must call lcp->solve1() before lcp->transfer_i_to_C() |
|
1083 lcp->solve1 (delta_x,i,0,1); |
|
1084 |
|
1085 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
1086 if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) { |
|
1087 UNALLOCA(state); |
|
1088 UNALLOCA(C); |
|
1089 UNALLOCA(p); |
|
1090 UNALLOCA(Arows); |
|
1091 UNALLOCA(ell); |
|
1092 UNALLOCA(Dell); |
|
1093 UNALLOCA(delta_w); |
|
1094 UNALLOCA(delta_x); |
|
1095 UNALLOCA(d); |
|
1096 UNALLOCA(L); |
|
1097 return; |
|
1098 } |
|
1099 #endif |
|
1100 |
|
1101 lcp->transfer_i_to_C (i); |
|
1102 } |
|
1103 else { |
|
1104 // we must push x(i) and w(i) |
|
1105 for (;;) { |
|
1106 // find direction to push on x(i) |
|
1107 if (w[i] <= 0) { |
|
1108 dir = 1; |
|
1109 dirf = REAL(1.0); |
|
1110 } |
|
1111 else { |
|
1112 dir = -1; |
|
1113 dirf = REAL(-1.0); |
|
1114 } |
|
1115 |
|
1116 // compute: delta_x(C) = -dir*A(C,C)\A(C,i) |
|
1117 lcp->solve1 (delta_x,i,dir); |
|
1118 |
|
1119 #ifdef dUSE_MALLOC_FOR_ALLOCA |
|
1120 if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) { |
|
1121 UNALLOCA(state); |
|
1122 UNALLOCA(C); |
|
1123 UNALLOCA(p); |
|
1124 UNALLOCA(Arows); |
|
1125 UNALLOCA(ell); |
|
1126 UNALLOCA(Dell); |
|
1127 UNALLOCA(delta_w); |
|
1128 UNALLOCA(delta_x); |
|
1129 UNALLOCA(d); |
|
1130 UNALLOCA(L); |
|
1131 return; |
|
1132 } |
|
1133 #endif |
|
1134 |
|
1135 // note that delta_x[i] = dirf, but we wont bother to set it |
|
1136 |
|
1137 // compute: delta_w = A*delta_x ... note we only care about |
|
1138 // delta_w(N) and delta_w(i), the rest is ignored |
|
1139 lcp->pN_equals_ANC_times_qC (delta_w,delta_x); |
|
1140 lcp->pN_plusequals_ANi (delta_w,i,dir); |
|
1141 delta_w[i] = lcp->AiC_times_qC (i,delta_x) + dMUL(lcp->Aii(i),dirf); |
|
1142 |
|
1143 // find largest step we can take (size=s), either to drive x(i),w(i) |
|
1144 // to the valid LCP region or to drive an already-valid variable |
|
1145 // outside the valid region. |
|
1146 |
|
1147 int cmd = 1; // index switching command |
|
1148 int si = 0; // si = index to switch if cmd>3 |
|
1149 dReal s = -dDIV(w[i],delta_w[i]); |
|
1150 if (dir > 0) { |
|
1151 if (hi[i] < dInfinity) { |
|
1152 dReal s2 = dDIV((hi[i]-x[i]),dirf); // step to x(i)=hi(i) |
|
1153 if (s2 < s) { |
|
1154 s = s2; |
|
1155 cmd = 3; |
|
1156 } |
|
1157 } |
|
1158 } |
|
1159 else { |
|
1160 if (lo[i] > -dInfinity) { |
|
1161 dReal s2 = dDIV((lo[i]-x[i]),dirf); // step to x(i)=lo(i) |
|
1162 if (s2 < s) { |
|
1163 s = s2; |
|
1164 cmd = 2; |
|
1165 } |
|
1166 } |
|
1167 } |
|
1168 |
|
1169 for (k=0; k < lcp->numN(); k++) { |
|
1170 if ((state[lcp->indexN(k)]==0 && delta_w[lcp->indexN(k)] < 0) || |
|
1171 (state[lcp->indexN(k)]!=0 && delta_w[lcp->indexN(k)] > 0)) { |
|
1172 // don't bother checking if lo=hi=0 |
|
1173 if (lo[lcp->indexN(k)] == 0 && hi[lcp->indexN(k)] == 0) continue; |
|
1174 dReal s2 = -dDIV(w[lcp->indexN(k)],delta_w[lcp->indexN(k)]); |
|
1175 if (s2 < s) { |
|
1176 s = s2; |
|
1177 cmd = 4; |
|
1178 si = lcp->indexN(k); |
|
1179 } |
|
1180 } |
|
1181 } |
|
1182 |
|
1183 for (k=nub; k < lcp->numC(); k++) { |
|
1184 if (delta_x[lcp->indexC(k)] < 0 && lo[lcp->indexC(k)] > -dInfinity) { |
|
1185 dReal s2 = dDIV((lo[lcp->indexC(k)]-x[lcp->indexC(k)]),delta_x[lcp->indexC(k)]); |
|
1186 if (s2 < s) { |
|
1187 s = s2; |
|
1188 cmd = 5; |
|
1189 si = lcp->indexC(k); |
|
1190 } |
|
1191 } |
|
1192 if (delta_x[lcp->indexC(k)] > 0 && hi[lcp->indexC(k)] < dInfinity) { |
|
1193 dReal s2 = dDIV((hi[lcp->indexC(k)]-x[lcp->indexC(k)]),delta_x[lcp->indexC(k)]); |
|
1194 if (s2 < s) { |
|
1195 s = s2; |
|
1196 cmd = 6; |
|
1197 si = lcp->indexC(k); |
|
1198 } |
|
1199 } |
|
1200 } |
|
1201 |
|
1202 //static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C", |
|
1203 // "C->NL","C->NH"}; |
|
1204 //printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i); |
|
1205 |
|
1206 // if s <= 0 then we've got a problem. if we just keep going then |
|
1207 // we're going to get stuck in an infinite loop. instead, just cross |
|
1208 // our fingers and exit with the current solution. |
|
1209 if (s <= 0) { |
|
1210 if (i < (n-1)) { |
|
1211 dSetZero (x+i,n-i); |
|
1212 dSetZero (w+i,n-i); |
|
1213 } |
|
1214 goto done; |
|
1215 } |
|
1216 |
|
1217 // apply x = x + s * delta_x |
|
1218 lcp->pC_plusequals_s_times_qC (x,s,delta_x); |
|
1219 x[i] += dMUL(s,dirf); |
|
1220 |
|
1221 // apply w = w + s * delta_w |
|
1222 lcp->pN_plusequals_s_times_qN (w,s,delta_w); |
|
1223 w[i] += dMUL(s,delta_w[i]); |
|
1224 |
|
1225 // switch indexes between sets if necessary |
|
1226 switch (cmd) { |
|
1227 case 1: // done |
|
1228 w[i] = 0; |
|
1229 lcp->transfer_i_to_C (i); |
|
1230 break; |
|
1231 case 2: // done |
|
1232 x[i] = lo[i]; |
|
1233 state[i] = 0; |
|
1234 lcp->transfer_i_to_N (i); |
|
1235 break; |
|
1236 case 3: // done |
|
1237 x[i] = hi[i]; |
|
1238 state[i] = 1; |
|
1239 lcp->transfer_i_to_N (i); |
|
1240 break; |
|
1241 case 4: // keep going |
|
1242 w[si] = 0; |
|
1243 lcp->transfer_i_from_N_to_C (si); |
|
1244 break; |
|
1245 case 5: // keep going |
|
1246 x[si] = lo[si]; |
|
1247 state[si] = 0; |
|
1248 lcp->transfer_i_from_C_to_N (si); |
|
1249 break; |
|
1250 case 6: // keep going |
|
1251 x[si] = hi[si]; |
|
1252 state[si] = 1; |
|
1253 lcp->transfer_i_from_C_to_N (si); |
|
1254 break; |
|
1255 } |
|
1256 |
|
1257 if (cmd <= 3) break; |
|
1258 } |
|
1259 } |
|
1260 } |
|
1261 |
|
1262 done: |
|
1263 lcp->unpermute(); |
|
1264 delete lcp; |
|
1265 |
|
1266 UNALLOCA (L); |
|
1267 UNALLOCA (d); |
|
1268 UNALLOCA (delta_x); |
|
1269 UNALLOCA (delta_w); |
|
1270 UNALLOCA (Dell); |
|
1271 UNALLOCA (ell); |
|
1272 UNALLOCA (Arows); |
|
1273 UNALLOCA (p); |
|
1274 UNALLOCA (C); |
|
1275 UNALLOCA (state); |
|
1276 } |