|
1 /************************************************************************* |
|
2 * * |
|
3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. * |
|
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org * |
|
5 * * |
|
6 * This library is free software; you can redistribute it and/or * |
|
7 * modify it under the terms of EITHER: * |
|
8 * (1) The GNU Lesser General Public License as published by the Free * |
|
9 * Software Foundation; either version 2.1 of the License, or (at * |
|
10 * your option) any later version. The text of the GNU Lesser * |
|
11 * General Public License is included with this library in the * |
|
12 * file LICENSE.TXT. * |
|
13 * (2) The BSD-style license that is included with this library in * |
|
14 * the file LICENSE-BSD.TXT. * |
|
15 * * |
|
16 * This library is distributed in the hope that it will be useful, * |
|
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
|
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files * |
|
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. * |
|
20 * * |
|
21 *************************************************************************/ |
|
22 |
|
23 #include <ode/common.h> |
|
24 #include <ode/matrix.h> |
|
25 |
|
26 // misc defines |
|
27 //#define ALLOCA dALLOCA16 |
|
28 |
|
29 |
|
30 EXPORT_C void dSetZero (dReal *a, int n) |
|
31 { |
|
32 while (n > 0) { |
|
33 *(a++) = 0; |
|
34 n--; |
|
35 } |
|
36 } |
|
37 |
|
38 |
|
39 EXPORT_C void dSetValue (dReal *a, int n, dReal value) |
|
40 { |
|
41 while (n > 0) { |
|
42 *(a++) = value; |
|
43 n--; |
|
44 } |
|
45 } |
|
46 |
|
47 |
|
48 EXPORT_C void dMultiply0 (dReal *A, const dReal *B, const dReal *C, int p, int q, int r) |
|
49 { |
|
50 int i,j,k,qskip,rskip,rpad; |
|
51 qskip = dPAD(q); |
|
52 rskip = dPAD(r); |
|
53 rpad = rskip - r; |
|
54 dReal sum; |
|
55 const dReal *b,*c,*bb; |
|
56 bb = B; |
|
57 for (i=p; i; i--) { |
|
58 for (j=0 ; j<r; j++) { |
|
59 c = C + j; |
|
60 b = bb; |
|
61 sum = 0; |
|
62 for (k=q; k; k--, c+=rskip) sum += dMUL((*(b++)),(*c)); |
|
63 *(A++) = sum; |
|
64 } |
|
65 A += rpad; |
|
66 bb += qskip; |
|
67 } |
|
68 } |
|
69 |
|
70 |
|
71 EXPORT_C void dMultiply1 (dReal *A, const dReal *B, const dReal *C, int p, int q, int r) |
|
72 { |
|
73 int i,j,k,pskip,rskip; |
|
74 dReal sum; |
|
75 pskip = dPAD(p); |
|
76 rskip = dPAD(r); |
|
77 for (i=0; i<p; i++) { |
|
78 for (j=0; j<r; j++) { |
|
79 sum = 0; |
|
80 for (k=0; k<q; k++) sum += dMUL(B[i+k*pskip],C[j+k*rskip]); |
|
81 A[i*rskip+j] = sum; |
|
82 } |
|
83 } |
|
84 } |
|
85 |
|
86 |
|
87 EXPORT_C void dMultiply2 (dReal *A, const dReal *B, const dReal *C, int p, int q, int r) |
|
88 { |
|
89 int i,j,k,z,rpad,qskip; |
|
90 dReal sum; |
|
91 const dReal *bb,*cc; |
|
92 rpad = dPAD(r) - r; |
|
93 qskip = dPAD(q); |
|
94 bb = B; |
|
95 for (i=p; i; i--) { |
|
96 cc = C; |
|
97 for (j=r; j; j--) { |
|
98 z = 0; |
|
99 sum = 0; |
|
100 for (k=q; k; k--,z++) sum += dMUL(bb[z],cc[z]); |
|
101 *(A++) = sum; |
|
102 cc += qskip; |
|
103 } |
|
104 A += rpad; |
|
105 bb += qskip; |
|
106 } |
|
107 } |
|
108 |
|
109 |
|
110 EXPORT_C int dFactorCholesky (dReal *A, int n) |
|
111 { |
|
112 int i,j,k,nskip; |
|
113 dReal sum,*a,*b,*aa,*bb,*cc,*recip; |
|
114 nskip = dPAD (n); |
|
115 recip = (dReal*) malloc (n * sizeof(dReal)); |
|
116 if(recip == NULL) { |
|
117 return 0; |
|
118 } |
|
119 aa = A; |
|
120 for (i=0; i<n; i++) { |
|
121 bb = A; |
|
122 cc = A + i*nskip; |
|
123 for (j=0; j<i; j++) { |
|
124 sum = *cc; |
|
125 a = aa; |
|
126 b = bb; |
|
127 for (k=j; k; k--) sum -= dMUL((*(a++)),(*(b++))); |
|
128 *cc = dMUL(sum,recip[j]); |
|
129 bb += nskip; |
|
130 cc++; |
|
131 } |
|
132 sum = *cc; |
|
133 a = aa; |
|
134 for (k=i; k; k--, a++) sum -= dMUL((*a),(*a)); |
|
135 if (sum <= REAL(0.0)) { |
|
136 free(recip); |
|
137 return 0; |
|
138 } |
|
139 *cc = dSqrt(sum); |
|
140 recip[i] = dRecip (*cc); |
|
141 aa += nskip; |
|
142 } |
|
143 free(recip); |
|
144 return 1; |
|
145 } |
|
146 |
|
147 |
|
148 EXPORT_C void dSolveCholesky (const dReal *L, dReal *b, int n) |
|
149 { |
|
150 int i,k,nskip; |
|
151 dReal sum,*y; |
|
152 nskip = dPAD (n); |
|
153 y = (dReal*) malloc (n*sizeof(dReal)); |
|
154 if (y == NULL) { |
|
155 return; |
|
156 } |
|
157 for (i=0; i<n; i++) { |
|
158 sum = 0; |
|
159 for (k=0; k < i; k++) sum += dMUL(L[i*nskip+k],y[k]); |
|
160 y[i] = dDIV((b[i]-sum),L[i*nskip+i]); |
|
161 } |
|
162 for (i=n-1; i >= 0; i--) { |
|
163 sum = 0; |
|
164 for (k=i+1; k < n; k++) sum += dMUL(L[k*nskip+i],b[k]); |
|
165 b[i] = dDIV((y[i]-sum),L[i*nskip+i]); |
|
166 } |
|
167 free(y); |
|
168 } |
|
169 |
|
170 |
|
171 EXPORT_C int dInvertPDMatrix (const dReal *A, dReal *Ainv, int n) |
|
172 { |
|
173 int i,j,nskip; |
|
174 dReal *L,*x; |
|
175 nskip = dPAD (n); |
|
176 L = (dReal*) malloc (nskip*n*sizeof(dReal)); |
|
177 if (L == NULL) { |
|
178 return 0; |
|
179 } |
|
180 memcpy (L,A,nskip*n*sizeof(dReal)); |
|
181 x = (dReal*) malloc (n*sizeof(dReal)); |
|
182 if (x == NULL) { |
|
183 free(L); |
|
184 return 0; |
|
185 } |
|
186 if (dFactorCholesky (L,n)==0) { |
|
187 free(L); |
|
188 free(x); |
|
189 return 0; |
|
190 } |
|
191 dSetZero (Ainv,n*nskip); // make sure all padding elements set to 0 |
|
192 for (i=0; i<n; i++) { |
|
193 for (j=0; j<n; j++) x[j] = REAL(0.0); |
|
194 x[i] = REAL(1.0); |
|
195 dSolveCholesky (L,x,n); |
|
196 for (j=0; j<n; j++) Ainv[j*nskip+i] = x[j]; |
|
197 } |
|
198 free(L); |
|
199 free(x); |
|
200 return 1; |
|
201 } |
|
202 |
|
203 |
|
204 EXPORT_C int dIsPositiveDefinite (const dReal *A, int n) |
|
205 { |
|
206 dReal *Acopy; |
|
207 int nskip = dPAD (n); |
|
208 Acopy = (dReal*) malloc (nskip*n * sizeof(dReal)); |
|
209 if ( Acopy == NULL) { |
|
210 return 0; |
|
211 } |
|
212 memcpy (Acopy,A,nskip*n * sizeof(dReal)); |
|
213 int ret = dFactorCholesky (Acopy,n); |
|
214 free(Acopy); |
|
215 return ret; |
|
216 } |
|
217 |
|
218 |
|
219 /***** this has been replaced by a faster version |
|
220 void dSolveL1T (const dReal *L, dReal *b, int n, int nskip) |
|
221 { |
|
222 int i,j; |
|
223 dReal sum; |
|
224 for (i=n-2; i>=0; i--) { |
|
225 sum = 0; |
|
226 for (j=i+1; j<n; j++) sum += L[j*nskip+i]*b[j]; |
|
227 b[i] -= sum; |
|
228 } |
|
229 } |
|
230 */ |
|
231 |
|
232 |
|
233 EXPORT_C void dVectorScale (dReal *a, const dReal *d, int n) |
|
234 { |
|
235 for (int i=0; i<n; i++) a[i] = dMUL(a[i],d[i]); |
|
236 } |
|
237 |
|
238 |
|
239 EXPORT_C void dSolveLDLT (const dReal *L, const dReal *d, dReal *b, int n, int nskip) |
|
240 { |
|
241 dSolveL1 (L,b,n,nskip); |
|
242 dVectorScale (b,d,n); |
|
243 dSolveL1T (L,b,n,nskip); |
|
244 } |
|
245 |
|
246 |
|
247 EXPORT_C void dLDLTAddTL (dReal *L, dReal *d, const dReal *a, int n, int nskip) |
|
248 { |
|
249 int j,p; |
|
250 dReal *W1,*W2,W11,W21,alpha1,alpha2,alphanew,gamma1,gamma2,k1,k2,Wp,ell,dee; |
|
251 |
|
252 if (n < 2) return; |
|
253 W1 = (dReal*) malloc (n*sizeof(dReal)); |
|
254 if (W1 == NULL) { |
|
255 return; |
|
256 } |
|
257 W2 = (dReal*) malloc (n*sizeof(dReal)); |
|
258 if (W2 == NULL) { |
|
259 free(W1); |
|
260 return; |
|
261 } |
|
262 |
|
263 W1[0] = 0; |
|
264 W2[0] = 0; |
|
265 for (j=1; j<n; j++) W1[j] = W2[j] = dMUL(a[j],dSQRT1_2); |
|
266 W11 = dMUL((dMUL(REAL(0.5),a[0])+REAL(1.0)),dSQRT1_2); |
|
267 W21 = dMUL((dMUL(REAL(0.5),a[0])-REAL(1.0)),dSQRT1_2); |
|
268 |
|
269 alpha1=REAL(1.0); |
|
270 alpha2=REAL(1.0); |
|
271 |
|
272 dee = d[0]; |
|
273 alphanew = alpha1 + dMUL(dMUL(W11,W11),dee); |
|
274 dee = dDIV(dee,alphanew); |
|
275 gamma1 = dMUL(W11,dee); |
|
276 dee = dMUL(dee,alpha1); |
|
277 alpha1 = alphanew; |
|
278 alphanew = alpha2 - dMUL(dMUL(W21,W21),dee); |
|
279 dee = dDIV(dee,alphanew); |
|
280 gamma2 = dMUL(W21,dee); |
|
281 alpha2 = alphanew; |
|
282 k1 = REAL(1.0) - dMUL(W21,gamma1); |
|
283 k2 = dMUL(W21,dMUL(gamma1,W11)) - W21; |
|
284 for (p=1; p<n; p++) { |
|
285 Wp = W1[p]; |
|
286 ell = L[p*nskip]; |
|
287 W1[p] = Wp - dMUL(W11,ell); |
|
288 W2[p] = dMUL(k1,Wp) + dMUL(k2,ell); |
|
289 } |
|
290 |
|
291 for (j=1; j<n; j++) { |
|
292 dee = d[j]; |
|
293 alphanew = alpha1 + dMUL(dMUL(W1[j],W1[j]),dee); |
|
294 dee = dDIV(dee,alphanew); |
|
295 gamma1 = dMUL(W1[j],dee); |
|
296 dee = dMUL(dee,alpha1); |
|
297 alpha1 = alphanew; |
|
298 alphanew = alpha2 - dMUL(dMUL(W2[j],W2[j]),dee); |
|
299 dee = dDIV(dee,alphanew); |
|
300 gamma2 = dMUL(W2[j],dee); |
|
301 dee = dMUL(dee,alpha2); |
|
302 d[j] = dee; |
|
303 alpha2 = alphanew; |
|
304 |
|
305 k1 = W1[j]; |
|
306 k2 = W2[j]; |
|
307 for (p=j+1; p<n; p++) { |
|
308 ell = L[p*nskip+j]; |
|
309 Wp = W1[p] - dMUL(k1,ell); |
|
310 ell += dMUL(gamma1,Wp); |
|
311 W1[p] = Wp; |
|
312 Wp = W2[p] - dMUL(k2,ell); |
|
313 ell -= dMUL(gamma2,Wp); |
|
314 W2[p] = Wp; |
|
315 L[p*nskip+j] = ell; |
|
316 } |
|
317 } |
|
318 free(W1); |
|
319 free(W2); |
|
320 } |
|
321 |
|
322 |
|
323 // macros for dLDLTRemove() for accessing A - either access the matrix |
|
324 // directly or access it via row pointers. we are only supposed to reference |
|
325 // the lower triangle of A (it is symmetric), but indexes i and j come from |
|
326 // permutation vectors so they are not predictable. so do a test on the |
|
327 // indexes - this should not slow things down too much, as we don't do this |
|
328 // in an inner loop. |
|
329 |
|
330 #define _GETA(i,j) (A[i][j]) |
|
331 //#define _GETA(i,j) (A[(i)*nskip+(j)]) |
|
332 #define GETA(i,j) ((i > j) ? _GETA(i,j) : _GETA(j,i)) |
|
333 |
|
334 |
|
335 EXPORT_C void dLDLTRemove (dReal **A, const int *p, dReal *L, dReal *d, |
|
336 int /*n1*/, int n2, int r, int nskip) |
|
337 { |
|
338 int i; |
|
339 |
|
340 |
|
341 if (r==n2-1) { |
|
342 return; // deleting last row/col is easy |
|
343 } |
|
344 else if (r==0) { |
|
345 dReal *a = (dReal*) malloc (n2 * sizeof(dReal)); |
|
346 if (a == NULL) { |
|
347 return; |
|
348 } |
|
349 for (i=0; i<n2; i++) a[i] = -GETA(p[i],p[0]); |
|
350 a[0] += REAL(1.0); |
|
351 dLDLTAddTL (L,d,a,n2,nskip); |
|
352 free(a); |
|
353 } |
|
354 else { |
|
355 dReal *t = (dReal*) malloc (r * sizeof(dReal)); |
|
356 if (t == NULL) { |
|
357 return; |
|
358 } |
|
359 dReal *a = (dReal*) malloc ((n2-r) * sizeof(dReal)); |
|
360 if (a == NULL) { |
|
361 free(t); |
|
362 return; |
|
363 } |
|
364 for (i=0; i<r; i++) t[i] = dDIV(L[r*nskip+i],d[i]); |
|
365 for (i=0; i<(n2-r); i++) |
|
366 a[i] = dDot(L+(r+i)*nskip,t,r) - GETA(p[r+i],p[r]); |
|
367 a[0] += REAL(1.0); |
|
368 dLDLTAddTL (L + r*nskip+r, d+r, a, n2-r, nskip); |
|
369 free(t); |
|
370 free(a); |
|
371 } |
|
372 |
|
373 // snip out row/column r from L and d |
|
374 dRemoveRowCol (L,n2,nskip,r); |
|
375 if (r < (n2-1)) memmove (d+r,d+r+1,(n2-r-1)*sizeof(dReal)); |
|
376 } |
|
377 |
|
378 |
|
379 EXPORT_C void dRemoveRowCol (dReal *A, int n, int nskip, int r) |
|
380 { |
|
381 int i; |
|
382 if (r >= n-1) return; |
|
383 if (r > 0) { |
|
384 for (i=0; i<r; i++) |
|
385 memmove (A+i*nskip+r,A+i*nskip+r+1,(n-r-1)*sizeof(dReal)); |
|
386 for (i=r; i<(n-1); i++) |
|
387 memcpy (A+i*nskip,A+i*nskip+nskip,r*sizeof(dReal)); |
|
388 } |
|
389 for (i=r; i<(n-1); i++) |
|
390 memcpy (A+i*nskip+r,A+i*nskip+nskip+r+1,(n-r-1)*sizeof(dReal)); |
|
391 } |