|
1 /* generated code, do not edit. */ |
|
2 |
|
3 #include <ode/matrix.h> |
|
4 |
|
5 /* solve L*X=B, with B containing 1 right hand sides. |
|
6 * L is an n*n lower triangular matrix with ones on the diagonal. |
|
7 * L is stored by rows and its leading dimension is lskip. |
|
8 * B is an n*1 matrix that contains the right hand sides. |
|
9 * B is stored by columns and its leading dimension is also lskip. |
|
10 * B is overwritten with X. |
|
11 * this processes blocks of 4*4. |
|
12 * if this is in the factorizer source file, n must be a multiple of 4. |
|
13 */ |
|
14 |
|
15 EXPORT_C void dSolveL1 (const dReal *L, dReal *B, int n, int lskip1) |
|
16 { |
|
17 /* declare variables - Z matrix, p and q vectors, etc */ |
|
18 dReal Z11,Z21,Z31,Z41,p1,q1,p2,p3,p4,*ex; |
|
19 const dReal *ell; |
|
20 int lskip2,lskip3,i,j; |
|
21 /* compute lskip values */ |
|
22 lskip2 = 2*lskip1; |
|
23 lskip3 = 3*lskip1; |
|
24 /* compute all 4 x 1 blocks of X */ |
|
25 for (i=0; i <= n-4; i+=4) { |
|
26 /* compute all 4 x 1 block of X, from rows i..i+4-1 */ |
|
27 /* set the Z matrix to 0 */ |
|
28 Z11=0; |
|
29 Z21=0; |
|
30 Z31=0; |
|
31 Z41=0; |
|
32 ell = L + i*lskip1; |
|
33 ex = B; |
|
34 /* the inner loop that computes outer products and adds them to Z */ |
|
35 for (j=i-12; j >= 0; j -= 12) { |
|
36 /* load p and q values */ |
|
37 p1=ell[0]; |
|
38 q1=ex[0]; |
|
39 p2=ell[lskip1]; |
|
40 p3=ell[lskip2]; |
|
41 p4=ell[lskip3]; |
|
42 /* compute outer product and add it to the Z matrix */ |
|
43 Z11 += dMUL(p1,q1); |
|
44 Z21 += dMUL(p2,q1); |
|
45 Z31 += dMUL(p3,q1); |
|
46 Z41 += dMUL(p4,q1); |
|
47 /* load p and q values */ |
|
48 p1=ell[1]; |
|
49 q1=ex[1]; |
|
50 p2=ell[1+lskip1]; |
|
51 p3=ell[1+lskip2]; |
|
52 p4=ell[1+lskip3]; |
|
53 /* compute outer product and add it to the Z matrix */ |
|
54 Z11 += dMUL(p1,q1); |
|
55 Z21 += dMUL(p2,q1); |
|
56 Z31 += dMUL(p3,q1); |
|
57 Z41 += dMUL(p4,q1); |
|
58 /* load p and q values */ |
|
59 p1=ell[2]; |
|
60 q1=ex[2]; |
|
61 p2=ell[2+lskip1]; |
|
62 p3=ell[2+lskip2]; |
|
63 p4=ell[2+lskip3]; |
|
64 /* compute outer product and add it to the Z matrix */ |
|
65 Z11 += dMUL(p1,q1); |
|
66 Z21 += dMUL(p2,q1); |
|
67 Z31 += dMUL(p3,q1); |
|
68 Z41 += dMUL(p4,q1); |
|
69 /* load p and q values */ |
|
70 p1=ell[3]; |
|
71 q1=ex[3]; |
|
72 p2=ell[3+lskip1]; |
|
73 p3=ell[3+lskip2]; |
|
74 p4=ell[3+lskip3]; |
|
75 /* compute outer product and add it to the Z matrix */ |
|
76 Z11 += dMUL(p1,q1); |
|
77 Z21 += dMUL(p2,q1); |
|
78 Z31 += dMUL(p3,q1); |
|
79 Z41 += dMUL(p4,q1); |
|
80 /* load p and q values */ |
|
81 p1=ell[4]; |
|
82 q1=ex[4]; |
|
83 p2=ell[4+lskip1]; |
|
84 p3=ell[4+lskip2]; |
|
85 p4=ell[4+lskip3]; |
|
86 /* compute outer product and add it to the Z matrix */ |
|
87 Z11 += dMUL(p1,q1); |
|
88 Z21 += dMUL(p2,q1); |
|
89 Z31 += dMUL(p3,q1); |
|
90 Z41 += dMUL(p4,q1); |
|
91 /* load p and q values */ |
|
92 p1=ell[5]; |
|
93 q1=ex[5]; |
|
94 p2=ell[5+lskip1]; |
|
95 p3=ell[5+lskip2]; |
|
96 p4=ell[5+lskip3]; |
|
97 /* compute outer product and add it to the Z matrix */ |
|
98 Z11 += dMUL(p1,q1); |
|
99 Z21 += dMUL(p2,q1); |
|
100 Z31 += dMUL(p3,q1); |
|
101 Z41 += dMUL(p4,q1); |
|
102 /* load p and q values */ |
|
103 p1=ell[6]; |
|
104 q1=ex[6]; |
|
105 p2=ell[6+lskip1]; |
|
106 p3=ell[6+lskip2]; |
|
107 p4=ell[6+lskip3]; |
|
108 /* compute outer product and add it to the Z matrix */ |
|
109 Z11 += dMUL(p1,q1); |
|
110 Z21 += dMUL(p2,q1); |
|
111 Z31 += dMUL(p3,q1); |
|
112 Z41 += dMUL(p4,q1); |
|
113 /* load p and q values */ |
|
114 p1=ell[7]; |
|
115 q1=ex[7]; |
|
116 p2=ell[7+lskip1]; |
|
117 p3=ell[7+lskip2]; |
|
118 p4=ell[7+lskip3]; |
|
119 /* compute outer product and add it to the Z matrix */ |
|
120 Z11 += dMUL(p1,q1); |
|
121 Z21 += dMUL(p2,q1); |
|
122 Z31 += dMUL(p3,q1); |
|
123 Z41 += dMUL(p4,q1); |
|
124 /* load p and q values */ |
|
125 p1=ell[8]; |
|
126 q1=ex[8]; |
|
127 p2=ell[8+lskip1]; |
|
128 p3=ell[8+lskip2]; |
|
129 p4=ell[8+lskip3]; |
|
130 /* compute outer product and add it to the Z matrix */ |
|
131 Z11 += dMUL(p1,q1); |
|
132 Z21 += dMUL(p2,q1); |
|
133 Z31 += dMUL(p3,q1); |
|
134 Z41 += dMUL(p4,q1); |
|
135 /* load p and q values */ |
|
136 p1=ell[9]; |
|
137 q1=ex[9]; |
|
138 p2=ell[9+lskip1]; |
|
139 p3=ell[9+lskip2]; |
|
140 p4=ell[9+lskip3]; |
|
141 /* compute outer product and add it to the Z matrix */ |
|
142 Z11 += dMUL(p1,q1); |
|
143 Z21 += dMUL(p2,q1); |
|
144 Z31 += dMUL(p3,q1); |
|
145 Z41 += dMUL(p4,q1); |
|
146 /* load p and q values */ |
|
147 p1=ell[10]; |
|
148 q1=ex[10]; |
|
149 p2=ell[10+lskip1]; |
|
150 p3=ell[10+lskip2]; |
|
151 p4=ell[10+lskip3]; |
|
152 /* compute outer product and add it to the Z matrix */ |
|
153 Z11 += dMUL(p1,q1); |
|
154 Z21 += dMUL(p2,q1); |
|
155 Z31 += dMUL(p3,q1); |
|
156 Z41 += dMUL(p4,q1); |
|
157 /* load p and q values */ |
|
158 p1=ell[11]; |
|
159 q1=ex[11]; |
|
160 p2=ell[11+lskip1]; |
|
161 p3=ell[11+lskip2]; |
|
162 p4=ell[11+lskip3]; |
|
163 /* compute outer product and add it to the Z matrix */ |
|
164 Z11 += dMUL(p1,q1); |
|
165 Z21 += dMUL(p2,q1); |
|
166 Z31 += dMUL(p3,q1); |
|
167 Z41 += dMUL(p4,q1); |
|
168 /* advance pointers */ |
|
169 ell += 12; |
|
170 ex += 12; |
|
171 /* end of inner loop */ |
|
172 } |
|
173 /* compute left-over iterations */ |
|
174 j += 12; |
|
175 for (; j > 0; j--) { |
|
176 /* load p and q values */ |
|
177 p1=ell[0]; |
|
178 q1=ex[0]; |
|
179 p2=ell[lskip1]; |
|
180 p3=ell[lskip2]; |
|
181 p4=ell[lskip3]; |
|
182 /* compute outer product and add it to the Z matrix */ |
|
183 Z11 += dMUL(p1,q1); |
|
184 Z21 += dMUL(p2,q1); |
|
185 Z31 += dMUL(p3,q1); |
|
186 Z41 += dMUL(p4,q1); |
|
187 /* advance pointers */ |
|
188 ell += 1; |
|
189 ex += 1; |
|
190 } |
|
191 /* finish computing the X(i) block */ |
|
192 Z11 = ex[0] - Z11; |
|
193 ex[0] = Z11; |
|
194 p1 = ell[lskip1]; |
|
195 Z21 = ex[1] - Z21 - dMUL(p1,Z11); |
|
196 ex[1] = Z21; |
|
197 p1 = ell[lskip2]; |
|
198 p2 = ell[1+lskip2]; |
|
199 Z31 = ex[2] - Z31 - dMUL(p1,Z11) - dMUL(p2,Z21); |
|
200 ex[2] = Z31; |
|
201 p1 = ell[lskip3]; |
|
202 p2 = ell[1+lskip3]; |
|
203 p3 = ell[2+lskip3]; |
|
204 Z41 = ex[3] - Z41 - dMUL(p1,Z11) - dMUL(p2,Z21) - dMUL(p3,Z31); |
|
205 ex[3] = Z41; |
|
206 /* end of outer loop */ |
|
207 } |
|
208 /* compute rows at end that are not a multiple of block size */ |
|
209 for (; i < n; i++) { |
|
210 /* compute all 1 x 1 block of X, from rows i..i+1-1 */ |
|
211 /* set the Z matrix to 0 */ |
|
212 Z11=0; |
|
213 ell = L + i*lskip1; |
|
214 ex = B; |
|
215 /* the inner loop that computes outer products and adds them to Z */ |
|
216 for (j=i-12; j >= 0; j -= 12) { |
|
217 /* load p and q values */ |
|
218 p1=ell[0]; |
|
219 q1=ex[0]; |
|
220 /* compute outer product and add it to the Z matrix */ |
|
221 Z11 += dMUL(p1,q1); |
|
222 /* load p and q values */ |
|
223 p1=ell[1]; |
|
224 q1=ex[1]; |
|
225 /* compute outer product and add it to the Z matrix */ |
|
226 Z11 += dMUL(p1,q1); |
|
227 /* load p and q values */ |
|
228 p1=ell[2]; |
|
229 q1=ex[2]; |
|
230 /* compute outer product and add it to the Z matrix */ |
|
231 Z11 += dMUL(p1,q1); |
|
232 /* load p and q values */ |
|
233 p1=ell[3]; |
|
234 q1=ex[3]; |
|
235 /* compute outer product and add it to the Z matrix */ |
|
236 Z11 += dMUL(p1,q1); |
|
237 /* load p and q values */ |
|
238 p1=ell[4]; |
|
239 q1=ex[4]; |
|
240 /* compute outer product and add it to the Z matrix */ |
|
241 Z11 += dMUL(p1,q1); |
|
242 /* load p and q values */ |
|
243 p1=ell[5]; |
|
244 q1=ex[5]; |
|
245 /* compute outer product and add it to the Z matrix */ |
|
246 Z11 += dMUL(p1,q1); |
|
247 /* load p and q values */ |
|
248 p1=ell[6]; |
|
249 q1=ex[6]; |
|
250 /* compute outer product and add it to the Z matrix */ |
|
251 Z11 += dMUL(p1,q1); |
|
252 /* load p and q values */ |
|
253 p1=ell[7]; |
|
254 q1=ex[7]; |
|
255 /* compute outer product and add it to the Z matrix */ |
|
256 Z11 += dMUL(p1,q1); |
|
257 /* load p and q values */ |
|
258 p1=ell[8]; |
|
259 q1=ex[8]; |
|
260 /* compute outer product and add it to the Z matrix */ |
|
261 Z11 += dMUL(p1,q1); |
|
262 /* load p and q values */ |
|
263 p1=ell[9]; |
|
264 q1=ex[9]; |
|
265 /* compute outer product and add it to the Z matrix */ |
|
266 Z11 +=dMUL( p1,q1); |
|
267 /* load p and q values */ |
|
268 p1=ell[10]; |
|
269 q1=ex[10]; |
|
270 /* compute outer product and add it to the Z matrix */ |
|
271 Z11 += dMUL(p1,q1); |
|
272 /* load p and q values */ |
|
273 p1=ell[11]; |
|
274 q1=ex[11]; |
|
275 /* compute outer product and add it to the Z matrix */ |
|
276 Z11 += dMUL(p1,q1); |
|
277 /* advance pointers */ |
|
278 ell += 12; |
|
279 ex += 12; |
|
280 /* end of inner loop */ |
|
281 } |
|
282 /* compute left-over iterations */ |
|
283 j += 12; |
|
284 for (; j > 0; j--) { |
|
285 /* load p and q values */ |
|
286 p1=ell[0]; |
|
287 q1=ex[0]; |
|
288 /* compute outer product and add it to the Z matrix */ |
|
289 Z11 += dMUL(p1,q1); |
|
290 /* advance pointers */ |
|
291 ell += 1; |
|
292 ex += 1; |
|
293 } |
|
294 /* finish computing the X(i) block */ |
|
295 Z11 = ex[0] - Z11; |
|
296 ex[0] = Z11; |
|
297 } |
|
298 } |