|
1 /* generated code, do not edit. */ |
|
2 |
|
3 #include <ode/matrix.h> |
|
4 |
|
5 /* solve L^T * x=b, with b containing 1 right hand side. |
|
6 * L is an n*n lower triangular matrix with ones on the diagonal. |
|
7 * L is stored by rows and its leading dimension is lskip. |
|
8 * b is an n*1 matrix that contains the right hand side. |
|
9 * b is overwritten with x. |
|
10 * this processes blocks of 4. |
|
11 */ |
|
12 |
|
13 EXPORT_C void dSolveL1T (const dReal *L, dReal *B, int n, int lskip1) |
|
14 { |
|
15 /* declare variables - Z matrix, p and q vectors, etc */ |
|
16 dReal Z11,m11,Z21,m21,Z31,m31,Z41,m41,p1,q1,p2,p3,p4,*ex; |
|
17 const dReal *ell; |
|
18 int lskip2,i,j; |
|
19 /* special handling for L and B because we're solving L1 *transpose* */ |
|
20 L = L + (n-1)*(lskip1+1); |
|
21 B = B + n-1; |
|
22 lskip1 = -lskip1; |
|
23 /* compute lskip values */ |
|
24 lskip2 = 2*lskip1; |
|
25 /* compute all 4 x 1 blocks of X */ |
|
26 for (i=0; i <= n-4; i+=4) { |
|
27 /* compute all 4 x 1 block of X, from rows i..i+4-1 */ |
|
28 /* set the Z matrix to 0 */ |
|
29 Z11=0; |
|
30 Z21=0; |
|
31 Z31=0; |
|
32 Z41=0; |
|
33 ell = L - i; |
|
34 ex = B; |
|
35 /* the inner loop that computes outer products and adds them to Z */ |
|
36 for (j=i-4; j >= 0; j -= 4) { |
|
37 /* load p and q values */ |
|
38 p1=ell[0]; |
|
39 q1=ex[0]; |
|
40 p2=ell[-1]; |
|
41 p3=ell[-2]; |
|
42 p4=ell[-3]; |
|
43 /* compute outer product and add it to the Z matrix */ |
|
44 m11 = dMUL(p1,q1); |
|
45 m21 = dMUL(p2,q1); |
|
46 m31 = dMUL(p3,q1); |
|
47 m41 = dMUL(p4,q1); |
|
48 ell += lskip1; |
|
49 Z11 += m11; |
|
50 Z21 += m21; |
|
51 Z31 += m31; |
|
52 Z41 += m41; |
|
53 /* load p and q values */ |
|
54 p1=ell[0]; |
|
55 q1=ex[-1]; |
|
56 p2=ell[-1]; |
|
57 p3=ell[-2]; |
|
58 p4=ell[-3]; |
|
59 /* compute outer product and add it to the Z matrix */ |
|
60 m11 = dMUL(p1,q1); |
|
61 m21 = dMUL(p2,q1); |
|
62 m31 = dMUL(p3,q1); |
|
63 m41 = dMUL(p4,q1); |
|
64 ell += lskip1; |
|
65 Z11 += m11; |
|
66 Z21 += m21; |
|
67 Z31 += m31; |
|
68 Z41 += m41; |
|
69 /* load p and q values */ |
|
70 p1=ell[0]; |
|
71 q1=ex[-2]; |
|
72 p2=ell[-1]; |
|
73 p3=ell[-2]; |
|
74 p4=ell[-3]; |
|
75 /* compute outer product and add it to the Z matrix */ |
|
76 m11 = dMUL(p1,q1); |
|
77 m21 = dMUL(p2,q1); |
|
78 m31 = dMUL(p3,q1); |
|
79 m41 = dMUL(p4,q1); |
|
80 ell += lskip1; |
|
81 Z11 += m11; |
|
82 Z21 += m21; |
|
83 Z31 += m31; |
|
84 Z41 += m41; |
|
85 /* load p and q values */ |
|
86 p1=ell[0]; |
|
87 q1=ex[-3]; |
|
88 p2=ell[-1]; |
|
89 p3=ell[-2]; |
|
90 p4=ell[-3]; |
|
91 /* compute outer product and add it to the Z matrix */ |
|
92 m11 = dMUL(p1,q1); |
|
93 m21 = dMUL(p2,q1); |
|
94 m31 = dMUL(p3,q1); |
|
95 m41 = dMUL(p4,q1); |
|
96 ell += lskip1; |
|
97 ex -= 4; |
|
98 Z11 += m11; |
|
99 Z21 += m21; |
|
100 Z31 += m31; |
|
101 Z41 += m41; |
|
102 /* end of inner loop */ |
|
103 } |
|
104 /* compute left-over iterations */ |
|
105 j += 4; |
|
106 for (; j > 0; j--) { |
|
107 /* load p and q values */ |
|
108 p1=ell[0]; |
|
109 q1=ex[0]; |
|
110 p2=ell[-1]; |
|
111 p3=ell[-2]; |
|
112 p4=ell[-3]; |
|
113 /* compute outer product and add it to the Z matrix */ |
|
114 m11 = dMUL(p1,q1); |
|
115 m21 = dMUL(p2,q1); |
|
116 m31 = dMUL(p3,q1); |
|
117 m41 = dMUL(p4,q1); |
|
118 ell += lskip1; |
|
119 ex -= 1; |
|
120 Z11 += m11; |
|
121 Z21 += m21; |
|
122 Z31 += m31; |
|
123 Z41 += m41; |
|
124 } |
|
125 /* finish computing the X(i) block */ |
|
126 Z11 = ex[0] - Z11; |
|
127 ex[0] = Z11; |
|
128 p1 = ell[-1]; |
|
129 Z21 = ex[-1] - Z21 - dMUL(p1,Z11); |
|
130 ex[-1] = Z21; |
|
131 p1 = ell[-2]; |
|
132 p2 = ell[-2+lskip1]; |
|
133 Z31 = ex[-2] - Z31 - dMUL(p1,Z11) - dMUL(p2,Z21); |
|
134 ex[-2] = Z31; |
|
135 p1 = ell[-3]; |
|
136 p2 = ell[-3+lskip1]; |
|
137 p3 = ell[-3+lskip2]; |
|
138 Z41 = ex[-3] - Z41 - dMUL(p1,Z11) - dMUL(p2,Z21) - dMUL(p3,Z31); |
|
139 ex[-3] = Z41; |
|
140 /* end of outer loop */ |
|
141 } |
|
142 /* compute rows at end that are not a multiple of block size */ |
|
143 for (; i < n; i++) { |
|
144 /* compute all 1 x 1 block of X, from rows i..i+1-1 */ |
|
145 /* set the Z matrix to 0 */ |
|
146 Z11=0; |
|
147 ell = L - i; |
|
148 ex = B; |
|
149 /* the inner loop that computes outer products and adds them to Z */ |
|
150 for (j=i-4; j >= 0; j -= 4) { |
|
151 /* load p and q values */ |
|
152 p1=ell[0]; |
|
153 q1=ex[0]; |
|
154 /* compute outer product and add it to the Z matrix */ |
|
155 m11 = dMUL(p1,q1); |
|
156 ell += lskip1; |
|
157 Z11 += m11; |
|
158 /* load p and q values */ |
|
159 p1=ell[0]; |
|
160 q1=ex[-1]; |
|
161 /* compute outer product and add it to the Z matrix */ |
|
162 m11 = dMUL(p1,q1); |
|
163 ell += lskip1; |
|
164 Z11 += m11; |
|
165 /* load p and q values */ |
|
166 p1=ell[0]; |
|
167 q1=ex[-2]; |
|
168 /* compute outer product and add it to the Z matrix */ |
|
169 m11 = dMUL(p1,q1); |
|
170 ell += lskip1; |
|
171 Z11 += m11; |
|
172 /* load p and q values */ |
|
173 p1=ell[0]; |
|
174 q1=ex[-3]; |
|
175 /* compute outer product and add it to the Z matrix */ |
|
176 m11 = dMUL(p1,q1); |
|
177 ell += lskip1; |
|
178 ex -= 4; |
|
179 Z11 += m11; |
|
180 /* end of inner loop */ |
|
181 } |
|
182 /* compute left-over iterations */ |
|
183 j += 4; |
|
184 for (; j > 0; j--) { |
|
185 /* load p and q values */ |
|
186 p1=ell[0]; |
|
187 q1=ex[0]; |
|
188 /* compute outer product and add it to the Z matrix */ |
|
189 m11 = dMUL(p1,q1); |
|
190 ell += lskip1; |
|
191 ex -= 1; |
|
192 Z11 += m11; |
|
193 } |
|
194 /* finish computing the X(i) block */ |
|
195 Z11 = ex[0] - Z11; |
|
196 ex[0] = Z11; |
|
197 } |
|
198 } |