ode/src/fastlsolve.c
changeset 0 2f259fa3e83a
equal deleted inserted replaced
-1:000000000000 0:2f259fa3e83a
       
     1 /* generated code, do not edit. */
       
     2 
       
     3 #include <ode/matrix.h>
       
     4 
       
     5 /* solve L*X=B, with B containing 1 right hand sides.
       
     6  * L is an n*n lower triangular matrix with ones on the diagonal.
       
     7  * L is stored by rows and its leading dimension is lskip.
       
     8  * B is an n*1 matrix that contains the right hand sides.
       
     9  * B is stored by columns and its leading dimension is also lskip.
       
    10  * B is overwritten with X.
       
    11  * this processes blocks of 4*4.
       
    12  * if this is in the factorizer source file, n must be a multiple of 4.
       
    13  */
       
    14 
       
    15 EXPORT_C void dSolveL1 (const dReal *L, dReal *B, int n, int lskip1)
       
    16 {  
       
    17   /* declare variables - Z matrix, p and q vectors, etc */
       
    18   dReal Z11,Z21,Z31,Z41,p1,q1,p2,p3,p4,*ex;
       
    19   const dReal *ell;
       
    20   int lskip2,lskip3,i,j;
       
    21   /* compute lskip values */
       
    22   lskip2 = 2*lskip1;
       
    23   lskip3 = 3*lskip1;
       
    24   /* compute all 4 x 1 blocks of X */
       
    25   for (i=0; i <= n-4; i+=4) {
       
    26     /* compute all 4 x 1 block of X, from rows i..i+4-1 */
       
    27     /* set the Z matrix to 0 */
       
    28     Z11=0;
       
    29     Z21=0;
       
    30     Z31=0;
       
    31     Z41=0;
       
    32     ell = L + i*lskip1;
       
    33     ex = B;
       
    34     /* the inner loop that computes outer products and adds them to Z */
       
    35     for (j=i-12; j >= 0; j -= 12) {
       
    36       /* load p and q values */
       
    37       p1=ell[0];
       
    38       q1=ex[0];
       
    39       p2=ell[lskip1];
       
    40       p3=ell[lskip2];
       
    41       p4=ell[lskip3];
       
    42       /* compute outer product and add it to the Z matrix */
       
    43       Z11 += dMUL(p1,q1);
       
    44       Z21 += dMUL(p2,q1);
       
    45       Z31 += dMUL(p3,q1);
       
    46       Z41 += dMUL(p4,q1);
       
    47       /* load p and q values */
       
    48       p1=ell[1];
       
    49       q1=ex[1];
       
    50       p2=ell[1+lskip1];
       
    51       p3=ell[1+lskip2];
       
    52       p4=ell[1+lskip3];
       
    53       /* compute outer product and add it to the Z matrix */
       
    54       Z11 += dMUL(p1,q1);
       
    55       Z21 += dMUL(p2,q1);
       
    56       Z31 += dMUL(p3,q1);
       
    57       Z41 += dMUL(p4,q1);
       
    58       /* load p and q values */
       
    59       p1=ell[2];
       
    60       q1=ex[2];
       
    61       p2=ell[2+lskip1];
       
    62       p3=ell[2+lskip2];
       
    63       p4=ell[2+lskip3];
       
    64       /* compute outer product and add it to the Z matrix */
       
    65       Z11 += dMUL(p1,q1);
       
    66       Z21 += dMUL(p2,q1);
       
    67       Z31 += dMUL(p3,q1);
       
    68       Z41 += dMUL(p4,q1);
       
    69       /* load p and q values */
       
    70       p1=ell[3];
       
    71       q1=ex[3];
       
    72       p2=ell[3+lskip1];
       
    73       p3=ell[3+lskip2];
       
    74       p4=ell[3+lskip3];
       
    75       /* compute outer product and add it to the Z matrix */
       
    76       Z11 += dMUL(p1,q1);
       
    77       Z21 += dMUL(p2,q1);
       
    78       Z31 += dMUL(p3,q1);
       
    79       Z41 += dMUL(p4,q1);
       
    80       /* load p and q values */
       
    81       p1=ell[4];
       
    82       q1=ex[4];
       
    83       p2=ell[4+lskip1];
       
    84       p3=ell[4+lskip2];
       
    85       p4=ell[4+lskip3];
       
    86       /* compute outer product and add it to the Z matrix */
       
    87       Z11 += dMUL(p1,q1);
       
    88       Z21 += dMUL(p2,q1);
       
    89       Z31 += dMUL(p3,q1);
       
    90       Z41 += dMUL(p4,q1);
       
    91       /* load p and q values */
       
    92       p1=ell[5];
       
    93       q1=ex[5];
       
    94       p2=ell[5+lskip1];
       
    95       p3=ell[5+lskip2];
       
    96       p4=ell[5+lskip3];
       
    97       /* compute outer product and add it to the Z matrix */
       
    98       Z11 += dMUL(p1,q1);
       
    99       Z21 += dMUL(p2,q1);
       
   100       Z31 += dMUL(p3,q1);
       
   101       Z41 += dMUL(p4,q1);
       
   102       /* load p and q values */
       
   103       p1=ell[6];
       
   104       q1=ex[6];
       
   105       p2=ell[6+lskip1];
       
   106       p3=ell[6+lskip2];
       
   107       p4=ell[6+lskip3];
       
   108       /* compute outer product and add it to the Z matrix */
       
   109       Z11 += dMUL(p1,q1);
       
   110       Z21 += dMUL(p2,q1);
       
   111       Z31 += dMUL(p3,q1);
       
   112       Z41 += dMUL(p4,q1);
       
   113       /* load p and q values */
       
   114       p1=ell[7];
       
   115       q1=ex[7];
       
   116       p2=ell[7+lskip1];
       
   117       p3=ell[7+lskip2];
       
   118       p4=ell[7+lskip3];
       
   119       /* compute outer product and add it to the Z matrix */
       
   120       Z11 += dMUL(p1,q1);
       
   121       Z21 += dMUL(p2,q1);
       
   122       Z31 += dMUL(p3,q1);
       
   123       Z41 += dMUL(p4,q1);
       
   124       /* load p and q values */
       
   125       p1=ell[8];
       
   126       q1=ex[8];
       
   127       p2=ell[8+lskip1];
       
   128       p3=ell[8+lskip2];
       
   129       p4=ell[8+lskip3];
       
   130       /* compute outer product and add it to the Z matrix */
       
   131       Z11 += dMUL(p1,q1);
       
   132       Z21 += dMUL(p2,q1);
       
   133       Z31 += dMUL(p3,q1);
       
   134       Z41 += dMUL(p4,q1);
       
   135       /* load p and q values */
       
   136       p1=ell[9];
       
   137       q1=ex[9];
       
   138       p2=ell[9+lskip1];
       
   139       p3=ell[9+lskip2];
       
   140       p4=ell[9+lskip3];
       
   141       /* compute outer product and add it to the Z matrix */
       
   142       Z11 += dMUL(p1,q1);
       
   143       Z21 += dMUL(p2,q1);
       
   144       Z31 += dMUL(p3,q1);
       
   145       Z41 += dMUL(p4,q1);
       
   146       /* load p and q values */
       
   147       p1=ell[10];
       
   148       q1=ex[10];
       
   149       p2=ell[10+lskip1];
       
   150       p3=ell[10+lskip2];
       
   151       p4=ell[10+lskip3];
       
   152       /* compute outer product and add it to the Z matrix */
       
   153       Z11 += dMUL(p1,q1);
       
   154       Z21 += dMUL(p2,q1);
       
   155       Z31 += dMUL(p3,q1);
       
   156       Z41 += dMUL(p4,q1);
       
   157       /* load p and q values */
       
   158       p1=ell[11];
       
   159       q1=ex[11];
       
   160       p2=ell[11+lskip1];
       
   161       p3=ell[11+lskip2];
       
   162       p4=ell[11+lskip3];
       
   163       /* compute outer product and add it to the Z matrix */
       
   164       Z11 += dMUL(p1,q1);
       
   165       Z21 += dMUL(p2,q1);
       
   166       Z31 += dMUL(p3,q1);
       
   167       Z41 += dMUL(p4,q1);
       
   168       /* advance pointers */
       
   169       ell += 12;
       
   170       ex += 12;
       
   171       /* end of inner loop */
       
   172     }
       
   173     /* compute left-over iterations */
       
   174     j += 12;
       
   175     for (; j > 0; j--) {
       
   176       /* load p and q values */
       
   177       p1=ell[0];
       
   178       q1=ex[0];
       
   179       p2=ell[lskip1];
       
   180       p3=ell[lskip2];
       
   181       p4=ell[lskip3];
       
   182       /* compute outer product and add it to the Z matrix */
       
   183       Z11 += dMUL(p1,q1);
       
   184       Z21 += dMUL(p2,q1);
       
   185       Z31 += dMUL(p3,q1);
       
   186       Z41 += dMUL(p4,q1);
       
   187       /* advance pointers */
       
   188       ell += 1;
       
   189       ex += 1;
       
   190     }
       
   191     /* finish computing the X(i) block */
       
   192     Z11 = ex[0] - Z11;
       
   193     ex[0] = Z11;
       
   194     p1 = ell[lskip1];
       
   195     Z21 = ex[1] - Z21 - dMUL(p1,Z11);
       
   196     ex[1] = Z21;
       
   197     p1 = ell[lskip2];
       
   198     p2 = ell[1+lskip2];
       
   199     Z31 = ex[2] - Z31 - dMUL(p1,Z11) - dMUL(p2,Z21);
       
   200     ex[2] = Z31;
       
   201     p1 = ell[lskip3];
       
   202     p2 = ell[1+lskip3];
       
   203     p3 = ell[2+lskip3];
       
   204     Z41 = ex[3] - Z41 - dMUL(p1,Z11) - dMUL(p2,Z21) - dMUL(p3,Z31);
       
   205     ex[3] = Z41;
       
   206     /* end of outer loop */
       
   207   }
       
   208   /* compute rows at end that are not a multiple of block size */
       
   209   for (; i < n; i++) {
       
   210     /* compute all 1 x 1 block of X, from rows i..i+1-1 */
       
   211     /* set the Z matrix to 0 */
       
   212     Z11=0;
       
   213     ell = L + i*lskip1;
       
   214     ex = B;
       
   215     /* the inner loop that computes outer products and adds them to Z */
       
   216     for (j=i-12; j >= 0; j -= 12) {
       
   217       /* load p and q values */
       
   218       p1=ell[0];
       
   219       q1=ex[0];
       
   220       /* compute outer product and add it to the Z matrix */
       
   221       Z11 += dMUL(p1,q1);
       
   222       /* load p and q values */
       
   223       p1=ell[1];
       
   224       q1=ex[1];
       
   225       /* compute outer product and add it to the Z matrix */
       
   226       Z11 += dMUL(p1,q1);
       
   227       /* load p and q values */
       
   228       p1=ell[2];
       
   229       q1=ex[2];
       
   230       /* compute outer product and add it to the Z matrix */
       
   231       Z11 += dMUL(p1,q1);
       
   232       /* load p and q values */
       
   233       p1=ell[3];
       
   234       q1=ex[3];
       
   235       /* compute outer product and add it to the Z matrix */
       
   236       Z11 += dMUL(p1,q1);
       
   237       /* load p and q values */
       
   238       p1=ell[4];
       
   239       q1=ex[4];
       
   240       /* compute outer product and add it to the Z matrix */
       
   241       Z11 += dMUL(p1,q1);
       
   242       /* load p and q values */
       
   243       p1=ell[5];
       
   244       q1=ex[5];
       
   245       /* compute outer product and add it to the Z matrix */
       
   246       Z11 += dMUL(p1,q1);
       
   247       /* load p and q values */
       
   248       p1=ell[6];
       
   249       q1=ex[6];
       
   250       /* compute outer product and add it to the Z matrix */
       
   251       Z11 += dMUL(p1,q1);
       
   252       /* load p and q values */
       
   253       p1=ell[7];
       
   254       q1=ex[7];
       
   255       /* compute outer product and add it to the Z matrix */
       
   256       Z11 += dMUL(p1,q1);
       
   257       /* load p and q values */
       
   258       p1=ell[8];
       
   259       q1=ex[8];
       
   260       /* compute outer product and add it to the Z matrix */
       
   261       Z11 += dMUL(p1,q1);
       
   262       /* load p and q values */
       
   263       p1=ell[9];
       
   264       q1=ex[9];
       
   265       /* compute outer product and add it to the Z matrix */
       
   266       Z11 +=dMUL( p1,q1);
       
   267       /* load p and q values */
       
   268       p1=ell[10];
       
   269       q1=ex[10];
       
   270       /* compute outer product and add it to the Z matrix */
       
   271       Z11 += dMUL(p1,q1);
       
   272       /* load p and q values */
       
   273       p1=ell[11];
       
   274       q1=ex[11];
       
   275       /* compute outer product and add it to the Z matrix */
       
   276       Z11 += dMUL(p1,q1);
       
   277       /* advance pointers */
       
   278       ell += 12;
       
   279       ex += 12;
       
   280       /* end of inner loop */
       
   281     }
       
   282     /* compute left-over iterations */
       
   283     j += 12;
       
   284     for (; j > 0; j--) {
       
   285       /* load p and q values */
       
   286       p1=ell[0];
       
   287       q1=ex[0];
       
   288       /* compute outer product and add it to the Z matrix */
       
   289       Z11 += dMUL(p1,q1);
       
   290       /* advance pointers */
       
   291       ell += 1;
       
   292       ex += 1;
       
   293     }
       
   294     /* finish computing the X(i) block */
       
   295     Z11 = ex[0] - Z11;
       
   296     ex[0] = Z11;
       
   297   }
       
   298 }