ode/src/fastltsolve.c
changeset 0 2f259fa3e83a
equal deleted inserted replaced
-1:000000000000 0:2f259fa3e83a
       
     1 /* generated code, do not edit. */
       
     2 
       
     3 #include <ode/matrix.h>
       
     4 
       
     5 /* solve L^T * x=b, with b containing 1 right hand side.
       
     6  * L is an n*n lower triangular matrix with ones on the diagonal.
       
     7  * L is stored by rows and its leading dimension is lskip.
       
     8  * b is an n*1 matrix that contains the right hand side.
       
     9  * b is overwritten with x.
       
    10  * this processes blocks of 4.
       
    11  */
       
    12 
       
    13 EXPORT_C void dSolveL1T (const dReal *L, dReal *B, int n, int lskip1)
       
    14 {  
       
    15   /* declare variables - Z matrix, p and q vectors, etc */
       
    16   dReal Z11,m11,Z21,m21,Z31,m31,Z41,m41,p1,q1,p2,p3,p4,*ex;
       
    17   const dReal *ell;
       
    18   int lskip2,i,j;
       
    19   /* special handling for L and B because we're solving L1 *transpose* */
       
    20   L = L + (n-1)*(lskip1+1);
       
    21   B = B + n-1;
       
    22   lskip1 = -lskip1;
       
    23   /* compute lskip values */
       
    24   lskip2 = 2*lskip1;
       
    25   /* compute all 4 x 1 blocks of X */
       
    26   for (i=0; i <= n-4; i+=4) {
       
    27     /* compute all 4 x 1 block of X, from rows i..i+4-1 */
       
    28     /* set the Z matrix to 0 */
       
    29     Z11=0;
       
    30     Z21=0;
       
    31     Z31=0;
       
    32     Z41=0;
       
    33     ell = L - i;
       
    34     ex = B;
       
    35     /* the inner loop that computes outer products and adds them to Z */
       
    36     for (j=i-4; j >= 0; j -= 4) {
       
    37       /* load p and q values */
       
    38       p1=ell[0];
       
    39       q1=ex[0];
       
    40       p2=ell[-1];
       
    41       p3=ell[-2];
       
    42       p4=ell[-3];
       
    43       /* compute outer product and add it to the Z matrix */
       
    44       m11 = dMUL(p1,q1);
       
    45       m21 = dMUL(p2,q1);
       
    46       m31 = dMUL(p3,q1);
       
    47       m41 = dMUL(p4,q1);
       
    48       ell += lskip1;
       
    49       Z11 += m11;
       
    50       Z21 += m21;
       
    51       Z31 += m31;
       
    52       Z41 += m41;
       
    53       /* load p and q values */
       
    54       p1=ell[0];
       
    55       q1=ex[-1];
       
    56       p2=ell[-1];
       
    57       p3=ell[-2];
       
    58       p4=ell[-3];
       
    59       /* compute outer product and add it to the Z matrix */
       
    60       m11 = dMUL(p1,q1);
       
    61       m21 = dMUL(p2,q1);
       
    62       m31 = dMUL(p3,q1);
       
    63       m41 = dMUL(p4,q1);
       
    64       ell += lskip1;
       
    65       Z11 += m11;
       
    66       Z21 += m21;
       
    67       Z31 += m31;
       
    68       Z41 += m41;
       
    69       /* load p and q values */
       
    70       p1=ell[0];
       
    71       q1=ex[-2];
       
    72       p2=ell[-1];
       
    73       p3=ell[-2];
       
    74       p4=ell[-3];
       
    75       /* compute outer product and add it to the Z matrix */
       
    76       m11 = dMUL(p1,q1);
       
    77       m21 = dMUL(p2,q1);
       
    78       m31 = dMUL(p3,q1);
       
    79       m41 = dMUL(p4,q1);
       
    80       ell += lskip1;
       
    81       Z11 += m11;
       
    82       Z21 += m21;
       
    83       Z31 += m31;
       
    84       Z41 += m41;
       
    85       /* load p and q values */
       
    86       p1=ell[0];
       
    87       q1=ex[-3];
       
    88       p2=ell[-1];
       
    89       p3=ell[-2];
       
    90       p4=ell[-3];
       
    91       /* compute outer product and add it to the Z matrix */
       
    92       m11 = dMUL(p1,q1);
       
    93       m21 = dMUL(p2,q1);
       
    94       m31 = dMUL(p3,q1);
       
    95       m41 = dMUL(p4,q1);
       
    96       ell += lskip1;
       
    97       ex -= 4;
       
    98       Z11 += m11;
       
    99       Z21 += m21;
       
   100       Z31 += m31;
       
   101       Z41 += m41;
       
   102       /* end of inner loop */
       
   103     }
       
   104     /* compute left-over iterations */
       
   105     j += 4;
       
   106     for (; j > 0; j--) {
       
   107       /* load p and q values */
       
   108       p1=ell[0];
       
   109       q1=ex[0];
       
   110       p2=ell[-1];
       
   111       p3=ell[-2];
       
   112       p4=ell[-3];
       
   113       /* compute outer product and add it to the Z matrix */
       
   114       m11 = dMUL(p1,q1);
       
   115       m21 = dMUL(p2,q1);
       
   116       m31 = dMUL(p3,q1);
       
   117       m41 = dMUL(p4,q1);
       
   118       ell += lskip1;
       
   119       ex -= 1;
       
   120       Z11 += m11;
       
   121       Z21 += m21;
       
   122       Z31 += m31;
       
   123       Z41 += m41;
       
   124     }
       
   125     /* finish computing the X(i) block */
       
   126     Z11 = ex[0] - Z11;
       
   127     ex[0] = Z11;
       
   128     p1 = ell[-1];
       
   129     Z21 = ex[-1] - Z21 - dMUL(p1,Z11);
       
   130     ex[-1] = Z21;
       
   131     p1 = ell[-2];
       
   132     p2 = ell[-2+lskip1];
       
   133     Z31 = ex[-2] - Z31 - dMUL(p1,Z11) - dMUL(p2,Z21);
       
   134     ex[-2] = Z31;
       
   135     p1 = ell[-3];
       
   136     p2 = ell[-3+lskip1];
       
   137     p3 = ell[-3+lskip2];
       
   138     Z41 = ex[-3] - Z41 - dMUL(p1,Z11) - dMUL(p2,Z21) - dMUL(p3,Z31);
       
   139     ex[-3] = Z41;
       
   140     /* end of outer loop */
       
   141   }
       
   142   /* compute rows at end that are not a multiple of block size */
       
   143   for (; i < n; i++) {
       
   144     /* compute all 1 x 1 block of X, from rows i..i+1-1 */
       
   145     /* set the Z matrix to 0 */
       
   146     Z11=0;
       
   147     ell = L - i;
       
   148     ex = B;
       
   149     /* the inner loop that computes outer products and adds them to Z */
       
   150     for (j=i-4; j >= 0; j -= 4) {
       
   151       /* load p and q values */
       
   152       p1=ell[0];
       
   153       q1=ex[0];
       
   154       /* compute outer product and add it to the Z matrix */
       
   155       m11 = dMUL(p1,q1);
       
   156       ell += lskip1;
       
   157       Z11 += m11;
       
   158       /* load p and q values */
       
   159       p1=ell[0];
       
   160       q1=ex[-1];
       
   161       /* compute outer product and add it to the Z matrix */
       
   162       m11 = dMUL(p1,q1);
       
   163       ell += lskip1;
       
   164       Z11 += m11;
       
   165       /* load p and q values */
       
   166       p1=ell[0];
       
   167       q1=ex[-2];
       
   168       /* compute outer product and add it to the Z matrix */
       
   169       m11 = dMUL(p1,q1);
       
   170       ell += lskip1;
       
   171       Z11 += m11;
       
   172       /* load p and q values */
       
   173       p1=ell[0];
       
   174       q1=ex[-3];
       
   175       /* compute outer product and add it to the Z matrix */
       
   176       m11 = dMUL(p1,q1);
       
   177       ell += lskip1;
       
   178       ex -= 4;
       
   179       Z11 += m11;
       
   180       /* end of inner loop */
       
   181     }
       
   182     /* compute left-over iterations */
       
   183     j += 4;
       
   184     for (; j > 0; j--) {
       
   185       /* load p and q values */
       
   186       p1=ell[0];
       
   187       q1=ex[0];
       
   188       /* compute outer product and add it to the Z matrix */
       
   189       m11 = dMUL(p1,q1);
       
   190       ell += lskip1;
       
   191       ex -= 1;
       
   192       Z11 += m11;
       
   193     }
       
   194     /* finish computing the X(i) block */
       
   195     Z11 = ex[0] - Z11;
       
   196     ex[0] = Z11;
       
   197   }
       
   198 }