genericopenlibs/liboil/src/arm/math_vfp_asm.cia
branchRCL_3
changeset 56 acd3cd4aaceb
equal deleted inserted replaced
54:4332f0f7be53 56:acd3cd4aaceb
       
     1 #if __ARMCC__
       
     2 
       
     3 #define __CPU_ARM 
       
     4 #define __CPU_HAS_VFP
       
     5 #include <arm_vfp.h>
       
     6 #include <e32std.h>
       
     7 
       
     8 
       
     9 extern "C" {
       
    10 
       
    11 EXPORT_C __NAKED__ void vfp_add_f32 (float *d, const float *s1, const float *s2, int n)
       
    12     {
       
    13     asm(" stmdb sp!, {fp, lr}"); 
       
    14     asm("ands ip, r3, #7"); 
       
    15     asm("beq vfp_add_f32_unroll");
       
    16       
       
    17    //asm("fldmias r1!, {s0}"); 
       
    18    VFP_FLDMIAS(CC_AL,1,0,1);
       
    19    
       
    20    asm("vfp_add_f32_loop1: ");
       
    21       
       
    22    //asm("fldmias r2!, {s1}");    
       
    23      VFP_FLDMIAS(CC_AL,2,1,1);
       
    24  
       
    25     //asm("fadds s2, s0, s1");
       
    26     VFP_FADDS(CC_AL,2,0,1);
       
    27       
       
    28     //asm("fstmias r0!, {s2}");
       
    29     VFP_FSTMIAS(CC_AL,0,2,1);   
       
    30     
       
    31     asm("subs ip, ip, #1"); 
       
    32     asm("bne vfp_add_f32_loop1 ");
       
    33 	asm("vfp_add_f32_unroll: movs ip, r3, lsr #3"); 
       
    34     asm("beq vfp_add_f32_end");
       
    35     
       
    36     
       
    37     //asm("fmrx lr, fpscr");  
       
    38     VFP_FMRX(,14,VFP_XREG_FPSCR);
       
    39     
       
    40     
       
    41     asm("mov fp, #7"); 
       
    42     asm("orr fp, lr, fp, lsl #16"); 
       
    43     
       
    44     //asm("fmxr fpscr, fp"); 
       
    45     VFP_FMXR(,VFP_XREG_FPSCR,11);
       
    46         
       
    47       
       
    48     asm("vfp_add_f32_loop2:");
       
    49   
       
    50     //asm("fldmias r1!, {s8, s9, s10, s11, s12, s13, s14, s15}"); 
       
    51         VFP_FLDMIAS(CC_AL,1,8,8);   
       
    52  
       
    53     //asm("fldmias r2!, {s16, s17, s18, s19, s20, s21, s22, s23}");
       
    54     VFP_FLDMIAS(CC_AL,2,16,8);
       
    55    
       
    56     //asm("fadds s24, s8, s16"); 
       
    57         VFP_FADDS(CC_AL,24,8,16);      
       
    58    
       
    59     asm("subs ip, ip, #1"); 
       
    60     asm("bne vfp_add_f32_loop2"); 
       
    61   
       
    62     //asm("fmxr fpscr, lr"); 
       
    63     VFP_FMXR(,VFP_XREG_FPSCR,14);
       
    64       
       
    65    asm("vfp_add_f32_end:");
       
    66    asm ("ldmia sp!, {fp, pc}");
       
    67     
       
    68     }
       
    69 
       
    70 
       
    71 EXPORT_C __NAKED__ void vfp_divide_f32 (float *d, const float *s1, const float *s2, int n)
       
    72     {
       
    73     asm(" stmdb sp!, {fp, lr}"); 
       
    74     asm("ands ip, r3, #7"); 
       
    75     asm("beq vfp_divide_f32_unroll");
       
    76       
       
    77    //asm("fldmias r1!, {s0}"); 
       
    78    VFP_FLDMIAS(CC_AL,1,0,1);
       
    79    
       
    80    asm("vfp_divide_f32_loop1:");
       
    81       
       
    82    //asm("fldmias r2!, {s1}");    
       
    83      VFP_FLDMIAS(CC_AL,2,1,1);
       
    84  
       
    85     //asm("fadds s2, s0, s1");
       
    86     VFP_FDIVS(CC_AL,2,0,1);
       
    87       
       
    88     //asm("fstmias r0!, {s2}");
       
    89     VFP_FSTMIAS(CC_AL,0,2,1);   
       
    90     
       
    91     asm("subs ip, ip, #1"); 
       
    92     asm("bne vfp_divide_f32_loop1");
       
    93     asm("vfp_divide_f32_unroll: movs ip, r3, lsr #3"); 
       
    94     asm("beq vfp_divide_f32_end");
       
    95     
       
    96     
       
    97     //asm("fmrx lr, fpscr");  
       
    98     VFP_FMRX(,14,VFP_XREG_FPSCR);
       
    99     
       
   100     
       
   101     asm("mov fp, #7"); 
       
   102     asm("orr fp, lr, fp, lsl #16"); 
       
   103     
       
   104     //asm("fmxr fpscr, fp"); 
       
   105     VFP_FMXR(,VFP_XREG_FPSCR,11);
       
   106         
       
   107       
       
   108     asm("vfp_divide_f32_loop2:");
       
   109   
       
   110     //asm("fldmias r1!, {s8, s9, s10, s11, s12, s13, s14, s15}"); 
       
   111         VFP_FLDMIAS(CC_AL,1,8,8);   
       
   112  
       
   113     //asm("fldmias r2!, {s16, s17, s18, s19, s20, s21, s22, s23}");
       
   114     VFP_FLDMIAS(CC_AL,2,16,8);
       
   115    
       
   116     //asm("fadds s24, s8, s16"); 
       
   117         VFP_FDIVS(CC_AL,24,8,16);      
       
   118    
       
   119     asm("subs ip, ip, #1"); 
       
   120     asm("bne vfp_divide_f32_loop2"); 
       
   121   
       
   122     //asm("fmxr fpscr, lr"); 
       
   123     VFP_FMXR(,VFP_XREG_FPSCR,14);
       
   124       
       
   125    asm("vfp_divide_f32_end:");
       
   126    asm ("ldmia sp!, {fp, pc}");
       
   127     
       
   128     }
       
   129 
       
   130 EXPORT_C __NAKED__ void vfp_multiply_f32 (float *d, const float *s1, const float *s2, int n)
       
   131     {
       
   132     asm(" stmdb sp!, {fp, lr}"); 
       
   133     asm("ands ip, r3, #7"); 
       
   134     asm("beq vfp_multiply_f32_unroll");
       
   135       
       
   136    //asm("fldmias r1!, {s0}"); 
       
   137    VFP_FLDMIAS(CC_AL,1,0,1);
       
   138    
       
   139    asm("vfp_multiply_f32_loop1:");
       
   140       
       
   141    //asm("fldmias r2!, {s1}");    
       
   142      VFP_FLDMIAS(CC_AL,2,1,1);
       
   143  
       
   144     //asm("fadds s2, s0, s1");
       
   145     VFP_FMULS(CC_AL,2,0,1);
       
   146       
       
   147     //asm("fstmias r0!, {s2}");
       
   148     VFP_FSTMIAS(CC_AL,0,2,1);   
       
   149     
       
   150     asm("subs ip, ip, #1"); 
       
   151     asm("bne vfp_multiply_f32_loop1");
       
   152     asm("vfp_multiply_f32_unroll: movs ip, r3, lsr #3"); 
       
   153     asm("beq vfp_multiply_f32_end");
       
   154     
       
   155     
       
   156     //asm("fmrx lr, fpscr");  
       
   157     VFP_FMRX(,14,VFP_XREG_FPSCR);
       
   158     
       
   159     
       
   160     asm("mov fp, #7"); 
       
   161     asm("orr fp, lr, fp, lsl #16"); 
       
   162     
       
   163     //asm("fmxr fpscr, fp"); 
       
   164     VFP_FMXR(,VFP_XREG_FPSCR,11);
       
   165         
       
   166       
       
   167     asm("vfp_multiply_f32_loop2:");
       
   168   
       
   169     //asm("fldmias r1!, {s8, s9, s10, s11, s12, s13, s14, s15}"); 
       
   170         VFP_FLDMIAS(CC_AL,1,8,8);   
       
   171  
       
   172     //asm("fldmias r2!, {s16, s17, s18, s19, s20, s21, s22, s23}");
       
   173     VFP_FLDMIAS(CC_AL,2,16,8);
       
   174    
       
   175     //asm("fadds s24, s8, s16"); 
       
   176         VFP_FMULS(CC_AL,24,8,16);      
       
   177    
       
   178     asm("subs ip, ip, #1"); 
       
   179     asm("bne vfp_multiply_f32_loop2"); 
       
   180   
       
   181     //asm("fmxr fpscr, lr"); 
       
   182     VFP_FMXR(,VFP_XREG_FPSCR,14);
       
   183       
       
   184    asm("vfp_multiply_f32_end:");
       
   185    asm ("ldmia sp!, {fp, pc}");
       
   186     
       
   187     }
       
   188 
       
   189 EXPORT_C __NAKED__ void vfp_subtract_f32 (float *d, const float *s1, const float *s2, int n)
       
   190     {
       
   191     asm(" stmdb sp!, {fp, lr}"); 
       
   192     asm("ands ip, r3, #7"); 
       
   193     asm("beq vfp_subtract_f32_unroll");
       
   194       
       
   195    //asm("fldmias r1!, {s0}"); 
       
   196    VFP_FLDMIAS(CC_AL,1,0,1);
       
   197    
       
   198    asm("vfp_subtract_f32_loop1:");
       
   199       
       
   200    //asm("fldmias r2!, {s1}");    
       
   201      VFP_FLDMIAS(CC_AL,2,1,1);
       
   202  
       
   203     //asm("fadds s2, s0, s1");
       
   204     VFP_FSUBS(CC_AL,2,0,1);
       
   205       
       
   206     //asm("fstmias r0!, {s2}");
       
   207     VFP_FSTMIAS(CC_AL,0,2,1);   
       
   208     
       
   209     asm("subs ip, ip, #1"); 
       
   210     asm("bne vfp_subtract_f32_loop1");
       
   211     asm("vfp_subtract_f32_unroll: movs ip, r3, lsr #3"); 
       
   212     asm("beq vfp_subtract_f32_end");
       
   213     
       
   214     
       
   215     //asm("fmrx lr, fpscr");  
       
   216     VFP_FMRX(,14,VFP_XREG_FPSCR);
       
   217     
       
   218     
       
   219     asm("mov fp, #7"); 
       
   220     asm("orr fp, lr, fp, lsl #16"); 
       
   221     
       
   222     //asm("fmxr fpscr, fp"); 
       
   223     VFP_FMXR(,VFP_XREG_FPSCR,11);
       
   224         
       
   225       
       
   226     asm("vfp_subtract_f32_loop2:");
       
   227   
       
   228     //asm("fldmias r1!, {s8, s9, s10, s11, s12, s13, s14, s15}"); 
       
   229         VFP_FLDMIAS(CC_AL,1,8,8);   
       
   230  
       
   231     //asm("fldmias r2!, {s16, s17, s18, s19, s20, s21, s22, s23}");
       
   232     VFP_FLDMIAS(CC_AL,2,16,8);
       
   233    
       
   234     //asm("fadds s24, s8, s16"); 
       
   235         VFP_FSUBS(CC_AL,24,8,16);      
       
   236    
       
   237     asm("subs ip, ip, #1"); 
       
   238     asm("bne vfp_subtract_f32_loop2"); 
       
   239   
       
   240     //asm("fmxr fpscr, lr"); 
       
   241     VFP_FMXR(,VFP_XREG_FPSCR,14);
       
   242       
       
   243    asm("vfp_subtract_f32_end:");
       
   244    asm ("ldmia sp!, {fp, pc}");
       
   245     
       
   246     }
       
   247 
       
   248 EXPORT_C __NAKED__ void vfp_add_f64 (double *d, const double *s1, const double *s2, int n)
       
   249 {
       
   250     asm("stmdb         sp!, {fp, lr}");            /* save registers to stack */     
       
   251     asm("ands          ip, r3, #3");               /* ip = n % 3 */                 
       
   252     asm("beq           vfp_add_f64_unroll"); /* if ip == 0 goto prep_loop2 */ 
       
   253     asm("vfp_add_f64_loop1:");                                                   
       
   254     
       
   255     //asm("fldmiad       r1!, {d0}");   
       
   256     VFP_FLDMIAD(CC_AL,1,0,1);
       
   257                                                   
       
   258     //asm("fldmiad       r2!, {d1}");       
       
   259     VFP_FLDMIAD(CC_AL,2,1,1);         
       
   260                                          
       
   261     //asm("faddd  d2, d0, d1");       
       
   262     VFP_FADDD(,2,0,1);
       
   263                                              
       
   264     //asm("fstmiad       r0!, {d2}");     
       
   265     VFP_FSTMIAD(CC_AL,0,2,1);                                                     
       
   266                                       
       
   267     asm("subs          ip, ip, #1");                                                
       
   268     asm("bne           vfp_add_f64_loop1");                                   
       
   269     asm("vfp_add_f64_unroll:");                  /* unroll by 4 */                
       
   270     asm("movs          ip, r3, lsr #2");           /* ip = n / 4 */                 
       
   271     asm("  beq           vfp_add_f64_end");    /* if ip == 0 goto finish */     
       
   272     
       
   273     //asm("  fmrx          lr, fpscr");                /* read fpscr register into arm */
       
   274     VFP_FMRX(,14,VFP_XREG_FPSCR);
       
   275     
       
   276     asm("mov           fp, #3");                                                    
       
   277     asm("orr           fp, lr, fp, lsl #16");      /* set vector lenght to 8 */     
       
   278     
       
   279     //asm("fmxr          fpscr, fp");      
       
   280     VFP_FMXR(,VFP_XREG_FPSCR,11);    
       
   281                                                
       
   282     asm("vfp_add_f64_loop2:");                                                    
       
   283     
       
   284     //asm("fldmiad       r1!, {d4, d5, d6, d7}");    
       
   285     VFP_FLDMIAS(CC_AL,1,4,4);                                 
       
   286 
       
   287     //asm("fldmiad       r2!, {d8, d9, d10, d11}");                                    
       
   288     VFP_FLDMIAS(CC_AL,2,8,4);                                 
       
   289     
       
   290     //asm("faddd  d12, d4, d8");                                                
       
   291     VFP_FADDD(,12,4,8);
       
   292     
       
   293     //asm("fstmiad       r0!, {d12, d13, d14, d15}");                                  
       
   294     VFP_FSTMIAS(CC_AL,0,12,4);                                 
       
   295     
       
   296     asm("subs          ip, ip, #1");                                                
       
   297     asm("bne           vfp_add_f64_loop2");                                   
       
   298     
       
   299     //asm("fmxr          fpscr, lr");                /* restore original fpscr */      
       
   300     VFP_FMXR(,VFP_XREG_FPSCR,14);
       
   301                                     
       
   302     asm("vfp_add_f64_end:");                                                      
       
   303     asm("ldmia         sp!, {fp, pc}");        /* recovering from stack and return */   
       
   304 }     
       
   305   
       
   306   
       
   307 
       
   308   
       
   309 EXPORT_C __NAKED__  void vfp_abs_f32_f32_ns(float *d, const float *s, int n) 
       
   310     {                                                         
       
   311     asm("stmdb         sp!, {fp, lr}");            /* save registers to stack */    
       
   312     asm("ands          ip, r2, #7");               /* ip = n % 8 */                 
       
   313     asm("beq           vfp_abs_f32_f32_ns_unroll"); /* if ip == 0 goto prep_loop2 */ 
       
   314   	asm("vfp_abs_f32_f32_ns_loop1:");                                                   
       
   315    
       
   316     //asm("fldmias       r1!, {s0}");  
       
   317     VFP_FLDMIAS(CC_AL,1,0,1);
       
   318                                                    
       
   319     //asm("fabss  s2, s0");  
       
   320     VFP_FABSS(CC_AL,2,0);
       
   321                                                       
       
   322     //asm("fstmias       r0!, {s2}");                                                 
       
   323     VFP_FSTMIAS(CC_AL,0,2,1);   
       
   324    
       
   325     asm("subs          ip, ip, #1");                                                
       
   326     asm("bne           vfp_abs_f32_f32_ns_loop1");                                   
       
   327   	asm("vfp_abs_f32_f32_ns_unroll:");                 /* unroll by 8 */                
       
   328     asm("movs          ip, r2, lsr #3");           /* ip = n / 8 */                 
       
   329     asm("beq           vfp_abs_f32_f32_ns_end");    /* if ip == 0 goto finish */     
       
   330    
       
   331     //asm("fmrx          lr, fpscr");                /* read fpscr register into arm */
       
   332     VFP_FMRX(,14,VFP_XREG_FPSCR);
       
   333    
       
   334     asm("mov           fp, #7");                                                    
       
   335     asm("orr           fp, lr, fp, lsl #16");      /* set vector lenght to 8 */     
       
   336   
       
   337     //asm("fmxr          fpscr, fp");                                                 
       
   338   	VFP_FMXR(,VFP_XREG_FPSCR,11); 
       
   339   
       
   340   	asm("vfp_abs_f32_f32_ns_loop2:");                                                   
       
   341    
       
   342     //asm("fldmias       r1!, {s8, s9, s10, s11, s12, s13, s14, s15}"); 
       
   343     VFP_FLDMIAS(CC_AL,1,8,8);
       
   344                    
       
   345     //asm("fabss  s24, s8");                                                   
       
   346     VFP_FABSS(CC_AL,2,0);
       
   347    
       
   348     //asm("fstmias       r0!, {s24, s25, s26, s27, s28, s29, s30, s31}");             
       
   349     VFP_FSTMIAS(CC_AL,0,24,8);
       
   350     
       
   351     asm("subs          ip, ip, #1");                                                
       
   352     asm("bne           vfp_abs_f32_f32_ns_loop2");                                   
       
   353     
       
   354     //asm("fmxr          fpscr, lr");                /* restore original fpscr */      
       
   355   	VFP_FMXR(,VFP_XREG_FPSCR,14);
       
   356   	 
       
   357   	asm("vfp_abs_f32_f32_ns_end:");                                                      
       
   358     asm("ldmia         sp!, {fp, pc}");        /* recovering from stack and return */ 
       
   359 	} 
       
   360 	
       
   361 EXPORT_C __NAKED__  void vfp_negative_f32(float *d, const float *s, int n)
       
   362     {                                                         
       
   363     asm("stmdb         sp!, {fp, lr}");            /* save registers to stack */    
       
   364     asm("ands          ip, r2, #7");               /* ip = n % 8 */                 
       
   365     asm("beq           vfp_negative_f32_unroll"); /* if ip == 0 goto prep_loop2 */ 
       
   366   	asm("vfp_negative_f32_loop1:");                                                   
       
   367     
       
   368     //asm("fldmias       r1!, {s0}"); 
       
   369     VFP_FLDMIAS(CC_AL,1,0,1);
       
   370                                                     
       
   371     //asm("fnegs  s2, s0");                                                    
       
   372     VFP_FNEGS(CC_AL,2,0);
       
   373      
       
   374     //asm("fstmias       r0!, {s2}");                                                 
       
   375     VFP_FSTMIAS(CC_AL,0,2,1); 
       
   376     
       
   377     asm("subs          ip, ip, #1");                                                
       
   378     asm("bne           vfp_negative_f32_loop1");                                   
       
   379   	asm("vfp_negative_f32_unroll:");                 /* unroll by 8 */                
       
   380     asm("movs          ip, r2, lsr #3");           /* ip = n / 8 */                 
       
   381     asm("beq           vfp_negative_f32_end");    /* if ip == 0 goto finish */     
       
   382    
       
   383     //asm("fmrx          lr, fpscr");                /* read fpscr register into arm */
       
   384     VFP_FMRX(,14,VFP_XREG_FPSCR);
       
   385    
       
   386     asm("mov           fp, #7");                                                    
       
   387     asm("orr           fp, lr, fp, lsl #16");      /* set vector lenght to 8 */     
       
   388    
       
   389    // asm("fmxr          fpscr, fp");                                                 
       
   390   	VFP_FMXR(,VFP_XREG_FPSCR,11); 
       
   391   	
       
   392   	asm("vfp_negative_f32_loop2:");                                                   
       
   393     
       
   394     //asm("fldmias       r1!, {s8, s9, s10, s11, s12, s13, s14, s15}"); 
       
   395     VFP_FLDMIAS(CC_AL,1,8,8);
       
   396                    
       
   397     //asm("fnegs  s24, s8");                                                   
       
   398     VFP_FNEGS(CC_AL,2,0);
       
   399      
       
   400     //asm("fstmias       r0!, {s24, s25, s26, s27, s28, s29, s30, s31}");   
       
   401     VFP_FSTMIAS(CC_AL,0,24,8);
       
   402               
       
   403     asm("subs          ip, ip, #1");                                                
       
   404     asm("bne           vfp_negative_f32_loop2");           
       
   405                             
       
   406     //asm("fmxr          fpscr, lr");                /* restore original fpscr */      
       
   407   	VFP_FMXR(,VFP_XREG_FPSCR,14);
       
   408   	
       
   409   	asm("vfp_negative_f32_end:");                                                      
       
   410     asm("ldmia         sp!, {fp, pc}");        /* recovering from stack and return */ 
       
   411 	} 
       
   412 		
       
   413 EXPORT_C __NAKED__ 	void vfp_abs_f64_f64_ns(double *d, const double *s, int n)
       
   414 	{                                                       
       
   415    asm("stmdb         sp!, {fp, lr}");            /* save registers to stack */    
       
   416    asm("ands          ip, r2, #3");               /* ip = n % 3 */                 
       
   417    asm("beq           vfp_abs_f64_f64_ns_unroll"); /* if ip == 0 goto prep_loop2 */ 
       
   418    asm("vfp_abs_f64_f64_ns_loop1:");                                                   
       
   419 
       
   420    //asm("fldmiad       r1!, {d0}"); 
       
   421    VFP_FLDMIAD(CC_AL,1,0,1);
       
   422                                                    
       
   423    //asm("fabsd  d2, d0"); 
       
   424    VFP_FABSD(,2,0);
       
   425                                                       
       
   426    //asm("fstmiad       r0!, {d2}");                                                 
       
   427    VFP_FSTMIAD(CC_AL,0,2,1);  
       
   428     
       
   429    asm("subs          ip, ip, #1");                                                
       
   430    asm("bne           vfp_abs_f64_f64_ns_loop1");                                   
       
   431    asm("vfp_abs_f64_f64_ns_unroll:");                 /* unroll by 4 */                
       
   432    asm("movs          ip, r2, lsr #2");           /* ip = n / 4 */                 
       
   433    asm("beq           vfp_abs_f64_f64_ns_end");    /* if ip == 0 goto finish */     
       
   434    
       
   435    //asm("fmrx          lr, fpscr");                /* read fpscr register into arm */
       
   436  	 VFP_FMRX(,14,VFP_XREG_FPSCR);
       
   437    
       
   438    asm("mov           fp, #3");                                                    
       
   439    asm("orr           fp, lr, fp, lsl #16");      /* set vector lenght to 4 */     
       
   440    
       
   441    //asm("fmxr          fpscr, fp");                                                 
       
   442    VFP_FMXR(,VFP_XREG_FPSCR,11); 
       
   443    
       
   444    asm("vfp_abs_f64_f64_ns_loop2:");                                                   
       
   445                    
       
   446                                                      
       
   447    //asm("fldmiad       r1!, {d4, d5, d6, d7}");                                     
       
   448    VFP_FLDMIAD(CC_AL,1,4,4);
       
   449    
       
   450    //asm("fabsd  d12, d4");   
       
   451    VFP_FABSD(,12,4);
       
   452                                                    
       
   453    //asm("fstmiad       r0!, {d12, d13, d14, d15}");                                 
       
   454    VFP_FSTMIAD(CC_AL,0,12,4);
       
   455    
       
   456    asm("subs          ip, ip, #1");                                                
       
   457    asm("bne           vfp_abs_f64_f64_ns_loop2");                                   
       
   458    
       
   459   // asm("fmxr          fpscr, lr");                /* restore original fpscr */     
       
   460    	VFP_FMXR(,VFP_XREG_FPSCR,14);
       
   461    	
       
   462    asm("vfp_abs_f64_f64_ns_end:");                                                     
       
   463    asm("ldmia         sp!, {fp, pc}");        /* recovering from stack and return */   
       
   464 	}
       
   465 	
       
   466 	
       
   467 EXPORT_C __NAKED__ 	void vfp_negative_f64(double *d, const double *s, int n)
       
   468 	{                                                       
       
   469    asm("stmdb         sp!, {fp, lr}");            /* save registers to stack */    
       
   470    asm("ands          ip, r2, #3");               /* ip = n % 3 */                 
       
   471    asm("beq           vfp_negative_f64_unroll"); /* if ip == 0 goto prep_loop2 */ 
       
   472    asm("vfp_negative_f64_loop1:");                                                   
       
   473    
       
   474    //asm("fldmiad       r1!, {d0}");                                                 
       
   475    VFP_FLDMIAD(CC_AL,1,0,1);
       
   476    
       
   477    //asm("fnegd  d2, d0");                                                    
       
   478    VFP_FNEGD(,2,0);
       
   479    
       
   480    //asm("fstmiad       r0!, {d2}");                                                 
       
   481    VFP_FSTMIAD(CC_AL,0,2,1);
       
   482    
       
   483    asm("subs          ip, ip, #1");                                                
       
   484    asm("bne           vfp_negative_f64_loop1");                                   
       
   485    asm("vfp_negative_f64_unroll:");                 /* unroll by 4 */                
       
   486    asm("movs          ip, r2, lsr #2");           /* ip = n / 4 */                 
       
   487    asm("beq           vfp_negative_f64_end");    /* if ip == 0 goto finish */     
       
   488    
       
   489    //asm("fmrx          lr, fpscr");                /* read fpscr register into arm */
       
   490    VFP_FMRX(,14,VFP_XREG_FPSCR);
       
   491    
       
   492    asm("mov           fp, #3");                                                    
       
   493    asm("orr           fp, lr, fp, lsl #16");      /* set vector lenght to 4 */     
       
   494    
       
   495    //asm("fmxr          fpscr, fp");                                                 
       
   496    VFP_FMXR(,VFP_XREG_FPSCR,11); 
       
   497   
       
   498    asm("vfp_negative_f64_loop2:");           
       
   499      
       
   500    //asm("fldmiad       r1!, {d4, d5, d6, d7}");   
       
   501    VFP_FLDMIAD(CC_AL,1,4,4);
       
   502                                       
       
   503    //asm("fnegd  d12, d4"); 
       
   504    VFP_FNEGD(,12,4);
       
   505                                                        
       
   506    //asm("fstmiad       r0!, {d12, d13, d14, d15}");                                 
       
   507    VFP_FSTMIAD(CC_AL,0,12,4);
       
   508     
       
   509    asm("subs          ip, ip, #1");                                                
       
   510    asm("bne           vfp_negative_f64_loop2");                                   
       
   511    
       
   512    //asm("fmxr          fpscr, lr");                /* restore original fpscr */     
       
   513    VFP_FMXR(,VFP_XREG_FPSCR,14);
       
   514    	
       
   515    asm("vfp_negative_f64_end:");                                                     
       
   516    asm("ldmia         sp!, {fp, pc}");        /* recovering from stack and return */   
       
   517 	}
       
   518 		
       
   519 		
       
   520 //Rakhi changes
       
   521 EXPORT_C __NAKED__ void vfp_divide_f64 (double *d, const double *s1, const double *s2, int n)
       
   522 {
       
   523     asm("stmdb         sp!, {fp, lr}");            /* save registers to stack */     
       
   524     asm("ands          ip, r3, #3");               /* ip = n % 3 */                 
       
   525     asm("beq           vfp_divide_f64_unroll"); /* if ip == 0 goto prep_loop2 */ 
       
   526     asm("vfp_divide_f64_loop1:");                                                   
       
   527     
       
   528     //asm("fldmiad       r1!, {d0}");   
       
   529     VFP_FLDMIAD(CC_AL,1,0,1);
       
   530                                                   
       
   531     //asm("fldmiad       r2!, {d1}");       
       
   532     VFP_FLDMIAD(CC_AL,2,1,1);         
       
   533                                          
       
   534     //asm("faddd  d2, d0, d1");       
       
   535     VFP_FDIVD(,2,0,1);
       
   536                                              
       
   537     //asm("fstmiad       r0!, {d2}");     
       
   538     VFP_FSTMIAD(CC_AL,0,2,1);                                                     
       
   539                                       
       
   540     asm("subs          ip, ip, #1");                                                
       
   541     asm("bne           vfp_divide_f64_loop1");                                   
       
   542     asm("vfp_divide_f64_unroll:");                  /* unroll by 4 */                
       
   543     asm("movs          ip, r3, lsr #2");           /* ip = n / 4 */                 
       
   544     asm("  beq           vfp_divide_f64_end");    /* if ip == 0 goto finish */     
       
   545     
       
   546     //asm("  fmrx          lr, fpscr");                /* read fpscr register into arm */
       
   547     VFP_FMRX(,14,VFP_XREG_FPSCR);
       
   548     
       
   549     asm("mov           fp, #3");                                                    
       
   550     asm("orr           fp, lr, fp, lsl #16");      /* set vector lenght to 8 */     
       
   551     
       
   552     //asm("fmxr          fpscr, fp");      
       
   553     VFP_FMXR(,VFP_XREG_FPSCR,11);    
       
   554                                                
       
   555     asm("vfp_divide_f64_loop2:");                                                    
       
   556     
       
   557     //asm("fldmiad       r1!, {d4, d5, d6, d7}");    
       
   558     VFP_FLDMIAS(CC_AL,1,4,4);                                 
       
   559 
       
   560     //asm("fldmiad       r2!, {d8, d9, d10, d11}");                                    
       
   561     VFP_FLDMIAS(CC_AL,2,8,4);                                 
       
   562     
       
   563     //asm("faddd  d12, d4, d8");                                                
       
   564     VFP_FDIVD(,12,4,8);
       
   565     
       
   566     //asm("fstmiad       r0!, {d12, d13, d14, d15}");                                  
       
   567     VFP_FSTMIAS(CC_AL,0,12,4);                                 
       
   568     
       
   569     asm("subs          ip, ip, #1");                                                
       
   570     asm("bne           vfp_divide_f64_loop2");                                   
       
   571     
       
   572     //asm("fmxr          fpscr, lr");                /* restore original fpscr */      
       
   573     VFP_FMXR(,VFP_XREG_FPSCR,14);
       
   574                                     
       
   575     asm("vfp_divide_f64_end:");                                                      
       
   576     asm("ldmia         sp!, {fp, pc}");        /* recovering from stack and return */   
       
   577 }     
       
   578 
       
   579 EXPORT_C __NAKED__ void vfp_multiply_f64 (double *d, const double *s1, const double *s2, int n)
       
   580 {
       
   581     asm("stmdb         sp!, {fp, lr}");            /* save registers to stack */     
       
   582     asm("ands          ip, r3, #3");               /* ip = n % 3 */                 
       
   583     asm("beq           vfp_multiply_f64_unroll"); /* if ip == 0 goto prep_loop2 */ 
       
   584     asm("vfp_multiply_f64_loop1:");                                                   
       
   585     
       
   586     //asm("fldmiad       r1!, {d0}");   
       
   587     VFP_FLDMIAD(CC_AL,1,0,1);
       
   588                                                   
       
   589     //asm("fldmiad       r2!, {d1}");       
       
   590     VFP_FLDMIAD(CC_AL,2,1,1);         
       
   591                                          
       
   592     //asm("faddd  d2, d0, d1");       
       
   593     VFP_FMULD(,2,0,1);
       
   594                                              
       
   595     //asm("fstmiad       r0!, {d2}");     
       
   596     VFP_FSTMIAD(CC_AL,0,2,1);                                                     
       
   597                                       
       
   598     asm("subs          ip, ip, #1");                                                
       
   599     asm("bne           vfp_multiply_f64_loop1");                                   
       
   600     asm("vfp_multiply_f64_unroll:");                  /* unroll by 4 */                
       
   601     asm("movs          ip, r3, lsr #2");           /* ip = n / 4 */                 
       
   602     asm("  beq           vfp_multiply_f64_end");    /* if ip == 0 goto finish */     
       
   603     
       
   604     //asm("  fmrx          lr, fpscr");                /* read fpscr register into arm */
       
   605     VFP_FMRX(,14,VFP_XREG_FPSCR);
       
   606     
       
   607     asm("mov           fp, #3");                                                    
       
   608     asm("orr           fp, lr, fp, lsl #16");      /* set vector lenght to 8 */     
       
   609     
       
   610     //asm("fmxr          fpscr, fp");      
       
   611     VFP_FMXR(,VFP_XREG_FPSCR,11);    
       
   612                                                
       
   613     asm("vfp_multiply_f64_loop2:");                                                    
       
   614     
       
   615     //asm("fldmiad       r1!, {d4, d5, d6, d7}");    
       
   616     VFP_FLDMIAS(CC_AL,1,4,4);                                 
       
   617 
       
   618     //asm("fldmiad       r2!, {d8, d9, d10, d11}");                                    
       
   619     VFP_FLDMIAS(CC_AL,2,8,4);                                 
       
   620     
       
   621     //asm("faddd  d12, d4, d8");                                                
       
   622     VFP_FMULD(,12,4,8);
       
   623     
       
   624     //asm("fstmiad       r0!, {d12, d13, d14, d15}");                                  
       
   625     VFP_FSTMIAS(CC_AL,0,12,4);                                 
       
   626     
       
   627     asm("subs          ip, ip, #1");                                                
       
   628     asm("bne           vfp_multiply_f64_loop2");                                   
       
   629     
       
   630     //asm("fmxr          fpscr, lr");                /* restore original fpscr */      
       
   631     VFP_FMXR(,VFP_XREG_FPSCR,14);
       
   632                                     
       
   633     asm("vfp_multiply_f64_end:");                                                      
       
   634     asm("ldmia         sp!, {fp, pc}");        /* recovering from stack and return */   
       
   635 }   
       
   636 
       
   637 EXPORT_C __NAKED__ void vfp_subtract_f64 (double *d, const double *s1, const double *s2, int n)
       
   638 {
       
   639     asm("stmdb         sp!, {fp, lr}");            /* save registers to stack */     
       
   640     asm("ands          ip, r3, #3");               /* ip = n % 3 */                 
       
   641     asm("beq           vfp_subtract_f64_unroll"); /* if ip == 0 goto prep_loop2 */ 
       
   642     asm("vfp_subtract_f64_loop1:");                                                   
       
   643     
       
   644     //asm("fldmiad       r1!, {d0}");   
       
   645     VFP_FLDMIAD(CC_AL,1,0,1);
       
   646                                                   
       
   647     //asm("fldmiad       r2!, {d1}");       
       
   648     VFP_FLDMIAD(CC_AL,2,1,1);         
       
   649                                          
       
   650     //asm("faddd  d2, d0, d1");       
       
   651     VFP_FSUBD(,2,0,1);
       
   652                                              
       
   653     //asm("fstmiad       r0!, {d2}");     
       
   654     VFP_FSTMIAD(CC_AL,0,2,1);                                                     
       
   655                                       
       
   656     asm("subs          ip, ip, #1");                                                
       
   657     asm("bne           vfp_subtract_f64_loop1");                                   
       
   658     asm("vfp_subtract_f64_unroll:");                  /* unroll by 4 */                
       
   659     asm("movs          ip, r3, lsr #2");           /* ip = n / 4 */                 
       
   660     asm("  beq           vfp_subtract_f64_end");    /* if ip == 0 goto finish */     
       
   661     
       
   662     //asm("  fmrx          lr, fpscr");                /* read fpscr register into arm */
       
   663     VFP_FMRX(,14,VFP_XREG_FPSCR);
       
   664     
       
   665     asm("mov           fp, #3");                                                    
       
   666     asm("orr           fp, lr, fp, lsl #16");      /* set vector lenght to 8 */     
       
   667     
       
   668     //asm("fmxr          fpscr, fp");      
       
   669     VFP_FMXR(,VFP_XREG_FPSCR,11);    
       
   670                                                
       
   671     asm("vfp_subtract_f64_loop2:");                                                    
       
   672     
       
   673     //asm("fldmiad       r1!, {d4, d5, d6, d7}");    
       
   674     VFP_FLDMIAS(CC_AL,1,4,4);                                 
       
   675 
       
   676     //asm("fldmiad       r2!, {d8, d9, d10, d11}");                                    
       
   677     VFP_FLDMIAS(CC_AL,2,8,4);                                 
       
   678     
       
   679     //asm("faddd  d12, d4, d8");                                                
       
   680     VFP_FSUBD(,12,4,8);
       
   681     
       
   682     //asm("fstmiad       r0!, {d12, d13, d14, d15}");                                  
       
   683     VFP_FSTMIAS(CC_AL,0,12,4);                                 
       
   684     
       
   685     asm("subs          ip, ip, #1");                                                
       
   686     asm("bne           vfp_subtract_f64_loop2");                                   
       
   687     
       
   688     //asm("fmxr          fpscr, lr");                /* restore original fpscr */      
       
   689     VFP_FMXR(,VFP_XREG_FPSCR,14);
       
   690                                     
       
   691     asm("vfp_subtract_f64_end:");                                                      
       
   692     asm("ldmia         sp!, {fp, pc}");        /* recovering from stack and return */   
       
   693 }     
       
   694 
       
   695 EXPORT_C __NAKED__ void vfp_scalaradd_f32_ns (float *d, const float *s1, const float *s2_1, int n)
       
   696 {
       
   697     asm("stmdb         sp!, {fp, lr}");            /* save registers to stack */   
       
   698     
       
   699     //asm("fldmias       r2, {s1}");                 /* load scalar value */      
       
   700     VFP_FLDMIAS(CC_AL,2,1,1);
       
   701     
       
   702     asm("ands          ip, r3, #7");               /* ip = n % 8 */                
       
   703     asm("beq           vfp_scalaradd_f32_ns_unroll"); /* if ip == 0 goto prep_loop2 */
       
   704     asm("vfp_scalaradd_f32_ns_loop1:");                                                  
       
   705     
       
   706     //asm("fldmias       r1!, {s0}");
       
   707     VFP_FLDMIAS(CC_AL,1,0,1);
       
   708     
       
   709     //asm("FADDS  s2, s0, s1");   
       
   710     VFP_FADDS(CC_AL,2,0,1);
       
   711     
       
   712     //asm("fstmias       r0!, {s2}");
       
   713     VFP_FSTMIAS(CC_AL,0,2,8);
       
   714     
       
   715     asm("subs          ip, ip, #1");                                               
       
   716     asm("bne           vfp_scalaradd_f32_ns_loop1");                                  
       
   717     asm("vfp_scalaradd_f32_ns_unroll:");                 /* unroll by 8 */               
       
   718     asm("movs          ip, r3, lsr #3");           /* ip = n / 8 */                
       
   719     asm("beq           vfp_scalaradd_f32_ns_end");    /* if ip == 0 goto finish */    
       
   720     
       
   721     //asm("fmrx          lr, fpscr");                /* read fpscr register into arm */\
       
   722     VFP_FMRX(,14,VFP_XREG_FPSCR);
       
   723     
       
   724     asm("mov           fp, #7");                                                   
       
   725     asm("orr           fp, lr, fp, lsl #16");      /* set vector lenght to 8 */    
       
   726     
       
   727     //asm("fmxr          fpscr, fp");                                                
       
   728     VFP_FMXR(,VFP_XREG_FPSCR,11);
       
   729     
       
   730     asm("vfp_scalaradd_f32_ns_loop2:");                                                  
       
   731     //asm("fldmias       r1!, {s8, s9, s10, s11, s12, s13, s14, s15}");   
       
   732     VFP_FLDMIAS(CC_AL,1,8,8);
       
   733     
       
   734     //asm("FADDS  s24, s8, s1");    
       
   735     VFP_FADDS(CC_AL,24,8,1);
       
   736     
       
   737     //asm("fstmias       r0!, {s24, s25, s26, s27, s28, s29, s30, s31}");     
       
   738     VFP_FSTMIAS(CC_AL,0,24,8);
       
   739     
       
   740     asm("subs          ip, ip, #1");                                               
       
   741     asm("bne           vfp_scalaradd_f32_ns_loop2");      
       
   742     
       
   743     //asm("fmxr          fpscr, lr");                /* restore original fpscr */    
       
   744     VFP_FMXR(,VFP_XREG_FPSCR,14);
       
   745     
       
   746     asm("vfp_scalaradd_f32_ns_end:");                                                    
       
   747     asm("ldmia         sp!, {fp, pc}");        /* recovering from stack and return */   
       
   748 }    
       
   749 
       
   750 EXPORT_C __NAKED__ void vfp_scalarmultiply_f32_ns (float *d, const float *s1, const float *s2_1, int n)
       
   751 {
       
   752     asm("stmdb         sp!, {fp, lr}");            /* save registers to stack */   
       
   753     
       
   754     //asm("fldmias       r2, {s1}");                 /* load scalar value */      
       
   755     VFP_FLDMIAS(CC_AL,2,1,1);
       
   756     
       
   757     asm("ands          ip, r3, #7");               /* ip = n % 8 */                
       
   758     asm("beq           vfp_scalarmultiply_f32_ns_unroll"); /* if ip == 0 goto prep_loop2 */
       
   759     asm("vfp_scalarmultiply_f32_ns_loop1:");                                                  
       
   760     
       
   761     //asm("fldmias       r1!, {s0}");
       
   762     VFP_FLDMIAS(CC_AL,1,0,1);
       
   763     
       
   764     //asm("FADDS  s2, s0, s1");   
       
   765     VFP_FMULS(CC_AL,2,0,1);
       
   766     
       
   767     //asm("fstmias       r0!, {s2}");
       
   768     VFP_FSTMIAS(CC_AL,0,2,8);
       
   769     
       
   770     asm("subs          ip, ip, #1");                                               
       
   771     asm("bne           vfp_scalarmultiply_f32_ns_loop1");                                  
       
   772     asm("vfp_scalarmultiply_f32_ns_unroll:");                 /* unroll by 8 */               
       
   773     asm("movs          ip, r3, lsr #3");           /* ip = n / 8 */                
       
   774     asm("beq           vfp_scalarmultiply_f32_ns_end");    /* if ip == 0 goto finish */    
       
   775     
       
   776     //asm("fmrx          lr, fpscr");                /* read fpscr register into arm */\
       
   777     VFP_FMRX(,14,VFP_XREG_FPSCR);
       
   778     
       
   779     asm("mov           fp, #7");                                                   
       
   780     asm("orr           fp, lr, fp, lsl #16");      /* set vector lenght to 8 */    
       
   781     
       
   782     //asm("fmxr          fpscr, fp");                                                
       
   783     VFP_FMXR(,VFP_XREG_FPSCR,11);
       
   784     
       
   785     asm("vfp_scalarmultiply_f32_ns_loop2:");                                                  
       
   786     //asm("fldmias       r1!, {s8, s9, s10, s11, s12, s13, s14, s15}");   
       
   787     VFP_FLDMIAS(CC_AL,1,8,8);
       
   788     
       
   789     //asm("FADDS  s24, s8, s1");    
       
   790     VFP_FMULS(CC_AL,24,8,1);
       
   791     
       
   792     //asm("fstmias       r0!, {s24, s25, s26, s27, s28, s29, s30, s31}");     
       
   793     VFP_FSTMIAS(CC_AL,0,24,8);
       
   794     
       
   795     asm("subs          ip, ip, #1");                                               
       
   796     asm("bne           vfp_scalarmultiply_f32_ns_loop2");      
       
   797     
       
   798     //asm("fmxr          fpscr, lr");                /* restore original fpscr */    
       
   799     VFP_FMXR(,VFP_XREG_FPSCR,14);
       
   800     
       
   801     asm("vfp_scalarmultiply_f32_ns_end:");                                                    
       
   802     asm("ldmia         sp!, {fp, pc}");        /* recovering from stack and return */   
       
   803 }    
       
   804 
       
   805 EXPORT_C __NAKED__ void vfp_scalaradd_f64_ns (double *d, const double *s1, const double *s2_1, int n)
       
   806 {                                                       
       
   807     asm("stmdb         sp!, {fp, lr}");            /* save registers to stack */   
       
   808     
       
   809     //asm("fldmiad       r2, {d1}");                 /* load scalar value */  
       
   810      VFP_FLDMIAD(CC_AL,2,1,1);
       
   811     
       
   812     asm("ands          ip, r3, #3");               /* ip = n % 3 */                
       
   813     asm("beq           vfp_scalaradd_f64_ns_unroll"); /* if ip == 0 goto prep_loop2 */
       
   814     asm("vfp_scalaradd_f64_ns_loop1:");                                                  
       
   815     //asm("fldmiad       r1!, {d0}");   
       
   816     VFP_FLDMIAD(CC_AL,1,0,1);
       
   817     
       
   818     //asm("VFP_FADDD  d2, d0, d1");    
       
   819     VFP_FADDD(,2,0,1);
       
   820     
       
   821     //asm("fstmiad       r0!, {d2}");
       
   822     VFP_FSTMIAD(CC_AL,0,2,1);
       
   823     
       
   824     asm("subs          ip, ip, #1");                                               
       
   825     asm("bne           vfp_scalaradd_f64_ns_loop1");                                  
       
   826     asm("vfp_scalaradd_f64_ns_unroll:");                 /* unroll by 4 */               
       
   827     asm("movs          ip, r3, lsr #2");           /* ip = n / 4 */                
       
   828     asm("beq           vfp_scalaradd_f64_ns_end");    /* if ip == 0 goto finish */    
       
   829     
       
   830     //asm("fmrx          lr, fpscr");                /* read fpscr register into arm */\
       
   831     VFP_FMRX(,14,VFP_XREG_FPSCR);
       
   832     
       
   833     asm("mov           fp, #3");                                                   
       
   834     asm("orr           fp, lr, fp, lsl #16");      /* set vector lenght to 4 */    
       
   835     
       
   836     //asm("fmxr          fpscr, fp");                                                
       
   837     VFP_FMXR(,VFP_XREG_FPSCR,11);
       
   838     
       
   839     asm("vfp_scalaradd_f64_ns_loop2:");                                                  
       
   840     
       
   841     //asm("fldmiad       r1!, {d4, d5, d6, d7}"); 
       
   842     VFP_FLDMIAD(CC_AL,1,4,4);
       
   843     
       
   844     //asm("VFP_FADDD  d12, d4, d1");   
       
   845     VFP_FADDD(,12,4,1);
       
   846     
       
   847     //asm("fstmiad       r0!, {d12, d13, d14, d15}"); 
       
   848     VFP_FSTMIAD(CC_AL,0,12,4);
       
   849     
       
   850     asm("subs          ip, ip, #1");                                               
       
   851     asm("bne           vfp_scalaradd_f64_ns_loop2");                                  
       
   852     
       
   853     //asm("fmxr          fpscr, lr");                /* restore original fpscr */    
       
   854     VFP_FMXR(,VFP_XREG_FPSCR,14);
       
   855     
       
   856     asm("vfp_scalaradd_f64_ns_end:");                                                    
       
   857     asm("ldmia         sp!, {fp, pc}");        /* recovering from stack and return */   
       
   858 }   
       
   859 	
       
   860 EXPORT_C __NAKED__ void vfp_scalarmultiply_f64_ns (double *d, const double *s1, const double *s2_1, int n)
       
   861 {
       
   862 	                                                       
       
   863     asm("stmdb         sp!, {fp, lr}");            /* save registers to stack */   
       
   864     
       
   865     //asm("fldmiad       r2, {d1}");                 /* load scalar value */  
       
   866      VFP_FLDMIAD(CC_AL,2,1,1);
       
   867     
       
   868     asm("ands          ip, r3, #3");               /* ip = n % 3 */                
       
   869     asm("beq           vfp_scalarmultiply_f64_ns_unroll"); /* if ip == 0 goto prep_loop2 */
       
   870     asm("vfp_scalarmultiply_f64_ns_loop1:");                                                  
       
   871     //asm("fldmiad       r1!, {d0}");   
       
   872     VFP_FLDMIAD(CC_AL,1,0,1);
       
   873     
       
   874     //asm("VFP_FADDD  d2, d0, d1");    
       
   875     VFP_FMULD(,2,0,1);
       
   876     
       
   877     //asm("fstmiad       r0!, {d2}");
       
   878     VFP_FSTMIAD(CC_AL,0,2,1);
       
   879     
       
   880     asm("subs          ip, ip, #1");                                               
       
   881     asm("bne           vfp_scalarmultiply_f64_ns_loop1");                                  
       
   882     asm("vfp_scalarmultiply_f64_ns_unroll:");                 /* unroll by 4 */               
       
   883     asm("movs          ip, r3, lsr #2");           /* ip = n / 4 */                
       
   884     asm("beq           vfp_scalarmultiply_f64_ns_end");    /* if ip == 0 goto finish */    
       
   885     
       
   886     //asm("fmrx          lr, fpscr");                /* read fpscr register into arm */\
       
   887     VFP_FMRX(,14,VFP_XREG_FPSCR);
       
   888     
       
   889     asm("mov           fp, #3");                                                   
       
   890     asm("orr           fp, lr, fp, lsl #16");      /* set vector lenght to 4 */    
       
   891     
       
   892     //asm("fmxr          fpscr, fp");                                                
       
   893     VFP_FMXR(,VFP_XREG_FPSCR,11);
       
   894     
       
   895     asm("vfp_scalarmultiply_f64_ns_loop2:");                                                  
       
   896     
       
   897     //asm("fldmiad       r1!, {d4, d5, d6, d7}"); 
       
   898     VFP_FLDMIAD(CC_AL,1,4,4);
       
   899     
       
   900     //asm("VFP_FADDD  d12, d4, d1");   
       
   901     VFP_FMULD(,12,4,1);
       
   902     
       
   903     //asm("fstmiad       r0!, {d12, d13, d14, d15}"); 
       
   904     VFP_FSTMIAD(CC_AL,0,12,4);
       
   905     
       
   906     asm("subs          ip, ip, #1");                                               
       
   907     asm("bne           vfp_scalarmultiply_f64_ns_loop2");                                  
       
   908     
       
   909     //asm("fmxr          fpscr, lr");                /* restore original fpscr */    
       
   910     VFP_FMXR(,VFP_XREG_FPSCR,14);
       
   911     
       
   912     asm("vfp_scalarmultiply_f64_ns_end:");                                                    
       
   913     asm("ldmia         sp!, {fp, pc}");        /* recovering from stack and return */  
       
   914 
       
   915 }
       
   916 	
       
   917 		
       
   918 }
       
   919 #endif