kernel/eka/common/arm/cgcchelp.cia
       
     1 // Copyright (c) 1995-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of the License "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // e32\common\arm\cgcchelp.cia
       
    15 // 
       
    16 //
       
    17 
       
    18 #include "../common.h"
       
    19 #ifdef __KERNEL_MODE__
       
    20 #include "nkern.h"
       
    21 #endif
       
    22 
       
    23 extern "C" {
       
    24 #ifdef __GCC32__
       
    25 EXPORT_C __NAKED__ TInt __divsi3(TInt /*dividend*/,TInt /*divisor*/)
       
    26 //
       
    27 // Signed divide of r0 by r1: returns quotient in r0
       
    28 // Quotient is truncated (rounded towards zero).
       
    29 // Destroys r2, r3 and ip
       
    30 // Negates dividend and divisor, then does an unsigned divide; signs
       
    31 // get sorted out again at the end.
       
    32 // 
       
    33 // Have to calculate the sign of the result for the end of the calculation.
       
     34 // Store this in bit 31 of ip; the old lr is saved on the stack.
       
    35 //
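// A reference C sketch of what this computes (illustrative only, not part of
// the original routine): a truncated signed divide built on the unsigned
// divide, negating the quotient when the operand signs differ (the KMinTInt
// corner case is glossed over here).
//
//	TInt RefDivSi3(TInt aDividend, TInt aDivisor)
//		{
//		TUint q = (TUint)(aDividend<0 ? -aDividend : aDividend)
//					/ (TUint)(aDivisor<0 ? -aDivisor : aDivisor);
//		return ((aDividend^aDivisor)<0) ? -(TInt)q : (TInt)q;
//		}
//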
       
    36     {
       
    37 
       
    38 	asm("STMFD	sp!, {lr} ");
       
    39     asm("ANDS    r3, r1, #0x80000000        ");	// r3 bit 31=sign of divisor, rest of r3=0
       
    40     asm("RSBMI   r1, r1, #0                 ");	// r1=ABS(divisor)
       
    41     asm("EORS    ip, r3, r0, ASR #32        ");	// ip bit 31=sign of quotient, all other bits=carry=sign of dividend
       
    42     asm("RSBCS   r0, r0, #0                 ");	// r0=ABS(dividend)
       
    43     asm(".EXTERN 			    ");
       
    44     asm("BL      __umodsi3_start            ");
       
    45     asm("MOV     r0, r3                     ");
       
    46     asm("CMP	 ip, #0						");	// test sign of quotient
       
    47     asm("RSBMI   r0, r0, #0                 ");	// negate if necessary
       
    48 	__POPRET("");
       
    49 	}
       
    50 
       
    51 
       
    52 EXPORT_C __NAKED__ TInt __modsi3(TInt /*dividend*/,TInt /*divisor*/)
       
    53 //
       
    54 // Signed divide of r0 by r1: returns remainder in r0
       
    55 // Sign of remainder = sign of dividend.
       
    56 // Destroys r2, r3 and ip
       
    57 // Negates dividend and divisor, then does an unsigned divide; signs
       
    58 // get sorted out again at the end.
       
    59 //
       
    60 // Have to save sign of dividend in order to apply sign to remainder
       
     61 // at the end of the calculation. Keep a copy of the dividend in ip (sign in
       
     62 // bit 31); the old lr is saved on the stack.
       
    63 //
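// Illustrative only: with truncated division the remainder takes the sign of
// the dividend, never the divisor (for example (-7) % 3 == -1 and 7 % (-3) == 1),
// which is exactly what the conditional negations below implement.
//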
       
    64     {
       
    65 
       
    66 	asm("STMFD	sp!, {lr} ");
       
    67     asm("MOVS    r1, r1                     ");
       
    68     asm("RSBMI   r1, r1, #0                 ");
       
    69     asm("MOVS    ip, r0 ");
       
    70     asm("RSBMI   r0, r0, #0                 ");
       
    71     asm(".EXTERN 			    ");
       
    72     asm("BL      __umodsi3_start            ");
       
    73     asm("MOVS    ip, ip	");
       
    74     asm("RSBMI   r0, r0, #0                 ");
       
    75 	__POPRET("");
       
    76     }
       
    77 
       
    78 EXPORT_C __NAKED__ TUint __udivsi3(TUint /*dividend*/,TUint /*divisor*/)
       
    79 //
       
    80 // Unsigned divide of r0 by r1: returns quotient in r0
       
    81 // Quotient is truncated (rounded towards zero).
       
    82 // Destroys r2, r3 and ip
       
    83 // 
       
    84     {
       
    85 
       
    86     asm("MOV     ip, lr                     ");
       
    87     asm(".EXTERN 			    ");
       
    88     asm("BL      __umodsi3_start            ");
       
    89     asm("MOV     r0, r3                     ");
       
    90 	__JUMP(,ip);
       
    91     }
       
    92 
       
    93 
       
    94 EXPORT_C __NAKED__ long long __divdi3(long long /*dividend*/, long long /*divisor*/)
       
    95 //
       
    96 // Dividend in r1:r0, divisor in r3:r2, Return quotient in r1:r0
       
    97 //
       
    98 	{
       
    99 	asm("stmfd sp!, {r4-r8,lr} ");
       
   100 	asm("eor r8, r1, r3 ");				// sign of result into r8
       
   101 	asm("movs r1, r1 ");
       
   102 	asm("bpl 1f ");
       
   103 	asm("rsbs r0, r0, #0 ");			// ABS(dividend)
       
   104 	asm("rsc r1, r1, #0 ");
       
   105 	asm("1: ");
       
   106 	asm("movs r3, r3 ");
       
   107 	asm("bpl 2f ");
       
   108 	asm("rsbs r2, r2, #0 ");			// ABS(divisor)
       
   109 	asm("rsc r3, r3, #0 ");
       
   110 	asm("2: ");
       
   111 	asm("bl UDiv01 ");					// do the division, result in r4,r5
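	// The next four instructions conditionally negate the 64-bit quotient:
	// "r8, asr #32" evaluates to 0 or 0xFFFFFFFF according to the sign bit of
	// r8, so the EORs form the one's complement when the result must be
	// negative, and the barrel shifter leaves that same sign bit in the carry
	// flag, which the two ADCS then add back in (propagating into the high
	// word) to complete the two's complement negation.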
       
   112 	asm("eors r0, r4, r8, asr #32 ");	// quotient into r1:r0, inverted if quotient -ve
       
   113 	asm("eors r1, r5, r8, asr #32 ");
       
   114 	asm("adcs r0, r0, #0 ");		// if quotient -ve, add 1
       
   115 	asm("adcs r1, r1, #0 ");
       
   116 	__POPRET("r4-r8,");
       
   117 	}
       
   118 
       
   119 EXPORT_C __NAKED__ long long __moddi3(long long /*dividend*/, long long /*divisor*/)	/* signed */
       
   120 	{
       
   121 	asm("stmfd sp!, {r4-r8,lr} ");
       
   122 	asm("movs r8, r1 ");				// sign of remainder (=sign of dividend) into r8
       
   123 	asm("bpl 1f ");
       
   124 	asm("rsbs r0, r0, #0 ");			// ABS(dividend)
       
   125 	asm("rsc r1, r1, #0 ");
       
   126 	asm("1: ");
       
   127 	asm("movs r3, r3 ");
       
   128 	asm("bpl 2f ");
       
   129 	asm("rsbs r2, r2, #0 ");			// ABS(divisor)
       
   130 	asm("rsc r3, r3, #0 ");
       
   131 	asm("2: ");
       
   132 	asm("bl UDiv01 ");					// do the division, remainder in r3,r6
       
   133 	asm("eors r0, r3, r8, asr #32 ");	// remainder into r1:r0, inverted if dividend -ve
       
   134 	asm("eors r1, r6, r8, asr #32 ");
       
   135 	asm("adcs r0, r0, #0 ");			// if dividend -ve, add 1
       
   136 	asm("adcs r1, r1, #0 ");
       
   137 	__POPRET("r4-r8,");
       
   138 	}
       
   139 
       
   140 EXPORT_C __NAKED__ long long __umoddi3(unsigned long long /*dividend*/, unsigned long long /*divisor*/)	/* unsigned */
       
   141 	{
       
   142 	asm("stmfd sp!, {r4-r7,lr} ");
       
   143 	asm("bl UDiv01 ");					// do the division, remainder in r6:r3
       
   144 	asm("mov r0, r3 ");
       
   145 	asm("mov r1, r6 ");
       
   146 	__POPRET("r4-r7,");
       
   147 	}
       
   148 
       
   149 EXPORT_C __NAKED__ long long __ashrdi3(long long /*value*/, unsigned int /*count*/)
       
   150 	{
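	// Value in r1:r0 (r1 = most significant word), shift count in r2.
	// Illustrative only; for a count c the result is
	//	c < 32 : low = (low >> c) | (high << (32-c)),  high = high >> c (arithmetic)
	//	c >= 32: low = high >> (c-32) (arithmetic),    high = sign extension of high
	// __ashldi3 and __lshrdi3 below use the same split into "<32" and ">=32" cases.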
       
   151 	asm("cmp r2, #63 ");
       
   152 	asm("movhi r2, #63 ");			// count>63 same as count=63
       
   153 	asm("cmp r2, #32 ");
       
   154 	asm("bcs Asr01 ");				// jump if shift count >=32
       
   155 	asm("rsb r12, r2, #32 ");		// r12=32-shift count
       
   156 	asm("mov r0, r0, lsr r2 ");		// shift ls word right
       
   157 	asm("orr r0, r0, r1, lsl r12 ");	// or in bits shifted out of ms word
       
   158 	asm("mov r1, r1, asr r2 ");		// shift ms word right
       
   159 	__JUMP(,lr);
       
   160 	asm("Asr01: ");
       
   161 	asm("sub r2, r2, #32 ");		// r2=shift count-32
       
   162 	asm("mov r0, r1, asr r2 ");		// ls word = ms word >> (count-32)
       
   163 	asm("mov r1, r1, asr #32 ");	// ms word of result=sign extension of r1
       
   164 	__JUMP(,lr);
       
   165 	}
       
   166 
       
   167 EXPORT_C __NAKED__ long long __ashldi3(long long /*value*/, unsigned int /*count*/)
       
   168 	{
       
   169 	asm("cmp r2, #63 ");
       
   170 	asm("movhi r2, #64 ");			// count>63 same as count=64
       
   171 	asm("cmp r2, #32 ");
       
   172 	asm("bcs Asl01 ");				// jump if shift count >=32
       
   173 	asm("rsb r12, r2, #32 ");		// r12=32-shift count
       
   174 	asm("mov r1, r1, asl r2 ");		// shift ms word left
       
   175 	asm("orr r1, r1, r0, lsr r12 ");	// or in bits shifted out of ls word
       
   176 	asm("mov r0, r0, asl r2 ");		// shift ls word left
       
   177 	__JUMP(,lr);
       
   178 	asm("Asl01: ");
       
   179 	asm("sub r2, r2, #32 ");		// r2=shift count-32
       
   180 	asm("mov r1, r0, asl r2 ");		// result ms word = ls word << (count-32)
       
   181 	asm("mov r0, #0 ");				// ls word of result is zero
       
   182 	__JUMP(,lr);
       
   183 	}
       
   184 
       
   185 EXPORT_C __NAKED__ unsigned long long __lshrdi3(unsigned long long /*value*/, unsigned int /*count*/)
       
   186 	{
       
   187 	asm("cmp r2, #63 ");
       
   188 	asm("movhi r2, #64 ");			// count>63 same as count=64
       
   189 	asm("cmp r2, #32 ");
       
   190 	asm("bcs Lsr01 ");				// jump if shift count >=32
       
   191 	asm("rsb r12, r2, #32 ");		// r12=32-shift count
       
   192 	asm("mov r0, r0, lsr r2 ");		// shift ls word right
       
   193 	asm("orr r0, r0, r1, lsl r12 ");	// or in bits shifted out of ms word
       
   194 	asm("mov r1, r1, lsr r2 ");		// shift ms word right
       
   195 	__JUMP(,lr);
       
   196 	asm("Lsr01: ");
       
   197 	asm("sub r2, r2, #32 ");		// r2=shift count-32
       
   198 	asm("mov r0, r1, lsr r2 ");		// ls word = ms word >> (count-32)
       
   199 	asm("mov r1, #0 ");				// ms word of result = 0
       
   200 	__JUMP(,lr);
       
   201 	}
       
   202 
       
   203 EXPORT_C __NAKED__ long long __muldi3(long long /*multiplicand*/, long long /*multiplier*/)
       
   204 	{
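	// Multiplicand in r1:r0, multiplier in r3:r2. Writing them as high1:low1 and
	// high2:low2, the truncated 64-bit product is (illustrative identity)
	//	low1*low2 + ((low1*high2 + high1*low2) << 32)	(mod 2^64)
	// The high1*high2 term only affects bits 64..127, so it is never computed.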
       
   205 	asm("mul r1, r2, r1 ");				// r1=low2*high1
       
   206 	asm("mov ip, r0 ");					// ip=low1
       
   207 	asm("mla r1, r0, r3, r1 ");			// r1+=low1*high2
       
   208 	asm("mov r0, #0 ");
       
    209 	asm("umlal r0, r1, r2, ip ");		// r1:r0 += low1*low2
       
   210 	__JUMP(,lr);
       
   211 	}
       
   212 
       
   213 EXPORT_C __NAKED__ long long __negdi2(long long /*argument*/)
       
   214 	{
       
   215 	asm("rsbs r0, r0, #0 ");		// r0=0-r0, set carry
       
   216 	asm("rscs r1, r1, #0 ");		// r1=0-r1-(1-C)
       
   217 	__JUMP(,lr);
       
   218 	}
       
   219 
       
   220 EXPORT_C __NAKED__ unsigned long long __udivmoddi4 (unsigned long long /*dividend*/,
       
   221 													unsigned long long /*divisor*/,
       
   222 													unsigned long long* /*p_remainder*/)
       
   223 	{
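	// Dividend in r1:r0, divisor in r3:r2. p_remainder is the fifth word-sized
	// argument, so under the ARM procedure call standard it arrives on the
	// stack; after the five words pushed below (r4-r7,lr = 20 bytes) it sits at
	// [sp, #20].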
       
   224 	asm("stmfd sp!, {r4-r7,lr} ");
       
   225 	asm("bl UDiv01 ");					// do the division, quotient in r5:r4 remainder in r6:r3
       
   226 	asm("ldr r7, [sp, #20] ");			// r7=p_remainder
       
   227 	asm("mov r0, r4 ");					// r0=quotient low
       
   228 	asm("stmia r7, {r3,r6} ");			// store remainder
       
    229 	asm("mov r1, r5 ");					// r1=quotient high
       
   230 	__POPRET("r4-r7,");
       
   231 	}
       
   232 
       
   233 EXPORT_C __NAKED__ int __cmpdi2(long long /*a*/, long long /*b*/)
       
   234 	{
       
   235 	// return 0 if a<b, 1 if a=b, 2 if a>b
       
   236 	asm("subs r0, r2, r0 ");
       
   237 	asm("sbcs r1, r3, r1 ");			// r1:r0 = b-a, set flags
       
   238 	asm("movlt r0, #2 ");				// if b<a r0=2
       
   239 	__JUMP(lt,lr);						// if b<a return
       
   240 	asm("cmpeq r0, #0 ");				// if top word of difference=0, look at bottom
       
   241 	asm("moveq r0, #1 ");				// if a=b, r0=1
       
    242 	asm("movne r0, #0 ");				// else r0=0
       
   243 	__JUMP(,lr);
       
   244 	}
       
   245 
       
   246 EXPORT_C __NAKED__ int __ucmpdi2(unsigned long long /*a*/, unsigned long long /*b*/)
       
   247 	{
       
   248 	// return 0 if a<b, 1 if a=b, 2 if a>b
       
   249 	asm("cmp r1, r3 ");
       
   250 	asm("cmpeq r0, r2 ");				// compare r1:r0 - r3:r2
       
   251 	asm("movhi r0, #2 ");				// r0=2 if a>b
       
   252 	asm("moveq r0, #1 ");				// r0=1 if a=b
       
   253 	asm("movlo r0, #0 ");				// r0=0 if a<b
       
   254 	__JUMP(,lr);
       
   255 	}
       
   256 #endif
       
   257 
       
   258 #if defined(__GCC32__)
       
   259 void __division_by_zero();
       
   260 #define DIV_BY_ZERO " __division_by_zero "
       
   261 #elif defined(__ARMCC__)
       
   262 void __rt_div0 (void);
       
   263 #define DIV_BY_ZERO " __cpp(__rt_div0) "
       
   264 #endif
       
   265 
       
   266 EXPORT_C __NAKED__ TUint __umodsi3(TUint /*dividend*/,TUint /*divisor*/)
       
   267 //
       
   268 // Unsigned divide of r0 by r1: returns remainder in r0, quotient in r3
       
   269 // Sign of remainder = sign of dividend.
       
   270 // Destroys r2, r3
       
   271 //
       
   272     {
       
   273 
       
   274     asm("__umodsi3_start:");
       
   275 //
       
   276 // Use lookup table for divisors less than 17, and jump to
       
   277 // an optimised routine if available
       
   278 // 
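// For all other divisors the code below is a conventional shift-and-subtract
// (restoring) division: the divisor is scaled up by factors of 256 until it is
// within a factor of 256 of the dividend, then an unrolled loop subtracts
// shifted copies of it, producing one quotient bit per step. A reference C
// sketch of the same calculation (illustrative only, not part of the original
// routine; in the assembler the quotient ends up in r3 and the remainder in r0):
//
//	TUint RefUModSi3(TUint aDividend, TUint aDivisor, TUint& aQuotient)
//		{
//		TUint q = 0, r = 0;
//		for (TInt i = 31; i >= 0; --i)
//			{
//			r = (r << 1) | ((aDividend >> i) & 1);	// next dividend bit into acc
//			q <<= 1;
//			if (r >= aDivisor)
//				{ r -= aDivisor; q |= 1; }			// this quotient bit is 1
//			}
//		aQuotient = q;
//		return r;								// remainder
//		}
//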
       
   279     asm("MOV     r3, #0                     ");
       
   280     asm("CMP     r1, #16                    ");
       
   281     asm("LDRLS   r3, [pc, #modtable - . - 8]");
       
   282     asm("LDRLS   r3, [r3, r1, asl #2]       ");
       
   283     asm("CMP     r3, #0                     ");
       
   284 	__JUMP(NE,r3);
       
   285 //
       
   286 // r3 must be zero when entering this point
       
   287 //
       
   288     asm("MOV     r2, r1                     ");
       
   289 
       
   290     asm("__umodsi3_loop:                    ");
       
   291     asm("CMP     r2, r0, LSR #8             ");
       
   292     asm("MOVLS   r2, r2, LSL #8             ");
       
   293     asm("BLO     __umodsi3_loop             ");
       
   294 
       
   295     asm("CMP     r2, r0, LSR #1             ");
       
   296     asm("BHI     __umodsi3_jump7            ");
       
   297     asm("CMP     r2, r0, LSR #2             ");
       
   298     asm("BHI     __umodsi3_jump6            ");
       
   299     asm("CMP     r2, r0, LSR #3             ");
       
   300     asm("BHI     __umodsi3_jump5            ");
       
   301     asm("CMP     r2, r0, LSR #4             ");
       
   302     asm("BHI     __umodsi3_jump4            ");
       
   303     asm("CMP     r2, r0, LSR #5             ");
       
   304     asm("BHI     __umodsi3_jump3            ");
       
   305     asm("CMP     r2, r0, LSR #6             ");
       
   306     asm("BHI     __umodsi3_jump2            ");
       
   307     asm("CMP     r2, r0, LSR #7             ");
       
   308     asm("BHI     __umodsi3_jump1            ");
       
   309 
       
   310     asm("__umodsi3_loop2:                   ");
       
   311     asm("MOVHI   r2, r2, LSR #8             ");
       
   312 
       
   313     asm("CMP     r0, r2, LSL #7             ");
       
   314     asm("ADC     r3, r3, r3                 ");
       
   315     asm("SUBCS   r0, r0, r2, LSL #7         ");
       
   316     asm("CMP     r0, r2, LSL #6             ");
       
   317 
       
   318     asm("__umodsi3_jump1:                   ");
       
   319     asm("ADC     r3, r3, r3                 ");
       
   320     asm("SUBCS   r0, r0, r2, LSL #6         ");
       
   321     asm("CMP     r0, r2, LSL #5             ");
       
   322     asm("__umodsi3_jump2:                   ");
       
   323     asm("ADC     r3, r3, r3                 ");
       
   324     asm("SUBCS   r0, r0, r2, LSL #5         ");
       
   325     asm("CMP     r0, r2, LSL #4             ");
       
   326     asm("__umodsi3_jump3:                   ");
       
   327     asm("ADC     r3, r3, r3                 ");
       
   328     asm("SUBCS   r0, r0, r2, LSL #4         ");
       
   329     asm("CMP     r0, r2, LSL #3             ");
       
   330     asm("__umodsi3_jump4:                   ");
       
   331     asm("ADC     r3, r3, r3                 ");
       
   332     asm("SUBCS   r0, r0, r2, LSL #3         ");
       
   333     asm("CMP     r0, r2, LSL #2             ");
       
   334     asm("__umodsi3_jump5:                   ");
       
   335     asm("ADC     r3, r3, r3                 ");
       
   336     asm("SUBCS   r0, r0, r2, LSL #2         ");
       
   337     asm("CMP     r0, r2, LSL #1             ");
       
   338     asm("__umodsi3_jump6:                   ");
       
   339     asm("ADC     r3, r3, r3                 ");
       
   340     asm("SUBCS   r0, r0, r2, LSL #1         ");
       
   341     asm("__umodsi3_jump7:                   ");
       
   342     asm("CMP     r0, r2                     ");
       
   343     asm("ADC     r3, r3, r3                 ");
       
   344     asm("SUBCS   r0, r0, r2                 ");
       
   345 
       
   346     asm("CMP     r2, r1                     ");
       
   347     asm("BNE     __umodsi3_loop2            ");
       
   348 
       
   349 	__JUMP(,lr);
       
   350 
       
   351     asm("modtable:                          ");
       
   352     asm(".word   mod_jump_table             ");
       
   353 //
       
   354 // Lookup for optimised divide routines
       
   355 //
       
   356     asm("mod_jump_table:                    ");
       
   357     asm(".word " DIV_BY_ZERO); // 0
       
   358     asm(".word   __mod1                     "); // 1
       
   359     asm(".word   __mod2                     "); // 2
       
   360     asm(".word   0                          "); // 3
       
   361     asm(".word   __mod4                     "); // 4
       
   362     asm(".word   __mod5                     "); // 5
       
   363     asm(".word   0                          "); // 6
       
   364     asm(".word   __mod7                     "); // 7
       
   365     asm(".word   __mod8                     "); // 8
       
   366     asm(".word   0                          "); // 9
       
   367     asm(".word   __mod10                    "); // 10
       
   368     asm(".word   0                          "); // 11
       
   369     asm(".word   0                          "); // 12
       
   370     asm(".word   0                          "); // 13
       
   371     asm(".word   0                          "); // 14
       
   372     asm(".word   0                          "); // 15
       
   373     asm(".word   __mod16                    "); // 16
       
   374 
       
   375     asm("__mod16:                           ");
       
   376     asm("MOV     r3,r0,LSR #4               ");
       
   377     asm("AND     r0,r0,#15                  ");
       
   378 	__JUMP(,lr);
       
   379 
       
   380     asm("__mod1:                            ");
       
   381     asm("MOV     r3,r0                      ");
       
   382     asm("MOV     r0,#0                      ");
       
   383 	__JUMP(,lr);
       
   384 
       
   385     asm("__mod2:                            ");
       
   386     asm("MOV     r3,r0,LSR #1               ");
       
   387     asm("AND     r0,r0,#1                   ");
       
   388 	__JUMP(,lr);
       
   389 
       
   390     asm("__mod4:                            ");
       
   391     asm("MOV     r3,r0,LSR #2               ");
       
   392     asm("AND     r0,r0,#3                   ");
       
   393 	__JUMP(,lr);
       
   394 
       
   395     asm("__mod8:                            ");
       
   396     asm("MOV     r3,r0,LSR #3               ");
       
   397     asm("AND     r0,r0,#7                   ");
       
   398 	__JUMP(,lr);
       
   399 
       
   400     asm("__mod10:                           ");
       
   401     asm("MOV     r3, r0                     ");
       
   402     asm("SUB     r0, r3, #10                ");
       
   403     asm("SUB     r3, r3, r3, LSR #2         ");
       
   404     asm("ADD     r3, r3, r3, LSR #4         ");
       
   405     asm("ADD     r3, r3, r3, LSR #8         ");
       
   406     asm("ADD     r3, r3, r3, LSR #16        ");
       
   407     asm("MOV     r3, r3, LSR #3             ");
       
   408     asm("ADD     r2, r3, r3, ASL #2         ");
       
   409     asm("SUBS    r0, r0, r2, ASL #1         ");
       
   410     asm("ADDPL   r3, r3, #1                 ");
       
   411     asm("ADDMI   r0, r0, #10                ");
       
   412 	__JUMP(,lr);
       
   413 
       
   414     asm("__mod7:                            ");
       
   415     asm("MOV     r3, r0                     ");
       
   416 	asm("SUB     r0, r3, #7                 ");
       
   417 	asm("MOV     r3, r3, lsr #1             ");
       
   418 	asm("ADD     r3, r3, r3, lsr #3         ");
       
   419 	asm("ADD     r3, r3, r3, lsr #6         ");
       
   420 	asm("ADD     r3, r3, r3, lsr #12        ");
       
   421 	asm("ADD     r3, r3, r3, lsr #24        ");
       
   422 	asm("MOV     r3, r3, lsr #2             ");
       
   423 	asm("RSB     r2, r3, r3, asl #3         ");
       
   424 	asm("SUBS    r0, r0, r2, asl #0         ");
       
   425 	asm("ADDPL   r3, r3, #1                 ");
       
   426 	asm("ADDMI   r0, r0, #7                 ");
       
   427 	__JUMP(,lr);
       
   428 
       
   429     asm("__mod5:                            ");
       
   430     asm("MOV     r3, r0                     ");
       
   431 	asm("SUB     r0, r3, #5                 ");
       
   432 	asm("SUB     r3, r3, r3, lsr #2         ");
       
   433 	asm("ADD     r3, r3, r3, lsr #4         ");
       
   434 	asm("ADD     r3, r3, r3, lsr #8         ");
       
   435 	asm("ADD     r3, r3, r3, lsr #16        ");
       
   436 	asm("MOV     r3, r3, lsr #2             ");
       
   437 	asm("ADD     r2, r3, r3, asl #2         ");
       
   438 	asm("SUBS    r0, r0, r2, asl #0         ");
       
   439 	asm("ADDPL   r3, r3, #1                 ");
       
   440 	asm("ADDMI   r0, r0, #5                 ");
       
   441 	__JUMP(,lr);
       
   442     }
       
   443 
       
   444 
       
   445 EXPORT_C __NAKED__ unsigned long long __udivdi3(unsigned long long /*dividend*/, unsigned long long /*divisor*/)
       
   446 //
       
   447 // Dividend in r1:r0, divisor in r3:r2, Return quotient in r1:r0
       
   448 //
       
   449 	{
       
   450 	asm("stmfd sp!, {r4-r7,lr} ");
       
   451 	asm("bl UDiv01 ");					// do the division, result in r4,r5
       
   452 	asm("mov r0, r4 ");
       
   453 	asm("mov r1, r5 ");
       
   454 	__POPRET("r4-r7,");
       
   455 
       
   456 	// Unsigned 64-bit division. Dividend in r0,r1, divisor in r2,r3
       
   457 	// Quotient returned in r4,r5, Remainder in r3,r6
       
   458 	// Registers r0-r7,r12 used, r8-r11 unmodified
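	// All three cases below implement the same shift-and-subtract (restoring)
	// division; they differ only in the accumulator width required:
	//	divisor <  2^31 : 32-bit accumulator (r3), 64-bit quotient, 64 steps
	//	divisor <  2^32 : 33-bit accumulator (r3 plus the carry flag), 64 steps
	//	divisor >= 2^32 : 64-bit accumulator (r6:r1), quotient fits in 32 bits,
	//					  so only 32 steps are needed
	// Each step shifts the next dividend bit into the accumulator, trial-
	// subtracts the divisor, and shifts the resulting no-borrow bit into the
	// quotient, restoring the accumulator when the subtraction underflows.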
       
   459 	asm(".global UDiv01 ");
       
   460 	asm("UDiv01: ");
       
   461 	asm("movs r3, r3 ");				// check if divisor fits in 32 bits
       
   462 	asm("bne udiv64a ");				// branch if not
       
   463 	asm("movs r2, r2 ");				// check if divisor fits in 31 bits
       
   464 	asm("bmi udiv64e ");				// branch if not
       
   465 	asm("beq udiv64_divby0 ");			// if divisor=0, branch to error routine
       
   466 
       
   467 	// Divisor is <0x80000000
       
   468 	// This means that a 32-bit accumulator is sufficient
       
   469 	asm("mov r4, #0 ");					// use r3 as acc, result in r4, r5
       
   470 	asm("mov r5, #0 ");
       
    471 	asm("mov r6, #8 ");					// do 2 sets of 32 iterations
       
   472 	asm("udiv64b: ");
       
   473 	asm("adds r1, r1, r1 ");			// shift dividend left into acc
       
   474 	asm("adcs r3, r3, r3 ");
       
   475 	asm("subs r3, r3, r2 ");			// subtract divisor from acc
       
   476 	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
       
   477 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   478 	asm("adds r1, r1, r1 ");			// shift dividend left into acc
       
   479 	asm("adcs r3, r3, r3 ");
       
   480 	asm("subs r3, r3, r2 ");			// subtract divisor from acc
       
   481 	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
       
   482 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   483 	asm("adds r1, r1, r1 ");			// shift dividend left into acc
       
   484 	asm("adcs r3, r3, r3 ");
       
   485 	asm("subs r3, r3, r2 ");			// subtract divisor from acc
       
   486 	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
       
   487 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   488 	asm("adds r1, r1, r1 ");			// shift dividend left into acc
       
   489 	asm("adcs r3, r3, r3 ");
       
   490 	asm("subs r3, r3, r2 ");			// subtract divisor from acc
       
   491 	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
       
   492 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   493 	asm("subs r6, r6, #1 ");			// loop
       
   494 	asm("bne udiv64b ");
       
   495 	asm("mov r6, #8 ");					// 2nd set of 32 iterations
       
   496 	asm("udiv64c: ");
       
   497 	asm("adds r0, r0, r0 ");			// shift dividend left into acc
       
   498 	asm("adcs r3, r3, r3 ");
       
   499 	asm("subs r3, r3, r2 ");			// subtract divisor from acc
       
   500 	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
       
   501 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   502 	asm("adds r0, r0, r0 ");			// shift dividend left into acc
       
   503 	asm("adcs r3, r3, r3 ");
       
   504 	asm("subs r3, r3, r2 ");			// subtract divisor from acc
       
   505 	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
       
   506 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   507 	asm("adds r0, r0, r0 ");			// shift dividend left into acc
       
   508 	asm("adcs r3, r3, r3 ");
       
   509 	asm("subs r3, r3, r2 ");			// subtract divisor from acc
       
   510 	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
       
   511 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   512 	asm("adds r0, r0, r0 ");			// shift dividend left into acc
       
   513 	asm("adcs r3, r3, r3 ");
       
   514 	asm("subs r3, r3, r2 ");			// subtract divisor from acc
       
   515 	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
       
   516 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   517 	asm("subs r6, r6, #1 ");			// loop
       
   518 	asm("bne udiv64c ");
       
   519 	__JUMP(,lr);
       
   520 
       
   521 	// 2^31 <= Divisor < 2^32
       
   522 	// Need 33-bit accumulator - use carry flag as 33rd bit
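	// After the "adcs r3, r3, r3" shift the carry flag holds bit 32 of the
	// accumulator: carry set means the accumulator is already >= 2^32 > divisor,
	// so SUBCS can subtract unconditionally (it cannot underflow); carry clear
	// means SUBCCS must try the subtraction with the flags set, so that its
	// no-borrow carry selects the quotient bit and whether to add the divisor back.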
       
   523 	asm("udiv64e: ");
       
   524 	asm("mov r4, #0 ");					// use r3 as acc, result in r4, r5
       
   525 	asm("mov r5, #0 ");
       
    526 	asm("mov r6, #8 ");					// do 2 sets of 32 iterations
       
   527 	asm("udiv64f: ");
       
   528 	asm("adds r1, r1, r1 ");			// shift dividend left into acc
       
   529 	asm("adcs r3, r3, r3 ");
       
   530 	asm("subcs r3, r3, r2 ");
       
   531 	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
       
   532 	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
       
   533 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   534 	asm("adds r1, r1, r1 ");			// shift dividend left into acc
       
   535 	asm("adcs r3, r3, r3 ");
       
   536 	asm("subcs r3, r3, r2 ");
       
   537 	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
       
   538 	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
       
   539 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   540 	asm("adds r1, r1, r1 ");			// shift dividend left into acc
       
   541 	asm("adcs r3, r3, r3 ");
       
   542 	asm("subcs r3, r3, r2 ");
       
   543 	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
       
   544 	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
       
   545 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   546 	asm("adds r1, r1, r1 ");			// shift dividend left into acc
       
   547 	asm("adcs r3, r3, r3 ");
       
   548 	asm("subcs r3, r3, r2 ");
       
   549 	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
       
   550 	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
       
   551 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   552 	asm("subs r6, r6, #1 ");			// loop
       
   553 	asm("bne udiv64f ");
       
   554 	asm("mov r6, #8 ");					// 2nd set of 32 iterations
       
   555 	asm("udiv64g: ");
       
   556 	asm("adds r0, r0, r0 ");			// shift dividend left into acc
       
   557 	asm("adcs r3, r3, r3 ");
       
   558 	asm("subcs r3, r3, r2 ");
       
   559 	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
       
   560 	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
       
   561 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   562 	asm("adds r0, r0, r0 ");			// shift dividend left into acc
       
   563 	asm("adcs r3, r3, r3 ");
       
   564 	asm("subcs r3, r3, r2 ");
       
   565 	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
       
   566 	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
       
   567 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   568 	asm("adds r0, r0, r0 ");			// shift dividend left into acc
       
   569 	asm("adcs r3, r3, r3 ");
       
   570 	asm("subcs r3, r3, r2 ");
       
   571 	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
       
   572 	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
       
   573 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   574 	asm("adds r0, r0, r0 ");			// shift dividend left into acc
       
   575 	asm("adcs r3, r3, r3 ");
       
   576 	asm("subcs r3, r3, r2 ");
       
   577 	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
       
   578 	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
       
   579 	asm("addcc r3, r3, r2 ");			// if borrow, add back
       
   580 	asm("subs r6, r6, #1 ");			// loop
       
   581 	asm("bne udiv64g ");
       
   582 	__JUMP(,lr);
       
   583 	
       
   584 	// Divisor >= 2^32, so quotient < 2^32
       
   585 	// Use 64 bit accumulator, 32 bit quotient
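	// With a divisor of at least 2^32 the quotient is below 2^32, so a single
	// register (r4) holds it and only the 32 bits of r0 need to be shifted into
	// the accumulator (the dividend's high word starts there already). The trial
	// subtraction goes into r7,r12 and is copied back into r6:r1 only when it
	// does not borrow, rather than subtracting and adding back.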
       
   586 	asm("udiv64a: ");
       
   587 	asm("mov r4, #0 ");					// quotient in r4, use r1, r6 as accumulator
       
   588 	asm("mov r6, #0 ");
       
   589 	asm("mov r5, #8 ");					// do 32 iterations
       
   590 	asm("udiv64d: ");
       
   591 	asm("adds r0, r0, r0 ");			// shift dividend left into acc
       
   592 	asm("adcs r1, r1, r1 ");
       
   593 	asm("adcs r6, r6, r6 ");
       
   594 	asm("subs r7, r1, r2 ");			// subtract divisor from acc, result into r7,r12
       
   595 	asm("sbcs r12, r6, r3 ");
       
   596 	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
       
   597 	asm("movcs r1, r7 ");				// if no borrow, update acc
       
   598 	asm("movcs r6, r12 ");
       
   599 	asm("adds r0, r0, r0 ");			// shift dividend left into acc
       
   600 	asm("adcs r1, r1, r1 ");
       
   601 	asm("adcs r6, r6, r6 ");
       
   602 	asm("subs r7, r1, r2 ");			// subtract divisor from acc, result into r7,r12
       
   603 	asm("sbcs r12, r6, r3 ");
       
   604 	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
       
   605 	asm("movcs r1, r7 ");				// if no borrow, update acc
       
   606 	asm("movcs r6, r12 ");
       
   607 	asm("adds r0, r0, r0 ");			// shift dividend left into acc
       
   608 	asm("adcs r1, r1, r1 ");
       
   609 	asm("adcs r6, r6, r6 ");
       
   610 	asm("subs r7, r1, r2 ");			// subtract divisor from acc, result into r7,r12
       
   611 	asm("sbcs r12, r6, r3 ");
       
   612 	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
       
   613 	asm("movcs r1, r7 ");				// if no borrow, update acc
       
   614 	asm("movcs r6, r12 ");
       
   615 	asm("adds r0, r0, r0 ");			// shift dividend left into acc
       
   616 	asm("adcs r1, r1, r1 ");
       
   617 	asm("adcs r6, r6, r6 ");
       
   618 	asm("subs r7, r1, r2 ");			// subtract divisor from acc, result into r7,r12
       
   619 	asm("sbcs r12, r6, r3 ");
       
   620 	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
       
   621 	asm("movcs r1, r7 ");				// if no borrow, update acc
       
   622 	asm("movcs r6, r12 ");
       
   623 	asm("subs r5, r5, #1 ");			// loop
       
   624 	asm("bne udiv64d ");
       
   625 	asm("mov r3, r1 ");					// remainder in r3,r6
       
   626 	__JUMP(,lr);
       
   627 
       
   628 	asm("udiv64_divby0: ");
       
   629 	asm("stmfd sp!, {r11,lr} ");
       
   630 	__EH_FRAME_PUSH2(r11,lr)
       
    631 	asm("mov r11, sp ");				// save original sp

    632 	asm("bic sp, sp, #4 ");				// ensure 8-byte stack alignment for the call

    633 	asm("bl " DIV_BY_ZERO);				// call the division-by-zero handler

    634 	asm("mov sp, r11 ");				// restore sp
       
   635 	__POPRET("r11,");
       
   636 	}
       
   637 
       
   638 }
       
   639