kernel/eka/common/arm/cgcchelp.cia
changeset 0 a41df078684a
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kernel/eka/common/arm/cgcchelp.cia	Mon Oct 19 15:55:17 2009 +0100
@@ -0,0 +1,639 @@
+// Copyright (c) 1995-2009 Nokia Corporation and/or its subsidiary(-ies).
+// All rights reserved.
+// This component and the accompanying materials are made available
+// under the terms of the License "Eclipse Public License v1.0"
+// which accompanies this distribution, and is available
+// at the URL "http://www.eclipse.org/legal/epl-v10.html".
+//
+// Initial Contributors:
+// Nokia Corporation - initial contribution.
+//
+// Contributors:
+//
+// Description:
+// e32\common\arm\cgcchelp.cia
+// 
+//
+
+#include "../common.h"
+#ifdef __KERNEL_MODE__
+#include "nkern.h"
+#endif
+
+extern "C" {
+#ifdef __GCC32__
+EXPORT_C __NAKED__ TInt __divsi3(TInt /*dividend*/,TInt /*divisor*/)
+//
+// Signed divide of r0 by r1: returns quotient in r0
+// Quotient is truncated (rounded towards zero).
+// Destroys r2, r3 and ip
+// Negates dividend and divisor, then does an unsigned divide; signs
+// get sorted out again at the end.
+// 
+// Have to calculate the sign of the result for the end of the calculation.
+// Store this in the LSB of ip which also saves the old lr.
+//
+    {
+
+	asm("STMFD	sp!, {lr} ");
+    asm("ANDS    r3, r1, #0x80000000        ");	// r3 bit 31=sign of divisor, rest of r3=0
+    asm("RSBMI   r1, r1, #0                 ");	// r1=ABS(divisor)
+    asm("EORS    ip, r3, r0, ASR #32        ");	// ip bit 31=sign of quotient, all other bits=carry=sign of dividend
+    asm("RSBCS   r0, r0, #0                 ");	// r0=ABS(dividend)
+    asm(".EXTERN 			    ");
+    asm("BL      __umodsi3_start            ");
+    asm("MOV     r0, r3                     ");
+    asm("CMP	 ip, #0						");	// test sign of quotient
+    asm("RSBMI   r0, r0, #0                 ");	// negate if necessary
+	__POPRET("");
+	}
+
+
+EXPORT_C __NAKED__ TInt __modsi3(TInt /*dividend*/,TInt /*divisor*/)
+//
+// Signed divide of r0 by r1: returns remainder in r0
+// Sign of remainder = sign of dividend.
+// Destroys r2, r3 and ip
+// Negates dividend and divisor, then does an unsigned divide; signs
+// get sorted out again at the end.
+//
+// Have to save sign of dividend in order to apply sign to remainder
+// at the end of the calculation. Store this in the LSB of ip which also
+// saves the old lr.
+//
+    {
+
+	asm("STMFD	sp!, {lr} ");
+    asm("MOVS    r1, r1                     ");
+    asm("RSBMI   r1, r1, #0                 ");
+    asm("MOVS    ip, r0 ");
+    asm("RSBMI   r0, r0, #0                 ");
+    asm(".EXTERN 			    ");
+    asm("BL      __umodsi3_start            ");
+    asm("MOVS    ip, ip	");
+    asm("RSBMI   r0, r0, #0                 ");
+	__POPRET("");
+    }
+
+EXPORT_C __NAKED__ TUint __udivsi3(TUint /*dividend*/,TUint /*divisor*/)
+//
+// Unsigned divide of r0 by r1: returns quotient in r0
+// Quotient is truncated (rounded towards zero).
+// Destroys r2, r3 and ip
+// 
+    {
+
+    asm("MOV     ip, lr                     ");
+    asm(".EXTERN 			    ");
+    asm("BL      __umodsi3_start            ");
+    asm("MOV     r0, r3                     ");
+	__JUMP(,ip);
+    }
+
+
+EXPORT_C __NAKED__ long long __divdi3(long long /*dividend*/, long long /*divisor*/)
+//
+// Dividend in r1:r0, divisor in r3:r2, Return quotient in r1:r0
+//
+	{
+	asm("stmfd sp!, {r4-r8,lr} ");
+	asm("eor r8, r1, r3 ");				// sign of result into r8
+	asm("movs r1, r1 ");
+	asm("bpl 1f ");
+	asm("rsbs r0, r0, #0 ");			// ABS(dividend)
+	asm("rsc r1, r1, #0 ");
+	asm("1: ");
+	asm("movs r3, r3 ");
+	asm("bpl 2f ");
+	asm("rsbs r2, r2, #0 ");			// ABS(divisor)
+	asm("rsc r3, r3, #0 ");
+	asm("2: ");
+	asm("bl UDiv01 ");					// do the division, result in r4,r5
+	asm("eors r0, r4, r8, asr #32 ");	// quotient into r1:r0, inverted if quotient -ve
+	asm("eors r1, r5, r8, asr #32 ");
+	asm("adcs r0, r0, #0 ");		// if quotient -ve, add 1
+	asm("adcs r1, r1, #0 ");
+	__POPRET("r4-r8,");
+	}
+
+EXPORT_C __NAKED__ long long __moddi3(long long /*dividend*/, long long /*divisor*/)	/* signed */
+	{
+	asm("stmfd sp!, {r4-r8,lr} ");
+	asm("movs r8, r1 ");				// sign of remainder (=sign of dividend) into r8
+	asm("bpl 1f ");
+	asm("rsbs r0, r0, #0 ");			// ABS(dividend)
+	asm("rsc r1, r1, #0 ");
+	asm("1: ");
+	asm("movs r3, r3 ");
+	asm("bpl 2f ");
+	asm("rsbs r2, r2, #0 ");			// ABS(divisor)
+	asm("rsc r3, r3, #0 ");
+	asm("2: ");
+	asm("bl UDiv01 ");					// do the division, remainder in r3,r6
+	asm("eors r0, r3, r8, asr #32 ");	// remainder into r1:r0, inverted if dividend -ve
+	asm("eors r1, r6, r8, asr #32 ");
+	asm("adcs r0, r0, #0 ");			// if dividend -ve, add 1
+	asm("adcs r1, r1, #0 ");
+	__POPRET("r4-r8,");
+	}
+
+EXPORT_C __NAKED__ long long __umoddi3(unsigned long long /*dividend*/, unsigned long long /*divisor*/)	/* unsigned */
+	{
+	asm("stmfd sp!, {r4-r7,lr} ");
+	asm("bl UDiv01 ");					// do the division, remainder in r6:r3
+	asm("mov r0, r3 ");
+	asm("mov r1, r6 ");
+	__POPRET("r4-r7,");
+	}
+
+EXPORT_C __NAKED__ long long __ashrdi3(long long /*value*/, unsigned int /*count*/)
+	{
+	asm("cmp r2, #63 ");
+	asm("movhi r2, #63 ");			// count>63 same as count=63
+	asm("cmp r2, #32 ");
+	asm("bcs Asr01 ");				// jump if shift count >=32
+	asm("rsb r12, r2, #32 ");		// r12=32-shift count
+	asm("mov r0, r0, lsr r2 ");		// shift ls word right
+	asm("orr r0, r0, r1, lsl r12 ");	// or in bits shifted out of ms word
+	asm("mov r1, r1, asr r2 ");		// shift ms word right
+	__JUMP(,lr);
+	asm("Asr01: ");
+	asm("sub r2, r2, #32 ");		// r2=shift count-32
+	asm("mov r0, r1, asr r2 ");		// ls word = ms word >> (count-32)
+	asm("mov r1, r1, asr #32 ");	// ms word of result=sign extension of r1
+	__JUMP(,lr);
+	}
+
+EXPORT_C __NAKED__ long long __ashldi3(long long /*value*/, unsigned int /*count*/)
+	{
+	asm("cmp r2, #63 ");
+	asm("movhi r2, #64 ");			// count>63 same as count=64
+	asm("cmp r2, #32 ");
+	asm("bcs Asl01 ");				// jump if shift count >=32
+	asm("rsb r12, r2, #32 ");		// r12=32-shift count
+	asm("mov r1, r1, asl r2 ");		// shift ms word left
+	asm("orr r1, r1, r0, lsr r12 ");	// or in bits shifted out of ls word
+	asm("mov r0, r0, asl r2 ");		// shift ls word left
+	__JUMP(,lr);
+	asm("Asl01: ");
+	asm("sub r2, r2, #32 ");		// r2=shift count-32
+	asm("mov r1, r0, asl r2 ");		// result ms word = ls word << (count-32)
+	asm("mov r0, #0 ");				// ls word of result is zero
+	__JUMP(,lr);
+	}
+
+EXPORT_C __NAKED__ unsigned long long __lshrdi3(unsigned long long /*value*/, unsigned int /*count*/)
+	{
+	asm("cmp r2, #63 ");
+	asm("movhi r2, #64 ");			// count>63 same as count=64
+	asm("cmp r2, #32 ");
+	asm("bcs Lsr01 ");				// jump if shift count >=32
+	asm("rsb r12, r2, #32 ");		// r12=32-shift count
+	asm("mov r0, r0, lsr r2 ");		// shift ls word right
+	asm("orr r0, r0, r1, lsl r12 ");	// or in bits shifted out of ms word
+	asm("mov r1, r1, lsr r2 ");		// shift ms word right
+	__JUMP(,lr);
+	asm("Lsr01: ");
+	asm("sub r2, r2, #32 ");		// r2=shift count-32
+	asm("mov r0, r1, lsr r2 ");		// ls word = ms word >> (count-32)
+	asm("mov r1, #0 ");				// ms word of result = 0
+	__JUMP(,lr);
+	}
+
+EXPORT_C __NAKED__ long long __muldi3(long long /*multiplicand*/, long long /*multiplier*/)
+	{
+	asm("mul r1, r2, r1 ");				// r1=low2*high1
+	asm("mov ip, r0 ");					// ip=low1
+	asm("mla r1, r0, r3, r1 ");			// r1+=low1*high2
+	asm("mov r0, #0 ");
+	asm("umlal r0, r1, r2, ip ");		// r1:r0 += high1*low1
+	__JUMP(,lr);
+	}
+
+EXPORT_C __NAKED__ long long __negdi2(long long /*argument*/)
+	{
+	asm("rsbs r0, r0, #0 ");		// r0=0-r0, set carry
+	asm("rscs r1, r1, #0 ");		// r1=0-r1-(1-C)
+	__JUMP(,lr);
+	}
+
+EXPORT_C __NAKED__ unsigned long long __udivmoddi4 (unsigned long long /*dividend*/,
+													unsigned long long /*divisor*/,
+													unsigned long long* /*p_remainder*/)
+	{
+	asm("stmfd sp!, {r4-r7,lr} ");
+	asm("bl UDiv01 ");					// do the division, quotient in r5:r4 remainder in r6:r3
+	asm("ldr r7, [sp, #20] ");			// r7=p_remainder
+	asm("mov r0, r4 ");					// r0=quotient low
+	asm("stmia r7, {r3,r6} ");			// store remainder
+	asm("mov r1, r5 ");					// r0=quotient high
+	__POPRET("r4-r7,");
+	}
+
+EXPORT_C __NAKED__ int __cmpdi2(long long /*a*/, long long /*b*/)
+	{
+	// return 0 if a<b, 1 if a=b, 2 if a>b
+	asm("subs r0, r2, r0 ");
+	asm("sbcs r1, r3, r1 ");			// r1:r0 = b-a, set flags
+	asm("movlt r0, #2 ");				// if b<a r0=2
+	__JUMP(lt,lr);						// if b<a return
+	asm("cmpeq r0, #0 ");				// if top word of difference=0, look at bottom
+	asm("moveq r0, #1 ");				// if a=b, r0=1
+	asm("movne r0, #0 ");				// else r=0
+	__JUMP(,lr);
+	}
+
+EXPORT_C __NAKED__ int __ucmpdi2(unsigned long long /*a*/, unsigned long long /*b*/)
+	{
+	// return 0 if a<b, 1 if a=b, 2 if a>b
+	asm("cmp r1, r3 ");
+	asm("cmpeq r0, r2 ");				// compare r1:r0 - r3:r2
+	asm("movhi r0, #2 ");				// r0=2 if a>b
+	asm("moveq r0, #1 ");				// r0=1 if a=b
+	asm("movlo r0, #0 ");				// r0=0 if a<b
+	__JUMP(,lr);
+	}
+#endif
+
+#if defined(__GCC32__)
+void __division_by_zero();
+#define DIV_BY_ZERO " __division_by_zero "
+#elif defined(__ARMCC__)
+void __rt_div0 (void);
+#define DIV_BY_ZERO " __cpp(__rt_div0) "
+#endif
+
+EXPORT_C __NAKED__ TUint __umodsi3(TUint /*dividend*/,TUint /*divisor*/)
+//
+// Unsigned divide of r0 by r1: returns remainder in r0, quotient in r3
+// Sign of remainder = sign of dividend.
+// Destroys r2, r3
+//
+    {
+
+    asm("__umodsi3_start:");
+//
+// Use lookup table for divisors less than 17, and jump to
+// an optimised routine if available
+// 
+    asm("MOV     r3, #0                     ");
+    asm("CMP     r1, #16                    ");
+    asm("LDRLS   r3, [pc, #modtable - . - 8]");
+    asm("LDRLS   r3, [r3, r1, asl #2]       ");
+    asm("CMP     r3, #0                     ");
+	__JUMP(NE,r3);
+//
+// r3 must be zero when entering this point
+//
+    asm("MOV     r2, r1                     ");
+
+    asm("__umodsi3_loop:                    ");
+    asm("CMP     r2, r0, LSR #8             ");
+    asm("MOVLS   r2, r2, LSL #8             ");
+    asm("BLO     __umodsi3_loop             ");
+
+    asm("CMP     r2, r0, LSR #1             ");
+    asm("BHI     __umodsi3_jump7            ");
+    asm("CMP     r2, r0, LSR #2             ");
+    asm("BHI     __umodsi3_jump6            ");
+    asm("CMP     r2, r0, LSR #3             ");
+    asm("BHI     __umodsi3_jump5            ");
+    asm("CMP     r2, r0, LSR #4             ");
+    asm("BHI     __umodsi3_jump4            ");
+    asm("CMP     r2, r0, LSR #5             ");
+    asm("BHI     __umodsi3_jump3            ");
+    asm("CMP     r2, r0, LSR #6             ");
+    asm("BHI     __umodsi3_jump2            ");
+    asm("CMP     r2, r0, LSR #7             ");
+    asm("BHI     __umodsi3_jump1            ");
+
+    asm("__umodsi3_loop2:                   ");
+    asm("MOVHI   r2, r2, LSR #8             ");
+
+    asm("CMP     r0, r2, LSL #7             ");
+    asm("ADC     r3, r3, r3                 ");
+    asm("SUBCS   r0, r0, r2, LSL #7         ");
+    asm("CMP     r0, r2, LSL #6             ");
+
+    asm("__umodsi3_jump1:                   ");
+    asm("ADC     r3, r3, r3                 ");
+    asm("SUBCS   r0, r0, r2, LSL #6         ");
+    asm("CMP     r0, r2, LSL #5             ");
+    asm("__umodsi3_jump2:                   ");
+    asm("ADC     r3, r3, r3                 ");
+    asm("SUBCS   r0, r0, r2, LSL #5         ");
+    asm("CMP     r0, r2, LSL #4             ");
+    asm("__umodsi3_jump3:                   ");
+    asm("ADC     r3, r3, r3                 ");
+    asm("SUBCS   r0, r0, r2, LSL #4         ");
+    asm("CMP     r0, r2, LSL #3             ");
+    asm("__umodsi3_jump4:                   ");
+    asm("ADC     r3, r3, r3                 ");
+    asm("SUBCS   r0, r0, r2, LSL #3         ");
+    asm("CMP     r0, r2, LSL #2             ");
+    asm("__umodsi3_jump5:                   ");
+    asm("ADC     r3, r3, r3                 ");
+    asm("SUBCS   r0, r0, r2, LSL #2         ");
+    asm("CMP     r0, r2, LSL #1             ");
+    asm("__umodsi3_jump6:                   ");
+    asm("ADC     r3, r3, r3                 ");
+    asm("SUBCS   r0, r0, r2, LSL #1         ");
+    asm("__umodsi3_jump7:                   ");
+    asm("CMP     r0, r2                     ");
+    asm("ADC     r3, r3, r3                 ");
+    asm("SUBCS   r0, r0, r2                 ");
+
+    asm("CMP     r2, r1                     ");
+    asm("BNE     __umodsi3_loop2            ");
+
+	__JUMP(,lr);
+
+    asm("modtable:                          ");
+    asm(".word   mod_jump_table             ");
+//
+// Lookup for optimised divide routines
+//
+    asm("mod_jump_table:                    ");
+    asm(".word " DIV_BY_ZERO); // 0
+    asm(".word   __mod1                     "); // 1
+    asm(".word   __mod2                     "); // 2
+    asm(".word   0                          "); // 3
+    asm(".word   __mod4                     "); // 4
+    asm(".word   __mod5                     "); // 5
+    asm(".word   0                          "); // 6
+    asm(".word   __mod7                     "); // 7
+    asm(".word   __mod8                     "); // 8
+    asm(".word   0                          "); // 9
+    asm(".word   __mod10                    "); // 10
+    asm(".word   0                          "); // 11
+    asm(".word   0                          "); // 12
+    asm(".word   0                          "); // 13
+    asm(".word   0                          "); // 14
+    asm(".word   0                          "); // 15
+    asm(".word   __mod16                    "); // 16
+
+    asm("__mod16:                           ");
+    asm("MOV     r3,r0,LSR #4               ");
+    asm("AND     r0,r0,#15                  ");
+	__JUMP(,lr);
+
+    asm("__mod1:                            ");
+    asm("MOV     r3,r0                      ");
+    asm("MOV     r0,#0                      ");
+	__JUMP(,lr);
+
+    asm("__mod2:                            ");
+    asm("MOV     r3,r0,LSR #1               ");
+    asm("AND     r0,r0,#1                   ");
+	__JUMP(,lr);
+
+    asm("__mod4:                            ");
+    asm("MOV     r3,r0,LSR #2               ");
+    asm("AND     r0,r0,#3                   ");
+	__JUMP(,lr);
+
+    asm("__mod8:                            ");
+    asm("MOV     r3,r0,LSR #3               ");
+    asm("AND     r0,r0,#7                   ");
+	__JUMP(,lr);
+
+    asm("__mod10:                           ");
+    asm("MOV     r3, r0                     ");
+    asm("SUB     r0, r3, #10                ");
+    asm("SUB     r3, r3, r3, LSR #2         ");
+    asm("ADD     r3, r3, r3, LSR #4         ");
+    asm("ADD     r3, r3, r3, LSR #8         ");
+    asm("ADD     r3, r3, r3, LSR #16        ");
+    asm("MOV     r3, r3, LSR #3             ");
+    asm("ADD     r2, r3, r3, ASL #2         ");
+    asm("SUBS    r0, r0, r2, ASL #1         ");
+    asm("ADDPL   r3, r3, #1                 ");
+    asm("ADDMI   r0, r0, #10                ");
+	__JUMP(,lr);
+
+    asm("__mod7:                            ");
+    asm("MOV     r3, r0                     ");
+	asm("SUB     r0, r3, #7                 ");
+	asm("MOV     r3, r3, lsr #1             ");
+	asm("ADD     r3, r3, r3, lsr #3         ");
+	asm("ADD     r3, r3, r3, lsr #6         ");
+	asm("ADD     r3, r3, r3, lsr #12        ");
+	asm("ADD     r3, r3, r3, lsr #24        ");
+	asm("MOV     r3, r3, lsr #2             ");
+	asm("RSB     r2, r3, r3, asl #3         ");
+	asm("SUBS    r0, r0, r2, asl #0         ");
+	asm("ADDPL   r3, r3, #1                 ");
+	asm("ADDMI   r0, r0, #7                 ");
+	__JUMP(,lr);
+
+    asm("__mod5:                            ");
+    asm("MOV     r3, r0                     ");
+	asm("SUB     r0, r3, #5                 ");
+	asm("SUB     r3, r3, r3, lsr #2         ");
+	asm("ADD     r3, r3, r3, lsr #4         ");
+	asm("ADD     r3, r3, r3, lsr #8         ");
+	asm("ADD     r3, r3, r3, lsr #16        ");
+	asm("MOV     r3, r3, lsr #2             ");
+	asm("ADD     r2, r3, r3, asl #2         ");
+	asm("SUBS    r0, r0, r2, asl #0         ");
+	asm("ADDPL   r3, r3, #1                 ");
+	asm("ADDMI   r0, r0, #5                 ");
+	__JUMP(,lr);
+    }
+
+
+EXPORT_C __NAKED__ unsigned long long __udivdi3(unsigned long long /*dividend*/, unsigned long long /*divisor*/)
+//
+// Dividend in r1:r0, divisor in r3:r2, Return quotient in r1:r0
+//
+	{
+	asm("stmfd sp!, {r4-r7,lr} ");
+	asm("bl UDiv01 ");					// do the division, result in r4,r5
+	asm("mov r0, r4 ");
+	asm("mov r1, r5 ");
+	__POPRET("r4-r7,");
+
+	// Unsigned 64-bit division. Dividend in r0,r1, divisor in r2,r3
+	// Quotient returned in r4,r5, Remainder in r3,r6
+	// Registers r0-r7,r12 used, r8-r11 unmodified
+	asm(".global UDiv01 ");
+	asm("UDiv01: ");
+	asm("movs r3, r3 ");				// check if divisor fits in 32 bits
+	asm("bne udiv64a ");				// branch if not
+	asm("movs r2, r2 ");				// check if divisor fits in 31 bits
+	asm("bmi udiv64e ");				// branch if not
+	asm("beq udiv64_divby0 ");			// if divisor=0, branch to error routine
+
+	// Divisor is <0x80000000
+	// This means that a 32-bit accumulator is sufficient
+	asm("mov r4, #0 ");					// use r3 as acc, result in r4, r5
+	asm("mov r5, #0 ");
+	asm("mov r6, #8 ");					// do 2 set of 32 iterations
+	asm("udiv64b: ");
+	asm("adds r1, r1, r1 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("adds r1, r1, r1 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("adds r1, r1, r1 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("adds r1, r1, r1 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("subs r6, r6, #1 ");			// loop
+	asm("bne udiv64b ");
+	asm("mov r6, #8 ");					// 2nd set of 32 iterations
+	asm("udiv64c: ");
+	asm("adds r0, r0, r0 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("adds r0, r0, r0 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("adds r0, r0, r0 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("adds r0, r0, r0 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("subs r6, r6, #1 ");			// loop
+	asm("bne udiv64c ");
+	__JUMP(,lr);
+
+	// 2^31 <= Divisor < 2^32
+	// Need 33-bit accumulator - use carry flag as 33rd bit
+	asm("udiv64e: ");
+	asm("mov r4, #0 ");					// use r3 as acc, result in r4, r5
+	asm("mov r5, #0 ");
+	asm("mov r6, #8 ");					// do 2 set of 32 iterations
+	asm("udiv64f: ");
+	asm("adds r1, r1, r1 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subcs r3, r3, r2 ");
+	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("adds r1, r1, r1 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subcs r3, r3, r2 ");
+	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("adds r1, r1, r1 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subcs r3, r3, r2 ");
+	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("adds r1, r1, r1 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subcs r3, r3, r2 ");
+	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("subs r6, r6, #1 ");			// loop
+	asm("bne udiv64f ");
+	asm("mov r6, #8 ");					// 2nd set of 32 iterations
+	asm("udiv64g: ");
+	asm("adds r0, r0, r0 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subcs r3, r3, r2 ");
+	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("adds r0, r0, r0 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subcs r3, r3, r2 ");
+	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("adds r0, r0, r0 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subcs r3, r3, r2 ");
+	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("adds r0, r0, r0 ");			// shift dividend left into acc
+	asm("adcs r3, r3, r3 ");
+	asm("subcs r3, r3, r2 ");
+	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
+	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
+	asm("addcc r3, r3, r2 ");			// if borrow, add back
+	asm("subs r6, r6, #1 ");			// loop
+	asm("bne udiv64g ");
+	__JUMP(,lr);
+	
+	// Divisor >= 2^32, so quotient < 2^32
+	// Use 64 bit accumulator, 32 bit quotient
+	asm("udiv64a: ");
+	asm("mov r4, #0 ");					// quotient in r4, use r1, r6 as accumulator
+	asm("mov r6, #0 ");
+	asm("mov r5, #8 ");					// do 32 iterations
+	asm("udiv64d: ");
+	asm("adds r0, r0, r0 ");			// shift dividend left into acc
+	asm("adcs r1, r1, r1 ");
+	asm("adcs r6, r6, r6 ");
+	asm("subs r7, r1, r2 ");			// subtract divisor from acc, result into r7,r12
+	asm("sbcs r12, r6, r3 ");
+	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
+	asm("movcs r1, r7 ");				// if no borrow, update acc
+	asm("movcs r6, r12 ");
+	asm("adds r0, r0, r0 ");			// shift dividend left into acc
+	asm("adcs r1, r1, r1 ");
+	asm("adcs r6, r6, r6 ");
+	asm("subs r7, r1, r2 ");			// subtract divisor from acc, result into r7,r12
+	asm("sbcs r12, r6, r3 ");
+	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
+	asm("movcs r1, r7 ");				// if no borrow, update acc
+	asm("movcs r6, r12 ");
+	asm("adds r0, r0, r0 ");			// shift dividend left into acc
+	asm("adcs r1, r1, r1 ");
+	asm("adcs r6, r6, r6 ");
+	asm("subs r7, r1, r2 ");			// subtract divisor from acc, result into r7,r12
+	asm("sbcs r12, r6, r3 ");
+	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
+	asm("movcs r1, r7 ");				// if no borrow, update acc
+	asm("movcs r6, r12 ");
+	asm("adds r0, r0, r0 ");			// shift dividend left into acc
+	asm("adcs r1, r1, r1 ");
+	asm("adcs r6, r6, r6 ");
+	asm("subs r7, r1, r2 ");			// subtract divisor from acc, result into r7,r12
+	asm("sbcs r12, r6, r3 ");
+	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
+	asm("movcs r1, r7 ");				// if no borrow, update acc
+	asm("movcs r6, r12 ");
+	asm("subs r5, r5, #1 ");			// loop
+	asm("bne udiv64d ");
+	asm("mov r3, r1 ");					// remainder in r3,r6
+	__JUMP(,lr);
+
+	asm("udiv64_divby0: ");
+	asm("stmfd sp!, {r11,lr} ");
+	__EH_FRAME_PUSH2(r11,lr)
+	asm("mov r11, sp ");
+	asm("bic sp, sp, #4 ");
+	asm("bl " DIV_BY_ZERO);
+	asm("mov sp, r11 ");
+	__POPRET("r11,");
+	}
+
+}
+