// Copyright (c) 1995-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32\common\arm\cgcchelp.cia
// 
//

#include "../common.h"
#ifdef __KERNEL_MODE__
#include "nkern.h"
#endif

extern "C" {
#ifdef __GCC32__
EXPORT_C __NAKED__ TInt __divsi3(TInt /*dividend*/,TInt /*divisor*/)
//
// Signed divide of r0 by r1: returns quotient in r0
// Quotient is truncated (rounded towards zero).
// Destroys r2, r3 and ip
// Negates dividend and divisor, then does an unsigned divide; signs
// get sorted out again at the end.
// 
// The sign of the result is needed at the end of the calculation;
// it is kept in bit 31 of ip while the old lr is saved on the stack.
//
    {

	asm("STMFD	sp!, {lr} ");
    asm("ANDS    r3, r1, #0x80000000        ");	// r3 bit 31=sign of divisor, rest of r3=0
    asm("RSBMI   r1, r1, #0                 ");	// r1=ABS(divisor)
    asm("EORS    ip, r3, r0, ASR #32        ");	// ip bit 31=sign of quotient, all other bits=carry=sign of dividend
    asm("RSBCS   r0, r0, #0                 ");	// r0=ABS(dividend)
    asm(".EXTERN 			    ");
    asm("BL      __umodsi3_start            ");
    asm("MOV     r0, r3                     ");
    asm("CMP	 ip, #0						");	// test sign of quotient
    asm("RSBMI   r0, r0, #0                 ");	// negate if necessary
	__POPRET("");
	}
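
// For reference only: a minimal C sketch of the sign handling performed by
// __divsi3 above. The unsigned divide itself is done by __umodsi3_start in the
// hand-written code (which also dispatches a zero divisor to the error handler);
// plain C division stands in for it here, and the helper name RefDivSi3 is
// purely illustrative.
#if 0
static TInt RefDivSi3(TInt aDividend, TInt aDivisor)
	{
	TUint n = (aDividend < 0) ? 0u - (TUint)aDividend : (TUint)aDividend;	// ABS(dividend)
	TUint d = (aDivisor < 0) ? 0u - (TUint)aDivisor : (TUint)aDivisor;		// ABS(divisor)
	TUint q = n / d;							// unsigned quotient
	return ((aDividend ^ aDivisor) < 0) ? -(TInt)q : (TInt)q;	// sign of quotient = XOR of operand signs
	}
#endif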


EXPORT_C __NAKED__ TInt __modsi3(TInt /*dividend*/,TInt /*divisor*/)
//
// Signed divide of r0 by r1: returns remainder in r0
// Sign of remainder = sign of dividend.
// Destroys r2, r3 and ip
// Negates dividend and divisor, then does an unsigned divide; signs
// get sorted out again at the end.
//
// The sign of the dividend must be saved so that it can be applied to the
// remainder at the end of the calculation; it is kept in bit 31 of ip
// (a copy of the dividend) while the old lr is saved on the stack.
//
    {

	asm("STMFD	sp!, {lr} ");
    asm("MOVS    r1, r1                     ");
    asm("RSBMI   r1, r1, #0                 ");
    asm("MOVS    ip, r0 ");
    asm("RSBMI   r0, r0, #0                 ");
    asm(".EXTERN 			    ");
    asm("BL      __umodsi3_start            ");
    asm("MOVS    ip, ip	");
    asm("RSBMI   r0, r0, #0                 ");
	__POPRET("");
    }

EXPORT_C __NAKED__ TUint __udivsi3(TUint /*dividend*/,TUint /*divisor*/)
//
// Unsigned divide of r0 by r1: returns quotient in r0
// Quotient is truncated (rounded towards zero).
// Destroys r2, r3 and ip
// 
    {

    asm("MOV     ip, lr                     ");
    asm(".EXTERN 			    ");
    asm("BL      __umodsi3_start            ");
    asm("MOV     r0, r3                     ");
	__JUMP(,ip);
    }


EXPORT_C __NAKED__ long long __divdi3(long long /*dividend*/, long long /*divisor*/)
//
// Dividend in r1:r0, divisor in r3:r2; quotient returned in r1:r0
//
	{
	asm("stmfd sp!, {r4-r8,lr} ");
	asm("eor r8, r1, r3 ");				// sign of result into r8
	asm("movs r1, r1 ");
	asm("bpl 1f ");
	asm("rsbs r0, r0, #0 ");			// ABS(dividend)
	asm("rsc r1, r1, #0 ");
	asm("1: ");
	asm("movs r3, r3 ");
	asm("bpl 2f ");
	asm("rsbs r2, r2, #0 ");			// ABS(divisor)
	asm("rsc r3, r3, #0 ");
	asm("2: ");
	asm("bl UDiv01 ");					// do the division, result in r4,r5
	asm("eors r0, r4, r8, asr #32 ");	// quotient into r1:r0, inverted if quotient -ve
	asm("eors r1, r5, r8, asr #32 ");
	asm("adcs r0, r0, #0 ");		// if quotient -ve, add 1
	asm("adcs r1, r1, #0 ");
	__POPRET("r4-r8,");
	}
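
// For reference only: the conditional negation above uses the identity
// -x == (x ^ ~0) + 1. XORing the 64-bit quotient with the sign mask and then
// adding the carry (which the EORS leaves set only when the result must be
// negative) negates it in place without a branch. A C sketch, with the helper
// name RefApplySign purely illustrative:
#if 0
static long long RefApplySign(unsigned long long aMagnitude, TInt aNegative)
	{
	unsigned long long mask = aNegative ? ~0ULL : 0ULL;	// all ones if the result is negative
	return (long long)((aMagnitude ^ mask) + (aNegative ? 1u : 0u));	// (x ^ ~0) + 1 == -x
	}
#endif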

EXPORT_C __NAKED__ long long __moddi3(long long /*dividend*/, long long /*divisor*/)	/* signed */
	{
	asm("stmfd sp!, {r4-r8,lr} ");
	asm("movs r8, r1 ");				// sign of remainder (=sign of dividend) into r8
	asm("bpl 1f ");
	asm("rsbs r0, r0, #0 ");			// ABS(dividend)
	asm("rsc r1, r1, #0 ");
	asm("1: ");
	asm("movs r3, r3 ");
	asm("bpl 2f ");
	asm("rsbs r2, r2, #0 ");			// ABS(divisor)
	asm("rsc r3, r3, #0 ");
	asm("2: ");
	asm("bl UDiv01 ");					// do the division, remainder in r3,r6
	asm("eors r0, r3, r8, asr #32 ");	// remainder into r1:r0, inverted if dividend -ve
	asm("eors r1, r6, r8, asr #32 ");
	asm("adcs r0, r0, #0 ");			// if dividend -ve, add 1
	asm("adcs r1, r1, #0 ");
	__POPRET("r4-r8,");
	}

EXPORT_C __NAKED__ long long __umoddi3(unsigned long long /*dividend*/, unsigned long long /*divisor*/)	/* unsigned */
	{
	asm("stmfd sp!, {r4-r7,lr} ");
	asm("bl UDiv01 ");					// do the division, remainder in r6:r3
	asm("mov r0, r3 ");
	asm("mov r1, r6 ");
	__POPRET("r4-r7,");
	}

EXPORT_C __NAKED__ long long __ashrdi3(long long /*value*/, unsigned int /*count*/)
	{
	asm("cmp r2, #63 ");
	asm("movhi r2, #63 ");			// count>63 same as count=63
	asm("cmp r2, #32 ");
	asm("bcs Asr01 ");				// jump if shift count >=32
	asm("rsb r12, r2, #32 ");		// r12=32-shift count
	asm("mov r0, r0, lsr r2 ");		// shift ls word right
	asm("orr r0, r0, r1, lsl r12 ");	// or in bits shifted out of ms word
	asm("mov r1, r1, asr r2 ");		// shift ms word right
	__JUMP(,lr);
	asm("Asr01: ");
	asm("sub r2, r2, #32 ");		// r2=shift count-32
	asm("mov r0, r1, asr r2 ");		// ls word = ms word >> (count-32)
	asm("mov r1, r1, asr #32 ");	// ms word of result=sign extension of r1
	__JUMP(,lr);
	}
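
// For reference only: a C sketch of the word-pair shift implemented above,
// holding the 64-bit value as two 32-bit halves to mirror the register usage
// (the helper name RefAshrDi3 is illustrative, and arithmetic shifting of
// signed values is assumed, as on ARM).
#if 0
static long long RefAshrDi3(TUint aLo, TInt aHi, TUint aCount)
	{
	if (aCount > 63)
		aCount = 63;						// count>63 same as count=63
	if (aCount >= 32)
		{
		aLo = (TUint)(aHi >> (aCount - 32));	// ls word = ms word >> (count-32)
		aHi >>= 31;							// ms word = sign extension
		}
	else if (aCount)
		{
		aLo = (aLo >> aCount) | ((TUint)aHi << (32 - aCount));	// bits shifted out of ms word
		aHi >>= aCount;						// shift ms word right
		}
	return (long long)(((unsigned long long)(TUint)aHi << 32) | aLo);
	}
#endif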

EXPORT_C __NAKED__ long long __ashldi3(long long /*value*/, unsigned int /*count*/)
	{
	asm("cmp r2, #63 ");
	asm("movhi r2, #64 ");			// count>63 same as count=64
	asm("cmp r2, #32 ");
	asm("bcs Asl01 ");				// jump if shift count >=32
	asm("rsb r12, r2, #32 ");		// r12=32-shift count
	asm("mov r1, r1, asl r2 ");		// shift ms word left
	asm("orr r1, r1, r0, lsr r12 ");	// or in bits shifted out of ls word
	asm("mov r0, r0, asl r2 ");		// shift ls word left
	__JUMP(,lr);
	asm("Asl01: ");
	asm("sub r2, r2, #32 ");		// r2=shift count-32
	asm("mov r1, r0, asl r2 ");		// result ms word = ls word << (count-32)
	asm("mov r0, #0 ");				// ls word of result is zero
	__JUMP(,lr);
	}

EXPORT_C __NAKED__ unsigned long long __lshrdi3(unsigned long long /*value*/, unsigned int /*count*/)
	{
	asm("cmp r2, #63 ");
	asm("movhi r2, #64 ");			// count>63 same as count=64
	asm("cmp r2, #32 ");
	asm("bcs Lsr01 ");				// jump if shift count >=32
	asm("rsb r12, r2, #32 ");		// r12=32-shift count
	asm("mov r0, r0, lsr r2 ");		// shift ls word right
	asm("orr r0, r0, r1, lsl r12 ");	// or in bits shifted out of ms word
	asm("mov r1, r1, lsr r2 ");		// shift ms word right
	__JUMP(,lr);
	asm("Lsr01: ");
	asm("sub r2, r2, #32 ");		// r2=shift count-32
	asm("mov r0, r1, lsr r2 ");		// ls word = ms word >> (count-32)
	asm("mov r1, #0 ");				// ms word of result = 0
	__JUMP(,lr);
	}

EXPORT_C __NAKED__ long long __muldi3(long long /*multiplicand*/, long long /*multiplier*/)
	{
	asm("mul r1, r2, r1 ");				// r1=low2*high1
	asm("mov ip, r0 ");					// ip=low1
	asm("mla r1, r0, r3, r1 ");			// r1+=low1*high2
	asm("mov r0, #0 ");
	asm("umlal r0, r1, r2, ip ");		// r1:r0 += high1*low1
	__JUMP(,lr);
	}
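
// For reference only: __muldi3 builds the 64x64->64 product from 32-bit partial
// products. The high1*high2 term can only affect bits 64 and above, so it is
// dropped; the two cross products land in the upper word. A C sketch (helper
// name RefMulDi3 is illustrative):
#if 0
static unsigned long long RefMulDi3(unsigned long long a, unsigned long long b)
	{
	TUint lo1 = (TUint)a, hi1 = (TUint)(a >> 32);
	TUint lo2 = (TUint)b, hi2 = (TUint)(b >> 32);
	unsigned long long r = (unsigned long long)lo1 * lo2;			// low1*low2, full 64 bits (the UMLAL)
	r += (unsigned long long)(TUint)(lo2 * hi1 + lo1 * hi2) << 32;	// cross products, upper word only
	return r;
	}
#endif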

EXPORT_C __NAKED__ long long __negdi2(long long /*argument*/)
	{
	asm("rsbs r0, r0, #0 ");		// r0=0-r0, set carry
	asm("rscs r1, r1, #0 ");		// r1=0-r1-(1-C)
	__JUMP(,lr);
	}

EXPORT_C __NAKED__ unsigned long long __udivmoddi4 (unsigned long long /*dividend*/,
													unsigned long long /*divisor*/,
													unsigned long long* /*p_remainder*/)
	{
	asm("stmfd sp!, {r4-r7,lr} ");
	asm("bl UDiv01 ");					// do the division, quotient in r5:r4 remainder in r6:r3
	asm("ldr r7, [sp, #20] ");			// r7=p_remainder
	asm("mov r0, r4 ");					// r0=quotient low
	asm("stmia r7, {r3,r6} ");			// store remainder
	asm("mov r1, r5 ");					// r0=quotient high
	__POPRET("r4-r7,");
	}
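
// For reference only: __udivmoddi4 returns the quotient and stores the
// remainder through the supplied pointer. In C terms (helper name
// RefUDivModDi4 is illustrative):
#if 0
static unsigned long long RefUDivModDi4(unsigned long long n, unsigned long long d,
										unsigned long long* aRemainder)
	{
	*aRemainder = n % d;				// remainder out through the pointer (r6:r3 from UDiv01)
	return n / d;						// quotient (r5:r4 from UDiv01)
	}
#endif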

EXPORT_C __NAKED__ int __cmpdi2(long long /*a*/, long long /*b*/)
	{
	// return 0 if a<b, 1 if a=b, 2 if a>b
	asm("subs r0, r2, r0 ");
	asm("sbcs r1, r3, r1 ");			// r1:r0 = b-a, set flags
	asm("movlt r0, #2 ");				// if b<a r0=2
	__JUMP(lt,lr);						// if b<a return
	asm("cmpeq r0, #0 ");				// if top word of difference=0, look at bottom
	asm("moveq r0, #1 ");				// if a=b, r0=1
	asm("movne r0, #0 ");				// else r=0
	__JUMP(,lr);
	}

EXPORT_C __NAKED__ int __ucmpdi2(unsigned long long /*a*/, unsigned long long /*b*/)
	{
	// return 0 if a<b, 1 if a=b, 2 if a>b
	asm("cmp r1, r3 ");
	asm("cmpeq r0, r2 ");				// compare r1:r0 - r3:r2
	asm("movhi r0, #2 ");				// r0=2 if a>b
	asm("moveq r0, #1 ");				// r0=1 if a=b
	asm("movlo r0, #0 ");				// r0=0 if a<b
	__JUMP(,lr);
	}
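
// For reference only: the two comparison helpers return 0, 1 or 2 for a<b,
// a=b and a>b respectively. A C sketch of the unsigned variant (helper name
// RefUCmpDi2 is illustrative):
#if 0
static int RefUCmpDi2(unsigned long long a, unsigned long long b)
	{
	if (a < b)
		return 0;		// a<b
	if (a == b)
		return 1;		// a=b
	return 2;			// a>b
	}
#endif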
#endif

#if defined(__GCC32__)
void __division_by_zero();
#define DIV_BY_ZERO " __division_by_zero "
#elif defined(__ARMCC__)
void __rt_div0 (void);
#define DIV_BY_ZERO " __cpp(__rt_div0) "
#endif

EXPORT_C __NAKED__ TUint __umodsi3(TUint /*dividend*/,TUint /*divisor*/)
//
// Unsigned divide of r0 by r1: returns remainder in r0, quotient in r3
// Destroys r2, r3
//
    {

    asm("__umodsi3_start:");
//
// Use lookup table for divisors less than 17, and jump to
// an optimised routine if available
// 
    asm("MOV     r3, #0                     ");
    asm("CMP     r1, #16                    ");
    asm("LDRLS   r3, [pc, #modtable - . - 8]");
    asm("LDRLS   r3, [r3, r1, asl #2]       ");
    asm("CMP     r3, #0                     ");
	__JUMP(NE,r3);
//
// r3 must be zero when entering this point
//
    asm("MOV     r2, r1                     ");

    asm("__umodsi3_loop:                    ");
    asm("CMP     r2, r0, LSR #8             ");
    asm("MOVLS   r2, r2, LSL #8             ");
    asm("BLO     __umodsi3_loop             ");

    asm("CMP     r2, r0, LSR #1             ");
    asm("BHI     __umodsi3_jump7            ");
    asm("CMP     r2, r0, LSR #2             ");
    asm("BHI     __umodsi3_jump6            ");
    asm("CMP     r2, r0, LSR #3             ");
    asm("BHI     __umodsi3_jump5            ");
    asm("CMP     r2, r0, LSR #4             ");
    asm("BHI     __umodsi3_jump4            ");
    asm("CMP     r2, r0, LSR #5             ");
    asm("BHI     __umodsi3_jump3            ");
    asm("CMP     r2, r0, LSR #6             ");
    asm("BHI     __umodsi3_jump2            ");
    asm("CMP     r2, r0, LSR #7             ");
    asm("BHI     __umodsi3_jump1            ");

    asm("__umodsi3_loop2:                   ");
    asm("MOVHI   r2, r2, LSR #8             ");

    asm("CMP     r0, r2, LSL #7             ");
    asm("ADC     r3, r3, r3                 ");
    asm("SUBCS   r0, r0, r2, LSL #7         ");
    asm("CMP     r0, r2, LSL #6             ");

    asm("__umodsi3_jump1:                   ");
    asm("ADC     r3, r3, r3                 ");
    asm("SUBCS   r0, r0, r2, LSL #6         ");
    asm("CMP     r0, r2, LSL #5             ");
    asm("__umodsi3_jump2:                   ");
    asm("ADC     r3, r3, r3                 ");
    asm("SUBCS   r0, r0, r2, LSL #5         ");
    asm("CMP     r0, r2, LSL #4             ");
    asm("__umodsi3_jump3:                   ");
    asm("ADC     r3, r3, r3                 ");
    asm("SUBCS   r0, r0, r2, LSL #4         ");
    asm("CMP     r0, r2, LSL #3             ");
    asm("__umodsi3_jump4:                   ");
    asm("ADC     r3, r3, r3                 ");
    asm("SUBCS   r0, r0, r2, LSL #3         ");
    asm("CMP     r0, r2, LSL #2             ");
    asm("__umodsi3_jump5:                   ");
    asm("ADC     r3, r3, r3                 ");
    asm("SUBCS   r0, r0, r2, LSL #2         ");
    asm("CMP     r0, r2, LSL #1             ");
    asm("__umodsi3_jump6:                   ");
    asm("ADC     r3, r3, r3                 ");
    asm("SUBCS   r0, r0, r2, LSL #1         ");
    asm("__umodsi3_jump7:                   ");
    asm("CMP     r0, r2                     ");
    asm("ADC     r3, r3, r3                 ");
    asm("SUBCS   r0, r0, r2                 ");

    asm("CMP     r2, r1                     ");
    asm("BNE     __umodsi3_loop2            ");

	__JUMP(,lr);

    asm("modtable:                          ");
    asm(".word   mod_jump_table             ");
//
// Lookup for optimised divide routines
//
    asm("mod_jump_table:                    ");
    asm(".word " DIV_BY_ZERO); // 0
    asm(".word   __mod1                     "); // 1
    asm(".word   __mod2                     "); // 2
    asm(".word   0                          "); // 3
    asm(".word   __mod4                     "); // 4
    asm(".word   __mod5                     "); // 5
    asm(".word   0                          "); // 6
    asm(".word   __mod7                     "); // 7
    asm(".word   __mod8                     "); // 8
    asm(".word   0                          "); // 9
    asm(".word   __mod10                    "); // 10
    asm(".word   0                          "); // 11
    asm(".word   0                          "); // 12
    asm(".word   0                          "); // 13
    asm(".word   0                          "); // 14
    asm(".word   0                          "); // 15
    asm(".word   __mod16                    "); // 16

    asm("__mod16:                           ");
    asm("MOV     r3,r0,LSR #4               ");
    asm("AND     r0,r0,#15                  ");
	__JUMP(,lr);

    asm("__mod1:                            ");
    asm("MOV     r3,r0                      ");
    asm("MOV     r0,#0                      ");
	__JUMP(,lr);

    asm("__mod2:                            ");
    asm("MOV     r3,r0,LSR #1               ");
    asm("AND     r0,r0,#1                   ");
	__JUMP(,lr);

    asm("__mod4:                            ");
    asm("MOV     r3,r0,LSR #2               ");
    asm("AND     r0,r0,#3                   ");
	__JUMP(,lr);

    asm("__mod8:                            ");
    asm("MOV     r3,r0,LSR #3               ");
    asm("AND     r0,r0,#7                   ");
	__JUMP(,lr);
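
//
// The fixed-divisor routines below replace the general loop with a
// multiply-by-reciprocal approximation. __mod10, for example, forms an
// estimate of n*(4/5) with a shift-and-add series starting from n - n/4,
// shifts it right by 3 to get a quotient estimate that can be low by at most
// one, and then corrects quotient and remainder in a single conditional step.
// __mod7 and __mod5 follow the same pattern with their own series.
//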

    asm("__mod10:                           ");
    asm("MOV     r3, r0                     ");
    asm("SUB     r0, r3, #10                ");
    asm("SUB     r3, r3, r3, LSR #2         ");
    asm("ADD     r3, r3, r3, LSR #4         ");
    asm("ADD     r3, r3, r3, LSR #8         ");
    asm("ADD     r3, r3, r3, LSR #16        ");
    asm("MOV     r3, r3, LSR #3             ");
    asm("ADD     r2, r3, r3, ASL #2         ");
    asm("SUBS    r0, r0, r2, ASL #1         ");
    asm("ADDPL   r3, r3, #1                 ");
    asm("ADDMI   r0, r0, #10                ");
	__JUMP(,lr);

    asm("__mod7:                            ");
    asm("MOV     r3, r0                     ");
	asm("SUB     r0, r3, #7                 ");
	asm("MOV     r3, r3, lsr #1             ");
	asm("ADD     r3, r3, r3, lsr #3         ");
	asm("ADD     r3, r3, r3, lsr #6         ");
	asm("ADD     r3, r3, r3, lsr #12        ");
	asm("ADD     r3, r3, r3, lsr #24        ");
	asm("MOV     r3, r3, lsr #2             ");
	asm("RSB     r2, r3, r3, asl #3         ");
	asm("SUBS    r0, r0, r2, asl #0         ");
	asm("ADDPL   r3, r3, #1                 ");
	asm("ADDMI   r0, r0, #7                 ");
	__JUMP(,lr);

    asm("__mod5:                            ");
    asm("MOV     r3, r0                     ");
	asm("SUB     r0, r3, #5                 ");
	asm("SUB     r3, r3, r3, lsr #2         ");
	asm("ADD     r3, r3, r3, lsr #4         ");
	asm("ADD     r3, r3, r3, lsr #8         ");
	asm("ADD     r3, r3, r3, lsr #16        ");
	asm("MOV     r3, r3, lsr #2             ");
	asm("ADD     r2, r3, r3, asl #2         ");
	asm("SUBS    r0, r0, r2, asl #0         ");
	asm("ADDPL   r3, r3, #1                 ");
	asm("ADDMI   r0, r0, #5                 ");
	__JUMP(,lr);
    }
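
// For reference only: the general path of __umodsi3 above is an unrolled
// shift-and-subtract (restoring) division that retires eight quotient bits per
// pass, using computed entry points into the unrolled loop. A plain
// one-bit-per-step C equivalent (helper name RefUModSi3 is illustrative;
// aDivisor==0 is assumed to have been dispatched to the error handler already):
#if 0
static TUint RefUModSi3(TUint aDividend, TUint aDivisor, TUint* aQuotient)
	{
	TUint q = 0;
	TInt k = 0;
	// Align the divisor just below (or at) the dividend without overflowing
	while (k < 31 && (aDivisor << k) < aDividend && !((aDivisor << k) & 0x80000000u))
		++k;
	for (; k >= 0; --k)
		{
		q <<= 1;
		if (aDividend >= (aDivisor << k))
			{
			aDividend -= aDivisor << k;	// subtract the shifted divisor
			q |= 1;						// and record a quotient bit
			}
		}
	*aQuotient = q;						// quotient (r3 in the code above)
	return aDividend;					// remainder (r0 in the code above)
	}
#endif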


EXPORT_C __NAKED__ unsigned long long __udivdi3(unsigned long long /*dividend*/, unsigned long long /*divisor*/)
//
// Dividend in r1:r0, divisor in r3:r2; quotient returned in r1:r0
//
	{
	asm("stmfd sp!, {r4-r7,lr} ");
	asm("bl UDiv01 ");					// do the division, result in r4,r5
	asm("mov r0, r4 ");
	asm("mov r1, r5 ");
	__POPRET("r4-r7,");

	// Unsigned 64-bit division. Dividend in r0,r1, divisor in r2,r3
	// Quotient returned in r4,r5, Remainder in r3,r6
	// Registers r0-r7,r12 used, r8-r11 unmodified
	asm(".global UDiv01 ");
	asm("UDiv01: ");
	asm("movs r3, r3 ");				// check if divisor fits in 32 bits
	asm("bne udiv64a ");				// branch if not
	asm("movs r2, r2 ");				// check if divisor fits in 31 bits
	asm("bmi udiv64e ");				// branch if not
	asm("beq udiv64_divby0 ");			// if divisor=0, branch to error routine

	// Divisor is <0x80000000
	// This means that a 32-bit accumulator is sufficient
	asm("mov r4, #0 ");					// use r3 as acc, result in r4, r5
	asm("mov r5, #0 ");
	asm("mov r6, #8 ");					// do 2 set of 32 iterations
	asm("udiv64b: ");
	asm("adds r1, r1, r1 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("adds r1, r1, r1 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("adds r1, r1, r1 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("adds r1, r1, r1 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("subs r6, r6, #1 ");			// loop
	asm("bne udiv64b ");
	asm("mov r6, #8 ");					// 2nd set of 32 iterations
	asm("udiv64c: ");
	asm("adds r0, r0, r0 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("adds r0, r0, r0 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("adds r0, r0, r0 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("adds r0, r0, r0 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("subs r6, r6, #1 ");			// loop
	asm("bne udiv64c ");
	__JUMP(,lr);

	// 2^31 <= Divisor < 2^32
	// Need 33-bit accumulator - use carry flag as 33rd bit
	asm("udiv64e: ");
	asm("mov r4, #0 ");					// use r3 as acc, result in r4, r5
	asm("mov r5, #0 ");
	asm("mov r6, #8 ");					// do 2 set of 32 iterations
	asm("udiv64f: ");
	asm("adds r1, r1, r1 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subcs r3, r3, r2 ");
	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("adds r1, r1, r1 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subcs r3, r3, r2 ");
	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("adds r1, r1, r1 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subcs r3, r3, r2 ");
	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("adds r1, r1, r1 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subcs r3, r3, r2 ");
	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r5, r5, r5 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("subs r6, r6, #1 ");			// loop
	asm("bne udiv64f ");
	asm("mov r6, #8 ");					// 2nd set of 32 iterations
	asm("udiv64g: ");
	asm("adds r0, r0, r0 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subcs r3, r3, r2 ");
	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("adds r0, r0, r0 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subcs r3, r3, r2 ");
	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("adds r0, r0, r0 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subcs r3, r3, r2 ");
	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("adds r0, r0, r0 ");			// shift dividend left into acc
	asm("adcs r3, r3, r3 ");
	asm("subcs r3, r3, r2 ");
	asm("subccs r3, r3, r2 ");			// subtract divisor from acc
	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
	asm("addcc r3, r3, r2 ");			// if borrow, add back
	asm("subs r6, r6, #1 ");			// loop
	asm("bne udiv64g ");
	__JUMP(,lr);
	
	// Divisor >= 2^32, so quotient < 2^32
	// Use 64 bit accumulator, 32 bit quotient
	asm("udiv64a: ");
	asm("mov r4, #0 ");					// quotient in r4, use r1, r6 as accumulator
	asm("mov r6, #0 ");
	asm("mov r5, #8 ");					// do 32 iterations
	asm("udiv64d: ");
	asm("adds r0, r0, r0 ");			// shift dividend left into acc
	asm("adcs r1, r1, r1 ");
	asm("adcs r6, r6, r6 ");
	asm("subs r7, r1, r2 ");			// subtract divisor from acc, result into r7,r12
	asm("sbcs r12, r6, r3 ");
	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
	asm("movcs r1, r7 ");				// if no borrow, update acc
	asm("movcs r6, r12 ");
	asm("adds r0, r0, r0 ");			// shift dividend left into acc
	asm("adcs r1, r1, r1 ");
	asm("adcs r6, r6, r6 ");
	asm("subs r7, r1, r2 ");			// subtract divisor from acc, result into r7,r12
	asm("sbcs r12, r6, r3 ");
	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
	asm("movcs r1, r7 ");				// if no borrow, update acc
	asm("movcs r6, r12 ");
	asm("adds r0, r0, r0 ");			// shift dividend left into acc
	asm("adcs r1, r1, r1 ");
	asm("adcs r6, r6, r6 ");
	asm("subs r7, r1, r2 ");			// subtract divisor from acc, result into r7,r12
	asm("sbcs r12, r6, r3 ");
	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
	asm("movcs r1, r7 ");				// if no borrow, update acc
	asm("movcs r6, r12 ");
	asm("adds r0, r0, r0 ");			// shift dividend left into acc
	asm("adcs r1, r1, r1 ");
	asm("adcs r6, r6, r6 ");
	asm("subs r7, r1, r2 ");			// subtract divisor from acc, result into r7,r12
	asm("sbcs r12, r6, r3 ");
	asm("adc r4, r4, r4 ");				// shift result bit left into quotient
	asm("movcs r1, r7 ");				// if no borrow, update acc
	asm("movcs r6, r12 ");
	asm("subs r5, r5, #1 ");			// loop
	asm("bne udiv64d ");
	asm("mov r3, r1 ");					// remainder in r3,r6
	__JUMP(,lr);

	asm("udiv64_divby0: ");
	asm("stmfd sp!, {r11,lr} ");
	__EH_FRAME_PUSH2(r11,lr)
	asm("mov r11, sp ");
	asm("bic sp, sp, #4 ");
	asm("bl " DIV_BY_ZERO);
	asm("mov sp, r11 ");
	__POPRET("r11,");
	}
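
// For reference only: UDiv01 is a restoring shift-and-subtract division,
// specialised into three cases by divisor size (below 2^31, between 2^31 and
// 2^32 where the carry flag acts as a 33rd accumulator bit, and 2^32 or above
// where the quotient fits in 32 bits). A plain bit-per-step C equivalent of
// the same algorithm (helper name RefUDiv64 is illustrative; aDivisor==0 is
// assumed to have been routed to udiv64_divby0 already):
#if 0
static void RefUDiv64(unsigned long long aDividend, unsigned long long aDivisor,
					  unsigned long long* aQuotient, unsigned long long* aRemainder)
	{
	unsigned long long q = 0, acc = 0;			// quotient and accumulator
	for (TInt i = 63; i >= 0; --i)
		{
		TUint carry = (TUint)(acc >> 63);		// bit that falls off the accumulator;
												// the asm keeps it in the carry flag
		acc = (acc << 1) | ((aDividend >> i) & 1u);	// shift next dividend bit into acc
		q <<= 1;
		if (carry || acc >= aDivisor)
			{
			acc -= aDivisor;					// subtract divisor from acc
			q |= 1;								// shift a 1 into the quotient
			}
		}
	*aQuotient = q;								// r5:r4 in the code above
	*aRemainder = acc;							// r6:r3 in the code above
	}
#endif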

}