kernel/eka/nkernsmp/arm/ncutils.cia
author William Roberts <williamr@symbian.org>
Mon, 28 Jun 2010 11:25:30 +0100
branchGCC_SURGE
changeset 184 0e2270015475
parent 90 947f0dc9f7a8
permissions -rw-r--r--
Identify (and sometimes fix) remaining uses of old-style GCC name mangling in cia files - Bug 3115 Instances which don't have defined "CSM_xxx" macros are marked "// CSM needed"

// Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32\nkernsmp\arm\ncutils.cia
// 
//

#include <e32cia.h>
#include <arm.h>
#include <arm_gic.h>

extern TSpinLock BTraceLock;

extern "C" {
extern TUint32 CrashStateOut;
extern SFullArmRegSet DefaultRegSet;
}

//#define __DBG_MON_FAULT__
//#define __RAM_LOADED_CODE__
//#define __EARLY_DEBUG__

#ifdef _DEBUG
#define ASM_KILL_LINK(rp,rs)	asm("mov "#rs", #0xdf ");\
								asm("orr "#rs", "#rs", "#rs", lsl #8 ");\
								asm("orr "#rs", "#rs", "#rs", lsl #16 ");\
								asm("str "#rs", ["#rp"] ");\
								asm("str "#rs", ["#rp", #4] ");
#else
#define ASM_KILL_LINK(rp,rs)
#endif

#ifdef __PRI_LIST_MACHINE_CODED__
/** Return the priority of the highest priority item present on a priority list.

	@return	The highest priority present or -1 if the list is empty.
 */
EXPORT_C __NAKED__ TInt TPriListBase::HighestPriority()
	{
	asm("ldr r2, [r0, #4] ");				// r2=iPresent MSW
	asm("ldr r1, [r0, #0] ");				// r1=iPresent LSW
	CLZ(0,2);								// r0=31-MSB(r2)
	asm("subs r0, r0, #32 ");				// r0=-1-MSB(r2), 0 if r2=0
	CLZcc(CC_EQ,0,1);						// if r2=0, r0=31-MSB(r1)
	asm("rsb r0, r0, #31 ");				// r0=highest priority
	__JUMP(,lr);
	}

/** Find the highest priority item present on a priority list.
	If multiple items at the same priority are present, return the first to be
	added in chronological order.

	@return	a pointer to the item or NULL if the list is empty.
 */
EXPORT_C __NAKED__ TPriListLink* TPriListBase::First()
	{
	asm("ldr r2, [r0, #4] ");				// r2=iPresent MSW
	asm("ldr r1, [r0], #8 ");				// r1=iPresent LSW, r0=&iQueue[0]
	CLZ(3,2);								// r3=31-MSB(r2)
	asm("subs r3, r3, #32 ");				// r3=-1-MSB(r2), 0 if r2=0
	CLZcc(CC_EQ,3,1);						// if r2=0, r3=31-MSB(r1)
	asm("rsbs r3, r3, #31 ");				// r3=highest priority
	asm("ldrpl r0, [r0, r3, lsl #2] ");		// if r3>=0 list is nonempty, r0->first entry
	asm("movmi r0, #0 ");					// if r3<0 list empty, return NULL
	__JUMP(,lr);
	}

/** Add an item to a priority list.

	@param aLink = a pointer to the item - must not be NULL
 */
EXPORT_C __NAKED__ void TPriListBase::Add(TPriListLink* /*aLink*/)
	{
	asm("ldrb r2, [r1, #8]" );				// r2=priority of aLink
	asm("add ip, r0, #8 ");					// ip=&iQueue[0]
	asm("ldr r3, [ip, r2, lsl #2]! ");		// r3->first entry at this priority
	asm("cmp r3, #0 ");						// is this first entry at this priority?
	asm("bne pri_list_add_1 ");				// branch if not
	asm("str r1, [ip] ");					// if queue originally empty, iQueue[pri]=aThread
	asm("ldrb ip, [r0, r2, lsr #3]! ");		// ip=relevant byte of present mask, r0->same
	asm("and r2, r2, #7 ");
	asm("mov r3, #1 ");
	asm("str r1, [r1, #0] ");				// aThread->next=aThread
	asm("orr ip, ip, r3, lsl r2 ");			// ip |= 1<<(pri&7)
	asm("str r1, [r1, #4] ");				// aThread->iPrev=aThread
	asm("strb ip, [r0] ");					// update relevant byte of present mask
	__JUMP(,lr);
	asm("pri_list_add_1: ");
	asm("ldr ip, [r3, #4] ");				// if nonempty, ip=last
	asm("str r1, [r3, #4] ");				// first->prev=aThread
	asm("stmia r1, {r3,ip} ");				// aThread->next=r3=first, aThread->prev=ip=last
	asm("str r1, [ip, #0] ");				// last->next=aThread
	__JUMP(,lr);
	}


/** Removes an item from a priority list.

	@param aLink A pointer to the item - this must not be NULL.
 */
EXPORT_C __NAKED__ void TPriListBase::Remove(TPriListLink* /*aLink*/)
	{
	asm("ldmia r1, {r2,r3} ");				// r2=aLink->iNext, r3=aLink->iPrev
	ASM_KILL_LINK(r1,r12);
	asm("subs r12, r1, r2 ");				// check if more threads at this priority, r12=0 if not
	asm("bne 1f ");							// branch if there are more at same priority
	asm("ldrb r2, [r1, #%a0]" : : "i" _FOFF(NThread, iPriority));	// r2=thread priority
	asm("add r1, r0, #%a0" : : "i" _FOFF(TPriListBase, iQueue));	// r1->iQueue[0]
	asm("str r12, [r1, r2, lsl #2] ");		// iQueue[priority]=NULL
	asm("ldrb r1, [r0, r2, lsr #3] ");		// r1=relevant byte in present mask
	asm("and r3, r2, #7 ");					// r3=priority & 7
	asm("mov r12, #1 ");
	asm("bic r1, r1, r12, lsl r3 ");		// clear bit in present mask
	asm("strb r1, [r0, r2, lsr #3] ");		// update relevant byte in present mask
	__JUMP(,lr);
	asm("1: ");								// get here if there are other threads at same priority
	asm("ldrb r12, [r1, #%a0]" : : "i" _FOFF(NThread, iPriority));	// r12=thread priority
	asm("add r0, r0, #%a0" : : "i" _FOFF(TPriListBase, iQueue));		// r0=&iQueue[0]
	asm("str r3, [r2, #4] ");				// next->prev=prev
	asm("ldr r12, [r0, r12, lsl #2]! ");	// r12=iQueue[priority], r0=&iQueue[priority]
	asm("str r2, [r3, #0] ");				// and prev->next=next
	asm("cmp r12, r1 ");					// if aThread was first...
	asm("streq r2, [r0, #0] ");				// iQueue[priority]=aThread->next
	__JUMP(,lr);							// finished
	}


/** Change the priority of an item on a priority list

	@param	aLink = pointer to the item to act on - must not be NULL
	@param	aNewPriority = new priority for the item
 */
EXPORT_C __NAKED__ void TPriListBase::ChangePriority(TPriListLink* /*aLink*/, TInt /*aNewPriority*/)
	{
	asm("ldrb r3, [r1, #8] ");				// r3=old priority
	asm("stmfd sp!, {r4-r6,lr} ");
	asm("cmp r3, r2 ");
	asm("ldmeqfd sp!, {r4-r6,pc} ");		// if old priority=new, finished
	asm("ldmia r1, {r4,r12} ");				// r4=next, r12=prev
	asm("ldmia r0!, {r6,lr} ");				// lr:r6=present mask, r0=&iQueue[0]
	asm("subs r5, r4, r1 ");				// check if aLink is only one at that priority, r5=0 if it is
	asm("beq change_pri_1 ");				// branch if it is
	asm("ldr r5, [r0, r3, lsl #2] ");		// r5=iQueue[old priority]
	asm("str r4, [r12, #0] ");				// prev->next=next
	asm("str r12, [r4, #4] ");				// next->prev=prev
	asm("cmp r5, r1 ");						// was aLink first?
	asm("streq r4, [r0, r3, lsl #2] ");		// if it was, iQueue[old priority]=aLink->next
	asm("b change_pri_2 ");
	asm("change_pri_1: ");
	asm("str r5, [r0, r3, lsl #2] ");		// if empty, set iQueue[old priority]=NULL
	asm("mov r12, #0x80000000 ");
	asm("rsbs r3, r3, #31 ");				// r3=31-priority
	asm("bicmi lr, lr, r12, ror r3 ");		// if pri>31, clear bit is MS word
	asm("bicpl r6, r6, r12, ror r3 ");		// if pri<=31, clear bit in LS word
	asm("change_pri_2: ");
	asm("ldr r4, [r0, r2, lsl #2] ");		// r4=iQueue[new priority]
	asm("strb r2, [r1, #8] ");				// store new priority
	asm("cmp r4, #0 ");						// new priority queue empty?
	asm("bne change_pri_3 ");				// branch if not
	asm("str r1, [r0, r2, lsl #2] ");		// if new priority queue was empty, iQueue[new p]=aLink
	asm("mov r12, #0x80000000 ");
	asm("str r1, [r1, #0] ");				// aLink->next=aLink
	asm("rsbs r2, r2, #31 ");				// r2=31-priority
	asm("str r1, [r1, #4] ");				// aLink->prev=aLink
	asm("orrmi lr, lr, r12, ror r2 ");		// if pri>31, set bit is MS word
	asm("orrpl r6, r6, r12, ror r2 ");		// if pri<=31, set bit in LS word
	asm("stmdb r0!, {r6,lr} ");				// store present mask and restore r0
	asm("ldmfd sp!, {r4-r6,pc} ");
	asm("change_pri_3: ");
	asm("ldr r12, [r4, #4] ");				// r12->last link at this priority
	asm("str r1, [r4, #4] ");				// first->prev=aLink
	asm("str r1, [r12, #0] ");				// old last->next=aLink
	asm("stmia r1, {r4,r12} ");				// aLink->next=r3=first, aLink->prev=r12=old last
	asm("stmdb r0!, {r6,lr} ");				// store present mask and restore r0
	asm("ldmfd sp!, {r4-r6,pc} ");
	}
#endif

__NAKED__ void initialiseState(TInt /*aCpu*/, TSubScheduler* /*aSS*/)
	{
	SET_RWNO_TID(,r1);
	__ASM_CLI_MODE(MODE_ABT);
	asm("str	sp, [r1, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iAbtStackTop));
	asm("mvn	r3, #0 ");
	asm("str	r3, [sp, #%a0]" : : "i" _FOFF(SFullArmRegSet, iExcCode));
	asm("str	r3, [r1, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iIrqNestCount));
	__ASM_CLI_MODE(MODE_UND);
	asm("str	sp, [r1, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iUndStackTop));
	__ASM_CLI_MODE(MODE_FIQ);
	asm("str	sp, [r1, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iFiqStackTop));
	__ASM_CLI_MODE(MODE_IRQ);
	asm("str	sp, [r1, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iIrqStackTop));
	__ASM_CLI_MODE(MODE_SVC);
	asm("ldr	r2, __TheScheduler ");
	asm("ldr	r3, [r2, #%a0]" : : "i" _FOFF(TScheduler, iSX.iScuAddr));
	asm("str	r3, [r1, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iScuAddr));
	asm("ldr	r3, [r2, #%a0]" : : "i" _FOFF(TScheduler, iSX.iGicDistAddr));
	asm("str	r3, [r1, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iGicDistAddr));
	asm("ldr	r3, [r2, #%a0]" : : "i" _FOFF(TScheduler, iSX.iGicCpuIfcAddr));
	asm("str	r3, [r1, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iGicCpuIfcAddr));
	asm("ldr	r3, [r2, #%a0]" : : "i" _FOFF(TScheduler, iSX.iLocalTimerAddr));
	asm("str	r3, [r1, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iLocalTimerAddr));
	asm("ldr	r3, [r2, #%a0]" : : "i" _FOFF(TScheduler, iSX.iGlobalTimerAddr));
	asm("str	r3, [r1, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iGlobalTimerAddr));
	asm("mov	r3, #0 ");
	SET_RWRO_TID(,r3);
	SET_RWRW_TID(,r3);

	__JUMP(,lr);

	asm("__TheScheduler: ");
	asm(".word TheScheduler ");
	}

__NAKED__ TUint32 __mpid()
	{
	asm("mrc	p15, 0, r0, c0, c0, 5 ");
	__JUMP(,lr);
	}

/** @internalTechnology

	Called to indicate that the system has crashed and all CPUs should be
	halted and should dump their registers.

*/
__NAKED__ void NKern::NotifyCrash(const TAny* /*a0*/, TInt /*a1*/)
	{
	asm("stmfd	sp!, {r0-r1} ");			// save parameters
	GET_RWNO_TID(,r0);
	asm("cmp	r0, #0 ");
	asm("ldreq	r0, __SS0 ");
	asm("ldr	r0, [r0, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iRegs));
	asm("cmp	r0, #0 ");
	asm("ldreq	r0, __DefaultRegs ");
	asm("ldr	r1, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet, iExcCode));
	asm("cmp	r1, #0 ");					// context already saved?
	asm("bge	state_already_saved ");		// skip if so
	asm("mov	r1, lr ");
	asm("bl "	CSM_ZN3Arm9SaveStateER14SFullArmRegSet );
	asm("str	r1, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet, iN.iR15));
	asm("ldmia	sp!, {r2-r3} ");			// original R0,R1
	asm("stmia	r0, {r2-r3} ");				// save original R0,R1
	asm("add	r1, r0, #%a0" : : "i" _FOFF(SFullArmRegSet, iExcCode));
	asm("mov	r4, r0 ");					// save pointer to i_Regs
	asm("stmib	r1, {r2-r3} ");				// save a0, a1 in iCrashArgs
	asm("mov	r1, #13 ");					// r1 = regnum
	asm("mrs	r2, cpsr ");				// r2 = mode
	asm("bl "	CSM_ZN3Arm3RegER14SFullArmRegSetim );	// r0 = pointer to exception mode R13
	asm("str	sp, [r0] ");				// save correct original value for exception mode R13
	asm("b		state_save_complete ");

	asm("state_already_saved: ");
	asm("ldmia	sp!, {r2-r3} ");			// original R0,R1
	asm("add	r1, r0, #%a0" : : "i" _FOFF(SFullArmRegSet, iExcCode));
	asm("ldr	r4, [r1, #4]! ");
	asm("cmp	r4, #0 ");
	asm("stmeqia	r1, {r2-r3} ");			// save a0, a1 in iCrashArgs, provided iCrashArgs not already set
	asm("mov	r4, r0 ");					// save pointer to i_Regs
	asm("state_save_complete: ");

	__ASM_CLI_MODE(MODE_FIQ);				// mode_fiq, interrupts off
	GET_RWNO_TID(,r0);
	asm("ldr	r1, __CrashState ");
	asm("cmp	r0, #0 ");
	asm("moveq	r2, #1 ");
	asm("streq	r2, [r1] ");
	asm("beq	skip_other_cores ");		// If subscheduler not yet set, don't bother with other cores
	asm("ldr	r2, [r0, #%a0]" : : "i" _FOFF(TSubScheduler, iCpuMask));
	asm("ldr	r5, [r0, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iGicCpuIfcAddr));
//	asm("ldr	r4, [r0, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iRegs));
	asm("bic	sp, sp, #4 ");				// align stack to multiple of 8

	__DATA_MEMORY_BARRIER_Z__(r6);
	asm("1: ");
	LDREX(3,1);
	asm("orr	r5, r3, r2 ");
	STREX(12,5,1);							// set bit in CrashState for this CPU
	asm("cmp	r12, #0 ");
	asm("bne	1b ");
	__DATA_MEMORY_BARRIER__(r6);
	asm("cmp	r3, #0 ");					// were we first to crash?
	asm("beq	first_to_crash ");			// branch if so

	// we weren't first to crash, so wait here for a crash IPI
	// disable all interrupts except for CRASH_IPI
	GET_RWNO_TID(,r0);
	asm("ldr	r0, [r0, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iGicCpuIfcAddr));
	asm("mov	r1, #0 ");
	asm("1: ");
	asm("add	r1, r1, #1 ");
	asm("str	r1, [r0, #%a0]" : : "i" _FOFF(GicCpuIfc, iPriMask));
	__DATA_SYNC_BARRIER__(r6);
	asm("ldr	r2, [r0, #%a0]" : : "i" _FOFF(GicCpuIfc, iPriMask));
	asm("cmp	r2, #0 ");
	asm("beq	1b ");						// loop until priority mask is nonzero

	asm("2: ");
	__ASM_STI_MODE(MODE_ABT);
	ARM_WFE;
	asm("b		2b ");						// loop until we get a CRASH_IPI

	// This CPU was first to crash
	asm("first_to_crash: ");
	asm("ldr	r2, [r0, #%a0]" : : "i" _FOFF(TSubScheduler, iScheduler));
	asm("ldr	r7, __CrashStateOut ");
	asm("ldr	r3, [r2, #%a0]" : : "i" _FOFF(TScheduler, iIpiAcceptCpus));
	asm("str	r3, [r7] ");			// mask of CPUs pending
	asm("ldr	r5, [r0, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iGicDistAddr));
	asm("ldr	r1, __CrashIPIWord ");
	__DATA_SYNC_BARRIER_Z__(r6);
	asm("str	r1, [r5, #%a0]" : : "i" _FOFF(GicDistributor, iSoftIrq));	// send CRASH_IPI to all other CPUs
	__DATA_SYNC_BARRIER__(r6);

	asm("skip_other_cores: ");
	asm("mov	r0, #0 ");
	asm("mov	r1, #0 ");
	asm("mov	r2, #0 ");
	asm("bl		NKCrashHandler ");		// call NKCrashHandler(0,0,0)

	__DATA_SYNC_BARRIER__(r6);
	GET_RWNO_TID(,r0);
	asm("cmp	r0, #0 ");
	asm("beq	skip_other_cores2 ");	// If subscheduler not yet set, don't bother with other cores
	asm("ldr	r2, [r0, #%a0]" : : "i" _FOFF(TSubScheduler, iCpuMask));
	asm("7: ");
	LDREX(1,7);
	asm("bic	r1, r1, r2 ");
	STREX(3,1,7);						// atomic { CrashStateOut &= ~iCpuMask; }
	asm("cmp	r3, #0 ");
	asm("bne	7b ");
	asm("1: ");
	asm("ldr	r1, [r7] ");
	asm("cmp	r1, #0 ");				// wait for all CPUs to acknowledge
	asm("beq	2f ");
	asm("adds	r6, r6, #1 ");
	asm("bne	1b ");					// if not ACKed after 2^32 iterations give up waiting
	asm("2: ");
	__DATA_MEMORY_BARRIER_Z__(r0);

	asm("skip_other_cores2: ");
	asm("mov	r0, #1 ");
	asm("ldr	r1, [r4, #%a0] " : : "i" _FOFF(SFullArmRegSet,iN.iR0));	// original R0 = a0 parameter
	asm("ldr	r2, [r4, #%a0] " : : "i" _FOFF(SFullArmRegSet,iN.iR1));	// original R1 = a1 parameter
	asm("bl		NKCrashHandler ");		// call NKCrashHandler(1,a0,a1) - shouldn't return

	// shouldn't get back here
	__ASM_CRASH();

	asm("__CrashState: ");
	asm(".word %a0" : : "i" ((TInt)&CrashState));
	asm("__CrashStateOut: ");
	asm(".word CrashStateOut ");
	asm("__CrashIPIWord: ");
	asm(".word %a0" : : "i" ( (TInt)GIC_IPI_OTHERS(CRASH_IPI_VECTOR) ));
	asm("__SS0: ");
	asm(".word %a0" : : "i" ((TInt)&TheSubSchedulers[0]));
	asm("__DefaultRegs: ");
	asm(".word %a0" : : "i" ((TInt)&DefaultRegSet));
	}


#ifdef __USE_BTRACE_LOCK__
#define	__ASM_ACQUIRE_BTRACE_LOCK(regs)					\
	asm("stmfd sp!, " regs);							\
	asm("ldr r0, __BTraceLock ");						\
	asm("bl " CSM_ZN9TSpinLock11LockIrqSaveEv );		\
	asm("mov r4, r0 ");									\
	asm("ldmfd sp!, " regs)

#define	__ASM_RELEASE_BTRACE_LOCK()						\
	asm("stmfd sp!, {r0-r1} ");							\
	asm("ldr r0, __BTraceLock ");						\
	asm("mov r1, r4 ");									\
	asm("bl " CSM_ZN9TSpinLock16UnlockIrqRestoreEi );	\
	asm("ldmfd sp!, {r0-r1} ")

#else
#define	__ASM_ACQUIRE_BTRACE_LOCK(regs)
#define	__ASM_RELEASE_BTRACE_LOCK()
#endif


__NAKED__ EXPORT_C TBool BTrace::Out(TUint32 a0, TUint32 a1, TUint32 a2, TUint32 a3)
	{
	asm("stmdb	sp!, {r2,r3,r4,lr}");
	__ASM_ACQUIRE_BTRACE_LOCK("{r0-r1}");
	asm("ldr	r12, __BTraceData");
	asm("and	r2, r0, #%a0" : : "i" ((TInt)(0xff<<(BTrace::ECategoryIndex*8))));
	asm("mov	r3, r1");			// r3 = a1 (ready for call to handler)
	asm("ldrb	r2, [r12, r2, lsr #%a0]" : : "i" ((TInt)(BTrace::ECategoryIndex*8)));
	asm("ldr	r12, [r12, #%a0]" : : "i" _FOFF(SBTraceData,iHandler));
	asm("adr	lr, 9f");
	asm("cmp	r2, #0");
	asm("moveq	r0, #0");
	__JUMP(ne,	r12);
	asm("9: ");
	__ASM_RELEASE_BTRACE_LOCK();
	__POPRET("r2,r3,r4,");
	}

__NAKED__ EXPORT_C TBool BTrace::OutN(TUint32 a0, TUint32 a1, TUint32 a2, const TAny* aData, TInt aDataSize)
	{
	asm("stmdb	sp!, {r2,r3,r4,lr}");
	__ASM_ACQUIRE_BTRACE_LOCK("{r0-r3}");
	asm("ldr	r12, __BTraceData");
	asm("and	r2, r0, #%a0" : : "i" ((TInt)(0xff<<(BTrace::ECategoryIndex*8))));
	asm("ldr	r14, [sp, #16]");	// r14 = aDataSize
	asm("ldrb	r2, [r12, r2, lsr #%a0]" : : "i" ((TInt)(BTrace::ECategoryIndex*8)));
	asm("ldr	r12, [r12, #%a0]" : : "i" _FOFF(SBTraceData,iHandler));
	asm("cmp	r2, #0");
	asm("moveq	r0, #0");
	asm("beq	0f ");

	asm("cmp	r14, #%a0" : : "i" ((TInt)KMaxBTraceDataArray));
	asm("movhi	r14, #%a0" : : "i" ((TInt)KMaxBTraceDataArray));
 	asm("orrhi	r0, r0, #%a0" : : "i" ((TInt)(BTrace::ERecordTruncated<<(BTrace::EFlagsIndex*8))));
	asm("add	r0, r0, r14");
	asm("subs	r14, r14, #1");
	asm("ldrhs	r2, [r3]");			// get first word of aData is aDataSize!=0
	asm("mov	r3, r1");			// r3 = a1 (ready for call to handler)
	asm("cmp	r14, #4");
	asm("strlo	r2, [sp, #4]");		// replace aData with first word if aDataSize is 1-4

	asm("mov	lr, pc");
	__JUMP(,	r12);
	asm("0: ");
	__ASM_RELEASE_BTRACE_LOCK();
	__POPRET("r2,r3,r4,");
	}

__NAKED__ EXPORT_C TBool BTrace::OutX(TUint32 a0, TUint32 a1, TUint32 a2, TUint32 a3)
	{
	asm("stmdb	sp!, {r2,r3,r4,lr}");
	__ASM_ACQUIRE_BTRACE_LOCK("{r0-r1}");
	asm("ldr	r12, __BTraceData");
	asm("and	r2, r0, #%a0" : : "i" ((TInt)(0xff<<(BTrace::ECategoryIndex*8))));
	asm("mov	r3, r1");			// r3 = a1 (ready for call to handler)
	asm("ldrb	r2, [r12, r2, lsr #%a0]" : : "i" ((TInt)(BTrace::ECategoryIndex*8)));
	asm("mrs	r14, cpsr ");
	asm("ldr	r12, [r12, #%a0]" : : "i" _FOFF(SBTraceData,iHandler));
	asm("cmp	r2, #0");			// check category filter
	asm("moveq	r0, #0");
	asm("beq	0f ");				// if category disabled, exit now
	__ASM_CLI();
	asm("and	r2, r14, #0x0f ");
	asm("cmp	r2, #3 ");
	asm("movhi	r2, #2 ");			// r2 = context ID = 1 for FIQ, 2 for IRQ/ABT/UND/SYS
	asm("bne	1f ");
	GET_RWNO_TID(,r1);
	asm("movs   r2, r1 ");			// r2 = context ID = 0 for early boot, no threads
	asm("beq    1f ");
	asm("ldrb	r2, [r1, #%a0]" : : "i" _FOFF(TSubScheduler,iInIDFC));
	asm("cmp	r2, #0 ");
	asm("ldreq	r2, [r1, #%a0]" : : "i" _FOFF(TSubScheduler,iCurrentThread));
	asm("movne	r2, #3 ");			// r2 = context ID = 3 for IDFC = NThread pointer for thread
	asm("1: ");
	asm("msr	cpsr, r14 ");
	asm("mov	lr, pc");
	__JUMP(,	r12);
	asm("0: ");
	__ASM_RELEASE_BTRACE_LOCK();
	__POPRET("r2,r3,r4,");
	}

__NAKED__ EXPORT_C TBool BTrace::OutNX(TUint32 a0, TUint32 a1, TUint32 a2, const TAny* aData, TInt aDataSize)
	{
	asm("stmdb	sp!, {r2,r3,r4,lr}");
	__ASM_ACQUIRE_BTRACE_LOCK("{r0-r3}");
	asm("ldr	r12, __BTraceData");
	asm("and	r2, r0, #%a0" : : "i" ((TInt)(0xff<<(BTrace::ECategoryIndex*8))));
	asm("ldr	r14, [sp, #16]");	// r14 = aDataSize
	asm("ldrb	r2, [r12, r2, lsr #%a0]" : : "i" ((TInt)(BTrace::ECategoryIndex*8)));
	asm("ldr	r12, [r12, #%a0]" : : "i" _FOFF(SBTraceData,iHandler));
	asm("cmp	r2, #0");			// check category filter
	asm("moveq	r0, #0");
	asm("beq	0f ");				// if category disabled, exit now

	asm("cmp	r14, #%a0" : : "i" ((TInt)KMaxBTraceDataArray));
	asm("movhi	r14, #%a0" : : "i" ((TInt)KMaxBTraceDataArray));
 	asm("orrhi	r0, r0, #%a0" : : "i" ((TInt)(BTrace::ERecordTruncated<<(BTrace::EFlagsIndex*8))));
	asm("add	r0, r0, r14");
	asm("subs	r14, r14, #1");
	asm("ldrhs	r2, [r3]");			// get first word of aData is aDataSize!=0
	asm("mov	r3, r1");			// r3 = a1 (ready for call to handler)
	asm("cmp	r14, #4");
	asm("strlo	r2, [sp, #4]");		// replace aData with first word if aDataSize is 1-4

	asm("mrs	r14, cpsr ");
	__ASM_CLI();
	asm("and	r2, r14, #0x0f ");
	asm("cmp	r2, #3 ");
	asm("movhi	r2, #2 ");			// r2 = context ID = 1 for FIQ, 2 for IRQ/ABT/UND/SYS
	asm("bne	1f ");
	GET_RWNO_TID(,r1);
	asm("movs   r2, r1 ");			// r2 = context ID = 0 for early boot, no threads
	asm("beq    1f ");
	asm("ldrb	r2, [r1, #%a0]" : : "i" _FOFF(TSubScheduler,iInIDFC));
	asm("cmp	r2, #0 ");
	asm("ldreq	r2, [r1, #%a0]" : : "i" _FOFF(TSubScheduler,iCurrentThread));
	asm("movne	r2, #3 ");			// r2 = context ID = 3 for IDFC = NThread pointer for thread
	asm("1: ");
	asm("msr	cpsr, r14 ");

	asm("mov	lr, pc");
	__JUMP(,	r12);
	asm("0: ");
	__ASM_RELEASE_BTRACE_LOCK();
	__POPRET("r2,r3,r4,");
	}

__NAKED__ EXPORT_C TBool BTrace::OutBig(TUint32 a0, TUint32 a1, const TAny* aData, TInt aDataSize)
	{
	asm("stmdb	sp!, {r4,lr}");
	asm("ldr	r12, __BTraceData");
	asm("str	lr, [sp, #-4]! ");	// PC
	asm("and	r14, r0, #%a0" : : "i" ((TInt)(0xff<<(BTrace::ECategoryIndex*8))));
	asm("ldrb	r14, [r12, r14, lsr #%a0]" : : "i" ((TInt)(BTrace::ECategoryIndex*8)));
	asm("cmp	r14, #0");			// check category filter
	asm("addeq	sp, sp, #4 ");
	asm("moveq	r0, #0 ");
	asm("beq	0f ");				// if category disabled, exit now

	asm("mrs	r14, cpsr ");
	__ASM_CLI();
	asm("and	r12, r14, #0x0f ");
	asm("cmp	r12, #3 ");
	asm("movhi	r12, #2 ");			// r12 = context ID = 1 for FIQ, 2 for IRQ/ABT/UND/SYS
	asm("bne	1f ");
	GET_RWNO_TID(,r12);
	asm("cmp    r12, #0 ");			// r2 = context ID = 0 for early boot, no threads
	asm("beq    1f ");
	asm("ldrb	r12, [r12, #%a0]" : : "i" _FOFF(TSubScheduler,iInIDFC));
	asm("cmp	r12, #0 ");
	GET_RWNO_TID(eq,r12);
	asm("ldreq	r12, [r12, #%a0]" : : "i" _FOFF(TSubScheduler,iCurrentThread));
	asm("movne	r12, #3 ");			// r12 = context ID = 3 for IDFC = NThread pointer for thread
	asm("1: ");
	asm("msr	cpsr, r14 ");
	asm("str	r12, [sp, #-4]! ");	// context ID
	asm("bl "	CSM_ZN6BTrace8DoOutBigEmmPKvimm);
	asm("add	sp, sp, #8");
	asm("0: ");
	__POPRET("r4,");

	asm("__BTraceLock: ");
	asm(".word %a0" : : "i" ((TInt)&BTraceLock));
	asm("__BTraceData: ");
	asm(".word BTraceData ");
	}


__NAKED__ EXPORT_C TBool BTrace::OutFiltered(TUint32 a0, TUint32 a1, TUint32 a2, TUint32 a3)
	{
	// fall through to OutFilteredX...
	}

__NAKED__ EXPORT_C TBool BTrace::OutFilteredX(TUint32 a0, TUint32 a1, TUint32 a2, TUint32 a3)
	{
	asm("stmdb	sp!, {r2,r3,r4,lr}");
	asm("ldr	r12, __BTraceData");
	asm("and	r2, r0, #%a0" : : "i" ((TInt)(0xff<<(BTrace::ECategoryIndex*8))));
	asm("mov	r3, r1");			// r3 = a1 (ready for call to handler)
	asm("ldrb	r2, [r12, r2, lsr #%a0]" : : "i" ((TInt)(BTrace::ECategoryIndex*8)));
	asm("cmp	r2, #0");
	asm("moveq	r0, #0");
	asm("beq	9f ");

	// r0=header, r1=a1=secondary filter UID, r2=unused, r3=a1, r12->SBTraceData
	// if trace enabled return r0,r1,r3 unmodified, r2=context value r12->handler, Z=0
	// if trace disabled return r0=0 Z=1
	asm("bl		btrace_check_filter2 ");
	asm("beq	9f ");
	__ASM_ACQUIRE_BTRACE_LOCK("{r0,r2,r3,r12}");
	asm("adr	lr, 1f ");
	__JUMP(,	r12);
	asm("1: ");
	__ASM_RELEASE_BTRACE_LOCK();
	asm("9: ");
	__POPRET("r2,r3,r4,");

	asm("btrace_check_filter2: ");
	asm("stmfd	sp!, {r0,r1,r3,r4,r12,lr} ");
	asm("mov	r0, r12 ");
	asm("bl		CheckFilter2__11SBTraceDataUl ");  // CSM needed
	asm("cmp	r0, #0 ");
	asm("beq	0f ");
	asm("mrs	r14, cpsr ");
	__ASM_CLI();
	asm("and	r2, r14, #0x0f ");
	asm("cmp	r2, #3 ");
	asm("movhi	r2, #2 ");			// r2 = context ID = 1 for FIQ, 2 for IRQ/ABT/UND/SYS
	asm("bne	1f ");
	GET_RWNO_TID(,r4);
	asm("movs   r2, r4 ");			// r2 = context ID = 0 for early boot, no threads
	asm("beq    1f ");
	asm("ldrb	r2, [r4, #%a0]" : : "i" _FOFF(TSubScheduler,iInIDFC));
	asm("cmp	r2, #0 ");
	asm("ldreq	r2, [r4, #%a0]" : : "i" _FOFF(TSubScheduler,iCurrentThread));
	asm("movne	r2, #3 ");			// r2 = context ID = 3 for IDFC = NThread pointer for thread
	asm("1: ");
	asm("msr	cpsr, r14 ");
	asm("0: ");
	asm("ldmfd	sp!, {r0,r1,r3,r4,r12,lr} ");
	asm("moveq	r0, #0 ");
	asm("ldrne	r12, [r12, #%a0]" : : "i" _FOFF(SBTraceData,iHandler));
	__JUMP(,lr);
	}

__NAKED__ EXPORT_C TBool BTrace::OutFilteredN(TUint32 a0, TUint32 a1, TUint32 a2, const TAny* aData, TInt aDataSize)
	{
	// fall through to OutFilteredNX...
	}

__NAKED__ EXPORT_C TBool BTrace::OutFilteredNX(TUint32 a0, TUint32 a1, TUint32 a2, const TAny* aData, TInt aDataSize)
	{
	asm("stmdb	sp!, {r2,r3,r4,lr}");
	asm("ldr	r12, __BTraceData");
	asm("and	r2, r0, #%a0" : : "i" ((TInt)(0xff<<(BTrace::ECategoryIndex*8))));
	asm("ldrb	r2, [r12, r2, lsr #%a0]" : : "i" ((TInt)(BTrace::ECategoryIndex*8)));
	asm("cmp	r2, #0");
	asm("moveq	r0, #0");
	asm("beq	9f ");

	// r0=header, r1=a1=secondary filter UID, r2=unused, r3=aData, r12->SBTraceData
	// if trace enabled return r0,r1,r3 unmodified, r2=context value r12->handler, Z=0
	// if trace disabled return r0=0 Z=1
	asm("bl		btrace_check_filter2 ");
	asm("beq	9f ");

	__ASM_ACQUIRE_BTRACE_LOCK("{r0-r3,r11,r12}");
	asm("ldr	r14, [sp, #16] ");	// r14 = aDataSize
	asm("cmp	r14, #%a0" : : "i" ((TInt)KMaxBTraceDataArray));
	asm("movhi	r14, #%a0" : : "i" ((TInt)KMaxBTraceDataArray));
 	asm("orrhi	r0, r0, #%a0" : : "i" ((TInt)(BTrace::ERecordTruncated<<(BTrace::EFlagsIndex*8))));
	asm("add	r0, r0, r14 ");
	asm("subs	r14, r14, #1 ");
	asm("ldrhs	r3, [r3] ");		// get first word of aData if aDataSize!=0
	asm("cmp	r14, #4 ");
	asm("strlo	r3, [sp, #4] ");	// replace aData with first word if aDataSize is 1-4
	asm("mov	r3, r1 ");			// r3 = a1 (ready for call to handler)
	asm("adr	lr, 1f ");
	__JUMP(,	r12);
	asm("1: ");
	__ASM_RELEASE_BTRACE_LOCK();
	asm("9: ");
	__POPRET("r2,r3,r4,");
	}

__NAKED__ EXPORT_C TBool BTrace::OutFilteredBig(TUint32 a0, TUint32 a1, const TAny* aData, TInt aDataSize)
	{
	asm("stmdb	sp!, {r4,lr} ");
	asm("ldr	r12, __BTraceData ");
	asm("stmfd	sp!, {r2,lr} ");	// save aData, PC
	asm("and	r14, r0, #%a0" : : "i" ((TInt)(0xff<<(BTrace::ECategoryIndex*8))));
	asm("ldrb	r14, [r12, r14, lsr #%a0]" : : "i" ((TInt)(BTrace::ECategoryIndex*8)));
	asm("cmp	r14, #0 ");			// check category filter
	asm("blne	btrace_check_filter2 ");	// if enabled, check secondary filter
	asm("addeq	sp, sp, #8 ");
	asm("moveq	r0, #0 ");
	asm("beq	9f ");				// if category or secondary filter disabled, exit now
	asm("mov	r12, r2 ");
	asm("ldr	r2, [sp, #0] ");	// restore aData into r2
	asm("str	r12, [sp, #0] ");	// Context ID
	asm("bl "	CSM_ZN6BTrace8DoOutBigEmmPKvimm);
	asm("add	sp, sp, #8 ");
	asm("9: ");
	__POPRET("r4,");
	}


/******************************************************************************/

/** Save all the ARM registers

@internalTechnology
*/
__NAKED__ void Arm::SaveState(SFullArmRegSet&)
	{
	asm("stmia	r0, {r0-r14}^ ");	// save R0-R7, R8_usr-R14_usr
	asm("str	lr, [r0, #60]! ");	// save R15
	asm("mrs	r1, cpsr ");
	asm("str	r1, [r0, #4]! ");	// save CPSR
	asm("bic	r2, r1, #0x1f ");
	asm("orr	r2, r2, #0xd3 ");	// mode_svc, all interrupts off
	asm("msr	cpsr, r2 ");
	asm("stmib	r0!, {r13,r14} ");	// save R13_svc, R14_svc
	asm("mrs	r3, spsr ");
	asm("str	r3, [r0, #4]! ");	// save SPSR_svc
	asm("bic	r2, r1, #0x1f ");
	asm("orr	r2, r2, #0xd7 ");	// mode_abt, all interrupts off
	asm("msr	cpsr, r2 ");
	asm("stmib	r0!, {r13,r14} ");	// save R13_abt, R14_abt
	asm("mrs	r3, spsr ");
	asm("str	r3, [r0, #4]! ");	// save SPSR_abt
	asm("bic	r2, r1, #0x1f ");
	asm("orr	r2, r2, #0xdb ");	// mode_und, all interrupts off
	asm("msr	cpsr, r2 ");
	asm("stmib	r0!, {r13,r14} ");	// save R13_und, R14_und
	asm("mrs	r3, spsr ");
	asm("str	r3, [r0, #4]! ");	// save SPSR_und
	asm("bic	r2, r1, #0x1f ");
	asm("orr	r2, r2, #0xd2 ");	// mode_irq, all interrupts off
	asm("msr	cpsr, r2 ");
	asm("stmib	r0!, {r13,r14} ");	// save R13_irq, R14_irq
	asm("mrs	r3, spsr ");
	asm("str	r3, [r0, #4]! ");	// save SPSR_irq
	asm("bic	r2, r1, #0x1f ");
	asm("orr	r2, r2, #0xd1 ");	// mode_fiq, all interrupts off
	asm("msr	cpsr, r2 ");
	asm("stmib	r0!, {r8-r14} ");	// save R8_fiq ... R14_fiq
	asm("mrs	r3, spsr ");
	asm("str	r3, [r0, #4]! ");	// save SPSR_fiq
	asm("bic	r2, r1, #0x1f ");
	asm("orr	r2, r2, #0xd3 ");	// mode_svc, all interrupts off
	asm("msr	cpsr, r2 ");

	asm("mov	r4, #0 ");
	asm("mov	r5, #0 ");
	asm("mov	r6, #0 ");
	asm("mov	r7, #0 ");
	asm("mov	r8, #0 ");
	asm("mov	r9, #0 ");
	asm("mov	r10, #0 ");
	asm("mov	r11, #0 ");

	// monitor mode - skip for now
	asm("mov	r3, #0 ");
	asm("stmib	r0!, {r4-r6} ");	// R13_mon, R14_mon, SPSR_mon

	// zero spare words
	asm("mov	r3, #0 ");
	asm("stmib	r0!, {r4-r11} ");
	asm("add	r0, r0, #4 ");		// r0 = &a.iA

#ifdef __CPU_ARMV7
	asm("mrc	p14, 6, r3, c1, c0, 0 ");
#else
	asm("mov	r3, #0 ");
#endif
	asm("str	r3, [r0], #4 ");	// TEEHBR
#ifdef __CPU_HAS_COPROCESSOR_ACCESS_REG
	GET_CAR(,r3);
#else
	asm("mov	r3, #0 ");
#endif
	asm("str	r3, [r0], #4 ");	// CPACR

	// skip SCR, SDER, NSACR, PMCR, MVBAR for now
	asm("mov	r3, #0 ");
	asm("stmia	r0!, {r4-r8} ");	// SCR, SDER, NSACR, PMCR, MVBAR

	// zero spare words
	asm("mov	r3, #0 ");
	asm("stmia	r0!, {r3-r11} ");	// r0 = &a.iB[0]

	// just fill in iB[0]
#ifdef __CPU_HAS_MMU
	asm("mrc	p15, 0, r3, c1, c0, 0 ");
	asm("str	r3, [r0], #4 ");	// SCTLR
#ifdef __CPU_HAS_ACTLR
	asm("mrc	p15, 0, r3, c1, c0, 1 ");
#else
	asm("mov	r3, #0 ");
#endif
	asm("str	r3, [r0], #4 ");	// ACTLR
	asm("mrc	p15, 0, r3, c2, c0, 0 ");
	asm("str	r3, [r0], #4 ");	// TTBR0
#ifdef __CPU_HAS_TTBR1
	asm("mrc	p15, 0, r2, c2, c0, 1 ");
	asm("mrc	p15, 0, r3, c2, c0, 2 ");
#else
	asm("mov	r2, #0 ");
	asm("mov	r3, #0 ");
#endif
	asm("stmia	r0!, {r2,r3} ");	// TTBR1, TTBCR
	asm("mrc	p15, 0, r3, c3, c0, 0 ");
	asm("str	r3, [r0], #4 ");	// DACR
#ifdef __CPU_MEMORY_TYPE_REMAPPING
	asm("mrc	p15, 0, r2, c10, c2, 0 ");
	asm("mrc	p15, 0, r3, c10, c2, 1 ");
#else
	asm("mov	r2, #0 ");
	asm("mov	r3, #0 ");
#endif
	asm("stmia	r0!, {r2,r3} ");	// PRRR, NMRR
#ifdef __CPU_ARMV7
	asm("mrc	p15, 0, r3, c12, c0, 0 ");
#else
	asm("mov	r3, #0 ");
#endif
	asm("str	r3, [r0], #4 ");	// VBAR
#if defined(__CPU_SA1) || defined(__CPU_ARM920T) || defined(__CPU_ARM925T) || defined(__CPU_ARMV5T) || defined(__CPU_ARMV6) || defined(__CPU_ARMV7)
	asm("mrc	p15, 0, r3, c13, c0, 0 ");
#else
	asm("mov	r3, #0 ");
#endif
	asm("str	r3, [r0], #4 ");	// FCSEIDR
#if defined(__CPU_ARMV6) || defined(__CPU_ARMV7)
	asm("mrc	p15, 0, r3, c13, c0, 1 ");
#else
	asm("mov	r3, #0 ");
#endif
	asm("str	r3, [r0], #4 ");	// CONTEXTIDR
#ifdef __CPU_HAS_CP15_THREAD_ID_REG
	GET_RWRW_TID(,r2);
	GET_RWRO_TID(,r3);
	GET_RWNO_TID(,r12);
#else
	asm("mov	r2, #0 ");
	asm("mov	r3, #0 ");
	asm("mov	r12, #0 ");
#endif
	asm("stmia	r0!, {r2,r3,r12} ");	// RWRWTID, RWROTID, RWNOTID
	asm("mrc	p15, 0, r2, c5, c0, 0 ");	// DFSR
#ifdef __CPU_ARM_HAS_SPLIT_FSR
	asm("mrc	p15, 0, r3, c5, c0, 1 ");	// IFSR
#else
	asm("mov	r3, #0 ");
#endif
	asm("stmia	r0!, {r2,r3} ");	// DFSR, IFSR
#ifdef __CPU_ARMV7
	asm("mrc	p15, 0, r2, c5, c1, 0 ");	// ADFSR
	asm("mrc	p15, 0, r3, c5, c1, 1 ");	// AIFSR
#else
	asm("mov	r2, #0 ");
	asm("mov	r3, #0 ");
#endif
	asm("stmia	r0!, {r2,r3} ");	// ADFSR, AIFSR
	asm("mrc	p15, 0, r2, c6, c0, 0 ");	// DFAR
#ifdef __CPU_ARM_HAS_CP15_IFAR
	asm("mrc	p15, 0, r3, c6, c0, 2 ");	// IFAR
#else
	asm("mov	r3, #0 ");
#endif
	asm("stmia	r0!, {r2,r3} ");	// DFAR, IFAR

	// zero spare words
	asm("stmia	r0!, {r4-r7} ");
	asm("stmia	r0!, {r4-r11} ");
#else	// __CPU_HAS_MMU
	asm("stmia	r0!, {r4-r11} ");	// no MMU so zero fill
	asm("stmia	r0!, {r4-r11} ");	// no MMU so zero fill
	asm("stmia	r0!, {r4-r11} ");	// no MMU so zero fill
	asm("stmia	r0!, {r4-r11} ");	// no MMU so zero fill
#endif	// __CPU_HAS_MMU

	// zero iB[1]
	asm("stmia	r0!, {r4-r11} ");
	asm("stmia	r0!, {r4-r11} ");
	asm("stmia	r0!, {r4-r11} ");
	asm("stmia	r0!, {r4-r11} ");	// r0 = &a.iMore[0]
	asm("add	r1, r0, #62*8 ");	// r1 = &a.iExcCode

	// Save VFP state
	// Save order:
	//				FPEXC	FPSCR
	// VFPv2 ONLY:	FPINST	FPINST2
	//				D0-D3	D4-D7	D8-D11	D12-D15 
	// VFPv3 ONLY:	D16-D19	D20-D23	D24-D27	D28-D31
#ifdef __CPU_HAS_VFP
	GET_CAR(,r2);
	asm("bic	r2, r2, #0x00f00000 ");
#ifdef __VFP_V3
	asm("bic	r2, r2, #0xc0000000 ");	// mask off ASEDIS, D32DIS
#endif
	asm("orr	r2, r2, #0x00500000 ");	// enable privileged access to CP10, CP11
	SET_CAR(,r2);
	VFP_FMRX(,2,VFP_XREG_FPEXC);		// r2=FPEXC
	asm("orr	r3, r2, #%a0" : : "i" ((TInt)VFP_FPEXC_EN));
	asm("bic	r3, r3, #%a0" : : "i" ((TInt)VFP_FPEXC_EX));
	VFP_FMXR(,VFP_XREG_FPEXC,3);		// enable VFP
	__DATA_SYNC_BARRIER__(r4);
	__INST_SYNC_BARRIER__(r4);
	VFP_FMRX(,3,VFP_XREG_FPSCR);		// r3=FPSCR
	asm("stmia	r0!, {r2,r3} ");		//
#ifdef __VFP_V3
	VFP_FSTMIADW(CC_AL,0,0,16);			// save D0 - D15
	VFP_FMRX(,3,VFP_XREG_MVFR0);
	asm("tst r3, #%a0" : : "i" ((TInt)VFP_MVFR0_ASIMD32)); // check to see if all 32 Advanced SIMD registers are present
	VFP_FSTMIADW(CC_NE,0,16,16);		// if so then save D15 - D31 (don't need to check CPACR.D32DIS as it is cleared above)
#else
	VFP_FMRX(,2,VFP_XREG_FPINST);
	VFP_FMRX(,3,VFP_XREG_FPINST2);
	asm("stmia	r0!, {r2,r3} ");		// FPINST, FPINST2
	VFP_FSTMIADW(CC_AL,0,0,16);			// save D0 - D15
#endif
#endif	// __CPU_HAS_VFP
	asm("1:		");
	asm("cmp	r0, r1 ");
	asm("strlo	r4, [r0], #4 ");		// clear up to end of iMore[61]
	asm("blo	1b ");
	asm("mov	r1, #%a0" : : "i" ((TInt)KMaxTInt));
	asm("stmia	r0!, {r1,r5-r7} ");		// iExcCode=KMaxTInt, iCrashArgs[0...2]=0
	asm("sub	r0, r0, #1024 ");		// r0 = &a
#ifdef __CPU_HAS_VFP
	asm("ldr	r2, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iMore[0]));
	VFP_FMXR(,VFP_XREG_FPEXC,2);		// restore FPEXC
	__DATA_SYNC_BARRIER__(r4);
	__INST_SYNC_BARRIER__(r4);
	asm("ldr	r2, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iA.iCPACR));
	SET_CAR(,r2);						// restore CPACR
#endif
	asm("ldr	r1, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iFlags));
	asm("orr	r1, r1, #0xC0 ");		// interrupts off
	asm("msr	cpsr, r1 ");			// restore CPSR with interrupts off
	asm("ldmia	r0, {r0-r11} ");		// restore R4-R11
	__JUMP(,lr);
	}


/** Update the saved ARM registers with information from an exception

@internalTechnology
*/
__NAKED__ void Arm::UpdateState(SFullArmRegSet&, TArmExcInfo&)
	{
	asm("ldr	r2, [r1, #%a0]" : : "i" _FOFF(TArmExcInfo, iExcCode));
	asm("cmp	r2, #%a0 " : : "i" ((TInt)EArmExceptionPrefetchAbort));
	asm("ldmia	r1!, {r2,r3,r12} ");
	asm("streq	r2, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iB[0].iIFAR));
	asm("strne	r2, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iB[0].iDFAR));
	asm("streq	r3, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iB[0].iIFSR));
	asm("strne	r3, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iB[0].iDFSR));
	asm("str	r12, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iSpsrSvc));
	asm("add	r1, r1, #4 ");
	asm("ldmia	r1!, {r2,r3,r12} ");
	asm("str	r2, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR13Svc));
	asm("str	r3, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR14Svc));
	asm("str	r12, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR0));
	asm("ldmia	r1!, {r2,r3,r12} ");
	asm("str	r2, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR1));
	asm("str	r3, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR2));
	asm("str	r12, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR3));
	asm("ldmia	r1!, {r2,r3,r12} ");
	asm("str	r2, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR4));
	asm("str	r3, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR5));
	asm("str	r12, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR6));
	asm("ldmia	r1!, {r2,r3,r12} ");
	asm("str	r2, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR7));
	asm("str	r3, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR8));
	asm("str	r12, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR9));
	asm("ldmia	r1!, {r2,r3,r12} ");
	asm("str	r2, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR10));
	asm("str	r3, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR11));
	asm("str	r12, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR12));
	asm("ldmia	r1!, {r2,r3,r12} ");
	asm("str	r2, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR13));
	asm("str	r3, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR14));
	asm("str	r12, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iExcCode));
	asm("ldmia	r1!, {r2,r3} ");
	asm("str	r2, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iR15));
	asm("str	r3, [r0, #%a0]" : : "i" _FOFF(SFullArmRegSet,iN.iFlags));
	__JUMP(,lr);
	}


/** Get a pointer to a stored integer register, accounting for registers which
	are banked across modes.

@param	a		Pointer to saved register block
@param	aRegNum	Number of register required, 0-15 or -1 (indicates SPSR)
@param	aMode	Bottom 5 bits indicate which processor mode
				Other bits of aMode are ignored
@return			Pointer to the required saved register value

@internalTechnology
*/
__NAKED__ TArmReg* Arm::Reg(SFullArmRegSet& /*a*/, TInt /*aRegNum*/, TArmReg /*aMode*/)
	{
	asm("cmp	r1, #8 ");				// register number < 8 ?
	asm("addlo	r0, r0, r1, lsl #2 ");	// register R0-R7 are not banked
	asm("blo	0f ");
	asm("cmp	r1, #15 ");				// register number = 15 ?
	asm("addeq	r0, r0, r1, lsl #2 ");	// register R15 not banked
	asm("movgt	r0, #0 ");				// no registers > 15
	asm("bge	0f ");
	asm("cmn	r1, #1 ");
	asm("movlt	r0, #0 ");				// no registers < -1
	asm("blt	0f ");
	asm("and	r12, r2, #0x1F ");
	asm("cmp	r12, #0x11 ");			// mode_fiq?
	asm("beq	1f ");					// skip if it is
	asm("cmp	r1, #13 ");
	asm("addlo	r0, r0, r1, lsl #2 ");	// register R8-R12 are only banked in mode_fiq
	asm("blo	0f ");
	asm("cmp	r12, #0x10 ");			// mode_usr ?
	asm("cmpne	r12, #0x1F ");			// if not, mode_sys ?
	asm("bne	2f ");					// skip if neither
	asm("cmp	r1, #16 ");
	asm("addlo	r0, r0, r1, lsl #2 ");	// handle R13_usr, R14_usr
	asm("movhs	r0, #0 ");				// no SPSR in mode_usr or mode_sys
	asm("blo	0f ");
	asm("1: ");							// mode_fiq, regnum = 8-12
	asm("2: ");							// exception mode, regnum not 0-12 or 15
	asm("cmn	r1, #1 ");				// regnum = -1 ?
	asm("moveq	r1, #15 ");				// if so, change to 15
	asm("sub	r1, r1, #13 ");
	asm("add	r0, r0, r1, lsl #2 ");	// add 0 for R13, 4 for R14, 8 for SPSR
	asm("cmp	r12, #0x16 ");
	asm("addeq	r0, r0, #12 ");			// if mon, add offset from R13Fiq to R13Mon
	asm("cmpne	r12, #0x11 ");
	asm("addeq	r0, r0, #32 ");			// if valid but not svc/abt/und/irq, add offset from R13Irq to R13Fiq
	asm("cmpne	r12, #0x12 ");
	asm("addeq	r0, r0, #12 ");			// if valid but not svc/abt/und, add offset from R13Und to R13Irq
	asm("cmpne	r12, #0x1b ");
	asm("addeq	r0, r0, #12 ");			// if valid but not svc/abt, add offset from R13Abt to R13Und
	asm("cmpne	r12, #0x17 ");
	asm("addeq	r0, r0, #12 ");			// if valid but not svc, add offset from R13Svc to R13Abt
	asm("cmpne	r12, #0x13 ");
	asm("addeq	r0, r0, #%a0" : : "i" _FOFF(SFullArmRegSet, iN.iR13Svc));	// if valid mode add offset to R13Svc
	asm("movne	r0, #0 ");
	asm("0: ");
	__JUMP(,lr);
	}


/** Restore all the ARM registers

@internalTechnology
*/
__NAKED__ void Arm::RestoreState(SFullArmRegSet&)
	{
	}

__NAKED__ EXPORT_C TBool BTrace::OutFilteredPcFormatBig(TUint32 a0, TUint32 aModuleUid, TUint32 aPc, TUint16 aFormatId, const TAny* aData, TInt aDataSize)
	{
	asm("mov	r0, #0"); //Kernel side not implemented yet
	}

#ifdef 	__CPU_ARM_HAS_WFE_SEV

extern "C" __NAKED__ void __arm_wfe()
	{
	ARM_WFE;
	__JUMP(,	lr);
	}

extern "C" __NAKED__ void __arm_sev()
	{
	ARM_SEV;
	__JUMP(,	lr);
	}

#endif

// Called by a CPU which has completed its detach sequence and should now be powered off
// Doesn't return - just waits for power to be removed
// CPU will come back up via the reset vector when it next wakes up.
// NOTE: On entry the CPU caches are disabled and the CPU does not participate in coherency
// SO BE VERY CAREFUL
extern "C" __NAKED__ void DetachComplete()
	{
	GET_RWNO_TID(,r0);
	asm("ldr	r1, [r0, #%a0]" : : "i" _FOFF(TSubScheduler, iUncached));
	asm("ldr	r2, [r1, #%a0]" : : "i" _FOFF(SPerCpuUncached, iDetachCompleteCpus));
	asm("ldr	r3, [r0, #%a0]" : : "i" _FOFF(TSubScheduler, iSSX.iGicDistAddr));
	__DATA_SYNC_BARRIER_Z__(r12);		// need DSB before sending any IPI
	asm("mov	r2, r2, lsl #16 ");
	asm("orr	r2, r2, #%a0" : : "i" ((TInt)INDIRECT_POWERDOWN_IPI_VECTOR));
	asm("str	r2, [r3, #%a0]" : : "i" _FOFF(GicDistributor, iSoftIrq));	// trigger IPIs

	asm("wait_forever: ");
	__DATA_SYNC_BARRIER__(r12);
	ARM_WFE;
	__DATA_SYNC_BARRIER__(r12);
	asm("ldr	r2, [r1, #%a0]" : : "i" _FOFF(SPerCpuUncached, iPowerOnReq));
	__DATA_SYNC_BARRIER__(r12);
	asm("cmp	r2, #0xF000000F ");		// for 'fake' power down
	asm("bne	wait_forever ");

	asm("0:		");
	__JUMP(,lr);
	}