kernel/eka/common/x86/atomics.cia
author Tom Cosgrove <tom.cosgrove@nokia.com>
Fri, 28 May 2010 16:29:07 +0100
changeset 30 8aab599e3476
parent 0 a41df078684a
permissions -rw-r--r--
Fix for bug 2283 (RVCT 4.0 support is missing from PDK 3.0.h) Have multiple extension sections in the bld.inf, one for each version of the compiler. The RVCT version building the tools will build the runtime libraries for its version, but make sure we extract all the other versions from zip archives. Also add the archive for RVCT4.

// Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32\common\x86\atomics.cia
// 
//

#include <e32atomics.h>
#include <cpudefs.h>

/*
Versions needed:
	WINS/WINSCW		Use X86 locked operations. Assume Pentium or above CPU (CMPXCHG8B available)
	X86				For Pentium and above use locked operations
					For 486 use locked operations for 8, 16, 32 bit. For 64 bit must disable interrupts.
					NOTE: 486 not supported at the moment
	ARMv4/ARMv5		Must disable interrupts.
	ARMv6			LDREX/STREX for 8, 16, 32 bit. For 64 bit must disable interrupts (maybe).
	ARMv6K/ARMv7	LDREXB/LDREXH/LDREX/LDREXD

Need both kernel side and user side versions
*/

// Select the instruction prefix used by the atomic operations in this file.
// For __SMP__ builds, and for any build that is not __EPOC32__, __LOCK__ expands
// to the x86 "lock " prefix and __BARRIERS_NEEDED__ is defined (it enables the
// barrier instruction in __e32_memory_barrier()). Otherwise __LOCK__ is empty.
#if	defined(__SMP__) || !defined(__EPOC32__)
#define	__BARRIERS_NEEDED__
#define	__LOCK__	"lock "
#else
#define	__LOCK__
#endif


extern "C" {

// 32 bit instantiation of atomic_skeleton.h. The macros below supply the operand
// types, the function name suffix (32), a redirect that jumps to the symbol
// "_<x>32", and the names of the 32 bit A/C/D registers.
// NOTE(review): the skeleton itself is not visible here - confirm how it uses each macro.
#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint32
#define	__TIntX__		TInt32
#define	__fname__(x)	x##32
#define	__redir__(x)	asm("jmp _"#x "32")
#define	__A_REG__		"eax"
#define	__C_REG__		"ecx"
#define	__D_REG__		"edx"
#include "atomic_skeleton.h"

// 16 bit instantiation of atomic_skeleton.h. The macros below supply the operand
// types, the function name suffix (16), a redirect that jumps to the symbol
// "_<x>16", and the names of the 16 bit A/C/D registers.
// NOTE(review): the skeleton itself is not visible here - confirm how it uses each macro.
#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint16
#define	__TIntX__		TInt16
#define	__fname__(x)	x##16
#define	__redir__(x)	asm("jmp _"#x "16")
#define	__A_REG__		"ax"
#define	__C_REG__		"cx"
#define	__D_REG__		"dx"
#include "atomic_skeleton.h"

// 8 bit instantiation of atomic_skeleton.h. The macros below supply the operand
// types, the function name suffix (8), a redirect that jumps to the symbol
// "_<x>8", and the names of the 8 bit A/C/D registers.
// NOTE(review): the skeleton itself is not visible here - confirm how it uses each macro.
#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__
#define	__TUintX__		TUint8
#define	__TIntX__		TInt8
#define	__fname__(x)	x##8
#define	__redir__(x)	asm("jmp _"#x "8")
#define	__A_REG__		"al"
#define	__C_REG__		"cl"
#define	__D_REG__		"dl"
#include "atomic_skeleton.h"

// Remove the skeleton parameter macros after the last instantiation so that
// they are not visible to the hand written 64 bit code that follows.
#undef	__TUintX__
#undef	__TIntX__
#undef	__fname__
#undef	__redir__
#undef	__A_REG__
#undef	__C_REG__
#undef	__D_REG__

/** Full memory barrier for explicit memory accesses

	When __BARRIERS_NEEDED__ is defined this executes a locked add of 0 to the
	dword at the top of the stack, which leaves memory contents unchanged.
	When it is not defined the function consists of the return only.
*/
EXPORT_C __NAKED__ void __e32_memory_barrier()
	{
#ifdef __BARRIERS_NEEDED__
	asm("lock add dword ptr [esp], 0 ");	// locked read-modify-write that does not change the value at [esp]
#endif
	asm("ret ");
	}


/** Barrier guaranteeing completion as well as ordering

	Implemented by executing CPUID with EBX saved and restored around it.
	EAX is not loaded before the instruction, so the leaf queried is whatever
	value EAX held on entry; EAX, ECX and EDX are left holding CPUID output.
	NOTE(review): relies on CPUID being a serializing instruction - confirm
	against the processor manual.
*/
EXPORT_C __NAKED__ void __e32_io_completion_barrier()
	{
	asm("push ebx ");		// preserve ebx, which cpuid overwrites
	asm("cpuid ");
	asm("pop ebx ");
	asm("ret ");
	}


/** Find the most significant 1 in a 32 bit word

	Uses BSR on the stacked argument and substitutes -1 when the scan
	reports that the argument was zero.

	@param	v	The word to be scanned
	@return		The bit number of the most significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ms1_32(TUint32 /*v*/)
	{
	asm("bsr eax, [esp+4] ");		// eax = index of highest set bit of v, ZF=1 if v==0
	asm("jnz short 1f ");			// v != 0: eax already holds the result
	asm("mov eax, 0xffffffff ");	// v == 0: return -1
	asm("1: ");
	asm("ret ");
	}


/** Find the least significant 1 in a 32 bit word

	Uses BSF on the stacked argument and substitutes -1 when the scan
	reports that the argument was zero.

	@param	v	The word to be scanned
	@return		The bit number of the least significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ls1_32(TUint32 /*v*/)
	{
	asm("bsf eax, [esp+4] ");		// eax = index of lowest set bit of v, ZF=1 if v==0
	asm("jnz short 1f ");			// v != 0: eax already holds the result
	asm("mov eax, 0xffffffff ");	// v == 0: return -1
	asm("1: ");
	asm("ret ");
	}


/** Count the number of 1's in a 32 bit word

	Adds neighbouring bit fields in parallel: single bits into 2 bit counts,
	those into 4 bit counts, those into per-byte counts, and finally sums the
	four byte counts into AL.

	@param	v	The word to be scanned
	@return		The number of 1's
*/
EXPORT_C __NAKED__ TInt __e32_bit_count_32(TUint32 /*v*/)
	{
	asm("mov eax, [esp+4] ");		/* eax = v */
	asm("mov edx, eax ");
	asm("and eax, 0xaaaaaaaa ");	/* eax = odd bits of arg */
	asm("and edx, 0x55555555 ");	/* edx = even bits of arg */
	asm("shr eax, 1 ");				/* eax = odd bits of arg shifted into even bits */
	asm("add eax, edx ");			/* eax = 16 groups of 2 bit counts */
	asm("mov edx, eax ");
	asm("and eax, 0xcccccccc ");	/* odd groups of 2 */
	asm("and edx, 0x33333333 ");	/* even groups of 2 */
	asm("shr eax, 2 ");				/* odd groups of 2 shifted to even positions */
	asm("add eax, edx ");			/* 8 groups of 4 bit counts */
	asm("mov edx, eax ");
	asm("shr eax, 4 ");				/* odd nibbles shifted onto even nibbles */
	asm("add eax, edx ");			/* even nibbles = sum of 8 bits, odd nibbles garbage */
	asm("and eax, 0x0f0f0f0f ");	/* eliminate garbage nibbles */
	asm("add al, ah ");				/* AL = bit count of lower 16 bits */
	asm("mov dl, al ");				/* keep the lower 16 bit count in DL */
	asm("shr eax, 16 ");			/* bring the upper two byte counts into AL/AH */
	asm("add al, ah ");				/* AL = bit count of upper 16 bits */
	asm("xor ah, ah ");				/* top 24 bits of EAX now zero */
	asm("add al, dl ");				/* AL = bit count of entire 32 bits */
	asm("ret ");
	}


/** Find the most significant 1 in a 64 bit word

	Scans the high 32 bits first and only falls back to the low 32 bits when
	the high half is zero. The v == 0 path loads 0xffffffff and then falls
	through the "or eax, 32", which leaves the all-ones value unchanged.

	@param	v	The word to be scanned
	@return		The bit number of the most significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ms1_64(TUint64 /*v*/)
	{
	asm("bsr eax, [esp+8] ");		// scan the high word of v
	asm("jnz short 2f ");			// high word nonzero: result is 32 + index
	asm("bsr eax, [esp+4] ");		// high word zero: scan the low word
	asm("jnz short 1f ");			// low word nonzero: eax is the result
	asm("mov eax, 0xffffffff ");	// v == 0: return -1 (unchanged by the OR below)
	asm("2: ");
	asm("or eax, 32 ");				// index is below 32, so OR with 32 adds 32
	asm("1: ");
	asm("ret ");
	}


/** Find the least significant 1 in a 64 bit word

	Scans the low 32 bits first and only falls back to the high 32 bits when
	the low half is zero. The v == 0 path loads 0xffffffff and then falls
	through the "or eax, 32", which leaves the all-ones value unchanged.

	@param	v	The word to be scanned
	@return		The bit number of the least significant 1 if v != 0
				-1 if v == 0
*/
EXPORT_C __NAKED__ TInt __e32_find_ls1_64(TUint64 /*v*/)
	{
	asm("bsf eax, [esp+4] ");		// scan the low word of v
	asm("jnz short 1f ");			// low word nonzero: eax is the result
	asm("bsf eax, [esp+8] ");		// low word zero: scan the high word
	asm("jnz short 2f ");			// high word nonzero: result is 32 + index
	asm("mov eax, 0xffffffff ");	// v == 0: return -1 (unchanged by the OR below)
	asm("2: ");
	asm("or eax, 32 ");				// index is below 32, so OR with 32 adds 32
	asm("1: ");
	asm("ret ");
	}


/** Count the number of 1's in a 64 bit word

	Reduces each 32 bit half to eight 4 bit counts, adds the two halves
	together nibble by nibble, then folds the nibbles into bytes and the
	bytes into a single total in AL.

	@param	v	The word to be scanned
	@return		The number of 1's
*/
EXPORT_C __NAKED__ TInt __e32_bit_count_64(TUint64 /*v*/)
	{
	asm("mov eax, [esp+4] ");		// eax = low word of v
	asm("mov edx, [esp+8] ");		// edx = high word of v

	// Low word: ecx = 8 groups of 4 bit counts
	asm("mov ecx, eax ");
	asm("and eax, 0xaaaaaaaa ");
	asm("and ecx, 0x55555555 ");
	asm("shr eax, 1 ");
	asm("add eax, ecx ");			// 16 groups of 2 bit counts
	asm("mov ecx, eax ");
	asm("and eax, 0xcccccccc ");
	asm("and ecx, 0x33333333 ");
	asm("shr eax, 2 ");
	asm("add ecx, eax ");			// ecx = 8 groups of 4 bit counts (low word)

	// High word: eax = 8 groups of 4 bit counts
	asm("mov eax, edx ");
	asm("and eax, 0xaaaaaaaa ");
	asm("and edx, 0x55555555 ");
	asm("shr eax, 1 ");
	asm("add eax, edx ");			// 16 groups of 2 bit counts
	asm("mov edx, eax ");
	asm("and eax, 0xcccccccc ");
	asm("and edx, 0x33333333 ");
	asm("shr eax, 2 ");
	asm("add eax, edx ");			// eax = 8 groups of 4 bit counts (high word)

	asm("add eax, ecx ");			// each nibble = count for that nibble position in both words
	asm("mov edx, eax ");
	asm("and eax, 0xf0f0f0f0 ");	// odd nibbles
	asm("and edx, 0x0f0f0f0f ");	// even nibbles
	asm("shr eax, 4 ");
	asm("add eax, edx ");			// 4 bytes, each the count for one byte position in both words
	asm("add al, ah ");				// AL = count from byte positions 0 and 1
	asm("mov dl, al ");
	asm("shr eax, 16 ");
	asm("add al, ah ");				// AL = count from byte positions 2 and 3
	asm("xor ah, ah ");				// top 24 bits of EAX now zero
	asm("add al, dl ");				// AL = bit count of entire 64 bits
	asm("ret ");
	}




/** Read a 64 bit word with acquire semantics

	The read is performed with cmpxchg8b using the same arbitrary value as both
	comparand and replacement: if *a happens to equal it, *a is rewritten with
	the value it already holds; otherwise the current *a is loaded into edx:eax.
	Either way edx:eax ends up equal to *a.
	NOTE(review): cmpxchg8b is a read-modify-write instruction, so presumably the
	target must be writable even though the parameter is const - confirm.

	@param	a	Address of word to be read - must be a multiple of 8
	@return		The value read
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_load_acq64(const volatile TAny* /*a*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, 0x0badbeef ");		// edx:eax = arbitrary comparand
	asm("mov edx, eax ");
	asm("mov ebx, eax ");				// ecx:ebx = same value as the comparand
	asm("mov ecx, eax ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==edx:eax) *a=ecx:ebx (same value) else edx:eax=*a
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");						// result in edx:eax
	}


/** Write a 64 bit word with release semantics

	The store is done with a cmpxchg8b retry loop: the current contents of *a
	are used as the comparand and the loop repeats until the exchange succeeds.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The value written
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_store_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov ebx, [esp+16] ");			// ecx:ebx = v
	asm("mov ecx, [esp+20] ");
	asm("mov eax, [edi] ");				// edx:eax = initial guess at *a
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm(__LOCK__ "cmpxchg8b [edi] " );	// if (*a==edx:eax) *a=v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");				// *a differed from the guess: retry with the value just read
	asm("mov eax, ebx ");				// return v in edx:eax
	asm("mov edx, ecx ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** Write a 64 bit word with full barrier semantics

	Implemented as a jump to __e32_atomic_store_rel64, so this variant runs
	exactly the same code as the release store.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The value written
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_store_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_store_rel64 ");	// tail jump; stack arguments are left untouched
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Relaxed ordering.

	Implemented as a jump to __e32_atomic_swp_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_swp_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Acquire semantics.

	Implemented as a jump to __e32_atomic_swp_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_swp_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Release semantics.

	Implemented as a jump to __e32_atomic_swp_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_swp_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** Write a 64 bit word to memory and return the original value of the memory.
	Full barrier semantics.

	The exchange is done with a cmpxchg8b retry loop; when the exchange finally
	succeeds edx:eax still holds the value that was replaced.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	v	The value to be written
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_swp_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov ebx, [esp+16] ");			// ecx:ebx = v
	asm("mov ecx, [esp+20] ");
	asm("mov eax, [edi] ");				// edx:eax = initial guess at *a
	asm("mov edx, [edi+4] ");
	asm("1: ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==edx:eax) *a=v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");				// *a differed from the guess: retry with the value just read
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");						// edx:eax = original value of *a
	}


/** 64 bit compare and swap, relaxed ordering.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	Implemented as a jump to __e32_atomic_cas_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_rlx64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_cas_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit compare and swap, acquire semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	Implemented as a jump to __e32_atomic_cas_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_acq64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_cas_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit compare and swap, release semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	Implemented as a jump to __e32_atomic_cas_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_rel64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_cas_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit compare and swap, full barrier semantics.

	Atomically performs the following operation:
		if (*a == *q)	{ *a = v; return TRUE; }
		else			{ *q = *a; return FALSE; }

	A single cmpxchg8b is issued (no retry loop); on failure the value found
	in *a is written back through q.

	@param	a	Address of word to be written - must be a multiple of 8
	@param	q	Address of location containing expected value
	@param	v	The new value to be written if the old value is as expected
	@return		TRUE if *a was updated, FALSE otherwise
*/
EXPORT_C __NAKED__ TBool		__e32_atomic_cas_ord64(volatile TAny* /*a*/, TUint64* /*q*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("push esi ");					// three registers pushed, so arguments start at esp+16
	asm("mov edi, [esp+16] ");			// edi = a
	asm("mov esi, [esp+20] ");			// esi = q
	asm("mov ebx, [esp+24] ");			// ecx:ebx = v
	asm("mov ecx, [esp+28] ");
	asm("mov eax, [esi] ");				// edx:eax = *q
	asm("mov edx, [esi+4] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==*q) *a=v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 2f ");				// mismatch: take the failure path
	asm("mov eax, 1 ");					// success: return 1 (TRUE)
	asm("pop esi ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	asm("2: ");
	asm("mov [esi], eax ");				// *q = edx:eax
	asm("mov [esi+4], edx ");
	asm("xor eax, eax ");				// failure: return 0 (FALSE)
	asm("pop esi ");
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");
	}


/** 64 bit atomic add, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	Implemented as a jump to __e32_atomic_add_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_add_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit atomic add, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	Implemented as a jump to __e32_atomic_add_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_add_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit atomic add, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	Implemented as a jump to __e32_atomic_add_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_add_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit atomic add, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv + v; return oldv;

	Implemented as a cmpxchg8b retry loop: the sum is recomputed from the most
	recently observed *a each time the exchange fails.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be added
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_add_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");							// retry point: edx:eax = latest observed *a
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("add ebx, [esp+16] ");			// ecx:ebx = oldv + v
	asm("adc ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv+v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");				// *a changed since it was read: recompute and retry
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");						// edx:eax = oldv
	}


/** 64 bit atomic bitwise logical AND, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	Implemented as a jump to __e32_atomic_and_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_and_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit atomic bitwise logical AND, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	Implemented as a jump to __e32_atomic_and_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_and_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit atomic bitwise logical AND, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	Implemented as a jump to __e32_atomic_and_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_and_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit atomic bitwise logical AND, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv & v; return oldv;

	Implemented as a cmpxchg8b retry loop: the new value is recomputed from the
	most recently observed *a each time the exchange fails.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ANDed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_and_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");							// retry point: edx:eax = latest observed *a
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("and ebx, [esp+16] ");			// ecx:ebx = oldv & v
	asm("and ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv&v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");				// *a changed since it was read: recompute and retry
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");						// edx:eax = oldv
	}


/** 64 bit atomic bitwise logical inclusive OR, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	Implemented as a jump to __e32_atomic_ior_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_ior_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit atomic bitwise logical inclusive OR, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	Implemented as a jump to __e32_atomic_ior_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_ior_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit atomic bitwise logical inclusive OR, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	Implemented as a jump to __e32_atomic_ior_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_ior_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit atomic bitwise logical inclusive OR, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv | v; return oldv;

	Implemented as a cmpxchg8b retry loop: the new value is recomputed from the
	most recently observed *a each time the exchange fails.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be ORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_ior_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");							// retry point: edx:eax = latest observed *a
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("or ebx, [esp+16] ");			// ecx:ebx = oldv | v
	asm("or ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv|v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");				// *a changed since it was read: recompute and retry
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");						// edx:eax = oldv
	}


/** 64 bit atomic bitwise logical exclusive OR, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	Implemented as a jump to __e32_atomic_xor_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_rlx64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_xor_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit atomic bitwise logical exclusive OR, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	Implemented as a jump to __e32_atomic_xor_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_acq64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_xor_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit atomic bitwise logical exclusive OR, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	Implemented as a jump to __e32_atomic_xor_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_rel64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_xor_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit atomic bitwise logical exclusive OR, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = oldv ^ v; return oldv;

	Implemented as a cmpxchg8b retry loop: the new value is recomputed from the
	most recently observed *a each time the exchange fails.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	v	The value to be XORed with *a
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_xor_ord64(volatile TAny* /*a*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");							// retry point: edx:eax = latest observed *a
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("xor ebx, [esp+16] ");			// ecx:ebx = oldv ^ v
	asm("xor ecx, [esp+20] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=oldv^v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");				// *a changed since it was read: recompute and retry
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");						// edx:eax = oldv
	}


/** 64 bit atomic bitwise universal function, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	Implemented as a jump to __e32_atomic_axo_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_rlx64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_axo_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit atomic bitwise universal function, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	Implemented as a jump to __e32_atomic_axo_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_acq64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_axo_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit atomic bitwise universal function, release semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	Implemented as a jump to __e32_atomic_axo_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_rel64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_axo_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit atomic bitwise universal function, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; *a = (oldv & u) ^ v; return oldv;

	Implemented as a cmpxchg8b retry loop: the new value is recomputed from the
	most recently observed *a each time the exchange fails.

	@param	a	Address of word to be updated - must be a multiple of 8
	@param	u	The value to be ANDed with *a
	@param	v	The value to be XORed with (*a&u)
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_axo_ord64(volatile TAny* /*a*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");							// retry point: edx:eax = latest observed *a
	asm("mov ebx, eax ");
	asm("mov ecx, edx ");
	asm("and ebx, [esp+16] ");			// ecx:ebx = oldv & u
	asm("and ecx, [esp+20] ");
	asm("xor ebx, [esp+24] ");			// ecx:ebx = (oldv & u) ^ v
	asm("xor ecx, [esp+28] ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=(oldv&u)^v, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");				// *a changed since it was read: recompute and retry
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");						// edx:eax = oldv
	}


/** 64 bit threshold and add, unsigned, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	Implemented as a jump to __e32_atomic_tau_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_rlx64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_tau_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit threshold and add, unsigned, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	Implemented as a jump to __e32_atomic_tau_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_acq64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_tau_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit threshold and add, unsigned, release semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	Implemented as a jump to __e32_atomic_tau_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_rel64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("jmp ___e32_atomic_tau_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit threshold and add, unsigned, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	Implemented as a cmpxchg8b retry loop. The 64 bit unsigned comparison is
	done with a cmp/sbb pair whose final carry flag selects between u and v.

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (unsigned compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TUint64	__e32_atomic_tau_ord64(volatile TAny* /*a*/, TUint64 /*t*/, TUint64 /*u*/, TUint64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");							// retry point: edx:eax = latest observed *a
	asm("mov ebx, edx ");				// scratch copy of oldv.high for the compare
	asm("cmp eax, [esp+16] ");			// eax - t.low, CF=borrow
	asm("sbb ebx, [esp+20] ");			// CF = borrow from (oldv - t)
	asm("jnc short 2f ");				// no borrow means oldv>=t so use u
	asm("mov ebx, [esp+32] ");			// ecx:ebx = v
	asm("mov ecx, [esp+36] ");
	asm("jmp short 3f ");
	asm("2: ");
	asm("mov ebx, [esp+24] ");			// ecx:ebx = u
	asm("mov ecx, [esp+28] ");
	asm("3: ");
	asm("add ebx, eax ");				// ecx:ebx = oldv + u or v
	asm("adc ecx, edx ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=ecx:ebx, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");				// *a changed since it was read: re-evaluate and retry
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");						// edx:eax = oldv
	}


/** 64 bit threshold and add, signed, relaxed ordering.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	Implemented as a jump to __e32_atomic_tas_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_rlx64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("jmp ___e32_atomic_tas_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit threshold and add, signed, acquire semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	Implemented as a jump to __e32_atomic_tas_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_acq64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("jmp ___e32_atomic_tas_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit threshold and add, signed, release semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	Implemented as a jump to __e32_atomic_tas_ord64, so this variant runs
	exactly the same code as the full barrier version.

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_rel64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("jmp ___e32_atomic_tas_ord64 ");	// tail jump; stack arguments are left untouched
	}


/** 64 bit threshold and add, signed, full barrier semantics.

	Atomically performs the following operation:
		oldv = *a; if (oldv>=t) *a=oldv+u else *a=oldv+v; return oldv;

	Implemented as a cmpxchg8b retry loop. The 64 bit signed comparison is done
	with a cmp/sbb pair whose final sign and overflow flags select between u and v.

	@param	a	Address of data to be updated - must be naturally aligned
	@param	t	The threshold to compare *a to (signed compare)
	@param	u	The value to be added to *a if it is originally >= t
	@param	v	The value to be added to *a if it is originally < t
	@return		The original value of *a
*/
EXPORT_C __NAKED__ TInt64	__e32_atomic_tas_ord64(volatile TAny* /*a*/, TInt64 /*t*/, TInt64 /*u*/, TInt64 /*v*/)
	{
	asm("push ebx ");
	asm("push edi ");
	asm("mov edi, [esp+12] ");			// edi = a
	asm("mov eax, [edi] ");				// edx:eax = oldv
	asm("mov edx, [edi+4] ");
	asm("1: ");							// retry point: edx:eax = latest observed *a
	asm("mov ebx, edx ");				// scratch copy of oldv.high for the compare
	asm("cmp eax, [esp+16] ");			// eax - t.low, CF=borrow
	asm("sbb ebx, [esp+20] ");			// SF=sign, OF=overflow from (oldv - t)
	asm("jge short 2f ");				// SF==OF (GE condition) means oldv>=t so use u
	asm("mov ebx, [esp+32] ");			// ecx:ebx = v
	asm("mov ecx, [esp+36] ");
	asm("jmp short 3f ");
	asm("2: ");
	asm("mov ebx, [esp+24] ");			// ecx:ebx = u
	asm("mov ecx, [esp+28] ");
	asm("3: ");
	asm("add ebx, eax ");				// ecx:ebx = oldv + u or v
	asm("adc ecx, edx ");
	asm(__LOCK__ "cmpxchg8b [edi] ");	// if (*a==oldv) *a=ecx:ebx, ZF=1 else edx:eax=*a, ZF=0
	asm("jne short 1b ");				// *a changed since it was read: re-evaluate and retry
	asm("pop edi ");
	asm("pop ebx ");
	asm("ret ");						// edx:eax = oldv
	}

} // extern "C"