kerneltest/e32test/math/t_vfp.cpp
author Slion
Tue, 08 Dec 2009 08:11:42 +0100
branchanywhere
changeset 19 f6d3d9676ee4
parent 0 a41df078684a
child 31 56f325a607ea
permissions -rw-r--r--
Trying to figure out how to implement my WINC like compatibility layer. Going the emulation way is probably not so smart. We should not use the kernel but rather hook native functions in the Exec calls.

// Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32test\math\t_vfp.cpp
// Overview:
// Test the ARM Vector Floating Point operations.
// API Information:
// VFP
// Details:
// - Check that the HAL agrees with the hardware about whether
// VFP is supported.
// - Test setting VFP to IEEE with no exceptions mode, if IEEE mode is
// supported, otherwise leave the mode alone.
// - Test single and double precision vector floating point operations:
// ABS, NEG, ADD, SUB, MUL, DIV, NMUL, SQRT, MAC, MSC, NMAC and NMSC.
// Verify results are as expected - if IEEE mode was set, verify
// bit-for-bit, in accordance with the IEEE specification, otherwise
// use normal floating point equality.
// - Test VFP context save.
// - Test various VFP operations that cause bounces to support code if
// IEEE mode is supported.
// - Test setting VFP to RunFast mode if RunFast mode is supported.
// - Test setting VFP rounding mode.
// - Test inheriting VFP mode to created threads.
// Platforms/Drives/Compatibility:
// All 
// Assumptions/Requirement/Pre-requisites:
// Failures and causes:
// Base Port information:
// 
//

//! @file
//! @SYMTestCaseID KBASE-0017-T_VFP
//! @SYMTestCaseDesc VFPv2 general functionality and bounce handling
//! @SYMREQ 5159
//! @SYMTestPriority Critical
//! @SYMTestActions Check VFP functions correctly in all modes and that mode switching works correctly.
//! @SYMTestExpectedResults Test runs until this message is emitted: RTEST: SUCCESS : T_VFP test completed O.K.
//! @SYMTestType UT

#include "t_vfp.h"
#define __E32TEST_EXTENSION__
#include <e32test.h>
#include <e32math.h>
#include <hal.h>
#include <e32svr.h>
#include <u32hal.h>

// Test harness object used for every assertion and for console output.
RTest test(_L("T_VFP"));
// Floating point system ID; filled in by DetectVFP() from the kernel HAL.
TUint32 FPSID;
// VFP architecture version, extracted from FPSID in E32Main().
TUint32 ArchVersion; 
// Set in E32Main(): non-zero when double precision is available.
TBool Double;
// Set in E32Main(): non-zero when IEEE-no-exceptions mode could be selected.
TBool IEEEMode;
// Number of logical CPUs, read from the kernel HAL in E32Main().
TInt CPUs;
// CPU indices that the worker threads lock themselves to; they double as the
// loop counters in TestVFPInitialState() and TestVFPContextSave().
TInt CurrentCpu1;
TInt CurrentCpu2;

// Signatures of the per-operation test functions: aArgs holds the operands,
// aResults receives the VFP result in [0] and the reference result in [1].
typedef void TSglTest(const TReal32* aArgs, TReal32* aResults);
typedef void TDblTest(const TReal64* aArgs, TReal64* aResults);

// Ask the kernel HAL for the floating point system ID.
// The HAL writes the ID into the global FPSID; the function reports
// whether the HAL call completed with KErrNone.
TBool DetectVFP()
	{
	const TInt halResult = UserSvr::HalFunction(EHalGroupKernel, EKernelHalFloatingPointSystemId, &FPSID, NULL);
	return halResult == KErrNone;
	}

// Thread entry point: lock to the CPU selected by CurrentCpu1, then copy
// the 32 single precision registers into the TReal32 array passed in aPtr.
TInt TestVFPInitThreadFn(TAny* aPtr)
	{
	UserSvr::HalFunction(EHalGroupKernel, EKernelHalLockThreadToCpu, (TAny*)CurrentCpu1, 0);
	TReal32* const snapshot = (TReal32*)aPtr;
	for (TInt reg = 0; reg < 32; ++reg)
		snapshot[reg] = Vfp::SReg(reg);
	return 0;
	}

void TestVFPInitialState()
	{
	for (CurrentCpu1 = 0; CurrentCpu1 < CPUs; CurrentCpu1++)
		{
		TReal32 f[32];
		RThread t;
		TInt r = t.Create(KNullDesC, &TestVFPInitThreadFn, 0x1000, NULL, f);
		test(r==KErrNone);
		TRequestStatus s;
		t.Logon(s);
		t.Resume();
		User::WaitForRequest(s);
		TInt xt = t.ExitType();
		TInt xr = t.ExitReason();
		test(xt == EExitKill && xr == KErrNone);
		CLOSE_AND_WAIT(t);
		UserSvr::HalFunction(EHalGroupKernel, EKernelHalLockThreadToCpu, (TAny*)CurrentCpu1, 0);
		test.Printf(_L("FPSCR = %08x for core %d\n"), Vfp::Fpscr(), CurrentCpu1);
		const TUint32* p = (const TUint32*)f;
		for (TInt i=0; i<32; ++i)
			{
			if (f[i] != 0.0f)
				{
				test.Printf(_L("S%d = 0x%08x\n"), i, p[i]);
				test(f[i] == 0.0f);
				}
			}
		}
	}

// Write a distinct integer pattern to every single precision register and
// read it back, repeating aIter times with a different pattern on each pass.
// Panics if ArchVersion is not one of the known VFP versions.
void TestVFPSglRegs(TInt aIter=2)
	{
	TInt regCount = 0;

	switch (ArchVersion)
		{
		case ARCH_VERSION_VFPV2:
		case ARCH_VERSION_VFPV3_SUBARCH_V2:
		case ARCH_VERSION_VFPV3_SUBARCH_NULL:
		case ARCH_VERSION_VFPV3_SUBARCH_V3:
			regCount = 32;
			break;
		case 0:
		default:
			__ASSERT_ALWAYS(0, User::Panic(_L("Bad VFP version"),__LINE__));
			/* NOTREACHED */
		}

	for (TInt pass = 0; pass < aIter; ++pass)
		{
		TInt reg;
		// Fill phase: register 'reg' receives the value pass + reg.
		for (reg = 0; reg < regCount; ++reg)
			{
			const TInt32 pattern = pass + reg;
			Vfp::SetSReg(pattern, reg);
			}
		// Verify phase: every register must still hold its pattern.
		for (reg = 0; reg < regCount; ++reg)
			{
			const TInt32 expected = pass + reg;
			const TInt32 actual = Vfp::SRegInt(reg);
			test(expected == actual);
			}
		}
	}

// Thread entry point: lock to the CPU selected by CurrentCpu1 and run the
// single precision register test for KMaxTInt passes (the creator is
// expected to kill the thread long before that completes).
TInt TestVFPSglRegsThread(TAny*)
	{
	TAny* const targetCpu = (TAny*)CurrentCpu1;
	UserSvr::HalFunction(EHalGroupKernel, EKernelHalLockThreadToCpu, targetCpu, 0);
	TestVFPSglRegs(KMaxTInt);
	return 0;
	}

// Write a distinct 64-bit integer pattern to every double precision register
// and read it back, repeating aIter times with a different pattern per pass.
// The number of D registers is 16 for VFPv2; for VFPv3 it is 32 only when the
// HAL reports the full VFPv3 type, otherwise 16.
// Panics if ArchVersion is not one of the known VFP versions.
void TestVFPDblRegs(TInt aIter=2)
	{
	TInt i;
	TInt j;
	TInt nDblRegs=0;

	switch(ArchVersion)
		{
		case ARCH_VERSION_VFPV2:
			{
			nDblRegs = 16;
			break;
			}
		case ARCH_VERSION_VFPV3_SUBARCH_V2:
		case ARCH_VERSION_VFPV3_SUBARCH_NULL:
		case ARCH_VERSION_VFPV3_SUBARCH_V3:
			{
			TInt vfpType;
			TInt ret = HAL::Get(HALData::EHardwareFloatingPoint, vfpType);
			if (ret == KErrNone && vfpType == EFpTypeVFPv3)
				nDblRegs = 32;
			else
				nDblRegs = 16;
			break;
			}
		case 0:
		default:
			__ASSERT_ALWAYS(0, User::Panic(_L("Bad VFP version"),__LINE__));
			/* NOTREACHED */
		}

	for (i=0; i<aIter; ++i)
		{
		for (j=0; j<nDblRegs; ++j)
			{
			// Widen to 64 bits BEFORE adding: "i + j + KMaxTUint" would be
			// evaluated in 32-bit unsigned arithmetic and wrap, leaving the
			// upper word of every pattern zero and therefore untested.
			const TInt64 f = (TInt64)KMaxTUint + i + j;
			Vfp::SetDReg(f, j);
			}
		for (j=0; j<nDblRegs; ++j)
			{
			const TInt64 f = (TInt64)KMaxTUint + i + j;
			const TInt64 g = Vfp::DRegInt(j);
			test(f == g);
			}
		}
	}

// Thread entry point: lock to the CPU selected by CurrentCpu2 and run the
// double precision register test for KMaxTInt passes (the creator is
// expected to kill the thread long before that completes).
TInt TestVFPDblRegsThread(TAny*)
	{
	TAny* const targetCpu = (TAny*)CurrentCpu2;
	UserSvr::HalFunction(EHalGroupKernel, EKernelHalLockThreadToCpu, targetCpu, 0);
	TestVFPDblRegs(KMaxTInt);
	return 0;
	}

void TestVFPContextSave()
	{
	for (CurrentCpu2 = 0; CurrentCpu2 < CPUs; CurrentCpu2++)
		{
		for (CurrentCpu1 = 0; CurrentCpu1 < CPUs; CurrentCpu1++)
			{
			TThreadFunction tf1 = &TestVFPSglRegsThread;
			TThreadFunction tf2 = Double ? &TestVFPDblRegsThread : &TestVFPSglRegsThread;
			RThread t1, t2;
			TInt r;
			r = t1.Create(KNullDesC, tf1, 0x1000, 0x1000, 0x1000, NULL);
			test(r==KErrNone);
			t1.SetPriority(EPriorityLess);
			r = t2.Create(KNullDesC, tf2, 0x1000, 0x1000, 0x1000, NULL);
			test(r==KErrNone);
			t2.SetPriority(EPriorityLess);
			TRequestStatus s1, s2;
			t1.Logon(s1);
			t2.Logon(s2);
			t1.Resume();
			t2.Resume();
			test.Printf(_L("Let threads run concurrently (cores %d and %d)\n"), CurrentCpu1, CurrentCpu2);
			User::After(20*1000*1000/CPUs);

			test.Printf(_L("Kill threads\n"));
			t1.Kill(0);
			t2.Kill(0);
			User::WaitForRequest(s1);
			User::WaitForRequest(s2);
			test(t1.ExitType()==EExitKill && t1.ExitReason()==KErrNone);
			test(t2.ExitType()==EExitKill && t2.ExitReason()==KErrNone);
			CLOSE_AND_WAIT(t1);
			CLOSE_AND_WAIT(t2);
			}
		}
	}

// Thread entry point: repeatedly read S0 (KMaxTInt times) so that this
// thread keeps touching the VFP while its sibling thread runs.
TInt TestBounceCtxThread1(TAny*)
	{
	TInt remaining = KMaxTInt;
	while (remaining-- > 0)
		{
		Vfp::SReg(0);
		}
	return KErrNone;
	}

// Thread entry point: repeatedly divide the smallest normal single precision
// number by two and multiply it back, checking that the original bit pattern
// is recovered every time. A mismatch is logged and fails the test.
TInt TestBounceCtxThread2(TAny*)
	{
	const TInt startBits = 0x00800000; // smallest single precision normal number, 1*2^-126
	const TReal32 startValue = *(const TReal32*)&startBits;
	for (TInt pass = 0; pass < KMaxTInt; ++pass)
		{
		Vfp::SetSReg(startValue, 1);
		Vfp::SetSReg(2.0f, 2);
		Vfp::DivS();
		Vfp::CpyS0(1);
		Vfp::MulS();
		Vfp::CpyS0(1);
		const TReal32 endValue = Vfp::SReg(0);
		const TInt endBits = *(const TInt*)&endValue;
		if (startBits == endBits)
			continue;
		RDebug::Printf("mismatch in iter %d, start %08x end %08x\n", pass, startBits, endBits);
		test(0);
		}
	return KErrNone;
	}

void DoBounceContextSwitchTests()
	{
	RThread t1, t2;
	TInt r;
	r = t1.Create(KNullDesC, &TestBounceCtxThread1, 0x1000, 0x1000, 0x1000, NULL);
	test(r==KErrNone);
	t1.SetPriority(EPriorityLess);
	r = t2.Create(KNullDesC, &TestBounceCtxThread2, 0x1000, 0x1000, 0x1000, NULL);
	test(r==KErrNone);
	t2.SetPriority(EPriorityLess);
	TRequestStatus s1, s2;
	t1.Logon(s1);
	t2.Logon(s2);
	t1.Resume();
	t2.Resume();
	test.Printf(_L("Let threads run concurrently ...\n"));
	User::After(20*1000*1000);

	test.Printf(_L("Kill threads\n"));
	t1.Kill(0);
	t2.Kill(0);
	User::WaitForRequest(s1);
	User::WaitForRequest(s2);
	test(t1.ExitType()==EExitKill && t1.ExitReason()==KErrNone);
	test(t2.ExitType()==EExitKill && t2.ExitReason()==KErrNone);
	CLOSE_AND_WAIT(t1);
	CLOSE_AND_WAIT(t2);
	}

void TestAbsS(const TReal32* a, TReal32* r)
	{
	Vfp::SetSReg(a[0], 1);
	Vfp::AbsS();
	r[0] = Vfp::SReg(0);
	r[1] = Abs(a[0]);
	}

void TestAddS(const TReal32* a, TReal32* r)
	{
	Vfp::SetSReg(a[0], 1);
	Vfp::SetSReg(a[1], 2);
	Vfp::AddS();
	r[0] = Vfp::SReg(0);
	r[1] = a[0] + a[1];
	}

void TestDivS(const TReal32* a, TReal32* r)
	{
	Vfp::SetSReg(a[0], 1);
	Vfp::SetSReg(a[1], 2);
	Vfp::DivS();
	r[0] = Vfp::SReg(0);
	TRealX x(a[0]);
	TRealX y(a[1]);
	x.DivEq(y);
	r[1] = (TReal32)x;
	}

void TestMacS(const TReal32* a, TReal32* r)
	{
	Vfp::SetSReg(a[0], 0);
	Vfp::SetSReg(a[1], 1);
	Vfp::SetSReg(a[2], 2);
	Vfp::MacS();
	r[0] = Vfp::SReg(0);
	r[1] = a[0] + a[1] * a[2];
	}

void TestMscS(const TReal32* a, TReal32* r)
	{
	Vfp::SetSReg(a[0], 0);
	Vfp::SetSReg(a[1], 1);
	Vfp::SetSReg(a[2], 2);
	Vfp::MscS();
	r[0] = Vfp::SReg(0);
	r[1] = a[1] * a[2] - a[0];
	}

void TestMulS(const TReal32* a, TReal32* r)
	{
	Vfp::SetSReg(a[0], 1);
	Vfp::SetSReg(a[1], 2);
	Vfp::MulS();
	r[0] = Vfp::SReg(0);
	TRealX x(a[0]);
	TRealX y(a[1]);
	x.MultEq(y);
	r[1] = (TReal32)x;
	}

void TestNegS(const TReal32* a, TReal32* r)
	{
	Vfp::SetSReg(a[0], 1);
	Vfp::NegS();
	r[0] = Vfp::SReg(0);
	r[1] = -a[0];
	}

void TestNMacS(const TReal32* a, TReal32* r)
	{
	Vfp::SetSReg(a[0], 0);
	Vfp::SetSReg(a[1], 1);
	Vfp::SetSReg(a[2], 2);
	Vfp::NMacS();
	r[0] = Vfp::SReg(0);
	r[1] = a[0] - a[1] * a[2];
	}

void TestNMscS(const TReal32* a, TReal32* r)
	{
	Vfp::SetSReg(a[0], 0);
	Vfp::SetSReg(a[1], 1);
	Vfp::SetSReg(a[2], 2);
	Vfp::NMscS();
	r[0] = Vfp::SReg(0);
	r[1] = -a[1] * a[2] - a[0];
	}

void TestNMulS(const TReal32* a, TReal32* r)
	{
	Vfp::SetSReg(a[0], 1);
	Vfp::SetSReg(a[1], 2);
	Vfp::NMulS();
	r[0] = Vfp::SReg(0);
	TRealX x(a[0]);
	TRealX y(a[1]);
	x.MultEq(y);
	r[1] = -(TReal32)x;
	}

void TestSqrtS(const TReal32* a, TReal32* r)
	{
	Vfp::SetSReg(a[0], 1);
	Vfp::SqrtS();
	r[0] = Vfp::SReg(0);
	TReal x = a[0];
	TReal y;
	Math::Sqrt(y, x);
	r[1] = (TReal32)y;
	}

void TestSubS(const TReal32* a, TReal32* r)
	{
	Vfp::SetSReg(a[0], 1);
	Vfp::SetSReg(a[1], 2);
	Vfp::SubS();
	r[0] = Vfp::SReg(0);
	r[1] = a[0] - a[1];
	}



void TestAbsD(const TReal64* a, TReal64* r)
	{
	Vfp::SetDReg(a[0], 1);
	Vfp::AbsD();
	r[0] = Vfp::DReg(0);
	r[1] = Abs(a[0]);
	}

void TestAddD(const TReal64* a, TReal64* r)
	{
	Vfp::SetDReg(a[0], 1);
	Vfp::SetDReg(a[1], 2);
	Vfp::AddD();
	r[0] = Vfp::DReg(0);
	r[1] = a[0] + a[1];
	}

void TestDivD(const TReal64* a, TReal64* r)
	{
	Vfp::SetDReg(a[0], 1);
	Vfp::SetDReg(a[1], 2);
	Vfp::DivD();
	r[0] = Vfp::DReg(0);
	TRealX x(a[0]);
	TRealX y(a[1]);
	x.DivEq(y);
	r[1] = (TReal64)x;
	}

void TestMacD(const TReal64* a, TReal64* r)
	{
	Vfp::SetDReg(a[0], 0);
	Vfp::SetDReg(a[1], 1);
	Vfp::SetDReg(a[2], 2);
	Vfp::MacD();
	r[0] = Vfp::DReg(0);
	r[1] = a[0] + a[1] * a[2];
	}

void TestMscD(const TReal64* a, TReal64* r)
	{
	Vfp::SetDReg(a[0], 0);
	Vfp::SetDReg(a[1], 1);
	Vfp::SetDReg(a[2], 2);
	Vfp::MscD();
	r[0] = Vfp::DReg(0);
	r[1] = a[1] * a[2] - a[0];
	}

void TestMulD(const TReal64* a, TReal64* r)
	{
	Vfp::SetDReg(a[0], 1);
	Vfp::SetDReg(a[1], 2);
	Vfp::MulD();
	r[0] = Vfp::DReg(0);
	TRealX x(a[0]);
	TRealX y(a[1]);
	x.MultEq(y);
	r[1] = (TReal64)x;
	}

void TestNegD(const TReal64* a, TReal64* r)
	{
	Vfp::SetDReg(a[0], 1);
	Vfp::NegD();
	r[0] = Vfp::DReg(0);
	r[1] = -a[0];
	}

void TestNMacD(const TReal64* a, TReal64* r)
	{
	Vfp::SetDReg(a[0], 0);
	Vfp::SetDReg(a[1], 1);
	Vfp::SetDReg(a[2], 2);
	Vfp::NMacD();
	r[0] = Vfp::DReg(0);
	r[1] = a[0] - a[1] * a[2];
	}

void TestNMscD(const TReal64* a, TReal64* r)
	{
	Vfp::SetDReg(a[0], 0);
	Vfp::SetDReg(a[1], 1);
	Vfp::SetDReg(a[2], 2);
	Vfp::NMscD();
	r[0] = Vfp::DReg(0);
	r[1] = -a[1] * a[2] - a[0];
	}

void TestNMulD(const TReal64* a, TReal64* r)
	{
	Vfp::SetDReg(a[0], 1);
	Vfp::SetDReg(a[1], 2);
	Vfp::NMulD();
	r[0] = Vfp::DReg(0);
	TRealX x(a[0]);
	TRealX y(a[1]);
	x.MultEq(y);
	r[1] = -(TReal64)x;
	}

void TestSqrtD(const TReal64* a, TReal64* r)
	{
	Vfp::SetDReg(a[0], 1);
	Vfp::SqrtD();
	r[0] = Vfp::DReg(0);
	TReal x = a[0];
	TReal y;
	Math::Sqrt(y, x);
	r[1] = (TReal64)y;
	}

void TestSubD(const TReal64* a, TReal64* r)
	{
	Vfp::SetDReg(a[0], 1);
	Vfp::SetDReg(a[1], 2);
	Vfp::SubD();
	r[0] = Vfp::DReg(0);
	r[1] = a[0] - a[1];
	}

#define DO_SGL_TEST1(name, func, a1)			DoSglTest(name, __LINE__, func, a1)
#define DO_SGL_TEST2(name, func, a1, a2)		DoSglTest(name, __LINE__, func, a1, a2)
#define DO_SGL_TEST3(name, func, a1, a2, a3)	DoSglTest(name, __LINE__, func, a1, a2, a3)
void DoSglTest(const char* aName, TInt aLine, TSglTest aFunc, TReal32 a1, TReal32 a2=0.0f, TReal32 a3=0.0f)
	{
	TPtrC8 name8((const TText8*)aName);
	TBuf<128> name16;
	name16.Copy(name8);
	test.Printf(_L("%S(%g,%g,%g)\n"), &name16, a1, a2, a3);
	TReal32 args[3] = {a1, a2, a3};
	TReal32 results[2];
	(*aFunc)(args, results);
	if (IEEEMode)
		{
		if (*((TUint32*)&(results[0])) == *((TUint32*)&(results[1])))
			return;
		}
	else
		{
		if (results[0] == results[1])
			return;
		}
	const TUint32* pa = (const TUint32*)args;
	const TUint32* pr = (const TUint32*)results;
	test.Printf(_L("a1=%08x a2=%08x a3=%08x\n"), pa[0], pa[1], pa[2]);
	test.Printf(_L("actual result = %08x (%g)\nexpected result = %08x (%g)\n"), pr[0], results[0], pr[1], results[1]);
	test.Printf(_L("Test at line %d failed\n"), aLine);
	test(0);
	}

// Run the full table of single precision test cases, one DO_SGL_TESTn call
// per case. Each call passes its own __LINE__ to DoSglTest(), so a failure
// message identifies the exact case below; keep one case per line.
void DoSglTests()
	{
	// ABS
	DO_SGL_TEST1("ABS", &TestAbsS, 1.0f);
	DO_SGL_TEST1("ABS", &TestAbsS, -1.0f);
	DO_SGL_TEST1("ABS", &TestAbsS, 0.0f);
	DO_SGL_TEST1("ABS", &TestAbsS, -3.1415926536f);

	// NEG
	DO_SGL_TEST1("NEG", &TestNegS, 1.0f);
	DO_SGL_TEST1("NEG", &TestNegS, -1.0f);
	DO_SGL_TEST1("NEG", &TestNegS, 0.0f);
	DO_SGL_TEST1("NEG", &TestNegS, -3.1415926536f);

	// ADD
	DO_SGL_TEST2("ADD", &TestAddS, 0.0f, 0.0f);
	DO_SGL_TEST2("ADD", &TestAddS, 0.0f, 1.0f);
	DO_SGL_TEST2("ADD", &TestAddS, -1.0f, 1.0f);
	DO_SGL_TEST2("ADD", &TestAddS, 1.0f, 2.5f);
	DO_SGL_TEST2("ADD", &TestAddS, 1.0f, 6.022045e23f);
	DO_SGL_TEST2("ADD", &TestAddS, -7.3890561f, 1.414213562f);
	DO_SGL_TEST2("ADD", &TestAddS, -7.3890561f, -1.414213562f);

	// SUB
	DO_SGL_TEST2("SUB", &TestSubS, 0.0f, 0.0f);
	DO_SGL_TEST2("SUB", &TestSubS, 0.0f, 1.0f);
	DO_SGL_TEST2("SUB", &TestSubS, 1.0f, 1.0f);
	DO_SGL_TEST2("SUB", &TestSubS, 1.0f, 2.5f);
	DO_SGL_TEST2("SUB", &TestSubS, 91.0f, 2.5f);
	DO_SGL_TEST2("SUB", &TestSubS, 1.0f, 6.022045e23f);
	DO_SGL_TEST2("SUB", &TestSubS, -7.3890561f, 1.414213562f);
	DO_SGL_TEST2("SUB", &TestSubS, -7.3890561f, -1.414213562f);

	// MUL
	DO_SGL_TEST2("MUL", &TestMulS, 0.0f, 0.0f);
	DO_SGL_TEST2("MUL", &TestMulS, 1.0f, 0.0f);
	DO_SGL_TEST2("MUL", &TestMulS, 0.0f, 1.0f);
	DO_SGL_TEST2("MUL", &TestMulS, 2.5f, 6.5f);
	DO_SGL_TEST2("MUL", &TestMulS, -39.6f, 19.72f);
	DO_SGL_TEST2("MUL", &TestMulS, -10.1f, -20.1f);
	DO_SGL_TEST2("MUL", &TestMulS, 1e20f, 1e20f);
	DO_SGL_TEST2("MUL", &TestMulS, 1e-30f, 1e-30f);

	// DIV
	DO_SGL_TEST2("DIV", &TestDivS, 0.0f, 1.0f);
	DO_SGL_TEST2("DIV", &TestDivS, 1.0f, 5.0f);
	DO_SGL_TEST2("DIV", &TestDivS, 1.0f, -5.0f);
	DO_SGL_TEST2("DIV", &TestDivS, -1.0f, 5.0f);
	DO_SGL_TEST2("DIV", &TestDivS, -1.0f, -5.0f);
	DO_SGL_TEST2("DIV", &TestDivS, 7.3890561f, 2.7182818f);
	DO_SGL_TEST2("DIV", &TestDivS, 1e20f, 1e-20f);
	DO_SGL_TEST2("DIV", &TestDivS, 1e-30f, 1e30f);

	// NMUL
	DO_SGL_TEST2("NMUL", &TestNMulS, 0.0f, 0.0f);
	DO_SGL_TEST2("NMUL", &TestNMulS, 1.0f, 0.0f);
	DO_SGL_TEST2("NMUL", &TestNMulS, 0.0f, 1.0f);
	DO_SGL_TEST2("NMUL", &TestNMulS, 2.5f, 6.5f);
	DO_SGL_TEST2("NMUL", &TestNMulS, -39.6f, 19.72f);
	DO_SGL_TEST2("NMUL", &TestNMulS, -10.1f, -20.1f);
	DO_SGL_TEST2("NMUL", &TestNMulS, 1e20f, 1e20f);
	DO_SGL_TEST2("NMUL", &TestNMulS, 1e-30f, 1e-30f);

	// SQRT
	DO_SGL_TEST1("SQRT", &TestSqrtS, 0.0f);
	DO_SGL_TEST1("SQRT", &TestSqrtS, 1.0f);
	DO_SGL_TEST1("SQRT", &TestSqrtS, 2.0f);
	DO_SGL_TEST1("SQRT", &TestSqrtS, 3.0f);
	DO_SGL_TEST1("SQRT", &TestSqrtS, 9096256.0f);
	DO_SGL_TEST1("SQRT", &TestSqrtS, 1e36f);
	DO_SGL_TEST1("SQRT", &TestSqrtS, 1e-36f);

	// MAC
	DO_SGL_TEST3("MAC", &TestMacS, 0.0f, 0.0f, 0.0f);
	DO_SGL_TEST3("MAC", &TestMacS, 0.0f, 1.0f, 0.0f);
	DO_SGL_TEST3("MAC", &TestMacS, 0.0f, 1.0f, 1.0f);
	DO_SGL_TEST3("MAC", &TestMacS, -1.0f, 1.0f, 1.0f);
	DO_SGL_TEST3("MAC", &TestMacS, 0.8f, 0.1f, 8.0f);
	DO_SGL_TEST3("MAC", &TestMacS, 0.8f, -0.1f, 8.0f);
	DO_SGL_TEST3("MAC", &TestMacS, -0.8f, -0.1f, -8.0f);
	DO_SGL_TEST3("MAC", &TestMacS, 0.8f, 0.3333333333f, 3.1415926536f);

	// MSC
	DO_SGL_TEST3("MSC", &TestMscS, 0.0f, 0.0f, 0.0f);
	DO_SGL_TEST3("MSC", &TestMscS, 0.0f, 1.0f, 0.0f);
	DO_SGL_TEST3("MSC", &TestMscS, 0.0f, 1.0f, 1.0f);
	DO_SGL_TEST3("MSC", &TestMscS, -1.0f, 1.0f, 1.0f);
	DO_SGL_TEST3("MSC", &TestMscS, 0.8f, 0.1f, 8.0f);
	DO_SGL_TEST3("MSC", &TestMscS, 0.8f, -0.1f, 8.0f);
	DO_SGL_TEST3("MSC", &TestMscS, -0.8f, -0.1f, -8.0f);
	DO_SGL_TEST3("MSC", &TestMscS, 0.8f, 0.3333333333f, 3.1415926536f);

	// NMAC
	DO_SGL_TEST3("NMAC", &TestNMacS, 0.0f, 0.0f, 0.0f);
	DO_SGL_TEST3("NMAC", &TestNMacS, 0.0f, 1.0f, 0.0f);
	DO_SGL_TEST3("NMAC", &TestNMacS, 0.0f, 1.0f, 1.0f);
	DO_SGL_TEST3("NMAC", &TestNMacS, -1.0f, 1.0f, 1.0f);
	DO_SGL_TEST3("NMAC", &TestNMacS, 0.8f, 0.1f, 8.0f);
	DO_SGL_TEST3("NMAC", &TestNMacS, 0.8f, -0.1f, 8.0f);
	DO_SGL_TEST3("NMAC", &TestNMacS, -0.8f, -0.1f, -8.0f);
	DO_SGL_TEST3("NMAC", &TestNMacS, 0.8f, 0.3333333333f, 3.1415926536f);

	// NMSC
	DO_SGL_TEST3("NMSC", &TestNMscS, 0.0f, 0.0f, 0.0f);
	DO_SGL_TEST3("NMSC", &TestNMscS, 0.0f, 1.0f, 0.0f);
	DO_SGL_TEST3("NMSC", &TestNMscS, 0.0f, 1.0f, 1.0f);
	DO_SGL_TEST3("NMSC", &TestNMscS, -1.0f, 1.0f, 1.0f);
	DO_SGL_TEST3("NMSC", &TestNMscS, 0.8f, 0.1f, 8.0f);
	DO_SGL_TEST3("NMSC", &TestNMscS, 0.8f, -0.1f, 8.0f);
	DO_SGL_TEST3("NMSC", &TestNMscS, -0.8f, -0.1f, -8.0f);
	DO_SGL_TEST3("NMSC", &TestNMscS, 0.8f, 0.3333333333f, 3.1415926536f);
	}

#define DO_DBL_TEST1(name, func, a1)			DoDblTest(name, __LINE__, func, a1)
#define DO_DBL_TEST2(name, func, a1, a2)		DoDblTest(name, __LINE__, func, a1, a2)
#define DO_DBL_TEST3(name, func, a1, a2, a3)	DoDblTest(name, __LINE__, func, a1, a2, a3)
void DoDblTest(const char* aName, TInt aLine, TDblTest aFunc, TReal64 a1, TReal64 a2=0.0, TReal64 a3=0.0)
	{
	TPtrC8 name8((const TText8*)aName);
	TBuf<128> name16;
	name16.Copy(name8);
	test.Printf(_L("%S(%g,%g,%g)\n"), &name16, a1, a2, a3);
	TReal64 args[3] = {a1, a2, a3};
	TReal64 results[2];
	SDouble sargs[3];
	sargs[0] = a1;
	sargs[1] = a2;
	sargs[2] = a3;
	(*aFunc)(args, results);
	if (IEEEMode)
		{
		if (*((TUint64*)&(results[0])) == *((TUint64*)&(results[1])))
			return;
		}
	else
		{
		if (results[0] == results[1])
			return;
		}
	SDouble sres[3];
	sres[0] = results[0];
	sres[1] = results[1];
	test.Printf(_L("a1=%08x %08x\na2=%08x %08x\na3=%08x %08x\n"), sargs[0].iData[1], sargs[0].iData[0],
								sargs[1].iData[1], sargs[1].iData[0], sargs[2].iData[1], sargs[2].iData[0]);
	test.Printf(_L("actual result = %08x %08x (%g)\nexpected result = %08x %08x (%g)\n"),
			sres[0].iData[1], sres[0].iData[0], results[0], sres[1].iData[1], sres[1].iData[0], results[1]);
	test.Printf(_L("Test at line %d failed\n"), aLine);
	test(0);
	}

// Run the full table of double precision test cases, one DO_DBL_TESTn call
// per case. Each call passes its own __LINE__ to DoDblTest(), so a failure
// message identifies the exact case below; keep one case per line.
void DoDblTests()
	{
	// ABS
	DO_DBL_TEST1("ABS", &TestAbsD, 1.0);
	DO_DBL_TEST1("ABS", &TestAbsD, -1.0);
	DO_DBL_TEST1("ABS", &TestAbsD, 0.0);
	DO_DBL_TEST1("ABS", &TestAbsD, -3.1415926536);

	// NEG
	DO_DBL_TEST1("NEG", &TestNegD, 1.0);
	DO_DBL_TEST1("NEG", &TestNegD, -1.0);
	DO_DBL_TEST1("NEG", &TestNegD, 0.0);
	DO_DBL_TEST1("NEG", &TestNegD, -3.1415926536);

	// ADD
	DO_DBL_TEST2("ADD", &TestAddD, 0.0, 0.0);
	DO_DBL_TEST2("ADD", &TestAddD, 0.0, 1.0);
	DO_DBL_TEST2("ADD", &TestAddD, -1.0, 1.0);
	DO_DBL_TEST2("ADD", &TestAddD, 1.0, 2.5);
	DO_DBL_TEST2("ADD", &TestAddD, 1.0, 6.022045e23);
	DO_DBL_TEST2("ADD", &TestAddD, -7.3890561, 1.414213562);
	DO_DBL_TEST2("ADD", &TestAddD, -7.3890561, -1.414213562);

	// SUB
	DO_DBL_TEST2("SUB", &TestSubD, 0.0, 0.0);
	DO_DBL_TEST2("SUB", &TestSubD, 0.0, 1.0);
	DO_DBL_TEST2("SUB", &TestSubD, 1.0, 1.0);
	DO_DBL_TEST2("SUB", &TestSubD, 1.0, 2.5);
	DO_DBL_TEST2("SUB", &TestSubD, 91.0, 2.5);
	DO_DBL_TEST2("SUB", &TestSubD, 1.0, 6.022045e23);
	DO_DBL_TEST2("SUB", &TestSubD, -7.3890561, 1.414213562);
	DO_DBL_TEST2("SUB", &TestSubD, -7.3890561, -1.414213562);

	// MUL
	DO_DBL_TEST2("MUL", &TestMulD, 0.0, 0.0);
	DO_DBL_TEST2("MUL", &TestMulD, 1.0, 0.0);
	DO_DBL_TEST2("MUL", &TestMulD, 0.0, 1.0);
	DO_DBL_TEST2("MUL", &TestMulD, 2.5, 6.5);
	DO_DBL_TEST2("MUL", &TestMulD, -39.6, 19.72);
	DO_DBL_TEST2("MUL", &TestMulD, -10.1, -20.1);
	DO_DBL_TEST2("MUL", &TestMulD, 1e20, 1e20);
	DO_DBL_TEST2("MUL", &TestMulD, 1e100, 1e300);
	DO_DBL_TEST2("MUL", &TestMulD, 1e-20, 1e-20);
	DO_DBL_TEST2("MUL", &TestMulD, 1e-200, 1e-300);

	// DIV
	DO_DBL_TEST2("DIV", &TestDivD, 0.0, 1.0);
	DO_DBL_TEST2("DIV", &TestDivD, 1.0, 5.0);
	DO_DBL_TEST2("DIV", &TestDivD, 1.0, -5.0);
	DO_DBL_TEST2("DIV", &TestDivD, -1.0, 5.0);
	DO_DBL_TEST2("DIV", &TestDivD, -1.0, -5.0);
	DO_DBL_TEST2("DIV", &TestDivD, 7.3890561, 2.7182818);
	DO_DBL_TEST2("DIV", &TestDivD, 1e20, 1e-20);
	DO_DBL_TEST2("DIV", &TestDivD, 1e-20, 1e20);
	DO_DBL_TEST2("DIV", &TestDivD, 1e-50, 1e300);

	// NMUL
	DO_DBL_TEST2("NMUL", &TestNMulD, 0.0, 0.0);
	DO_DBL_TEST2("NMUL", &TestNMulD, 1.0, 0.0);
	DO_DBL_TEST2("NMUL", &TestNMulD, 0.0, 1.0);
	DO_DBL_TEST2("NMUL", &TestNMulD, 2.5, 6.5);
	DO_DBL_TEST2("NMUL", &TestNMulD, -39.6, 19.72);
	DO_DBL_TEST2("NMUL", &TestNMulD, -10.1, -20.1);
	DO_DBL_TEST2("NMUL", &TestNMulD, 1e20, 1e20);
	DO_DBL_TEST2("NMUL", &TestNMulD, 1e100, 1e300);
	DO_DBL_TEST2("NMUL", &TestNMulD, 1e-20, 1e-20);
	DO_DBL_TEST2("NMUL", &TestNMulD, 1e-200, 1e-300);

	// SQRT
	DO_DBL_TEST1("SQRT", &TestSqrtD, 0.0);
	DO_DBL_TEST1("SQRT", &TestSqrtD, 1.0);
	DO_DBL_TEST1("SQRT", &TestSqrtD, 2.0);
	DO_DBL_TEST1("SQRT", &TestSqrtD, 3.0);
	DO_DBL_TEST1("SQRT", &TestSqrtD, 9096256.0);
	DO_DBL_TEST1("SQRT", &TestSqrtD, 1e36);
	DO_DBL_TEST1("SQRT", &TestSqrtD, 1e-36);

	// MAC
	DO_DBL_TEST3("MAC", &TestMacD, 0.0, 0.0, 0.0);
	DO_DBL_TEST3("MAC", &TestMacD, 0.0, 1.0, 0.0);
	DO_DBL_TEST3("MAC", &TestMacD, 0.0, 1.0, 1.0);
	DO_DBL_TEST3("MAC", &TestMacD, -1.0, 1.0, 1.0);
	DO_DBL_TEST3("MAC", &TestMacD, 0.8, 0.1, 8.0);
	DO_DBL_TEST3("MAC", &TestMacD, 0.8, -0.1, 8.0);
	DO_DBL_TEST3("MAC", &TestMacD, -0.8, -0.1, -8.0);
	DO_DBL_TEST3("MAC", &TestMacD, 0.8, 0.3333333333, 3.1415926536);

	// MSC
	DO_DBL_TEST3("MSC", &TestMscD, 0.0, 0.0, 0.0);
	DO_DBL_TEST3("MSC", &TestMscD, 0.0, 1.0, 0.0);
	DO_DBL_TEST3("MSC", &TestMscD, 0.0, 1.0, 1.0);
	DO_DBL_TEST3("MSC", &TestMscD, -1.0, 1.0, 1.0);
	DO_DBL_TEST3("MSC", &TestMscD, 0.8, 0.1, 8.0);
	DO_DBL_TEST3("MSC", &TestMscD, 0.8, -0.1, 8.0);
	DO_DBL_TEST3("MSC", &TestMscD, -0.8, -0.1, -8.0);
	DO_DBL_TEST3("MSC", &TestMscD, 0.8, 0.3333333333, 3.1415926536);

	// NMAC
	DO_DBL_TEST3("NMAC", &TestNMacD, 0.0, 0.0, 0.0);
	DO_DBL_TEST3("NMAC", &TestNMacD, 0.0, 1.0, 0.0);
	DO_DBL_TEST3("NMAC", &TestNMacD, 0.0, 1.0, 1.0);
	DO_DBL_TEST3("NMAC", &TestNMacD, -1.0, 1.0, 1.0);
	DO_DBL_TEST3("NMAC", &TestNMacD, 0.8, 0.1, 8.0);
	DO_DBL_TEST3("NMAC", &TestNMacD, 0.8, -0.1, 8.0);
	DO_DBL_TEST3("NMAC", &TestNMacD, -0.8, -0.1, -8.0);
	DO_DBL_TEST3("NMAC", &TestNMacD, 0.8, 0.3333333333, 3.1415926536);

	// NMSC
	DO_DBL_TEST3("NMSC", &TestNMscD, 0.0, 0.0, 0.0);
	DO_DBL_TEST3("NMSC", &TestNMscD, 0.0, 1.0, 0.0);
	DO_DBL_TEST3("NMSC", &TestNMscD, 0.0, 1.0, 1.0);
	DO_DBL_TEST3("NMSC", &TestNMscD, -1.0, 1.0, 1.0);
	DO_DBL_TEST3("NMSC", &TestNMscD, 0.8, 0.1, 8.0);
	DO_DBL_TEST3("NMSC", &TestNMscD, 0.8, -0.1, 8.0);
	DO_DBL_TEST3("NMSC", &TestNMscD, -0.8, -0.1, -8.0);
	DO_DBL_TEST3("NMSC", &TestNMscD, 0.8, 0.3333333333, 3.1415926536);
	}

// Exercise operations on denormal operands and on products that may
// underflow, first in single precision and then, when Double is set, in
// double precision. E32Main() only calls this when IEEE mode was selected.
// The NaN input cases are kept below but are currently disabled.
void DoBounceTests()
	{
	test.Next(_L("Test denormal handling - single"));
	// 1e-39 is below the smallest normal single precision value.
	DO_SGL_TEST2("ADD", &TestAddS, 1e-39f, 1e-39f);
	test.Next(_L("Test potential underflow - single"));
	DO_SGL_TEST2("MUL", &TestMulS, 3.162e-20f, 3.162e-20f);
// Disabled: fails on VFPv2 hardware. ARM's library should be fixed.
//	test.Next(_L("Test NaN input - single"));
//	TReal32 aSingleNaN;
//	*((TUint32*)&aSingleNaN) = 0x7F9ABCDE;
//	Vfp::SetSReg(aSingleNaN, 1);
//	Vfp::SetSReg(aSingleNaN, 2);
//	Vfp::AddS();
//	TReal32 aSingleResult = Vfp::SReg(0);
//	test(*((TUint32*)&aSingleResult) == 0x7FDABCDE);

	if (Double)
		{
		test.Next(_L("Test denormal handling - double"));
		// 3.1234e-322 is below the smallest normal double precision value.
		DO_DBL_TEST2("ADD", &TestAddD, 3.1234e-322, 3.1234e-322);
		test.Next(_L("Test potential underflow - double"));
		DO_DBL_TEST2("MUL", &TestMulD, 1.767e-161, 1.767e-161);
// Disabled: fails on VFPv2 hardware. ARM's library should be fixed.
//		test.Next(_L("Test NaN input - double"));
//		TReal64 aDoubleNaN;
//		*((TUint64*)&aDoubleNaN) = 0x7FF0123456789ABCll;
//		Vfp::SetDReg(aDoubleNaN, 1);
//		Vfp::SetDReg(aDoubleNaN, 2);
//		Vfp::AddD();
//		TReal64 aDoubleResult = Vfp::DReg(0);
//		test(*((TUint64*)&aDoubleResult) == 0x7FF8123456789ABC);
		}
	}

void DoRunFastTests()
	{
	test.Next(_L("Test flushing denormals to zero - single"));
	Vfp::SetSReg(1e-39f, 1);
	Vfp::SetSReg(1e-39f, 2);
	Vfp::AddS();
	test(Vfp::SReg(0)==0);

	test.Next(_L("Test flushing underflow to zero - single"));
	Vfp::SetSReg(3.162e-20f, 1);
	Vfp::SetSReg(3.162e-20f, 2);
	Vfp::MulS();
	test(Vfp::SReg(0)==0);

	test.Next(_L("Test default NaNs - single"));
	TReal32 aSingleNaN;
	*((TUint32*)&aSingleNaN) = 0x7F9ABCDE;
	Vfp::SetSReg(aSingleNaN, 1);
	Vfp::SetSReg(aSingleNaN, 2);
	Vfp::AddS();
	TReal32 aSingleResult = Vfp::SReg(0);
	test(*((TUint32*)&aSingleResult) == 0x7FC00000);

	if (Double)
		{
		test.Next(_L("Test flushing denormals to zero - double"));
		Vfp::SetDReg(3.1234e-322, 1);
		Vfp::SetDReg(3.1234e-322, 2);
		Vfp::AddD();
		test(Vfp::DReg(0)==0);
	
		test.Next(_L("Test flushing underflow to zero - double"));
		Vfp::SetDReg(1.767e-161, 1);
		Vfp::SetDReg(1.767e-161, 2);
		Vfp::MulD();
		test(Vfp::DReg(0)==0);

		test.Next(_L("Test default NaNs - double"));
		TReal64 aDoubleNaN;
		*((TUint64*)&aDoubleNaN) = 0x7FF0123456789ABCll;
		Vfp::SetDReg(aDoubleNaN, 1);
		Vfp::SetDReg(aDoubleNaN, 2);
		Vfp::AddD();
		TReal64 aDoubleResult = Vfp::DReg(0);
		test(*((TUint64*)&aDoubleResult) == 0x7FF8000000000000ll);
		}
	}

void TestAddSResult(const TReal32 a, const TReal32 b, const TReal32 r)
	{
	Vfp::SetSReg(a, 1);
	Vfp::SetSReg(b, 2);
	Vfp::AddS();
	test(Vfp::SReg(0) == r);
	}

void DoRoundingTests()
	{
	TFloatingPointMode fpmode = IEEEMode ? EFpModeIEEENoExceptions : EFpModeRunFast;
	test.Next(_L("Check default rounding to nearest"));
	test(User::SetFloatingPointMode(fpmode) == KErrNone);
	test.Next(_L("Check nearest-downward"));
	TestAddSResult(16777215, 0.49f, 16777215);
	test.Next(_L("Check nearest-upward"));
	TestAddSResult(16777215, 0.51f, 16777216);
	test.Next(_L("Set rounding mode to toward-plus-infinity"));
	test(User::SetFloatingPointMode(fpmode, EFpRoundToPlusInfinity) == KErrNone);
	test.Next(_L("Check positive rounding goes upward"));
	TestAddSResult(16777215, 0.49f, 16777216);
	test.Next(_L("Check negative rounding goes upward"));
	TestAddSResult(-16777215, -0.51f, -16777215);
	test.Next(_L("Set rounding mode to toward-minus-infinity"));
	test(User::SetFloatingPointMode(fpmode, EFpRoundToMinusInfinity) == KErrNone);
	test.Next(_L("Check positive rounding goes downward"));
	TestAddSResult(16777215, 0.51f, 16777215);
	test.Next(_L("Check negative rounding goes downward"));
	TestAddSResult(-16777215, -0.49f, -16777216);
	test.Next(_L("Set rounding mode to toward-zero"));
	test(User::SetFloatingPointMode(fpmode, EFpRoundToZero) == KErrNone);
	test.Next(_L("Check positive rounding goes downward"));
	TestAddSResult(16777215, 0.51f, 16777215);
	test.Next(_L("Check negative rounding goes upward"));
	TestAddSResult(-16777215, -0.51f, -16777215);
	}

// Thread entry point: add two denormals and return KErrNone when the sum
// reads back as zero, KErrGeneral otherwise.
TInt RunFastThread(TAny* /*unused*/)
	{
	Vfp::SetSReg(1e-39f, 1);
	Vfp::SetSReg(1e-39f, 2);
	Vfp::AddS();
	if (Vfp::SReg(0)==0)
		return KErrNone;
	return KErrGeneral;
	}

// Thread entry point: add two denormals and return KErrNone when the sum
// reads back as 2e-39 (i.e. it was not flushed), KErrGeneral otherwise.
TInt IEEECompliantThread(TAny* /*unused*/)
	{
	Vfp::SetSReg(1e-39f, 1);
	Vfp::SetSReg(1e-39f, 2);
	Vfp::AddS();
	if (Vfp::SReg(0)==2e-39f)
		return KErrNone;
	return KErrGeneral;
	}

void TestVFPModeInheritance()
	{
	test.Printf(_L("Set floating point mode to RunFast\n"));
	test(User::SetFloatingPointMode(EFpModeRunFast)==KErrNone);
	RThread t;
	TInt r = t.Create(KNullDesC, &RunFastThread, 0x1000, NULL, NULL);
	test(r==KErrNone);
	TRequestStatus s;
	t.Logon(s);
	test.Printf(_L("Run RunFast test in another thread...\n"));
	t.Resume();
	test.Printf(_L("Wait for other thread to terminate\n"));
	User::WaitForRequest(s);
	test(t.ExitType() == EExitKill);
	test(s == KErrNone);
	CLOSE_AND_WAIT(t);
	test.Printf(_L("Set floating point mode to IEEE\n"));
	test(User::SetFloatingPointMode(EFpModeIEEENoExceptions)==KErrNone);
	r = t.Create(KNullDesC, &IEEECompliantThread, 0x1000, NULL, NULL);
	test(r==KErrNone);
	t.Logon(s);
	test.Printf(_L("Run IEEE test in another thread...\n"));
	t.Resume();
	test.Printf(_L("Wait for other thread to terminate\n"));
	User::WaitForRequest(s);
	test(t.ExitType() == EExitKill);
	test(s == KErrNone);
	CLOSE_AND_WAIT(t);
	}


void TestVFPv3()
	{
	test.Next(_L("Transferring to and from fixed point"));
	
	Vfp::SetSReg(2.5f, 0);
	test(Vfp::SReg(0)==2.5f);
	Vfp::ToFixedS(3);				// Convert to fixed (3) precision
	test(Vfp::SRegInt(0)==0x14);	// 10.100 in binary fixed(3) format
	Vfp::FromFixedS(3);				//Convert from fixed (3) precision
	test(Vfp::SReg(0)==2.5f);

	
	test.Next(_L("Setting immediate value to floating point registers"));
	
	Vfp::SetSReg(5.0f, 0);
	test(Vfp::SReg(0) == 5.0f);
	Vfp::TconstS2();
	test(Vfp::SReg(0) == 2.0f);
	Vfp::SetSReg(5.0f, 0);
	Vfp::TconstS2_8();
	test(Vfp::SReg(0) == 2.875f);
	
	Vfp::SetDReg(5.0f, 0);
	test(Vfp::DReg(0) == 5.0f);
	Vfp::TconstD2();
	test(Vfp::DReg(0) == 2.0f);
	Vfp::TconstD2_8();
	test(Vfp::DReg(0) == 2.875f);
	}

void TestNEON()
	{
	RThread t;
	TRequestStatus s;
	test.Next(_L("Test creating a thread to execute an F2-prefix instruction"));
	test_KErrNone(t.Create(KNullDesC, &NeonWithF2, 0x1000, NULL, NULL));
	t.Logon(s);
	t.Resume();
	User::WaitForRequest(s);
	test(t.ExitType() == EExitKill);
	test(s == KErrNone);
	t.Close();
	test.Next(_L("Test creating a thread to execute an F3-prefix instruction"));
	test_KErrNone(t.Create(KNullDesC, &NeonWithF3, 0x1000, NULL, NULL));
	t.Logon(s);
	t.Resume();
	User::WaitForRequest(s);
	test(t.ExitType() == EExitKill);
	test(s == KErrNone);
	t.Close();
	test.Next(_L("Test creating a thread to execute an F4x-prefix instruction"));
	test_KErrNone(t.Create(KNullDesC, &NeonWithF4x, 0x1000, NULL, NULL));
	t.Logon(s);
	t.Resume();
	User::WaitForRequest(s);
	test(t.ExitType() == EExitKill);
	test(s == KErrNone);
	t.Close();
	}

void TestThumb()
	{
	RThread t;
	TRequestStatus s;
	TInt testStep = 0;
	do {
		test_KErrNone(t.Create(KNullDesC, &ThumbMode, 0x1000, NULL, (TAny*)testStep++));
		t.Logon(s);
		t.Resume();
		User::WaitForRequest(s);
		test(s == KErrNone || s == 1);
		test(t.ExitType() == EExitKill);
		t.Close();
		}
	while (s == KErrNone);

	test(s == 1);
	test(testStep == 7);
	}

// Test entry point.
// 1. Query the HAL for the hardware floating point type and the kernel for
//    the FPSID, and check that the two agree.
// 2. Derive ArchVersion and Double from FPSID.
// 3. Run the initial-state, calculation, context-save, bounce, RunFast,
//    rounding, mode-inheritance and (on VFPv3) VFPv3/NEON/Thumb tests.
// Returns 0; failures are reported through the RTest object.
TInt E32Main()
	{
	test.Title();

	test.Start(_L("Ask HAL if we have hardware floating point"));

	CPUs = UserSvr::HalFunction(EHalGroupKernel, EKernelHalNumLogicalCpus, 0, 0);
	// Initialised so that the checks below never read an indeterminate value
	// when HAL::Get() fails; 0 matches none of the VFP types tested for here.
	TInt supportedTypes = 0;
	TInt HalVfp = HAL::Get(HALData::EHardwareFloatingPoint, supportedTypes);
	if (HalVfp == KErrNone)
		{
		if (supportedTypes == EFpTypeVFPv2)
			{
			test.Printf(_L("HAL reports VFPv2\n"));
			}
		else if (supportedTypes == EFpTypeVFPv3)
			{
			test.Printf(_L("HAL reports VFPv3\n"));
			}
		else if (supportedTypes == EFpTypeVFPv3D16)
			{
			test.Printf(_L("HAL reports VFPv3-D16\n"));
			}
		else
			{
			test.Printf(_L("HAL reports an unknown floating point type\n"));
			test(0);
			}
		}
	else
		{
		test.Printf(_L("HAL reports no VFP support\n"));
		}

	test.Next(_L("Check VFP present"));
	TBool present = DetectVFP();
	if (!present)
		{
		// No hardware: the HAL must not have claimed a VFP type either.
		test.Printf(_L("No VFP detected\n"));
		test(HalVfp == KErrNotSupported ||
						((supportedTypes != EFpTypeVFPv2) &&
						(supportedTypes != EFpTypeVFPv3) &&
						(supportedTypes != EFpTypeVFPv3D16))
						);
		test.End();
		return 0;
		}

	test.Printf(_L("VFP detected. FPSID = %08x\n"), FPSID);
	test(HalVfp == KErrNone);

	// Verify that the HAL architecture ID matches the FPSID values
	// ARMv7 redefines some of these bits so the masks are different :(
	if (supportedTypes == EFpTypeVFPv2)
		{
		// assume armv5/6's bit definitions, where 19:16 are the arch version
		// and 20 is the single-precision-only bit
		ArchVersion = (FPSID >> 16) & 0xf;
		test(ArchVersion == ARCH_VERSION_VFPV2);
		Double = !(FPSID & VFP_FPSID_SNG);
		}
	else if (supportedTypes == EFpTypeVFPv3 || supportedTypes == EFpTypeVFPv3D16)
		{
		// assume armv7's bit definitions, where 22:16 are the arch version
		ArchVersion = (FPSID >> 16) & 0x7f;
		test(ArchVersion == ARCH_VERSION_VFPV3_SUBARCH_V2
		  || ArchVersion == ARCH_VERSION_VFPV3_SUBARCH_NULL
		  || ArchVersion == ARCH_VERSION_VFPV3_SUBARCH_V3);
		// there are bits for this in MVFR0 but ARM implementations should always have it?
		Double = ETrue;
		}

	// These messages have no format specifiers, so no extra arguments.
	if (Double)
		test.Printf(_L("Both single and double precision supported\n"));
	else
		test.Printf(_L("Only single precision supported\n"));

	test.Next(_L("Test VFP Initial State"));
	TestVFPInitialState();

	test.Next(_L("Test setting VFP to IEEE no exceptions mode"));
	IEEEMode = User::SetFloatingPointMode(EFpModeIEEENoExceptions) == KErrNone;
	if (!IEEEMode)
		test.Printf(_L("IEEE no exceptions mode not supported, continuing in RunFast\n"));

	test.Next(_L("Test VFP calculations - single"));
	DoSglTests();
	if (Double)
		{
		test.Next(_L("Test VFP calculations - double"));
		DoDblTests();
		}

	test.Next(_L("Test VFP Context Save"));
	TestVFPContextSave();

	if (IEEEMode)
		{
		test.Next(_L("Test bounce handling"));
		DoBounceTests();
		test.Next(_L("Test bouncing while context switching"));
		DoBounceContextSwitchTests();
		test.Next(_L("Test setting VFP to RunFast mode"));
		test(User::SetFloatingPointMode(EFpModeRunFast) == KErrNone);
		DoRunFastTests();
		}

	test.Next(_L("Test VFP rounding modes"));
	DoRoundingTests();

	if (IEEEMode)
		{
		test.Next(_L("Test VFP mode inheritance between threads"));
		TestVFPModeInheritance();
		}

	if (supportedTypes == EFpTypeVFPv3 || supportedTypes == EFpTypeVFPv3D16)
		{
		test.Next(_L("Test VFPv3"));
		TestVFPv3();

		// NEON and Thumb decode tests run only on the full VFPv3 type.
		if (supportedTypes == EFpTypeVFPv3)
			{
			test.Next(_L("Test NEON"));
			TestNEON();

#if defined(__SUPPORT_THUMB_INTERWORKING)
			test.Next(_L("Test Thumb Decode"));
			TestThumb();
#endif
			}
		}

	test.End();
	return 0;
	}