kerneltest/e32test/benchmark/bm_main.cpp

// Copyright (c) 2002-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// Overview:
// Test and benchmark kernel-side utility operations  
// API Information:
// RBusLogicalChannel			
// Details:
// - Create a list of benchmark modules and run them one by one;
// each module contains a set of measurement units, and each unit runs for a fixed
// amount of time in a series of iterations; the results (minimum, maximum and
// average times) are displayed on the screen.
// The tests use a high-resolution timer implemented kernel-side in a device
// driver.
// - The test contains the following benchmark modules:
// - Real-time latency module measures:
// - interrupt latency by calculating the time taken from when an
// interrupt is generated until the ISR starts
// - kernel thread latency by calculating the time taken from an ISR
// scheduling a DFC to signal the kernel thread until the kernel thread
// starts running
// - kernel thread latency as above while a CPU intensive low priority
// user thread runs at the same time
// - user thread latency by calculating the time taken from an ISR 
// scheduling a DFC to signal the user thread until the user thread
// starts running
// - user thread latency as above while a CPU intensive low priority
// user thread runs at the same time
// - NTimer period jitter by calculating the actual period as the delta
// between two consecutive NTimer callbacks that store the current time;
// the jitter is the difference between the actual period and a theoretical
// period.
// - timer overhead by calculating the delta of time between two consecutive
// timestamps requested from the high precision timer implemented in the
// device driver; the calls are made from kernel side code
// - Overhead module measures:
// - timer overhead by calculating the delta of time between two consecutive
// timestamps requested from the high precision timer implemented in the
// device driver; the calls are made from user side code
// - Synchronization module measures: 
// - mutex passing, local mutex contention, remote mutex contention, 
// local semaphore latency, remote semaphore latency, 
// local thread semaphore latency, remote thread semaphore latency.
// - Client-server framework module measures:
// - For local high priority, local low priority, remote high priority 
// and remote low priority: connection request latency, connection
// reply latency, request latency, request response time, reply latency.
// - Threads module measures:
// - Thread creation latency, thread creation suicide, thread suicide,
// thread killing, setting per thread data, getting per thread data.
// - Properties module measures:
// - Local int notification latency, remote int notification latency, 
// local byte(1) notification latency, remote byte(1) notification latency, 
// local byte(8) notification latency, remote byte(8) notification latency, 
// local byte(512) notification latency, remote byte(512) notification latency, 
// int set overhead, byte(1) set overhead, byte(8) set overhead, byte(512) set 
// overhead, int get overhead, byte(1) get overhead, byte(8) get overhead, 
// byte(512) get overhead. 
// Platforms/Drives/Compatibility:
// All.
// Assumptions/Requirement/Pre-requisites:
// Failures and causes:
// Base Port information:
// 
//

#include "bm_suite.h"
#include <e32svr.h>
#include <u32hal.h>

RTest test(_L("Benchmark Suite"));

//
// The default time allocated to each benchmark program, in seconds.
//
static TInt KBMSecondsPerProgram = 30;
//
// The initial number of iterations used to estimate the actual number of iterations.
//
static TInt KBMCalibrationIter = 64;

//
// Global handle to high-resolution timer. 
//
RBMTimer bmTimer;
//
// The head of the benchmark programs' list
//
BMProgram* bmSuite; 
//
// Global handle to the kernel-side benchmark utility API
//
static RBMDriver bmDriver;

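//
// TBMResult holds the accumulated measurements for a single benchmark unit.
// It is constructed with the unit's name, with all counters reset.
//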
TBMResult::TBMResult(const TDesC& aName) : iName(aName) 
	{
	Reset();
	}

void TBMResult::Reset()
	{
	::bmTimer.Period(&iMinTicks);
	iMaxTicks = 0;
	iCumulatedTicks = 0;
	iCumulatedIterations = 0;
	iIterations = 0;
	iMin = 0;
	iMax = 0;
	iAverage = 0;
	}

void TBMResult::Reset(const TDesC& aName)
	{
	Reset();
	iName.Set(aName);
	}

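//
// Accumulate a single measurement: track the minimum and maximum tick counts,
// keep the first KHeadSize samples and (circularly) the last KTailSize samples.
//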
void TBMResult::Cumulate(TBMTicks aTicks)
{
	if (aTicks < iMinTicks) iMinTicks = aTicks;
	if (iMaxTicks < aTicks) iMaxTicks = aTicks;

	iCumulatedTicks += aTicks;
	if (iCumulatedIterations < KHeadSize)
		{
		iHeadTicks[iCumulatedIterations] = aTicks;
		}
		// Use the array as a circular buffer to store the last KTailSize results
		// (we cannot know in advance which iteration will be the last).
	iTailTicks[iCumulatedIterations % KTailSize] = aTicks;
	++iCumulatedIterations;
	
}


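//
// Accumulate a batch measurement: aTicks is the total time taken by aIter iterations,
// so no per-iteration minimum/maximum or head/tail samples are recorded.
//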
void TBMResult::Cumulate(TBMTicks aTicks, TBMUInt64 aIter)
{
	iCumulatedIterations += aIter;
	iCumulatedTicks += aTicks;
}

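//
// Convert the accumulated tick counts into nanoseconds and compute the
// minimum, maximum and average results ready for printing.
//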
void TBMResult::Update()
{
	if (iCumulatedIterations == 0) return;
	iIterations = iCumulatedIterations;
	::bmTimer.TicksToNs(&iMinTicks, &iMin);
	::bmTimer.TicksToNs(&iMaxTicks, &iMax);
	TBMTicks averageTicks = iCumulatedTicks/TBMUInt64(iCumulatedIterations);
	::bmTimer.TicksToNs(&averageTicks, &iAverage);
	TInt i;
	for (i = 0; i < KHeadSize; ++i) 
		{
		::bmTimer.TicksToNs(&iHeadTicks[i], &iHead[i]);
		}
	for (i = 0; i < KTailSize; ++i) 
		{
		::bmTimer.TicksToNs(&iTailTicks[i], &iTail[i]);
		}
	}

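//
// Convert a TTimeIntervalMicroSeconds value into benchmark nanoseconds.
//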
inline TBMNs TTimeIntervalMicroSecondsToTBMNs(TTimeIntervalMicroSeconds us)
	{
	return BMUsToNs(*(TBMUInt64*)&us);
	}

TBMNs TBMTimeInterval::iStampPeriodNs;
TBMTicks TBMTimeInterval::iStampPeriod;

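//
// Cache the high-resolution timer's period, in ticks and in nanoseconds.
// Called once from E32Main() before any intervals are measured.
//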
void TBMTimeInterval::Init()
	{
	::bmTimer.Period(&iStampPeriod); 
	::bmTimer.TicksToNs(&iStampPeriod, &iStampPeriodNs);
}

void TBMTimeInterval::Begin()
	{
	//
	// Order is important: read the low-precision timer first, then the high-precision one.
	// That way the two high-precision timer reads are accounted for within the low-precision
	// interval, which is better than the opposite.
	//
	iTime.HomeTime();
	::bmTimer.Stamp(&iStamp);
	}

TBMNs TBMTimeInterval::EndNs()
	{
	//
	// Now, in the reverse order
	//
	TBMTicks stamp;
	::bmTimer.Stamp(&stamp);
	TTime time;	
	time.HomeTime();
	TBMNs ns = TTimeIntervalMicroSecondsToTBMNs(time.MicroSecondsFrom(iTime));
	//
	// If the interval fits in the high-precision timer period we can use it;
	// otherwise, use the low-precision timer.
	//
	if (ns < iStampPeriodNs)
		{
		stamp = TBMTicksDelta(iStamp, stamp);
		::bmTimer.TicksToNs(&stamp, &ns);
		}
	return ns;
	}

TBMTicks TBMTimeInterval::End()
	{
	//
	// Same as EndNs(), but returns ticks.
	//

	TBMTicks stamp;
	::bmTimer.Stamp(&stamp);
	TTime time;	
	time.HomeTime();
	TBMNs ns = TTimeIntervalMicroSecondsToTBMNs(time.MicroSecondsFrom(iTime));
	if (ns < iStampPeriodNs)
		{
		stamp = TBMTicksDelta(iStamp, stamp);
		}
	else
		{
			// Multiply first - favour precision over the (unlikely) risk of overflow.
		stamp = (ns * iStampPeriod) / iStampPeriodNs; 
		}
	return stamp;
	}

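//
// Set aThread's absolute priority to aNewPrio via the kernel-side benchmark
// driver and return the priority it had before.
//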
TInt BMProgram::SetAbsPriority(RThread aThread, TInt aNewPrio)
	{
	TInt aOldPrio=0;
	TInt r = ::bmDriver.SetAbsPriority(aThread, aNewPrio, &aOldPrio);
	BM_ERROR(r, r == KErrNone);
	return aOldPrio;
	}

const TInt TBMSpawnArgs::KMagic = 0xdeadbeef;

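//
// Package the arguments passed from a parent thread to a spawned child:
// the child's entry function and priority, whether the child runs in a separate
// process, and an id/handle for the parent thread.
//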
TBMSpawnArgs::TBMSpawnArgs(TThreadFunction aChildFunc, TInt aChildPrio, TBool aRemote, TInt aSize)
	{
	iMagic = KMagic;
	iParentId = RThread().Id();
		// Get a thread handle that is meaningful in the context of any other thread
		// (RThread() won't do, since it is context-dependent).
	TInt r = iParent.Open(iParentId);
	BM_ERROR(r, r == KErrNone);
	iRemote = aRemote;
	iChildFunc = aChildFunc;
	iChildPrio = aChildPrio;
	iSize = aSize;
	}

TBMSpawnArgs::~TBMSpawnArgs()
	{
	iParent.Close();
	}

//
// A CLocalChild object represents a "child" thread created by its "parent" thread,
// in the parent's process, through the BMProgram::SpawnChild() interface.
//
// The CLocalChild class is typically used (invoked) by the parent's thread.
//
class CLocalChild : public CBase, public MBMChild
	{
private:
	BMProgram*		iProg;
public:
	RThread			iChild;
	TRequestStatus	iExitStatus;

	CLocalChild(BMProgram* aProg)
		{
		iProg = aProg;
		}
	
	virtual void WaitChildExit();
	virtual void Kill();
	};

void CLocalChild::Kill()
	{
	iChild.Kill(KErrCancel);
	}

void CLocalChild::WaitChildExit()
	{
	User::WaitForRequest(iExitStatus);
	CLOSE_AND_WAIT(iChild);
	//
	// Lower the parent thread's priority and then restore it, to make sure that
	// the kernel-side thread destruction DFC has had a chance to complete.
	//
	TInt prio = BMProgram::SetAbsPriority(RThread(), iProg->iOrigAbsPriority);
	BMProgram::SetAbsPriority(RThread(), prio);
	delete this;
	}

//
// Local (i.e. sharing the parent's process) child's entry point
//
TInt LocalChildEntry(void* ptr)
	{
	TBMSpawnArgs* args = (TBMSpawnArgs*) ptr;
	args->iChildOrigPriority = BMProgram::SetAbsPriority(RThread(), args->iChildPrio);
	return args->iChildFunc(args);
	}

MBMChild* BMProgram::SpawnLocalChild(TBMSpawnArgs* args)
	{
	CLocalChild* child = new CLocalChild(this);
	BM_ERROR(KErrNoMemory, child);
	TInt r = child->iChild.Create(KNullDesC, ::LocalChildEntry, 0x2000, NULL, args);
	BM_ERROR(r, r == KErrNone);
	child->iChild.Logon(child->iExitStatus);
	child->iChild.Resume();
	return child;
	}

//
// A CRemoteChild object represents a "child" thread created by its "parent" thread,
// as a separate process, through the BMProgram::SpawnChild() interface.
//
// The CRemoteChild class is typically used (invoked) by the parent's thread.
//
class CRemoteChild : public CBase, public MBMChild
	{
private:
	BMProgram*		iProg;
public:
	RProcess		iChild;
	TRequestStatus	iExitStatus;

	CRemoteChild(BMProgram* aProg)
		{
		iProg = aProg;
		}

	virtual void WaitChildExit();
	virtual void Kill();
	};

void CRemoteChild::Kill()
	{
	iChild.Kill(KErrCancel);
	}

void CRemoteChild::WaitChildExit()
	{
	User::WaitForRequest(iExitStatus);
	CLOSE_AND_WAIT(iChild);
	//
	// Lower the parent thread's priority and then restore it, to make sure that
	// the kernel-side thread destruction DFC has had a chance to complete.
	//
	TInt prio = BMProgram::SetAbsPriority(RThread(), iProg->iOrigAbsPriority);
	BMProgram::SetAbsPriority(RThread(), prio);
	delete this;
	}

//
// Remote (i.e. running in its own process) child's entry point.
// Note that the child process's entry point is still E32Main() (see below).
//
TInt ChildMain(TBMSpawnArgs* args)
	{
	args->iChildOrigPriority = BMProgram::SetAbsPriority(RThread(), args->iChildPrio);
		// get a handle to the parent's thread in the child's context.
	TInt r = args->iParent.Open(args->iParentId);
	BM_ERROR(r, r == KErrNone);
	return args->iChildFunc(args);
	}

MBMChild* BMProgram::SpawnRemoteChild(TBMSpawnArgs* args)
	{
	CRemoteChild* child = new CRemoteChild(this);
	BM_ERROR(KErrNoMemory, child);
	//
	// Create the child process and pass args as a Unicode command line
	// (we assume that the args size is a multiple of sizeof(TUint16)).
	//
	BM_ASSERT((args->iSize % sizeof(TUint16)) == 0);
	TInt r = child->iChild.Create(RProcess().FileName(), TPtrC((TUint16*) args, args->iSize/sizeof(TUint16)));
	BM_ERROR(r, (r == KErrNone) );
	child->iChild.Logon(child->iExitStatus);
	child->iChild.Resume();
	return child;
	}

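//
// Spawn a child for a benchmark program: either a thread in this process
// (local) or a separate process (remote), depending on args->iRemote.
//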
MBMChild* BMProgram::SpawnChild(TBMSpawnArgs* args)
	{
	MBMChild* child;
	if (args->iRemote)
		{
		child = SpawnRemoteChild(args);
		}
	else
		{
		child = SpawnLocalChild(args);
		}
	return child;
	}

//
// The benchmark-suite entry point.
//
GLDEF_C TInt E32Main()
	{
	test.Title();

	TInt r = UserSvr::HalFunction(EHalGroupKernel, EKernelHalNumLogicalCpus, 0, 0);
	if (r != 1)
		{
		test.Printf(_L("%d CPUs detected ... test not run\n"), r);
		return r;
		}
	
	AddProperty();
	AddThread();
	AddIpc();
	AddSync();
	AddOverhead();
	AddrtLatency();

	r = User::LoadPhysicalDevice(KBMPddFileName);
	BM_ERROR(r, (r == KErrNone) || (r == KErrAlreadyExists));

	r = User::LoadLogicalDevice(KBMLddFileName);
	BM_ERROR(r, (r == KErrNone) || (r == KErrAlreadyExists));

	r = ::bmTimer.Open();
	BM_ERROR(r, (r == KErrNone));

	r = ::bmDriver.Open();
	BM_ERROR(r, (r == KErrNone));

	TBMTimeInterval::Init();

	TInt seconds = KBMSecondsPerProgram;

	TInt len = User::CommandLineLength();
	if (len)
		{
		//
		// Copy the command line in a buffer
		//
		TInt size = len * sizeof(TUint16);
		HBufC8* hb = HBufC8::NewMax(size);
		BM_ERROR(KErrNoMemory, hb);
		TPtr cmd((TUint16*) hb->Ptr(), len);
		User::CommandLine(cmd);
		//
		// Check for the TBMSpawnArgs magic number.
		//
		TBMSpawnArgs* args = (TBMSpawnArgs*) hb->Ptr();	
		if (args->iMagic == TBMSpawnArgs::KMagic)
			{
			//
			// This is a child process - call its entry point.
			//
			return ::ChildMain(args);
			}
		else
			{
			//
			// A real command line - the time (in seconds) for each benchmark program.
			//
			TLex l(cmd);
			r = l.Val(seconds);
			if (r != KErrNone)
				{
				test.Printf(_L("Usage: bm_suite <seconds>\n"));
				BM_ERROR(r, 0);
				}
			}
		delete hb;
		}

	{
	TBMTicks ticks = 1;
	TBMNs ns;
	::bmTimer.TicksToNs(&ticks, &ns);
	test.Printf(_L("High resolution timer tick %dns\n"), TInt(ns));
	test.Printf(_L("High resolution timer period %dms\n"), BMNsToMs(TBMTimeInterval::iStampPeriodNs));
	}

	test.Start(_L("Performance Benchmark Suite"));

	BMProgram* prog = ::bmSuite;
	while (prog) {
		//
		// For each program from the benchmark-suite's list
		//

		//
		// Remember the number of open handles. Just for a sanity check ....
		//
		TInt start_thc, start_phc;
		RThread().HandleCount(start_phc, start_thc);

		test.Printf(_L("%S\n"), &prog->Name());
	
		//
		// A benchmark-suite's thread can run at any of three possible absolute priorities:
		//		KBMPriorityLow, KBMPriorityMid and KBMPriorityHigh.
		// The main thread starts individual benchmark programs at KBMPriorityMid
		//
		prog->iOrigAbsPriority = BMProgram::SetAbsPriority(RThread(), KBMPriorityMid);

		//
		// First of all, figure out how many iterations are required to run this program
		// for the given number of seconds.
		//
		TInt count;
		TBMNs ns = 0;
		TBMUInt64 iter = KBMCalibrationIter;
		for (;;) 
			{
			TBMTimeInterval ti;
			ti.Begin();
			prog->Run(iter, &count);
			ns = ti.EndNs();
				// Run for at least 100ms (otherwise the estimate could be too imprecise).
			if (ns > BMMsToNs(100)) break;
			iter *= 2;
			}
		test.Printf(_L("%d iterations in %dms\n"), TInt(iter), BMNsToMs(ns));
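		//
		// Scale linearly: if iter iterations took ns nanoseconds, then roughly
		// (seconds-in-ns / ns) * iter iterations should take the requested time.
		//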
		iter = (BMSecondsToNs(seconds) * iter) / ns;
		test.Printf(_L("Go for %d iterations ...\n"), TInt(iter));

		//
		// Now the real run ...
		//
		TBMResult* results = prog->Run(iter, &count);

			// Restore the original priority
		BMProgram::SetAbsPriority(RThread(), prog->iOrigAbsPriority);

		//
		// Now print out the results
		//
		for (TInt i = 0; i < count; ++i) 
			{
			if (results[i].iMax)
				{
				test.Printf(_L("%S. %d iterations; Avr: %dns; Min: %dns; Max: %dns\n"), 
							&results[i].iName, TInt(results[i].iIterations),
							TInt(results[i].iAverage), TInt(results[i].iMin), TInt(results[i].iMax));

				TInt j;
				BM_ASSERT((TBMResult::KHeadSize % 4) == 0);
				test.Printf(_L("Head:"));
				for (j = 0; j < TBMResult::KHeadSize; j += 4)
					{
					test.Printf(_L(" %d %d %d %d "), 
								TInt(results[i].iHead[j]), TInt(results[i].iHead[j+1]), 
								TInt(results[i].iHead[j+2]), TInt(results[i].iHead[j+3]));
					}
				test.Printf(_L("\n"));

				BM_ASSERT((TBMResult::KTailSize % 4) == 0);
				test.Printf(_L("Tail:"));
				for (j = 0; j < TBMResult::KTailSize; j += 4)
					{
					test.Printf(_L(" %d %d %d %d "), 
								TInt(results[i].iTail[j]), TInt(results[i].iTail[j+1]), 
								TInt(results[i].iTail[j+2]), TInt(results[i].iTail[j+3]));
					}
				test.Printf(_L("\n"));
				}
			else
				{
				test.Printf(_L("%S. %d iterations; Avr: %dns\n"), 
							&results[i].iName, TInt(results[i].iIterations), TInt(results[i].iAverage));
				}

			}

		//
		// Sanity check for open handles
		//
		TInt end_thc, end_phc;
		RThread().HandleCount(end_phc, end_thc);
		BM_ASSERT(start_thc == end_thc);
		BM_ASSERT(start_phc == end_phc);
			// and also for pending requests ...
		BM_ASSERT(RThread().RequestCount() == 0);

		prog = prog->Next();
//
//		This can be used to run forever ...
//
//		if (prog == NULL)
//			prog = ::bmSuite;
//
	}
	
	test.End();

	::bmDriver.Close();
	::bmTimer.Close();
	return 0;
	}


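//
// Report an assertion failure: widen the narrow condition and file-name
// strings, print them and fail the test.
//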
void bm_assert_failed(char* aCond, char* aFile, TInt aLine)
	{
	TPtrC8 fd((TUint8*)aFile);
	TPtrC8 cd((TUint8*)aCond);

	HBufC* fhb = HBufC::NewMax(fd.Length());
	test(fhb != 0);
	HBufC* chb = HBufC::NewMax(cd.Length());
	test(chb != 0);

	fhb->Des().Copy(fd);
	chb->Des().Copy(cd);

	test.Printf(_L("Assertion %S failed;  File: %S; Line %d;\n"), chb, fhb, aLine);
	test(0);
	}

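//
// Report an unexpected error code: widen the narrow condition and file-name
// strings, print them together with the error value and fail the test.
//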
void bm_error_detected(TInt aError, char* aCond, char* aFile, TInt aLine)
	{
	TPtrC8 fd((TUint8*)aFile);
	TPtrC8 cd((TUint8*)aCond);

	HBufC* fhb = HBufC::NewMax(fd.Length());
	test(fhb != 0);
	HBufC* chb = HBufC::NewMax(cd.Length());
	test(chb != 0);

	fhb->Des().Copy(fd);
	chb->Des().Copy(cd);

	test.Printf(_L("Error: %d; Cond: %S; File: %S; Line %d;\n"), aError, chb, fhb, aLine);
	test(0);
	}