// Copyright (c) 2009-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// e32\nkernsmp\nk_bal.cpp
//
//

// NThreadBase member data
#define __INCLUDE_NTHREADBASE_DEFINES__

// TDfc member data
#define __INCLUDE_TDFC_DEFINES__

#include "nk_bal.h"

#include "nk_priv.h"
#include "nk_irq.h"

#include <e32cmn.h>

/******************************************************************************
 * Load balancing
 ******************************************************************************/

enum TCCState
	{
	ECCReqPending = 0x80000000u,
	ECCReqDeferred = 0x40000000u,
	ECCPowerUpInProgress = 0x20000000u,
	ECCPowerDownInProgress = 0x10000000u,
	ECCRebalanceRequired = 0x08000000u,
	ECCRebalanceTimerQueued = 0x04000000u,
	ECCPeriodicBalancingActive = 0x02000000u,
	};

const TUint K_CpuMask	= 0x1fu;
const TUint K_Keep		= 0x20u;
const TUint K_SameCpu	= 0x40u;
const TUint K_NewCpu	= 0x80u;
const TUint K_CpuSticky	= 0x40u;
const TUint K_CheckCpu	= 0x100u;

#define	PERCENT(fsd, percent)					(((fsd)*(percent)+50)/100)

const TUint K_LB_HeavyThreshold					= PERCENT(4095, 90);
const TUint K_LB_GravelThreshold_RunAvg			= PERCENT(4095, 1);
const TUint K_LB_GravelThreshold_RunActAvg		= PERCENT(4095, 50);
const TInt	K_LB_HeavyCapacityThreshold			= PERCENT(4095, 1);
const TInt	K_LB_BalanceInterval				= 107;
const TInt	K_LB_CpuLoadDiffThreshold			= 128;

//const TUint K_LB_HeavyStateThreshold			= 128;
const TUint K_LB_HeavyPriorityThreshold			= 25;
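
// Load figures throughout this file are 12-bit fixed-point fractions of one
// CPU's capacity: 4095 means 100%, so e.g. K_LB_HeavyThreshold
// = PERCENT(4095, 90) = (4095*90+50)/100 = 3686. K_LB_BalanceInterval is in
// nanokernel timer ticks (typically 1ms each); the remaining K_LB_* values
// are thresholds on these 12-bit ratios, except K_LB_HeavyPriorityThreshold
// which applies to a thread's nominal priority.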
       
inline TBool IsHeavy(NSchedulable* a)
	{
	TUint x = 0xffu ^ a->iLbInfo.iLbHeavy;
	return (x&(x-1))==0;
	}
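
// iLbHeavy is an 8-bit shift-register history, one bit per balance pass (see
// GetLbStats()): the MSB is set if the entity was active for more than
// K_LB_HeavyThreshold (~90%) of the last period. Here x = 0xff ^ iLbHeavy
// has at most one bit set (x & (x-1) == 0) exactly when at least 7 of the
// last 8 samples were heavy, so IsHeavy() tolerates a single light sample.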
       
inline TBool IsNew(NSchedulable* a)
	{ return a->iLbState & NSchedulable::ELbState_PerCpu; }

struct SPerPri : public SDblQue
	{
	inline SPerPri() : iTotalRun(0), iTotalAct(0), iCount(0), iHeavy(0) {}

	TUint32	iTotalRun;
	TUint32	iTotalAct;
	TUint16	iCount;
	TUint16	iHeavy;
	};

struct SCpuAvailability
	{
	enum
		{
		EIdle = 4095,
		EMaxedOut = -268435456,
		EUnavailable = KMinTInt
		};

	void	Init(TUint32 aActive);
	TInt	FindMax() const;
	TInt	FindMax(NSchedulable* aS) const;
	TInt	PickCpu(NSchedulable* aS, TBool aDropped) const;
	TInt	SetMaxed(TInt aCpu);
	void	AddLoad(TInt aCpu, TInt aLoad);
	inline	TInt operator[](TInt aCpu) const
		{	return iRemain[aCpu]; }
	inline	TInt TotalRemain() const
		{	return iTotalRemain; }

	TInt	iRemain[KMaxCpus];
	TInt	iCount;
	TInt	iTotalRemain;
	};
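
// SCpuAvailability tracks the spare capacity of each active CPU during a
// balance pass. Each available CPU starts at EIdle (4095 = one whole CPU);
// AddLoad() subtracts an assigned entity's average load, and SetMaxed()
// marks a CPU fully committed using the large negative sentinel EMaxedOut
// (-2^28), which still compares ahead of EUnavailable (KMinTInt) so a maxed
// CPU beats a nonexistent one. iTotalRemain sums only positive remainders.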
       
TUint32 HotWarmUnit;
TUint32 LB_DormantThreshold;
volatile TUint32 LBDelayed = 0;

void CalcHotWarm(TUint8& aOut, TUint64 aTime)
	{
	TUint8 out = 0;
	if (aTime>0)
		{
		aTime /= TUint64(HotWarmUnit);
		if (I64HIGH(aTime))
			out = 255;
		else
			{
			aTime *= aTime;
			out = __e32_find_ms1_64(aTime) + 1;
			}
		}
	aOut = (TUint8)out;
	}
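
// CalcHotWarm() compresses a time interval onto a logarithmic 8-bit scale:
// with t = aTime/HotWarmUnit (timestamp ticks per microsecond, set up in
// InitLB()), the result is floor(log2(t*t))+1, roughly 2*log2(microseconds).
// E.g. t = 1000us gives t*t = 10^6 with its top bit at position 19, so
// aOut = 20; the 200ms (f/5 ticks) used for LB_DormantThreshold in InitLB()
// maps to 36. Bigger values therefore mean 'colder' - the entity last ran,
// or was last active, longer ago.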
       
void TScheduler::InitLB()
	{
	TScheduler& s = TheScheduler;
	TDfcQue* rbQ = s.iRebalanceDfcQ;
	s.iBalanceTimer.SetDfcQ(rbQ);
	s.iCCReactivateDfc.SetDfcQ(rbQ);
	s.iCCRequestDfc.SetDfcQ(rbQ);
	s.iCCPowerDownDfc.SetDfcQ(rbQ);
	s.iFreqChgDfc.SetDfcQ(rbQ);
	NThreadBase* lbt = rbQ->iThread;
	lbt->iRebalanceAttr = 1;
	TUint32 f = NKern::CpuTimeMeasFreq();
	HotWarmUnit = f / 1000000;
	TUint8 y = 0;
	CalcHotWarm(y, f/5);
	LB_DormantThreshold = y;
	__KTRACE_OPT(KBOOT,DEBUGPRINT("InitLB()"));
	__KTRACE_OPT(KBOOT,DEBUGPRINT("LB_DormantThreshold=%d", LB_DormantThreshold));
	}

void TSubScheduler::GetLbThreads(SDblQue& aQ)
	{
	NKern::Lock();
	iReadyListLock.LockOnly();
	if (!iLbQ.IsEmpty())
		{
		aQ.MoveFrom(&iLbQ);
		iLbCounter ^= NSchedulable::ELbState_Generation;
		}
	iReadyListLock.UnlockOnly();
	NKern::Unlock();
	}

void TScheduler::GetLbThreads(SDblQue& aQ)
	{
	NKern::Lock();
	iBalanceListLock.LockOnly();
	if (!iBalanceList.IsEmpty())
		{
		aQ.MoveFrom(&iBalanceList);
		iLbCounter ^= NSchedulable::ELbState_Generation;
		}
	iBalanceListLock.UnlockOnly();
	NKern::Unlock();
	}

void NSchedulable::InitLbInfo()
	{
	}

void NSchedulable::NominalPriorityChanged()
	{
	}

void NSchedulable::LbDone(TUint aFlags)
	{
	BTrace8(BTrace::EHSched, BTrace::ELbDone, this, aFlags);
#ifdef KSCHED3
	if (IsGroup())
		{
		__KTRACE_OPT(KSCHED3,DEBUGPRINT("LbDone %G %x", this, aFlags));
		}
	else
		{
		__KTRACE_OPT(KSCHED3,DEBUGPRINT("LbDone %T %x", this, aFlags));
		}
#endif
	TBool keep = aFlags & K_Keep;
	TInt cpu = aFlags & K_CpuMask;
	TBool setcpu = aFlags & K_NewCpu;
	TBool keepcpu = aFlags & K_SameCpu;
	TBool checkcpu = aFlags & K_CheckCpu;
	LAcqSLock();
	TBool died = iLbState & ELbState_ExtraRef;
	if (keep && !died)
		{
		TScheduler& s = TheScheduler;
		s.iBalanceListLock.LockOnly();
		s.iBalanceList.Add(&iLbLink);
		iLbState = s.iLbCounter;
		s.iBalanceListLock.UnlockOnly();
		if (setcpu)
			SetCpuAffinityT(cpu | KCpuAffinityPref | (aFlags & K_CpuSticky));
		else
			{
			if (!keepcpu)
				iPreferredCpu = 0;
			if (checkcpu)
				SetCpuAffinityT(NTHREADBASE_CPU_AFFINITY_MASK);	// move it if it's on a core which is shutting down
			}
		}
	else
		{
		if (!keepcpu)
			iPreferredCpu = 0;
		iLbState = ELbState_Inactive;
		iLbLink.iNext = 0;
		iLbInfo.iRecentTime.i64 = 0;
		iLbInfo.iRecentCpuTime.i64 = 0;
		iLbInfo.iRecentActiveTime.i64 = 0;
		iLbInfo.iLbRunAvg = 0;
		iLbInfo.iLbActAvg = 0;
		iLbInfo.iLbRunActAvg = 0;
		if (checkcpu && !died)
			SetCpuAffinityT(NTHREADBASE_CPU_AFFINITY_MASK);	// move it if it's on a core which is shutting down
		}
	RelSLockU();
	if (died)
		{
		NKern::Lock();
		DropRef();
		NKern::Unlock();
		}
	}

void CalcRatio(TUint16& aRatio, TUint64 aN, TUint64 aD)
	{
	TInt ms1 = __e32_find_ms1_64(aD);
	if (ms1 < 0)
		{
		aRatio = 4095;
		return;
		}
	if (ms1 >= 20)
		{
		TInt shift = ms1 - 19;
		aD >>= shift;
		aN >>= shift;
		}
	// aD, aN now < 2^20
	TUint32 d = I64LOW(aD);
	TUint32 n = I64LOW(aN);
	if (n>d) n=d;
	TUint32 r = (n*4095+(d>>1))/d;
	if (r>4095) r=4095;	// shouldn't really happen
	aRatio = (TUint16)r;
	}
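
// CalcRatio() and CalcRatios() produce 12-bit ratios using only 32-bit
// arithmetic: both operands are shifted right until the denominator is below
// 2^20, which keeps n*4095 + (d>>1) below 2^32 (n <= d < 2^20, 4095 < 2^12);
// adding (d>>1) rounds to nearest. E.g. n = 300000, d = 1000000 needs no
// shift and yields (300000*4095+500000)/1000000 = 1229, i.e. about 30%.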
       
void CalcRatios(TUint16& aRT, TUint16& aAT, TUint16& aRA, TUint64 aDT, TUint64 aDR, TUint64 aDA)
	{
	TInt ms1 = __e32_find_ms1_64(aDT);
	if (ms1 >= 20)
		{
		TInt shift = ms1 - 19;
		aDT >>= shift;
		aDR >>= shift;
		aDA >>= shift;
		}
	// aDT, aDR, aDA now all < 2^20
	TUint32 t = I64LOW(aDT);
	TUint32 rtd = I64LOW(aDR);
	TUint32 atd = I64LOW(aDA);
	if (rtd>t) rtd=t;
	if (atd>t) atd=t;
	TUint32 rtt = (rtd*4095+(t>>1))/t;
	TUint32 att = (atd*4095+(t>>1))/t;
	TUint32 rta = atd ? (rtd*4095+(atd>>1))/atd : 0;
	if (rta>4095) rta=4095;	// shouldn't really happen
	aRT = (TUint16)rtt;
	aAT = (TUint16)att;
	aRA = (TUint16)rta;
	}

void NSchedulable::GetLbStats(TUint64 aTime)
	{
	SCpuStats stats;
	LAcqSLock();
	if (IsGroup())
		{
		NThreadGroup* g = (NThreadGroup*)this;
		if (g->iNThreadList.IsEmpty())
			iLbInfo.iLbNomPri = 1;
		else
			{
			NThreadBase* t = (NThreadBase*)g->iNThreadList.First();
			iLbInfo.iLbNomPri = t->iNominalPri;
			}
		}
	else
		iLbInfo.iLbNomPri = ((NThreadBase*)this)->iNominalPri;
	GetCpuStatsT(E_AllStats, stats);
	iLbInfo.iRecentTime.i64 += aTime;
	iLbInfo.iRecentCpuTime.i64 += stats.iRunTimeDelta;
	iLbInfo.iRecentActiveTime.i64 += stats.iActiveTimeDelta;
	TUint32 aff = iCpuAffinity;
	RelSLockU();
	CalcRatios(iLbInfo.iLbRunTime, iLbInfo.iLbActTime, iLbInfo.iLbRunAct, aTime, stats.iRunTimeDelta, stats.iActiveTimeDelta);
	iLbInfo.iLbRunAvg = TUint16((iLbInfo.iLbRunAvg + iLbInfo.iLbRunTime) >> 1);
	iLbInfo.iLbActAvg = TUint16((iLbInfo.iLbActAvg + iLbInfo.iLbActTime) >> 1);
	CalcRatio(iLbInfo.iLbRunActAvg, iLbInfo.iRecentCpuTime.i64, iLbInfo.iRecentActiveTime.i64);

	if (aff & NTHREADBASE_CPU_AFFINITY_MASK)
		iLbInfo.iLbAffinity = (TUint8)(aff & 0xff);
	else
		iLbInfo.iLbAffinity = 1u << aff;
	CalcHotWarm(iLbInfo.iLbHot, stats.iLastRunTime);
	CalcHotWarm(iLbInfo.iLbWarm, stats.iLastActiveTime);
	if (IsNew(this))
		{
		if (iLbInfo.iLbNomPri <= K_LB_HeavyPriorityThreshold)
			iLbInfo.iLbHeavy = 0xffu;
		else
			iLbInfo.iLbHeavy = 0;
		}
	iLbInfo.iLbHeavy >>= 1;
	if (iLbInfo.iLbActTime > K_LB_HeavyThreshold)
		iLbInfo.iLbHeavy |= 0x80u;
/*
	TUint64 blx = NKern::CpuTimeMeasFreq();
	blx *= 3;
	if (i_NSchedulable_Spare3 && iLbInfo.iLbRunActAvg<400 && stats.iActiveTime>blx)
		{
		__crash();
		}
*/
	}
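
// Notes on GetLbStats():
// - iLbRunAvg and iLbActAvg are exponentially weighted moving averages with
//   alpha = 1/2: new_avg = (old_avg + latest_sample) / 2.
// - iCpuAffinity either has NTHREADBASE_CPU_AFFINITY_MASK set, in which case
//   its low bits are already a CPU mask, or else it is a plain CPU number
//   which is converted to a single-bit mask here.
// - Entities seen for the first time (IsNew()) have the heavy history seeded
//   from priority: at or below K_LB_HeavyPriorityThreshold they start out
//   presumed heavy (0xff), otherwise presumed light.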
       
void AddToSortedQueue(SPerPri* aQ, NSchedulable* aS)
	{
	TInt k = aS->iLbInfo.iLbNomPri;
	if (k >= KNumPriorities)
		k = KNumPriorities;
	SPerPri* q = aQ + k;
	TBool h = IsHeavy(aS);
	SDblQueLink* anchor = &q->iA;
	SDblQueLink* p = q->First();
	for (; p!=anchor; p=p->iNext)
		{
		NSchedulable* s = _LOFF(p, NSchedulable, iLbLink);
		if (h)
			{
			if (!IsHeavy(s))
				continue;
			if (aS->iLbInfo.iLbRunActAvg < s->iLbInfo.iLbRunActAvg)
				break;
			}
		else
			{
			if (IsHeavy(s))
				break;
			if (aS->iLbInfo.iLbRunAvg > s->iLbInfo.iLbRunAvg)
				break;
			}
		}
	aS->iLbLink.InsertBefore(p);
	++q->iCount;
	if (h)
		{
		++q->iHeavy;
		}
	else
		{
		q->iTotalRun += aS->iLbInfo.iLbRunAvg;
		if (q->iTotalRun>4095)
			q->iTotalRun=4095;
		q->iTotalAct += aS->iLbInfo.iLbActAvg;
		}
	}
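
// Each per-priority queue therefore holds all light entities first, in
// decreasing order of average run ratio, followed by all heavy entities in
// increasing order of run/active ratio. ReBalance() relies on this layout:
// it places the largest light loads while CPUs still have the most spare
// capacity, then handles the trailing heavy section as a block.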
       
void SCpuAvailability::Init(TUint32 a)
	{
	iCount = __e32_find_ms1_32(a) + 1;
	iTotalRemain = 0;
	TInt i;
	for (i=0; i<KMaxCpus; ++i)
		{
		if (a & (1<<i))
			{
			iRemain[i] = EIdle;
			iTotalRemain += EIdle;
			}
		else
			iRemain[i] = EUnavailable;
		}
	}

TInt SCpuAvailability::SetMaxed(TInt aCpu)
	{
	TInt x = iRemain[aCpu];
	if (x>0)
		iTotalRemain -= x;
	iRemain[aCpu] = EMaxedOut;
	return x;
	}

void SCpuAvailability::AddLoad(TInt aCpu, TInt aLoad)
	{
	if (TUint32(aLoad) > TUint32(EIdle))
		__crash();
	TInt& x = iRemain[aCpu];
	TInt orig = x;
	x -= aLoad;
	if (x < EMaxedOut)
		x = EMaxedOut;
	if (orig > 0)
		iTotalRemain -= ((orig > aLoad) ? aLoad : orig);
	}

TInt SCpuAvailability::FindMax() const
	{
	TInt maxv = KMinTInt;
	TInt maxi = -1;
	TInt i;
	for (i=0; i<iCount; ++i)
		{
		if (iRemain[i] > maxv)
			{
			maxv = iRemain[i];
			maxi = i;
			}
		}
	return maxi;
	}

TInt SCpuAvailability::FindMax(NSchedulable* aS) const
	{
	TUint32 s = aS->iLbInfo.iLbAffinity;
	s &= TheScheduler.iThreadAcceptCpus;
	if ( (s&(s-1)) == 0 )
		return __e32_find_ms1_32(s);
	TInt maxv = KMinTInt;
	TInt maxi = -1;
	TInt i = 0;
	for (; s; s>>=1, ++i)
		{
		if ((s&1) && iRemain[i] > maxv)
			{
			maxv = iRemain[i];
			maxi = i;
			}
		}
	return maxi;
	}

TInt SCpuAvailability::PickCpu(NSchedulable* aS, TBool aDropped) const
	{
	TUint32 s0 = aS->iLbInfo.iLbAffinity & TheScheduler.iThreadAcceptCpus;
	TUint32 s = s0;
//	BTrace12(BTrace::EHSched, 0x90u, aS, s, aPtr);
	if ( (s&(s-1)) == 0 )
		return __e32_find_ms1_32(s);
	TInt maxv = KMinTInt;
	TInt maxi = -1;
	TInt i = 0;
	for (; s; s>>=1, ++i)
		{
//		BTrace12(BTrace::EHSched, 0x91u, s, maxv, aPtr[i]);
		if ((s&1) && iRemain[i] > maxv)
			{
			maxv = iRemain[i];
			maxi = i;
			}
		}
	if (IsNew(aS))
		{
		// this thread hasn't run for a while
		// pick the highest numbered CPU with a near-maximum availability
		i = __e32_find_ms1_32(s0);
		for (; i>maxi; --i)
			{
			if ( (s0&(1u<<i)) && maxv-iRemain[i]<K_LB_CpuLoadDiffThreshold)
				return i;
			}
		}
	else
		{
		// this thread has run recently - see if we can keep it on the same CPU
		TInt threshold = aDropped ? 1 : (TInt)K_LB_CpuLoadDiffThreshold;
		TInt lcpu = aS->iLastCpu;
		if ( (s0&(1u<<lcpu)) && maxv-iRemain[lcpu]<threshold)
			return lcpu;
		}
	// use highest availability CPU
	return maxi;
	}
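
// PickCpu() placement policy: an entity restricted to a single permitted CPU
// goes there unconditionally. An entity which hasn't run for a while ('new')
// is steered to the highest-numbered CPU whose spare capacity is within
// K_LB_CpuLoadDiffThreshold (128, ~3%) of the best. One which has run
// recently stays on its last CPU if that is within the threshold of the best
// (or, when entities were dropped from balancing this pass, only if it is
// essentially the best); failing all that, the CPU with most spare capacity
// wins.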
       
void TScheduler::BalanceTimerExpired(TAny* aPtr)
	{
	((TScheduler*)aPtr)->PeriodicBalance();
	}

TBool TScheduler::ReBalance(SDblQue& aQ, TBool aCC)
	{
	ModifyCCState(~ECCRebalanceRequired, 0);

	SPerPri sbq[KNumPriorities+1];
	NSchedulable* s = 0;
	TInt i;
	TUint64 now = NKern::Timestamp();
	TUint64 lbt = iLastBalanceTime;
	iLastBalanceTime = now;
	TUint64 bpl = now - lbt;		// balance period length
	TUint cc = aCC ? K_CheckCpu : 0;

	TInt nact = __e32_bit_count_32(iThreadAcceptCpus);	// number of CPUs available

	// aQ holds list of threads/groups to be considered
	TInt ns = 0;	// number for further consideration
	TInt nd = 0;	// number dropped this time round
	SCpuAvailability avail;
	avail.Init(iThreadAcceptCpus);
	TUint32 gravel = 0;
	TInt totalN = 0;
	TInt checked = 0;
	while (!aQ.IsEmpty())
		{
		NThread* t = 0;
		++totalN;
		s = _LOFF(aQ.First()->Deque(), NSchedulable, iLbLink);
		if (!s->IsGroup())
			{
			t = (NThread*)s;
			if (t->iRebalanceAttr & 1)
				++checked;
			}
		s->GetLbStats(bpl);
		if (
			(s->iLbInfo.iLbWarm >= LB_DormantThreshold)	// hasn't run for a while
		||	(s->iLbInfo.iLbWarm>0 && s->iLbInfo.iLbRunAvg<K_LB_GravelThreshold_RunAvg && s->iLbInfo.iLbRunActAvg>K_LB_GravelThreshold_RunActAvg)	// gravel
		)
			{
			TUint32 a = s->iLbInfo.iLbAffinity;
			if ( (a&(a-1)) == 0)
				avail.AddLoad(__e32_find_ms1_32(a), s->iLbInfo.iLbRunAvg);
			else
				gravel += s->iLbInfo.iLbRunAvg;
			if (!IsNew(s))
				++nd;
			s->LbDone(cc);		// drop it
			}
		else if (nact==1)
			{
			s->LbDone(cc|K_Keep);	// keep it but only 1 CPU so don't balance
			}
		else if (t && t->iCoreCycling)
			{
			s->LbDone(cc|K_Keep);	// keep it but don't balance
			}
		else
			{
			++ns;
			AddToSortedQueue(&sbq[0], s);
			}
		}

	gravel /= TUint(nact);
	for (i=0; i<KMaxCpus; ++i)
		{
		if (iThreadAcceptCpus & (1<<i))
			avail.AddLoad(i, gravel);
		}
	if (ns>0)
		{
		TInt k;
		for (k=KNumPriorities; k>=0; --k)
			{
			SPerPri& q = sbq[k];
			if (q.iCount==0)
				{
				__NK_ASSERT_ALWAYS(q.IsEmpty());
				continue;
				}
			if (nact==0)
				goto dump_remaining;
			while (!q.IsEmpty())
				{
				s = _LOFF(q.First(), NSchedulable, iLbLink);
//				BTrace12(BTrace::EHSched, 0x80u, s, s->iLbInfo.iLbRunAvg, s->iLbInfo.iLbRunActAvg);
				if (IsHeavy(s))
					break;
				s->iLbLink.Deque();
				TInt cpu = avail.PickCpu(s, nd);
//				BTrace12(BTrace::EHSched, 0x81u, cpu, remain[cpu], totalremain);
				avail.AddLoad(cpu, s->iLbInfo.iLbRunAvg);
//				BTrace8(BTrace::EHSched, 0x82u, remain[cpu], totalremain);
				s->LbDone(cc|K_Keep|K_NewCpu|cpu);
				}
			if (q.iHeavy > nact)
				{
				TInt hr = avail.TotalRemain() / q.iHeavy;
				TInt n = q.iHeavy;
				TInt j;
				for (j=0; j<nact; ++j)
					{
					// don't bother about keeping same CPU since we must rotate
					// threads between CPUs to even out the run times.
					TInt cpu = avail.FindMax();
//					BTrace12(BTrace::EHSched, 0x83u, cpu, remain[cpu], totalremain);
					TInt capacity = avail.SetMaxed(cpu);
//					BTrace8(BTrace::EHSched, 0x84u, remain[cpu], totalremain);
					TInt nh = 0;
					if (hr > K_LB_HeavyCapacityThreshold)
						{
						if (j == nact-1)
							nh = n;
						else
							nh = capacity / hr;
						}
					else
						nh = n / (nact-j);
					n -= nh;
					for (; nh>0; --nh)
						{
						if (q.IsEmpty())
							__crash();
						s = _LOFF(q.First()->Deque(), NSchedulable, iLbLink);
						s->LbDone(cc|K_Keep|K_NewCpu|cpu);
						}
					}
				nact = 0;
				}
			else
				{
				while (!q.IsEmpty())
					{
					s = _LOFF(q.First()->Deque(), NSchedulable, iLbLink);
					TInt cpu = avail.PickCpu(s, nd);
//					BTrace12(BTrace::EHSched, 0x85u, cpu, remain[cpu], totalremain);
					avail.SetMaxed(cpu);
//					BTrace8(BTrace::EHSched, 0x86u, remain[cpu], totalremain);
					s->LbDone(cc|K_Keep|K_NewCpu|cpu);
					--nact;
					}
				}
			__NK_ASSERT_ALWAYS(q.IsEmpty());
			if (nact==0)
				{
dump_remaining:
				while (!q.IsEmpty())
					{
//					BTrace4(BTrace::EHSched, 0x87u, s);
					s = _LOFF(q.First()->Deque(), NSchedulable, iLbLink);
					s->LbDone(cc|K_Keep);	// keep it but lose preferred CPU
					}
				continue;
				}
			}
		}

	// return TRUE if the only threads which ran were this one and the NTimer thread
	return (totalN==2 && checked==2);
	}
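
// A ReBalance() pass works in three stages:
// 1. Classify: entities which have not been active for roughly 200ms
//    (LB_DormantThreshold), or whose load is negligible 'gravel', are
//    dropped from the balance list; their small loads are still charged to
//    the availability map - directly for single-CPU entities, spread evenly
//    over all active CPUs otherwise.
// 2. Light entities, highest priority band first, get preferred CPUs via
//    PickCpu(), each charging its average load to the chosen CPU.
// 3. If a band has more heavy entities than CPUs remain, the heavies are
//    dealt out across the CPUs in proportion to remaining capacity;
//    otherwise each heavy gets a CPU to itself, marked fully committed.
//    Heavies are deliberately not kept on their previous CPU: rotating them
//    evens out their long-term run times.
// The return value reports a quiescent system - the only runnable threads in
// the period were the balancer itself and the NTimer thread (the two marked
// with iRebalanceAttr bit 0) - letting PeriodicBalance() leave the timer
// stopped until there is real work.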
       
void TScheduler::PeriodicBalance()
	{
	iNeedBal = 0;
	ModifyCCState( ~ECCRebalanceTimerQueued, 0 );
	SDblQue rbq;	// raw balance queue
	GetLbThreads(rbq);
	TInt i;
	for (i=0; i<iNumCpus; ++i)
		iSub[i]->GetLbThreads(rbq);
	TBool bored = ReBalance(rbq, FALSE);
	if (!bored || iNeedBal)
		StartRebalanceTimer(FALSE);
	}


void TScheduler::StartPeriodicBalancing()
	{
#ifdef KBOOT
	__KTRACE_OPT(KBOOT,DEBUGPRINT("StartPeriodicBalancing()"));
	TInt i;
	for (i=0; i<KMaxCpus; ++i)
		{
		TSubScheduler& ss = TheSubSchedulers[i];
		volatile TUint32* p = (volatile TUint32*)ss.iUncached;
		__KTRACE_OPT(KBOOT,DEBUGPRINT("CPU %1d: iUncached=%08x -> %08x %08x %08x %08x", i, p, p[0], p[1], p[2], p[3]));
		}
#endif
	TheScheduler.StartRebalanceTimer(TRUE);
	}

void TScheduler::StartRebalanceTimer(TBool aRestart)
	{
	TInt interval = K_LB_BalanceInterval;
	TUint32 mask = aRestart ? (ECCRebalanceTimerQueued|ECCPeriodicBalancingActive) : (ECCRebalanceTimerQueued);
	TUint32 orig = ModifyCCState(~mask, mask);
	TUint32 ns = (orig &~ mask) ^ mask;
	__KTRACE_OPT(KSCHED3,DEBUGPRINT("StrtRbTmr %08x %08x %08x", mask, orig, ns));
	if ((ns & ECCPeriodicBalancingActive) && !(orig & ECCRebalanceTimerQueued))
		{
		TInt r = KErrArgument;
		if (orig & ECCPeriodicBalancingActive)
			{
			r = iBalanceTimer.Again(interval);
			if (r == KErrArgument)
				{
				++LBDelayed;	// so we can see if this happened
				}
			}
		if (r == KErrArgument)
			{
			r = iBalanceTimer.OneShot(interval);
			}
		if (r != KErrNone)
			__crash();
		}
	}
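
// NTimer::Again() queues the timer relative to its previous expiry so the
// balance period stays regular; it fails with KErrArgument once the next
// theoretical expiry is already in the past (e.g. after balancing has been
// stopped for a while), in which case we fall back to a fresh OneShot() and
// count the slip in LBDelayed.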
       
void TScheduler::StopRebalanceTimer(TBool aTemp)
	{
	TUint32 mask = aTemp ? ECCRebalanceTimerQueued : (ECCRebalanceTimerQueued|ECCPeriodicBalancingActive);
	TUint32 orig = ModifyCCState(~mask, 0);
	__KTRACE_OPT(KSCHED3,DEBUGPRINT("StopRbTmr %08x %08x", mask, orig));
	if (orig & ECCRebalanceTimerQueued)
		iBalanceTimer.Cancel();
	}



/******************************************************************************
 * Core Control
 ******************************************************************************/

/*

TScheduler fields used for core control:

iThreadAcceptCpus
	Bit n = 1 iff CPU n is available to threads with no specific affinity.
	Bits corresponding to existing CPUs are set at boot time.
	Subsequently this word is only modified by the load balancer thread.
	Bit n is cleared when a decision is made to shut down core n.


iIpiAcceptCpus
	Bit n = 1 iff CPU n is accepting generic IPIs
	Bits corresponding to existing CPUs are set at boot time.
	Bit n is cleared when CPU n makes the decision to ask the idle handler to power down
		At the same time, bit n of iCpusGoingDown is set.
	Bit n is set when CPU n returns from the idle handler after waking up.
	Protected by iGenIPILock

iCpusComingUp
	Bit n = 1 iff CPU n is in the process of powering up
	All bits zero at boot
	Bit n set when the load balancer decides to initiate power up of CPU n, provided iCCDeferCount==0
	Bit n cleared when the load balancer sets iThreadAcceptCpus bit n
	Protected by iGenIPILock

iCpusGoingDown
	Bit n = 1 iff CPU n is in the process of powering down and is no longer accepting IPIs
	All bits zero at boot
	Bit n is set when CPU n makes the decision to ask the idle handler to power down
	Bit n is cleared when any of the following occurs:
		- TCoreCycler observes the CPU has detached
		- the load balancer observes the CPU has detached
		- the load balancer decides to reactivate the CPU
	Protected by iGenIPILock

iCCDeferCount
	If this is positive CPUs being shut down will not proceed to clear iIpiAcceptCpus
	In this case bits can be set in iIpiAcceptCpus but cannot be cleared.
	Also (iIpiAcceptCpus|iCpusComingUp) remains constant
	Protected by iGenIPILock

iCCSyncCpus
	Bit n = 1 iff a change has been made to iThreadAcceptCpus which CPU n should observe
	but it has not yet observed it.
	Bit n set by the load balancer after a change is made to iThreadAcceptCpus, provided bit n
	is also set in iIpiAcceptCpus.
	Bit n cleared when CPU n services the core control sync IPI if iKernCSLocked==0 or the
	next time iKernCSLocked becomes zero otherwise.

iCCReactivateCpus
	Bit n = 1 if CPU n is being reactivated after being removed from iThreadAcceptCpus
	Bit n is set if a thread is made ready, cannot be assigned to any active CPU on
		account of affinity restrictions and is assigned to CPU n.
	Bit n is also set when CPU n wakes up from being retired.
	Protected by iGenIPILock

iCCState
	Bit 31 (ECCReqPending)	Set when an external request to change the number of cores is in progress

iCCRequestLevel
	The number of CPUs last requested to be active.

iGenIPILock

iCCSyncIDFC
	Runs when all CPUs have observed a change to iThreadAcceptCpus

iCCReactivateDfc
	Runs whenever one or more bits have been set in iCCReactivateCpus

iCCRequestDfc
	Runs whenever a request is received to change the number of active cores

TSubScheduler fields used for core control:


*/
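
// Typical core-control flows, pieced together from the fields above:
//
// Retiring core n: the load balancer clears bit n of iThreadAcceptCpus via
// ChangeThreadAcceptCpus() and rebalances the core's threads away; once the
// core idles it clears its iIpiAcceptCpus bit and sets its iCpusGoingDown
// bit (CpuShuttingDown()) before asking the idle handler to power off.
//
// Reviving core n: a wakeup, or a ready thread that no active CPU may run,
// sets bit n of iCCReactivateCpus; iCCReactivateDfc then runs CCReactivate(),
// which either initiates power-up (the bit moves to iCpusComingUp) or, for a
// core that is already back, re-adds it to iThreadAcceptCpus and restarts
// periodic balancing.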
       
void TScheduler::CCUnDefer()
	{
	TUint32 powerOn = 0;
	TBool doDeferredReq = FALSE;
	TInt irq = iGenIPILock.LockIrqSave();
	if (--iCCDeferCount == 0)
		{
		// Kick cores waiting to power off
		__holler();

		// See if any cores are waiting to power on
		powerOn = iCCReactivateCpus &~ iCpusComingUp;

		// See if a core control request has been deferred
		if (iCCState & ECCReqDeferred)
			{
			if (iCpusComingUp==0 && iCCReactivateCpus==0)
				doDeferredReq = TRUE;
			}
		}
	iGenIPILock.UnlockIrqRestore(irq);
	if (powerOn)
		iCCReactivateDfc.Enque();
	if (doDeferredReq)
		iCCRequestDfc.Enque();
	}

void TScheduler::CCSyncDone(TAny* aPtr)
	{
	NFastSemaphore* s = (NFastSemaphore*)aPtr;
	s->Signal();
	}

void CCSyncIPI(TGenericIPI*)
	{
	TScheduler& s = TheScheduler;
	TSubScheduler& ss = SubScheduler();
	if (ss.iKernLockCount)
		{
		ss.iCCSyncPending = 1;
		ss.iRescheduleNeededFlag = 1;
		return;
		}
	TUint32 m = ss.iCpuMask;
	if (__e32_atomic_and_ord32(&s.iCCSyncCpus, ~m)==m)
		{
		s.iCCSyncIDFC.Add();
		}
	}

void TScheduler::ChangeThreadAcceptCpus(TUint32 aNewMask)
	{
	NThread* lbt = LBThread();
	if (NKern::CurrentThread() != lbt)
		__crash();
	TInt irq = iGenIPILock.LockIrqSave();
	++iCCDeferCount;
	iThreadAcceptCpus = aNewMask;
	TUint32 cpus = iIpiAcceptCpus;
	iCCSyncCpus = cpus;
	iCpusComingUp &= ~aNewMask;
	iGenIPILock.UnlockIrqRestore(irq);

	NFastSemaphore sem(0);
	iCCSyncIDFC.iPtr = &sem;
	TGenericIPI ipi;
	ipi.Queue(&CCSyncIPI, cpus);

	NKern::FSWait(&sem);
	CCUnDefer();
	}
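
// ChangeThreadAcceptCpus() is a rendezvous with every CPU still accepting
// IPIs: the new mask is published under iGenIPILock, iCCSyncCpus snapshots
// the set of CPUs that must observe it, and CCSyncIPI is sent to them all.
// Each CPU clears its own bit once it can observe the change (immediately,
// or on next release of the kernel lock - see ReschedInactiveCpus()); the
// last one queues iCCSyncIDFC, whose handler CCSyncDone() signals the fast
// semaphore the balancer thread waits on. The defer count held across the
// operation stops cores passing a power-down's point of no return meanwhile.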
       
template<int N> struct Log2 {};

TEMPLATE_SPECIALIZATION struct Log2<1> { enum {Log=0u}; };
TEMPLATE_SPECIALIZATION struct Log2<2> { enum {Log=1u}; };
TEMPLATE_SPECIALIZATION struct Log2<4> { enum {Log=2u}; };
TEMPLATE_SPECIALIZATION struct Log2<8> { enum {Log=3u}; };
TEMPLATE_SPECIALIZATION struct Log2<16> { enum {Log=4u}; };
TEMPLATE_SPECIALIZATION struct Log2<32> { enum {Log=5u}; };


class TCpuSet
	{
public:
	enum {
		EBitsPerTUint8Shift=3u,
		EBitsPerTUint32Shift=EBitsPerTUint8Shift+Log2<sizeof(TUint32)>::Log,
		EBitsPerTUint8=1u<<EBitsPerTUint8Shift,
		EBitsPerTUint32=1u<<EBitsPerTUint32Shift,
		EWords=1u<<(KMaxCpus-EBitsPerTUint32Shift),
		EBytes=1u<<(KMaxCpus-EBitsPerTUint8Shift),
		EBits=1u<<KMaxCpus,
		};
public:
	TCpuSet(TUint32 aMask);
	void Consider(TUint32 aAffinity);
	TCpuSet& operator&=(const TCpuSet&);
	TCpuSet& operator|=(const TCpuSet&);
	TCpuSet& Not();
	TBool IsEmpty() const;
	TInt Profile(TInt* aOut) const;
	TUint32 Select(TInt aDesiredNumber, TUint32 aCurrent, TUint32 aIgnore) const;
private:
	/**
	Bitfield: Bit n	= bit (n%8) of byte INT(n/8)
					= bit (n%32) of word INT(n/32)
	Bit n is set if the subset S of CPUs represented by the bits of n in the
	canonical way (i.e. x \in S <=> bit x of n = 1) is acceptable.
	*/
	TUint32	iMask[EWords];
	};

TCpuSet::TCpuSet(TUint32 aM)
	{
	memset(iMask, 0, sizeof(iMask));
	TInt i;
	TUint32 m=1;	// empty set only
	for (i=0; i<EBitsPerTUint32Shift; ++i)
		{
		TUint32 ibit = 1u<<i;
		if (aM & ibit)
			m |= (m<<ibit);
		}
	iMask[0] = m;
	for (; i<KMaxCpus; ++i)
		{
		TUint32 ibit = 1u<<i;
		if (aM & ibit)
			{
			TInt ws = 1<<(i-EBitsPerTUint32Shift);
			TInt j;
			for (j=0; j<ws; ++j)
				iMask[ws+j] = iMask[j];
			}
		}
	}
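
// TCpuSet is a set of CPU-sets: bit n of iMask is set when the combination
// of CPUs whose numbers are the set bits of n is still acceptable. The
// constructor builds the family of all subsets of aM incrementally, starting
// from just the empty set (m = 1): admitting CPU i doubles the family by
// OR-ing in a copy shifted up by 2^i. E.g. aM = 0b0011 gives iMask[0] =
// 0b1111, bits 0-3 standing for {}, {0}, {1}, {0,1}. For CPU 5 and above the
// same doubling replicates whole 32-bit words instead.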
       
TCpuSet& TCpuSet::operator&=(const TCpuSet& aS)
	{
	TInt i;
	for (i=0; i<EWords; ++i)
		iMask[i] &= aS.iMask[i];
	return *this;
	}

TCpuSet& TCpuSet::operator|=(const TCpuSet& aS)
	{
	TInt i;
	for (i=0; i<EWords; ++i)
		iMask[i] |= aS.iMask[i];
	return *this;
	}

TCpuSet& TCpuSet::Not()
	{
	TInt i;
	for (i=0; i<EWords; ++i)
		iMask[i] = ~iMask[i];
	return *this;
	}

TBool TCpuSet::IsEmpty() const
	{
	TInt i;
	TUint32 x = 0;
	for (i=0; i<EWords; ++i)
		x |= iMask[i];
	return !x;
	}

void TCpuSet::Consider(TUint32 aAffinity)
	{
	TUint32 am = AffinityToMask(aAffinity);
	am &= EBits-1;
	if (am == EBits-1 || am==0)
		return;	// no restrictions

	TCpuSet bad(am ^ (EBits-1));	// sets incompatible with aAffinity
	TInt i;
	for (i=0; i<EWords; ++i)
		iMask[i] &= ~bad.iMask[i];	// knock out sets incompatible with aAffinity
	}

const TUint32 Pmask[6] =
	{
	0x00000001,			// no bits set
	0x00010116,			// 1 bit set (10000, 01000, 00100, 00010, 00001 -> 16,8,4,2,1)
	0x01161668,			// 2 bits set (11000, 10100, 10010, 10001, 01100, 01010, 01001, 00110, 00101, 00011 -> 24,20,18,17,12,10,9,6,5,3)
	0x16686880,			// 3 bits set (11100, 11010, 11001, 10110, 10101, 10011, 01110, 01101, 01011, 00111 -> 28,26,25,22,21,19,14,13,11,7)
	0x68808000,			// 4 bits set (11110, 11101, 11011, 10111, 01111 -> 30,29,27,23,15)
	0x80000000			// 5 bits set
	};
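
// Pmask[k] has bit n (0 <= n < 32) set exactly when the 5-bit value n has k
// bits set, so ANDing a word of the subset bitmap with Pmask[k] keeps only
// subsets with k members drawn from CPUs 0-4. Membership of CPUs 5-7 is
// encoded in the word index, whose own bit count Profile() and Select()
// below add on as n1.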
       
/**
	Sets aOut[n] = number of entries with n CPUs present (0<=n<=KMaxCpus)
	Returns total number of entries
*/
TInt TCpuSet::Profile(TInt* aOut) const
	{
	TInt i,j;
	TInt r = 0;
	memset(aOut, 0, (KMaxCpus+1)*sizeof(TInt));
	for (i=0; i<EWords; ++i)
		{
		TUint32 m = iMask[i];
		if (!m)
			continue;
		TInt n1 = __e32_bit_count_32(i);
		for (j=0; j<=EBitsPerTUint32Shift; ++j)
			{
			TInt dr = __e32_bit_count_32(m & Pmask[j]);
			r += dr;
			aOut[n1+j] += dr;
			}
		}
	return r;
	}

/**
	Given a desired number of active cores and the mask of currently
	running cores, returns the new mask of active cores.
*/
TUint32 TCpuSet::Select(TInt aDesiredNumber, TUint32 aCurrent, TUint32 aIgnore) const
	{
	TInt max = __e32_bit_count_32(aCurrent);
	if (aDesiredNumber > max)
		return 0;
	TInt profile[KMaxCpus+1] = {0};
	Profile(profile);
	TInt dn;
	for (dn=aDesiredNumber; dn<=max && profile[dn]==0; ++dn)
		{}
	if (dn > max)
		return 0;
	TInt wix;
	TUint32 bestMask = 0;
	TInt bestDiff = KMaxTInt;
	TInt stop = max - dn;
	for (wix=0; wix<EWords; ++wix)
		{
		TUint32 candidate = wix << EBitsPerTUint32Shift;
		TUint32 m = iMask[wix];
		if (!m)
			continue;
		TInt n1 = __e32_bit_count_32(wix);
		if (n1 > dn || dn - n1 > EBitsPerTUint32Shift)
			continue;	// one word holds at most 5 members from CPUs 0-4; also keeps Pmask[] in bounds
		m &= Pmask[dn-n1];
		for (; m; m>>=1, ++candidate)
			{
			if (!(m&1))
				continue;
			TUint32 diff = (candidate&~aIgnore) ^ aCurrent;
			TInt wt = __e32_bit_count_32(diff);
			if (wt < bestDiff)
				{
				bestDiff = wt;
				bestMask = candidate;
				if (bestDiff == stop)
					{
					wix = EWords;
					break;
					}
				}
			}
		}
	return bestMask;
	}
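
// Example: with CPUs 0-2 running and a request level of 2, Select(2, 0x7, 0)
// scans every acceptable 2-CPU set and returns the one differing from 0x7 in
// the fewest bit positions; any acceptable pair drawn from the three running
// CPUs differs in exactly one position (= stop), triggering the early exit.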
       
void NSchedulable::LbTransfer(SDblQue& aDestQ)
	{
	if (iLbState & ELbState_PerCpu)
		{
		TSubScheduler* ss = &TheSubSchedulers[iLbState & ELbState_CpuMask];
		ss->iReadyListLock.LockOnly();
		if (iLbState == ss->iLbCounter)
			{
			iLbLink.Deque();
			}
		ss->iReadyListLock.UnlockOnly();
		}
	else if ((iLbState & ELbState_CpuMask) == ELbState_Global)
		{
		TScheduler& s = TheScheduler;
		s.iBalanceListLock.LockOnly();
		if (iLbState == s.iLbCounter)
			{
			iLbLink.Deque();
			}
		s.iBalanceListLock.UnlockOnly();
		}
	else if (iLbState != ELbState_Inactive)
		{
		// shouldn't happen
		__crash();
		}
	iLbState = ELbState_Temp;
	aDestQ.Add(&iLbLink);
	}
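
// iLbState records where an entity's iLbLink currently lives: a per-CPU
// iLbQ (ELbState_PerCpu plus the CPU number), the scheduler's global
// iBalanceList (ELbState_Global), a temporary queue such as the one built
// here (ELbState_Temp), or nowhere (ELbState_Inactive). Comparing against
// the owner's iLbCounter checks the generation bit that GetLbThreads()
// toggles on every drain: if it differs, the entity's old queue has already
// been drained and its link freed, so it must not be dequeued a second time.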
       
void GetAll(SDblQue& aOutQ, SIterDQ* aInQ)
	{
	TScheduler& s = TheScheduler;
	SIterDQIterator iter;
	TInt maxSteps = NKern::NumberOfCpus() + 2;
	TInt r;
	NKern::Lock();
	s.iEnumerateLock.LockOnly();
	iter.Attach(aInQ);
	FOREVER
		{
		SIterDQLink* link = 0;
		r = iter.Step(link, maxSteps);
		if (r == KErrEof)
			break;
		if (r == KErrNone)
			{
			NSchedulable* sch = _LOFF(link, NSchedulable, iEnumerateLink);
			sch->AcqSLock();
			sch->LbTransfer(aOutQ);
			sch->RelSLock();
			}
		s.iEnumerateLock.FlashPreempt();
		}
	iter.Detach();
	s.iEnumerateLock.UnlockOnly();
	NKern::Unlock();
	}

void GetAll(SDblQue& aOutQ)
	{
	TScheduler& s = TheScheduler;
	GetAll(aOutQ, &s.iAllGroups);
	GetAll(aOutQ, &s.iAllThreads);
/*
	SDblQueLink* l0 = aOutQ.Last();
	SDblQueLink* anchor = &aOutQ.iA;
	GetLbThreads(aOutQ);
	TInt i;
	for (i=0; i<s.iNumCpus; ++i)
		s.iSub[i]->GetLbThreads(aOutQ);
	SDblQueLink* l = l0->iNext;
	for (; l!=anchor; l=l->iNext)
		{
		NSchedulable* sch = _LOFF(l, NSchedulable, iLbLink);
		sch->LAcqSLock();
		sch->iLbState = (sch->iLbState & ELbState_ExtraRef) | ELbState_Temp;
		sch->RelSLockU();
		}
*/
	}
       
void GetCpuSet(TCpuSet& aSet, SDblQue& aQ)
	{
	SDblQueLink* anchor = &aQ.iA;
	SDblQueLink* l = aQ.First();
	for (; l!=anchor; l=l->iNext)
		{
		NSchedulable* sch = _LOFF(l, NSchedulable, iLbLink);
		if (!sch->IsGroup() && ((NThreadBase*)sch)->i_NThread_Initial )
			continue;	// skip idle threads since they are locked to their respective CPU
		TUint32 aff = sch->iCpuAffinity;
		aSet.Consider(aff);
		}
	}


void TScheduler::CCReactivateDfcFn(TAny* a)
	{
	((TScheduler*)a)->CCReactivate(0);
	}

void TScheduler::CCRequestDfcFn(TAny* a)
	{
	((TScheduler*)a)->CCRequest();
	}

void TScheduler::CCIpiReactivateFn(TAny* a)
	{
	((TScheduler*)a)->CCIpiReactivate();
	}

TUint32 TScheduler::ModifyCCState(TUint32 aAnd, TUint32 aXor)
	{
	TInt irq = iGenIPILock.LockIrqSave();
	TUint32 orig = iCCState;
	iCCState = (orig & aAnd) ^ aXor;
	iGenIPILock.UnlockIrqRestore(irq);
	return orig;
	}
       
/**
Runs if a thread is made ready on a CPU marked for shutdown (apart from on
account of core cycling) or if a core wakes up from shutdown.
*/
void TScheduler::CCReactivate(TUint32 aMore)
	{
	TUint32 startPowerUp = 0;		// cores which need to be powered up
	TUint32 finishPowerUp = 0;		// cores which have just powered up
	TInt irq = iGenIPILock.LockIrqSave();
	iCCReactivateCpus |= aMore;
	TUint32 cu = iCpusComingUp | iIpiAcceptCpus;
	finishPowerUp = iCCReactivateCpus & cu;
	iCCReactivateCpus &= ~finishPowerUp;
	if (iCCDeferCount == 0)
		{
		startPowerUp = iCCReactivateCpus &~ cu;
		iCCReactivateCpus = 0;
		iCpusComingUp |= startPowerUp;
		}
	TUint32 ccs = iCCState;
	iGenIPILock.UnlockIrqRestore(irq);
	if (startPowerUp)
		{
		// Begin powering up cores
		CCInitiatePowerUp(startPowerUp);
		}
	if (finishPowerUp)
		{
		// ?Rebalance load to new cores now or wait till next periodic?
		ChangeThreadAcceptCpus(iThreadAcceptCpus | finishPowerUp);
		if ((iThreadAcceptCpus & (iThreadAcceptCpus-1)) && !(ccs & ECCPeriodicBalancingActive))
			{
			// more than 1 core so restart periodic balancing
			StartRebalanceTimer(TRUE);
			}
		if (startPowerUp == 0)
			ModifyCCState(~ECCPowerUpInProgress, 0);
		}
	if (iNeedBal)
		{
		if ( (ccs & (ECCPeriodicBalancingActive|ECCRebalanceTimerQueued)) == ECCPeriodicBalancingActive)
			{
			StartRebalanceTimer(FALSE);
			}
		}
	}

extern "C" void wake_up_for_ipi(TSubScheduler* aSS, TInt)
	{
	TScheduler& s = *aSS->iScheduler;
	if (__e32_atomic_ior_ord32(&s.iCCIpiReactivate, aSS->iCpuMask)==0)
		{
		s.iCCIpiReactIDFC.RawAdd();
		}
	}

/**
Runs if a core needs to wake up on account of a transferred tied IRQ or IDFC
*/
void TScheduler::CCIpiReactivate()
	{
	TUint32 cores = __e32_atomic_swp_ord32(&iCCIpiReactivate, 0);
	TInt irq = iGenIPILock.LockIrqSave();
	iCCReactivateCpus |= cores;
	iGenIPILock.UnlockIrqRestore(irq);
	iCCReactivateDfc.DoEnque();
	}

TUint32 TScheduler::ReschedInactiveCpus(TUint32 aMask)
	{
	TUint32 rm = aMask & 0x7FFFFFFFu;
	if (aMask & 0x80000000u)
		{
		TSubScheduler& ss = SubScheduler();
		TUint32 me = ss.iCpuMask;
		if (__e32_atomic_and_ord32(&iCCSyncCpus, ~me) == me)
			{
			rm |= me;
			iCCSyncIDFC.RawAdd();
			}
		}
	return rm;
	}

TUint32 TScheduler::CpuShuttingDown(TSubScheduler& aSS)
	{
	TUint32 m = aSS.iCpuMask;
	iIpiAcceptCpus &= ~m;		// no more IPIs for us
	iCpusGoingDown |= m;		// we are now past the 'point of no return'
	TUint32 more = iIpiAcceptCpus &~ (iThreadAcceptCpus | iCpusComingUp | iCCReactivateCpus);
	if (more)
		return more;
	if (iCCState & ECCPowerDownInProgress)
		return KMaxTUint32;
	return 0;
	}
       
// Called just before last CPU goes idle
void TScheduler::AllCpusIdle()
	{
	}

// Called just after first CPU wakes up from idle
void TScheduler::FirstBackFromIdle()
	{
	}


struct SCoreControlAction
	{
	SCoreControlAction();

	TInt	iPowerUpCount;			// number of cores to power on ...
	TUint32	iPowerUpCandidates;		// ... out of these
	TUint32 iPowerUpChoice;			// chosen to power on
	TInt	iPowerDownCount;		// number of cores to power off ...
	TUint32	iPowerDownCandidates;	// ... out of these
	TUint32 iPowerDownChoice;		// chosen to power off

	// snapshot of core control state
	TInt	iCCRequestLevel;
	TUint32	iThreadAcceptCpus;
	TUint32	iIpiAcceptCpus;
	TUint32	iCpusComingUp;
	TUint32 iCCReactivateCpus;

	TBool	iCCDefer;
	SDblQue	iBalanceQ;
	};

SCoreControlAction::SCoreControlAction()
	:	iPowerUpCount(0),
		iPowerUpCandidates(0),
		iPowerUpChoice(0),
		iPowerDownCount(0),
		iPowerDownCandidates(0),
		iPowerDownChoice(0),
		iCCRequestLevel(0),
		iThreadAcceptCpus(0),
		iIpiAcceptCpus(0),
		iCpusComingUp(0),
		iCCReactivateCpus(0),
		iCCDefer(0)
	{
	}

void TScheduler::InitCCAction(SCoreControlAction& aA)
	{
	aA.iPowerUpCount = 0;
	aA.iPowerUpCandidates = 0;
	aA.iPowerUpChoice = 0;
	aA.iPowerDownCount = 0;
	aA.iPowerDownCandidates = 0;
	aA.iPowerDownChoice = 0;
	aA.iCCDefer = FALSE;

	TUint32 all = (1u<<iNumCpus)-1;

	TInt irq = iGenIPILock.LockIrqSave();

	// cores fully operative and not being powered off
	TUint32 c1 = iThreadAcceptCpus;

	// cores in the process of being retired
	TUint32 c0 = iIpiAcceptCpus &~ (iThreadAcceptCpus | iCpusComingUp | iCCReactivateCpus);

	// cores on (including those being retired) or coming up
	TUint32 c2 = (iIpiAcceptCpus | iCpusComingUp | iCCReactivateCpus);
	TInt n2 = __e32_bit_count_32(c2);

	// cores on and not being retired, plus cores being reactivated
	TUint32 c3 = c2 &~ c0;
	TInt n3 = __e32_bit_count_32(c3);

	TInt req = iCCRequestLevel;

	// take snapshot of state
	aA.iCCRequestLevel = req;
	aA.iThreadAcceptCpus = c1;
	aA.iIpiAcceptCpus = iIpiAcceptCpus;
	aA.iCpusComingUp = iCpusComingUp;
	aA.iCCReactivateCpus = iCCReactivateCpus;

	if (req > n2)
		{
		// need to activate some more cores
		aA.iPowerUpCount = req - n2;
		aA.iPowerUpCandidates = all &~ c2;
		iCCReactivateCpus |= c0;	// revive cores currently in the process of powering down
		iCCState &= ~ECCReqPending;
		iCCState |= ECCPowerUpInProgress;
		}
	else if (req > n3)
		{
		// need to reactivate some cores which are currently powering down
		aA.iPowerUpCount = req - n3;
		aA.iPowerUpCandidates = c0;
		iCCState &= ~ECCReqPending;
		iCCState |= ECCPowerUpInProgress;
		aA.iCCDefer = TRUE;
		++iCCDeferCount;	// stop cores going down past recovery
		}
	else if (req == n3)
		{
		// don't need to do anything
		iCCState &= ~ECCReqPending;
		}
	else if (iCpusComingUp | iCCReactivateCpus)
		{
		// defer this request until reactivations in progress have happened
		iCCState |= ECCReqDeferred;
		}
	else
		{
		// need to retire some more cores
		aA.iPowerDownCount = n3 - req;
		aA.iPowerDownCandidates = c3;
		iCCState &= ~ECCReqPending;
		iCCState |= ECCPowerDownInProgress;
		}
	iGenIPILock.UnlockIrqRestore(irq);
	}
       
/**
Runs when a request is made to change the number of active cores
*/
void TScheduler::CCRequest()
	{
	SCoreControlAction action;
	InitCCAction(action);
	if (action.iPowerDownCount > 0)
		{
		TCpuSet cpuSet(action.iIpiAcceptCpus);
		GetAll(action.iBalanceQ);
		GetCpuSet(cpuSet, action.iBalanceQ);

		TUint32 leaveOn = cpuSet.Select(action.iCCRequestLevel, action.iIpiAcceptCpus, action.iIpiAcceptCpus&~action.iPowerDownCandidates);
		if (leaveOn)
			{
			action.iPowerDownChoice = action.iPowerDownCandidates &~ leaveOn;

			// remove CPUs to be shut down from iThreadAcceptCpus
			ChangeThreadAcceptCpus(iThreadAcceptCpus &~ action.iPowerDownChoice);
			}

		// rebalance to remaining cores
		StopRebalanceTimer(TRUE);
		ReBalance(action.iBalanceQ, TRUE);
		if (iThreadAcceptCpus & (iThreadAcceptCpus - 1))
			{
			// more than 1 CPU on
			ModifyCCState(~ECCPowerDownInProgress, 0);
			StartRebalanceTimer(FALSE);
			}
		else
			ModifyCCState(~(ECCPowerDownInProgress|ECCPeriodicBalancingActive), 0);	// stop periodic balancing
		}
	if (action.iPowerUpCount > 0)
		{
		TUint32 ch = 0;
		TUint32 ca = action.iPowerUpCandidates;
		TInt n = action.iPowerUpCount;
		while(n)
			{
			TInt b = __e32_find_ls1_32(ca);
			ch |= (1u<<b);
			ca &= ~(1u<<b);
			--n;
			}
		action.iPowerUpChoice = ch;
		CCReactivate(action.iPowerUpChoice);
		if (action.iCCDefer)
			CCUnDefer();
		}
	}

/**
Initiates a change to the number of active cores
*/
EXPORT_C void NKern::SetNumberOfActiveCpus(TInt aNumber)
	{
	__NK_ASSERT_ALWAYS(aNumber>0 && aNumber<=NKern::NumberOfCpus());
	TScheduler& s = TheScheduler;
	if (!s.CoreControlSupported())
		return;
	TBool chrl = FALSE;
	TBool kick = FALSE;
	NKern::Lock();
	TInt irq = s.iGenIPILock.LockIrqSave();
	if (s.iCCRequestLevel != (TUint32)aNumber)
		{
		s.iCCRequestLevel = aNumber;
		chrl = TRUE;
		}

	// cores in the process of being retired
	TUint32 c0 = s.iIpiAcceptCpus &~ (s.iThreadAcceptCpus | s.iCpusComingUp | s.iCCReactivateCpus);

	// cores on (including those being retired) or coming up
	TUint32 c2 = (s.iIpiAcceptCpus | s.iCpusComingUp | s.iCCReactivateCpus);

	// cores on and not being retired, plus cores being reactivated
	TUint32 c3 = c2 &~ c0;
	TUint32 cc_active = __e32_bit_count_32(c3);

	if (s.iCCRequestLevel != cc_active)
		{
		if (chrl || !(s.iCCState & (ECCReqPending|ECCPowerDownInProgress|ECCPowerUpInProgress) ))
			{
			kick = TRUE;
			}
		s.iCCState |= ECCReqPending;
		}
	s.iGenIPILock.UnlockIrqRestore(irq);
	if (kick)
		s.iCCRequestDfc.Add();
	NKern::Unlock();
	}
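
// Usage sketch (hypothetical caller): a platform's power policy might call
// NKern::SetNumberOfActiveCpus(1) when the system goes quiet and
// NKern::SetNumberOfActiveCpus(NKern::NumberOfCpus()) when load returns.
// The request is a level, not an increment, and is a no-op on platforms
// where CoreControlSupported() is false; the actual work happens later in
// iCCRequestDfc on the rebalance DFC queue.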
       