kernel/eka/common/arm/cmem.cia
changeset 0 a41df078684a
equal deleted inserted replaced
-1:000000000000 0:a41df078684a
       
     1 // Copyright (c) 1995-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of the License "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // e32\common\arm\cmem.cia
       
    15 // 
       
    16 //
       
    17 
       
    18 #include "../common.h"
       
    19 #include <e32cia.h>
       
    20 #if defined(__REPLACE_GENERIC_UTILS)
       
    21 #include "replacement_utils.h"
       
    22 #endif
       
    23 
       
    24 #if defined(__MEM_MACHINE_CODED__)
       
    25 
       
    26 #ifndef USE_REPLACEMENT_MEMSET
       
    27 
       
    28 #if defined(_DEBUG)
       // Debug builds only: ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc) tests the low two bits of register rt1.
    29 
       
    30 #ifdef __STANDALONE_NANOKERNEL__
       // Standalone nanokernel: panicfunc is ignored; if rt1 is not a multiple of four a load through rt1 is executed.
    31 
       // NOTE(review): that load presumably exists to raise a fault on the bad value - confirm.
    32 #define ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc)	\
       
    33 	asm("tst "#rt1", #3"); \
       
    34 	asm("ldrne "#rt1", ["#rt1"]")
       
    35 
       
    36 #else	// __STANDALONE_NANOKERNEL__
       // Otherwise: if rt1 is not a multiple of four, branch to the label string supplied as panicfunc.
    37 GLDEF_C void PanicEWordMoveLengthNotMultipleOf4();
       
    38 GLDEF_C void PanicEWordMoveSourceNotAligned();
       
    39 GLDEF_C void PanicEWordMoveTargetNotAligned();
       
    40 
       
    41 #define ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc)	\
       
    42 	asm("tst "#rt1", #3"); \
       
    43 	asm("bne " panicfunc )
       
    44 
       
    45 #endif	// __STANDALONE_NANOKERNEL__
       
    46 
       
    47 #else	// _DEBUG
       // Non-debug builds: the assertion expands to nothing.
    48 
       
    49 #define ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc)
       
    50 
       
    51 #endif	//_DEBUG
       
    52 
       
    53 
       
    54 // See header file e32cmn.h for the in-source documentation.
       
    55 extern "C" EXPORT_C __NAKED__ TAny* memclr(TAny* /*aTrg*/, unsigned int /*aLength*/)
       // Zero-fill entry point: loads a zero fill value and joins the shared fill code inside memset.
    56 	{
       // NOTE(review): assumes aTrg arrives in r0 and aLength in r1 (usual ARM argument registers) - confirm.
    57 	KMEMCLRHOOK
       // KMEMCLRHOOK is a macro defined outside this view; its expansion is not visible here.
    58 	asm("mov r2, #0 ");		// r2 = fill value 0
       
    59 	asm("b fill ");			// continue at the fill label inside memset (no return is coded in this function)
       
    60 	}
       
    61 
       
    62 // See header file e32cmn.h for the in-source documentation.
       
    63 extern "C" EXPORT_C __NAKED__ TAny* memset(TAny* /*aTrg*/, TInt /*aValue*/, unsigned int /*aLength*/)
       // Fills aLength bytes at aTrg with the low byte of aValue. Also hosts the shared fill code used by memclr
    64     {
       // and, under __EABI__, the __aeabi_memset* entry labels. NOTE(review): aTrg is taken to arrive in r0 - confirm.
    65     KMEMSETHOOK
       // KMEMSETHOOK is a macro defined outside this view; its expansion is not visible here.
    66     asm("   mov		 r3, r2 ");				/* length into r3 */
       
    67     asm("   and      r2,r1,#255");			/* fill value into r2 */
       
    68 	asm("	mov		 r1, r3 ");				/* length into r1 */
       
    69 
       // Shared entry (memclr and the __aeabi_memset labels branch here): r0 = destination, r1 = byte count, r2 = fill byte.
    70     asm("fill:");
       
    71     asm("   cmp      r1,#8");
       
    72 	asm("   bls      small_fill");			// only taken ~20% of the time
       
    73 
       // Save r0 and the work registers; every exit below uses __POPRET with the same list (presumably restore + return - confirm).
    74     asm("   stmfd    sp!,{r0,r4-r9,lr}");
       
    75 	asm("   movs     r3, r0, lsl #30 ");	// Check if word aligned
       
    76 	asm("   orr      r2,r2,r2,lsl #8");		// replicate the fill byte into the low halfword (flags untouched)
       
    77     asm("   orr      r2,r2,r2,lsl #16");	// ...and into all four bytes of r2
       
    78 	asm("   bne		 unaligned_fill ");	// still testing the movs above: low two address bits non-zero
       
    79 		
       
    80 	// Align destination address to 32 byte boundary if possible
       
    81 	
       
    82 	asm("word_aligned_fill: ");
       
    83     asm("   mov      r4,r2");
       
    84     asm("   mov      r5,r2");
       
    85     asm("   mov      r6,r2");
       
    86 	asm("   movs     r3, r0, lsl #27 ");	// low five address bits to the top of r3; zero means already 32-byte aligned
       
    87     asm("   beq      aligned_fill ");
       
    88     asm("   rsb      r3, r3, #0 ");				// calculate fill length necessary for alignment
       
    89 	asm("   cmp      r1, r3, lsr #27 ");		// compare with remaining length
       
    90 	asm("   blo		 smaller_fill ");			// remaining length is less than the alignment fill: skip the 32-byte alignment
       
    91 	asm("   msr      cpsr_f, r3 ");				// put length bits 4, 3, 2 into N, Z, C flags
       
    92     asm("   strcs    r2, [r0], #4 ");			// align to 8 byte boundary
       
    93     asm("   stmeqia  r0!, {r2, r4} ");			// align to 16 byte boundary
       
    94     asm("   stmmiia  r0!, {r2, r4-r6} ");		// align to 32 byte boundary
       
    95 	asm("   sub      r1, r1, r3, lsr #27 ");	// adjust remaining length
       
    96 
       
    97     asm("aligned_fill:");
       
    98 	asm("   cmp		 r1, #64 ");
       
    99 	asm("   bhs		 big_fill ");
       
   100 
       
   101 	// Fill 0-63 bytes
       
   102 	
       
   103     asm("smaller_fill:");
       
   104     asm("   movs     r1, r1, lsl #26");	// keep only length bits 5..0, moved up to bits 31..26
       
   105 	asm("	beq		 mem_fill_end ");
       
   106     asm("   msr      cpsr_flg, r1 ");	// length bits 5, 4, 3, 2 into N, Z, C, V
       
   107     asm("   stmmiia  r0!,{r2,r4-r6}");	// Fill 32
       
   108     asm("   stmmiia  r0!,{r2,r4-r6}");
       
   109     asm("   stmeqia  r0!,{r2,r4-r6}");	// Fill 16
       
   110     asm("   stmcsia  r0!,{r2,r4}");		// Fill 8
       
   111     asm("   strvs    r2,[r0],#4");		// Fill 4
       
   112 	asm("   movs	 r1, r1, lsl #4 ");	// length bits 1, 0 now in bits 31, 30
       
   113 	asm("	bne		 smallest_fill ");
       
   114 	asm("mem_fill_end: ");
       
   115 	__POPRET("r0,r4-r9,");
       
   116 
       
   117 	// Fill last 1-3 bytes
       
   118 	
       
   119     asm("smallest_fill: ");
       
   120     asm("   msr      cpsr_flg,r1");	// N = length bit 1, Z = length bit 0
       
   121     asm("   strmih   r2,[r0],#2");  	// Fill 2
       
   122     asm("   streqb   r2,[r0],#1");  	// Fill 1
       
   123 	__POPRET("r0,r4-r9,");
       
   124 
       
   125 	// Fill loop for length >= 64
       
   126 	
       
   127 	asm("big_fill: ");
       // r4-r6 already hold the fill word; load r3 and r7-r9 so that r2-r9 store 32 bytes per stmia.
   128 	asm("   mov      r3,r2");
       
   129     asm("   mov      r7,r2");
       
   130     asm("   mov      r8,r2");
       
   131     asm("   mov      r9,r2");
       
   132     asm("   movs     ip,r1,lsr #8");	// Number of 256 byte blocks to fill
       
   133 	asm("   beq		 medium_fill ");
       
   134     asm("fill_256_bytes_loop:");
       
   135     asm("   stmia    r0!,{r2-r9}");		// Fill 256 bytes
       
   136     asm("   stmia    r0!,{r2-r9}");
       
   137     asm("   stmia    r0!,{r2-r9}");
       
   138     asm("   stmia    r0!,{r2-r9}");
       
   139     asm("   stmia    r0!,{r2-r9}");
       
   140     asm("   stmia    r0!,{r2-r9}");
       
   141     asm("   stmia    r0!,{r2-r9}");
       
   142     asm("   stmia    r0!,{r2-r9}");
       
   143     asm("   subs     ip,ip,#1");
       
   144     asm("   bne      fill_256_bytes_loop");
       
   145 	asm("medium_fill: ");
       
   146     asm("   movs     ip,r1,lsl #24");	// length bits 7, 6 into bits 31, 30
       
   147     asm("   msr      cpsr_flg,ip");	// N = 128 bytes pending, Z = 64 bytes pending
       
   148     asm("   stmmiia  r0!,{r2-r9}");		// Fill 128
       
   149     asm("   stmmiia  r0!,{r2-r9}");  
       
   150     asm("   stmmiia  r0!,{r2-r9}"); 
       
   151     asm("   stmmiia  r0!,{r2-r9}"); 
       
   152     asm("   stmeqia  r0!,{r2-r9}");		// Fill 64
       
   153     asm("   stmeqia  r0!,{r2-r9}"); 
       
   154 	asm("   and		 r1, r1, #63 ");	// keep length bits 5..0 for the tail code
       
   155 	asm("   b 		 smaller_fill");
       
   156 
       
   157 	// Word-align destination address, length >= 8
       
   158 	
       
   159 	asm("unaligned_fill: ");
       
   160     asm("   rsb      r3, r3, #0 ");				// calculate fill length necessary for alignment
       
   161     asm("   msr      cpsr_flg, r3");	// Z = one byte needed, N = one halfword needed
       
   162 	asm("   streqb   r2, [r0], #1 ");			// align to 2 byte boundary
       
   163     asm("   strmih   r2, [r0], #2 ");			// align to 4 byte boundary
       
   164 	asm("   sub      r1, r1, r3, lsr #30 ");	// subtract the 1-3 alignment bytes just written
       
   165 	asm("	b		 word_aligned_fill ");
       
   166 
       
   167 	// Fill for length <= 8
       
   168 	
       
   169 	asm("small_fill: ");
       // Nothing is pushed on this path; r3 is the moving pointer so r0 is left unchanged.
   170 	asm("	mov		 r3, r0 ");				/* r3=dest */
       
   171 	asm("   adr      ip, small_fill_end ");
       
   172 	asm("   sub		 pc, ip, r1, lsl #2 ");	// jump to small_fill_end - 4*length: runs the last 'length' strb (4 bytes each assumed)
       
   173     asm("   strb     r2, [r3], #1");
       
   174     asm("   strb     r2, [r3], #1");
       
   175     asm("   strb     r2, [r3], #1");
       
   176     asm("   strb     r2, [r3], #1");
       
   177     asm("   strb     r2, [r3], #1");
       
   178     asm("   strb     r2, [r3], #1");
       
   179     asm("   strb     r2, [r3], #1");
       
   180     asm("   strb     r2, [r3], #1");
       
   181 	asm("small_fill_end: ");
       
   182 	__JUMP(,lr);
       
   183 
       
   184 #ifdef __EABI__
       
   185 	// The AEABI switched the order of arg2 and arg3 to save an instruction when
       
   186 	// calling 'memset' from 'memclr'	
       // All three labels share one entry: the length is already in r1 and the value in r2, as fill expects.
   187 	asm(".global __aeabi_memset8 ");
       
   188 	asm("__aeabi_memset8: 		 ");
       
   189 	asm(".global __aeabi_memset4 ");
       
   190 	asm("__aeabi_memset4: 		 ");
       
   191 	asm(".global __aeabi_memset  ");
       
   192 	asm("__aeabi_memset: 		 ");
       
   193     asm("   and      r2, r2, #255");	// truncate the fill value to a byte
       
   194 	asm("	b		 fill		 ");
       
   195 #endif
       
   196     }
       
   197 
       
   198 #endif  // USE_REPLACEMENT_MEMSET
       
   199 
       
   200 #ifndef USE_REPLACEMENT_MEMCPY
       
   201 
       
   202 // See header file e32cmn.h for the in-source documentation.
       
   203 
       
   204 extern "C" EXPORT_C __NAKED__ TAny* wordmove(TAny* /*aTrg*/, const TAny* /*aSrc*/, unsigned int /*aLength*/)
       
   205 //
       
   206 // Assumes target, source and length are all multiples of four (asserted below when _DEBUG is defined)
       
   207 //
       
   208     {
       // The assertions test r0, r1 and r2 in turn; the panic label names show these are target, source and length.
   209 	ARM_ASSERT_MULTIPLE_OF_FOUR(r0, CSM_Z30PanicEWordMoveTargetNotAlignedv);
       
   210 	ARM_ASSERT_MULTIPLE_OF_FOUR(r1, CSM_Z30PanicEWordMoveSourceNotAlignedv);
       
   211 	ARM_ASSERT_MULTIPLE_OF_FOUR(r2, CSM_Z34PanicEWordMoveLengthNotMultipleOf4v);
       
   212 
       
   213 	// Mask length to a multiple of four bytes to avoid memory, or register
       
   214 	// corruption by the special cases below.
       
   215 	asm("bic r2,r2,#3");
       
   216 
       
   217 	// Length <= 24 in ~90% of cases, however can only copy > 16 bytes in 4
       
   218 	// instructions if LDM instruction restores thumb state when loading the PC.	
       
   219 #ifdef __CPU_ARM_LDR_PC_SETS_TBIT
       
   220 	asm("cmp r2, #24 ");
       
   221 #else
       
   222 	asm("cmp r2, #16 ");
       
   223 #endif
       
   224 	PLD(1);
       // Jump table: pc reads as this instruction + 8 (ARM state), i.e. the first slot after the 'b 9f'.
   225 	asm("addls pc, pc, r2, lsl #2 ");		// take branch depending on size
       // r2 is 4*words, so r2 << 2 advances 16 bytes per word: every case below must occupy exactly four instructions.
   226 	asm("b 9f ");							// too big
       // NOTE(review): the padding relies on each __JUMP(,lr) expanding to a single instruction - confirm.
   227 
       // Each case loads all words before storing any and never modifies r0.
   228 	// 0 words
       
   229 	__JUMP(,lr);
       
   230 	__JUMP(,lr);
       
   231 	__JUMP(,lr);
       
   232 	__JUMP(,lr);
       
   233 
       
   234 	// 1 word
       
   235 	asm("ldr ip, [r1] ");
       
   236 	asm("str ip, [r0] ");
       
   237 	__JUMP(,lr);
       
   238 	__JUMP(,lr);							// padding to four instructions
       
   239 
       
   240 	// 2 words
       
   241 	asm("ldmia r1, {r2,r3}");
       
   242 	asm("stmia r0, {r2,r3}");
       
   243 	__JUMP(,lr);
       
   244 	__JUMP(,lr);							// padding to four instructions
       
   245 
       
   246 	// 3 words
       
   247 	asm("ldmia r1, {r2,r3,ip}");
       
   248 	asm("stmia r0, {r2,r3,ip}");
       
   249 	__JUMP(,lr);
       
   250 	__JUMP(,lr);							// padding to four instructions
       
   251 
       
   252 	// 4 words
       
   253 	asm("ldmia r1, {r1,r2,r3,ip}");
       
   254 	asm("stmia r0, {r1,r2,r3,ip}");
       
   255 	__JUMP(,lr);
       
   256 	__JUMP(,lr);							// padding to four instructions
       
   257 
       
   258 #ifdef __CPU_ARM_LDR_PC_SETS_TBIT
       // These slots return by loading pc from the stack, so they are only built when that load restores the Thumb bit.
   259 	// 5 words
       
   260 	asm("stmfd sp!, {lr}");
       
   261 	asm("ldmia r1, {r1,r2,r3,ip,lr}");
       
   262 	asm("stmia r0, {r1,r2,r3,ip,lr}");
       
   263 	asm("ldmfd sp!, {pc}");
       
   264 
       
   265 	// 6 words
       
   266 	asm("stmfd sp!, {r4,lr}");
       
   267 	asm("ldmia r1, {r1,r2,r3,r4,ip,lr}");
       
   268 	asm("stmia r0, {r1,r2,r3,r4,ip,lr}");
       
   269 	asm("ldmfd sp!, {r4,pc}");
       
   270 #endif
       
   271 
       // Larger moves: choose a direction and join the aligned copy code at labels defined inside memcpy.
   272 	asm("9: ");
       
   273     asm("subs r3, r0, r1 ");				// r3 = dest - source
       
   274 	__JUMP(eq,lr);							// return if source = dest
       
   275     asm("stmfd sp!, {r0,r4-r11,lr} ");		// same register list that memcpy's main_copy pushes
       
   276 	asm("cmphi r2, r3 ");					// if dest>source, compare length with dest-source
       
   277     asm("bls mem_move_fore ");				// if dest<source or length<=dest-source do forwards aligned copy
       
   278     asm("add r0, r0, r2 ");					// r0 = one past the last destination byte
       
   279     asm("add r1, r1, r2 ");					// r1 = one past the last source byte
       
   280     asm("b mem_move_back ");				// Backwards aligned copy
       
   281     }
       
   282 
       
   283 
       
   284 
       
   285 
       
   286 // See header file e32cmn.h for the in-source documentation.
       
   287 extern "C" EXPORT_C __NAKED__ TAny* memmove(TAny* /*aTrg*/, const TAny* /*aSrc*/, unsigned int /*aLength*/)
       // Entry point that shares memcpy's code: the body holds only the hook macro and no return.
   288 	{
       
   289 	KMEMMOVEHOOK
       // KMEMMOVEHOOK is a macro defined outside this view; its expansion is not visible here.
   290 	// fall through into memcpy, which is defined directly after this function
       // NOTE(review): relies on the two functions being emitted adjacently with no epilogue in between - confirm.
   291 	}
       
   292 
       
   293 
       
   294 
       
   295 // See header file e32cmn.h for the in-source documentation.
       
   296 extern "C" EXPORT_C __NAKED__ TAny* memcpy(TAny* /*aTrg*/, const TAny* /*aSrc*/, unsigned int /*aLength*/)
       
   297     {
       
   298     KMEMCPYHOOK
       
   299 //
       
   300 // Check for zero length or source and target being the same
       
   301 //
       
   302     asm("	cmp		r2, #0 ");				// zero length?
       
   303     asm("	subnes	r3, r0, r1 ");			// if not, r3 = dest-source
       
   304 	__JUMP(eq,lr);							// if zero length or dest=source, nothing to do
       
   305 	asm("	cmphi	r2, r3 ");				// if dest>source compare length to dest-source
       
   306 	asm("	movhi	r3, #0 ");				// if dest>source and length>dest-source need to go backwards - set r3=0
       
   307 //
       
   308 //	If <16 bytes, just do byte moves
       
   309 //
       
   310     asm("	cmp		r2,	#15 ");
       
   311 	asm("	bhi		main_copy ");
       
   312 
       
   313 	asm("	ldrb	r12, [r0] ");			// read dest so it's in cache - avoid lots of single accesses to external memory
       
   314 	asm("	sub		r12, r0, #1 ");
       
   315 	asm("	ldrb	r12, [r12, r2] ");		// read dest+length-1
       
   316 	asm("	cmp		r3, #0 ");
       
   317 	asm("	beq		small_copy_back ");		// r3=0 means go backwards
       
   318 
       
   319 	asm("small_copy_fwd: ");
       
   320 	asm("	mov		r3, r0 ");
       
   321 	asm("	adr		r12, small_copy_fwd_end ");
       
   322 	asm("	sub		pc, r12, r2, lsl #3 ");
       
   323 
       
   324 	asm("	ldrb	r12, [r1], #1 ");
       
   325 	asm("	strb	r12, [r3], #1 ");
       
   326 	asm("	ldrb	r12, [r1], #1 ");
       
   327 	asm("	strb	r12, [r3], #1 ");
       
   328 	asm("	ldrb	r12, [r1], #1 ");
       
   329 	asm("	strb	r12, [r3], #1 ");
       
   330 	asm("	ldrb	r12, [r1], #1 ");
       
   331 	asm("	strb	r12, [r3], #1 ");
       
   332 	asm("	ldrb	r12, [r1], #1 ");
       
   333 	asm("	strb	r12, [r3], #1 ");
       
   334 	asm("	ldrb	r12, [r1], #1 ");
       
   335 	asm("	strb	r12, [r3], #1 ");
       
   336 	asm("	ldrb	r12, [r1], #1 ");
       
   337 	asm("	strb	r12, [r3], #1 ");
       
   338 	asm("	ldrb	r12, [r1], #1 ");
       
   339 	asm("	strb	r12, [r3], #1 ");
       
   340 	asm("	ldrb	r12, [r1], #1 ");
       
   341 	asm("	strb	r12, [r3], #1 ");
       
   342 	asm("	ldrb	r12, [r1], #1 ");
       
   343 	asm("	strb	r12, [r3], #1 ");
       
   344 	asm("	ldrb	r12, [r1], #1 ");
       
   345 	asm("	strb	r12, [r3], #1 ");
       
   346 	asm("	ldrb	r12, [r1], #1 ");
       
   347 	asm("	strb	r12, [r3], #1 ");
       
   348 	asm("	ldrb	r12, [r1], #1 ");
       
   349 	asm("	strb	r12, [r3], #1 ");
       
   350 	asm("	ldrb	r12, [r1], #1 ");
       
   351 	asm("	strb	r12, [r3], #1 ");
       
   352 	asm("	ldrb	r12, [r1], #1 ");
       
   353 	asm("	strb	r12, [r3], #1 ");
       
   354 	asm("small_copy_fwd_end: ");
       
   355 	__JUMP(,lr);
       
   356 
       
   357 	asm("small_copy_back: ");
       
   358 	asm("	add		r3, r0, r2 ");
       
   359 	asm("	add		r1, r1, r2 ");
       
   360 	asm("	adr		r12, small_copy_back_end ");
       
   361 	asm("	sub		pc, r12, r2, lsl #3 ");
       
   362 
       
   363 	asm("	ldrb	r12, [r1, #-1]! ");
       
   364 	asm("	strb	r12, [r3, #-1]! ");
       
   365 	asm("	ldrb	r12, [r1, #-1]! ");
       
   366 	asm("	strb	r12, [r3, #-1]! ");
       
   367 	asm("	ldrb	r12, [r1, #-1]! ");
       
   368 	asm("	strb	r12, [r3, #-1]! ");
       
   369 	asm("	ldrb	r12, [r1, #-1]! ");
       
   370 	asm("	strb	r12, [r3, #-1]! ");
       
   371 	asm("	ldrb	r12, [r1, #-1]! ");
       
   372 	asm("	strb	r12, [r3, #-1]! ");
       
   373 	asm("	ldrb	r12, [r1, #-1]! ");
       
   374 	asm("	strb	r12, [r3, #-1]! ");
       
   375 	asm("	ldrb	r12, [r1, #-1]! ");
       
   376 	asm("	strb	r12, [r3, #-1]! ");
       
   377 	asm("	ldrb	r12, [r1, #-1]! ");
       
   378 	asm("	strb	r12, [r3, #-1]! ");
       
   379 	asm("	ldrb	r12, [r1, #-1]! ");
       
   380 	asm("	strb	r12, [r3, #-1]! ");
       
   381 	asm("	ldrb	r12, [r1, #-1]! ");
       
   382 	asm("	strb	r12, [r3, #-1]! ");
       
   383 	asm("	ldrb	r12, [r1, #-1]! ");
       
   384 	asm("	strb	r12, [r3, #-1]! ");
       
   385 	asm("	ldrb	r12, [r1, #-1]! ");
       
   386 	asm("	strb	r12, [r3, #-1]! ");
       
   387 	asm("	ldrb	r12, [r1, #-1]! ");
       
   388 	asm("	strb	r12, [r3, #-1]! ");
       
   389 	asm("	ldrb	r12, [r1, #-1]! ");
       
   390 	asm("	strb	r12, [r3, #-1]! ");
       
   391 	asm("	ldrb	r12, [r1, #-1]! ");
       
   392 	asm("	strb	r12, [r3, #-1]! ");
       
   393 	asm("small_copy_back_end: ");
       
   394 	__JUMP(,lr);
       
   395 
       
   396 	
       
   397 	asm("main_copy: ");
       
   398 	PLD(1);											// preload first two cache lines
       
   399 	PLD_ioff(1, 32);
       
   400 	asm("	stmfd	sp!, {r0,r4-r11,lr} ");			// r0 == dest, r1 == src, r2 == len
       
   401 	asm("	cmp		r3, #0 ");
       
   402 	asm("	beq		copy_back ");					// we must go backwards
       
   403     asm("   movs	r3, r0, lsl #30 ");				// check destination word aligned
       
   404 	asm("   bne		dest_unaligned_fore ");
       
   405 
       
   406 //
       
   407 // Normal copy forwards. r0 should point to end address on exit
       
   408 // Destination now word-aligned; if source is also word-aligned, do aligned copy.
       
   409 //	
       
   410 	asm("dest_aligned_fore: ");
       
   411     asm("   ands	r12, r1, #3 ");		// r12=alignment of source
       
   412     asm("   bne		copy_fwd_nonaligned ");
       
   413 
       
   414 //
       
   415 // We are now word aligned, at least 13 bytes to do
       
   416 //
       
   417 	
       
   418     asm("mem_move_fore:");
       
   419 //
       
   420 // superalign
       
   421 //
       
   422     asm("	movs	r4, r0, lsl #27 ");		 		 		// destination alignment into r4
       
   423 	asm("	beq		f_al_already_aligned ");				// fast path
       
   424 	asm("	rsb		r4, r4, #0 ");							// bytes required to align destination to 32
       
   425 	asm("	cmp		r2, r4, lsr #27 ");						// check that many remaining
       
   426 	asm("	blo		its_smaller_fore ");					// if too short, just stick with word alignment
       
   427 	asm("	msr		cpsr_flg, r4 ");		 		 		// destination alignment into N, Z, C flags
       
   428 															// do word moves to align destination
       
   429 	asm("	ldrcs	lr, [r1], #4 ");						// C flag == 1 word (we are already word aligned)
       
   430 	asm("	ldmeqia	r1!, {r3,r9} ");						// Z flag == 2 words
       
   431 	asm("	ldmmiia	r1!, {r5-r8} ");						// N flag == 4 words, destination now 32 byte aligned
       
   432 	asm("	sub		r2, r2, r4, lsr #27 ");		 			// adjust length
       
   433 	asm("	strcs	lr, [r0], #4 ");						// destination now 8 byte aligned
       
   434 	asm("	stmeqia	r0!, {r3,r9} ");						// destination now 16 byte aligned
       
   435 	asm("	stmmiia	r0!, {r5-r8} ");						// destination now 32 byte aligned	
       
   436 
       
   437 	asm("f_al_already_aligned: ");
       
   438 	asm("	cmp		r2, #64 ");
       
   439 	asm("	bhs		large_copy_fore ");
       
   440 //
       
   441 // Less than 64 bytes to go...
       
   442 //	
       
   443     asm("its_smaller_fore:");
       
   444     asm("	movs	ip, r2, lsl #26 ");		// length bits 5, 4, 3, 2 into N, Z, C, V
       
   445 	asm("	beq		mem_copy_end ");		// skip if remaining length zero
       
   446     asm("	msr		cpsr_flg, ip ");
       
   447     asm("	ldmmiia	r1!, {r3-r10} ");
       
   448     asm("	stmmiia	r0!, {r3-r10} ");		// copy 32	
       
   449     asm("	ldmeqia	r1!, {r3-r6} ");
       
   450     asm("	ldmcsia	r1!, {r7-r8} ");
       
   451     asm("	ldrvs	r9, [r1], #4 ");
       
   452     asm("	stmeqia	r0!, {r3-r6} ");		// copy 16
       
   453     asm("	stmcsia	r0!, {r7-r8} ");		// copy 8
       
   454     asm("	strvs	r9, [r0], #4 ");		// copy 4
       
   455 
       
   456     asm("	movs	ip, r2, lsl #30 ");	
       
   457 	asm("	bne		smallest_copy_fore ");
       
   458 	
       
   459 	asm("mem_copy_end: ");
       
   460 	__POPRET("r0,r4-r11,");
       
   461 
       
   462 	
       
   463 //
       
   464 // Less than 4 bytes to go...
       
   465 //
       
   466 	
       
   467 	asm("smallest_copy_fore: ");
       
   468     asm("	msr		cpsr_flg, ip ");
       
   469     asm("	ldrmih	r3, [r1], #2 ");
       
   470     asm("	ldreqb	r4, [r1], #1 ");
       
   471     asm("	strmih	r3, [r0], #2 ");		// copy 2
       
   472     asm("	streqb	r4, [r0], #1 ");		// copy 1
       
   473 	__POPRET("r0,r4-r11,");
       
   474 
       
   475 	
       
   476 //
       
   477 // Do byte moves if necessary to word-align destination
       
   478 //
       
   479 	asm("dest_unaligned_fore: ");
       
   480 	asm("	rsb		r3, r3, #0 ");
       
   481 	asm("	msr		cpsr_flg, r3 ");
       
   482 	asm("	ldrmib	r4, [r1], #1 ");				// move bytes to align destination
       
   483 	asm("	ldrmib	r5, [r1], #1 ");
       
   484 	asm("	ldreqb	r6, [r1], #1 ");
       
   485 	asm("	sub		r2, r2, r3, lsr #30 ");			// adjust length, at least 13 bytes remaining
       
   486 	asm("	strmib	r4, [r0], #1 ");
       
   487 	asm("	strmib	r5, [r0], #1 ");
       
   488 	asm("	streqb	r6, [r0], #1 ");
       
   489 	asm("   b		dest_aligned_fore ");
       
   490 
       
   491 	
       
   492 //
       
   493 //	Large copy, length >= 64
       
   494 //
       
   495 	
       
   496 	asm("large_copy_fore: ");
       
   497 	asm("	movs	ip, r2, lsr #6 ");						// ip = number of 64 blocks to copy
       
   498 	asm("1: ");
       
   499 	PLD_ioff(1, 32);
       
   500 	PLD_ioff(1, 64);
       
   501     asm("	ldmia	r1!, {r3-r10} ");		// Copy 64
       
   502     asm("	stmia	r0!, {r3-r10} "); 
       
   503     asm("	ldmia	r1!, {r3-r10} ");
       
   504     asm("	subs	ip, ip, #1 ");
       
   505     asm("	stmia	r0!, {r3-r10} "); 	
       
   506 	asm("	bne		1b ");		
       
   507 	asm("	and		r2, r2, #63 ");
       
   508 	asm("	b		its_smaller_fore ");
       
   509 
       
   510 	
       
   511 //
       
   512 // Forward unaligned copy
       
   513 //	
       
   514 	
       
   515 	asm("copy_fwd_nonaligned:");
       
   516 //
       
   517 // superalign
       
   518 //	
       
   519 	asm("	bic		r1, r1, #3 ");					// align source
       
   520 	asm("	ldr		r11, [r1], #4 ");				// get first word
       
   521 	asm("	mov		r12, r12, lsl #3 ");			// r12 = 8*source alignment
       
   522 	asm("	ands	r4, r0, #31 ");					// destination alignment into r4
       
   523 	asm("	beq		medium_unal_copy ");			// skip if already aligned
       
   524 	asm("	rsb		r4, r4, #32 ");					// r4 = bytes to align dest to 32
       
   525 	asm("	cmp		r2, r4 ");						// check if length big enough to align to 32
       
   526 	asm("	blo		copy_fwd_remainder ");			// skip if too small
       
   527 	asm("	sub		r2, r2, r4 ");					// adjust length
       
   528 	asm("	rsb		r3, r12, #32 ");				// r3 = 32 - 8*source alignment
       
   529 
       
   530 	asm("1: ");
       
   531 	asm("	mov		r5, r11, lsr r12 ");			// r5 = part of previous source word required to make destination word
       
   532 	asm("	ldr		r11, [r1], #4 ");				// get next word
       
   533 	asm("	subs	r4, r4, #4 ");					// 4 bytes less to do
       
   534 	asm("	orr		r5, r5, r11, lsl r3 ");			// form next destination word
       
   535 	asm("	str		r5, [r0], #4 ");				// and store it
       
   536 	asm("	bne		1b ");							// loop until destination 32 byte aligned
       
   537 
       
   538 	asm("medium_unal_copy: ");						// destination now aligned to 32 bytes
       
   539 	asm("	movs	lr, r2, lsr #5 ");				// lr=number of 32-byte blocks
       
   540 	asm("	beq		copy_fwd_remainder ");			// skip if length < 32
       
   541 
       
   542 	asm("	cmp		r12, #16 ");
       
   543 	asm("	beq		copy_fwd_nonaligned_2 ");		// branch if source = 2 mod 4
       
   544 	asm("	bhi		copy_fwd_nonaligned_3 ");		// branch if source = 3 mod 4, else source = 1 mod 4
       
   545 
       
   546 // source = 1 mod 4
       
   547 	asm("copy_fwd_nonaligned_1: ");
       
   548 	asm("	mov		r3, r11, lsr #8 ");
       
   549 	asm("	ldmia	r1!, {r4-r11} ");
       
   550 	PLD_ioff(1, 32);
       
   551 	asm("	subs	lr, lr, #1 ");
       
   552 	asm("	orr		r3, r3, r4, lsl #24 ");
       
   553 	asm("	mov		r4, r4, lsr #8 ");
       
   554 	asm("	orr		r4, r4, r5, lsl #24 ");
       
   555 	asm("	mov		r5, r5, lsr #8 ");
       
   556 	asm("	orr		r5, r5, r6, lsl #24 ");
       
   557 	asm("	mov		r6, r6, lsr #8 ");
       
   558 	asm("	orr		r6, r6, r7, lsl #24 ");
       
   559 	asm("	mov		r7, r7, lsr #8 ");
       
   560 	asm("	orr		r7, r7, r8, lsl #24 ");
       
   561 	asm("	mov		r8, r8, lsr #8 ");
       
   562 	asm("	orr		r8, r8, r9, lsl #24 ");
       
   563 	asm("	mov		r9, r9, lsr #8 ");
       
   564 	asm("	orr		r9, r9, r10, lsl #24 ");
       
   565 	asm("	mov		r10, r10, lsr #8 ");
       
   566 	asm("	orr		r10, r10, r11, lsl #24 ");
       
   567 	asm("	stmia	r0!, {r3-r10} ");
       
   568 	asm("	bne		copy_fwd_nonaligned_1 ");
       
   569 	asm("	b		copy_fwd_remainder ");
       
   570 
       
   571 // source = 2 mod 4
       
   572 	asm("copy_fwd_nonaligned_2: ");
       
   573 	asm("	mov		r3, r11, lsr #16 ");
       
   574 	asm("	ldmia	r1!, {r4-r11} ");
       
   575 	PLD_ioff(1, 32);
       
   576 	asm("	subs	lr, lr, #1 ");
       
   577 	asm("	orr		r3, r3, r4, lsl #16 ");
       
   578 	asm("	mov		r4, r4, lsr #16 ");
       
   579 	asm("	orr		r4, r4, r5, lsl #16 ");
       
   580 	asm("	mov		r5, r5, lsr #16 ");
       
   581 	asm("	orr		r5, r5, r6, lsl #16 ");
       
   582 	asm("	mov		r6, r6, lsr #16 ");
       
   583 	asm("	orr		r6, r6, r7, lsl #16 ");
       
   584 	asm("	mov		r7, r7, lsr #16 ");
       
   585 	asm("	orr		r7, r7, r8, lsl #16 ");
       
   586 	asm("	mov		r8, r8, lsr #16 ");
       
   587 	asm("	orr		r8, r8, r9, lsl #16 ");
       
   588 	asm("	mov		r9, r9, lsr #16 ");
       
   589 	asm("	orr		r9, r9, r10, lsl #16 ");
       
   590 	asm("	mov		r10, r10, lsr #16 ");
       
   591 	asm("	orr		r10, r10, r11, lsl #16 ");
       
   592 	asm("	stmia	r0!, {r3-r10} ");
       
   593 	asm("	bne		copy_fwd_nonaligned_2 ");
       
   594 	asm("	b		copy_fwd_remainder ");
       
   595 
       
   596 // source = 3 mod 4
       
   597 	asm("copy_fwd_nonaligned_3: ");
       
   598 	asm("	mov		r3, r11, lsr #24 ");
       
   599 	asm("	ldmia	r1!, {r4-r11} ");
       
   600 	PLD_ioff(1, 32);
       
   601 	asm("	subs	lr, lr, #1 ");
       
   602 	asm("	orr		r3, r3, r4, lsl #8 ");
       
   603 	asm("	mov		r4, r4, lsr #24 ");
       
   604 	asm("	orr		r4, r4, r5, lsl #8 ");
       
   605 	asm("	mov		r5, r5, lsr #24 ");
       
   606 	asm("	orr		r5, r5, r6, lsl #8 ");
       
   607 	asm("	mov		r6, r6, lsr #24 ");
       
   608 	asm("	orr		r6, r6, r7, lsl #8 ");
       
   609 	asm("	mov		r7, r7, lsr #24 ");
       
   610 	asm("	orr		r7, r7, r8, lsl #8 ");
       
   611 	asm("	mov		r8, r8, lsr #24 ");
       
   612 	asm("	orr		r8, r8, r9, lsl #8 ");
       
   613 	asm("	mov		r9, r9, lsr #24 ");
       
   614 	asm("	orr		r9, r9, r10, lsl #8 ");
       
   615 	asm("	mov		r10, r10, lsr #24 ");
       
   616 	asm("	orr		r10, r10, r11, lsl #8 ");
       
   617 	asm("	stmia	r0!, {r3-r10} ");
       
   618 	asm("	bne		copy_fwd_nonaligned_3 ");
       
   619 
       
   620 // <32 bytes to go, source alignment could be 1, 2 or 3 mod 4
       
   621 // r12 = 8 * (source mod 4)
       
   622 	asm("copy_fwd_remainder: ");
       
   623 	asm("	ands	r4, r2, #0x1c ");			// r4 = 4*number of words left
       
   624 	asm("	beq		2f ");						// skip if none
       
   625 	asm("	rsb		r3, r12, #32 ");			// r3 = 32 - 8*source alignment
       
   626 
       
   627 	asm("1: ");
       
   628 	asm("	mov		r5, r11, lsr r12 ");		// r5 = part of previous source word required to make destination word
       
   629 	asm("	ldr		r11, [r1], #4 ");			// get next word
       
   630 	asm("	subs	r4, r4, #4 ");				// 4 bytes less to do
       
   631 	asm("	orr		r5, r5, r11, lsl r3 ");		// form next destination word
       
   632 	asm("	str		r5, [r0], #4 ");			// and store it
       
   633 	asm("	bne		1b ");						// loop until destination 32 byte aligned
       
   634 
       
   635 	asm("2: ");
       
   636 	asm("	sub		r1, r1, #4 ");
       
   637 	asm("	add		r1, r1, r12, lsr #3 ");		// r1 = real unaligned source address
       
   638 	asm("	tst		r2, #2 ");					// 2 bytes left?
       
   639 	asm("	ldrneb	r5, [r1], #1 ");			// copy 2
       
   640 	asm("	strneb	r5, [r0], #1 ");
       
   641 	asm("	ldrneb	r5, [r1], #1 ");
       
   642 	asm("	strneb	r5, [r0], #1 ");
       
   643 	asm("	tst		r2, #1 ");					// 1 byte left?
       
   644 	asm("	ldrneb	r5, [r1], #1 ");			// copy 1
       
   645 	asm("	strneb	r5, [r0], #1 ");
       
   646 	__POPRET("r0,r4-r11,");
       
   647 
       
   648 	
       
   649 //
       
   650 // Source is before destination and they overlap, so need to copy backwards
       
   651 //
       
   652 	
       
   653     asm("copy_back:");
       
   654 	asm("	add		r0, r0, r2 ");				// r0=last dest address+1
       
   655 	asm("	add		r1, r1, r2 ");				// r1=last source address+1
       
   656 	PLD_noff(1, 33);							// preload last two cache lines
       
   657 	PLD_noff(1, 1);
       
   658 
       
   659     asm("	movs	r3, r0, lsl #30 ");			// check destination word aligned
       
   660 	asm("	bne		dest_unaligned_back ");
       
   661 	
       
   662 	asm("dest_aligned_back: ");
       
   663 	asm("	ands	r12, r1, #3 ");					// r12=alignment of source
       
   664     asm("	bne		copy_back_nonaligned ");
       
   665 
       
   666 //
       
   667 // Backwards copying, addresses both word aligned, at least 13 bytes to go
       
   668 //
       
   669 	
       
   670     asm("mem_move_back:");
       
   671 //
       
   672 // superalign
       
   673 // 
       
   674 	asm("	movs	r4, r0, lsl #27 ");					// bytes required to align destination to 32
       
   675 	asm("	beq		bal_already_aligned ");				// skip if already aligned to 32
       
   676 	asm("	cmp		r2, r4, lsr #27 ");					// check that many remaining
       
   677 	asm("	blo		its_smaller_back ");				// if too short, just stick with word alignment
       
   678 	asm("	msr		cpsr_flg, r4 ");		 		 	// destination alignment into N, Z, C flags
       
   679 														// do word moves to align destination
       
   680 	asm("	ldrcs	lr, [r1, #-4]! ");					// C flag == 1 word (we are already word aligned)
       
   681 	asm("	ldmeqdb	r1!, {r3,r9} ");					// Z flag == 2 words
       
   682 	asm("	ldmmidb	r1!, {r5-r8} ");
       
   683 	asm("	sub		r2, r2, r4, lsr #27 ");		 		// adjust length
       
   684 	asm("	strcs	lr, [r0, #-4]! ");					// destination now 8 byte aligned
       
   685 	asm("	stmeqdb	r0!, {r3,r9} ");					// destination now 16 byte aligned
       
   686 	asm("	stmmidb	r0!, {r5-r8} ");					// N flag == 4 words, destination now 32 byte aligned
       
   687 
       
   688 	asm("bal_already_aligned: ");
       
   689 	asm("	cmp		r2, #64 ");
       
   690 	asm("	bhs		large_copy_back ");
       
   691 //
       
   692 // Less than 64 bytes to go
       
   693 //
       
   694     asm("its_smaller_back: ");
       
   695     asm("	movs	ip, r2, lsl #26 ");		// r2 = remaining length (<256) << 24
       
   696 	asm("	beq		mem_copy_end2 ");		// skip if remaining length zero
       
   697     asm("	msr		cpsr_flg, ip ");
       
   698     asm("	ldmmidb	r1!, {r3-r10} ");
       
   699     asm("	stmmidb	r0!, {r3-r10} ");		// copy 32
       
   700     asm("	ldmeqdb	r1!, {r3-r6} ");
       
   701     asm("	ldmcsdb	r1!, {r7,r8} ");
       
   702     asm("   ldrvs	r9, [r1, #-4]! ");
       
   703     asm("	stmeqdb	r0!, {r3-r6} ");		// copy 16
       
   704     asm("	stmcsdb	r0!, {r7,r8} ");		// copy 8
       
   705     asm("   strvs	r9, [r0, #-4]! ");		// copy 4
       
   706 	
       
   707     asm("	movs	ip, r2, lsl #30 ");
       
   708 	asm("	bne		smallest_copy_back ");
       
   709 
       
   710 	asm("mem_copy_end2: ");
       
   711 	__POPRET("r0,r4-r11,");
       
   712 
       
   713 	
       
   714 //
       
   715 // Less than 4 bytes to go...
       
   716 //
       
   717 	
       
   718 	asm("smallest_copy_back: ");
       
   719     asm("	msr		cpsr_flg, ip ");
       
   720     asm("	ldrmih	r3, [r1, #-2]! ");
       
   721     asm("	ldreqb	r4, [r1, #-1]! ");
       
   722     asm("	strmih	r3, [r0, #-2]! ");		// copy 2
       
   723     asm("	streqb	r4, [r0, #-1]! ");		// copy 1
       
   724 	__POPRET("r0,r4-r11,");
       
   725 	
       
   726 
       
   727 //
       
   728 // Do byte moves if necessary to word-align destination
       
   729 //
       
   730 	asm("dest_unaligned_back: ");
       
   731 	asm("	msr		cpsr_flg, r3 ");				// destination alignment in r3 into N,Z flags
       
   732 	asm("	ldrmib	r4, [r1, #-1]! ");				// do byte moves to align destination
       
   733 	asm("	ldrmib	r5, [r1, #-1]! ");
       
   734 	asm("	ldreqb	r6, [r1, #-1]! ");
       
   735 	asm("	sub		r2, r2, r3, lsr #30 ");			// adjust length, at least 13 bytes remaining
       
   736 	asm("	strmib	r4, [r0, #-1]! ");
       
   737 	asm("	strmib	r5, [r0, #-1]! ");
       
   738 	asm("	streqb	r6, [r0, #-1]! ");
       
   739 	asm("	b		dest_aligned_back ");
       
   740 
       
   741 
       
   742 //
       
   743 //	Large backwards copy, length >= 64
       
   744 //	
       
   745 
       
   746 	asm("large_copy_back: ");
       
   747     asm("	movs	ip, r2, lsr #6 ");
       
   748 	asm("1: ");
       
   749 	PLD_noff(1, 65);
       
   750 	PLD_noff(1, 33);
       
   751     asm("	ldmdb	r1!, {r3-r10} ");		// Copy 64
       
   752     asm("	stmdb	r0!, {r3-r10} "); 
       
   753     asm("	ldmdb	r1!, {r3-r10} ");
       
   754     asm("	subs	ip, ip, #1 ");
       
   755     asm("	stmdb	r0!, {r3-r10} "); 
       
   756 	asm("	bne		1b ");		
       
   757 	asm("	and		r2, r2, #63 ");
       
   758 	asm("	b		its_smaller_back ");
       
   759 
       
   760 //
       
   761 // Backwards unaligned copy
       
   762 //	
       
   763 
       
   764 	asm("copy_back_nonaligned: ");
       
   765 //
       
   766 // superalign
       
   767 //
       
   768 	asm("	bic		r1, r1, #3 ");					// align source
       
   769 	asm("	ldr		r3, [r1] ");					// get first word
       
   770 	asm("	mov		r12, r12, lsl #3 ");			// r12 = 8*source alignment
       
   771 	asm("	ands	r4, r0, #31 ");					// r4 = bytes to align dest to 32
       
   772 	asm("	beq		bunal_already_aligned ");		// skip if already aligned
       
   773 	asm("	cmp		r2, r4 ");						// check if length big enough to align to 32
       
   774 	asm("	blo		copy_back_remainder ");			// skip if too small
       
   775 	asm("	sub		r2, r2, r4 ");					// adjust length
       
   776 	asm("	rsb		r6, r12, #32 ");				// r6 = 32 - 8*source alignment
       
   777 
       
   778 	asm("1: ");
       
   779 	asm("	mov		r5, r3, lsl r6 ");				// r5 = part of previous source word required to make destination word
       
   780 	asm("	ldr		r3, [r1, #-4]! ");				// get next word
       
   781 	asm("	subs	r4, r4, #4 ");					// 4 bytes less to do
       
   782 	asm("	orr		r5, r5, r3, lsr r12 ");			// form next destination word
       
   783 	asm("	str		r5, [r0, #-4]! ");				// and store it
       
   784 	asm("	bne		1b ");							// loop until destination 32 byte aligned
       
   785 
       
   786 	asm("bunal_already_aligned: ");					// destination now aligned to 32 bytes
       
   787 	asm("	movs	lr, r2, lsr #5 ");				// lr=number of 32-byte blocks
       
   788 	asm("	beq		copy_back_remainder ");			// skip if length < 32
       
   789 
       
   790 	asm("	cmp		r12, #16 ");
       
   791 	asm("	beq		copy_back_nonaligned_2 ");		// branch if source = 2 mod 4
       
   792 	asm("	bhi		copy_back_nonaligned_3 ");		// branch if source = 3 mod 4, else source = 1 mod 4
       
   793 
       
   794 // source = 1 mod 4
       
   795 	asm("copy_back_nonaligned_1: ");
       
   796 	asm("	mov		r11, r3, lsl #24 ");
       
   797 	asm("	ldmdb	r1!, {r3-r10} ");
       
   798 	PLD_noff(1, 64);
       
   799 	asm("	orr		r11, r11, r10, lsr #8 ");
       
   800 	asm("	mov		r10, r10, lsl #24 ");
       
   801 	asm("	orr		r10, r10, r9, lsr #8 ");
       
   802 	asm("	mov		r9, r9, lsl #24 ");
       
   803 	asm("	orr		r9, r9, r8, lsr #8 ");
       
   804 	asm("	mov		r8, r8, lsl #24 ");
       
   805 	asm("	orr		r8, r8, r7, lsr #8 ");
       
   806 	asm("	mov		r7, r7, lsl #24 ");
       
   807 	asm("	orr		r7, r7, r6, lsr #8 ");
       
   808 	asm("	mov		r6, r6, lsl #24 ");
       
   809 	asm("	orr		r6, r6, r5, lsr #8 ");
       
   810 	asm("	mov		r5, r5, lsl #24 ");
       
   811 	asm("	orr		r5, r5, r4, lsr #8 ");
       
   812 	asm("	mov		r4, r4, lsl #24 ");
       
   813 	asm("	orr		r4, r4, r3, lsr #8 ");
       
   814 	asm("	stmdb	r0!, {r4-r11} ");
       
   815 	asm("	subs	lr, lr, #1 ");
       
   816 	asm("	bne		copy_back_nonaligned_1 ");
       
   817 	asm("	b		copy_back_remainder ");
       
   818 
       
   819 // source = 2 mod 4
       
   820 	asm("copy_back_nonaligned_2: ");
       
   821 	asm("	mov		r11, r3, lsl #16 ");
       
   822 	asm("	ldmdb	r1!, {r3-r10} ");
       
   823 	PLD_noff(1, 64);
       
   824 	asm("	orr		r11, r11, r10, lsr #16 ");
       
   825 	asm("	mov		r10, r10, lsl #16 ");
       
   826 	asm("	orr		r10, r10, r9, lsr #16 ");
       
   827 	asm("	mov		r9, r9, lsl #16 ");
       
   828 	asm("	orr		r9, r9, r8, lsr #16 ");
       
   829 	asm("	mov		r8, r8, lsl #16 ");
       
   830 	asm("	orr		r8, r8, r7, lsr #16 ");
       
   831 	asm("	mov		r7, r7, lsl #16 ");
       
   832 	asm("	orr		r7, r7, r6, lsr #16 ");
       
   833 	asm("	mov		r6, r6, lsl #16 ");
       
   834 	asm("	orr		r6, r6, r5, lsr #16 ");
       
   835 	asm("	mov		r5, r5, lsl #16 ");
       
   836 	asm("	orr		r5, r5, r4, lsr #16 ");
       
   837 	asm("	mov		r4, r4, lsl #16 ");
       
   838 	asm("	orr		r4, r4, r3, lsr #16 ");
       
   839 	asm("	stmdb	r0!, {r4-r11} ");
       
   840 	asm("	subs	lr, lr, #1 ");
       
   841 	asm("	bne		copy_back_nonaligned_2 ");
       
   842 	asm("	b		copy_back_remainder ");
       
   843 
       
   844 // source = 3 mod 4
       
   845 	asm("copy_back_nonaligned_3: ");
       
   846 	asm("	mov		r11, r3, lsl #8 ");
       
   847 	asm("	ldmdb	r1!, {r3-r10} ");
       
   848 	PLD_noff(1, 64);
       
   849 	asm("	orr		r11, r11, r10, lsr #24 ");
       
   850 	asm("	mov		r10, r10, lsl #8 ");
       
   851 	asm("	orr		r10, r10, r9, lsr #24 ");
       
   852 	asm("	mov		r9, r9, lsl #8 ");
       
   853 	asm("	orr		r9, r9, r8, lsr #24 ");
       
   854 	asm("	mov		r8, r8, lsl #8 ");
       
   855 	asm("	orr		r8, r8, r7, lsr #24 ");
       
   856 	asm("	mov		r7, r7, lsl #8 ");
       
   857 	asm("	orr		r7, r7, r6, lsr #24 ");
       
   858 	asm("	mov		r6, r6, lsl #8 ");
       
   859 	asm("	orr		r6, r6, r5, lsr #24 ");
       
   860 	asm("	mov		r5, r5, lsl #8 ");
       
   861 	asm("	orr		r5, r5, r4, lsr #24 ");
       
   862 	asm("	mov		r4, r4, lsl #8 ");
       
   863 	asm("	orr		r4, r4, r3, lsr #24 ");
       
   864 	asm("	stmdb	r0!, {r4-r11} ");
       
   865 	asm("	subs	lr, lr, #1 ");
       
   866 	asm("	bne		copy_back_nonaligned_3 ");
       
   867 
       
   868 // <32 bytes to go, source alignment could be 1, 2 or 3 mod 4
       
   869 // r12 = 8 * (source mod 4)
       
   870 	asm("copy_back_remainder: ");
       
   871 	asm("	ands	r4, r2, #0x1c ");			// r4 = 4*number of words left
       
   872 	asm("	beq		2f ");						// skip if none
       
   873 	asm("	rsb		r6, r12, #32 ");			// r6 = 32 - 8*source alignment
       
   874 
       
   875 	asm("1: ");
       
   876 	asm("	mov		r5, r3, lsl r6 ");			// r5 = part of previous source word required to make destination word
       
   877 	asm("	ldr		r3, [r1, #-4]! ");			// get next word
       
   878 	asm("	subs	r4, r4, #4 ");				// 4 bytes less to do
       
   879 	asm("	orr		r5, r5, r3, lsr r12 ");		// form next destination word
       
   880 	asm("	str		r5, [r0, #-4]! ");			// and store it
       
   881 	asm("	bne		1b ");						// loop until destination 32 byte aligned
       
   882 
       
   883 	asm("2: ");
       
   884 	asm("	add		r1, r1, r12, lsr #3 ");		// r1 = real unaligned source address
       
   885 	asm("	tst		r2, #2 ");					// 2 bytes left?
       
   886 	asm("	ldrneb	r3, [r1, #-1]! ");			// copy 2
       
   887 	asm("	strneb	r3, [r0, #-1]! ");
       
   888 	asm("	ldrneb	r3, [r1, #-1]! ");
       
   889 	asm("	strneb	r3, [r0, #-1]! ");
       
   890 	asm("	tst		r2, #1 ");					// 1 byte left?
       
   891 	asm("	ldrneb	r3, [r1, #-1]! ");			// copy 1
       
   892 	asm("	strneb	r3, [r0, #-1]! ");
       
   893 	__POPRET("r0,r4-r11,");
       
   894     }
       
   895 
       
   896 #endif  // USE_REPLACEMENT_MEMCPY
       
   897 
       
   898 
       
   899 #ifndef __KERNEL_MODE__
       
   900 #ifdef __GCC32__ 
       
   901 /**
       
   902 Compares a block of data at one specified location with a block of data at 
       
   903 another specified location.
       
   904 
       
   905 The comparison proceeds on a byte for byte basis, the result of the comparison 
       
   906 is based on the difference of the first bytes to disagree.
       
   907 
       
   908 The data at the two locations are equal if they have the same length and content. 
       
   909 Where the lengths are different and the shorter section of data is the same 
       
   910 as the first part of the longer section of data, the shorter is considered 
       
   911 to be less than the longer.
       
   912 
       
   913 @param aLeft   A pointer to the first (or left) block of 8 bit data
       
   914                to be compared.
       
   915 @param aLeftL  The length of the first (or left) block of data to be compared,  
       
   916                i.e. the number of bytes.
       
   917 @param aRight  A pointer to the second (or right) block of 8 bit data to be 
       
   918                compared.
       
   919 @param aRightL The length of the second (or right) block of data to be compared 
       
   920                i.e. the number of bytes.
       
   921                
       
   922 @return Positive, if the first (or left) block of data is greater than the 
       
   923         second (or right) block of data.
       
   924         Negative, if the first (or left) block of data is less than the
       
   925         second (or right) block of data.
       
   926         Zero, if both the first (or left) and second (or right) blocks of data
       
   927         have the same length and the same content.
       
   928 */
       
   929 EXPORT_C __NAKED__ TInt Mem::Compare(const TUint8* /*aLeft*/, TInt /*aLeftL*/, const TUint8* /*aRight*/, TInt /*aRightL*/)
       
   930 	{
       
   931 	// fall through: the body is intentionally empty so that execution continues into memcompare() below, which takes the same arguments in the same order. NOTE(review): this presumably relies on __NAKED__ emitting no prologue/return and on memcompare() being laid out directly after this function - confirm for the toolchain in use.
       
   932 	}
       
   933 #endif
       
   934 #endif
       
   935 
       
   936 
       
   937 
       
   938 // See header file e32cmn.h for the in-source documentation.
       
   939 extern "C" EXPORT_C __NAKED__ TInt memcompare(const TUint8* /*aLeft*/, TInt /*aLeftL*/, const TUint8* /*aRight*/, TInt /*aRightL*/)
       
   940 // Compares the two buffers over Min(aLeftL, aRightL) bytes.
       
   941 // If a differing byte pair is found, returns (left byte - right byte) for the first such pair.
       
   942 // Otherwise returns aLeftL - aRightL (zero when the lengths are equal).
       
   943 // Registers: r0=aLeft, r1=aLeftL, r2=aRight (also the right cursor), r3=aRightL on entry; r4=left cursor; r5=scratch;
       
   944 // r6=shorter length, later the end address of the left range; ip=whole-word count on the word-compare path; r0=result.
       
   945     {
       
   946 
       
   947     asm("   stmfd    sp!,{r4,r5,r6,lr}");	// save the work registers and the return address
       
   948     asm("   mov      r4,r0");				// r4 = left cursor; r0 is reused for intermediate results
       
   949 //
       
   950 // Get the shorter of the two lengths, and check for zero length
       
   951 // NOTE(review): only an exactly-zero length is skipped (beq); negative lengths are not rejected here - confirm callers never pass them
       
   952     asm("   cmp      r1,r3");
       
   953     asm("   mov      r6,r1");
       
   954     asm("   movge    r6,r3");				// r6 = Min(aLeftL, aRightL), signed comparison
       
   955     asm("   cmp      r6,#0");
       
   956     asm("   beq      compare_done");		// nothing to compare: result is the length difference
       
   957     asm("   cmp      r6,#16");				// these flags select the ge/lt instructions below
       
   958 //
       
   959 // With at least 16 bytes to compare, use the word-at-a-time path if both pointers have the same
       
   960 // offset within a word; with fewer than 16 bytes always use the byte-at-a-time loop.
       
   961     asm("   andge    r0,r4,#3");			// r0 = left pointer offset within its word
       
   962     asm("   andge    r5,r2,#3");			// r5 = right pointer offset within its word
       
   963     asm("   addlt    r0,r5,#1");			// short compare: r5 is not set on this path, but r5+1 never equals r5, so the test below fails
       
   964     asm("   cmp      r0,r5");
       
   965     asm("   beq      aligned_compare");		// same offset (and >= 16 bytes): go to the word-compare path
       
   966 //
       
   967 // Get aLeft+Min(aLeftL,aRightL)
       
   968 // (r6 becomes the address at which the left cursor must stop)
       
   969     asm("   add      r6,r4,r6");
       
   970 
       
   971     asm("compare_loop:");					// byte loop, unrolled four times; exits when the left cursor reaches r6
       
   972     asm("   ldrb     r0,[r4],#1");			// next left byte, advance left cursor
       
   973     asm("   ldrb     r5,[r2],#1");			// next right byte, advance right cursor
       
   974     asm("   subs     r0,r0,r5");			// r0 = left byte - right byte
       
   975 	asm("bne compare_exit ");				// bytes differ: return the difference
       
   976     asm("   cmp      r4,r6");
       
   977     asm("   beq      compare_done");		// reached the end with no difference
       
   978 
       
   979     asm("   ldrb     r0,[r4],#1");
       
   980     asm("   ldrb     r5,[r2],#1");
       
   981     asm("   subs     r0,r0,r5");
       
   982 	asm("bne compare_exit ");
       
   983     asm("   cmp      r4,r6");
       
   984     asm("   beq      compare_done");
       
   985 
       
   986     asm("   ldrb     r0,[r4],#1");
       
   987     asm("   ldrb     r5,[r2],#1");
       
   988     asm("   subs     r0,r0,r5");
       
   989 	asm("bne compare_exit ");
       
   990     asm("   cmp      r4,r6");
       
   991     asm("   beq      compare_done");
       
   992 
       
   993     asm("   ldrb     r0,[r4],#1");
       
   994     asm("   ldrb     r5,[r2],#1");
       
   995     asm("   subs     r0,r0,r5");
       
   996 	asm("bne compare_exit ");
       
   997     asm("   cmp      r4,r6");
       
   998     asm("   bne      compare_loop");		// more bytes remain: go round again
       
   999 //
       
  1000 // Return difference of lengths
       
  1001 //
       
  1002     asm("compare_done:");
       
  1003     asm("   sub      r0,r1,r3");			// r0 = aLeftL - aRightL
       
  1004 
       
  1005     asm("compare_exit:");
       
  1006 	__POPRET("r4-r6,");						// presumably restores r4-r6 and returns (macro from e32cia.h); result is in r0
       
  1007 //
       
  1008 // Compare byte at a time until word aligned...
       
  1009 //
       
  1010     asm("aligned_compare:");				// entered with r0 = common pointer offset within a word (0..3), r6 = length (>= 16)
       
  1011 //
       
  1012 // Get number of bytes to compare before word alignment reached...and jump to appropriate point
       
  1013 //
       
  1014     asm("   mov      ip,r6");				// ip = number of bytes to compare
       
  1015     asm("   add      r6,r4,r6");			// r6 = end address of the left range
       
  1016     asm("   subs     r0,r0,#1");			// r0 = offset - 1, used as the jump slot index
       
  1017     asm("   movmi    r0,#3");				// offset 0 selects slot 3 (no leading bytes)
       
  1018     asm("   rsb      r5,r0,#3");			// r5 = number of leading bytes before word alignment (3 - slot)
       
  1019     asm("   sub      ip,ip,r5");			// exclude the leading bytes from the count
       
  1020     asm("   mov      ip,ip,lsr #2");		// ip = number of whole words to compare
       
  1021 	asm("   add      pc,pc,r0,asl #4");		// computed jump: pc reads as this instruction + 8, each slot below is 4 instructions (16 bytes)
       
  1022     asm("   b        compare_done"); // Never executed: filler so that pc+8 in the jump above addresses the first slot
       
  1023 //
       
  1024 // Jump here if alignment is 1. Do not use more than 4 instructions without altering above relative jump
       
  1025 //
       
  1026     asm("   ldrb     r0,[r4],#1");
       
  1027     asm("   ldrb     r5,[r2],#1");
       
  1028     asm("   subs     r0,r0,r5");
       
  1029 	asm("bne compare_exit ");
       
  1030 //
       
  1031 // Jump here if alignment is 2. Do not use more than 4 instructions without altering above relative jump
       
  1032 //
       
  1033     asm("   ldrb     r0,[r4],#1");
       
  1034     asm("   ldrb     r5,[r2],#1");
       
  1035     asm("   subs     r0,r0,r5");
       
  1036 	asm("bne compare_exit ");
       
  1037 //
       
  1038 // Jump here if alignment is 3. Do not use more than 4 instructions without altering above relative jump
       
  1039 //
       
  1040     asm("   ldrb     r0,[r4],#1");
       
  1041     asm("   ldrb     r5,[r2],#1");
       
  1042     asm("   subs     r0,r0,r5");
       
  1043 	asm("bne compare_exit ");
       
  1044 //
       
  1045 // Must now be word aligned
       
  1046 // (slot 3 of the computed jump also lands here directly when the pointers were already aligned)
       
  1047     asm("aligned_compare_loop:");
       
  1048     asm("   ldr      r0,[r4],#4");			// next left word, advance left cursor
       
  1049     asm("   ldr      r5,[r2],#4");			// next right word, advance right cursor
       
  1050     asm("   eors     r0,r0,r5");			// non-zero if the two words differ in any bit
       
  1051     asm("   bne      word_different");
       
  1052     asm("   subs     ip,ip,#1");			// one word fewer to compare
       
  1053     asm("   bne      aligned_compare_loop");
       
  1054 //
       
  1055 // Less than 4 bytes to go...
       
  1056 //
       
  1057     asm("   cmp      r4,r6");
       
  1058     asm("   bne      compare_loop");		// finish any trailing bytes in the byte loop
       
  1059     asm("   sub      r0,r1,r3");			// no difference found: r0 = aLeftL - aRightL
       
  1060 	__POPRET("r4-r6,");
       
  1061 //
       
  1062 // A difference encountered while word comparing, find out which byte it was
       
  1063 // (both cursors have already advanced past the word, so its bytes are at offsets -4..-1)
       
  1064     asm("word_different:");
       
  1065     asm("   ldrb     r0,[r4,#-4]");
       
  1066     asm("   ldrb     r5,[r2,#-4]");
       
  1067     asm("   subs     r0,r0,r5");
       
  1068 	asm("bne compare_exit ");
       
  1069     asm("   ldrb     r0,[r4,#-3]");
       
  1070     asm("   ldrb     r5,[r2,#-3]");
       
  1071     asm("   subs     r0,r0,r5");
       
  1072 	asm("bne compare_exit ");
       
  1073     asm("   ldrb     r0,[r4,#-2]");
       
  1074     asm("   ldrb     r5,[r2,#-2]");
       
  1075     asm("   subs     r0,r0,r5");
       
  1076 	asm("bne compare_exit ");
       
  1077 //
       
  1078 // This must be the different byte...
       
  1079 //
       
  1080     asm("   ldrb     r0,[r4,#-1]");
       
  1081     asm("   ldrb     r5,[r2,#-1]");
       
  1082     asm("   sub      r0,r0,r5");			// r0 = left byte - right byte for the last byte of the word
       
  1083 	__POPRET("r4-r6,");
       
  1084     }
       
  1085 #endif
       
  1086