diff -r 000000000000 -r a41df078684a kernel/eka/common/arm/cmem.cia
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kernel/eka/common/arm/cmem.cia	Mon Oct 19 15:55:17 2009 +0100
@@ -0,0 +1,1086 @@
+// Copyright (c) 1995-2009 Nokia Corporation and/or its subsidiary(-ies).
+// All rights reserved.
+// This component and the accompanying materials are made available
+// under the terms of the License "Eclipse Public License v1.0"
+// which accompanies this distribution, and is available
+// at the URL "http://www.eclipse.org/legal/epl-v10.html".
+//
+// Initial Contributors:
+// Nokia Corporation - initial contribution.
+//
+// Contributors:
+//
+// Description:
+// e32\common\arm\cmem.cia
+//
+//
+
+#include "../common.h"
+#include <e32cia.h>
+#if defined(__REPLACE_GENERIC_UTILS)
+#include "replacement_utils.h"
+#endif
+
+#if defined(__MEM_MACHINE_CODED__)
+
+#ifndef USE_REPLACEMENT_MEMSET
+
+#if defined(_DEBUG)
+
+#ifdef __STANDALONE_NANOKERNEL__
+
+#define ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc) \
+    asm("tst "#rt1", #3"); \
+    asm("ldrne "#rt1", ["#rt1"]")
+
+#else // __STANDALONE_NANOKERNEL__
+GLDEF_C void PanicEWordMoveLengthNotMultipleOf4();
+GLDEF_C void PanicEWordMoveSourceNotAligned();
+GLDEF_C void PanicEWordMoveTargetNotAligned();
+
+#define ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc) \
+    asm("tst "#rt1", #3"); \
+    asm("bne " panicfunc )
+
+#endif // __STANDALONE_NANOKERNEL__
+
+#else // _DEBUG
+
+#define ARM_ASSERT_MULTIPLE_OF_FOUR(rt1, panicfunc)
+
+#endif // _DEBUG
+
+
+// See header file e32cmn.h for the in-source documentation.
+extern "C" EXPORT_C __NAKED__ TAny* memclr(TAny* /*aTrg*/, unsigned int /*aLength*/)
+    {
+    KMEMCLRHOOK
+    asm("mov r2, #0 ");
+    asm("b fill ");
+    }
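In C terms, memclr is just memset with a fill value of zero; the assembly keeps a single fill engine and lets memclr branch into it. A minimal sketch of that relationship (the sketch_ helper is hypothetical, not part of this patch):

    #include <string.h>

    /* memclr(trg, len) behaves like memset(trg, 0, len); the assembly
       version loads 0 into r2 and branches to the shared "fill" label
       instead of duplicating the fill loop. */
    static void* sketch_memclr(void* trg, unsigned int len)
        {
        return memset(trg, 0, len);
        }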
asm("mem_fill_end: "); + __POPRET("r0,r4-r9,"); + + // Fill last 1-3 bytes + + asm("smallest_fill: "); + asm(" msr cpsr_flg,r1"); + asm(" strmih r2,[r0],#2"); // Fill 2 + asm(" streqb r2,[r0],#1"); // Fill 1 + __POPRET("r0,r4-r9,"); + + // Fill loop for length >= 64 + + asm("big_fill: "); + asm(" mov r3,r2"); + asm(" mov r7,r2"); + asm(" mov r8,r2"); + asm(" mov r9,r2"); + asm(" movs ip,r1,lsr #8"); // Number of 256 byte blocks to fill + asm(" beq medium_fill "); + asm("fill_256_bytes_loop:"); + asm(" stmia r0!,{r2-r9}"); // Fill 256 bytes + asm(" stmia r0!,{r2-r9}"); + asm(" stmia r0!,{r2-r9}"); + asm(" stmia r0!,{r2-r9}"); + asm(" stmia r0!,{r2-r9}"); + asm(" stmia r0!,{r2-r9}"); + asm(" stmia r0!,{r2-r9}"); + asm(" stmia r0!,{r2-r9}"); + asm(" subs ip,ip,#1"); + asm(" bne fill_256_bytes_loop"); + asm("medium_fill: "); + asm(" movs ip,r1,lsl #24"); + asm(" msr cpsr_flg,ip"); + asm(" stmmiia r0!,{r2-r9}"); // Fill 128 + asm(" stmmiia r0!,{r2-r9}"); + asm(" stmmiia r0!,{r2-r9}"); + asm(" stmmiia r0!,{r2-r9}"); + asm(" stmeqia r0!,{r2-r9}"); // Fill 64 + asm(" stmeqia r0!,{r2-r9}"); + asm(" and r1, r1, #63 "); + asm(" b smaller_fill"); + + // Word-align destination address, length >= 8 + + asm("unaligned_fill: "); + asm(" rsb r3, r3, #0 "); // calculate fill length necessary for aligment + asm(" msr cpsr_flg, r3"); + asm(" streqb r2, [r0], #1 "); // align to 2 byte boundary + asm(" strmih r2, [r0], #2 "); // align to 4 byte boundary + asm(" sub r1, r1, r3, lsr #30 "); + asm(" b word_aligned_fill "); + + // Fill for length <= 8 + + asm("small_fill: "); + asm(" mov r3, r0 "); /* r3=dest */ + asm(" adr ip, small_fill_end "); + asm(" sub pc, ip, r1, lsl #2 "); + asm(" strb r2, [r3], #1"); + asm(" strb r2, [r3], #1"); + asm(" strb r2, [r3], #1"); + asm(" strb r2, [r3], #1"); + asm(" strb r2, [r3], #1"); + asm(" strb r2, [r3], #1"); + asm(" strb r2, [r3], #1"); + asm(" strb r2, [r3], #1"); + asm("small_fill_end: "); + __JUMP(,lr); + +#ifdef __EABI__ + // The AEABI switched the order of arg2 and arg3 to save an intruction when + // calling 'memset' from 'memclr' + asm(".global __aeabi_memset8 "); + asm("__aeabi_memset8: "); + asm(".global __aeabi_memset4 "); + asm("__aeabi_memset4: "); + asm(".global __aeabi_memset "); + asm("__aeabi_memset: "); + asm(" and r2, r2, #255"); + asm(" b fill "); +#endif + } + +#endif // USE_REPLACEMENT_MEMSET + +#ifndef USE_REPLACEMENT_MEMCPY + +// See header file e32cmn.h for the in-source documentation. + +extern "C" EXPORT_C __NAKED__ TAny* wordmove(TAny* /*aTrg*/, const TAny* /*aSrc*/, unsigned int /*aLength*/) +// +// Assumes all is aligned +// + { + ARM_ASSERT_MULTIPLE_OF_FOUR(r0, CSM_Z30PanicEWordMoveTargetNotAlignedv); + ARM_ASSERT_MULTIPLE_OF_FOUR(r1, CSM_Z30PanicEWordMoveSourceNotAlignedv); + ARM_ASSERT_MULTIPLE_OF_FOUR(r2, CSM_Z34PanicEWordMoveLengthNotMultipleOf4v); + + // Mask length to a multiple of four bytes to avoid memory, or register + // corruption by the special cases below. + asm("bic r2,r2,#3"); + + // Length <= 24 in ~90% of cases, however can only copy > 16 bytes in 4 + // instructions if LDM instuction restores thumb state when loading the PC. 
+
+#ifndef USE_REPLACEMENT_MEMCPY
+
+// See header file e32cmn.h for the in-source documentation.
+
+extern "C" EXPORT_C __NAKED__ TAny* wordmove(TAny* /*aTrg*/, const TAny* /*aSrc*/, unsigned int /*aLength*/)
+//
+// Assumes all is aligned
+//
+    {
+    ARM_ASSERT_MULTIPLE_OF_FOUR(r0, CSM_Z30PanicEWordMoveTargetNotAlignedv);
+    ARM_ASSERT_MULTIPLE_OF_FOUR(r1, CSM_Z30PanicEWordMoveSourceNotAlignedv);
+    ARM_ASSERT_MULTIPLE_OF_FOUR(r2, CSM_Z34PanicEWordMoveLengthNotMultipleOf4v);
+
+    // Mask length to a multiple of four bytes to avoid memory or register
+    // corruption by the special cases below.
+    asm("bic r2,r2,#3");
+
+    // Length <= 24 in ~90% of cases, but we can only copy more than 16 bytes
+    // in 4 instructions if the LDM instruction restores the Thumb state when
+    // loading the PC.
+#ifdef __CPU_ARM_LDR_PC_SETS_TBIT
+    asm("cmp r2, #24 ");
+#else
+    asm("cmp r2, #16 ");
+#endif
+    PLD(1);
+    asm("addls pc, pc, r2, lsl #2 ");       // take branch depending on size
+    asm("b 9f ");                           // too big
+
+    // 0 words
+    __JUMP(,lr);
+    __JUMP(,lr);
+    __JUMP(,lr);
+    __JUMP(,lr);
+
+    // 1 word
+    asm("ldr ip, [r1] ");
+    asm("str ip, [r0] ");
+    __JUMP(,lr);
+    __JUMP(,lr);
+
+    // 2 words
+    asm("ldmia r1, {r2,r3}");
+    asm("stmia r0, {r2,r3}");
+    __JUMP(,lr);
+    __JUMP(,lr);
+
+    // 3 words
+    asm("ldmia r1, {r2,r3,ip}");
+    asm("stmia r0, {r2,r3,ip}");
+    __JUMP(,lr);
+    __JUMP(,lr);
+
+    // 4 words
+    asm("ldmia r1, {r1,r2,r3,ip}");
+    asm("stmia r0, {r1,r2,r3,ip}");
+    __JUMP(,lr);
+    __JUMP(,lr);
+
+#ifdef __CPU_ARM_LDR_PC_SETS_TBIT
+    // 5 words
+    asm("stmfd sp!, {lr}");
+    asm("ldmia r1, {r1,r2,r3,ip,lr}");
+    asm("stmia r0, {r1,r2,r3,ip,lr}");
+    asm("ldmfd sp!, {pc}");
+
+    // 6 words
+    asm("stmfd sp!, {r4,lr}");
+    asm("ldmia r1, {r1,r2,r3,r4,ip,lr}");
+    asm("stmia r0, {r1,r2,r3,r4,ip,lr}");
+    asm("ldmfd sp!, {r4,pc}");
+#endif
+
+    asm("9: ");
+    asm("subs r3, r0, r1 ");                // r3 = dest - source
+    __JUMP(eq,lr);                          // return if source = dest
+    asm("stmfd sp!, {r0,r4-r11,lr} ");
+    asm("cmphi r2, r3 ");                   // if dest>source, compare length with dest-source
+    asm("bls mem_move_fore ");              // if dest<source or length<=dest-source, move forwards
+    asm("add r0, r0, r2 ");                 // r0 = last dest address+1
+    asm("add r1, r1, r2 ");                 // r1 = last source address+1
+    asm("b mem_move_back ");
+    }
+
+// See header file e32cmn.h for the in-source documentation.
+extern "C" EXPORT_C __NAKED__ TAny* memmove(TAny* /*aTrg*/, const TAny* /*aSrc*/, unsigned int /*aLength*/)
+    {
+    KMEMMOVEHOOK
+    // fall through to memcpy
+    }
+
+// See header file e32cmn.h for the in-source documentation.
+extern "C" EXPORT_C __NAKED__ TAny* memcpy(TAny* /*aTrg*/, const TAny* /*aSrc*/, unsigned int /*aLength*/)
+    {
+    KMEMCPYHOOK
+
+    // Check for zero length and for source == target
+    asm(" cmp r2, #0 ");                    // zero length?
+    __JUMP(eq,lr);                          // if so, nothing to do
+    asm(" subs r3, r0, r1 ");               // r3 = dest - source
+    __JUMP(eq,lr);                          // return if source = dest
+    asm(" cmphi r2, r3 ");                  // if dest>source compare length to dest-source
+    asm(" movhi r3, #0 ");                  // if dest>source and length>dest-source need to go backwards - set r3=0
+//
+// If <16 bytes, just do byte moves
+//
+    asm(" cmp r2, #15 ");
+    asm(" bhi main_copy ");
+
+    asm(" ldrb r12, [r0] ");                // read dest so it's in cache - avoid lots of single accesses to external memory
+    asm(" sub r12, r0, #1 ");
+    asm(" ldrb r12, [r12, r2] ");           // read dest+length-1
+    asm(" cmp r3, #0 ");
+    asm(" beq small_copy_back ");           // r3=0 means go backwards
+
+    asm("small_copy_fwd: ");
+    asm(" mov r3, r0 ");
+    asm(" adr r12, small_copy_fwd_end ");
+    asm(" sub pc, r12, r2, lsl #3 ");
+
+    asm(" ldrb r12, [r1], #1 ");
+    asm(" strb r12, [r3], #1 ");
+    asm(" ldrb r12, [r1], #1 ");
+    asm(" strb r12, [r3], #1 ");
+    asm(" ldrb r12, [r1], #1 ");
+    asm(" strb r12, [r3], #1 ");
+    asm(" ldrb r12, [r1], #1 ");
+    asm(" strb r12, [r3], #1 ");
+    asm(" ldrb r12, [r1], #1 ");
+    asm(" strb r12, [r3], #1 ");
+    asm(" ldrb r12, [r1], #1 ");
+    asm(" strb r12, [r3], #1 ");
+    asm(" ldrb r12, [r1], #1 ");
+    asm(" strb r12, [r3], #1 ");
+    asm(" ldrb r12, [r1], #1 ");
+    asm(" strb r12, [r3], #1 ");
+    asm(" ldrb r12, [r1], #1 ");
+    asm(" strb r12, [r3], #1 ");
+    asm(" ldrb r12, [r1], #1 ");
+    asm(" strb r12, [r3], #1 ");
+    asm(" ldrb r12, [r1], #1 ");
+    asm(" strb r12, [r3], #1 ");
+    asm(" ldrb r12, [r1], #1 ");
+    asm(" strb r12, [r3], #1 ");
+    asm(" ldrb r12, [r1], #1 ");
+    asm(" strb r12, [r3], #1 ");
+    asm(" ldrb r12, [r1], #1 ");
+    asm(" strb r12, [r3], #1 ");
+    asm(" ldrb r12, [r1], #1 ");
+    asm(" strb r12, [r3], #1 ");
+    asm("small_copy_fwd_end: ");
+    __JUMP(,lr);
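small_copy_fwd dispatches with `sub pc, r12, r2, lsl #3`: each ladder entry is two 4-byte instructions, so jumping 8*len bytes back from the end of the ladder executes exactly len byte copies. The C analogue is a fallthrough switch, in the style of Duff's device (the helper name below is hypothetical):

    /* Jump into an unrolled ladder so exactly `len` byte copies run. */
    static void sketch_small_copy(unsigned char* dst, const unsigned char* src,
                                  unsigned len)     /* len <= 15 here */
        {
        switch (len)
            {
            case 15: *dst++ = *src++; /* fall through */
            case 14: *dst++ = *src++; /* fall through */
            case 13: *dst++ = *src++; /* fall through */
            case 12: *dst++ = *src++; /* fall through */
            case 11: *dst++ = *src++; /* fall through */
            case 10: *dst++ = *src++; /* fall through */
            case 9:  *dst++ = *src++; /* fall through */
            case 8:  *dst++ = *src++; /* fall through */
            case 7:  *dst++ = *src++; /* fall through */
            case 6:  *dst++ = *src++; /* fall through */
            case 5:  *dst++ = *src++; /* fall through */
            case 4:  *dst++ = *src++; /* fall through */
            case 3:  *dst++ = *src++; /* fall through */
            case 2:  *dst++ = *src++; /* fall through */
            case 1:  *dst++ = *src++; /* fall through */
            case 0:  break;
            }
        }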
"); + asm(" strb r12, [r3, #-1]! "); + asm(" ldrb r12, [r1, #-1]! "); + asm(" strb r12, [r3, #-1]! "); + asm(" ldrb r12, [r1, #-1]! "); + asm(" strb r12, [r3, #-1]! "); + asm(" ldrb r12, [r1, #-1]! "); + asm(" strb r12, [r3, #-1]! "); + asm(" ldrb r12, [r1, #-1]! "); + asm(" strb r12, [r3, #-1]! "); + asm(" ldrb r12, [r1, #-1]! "); + asm(" strb r12, [r3, #-1]! "); + asm(" ldrb r12, [r1, #-1]! "); + asm(" strb r12, [r3, #-1]! "); + asm(" ldrb r12, [r1, #-1]! "); + asm(" strb r12, [r3, #-1]! "); + asm("small_copy_back_end: "); + __JUMP(,lr); + + + asm("main_copy: "); + PLD(1); // preload first two cache lines + PLD_ioff(1, 32); + asm(" stmfd sp!, {r0,r4-r11,lr} "); // r0 == dest, r1 == src, r2 == len + asm(" cmp r3, #0 "); + asm(" beq copy_back "); // we must go backwards + asm(" movs r3, r0, lsl #30 "); // check destination word aligned + asm(" bne dest_unaligned_fore "); + +// +// Normal copy forwards. r0 should point to end address on exit +// Destination now word-aligned; if source is also word-aligned, do aligned copy. +// + asm("dest_aligned_fore: "); + asm(" ands r12, r1, #3 "); // r12=alignment of source + asm(" bne copy_fwd_nonaligned "); + +// +// We are now word aligned, at least 13 bytes to do +// + + asm("mem_move_fore:"); +// +// superalign +// + asm(" movs r4, r0, lsl #27 "); // destination alignment into r4 + asm(" beq f_al_already_aligned "); // fast path + asm(" rsb r4, r4, #0 "); // bytes required to align destination to 32 + asm(" cmp r2, r4, lsr #27 "); // check that many remaining + asm(" blo its_smaller_fore "); // if too short, just stick with word alignment + asm(" msr cpsr_flg, r4 "); // destination alignment into N, Z, C flags + // do word moves to align destination + asm(" ldrcs lr, [r1], #4 "); // C flag == 1 word (we are already word aligned) + asm(" ldmeqia r1!, {r3,r9} "); // Z flag == 2 words + asm(" ldmmiia r1!, {r5-r8} "); // N flag == 4 words, destination now 32 byte aligned + asm(" sub r2, r2, r4, lsr #27 "); // adjust length + asm(" strcs lr, [r0], #4 "); // destination now 8 byte aligned + asm(" stmeqia r0!, {r3,r9} "); // destination now 16 byte aligned + asm(" stmmiia r0!, {r5-r8} "); // destination now 32 byte aligned + + asm("f_al_already_aligned: "); + asm(" cmp r2, #64 "); + asm(" bhs large_copy_fore "); +// +// Less than 64 bytes to go... +// + asm("its_smaller_fore:"); + asm(" movs ip, r2, lsl #26 "); // length bits 5, 4, 3, 2 into N, Z, C, V + asm(" beq mem_copy_end "); // skip if remaining length zero + asm(" msr cpsr_flg, ip "); + asm(" ldmmiia r1!, {r3-r10} "); + asm(" stmmiia r0!, {r3-r10} "); // copy 32 + asm(" ldmeqia r1!, {r3-r6} "); + asm(" ldmcsia r1!, {r7-r8} "); + asm(" ldrvs r9, [r1], #4 "); + asm(" stmeqia r0!, {r3-r6} "); // copy 16 + asm(" stmcsia r0!, {r7-r8} "); // copy 8 + asm(" strvs r9, [r0], #4 "); // copy 4 + + asm(" movs ip, r2, lsl #30 "); + asm(" bne smallest_copy_fore "); + + asm("mem_copy_end: "); + __POPRET("r0,r4-r11,"); + + +// +// Less than 4 bytes to go... 
+
+
+//
+// Less than 4 bytes to go...
+//
+
+    asm("smallest_copy_fore: ");
+    asm(" msr cpsr_flg, ip ");
+    asm(" ldrmih r3, [r1], #2 ");
+    asm(" ldreqb r4, [r1], #1 ");
+    asm(" strmih r3, [r0], #2 ");           // copy 2
+    asm(" streqb r4, [r0], #1 ");           // copy 1
+    __POPRET("r0,r4-r11,");
+
+
+//
+// Do byte moves if necessary to word-align destination
+//
+    asm("dest_unaligned_fore: ");
+    asm(" rsb r3, r3, #0 ");
+    asm(" msr cpsr_flg, r3 ");
+    asm(" ldrmib r4, [r1], #1 ");           // move bytes to align destination
+    asm(" ldrmib r5, [r1], #1 ");
+    asm(" ldreqb r6, [r1], #1 ");
+    asm(" sub r2, r2, r3, lsr #30 ");       // adjust length, at least 13 bytes remaining
+    asm(" strmib r4, [r0], #1 ");
+    asm(" strmib r5, [r0], #1 ");
+    asm(" streqb r6, [r0], #1 ");
+    asm(" b dest_aligned_fore ");
+
+
+//
+// Large copy, length >= 64
+//
+
+    asm("large_copy_fore: ");
+    asm(" movs ip, r2, lsr #6 ");           // ip = number of 64 byte blocks to copy
+    asm("1: ");
+    PLD_ioff(1, 32);
+    PLD_ioff(1, 64);
+    asm(" ldmia r1!, {r3-r10} ");           // Copy 64
+    asm(" stmia r0!, {r3-r10} ");
+    asm(" ldmia r1!, {r3-r10} ");
+    asm(" subs ip, ip, #1 ");
+    asm(" stmia r0!, {r3-r10} ");
+    asm(" bne 1b ");
+    asm(" and r2, r2, #63 ");
+    asm(" b its_smaller_fore ");
+
+
+//
+// Forward unaligned copy
+//
+
+    asm("copy_fwd_nonaligned:");
+//
+// superalign
+//
+    asm(" bic r1, r1, #3 ");                // align source
+    asm(" ldr r11, [r1], #4 ");             // get first word
+    asm(" mov r12, r12, lsl #3 ");          // r12 = 8*source alignment
+    asm(" ands r4, r0, #31 ");              // destination alignment into r4
+    asm(" beq medium_unal_copy ");          // skip if already aligned
+    asm(" rsb r4, r4, #32 ");               // r4 = bytes to align dest to 32
+    asm(" cmp r2, r4 ");                    // check if length big enough to align to 32
+    asm(" blo copy_fwd_remainder ");        // skip if too small
+    asm(" sub r2, r2, r4 ");                // adjust length
+    asm(" rsb r3, r12, #32 ");              // r3 = 32 - 8*source alignment
+
+    asm("1: ");
+    asm(" mov r5, r11, lsr r12 ");          // r5 = part of previous source word required to make destination word
+    asm(" ldr r11, [r1], #4 ");             // get next word
+    asm(" subs r4, r4, #4 ");               // 4 bytes less to do
+    asm(" orr r5, r5, r11, lsl r3 ");       // form next destination word
+    asm(" str r5, [r0], #4 ");              // and store it
+    asm(" bne 1b ");                        // loop until destination 32 byte aligned
+
+    asm("medium_unal_copy: ");              // destination now aligned to 32 bytes
+    asm(" movs lr, r2, lsr #5 ");           // lr=number of 32-byte blocks
+    asm(" beq copy_fwd_remainder ");        // skip if length < 32
+
+    asm(" cmp r12, #16 ");
+    asm(" beq copy_fwd_nonaligned_2 ");     // branch if source = 2 mod 4
+    asm(" bhi copy_fwd_nonaligned_3 ");     // branch if source = 3 mod 4, else source = 1 mod 4
+
+// source = 1 mod 4
+    asm("copy_fwd_nonaligned_1: ");
+    asm(" mov r3, r11, lsr #8 ");
+    asm(" ldmia r1!, {r4-r11} ");
+    PLD_ioff(1, 32);
+    asm(" subs lr, lr, #1 ");
+    asm(" orr r3, r3, r4, lsl #24 ");
+    asm(" mov r4, r4, lsr #8 ");
+    asm(" orr r4, r4, r5, lsl #24 ");
+    asm(" mov r5, r5, lsr #8 ");
+    asm(" orr r5, r5, r6, lsl #24 ");
+    asm(" mov r6, r6, lsr #8 ");
+    asm(" orr r6, r6, r7, lsl #24 ");
+    asm(" mov r7, r7, lsr #8 ");
+    asm(" orr r7, r7, r8, lsl #24 ");
+    asm(" mov r8, r8, lsr #8 ");
+    asm(" orr r8, r8, r9, lsl #24 ");
+    asm(" mov r9, r9, lsr #8 ");
+    asm(" orr r9, r9, r10, lsl #24 ");
+    asm(" mov r10, r10, lsr #8 ");
+    asm(" orr r10, r10, r11, lsl #24 ");
+    asm(" stmia r0!, {r3-r10} ");
+    asm(" bne copy_fwd_nonaligned_1 ");
+    asm(" b copy_fwd_remainder ");
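The three copy_fwd_nonaligned_N loops implement the classic shift-and-merge copy: the source is rounded down to a word boundary, whole words are loaded, and each destination word is spliced from two neighbouring source words with `lsr r12` / `lsl r3`, where r12 = 8*(src & 3) and r3 = 32 - r12. A C sketch under the same assumptions (little-endian, source not word aligned so the shift is 8, 16 or 24; a shift of 0 must take the aligned path, since `next << 32` is undefined in C):

    #include <stdint.h>

    /* One destination word per iteration, built from two aligned source words. */
    static void sketch_merge_copy(uint32_t* dst, const uint8_t* src, unsigned words)
        {
        unsigned shift = 8u * ((uintptr_t)src & 3u);        /* 8, 16 or 24 here */
        const uint32_t* s = (const uint32_t*)((uintptr_t)src & ~(uintptr_t)3);
        uint32_t prev = *s++;                               /* first aligned word */
        while (words--)
            {
            uint32_t next = *s++;
            /* little-endian splice: low bytes from prev, high bytes from next */
            *dst++ = (prev >> shift) | (next << (32u - shift));
            prev = next;
            }
        }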
asm(" orr r3, r3, r4, lsl #16 "); + asm(" mov r4, r4, lsr #16 "); + asm(" orr r4, r4, r5, lsl #16 "); + asm(" mov r5, r5, lsr #16 "); + asm(" orr r5, r5, r6, lsl #16 "); + asm(" mov r6, r6, lsr #16 "); + asm(" orr r6, r6, r7, lsl #16 "); + asm(" mov r7, r7, lsr #16 "); + asm(" orr r7, r7, r8, lsl #16 "); + asm(" mov r8, r8, lsr #16 "); + asm(" orr r8, r8, r9, lsl #16 "); + asm(" mov r9, r9, lsr #16 "); + asm(" orr r9, r9, r10, lsl #16 "); + asm(" mov r10, r10, lsr #16 "); + asm(" orr r10, r10, r11, lsl #16 "); + asm(" stmia r0!, {r3-r10} "); + asm(" bne copy_fwd_nonaligned_2 "); + asm(" b copy_fwd_remainder "); + +// source = 3 mod 4 + asm("copy_fwd_nonaligned_3: "); + asm(" mov r3, r11, lsr #24 "); + asm(" ldmia r1!, {r4-r11} "); + PLD_ioff(1, 32); + asm(" subs lr, lr, #1 "); + asm(" orr r3, r3, r4, lsl #8 "); + asm(" mov r4, r4, lsr #24 "); + asm(" orr r4, r4, r5, lsl #8 "); + asm(" mov r5, r5, lsr #24 "); + asm(" orr r5, r5, r6, lsl #8 "); + asm(" mov r6, r6, lsr #24 "); + asm(" orr r6, r6, r7, lsl #8 "); + asm(" mov r7, r7, lsr #24 "); + asm(" orr r7, r7, r8, lsl #8 "); + asm(" mov r8, r8, lsr #24 "); + asm(" orr r8, r8, r9, lsl #8 "); + asm(" mov r9, r9, lsr #24 "); + asm(" orr r9, r9, r10, lsl #8 "); + asm(" mov r10, r10, lsr #24 "); + asm(" orr r10, r10, r11, lsl #8 "); + asm(" stmia r0!, {r3-r10} "); + asm(" bne copy_fwd_nonaligned_3 "); + +// <32 bytes to go, source alignment could be 1, 2 or 3 mod 4 +// r12 = 8 * (source mod 4) + asm("copy_fwd_remainder: "); + asm(" ands r4, r2, #0x1c "); // r4 = 4*number of words left + asm(" beq 2f "); // skip if none + asm(" rsb r3, r12, #32 "); // r3 = 32 - 8*source alignment + + asm("1: "); + asm(" mov r5, r11, lsr r12 "); // r5 = part of previous source word required to make destination word + asm(" ldr r11, [r1], #4 "); // get next word + asm(" subs r4, r4, #4 "); // 4 bytes less to do + asm(" orr r5, r5, r11, lsl r3 "); // form next destination word + asm(" str r5, [r0], #4 "); // and store it + asm(" bne 1b "); // loop until destination 32 byte aligned + + asm("2: "); + asm(" sub r1, r1, #4 "); + asm(" add r1, r1, r12, lsr #3 "); // r1 = real unaligned source address + asm(" tst r2, #2 "); // 2 bytes left? + asm(" ldrneb r5, [r1], #1 "); // copy 2 + asm(" strneb r5, [r0], #1 "); + asm(" ldrneb r5, [r1], #1 "); + asm(" strneb r5, [r0], #1 "); + asm(" tst r2, #1 "); // 1 byte left? 
+ asm(" ldrneb r5, [r1], #1 "); // copy 1 + asm(" strneb r5, [r0], #1 "); + __POPRET("r0,r4-r11,"); + + +// +// Source is before destination and they overlap, so need to copy backwards +// + + asm("copy_back:"); + asm(" add r0, r0, r2 "); // r0=last dest address+1 + asm(" add r1, r1, r2 "); // r1=last source address+1 + PLD_noff(1, 33); // preload last two cache lines + PLD_noff(1, 1); + + asm(" movs r3, r0, lsl #30 "); // check destination word aligned + asm(" bne dest_unaligned_back "); + + asm("dest_aligned_back: "); + asm(" ands r12, r1, #3 "); // r12=alignment of source + asm(" bne copy_back_nonaligned "); + +// +// Backwards copying, addresses both word aligned, at least 13 bytes to go +// + + asm("mem_move_back:"); +// +// superalign +// + asm(" movs r4, r0, lsl #27 "); // bytes required to align destination to 32 + asm(" beq bal_already_aligned "); // skip if already aligned to 32 + asm(" cmp r2, r4, lsr #27 "); // check that many remaining + asm(" blo its_smaller_back "); // if too short, just stick with word alignment + asm(" msr cpsr_flg, r4 "); // destination alignment into N, Z, C flags + // do word moves to align destination + asm(" ldrcs lr, [r1, #-4]! "); // C flag == 1 word (we are already word aligned) + asm(" ldmeqdb r1!, {r3,r9} "); // Z flag == 2 words + asm(" ldmmidb r1!, {r5-r8} "); + asm(" sub r2, r2, r4, lsr #27 "); // adjust length + asm(" strcs lr, [r0, #-4]! "); // destination now 8 byte aligned + asm(" stmeqdb r0!, {r3,r9} "); // destination now 16 byte aligned + asm(" stmmidb r0!, {r5-r8} "); // N flag == 4 words, destination now 32 byte aligned + + asm("bal_already_aligned: "); + asm(" cmp r2, #64 "); + asm(" bhs large_copy_back "); +// +// Less than 64 bytes to go +// + asm("its_smaller_back: "); + asm(" movs ip, r2, lsl #26 "); // r2 = remaining length (<256) << 24 + asm(" beq mem_copy_end2 "); // skip if remaining length zero + asm(" msr cpsr_flg, ip "); + asm(" ldmmidb r1!, {r3-r10} "); + asm(" stmmidb r0!, {r3-r10} "); // copy 32 + asm(" ldmeqdb r1!, {r3-r6} "); + asm(" ldmcsdb r1!, {r7,r8} "); + asm(" ldrvs r9, [r1, #-4]! "); + asm(" stmeqdb r0!, {r3-r6} "); // copy 16 + asm(" stmcsdb r0!, {r7,r8} "); // copy 8 + asm(" strvs r9, [r0, #-4]! "); // copy 4 + + asm(" movs ip, r2, lsl #30 "); + asm(" bne smallest_copy_back "); + + asm("mem_copy_end2: "); + __POPRET("r0,r4-r11,"); + + +// +// Less than 4 bytes to go... +// + + asm("smallest_copy_back: "); + asm(" msr cpsr_flg, ip "); + asm(" ldrmih r3, [r1, #-2]! "); + asm(" ldreqb r4, [r1, #-1]! "); + asm(" strmih r3, [r0, #-2]! "); // copy 2 + asm(" streqb r4, [r0, #-1]! "); // copy 1 + __POPRET("r0,r4-r11,"); + + +// +// Do byte moves if necessary to word-align destination +// + asm("dest_unaligned_back: "); + asm(" msr cpsr_flg, r3 "); // destination alignment in r3 into N,Z flags + asm(" ldrmib r4, [r1, #-1]! "); // do byte moves to align destination + asm(" ldrmib r5, [r1, #-1]! "); + asm(" ldreqb r6, [r1, #-1]! "); + asm(" sub r2, r2, r3, lsr #30 "); // adjust length, at least 13 bytes remaining + asm(" strmib r4, [r0, #-1]! "); + asm(" strmib r5, [r0, #-1]! "); + asm(" streqb r6, [r0, #-1]! 
"); + asm(" b dest_aligned_back "); + + +// +// Large backwards copy, length >= 64 +// + + asm("large_copy_back: "); + asm(" movs ip, r2, lsr #6 "); + asm("1: "); + PLD_noff(1, 65); + PLD_noff(1, 33); + asm(" ldmdb r1!, {r3-r10} "); // Copy 64 + asm(" stmdb r0!, {r3-r10} "); + asm(" ldmdb r1!, {r3-r10} "); + asm(" subs ip, ip, #1 "); + asm(" stmdb r0!, {r3-r10} "); + asm(" bne 1b "); + asm(" and r2, r2, #63 "); + asm(" b its_smaller_back "); + +// +// Backwards unlaigned copy +// + + asm("copy_back_nonaligned: "); +// +// superalign +// + asm(" bic r1, r1, #3 "); // align source + asm(" ldr r3, [r1] "); // get first word + asm(" mov r12, r12, lsl #3 "); // r12 = 8*source alignment + asm(" ands r4, r0, #31 "); // r4 = bytes to align dest to 32 + asm(" beq bunal_already_aligned "); // skip if already aligned + asm(" cmp r2, r4 "); // check if length big enough to align to 32 + asm(" blo copy_back_remainder "); // skip if too small + asm(" sub r2, r2, r4 "); // adjust length + asm(" rsb r6, r12, #32 "); // r6 = 32 - 8*source alignment + + asm("1: "); + asm(" mov r5, r3, lsl r6 "); // r5 = part of previous source word required to make destination word + asm(" ldr r3, [r1, #-4]! "); // get next word + asm(" subs r4, r4, #4 "); // 4 bytes less to do + asm(" orr r5, r5, r3, lsr r12 "); // form next destination word + asm(" str r5, [r0, #-4]! "); // and store it + asm(" bne 1b "); // loop until destination 32 byte aligned + + asm("bunal_already_aligned: "); // destination now aligned to 32 bytes + asm(" movs lr, r2, lsr #5 "); // lr=number of 32-byte blocks + asm(" beq copy_back_remainder "); // skip if length < 32 + + asm(" cmp r12, #16 "); + asm(" beq copy_back_nonaligned_2 "); // branch if source = 2 mod 4 + asm(" bhi copy_back_nonaligned_3 "); // branch if source = 3 mod 4, else source = 1 mod 4 + +// source = 1 mod 4 + asm("copy_back_nonaligned_1: "); + asm(" mov r11, r3, lsl #24 "); + asm(" ldmdb r1!, {r3-r10} "); + PLD_noff(1, 64); + asm(" orr r11, r11, r10, lsr #8 "); + asm(" mov r10, r10, lsl #24 "); + asm(" orr r10, r10, r9, lsr #8 "); + asm(" mov r9, r9, lsl #24 "); + asm(" orr r9, r9, r8, lsr #8 "); + asm(" mov r8, r8, lsl #24 "); + asm(" orr r8, r8, r7, lsr #8 "); + asm(" mov r7, r7, lsl #24 "); + asm(" orr r7, r7, r6, lsr #8 "); + asm(" mov r6, r6, lsl #24 "); + asm(" orr r6, r6, r5, lsr #8 "); + asm(" mov r5, r5, lsl #24 "); + asm(" orr r5, r5, r4, lsr #8 "); + asm(" mov r4, r4, lsl #24 "); + asm(" orr r4, r4, r3, lsr #8 "); + asm(" stmdb r0!, {r4-r11} "); + asm(" subs lr, lr, #1 "); + asm(" bne copy_back_nonaligned_1 "); + asm(" b copy_back_remainder "); + +// source = 2 mod 4 + asm("copy_back_nonaligned_2: "); + asm(" mov r11, r3, lsl #16 "); + asm(" ldmdb r1!, {r3-r10} "); + PLD_noff(1, 64); + asm(" orr r11, r11, r10, lsr #16 "); + asm(" mov r10, r10, lsl #16 "); + asm(" orr r10, r10, r9, lsr #16 "); + asm(" mov r9, r9, lsl #16 "); + asm(" orr r9, r9, r8, lsr #16 "); + asm(" mov r8, r8, lsl #16 "); + asm(" orr r8, r8, r7, lsr #16 "); + asm(" mov r7, r7, lsl #16 "); + asm(" orr r7, r7, r6, lsr #16 "); + asm(" mov r6, r6, lsl #16 "); + asm(" orr r6, r6, r5, lsr #16 "); + asm(" mov r5, r5, lsl #16 "); + asm(" orr r5, r5, r4, lsr #16 "); + asm(" mov r4, r4, lsl #16 "); + asm(" orr r4, r4, r3, lsr #16 "); + asm(" stmdb r0!, {r4-r11} "); + asm(" subs lr, lr, #1 "); + asm(" bne copy_back_nonaligned_2 "); + asm(" b copy_back_remainder "); + +// source = 3 mod 4 + asm("copy_back_nonaligned_3: "); + asm(" mov r11, r3, lsl #8 "); + asm(" ldmdb r1!, {r3-r10} "); + PLD_noff(1, 64); + asm(" 
+    asm(" orr r11, r11, r10, lsr #24 ");
+    asm(" mov r10, r10, lsl #8 ");
+    asm(" orr r10, r10, r9, lsr #24 ");
+    asm(" mov r9, r9, lsl #8 ");
+    asm(" orr r9, r9, r8, lsr #24 ");
+    asm(" mov r8, r8, lsl #8 ");
+    asm(" orr r8, r8, r7, lsr #24 ");
+    asm(" mov r7, r7, lsl #8 ");
+    asm(" orr r7, r7, r6, lsr #24 ");
+    asm(" mov r6, r6, lsl #8 ");
+    asm(" orr r6, r6, r5, lsr #24 ");
+    asm(" mov r5, r5, lsl #8 ");
+    asm(" orr r5, r5, r4, lsr #24 ");
+    asm(" mov r4, r4, lsl #8 ");
+    asm(" orr r4, r4, r3, lsr #24 ");
+    asm(" stmdb r0!, {r4-r11} ");
+    asm(" subs lr, lr, #1 ");
+    asm(" bne copy_back_nonaligned_3 ");
+
+// <32 bytes to go, source alignment could be 1, 2 or 3 mod 4
+// r12 = 8 * (source mod 4)
+    asm("copy_back_remainder: ");
+    asm(" ands r4, r2, #0x1c ");            // r4 = 4*number of words left
+    asm(" beq 2f ");                        // skip if none
+    asm(" rsb r6, r12, #32 ");              // r6 = 32 - 8*source alignment
+
+    asm("1: ");
+    asm(" mov r5, r3, lsl r6 ");            // r5 = part of previous source word required to make destination word
+    asm(" ldr r3, [r1, #-4]! ");            // get next word
+    asm(" subs r4, r4, #4 ");               // 4 bytes less to do
+    asm(" orr r5, r5, r3, lsr r12 ");       // form next destination word
+    asm(" str r5, [r0, #-4]! ");            // and store it
+    asm(" bne 1b ");                        // loop until all whole words are done
+
+    asm("2: ");
+    asm(" add r1, r1, r12, lsr #3 ");       // r1 = real unaligned source address
+    asm(" tst r2, #2 ");                    // 2 bytes left?
+    asm(" ldrneb r3, [r1, #-1]! ");         // copy 2
+    asm(" strneb r3, [r0, #-1]! ");
+    asm(" ldrneb r3, [r1, #-1]! ");
+    asm(" strneb r3, [r0, #-1]! ");
+    asm(" tst r2, #1 ");                    // 1 byte left?
+    asm(" ldrneb r3, [r1, #-1]! ");         // copy 1
+    asm(" strneb r3, [r0, #-1]! ");
+    __POPRET("r0,r4-r11,");
+    }
+
+#endif // USE_REPLACEMENT_MEMCPY
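Before the hand-coded comparison routines, their contract in one C sketch (hypothetical helper): compare byte by byte up to the shorter length, return the difference of the first disagreeing bytes, and fall back to the difference of the lengths when one block is a prefix of the other.

    /* Contract of memcompare / Mem::Compare below. */
    static int sketch_memcompare(const unsigned char* l, int ll,
                                 const unsigned char* r, int rl)
        {
        int n = ll < rl ? ll : rl;
        for (int i = 0; i < n; ++i)
            if (l[i] != r[i])
                return l[i] - r[i];     /* first disagreeing byte decides */
        return ll - rl;                 /* shorter block compares as less */
        }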
+
+
+#ifndef __KERNEL_MODE__
+#ifdef __GCC32__
+/**
+Compares a block of data at one specified location with a block of data at
+another specified location.
+
+The comparison proceeds on a byte-for-byte basis; the result of the comparison
+is based on the difference of the first bytes to disagree.
+
+The data at the two locations is equal if it has the same length and content.
+Where the lengths are different and the shorter section of data is the same
+as the first part of the longer section of data, the shorter is considered
+to be less than the longer.
+
+@param aLeft   A pointer to the first (or left) block of 8 bit data
+               to be compared.
+@param aLeftL  The length of the first (or left) block of data to be compared,
+               i.e. the number of bytes.
+@param aRight  A pointer to the second (or right) block of 8 bit data to be
+               compared.
+@param aRightL The length of the second (or right) block of data to be compared,
+               i.e. the number of bytes.
+
+@return Positive, if the first (or left) block of data is greater than the
+        second (or right) block of data.
+        Negative, if the first (or left) block of data is less than the
+        second (or right) block of data.
+        Zero, if both the first (or left) and second (or right) blocks of data
+        have the same length and the same content.
+*/
+EXPORT_C __NAKED__ TInt Mem::Compare(const TUint8* /*aLeft*/, TInt /*aLeftL*/, const TUint8* /*aRight*/, TInt /*aRightL*/)
+    {
+    // fall through to memcompare
+    }
+#endif
+#endif
+
+
+
+// See header file e32cmn.h for the in-source documentation.
+extern "C" EXPORT_C __NAKED__ TInt memcompare(const TUint8* /*aLeft*/, TInt /*aLeftL*/, const TUint8* /*aRight*/, TInt /*aRightL*/)
+//
+// Compares until the smaller of the two lengths is reached.
+// If the lengths differ, returns leftlen-rightlen
+// If a difference is encountered, returns left byte-right byte
+//
+    {
+    asm(" stmfd sp!,{r4,r5,r6,lr}");
+    asm(" mov r4,r0");
+//
+// Get the shorter of the two lengths, and check for zero length
+//
+    asm(" cmp r1,r3");
+    asm(" mov r6,r1");
+    asm(" movge r6,r3");
+    asm(" cmp r6,#0");
+    asm(" beq compare_done");
+    asm(" cmp r6,#16");
+//
+// Check for aligned buffers for faster comparing if more than 16 bytes
+//
+    asm(" andge r0,r4,#3");
+    asm(" andge r5,r2,#3");
+    asm(" addlt r0,r5,#1");
+    asm(" cmp r0,r5");
+    asm(" beq aligned_compare");
+//
+// Get aLeft+Min(aLeftL,aRightL)
+//
+    asm(" add r6,r4,r6");
+
+    asm("compare_loop:");
+    asm(" ldrb r0,[r4],#1");
+    asm(" ldrb r5,[r2],#1");
+    asm(" subs r0,r0,r5");
+    asm(" bne compare_exit");
+    asm(" cmp r4,r6");
+    asm(" beq compare_done");
+
+    asm(" ldrb r0,[r4],#1");
+    asm(" ldrb r5,[r2],#1");
+    asm(" subs r0,r0,r5");
+    asm(" bne compare_exit");
+    asm(" cmp r4,r6");
+    asm(" beq compare_done");
+
+    asm(" ldrb r0,[r4],#1");
+    asm(" ldrb r5,[r2],#1");
+    asm(" subs r0,r0,r5");
+    asm(" bne compare_exit");
+    asm(" cmp r4,r6");
+    asm(" beq compare_done");
+
+    asm(" ldrb r0,[r4],#1");
+    asm(" ldrb r5,[r2],#1");
+    asm(" subs r0,r0,r5");
+    asm(" bne compare_exit");
+    asm(" cmp r4,r6");
+    asm(" bne compare_loop");
+//
+// Return difference of lengths
+//
+    asm("compare_done:");
+    asm(" sub r0,r1,r3");
+
+    asm("compare_exit:");
+    __POPRET("r4-r6,");
+//
+// Compare byte at a time until word aligned...
+//
+    asm("aligned_compare:");
+//
+// Get the number of bytes to compare before word alignment is reached, and jump to the appropriate point
+//
+    asm(" mov ip,r6");
+    asm(" add r6,r4,r6");
+    asm(" subs r0,r0,#1");
+    asm(" movmi r0,#3");
+    asm(" rsb r5,r0,#3");
+    asm(" sub ip,ip,r5");
+    asm(" mov ip,ip,lsr #2");
+    asm(" add pc,pc,r0,asl #4");
+    asm(" b compare_done");                 // Never executed
+//
+// Jump here if alignment is 1. Do not use more than 4 instructions without altering above relative jump
+//
+    asm(" ldrb r0,[r4],#1");
+    asm(" ldrb r5,[r2],#1");
+    asm(" subs r0,r0,r5");
+    asm(" bne compare_exit");
+//
+// Jump here if alignment is 2. Do not use more than 4 instructions without altering above relative jump
+//
+    asm(" ldrb r0,[r4],#1");
+    asm(" ldrb r5,[r2],#1");
+    asm(" subs r0,r0,r5");
+    asm(" bne compare_exit");
+//
+// Jump here if alignment is 3. Do not use more than 4 instructions without altering above relative jump
+//
+    asm(" ldrb r0,[r4],#1");
+    asm(" ldrb r5,[r2],#1");
+    asm(" subs r0,r0,r5");
+    asm(" bne compare_exit");
+//
+// Must now be word aligned
+//
+    asm("aligned_compare_loop:");
+    asm(" ldr r0,[r4],#4");
+    asm(" ldr r5,[r2],#4");
+    asm(" eors r0,r0,r5");
+    asm(" bne word_different");
+    asm(" subs ip,ip,#1");
+    asm(" bne aligned_compare_loop");
+//
+// Less than 4 bytes to go...
+//
+    asm(" cmp r4,r6");
+    asm(" bne compare_loop");
+    asm(" sub r0,r1,r3");
+    __POPRET("r4-r6,");
+//
+// A difference encountered while word comparing, find out which byte it was
+//
+    asm("word_different:");
+    asm(" ldrb r0,[r4,#-4]");
+    asm(" ldrb r5,[r2,#-4]");
+    asm(" subs r0,r0,r5");
+    asm(" bne compare_exit");
+    asm(" ldrb r0,[r4,#-3]");
+    asm(" ldrb r5,[r2,#-3]");
+    asm(" subs r0,r0,r5");
+    asm(" bne compare_exit");
+    asm(" ldrb r0,[r4,#-2]");
+    asm(" ldrb r5,[r2,#-2]");
+    asm(" subs r0,r0,r5");
+    asm(" bne compare_exit");
+//
+// This must be the different byte...
+//
+    asm(" ldrb r0,[r4,#-1]");
+    asm(" ldrb r5,[r2,#-1]");
+    asm(" sub r0,r0,r5");
+    __POPRET("r4-r6,");
+    }
+#endif // __MEM_MACHINE_CODED__
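The aligned inner loop compares a word at a time with `eors`; only when a word differs does `word_different` re-read the four bytes in ascending address order to find the first mismatch, keeping the common path at two loads and one XOR per four bytes. A C sketch of that recovery step (hypothetical helper, assuming the two words are already known to differ):

    #include <stdint.h>

    /* Given two differing aligned words, return the signed difference of
       the first pair of bytes (in ascending address order) that disagree. */
    static int sketch_word_diff(const uint32_t* a, const uint32_t* b)
        {
        const uint8_t* pa = (const uint8_t*)a;
        const uint8_t* pb = (const uint8_t*)b;
        for (int i = 0; i < 4; ++i)
            if (pa[i] != pb[i])
                return pa[i] - pb[i];
        return 0;                       /* not reached when *a != *b */
        }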