genericopenlibs/liboil/src/motovec/vec_memcpy.s
author hgs
Fri, 17 Sep 2010 19:25:42 +0530
changeset 67 a1e347446159
parent 18 47c74d1534e1
permissions -rw-r--r--
201037
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
18
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     1
//------------------------------------------------------------------
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     2
// file:  vec_memcpy.S
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     3
//    AltiVec enabled version of memcpy and bcopy
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     4
//------------------------------------------------------------------
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     5
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     6
//------------------------------------------------------------------
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     7
//	Copyright Motorola, Inc. 2003
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     8
//	ALL RIGHTS RESERVED
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     9
//
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    10
//	You are hereby granted a copyright license to use, modify, and 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    11
//	distribute the SOFTWARE so long as this entire notice is retained 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    12
//	without alteration in any modified and/or redistributed versions, 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    13
//	and that such modified versions are clearly identified as such.  
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    14
//	No licenses are granted by implication, estoppel or otherwise under 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    15
//	any patents or trademarks of Motorola, Inc.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    16
//
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    17
//	The SOFTWARE is provided on an "AS IS" basis and without warranty.  
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    18
//	To the maximum extent permitted by applicable law, MOTOROLA DISCLAIMS 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    19
//	ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, INCLUDING IMPLIED 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    20
//	WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    21
//	PURPOSE AND ANY WARRANTY AGAINST INFRINGEMENT WITH 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    22
//	REGARD TO THE SOFTWARE (INCLUDING ANY MODIFIED VERSIONS 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    23
//	THEREOF) AND ANY ACCOMPANYING WRITTEN MATERIALS. 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    24
//
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    25
//	To the maximum extent permitted by applicable law, IN NO EVENT SHALL 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    26
//	MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    27
//	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    28
//	BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    29
//	INFORMATION, OR OTHER PECUNIARY LOSS) ARISING OF THE USE OR 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    30
//	INABILITY TO USE THE SOFTWARE.   Motorola assumes no responsibility 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    31
//	for the maintenance and support of the SOFTWARE.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    32
//------------------------------------------------------------------
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    33
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    34
//------------------------------------------------------------------
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    35
// extern  void * memcpy(void *dst, const void *src, size_t len);
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    36
// Returns:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    37
//  void *dst
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    38
//------------------------------------------------------------------
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    39
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    40
//------------------------------------------------------------------
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    41
// extern void * memmove( void *dst, const void *src, size_t len );
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    42
//   Copies len characters from src to dst and returns the value of
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    43
//   dst.  Works correctly for overlapping memory regions.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    44
//               - Harbison&Steele 4th ed (corrected as to return)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    45
// Returns:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    46
//  void *dst
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    47
//------------------------------------------------------------------
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    48
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    49
//------------------------------------------------------------------
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    50
// extern  void * bcopy(const void *src, void *dst,  size_t len);
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    51
// Returns:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    52
//  void *dst
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    53
//------------------------------------------------------------------
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    54
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    55
// memcpy and memmove are combined into one entry point here because of
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    56
// the similarity of operation and need to create fool-proof code.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    57
// The following conditions determine what is "fool proof":
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    58
//
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    59
// if:                                          then single entry:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    60
// (DST-SRC)<0 && (SRC-DST)>=BC && BC>MIN_VEC    will b to v_memcpy
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    61
// (DST-SRC)<0 && (SRC-DST)< BC && BC>MIN_VEC    must b to v_memcpy
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    62
// (DST-SRC)<0                  && BC<=MIN_VEC   copy fwd byte-by-byte
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    63
// (DST-SRC)==0                 || BC==0         will just return
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    64
// (DST-SRC)>0                  && BC<=MIN_VEC   copy bkwd byte-by-byte
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    65
// (DST-SRC)>0 && (DST-SRC)< BC && BC>MIN_VEC    must b to v_memmove
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    66
// (DST-SRC)>0 && (DST-SRC)>=BC && BC>MIN_VEC    will b to v_memmove
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    67
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    68
// If you call memmove (or vec_memmove) and |DST-SRC|>=BC,
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    69
// this code will branch to v_memcpy anyway for maximum performance.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    70
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    71
// Revision History:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    72
//    Rev 0.0	Original                          Chuck Corley	02/03/03
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    73
//              Can still add dst, 128B loop, and aligned option
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    74
//    Rev 0.01  Fixed JY's seg-fault violation              CJC 02/17/03
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    75
//    Rev 0.1   Added 128B loop and dst; cndtnlzd dcbz      CJC 02/18/03
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    76
//              (Creating separate path for QW aligned didn't help much)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    77
//    Rev 0.11  Small code schdling; chngd dst for memmove  CJC 02/23/03
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    78
//    Rev 0.20  Eliminated alternate entry and cleanup      CJC 02/27/03                   
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    79
//    Rev 0.21  Improved loop branch targets for v_memcpy   CJC 03/01/03
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    80
//    Rev 0.22  Experimented with dst (sent to H.)          CJC 03/02/03                   
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    81
//    Rev 0.23  Substituted dcba for dcbz (sent to JY)      CJC 03/08/03                   
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    82
//    Rev 0.24  Use two dst streams                         CJC 03/12/03
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    83
//    Rev 0.25  Fix for all compilers, cleanup, and release with
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    84
//              libmotovec.a rev 0.10                       CJC 03/14/03
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    85
//    Rev 0.30  Fix for pre-empted destination (SNDF-DS)    CJC 04/02/03                   
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    86
//
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    87
//  Between Rev 0.25 and 0.30 the code was revised to store elements of
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    88
//  source at destination when first and/or last vector are less than 16
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    89
//  bytes. A reviewer at SNDF observed that loading the destination vector
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    90
//  for merging exposed the "uninvolved" destination bytes to incoherency 
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    91
//  if an interrupt pre-empted this routine and modified the "uninvolved"
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    92
//  destination vector(s) while held in register for merging.  It seems
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    93
//  like a low possibility but this revision is no longer subject to that
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    94
//  possibility.  (It is also slightly faster than Rev 0.25.)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    95
//  This is beta quality code; users are encouraged to make it faster.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    96
//  ASSUMPTIONS:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    97
//     Code is highly likely to be in the cache; data is not (streaming data)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    98
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    99
#define VRSV 256	//	VRSAVE spr
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   100
// Don't use vectors for BC <= MIN_VEC. Works only if MIN_VEC >= 16 bytes.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   101
#define MIN_VEC 16
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   102
// Don't use Big_loop in v_memcpy for |dst-src|<= minimum overlap.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   103
#define MIN_OVL 128
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   104
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   105
// Register usage
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   106
#define Rt r0	// 	r0 when used as a temporary register	
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   107
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   108
#define DST r3	// 	entering: dst pointer; exiting: same dst pointer
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   109
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   110
#define SRC r4	// 	entering: src ptr; then end of src range index (SRC+BC) in memmove
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   111
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   112
#define BC r5	//	entering: Byte_Count
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   113
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   114
#define PCS r6	//  	save for partial checksum entering
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   115
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   116
#define DMS r7	//      dst - src initially
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   117
#define BK r7	//  	BC - 1 +/- (n*16)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   118
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   119
// Codewarrior will put an unwelcome space as "lbzu	r0,1(r7 )"
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   120
// if you don't put the comment right after the r7.  CJC 030314
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   121
#define SM1 r8//	src -1 for byte-by-byte forwards initially
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   122
#define S r8	//	src[28:31]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   123
#define SMD r8	//      src[0:27]-dst[0:27]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   124
#define STR r8	//	data stream touch block & stride info for Big_loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   125
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   126
#define DM1 r9//	dst -1 for byte-by-byte forwards initially
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   127
#define D r9	//	dst[28:31]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   128
#define DNX r9	//	(dst+n*16)[28:31]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   129
#define BL r9	//	second byte_kount index pointer
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   130
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   131
#define SBC r10//	src + byte count initially then src[28:31]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   132
#define BLK r10	//      temporary data stream touch block & stride info
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   133
#define DR r10	//	(dst+16)[0:27]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   134
#define QW r10	//  	number of quad words (vectors)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   135
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   136
#define DBC r11//	dst + byte count initially
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   137
#define BLL r11	//      temporary data stream touch block & stride info
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   138
#define SBK r11	//	(src+byte_count-1)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   139
#define SBR r11	//	(src+byte_count-1)[0:27]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   140
#define DBK r11	//	(dst+byte_count-1) then (dst+byte_count-1)[28:31]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   141
#define BIG r11	//	QW/8 or 128 byte loop count
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   142
#define SP8 r11	//      SRC + n*128 (8 QWs) for data streaming after first call
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   143
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   144
#define RSV r12	//  	storage for VRSAVE register if used
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   145
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   146
#define VS0   v0	//  	src vector for permuting
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   147
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   148
#define VS1   v1	//  	src vector for permuting
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   149
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   150
#define VP3   v2	// 	d - s permute register
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   151
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   152
#define VPS0  v3	// 	permuted source vector to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   153
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   154
#define VPS1  v4	//  	2nd permuted source vector to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   155
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   156
#define VPS2  v5	//      additional permuted src in Big loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   157
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   158
#define VS2   v6	//  	src vector for permuting
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   159
#define VPS3  v6	//      additional permuted src in Big loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   160
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   161
#define VS3   v7	//      additional src load in Big loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   162
#define VPS4  v7	//      additional permuted src in Big loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   163
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   164
#define VS4   v8	//      additional src load in Big loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   165
#define VPS5  v8	//      additional permuted src in Big loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   166
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   167
#define VS5   v9	//      additional src load in Big loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   168
#define VPS6  v9	//      additional permuted src in Big loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   169
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   170
#define VS6   v10	//      additional src load in Big loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   171
#define VPS7  v10	//      additional permuted src in Big loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   172
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   173
#define VS7   v11	//      additional src load in Big loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   174
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   175
// Conditionalize the use of dcba.  It will help if the data is
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   176
// not in cache and hurt if it is.  Generally, except for small
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   177
// benchmarks repeated many times, we assume data is not in cache
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   178
// (data streaming) and using dcba is a performance boost.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   179
#ifndef NO_DCBA
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   180
#if defined(__GNUC__) || defined(__MWERKS__) || defined(_DIAB_TOOL)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   181
 // gcc, CodeWarrior and Diab don't assemble dcba, so emit the opcode words directly
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   182
#define DCBK .long 0x7c033dec
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   183
// DCBK word above encodes: dcba r3,r7    or    dcba DST,BK
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   184
#define DCBL .long 0x7c034dec
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   185
// DCBL word above encodes: dcba r3,r9     or    dcba DST,BL
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   186
#else	// not gcc/CodeWarrior/Diab
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   187
#ifdef __ghs__
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   188
.macro DCBK
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   189
.long 0x7c033dec
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   190
.endm
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   191
.macro DCBL
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   192
.long 0x7c034dec
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   193
.endm
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   194
#else	// any other assembler: use the dcba mnemonic itself
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   195
#define DCBK dcba DST,BK
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   196
#define DCBL dcba DST,BL
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   197
#endif  // __ghs__
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   198
#endif  // __GNUC__ or __MWERKS__ or _DIAB_TOOL
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   199
#else	// NO_DCBA defined: DCBK/DCBL become nops
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   200
#define DCBK nop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   201
#define DCBL nop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   202
#endif  // NO_DCBA
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   203
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   204
// Conditionalize the use of dst (data stream touch).  It will help
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   205
// if the data is not in cache and hurt if it is (though not as badly
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   206
// as dcba).  Generally, except for small benchmarks repeated many times,
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   207
// we assume data is not in cache (data streaming) and using dst is a
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   208
// performance boost.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   209
#ifndef NO_DST
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   210
#define STRM_B dst	SBC,BLL,0
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   211
#define STRM_F dst	SRC,BLK,0
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   212
#define STRM_1 dst	SP8,STR,1
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   213
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   214
#else	// NO_DST defined: the stream-touch macros become nops
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   215
#define STRM_B	nop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   216
#define STRM_F	nop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   217
#define STRM_1	nop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   218
#endif	// NO_DST
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   219
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   220
//  Condition register use
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   221
//      cr0[0:2] = (dst-src==0)? return: ((dst-src>0)? copy_bkwd, copy_fwd;);
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   222
// then cr0[0:2] = (dst[28:31]-src[28:31]<0)? "shifting left", "shifting right";
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   223
//      cr1[0,2] = (BC == 0)? 1 : 0; (nothing to move)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   224
// then cr1[2]   = (DST[28:31] == 0)? 1 : 0;  (D0 left justified)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   225
// then cr1[2]   = ((DBK = DST+BC-1)[28:31] = 0xF)? 1 : 0; (DN right justified)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   226
//      cr5[0,2] = (|DST-SRC|<=MIN_OVL)?1:0;  (Overlap too small for Big loop?)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   227
//      cr6[1,2] = (DST-SRC>=BC)?1:0;  (Okay for v_memmove to copy forward?)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   228
// then cr6[2]   = (QW == 0)? 1 : 0; (Any full vectors to move?)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   229
// then cr6[1]   = (QW > 4)? 1 : 0; (>4 vectors to move?)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   230
// then cr6[3]   = (third store[27] == 1)? 1: 0; (cache line alignment)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   231
// then cr6[3]   = (last store[27] == 1)? 1: 0; (last store odd?)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   232
//      cr7[1]   = (BC>MIN_VEC)?1:0;  (BC big enough to warrant vectors)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   233
// then cr7[0:3] = (DST+16)[0:27]-DST  (How many bytes (iff <16) in first vector?)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   234
// then cr7[1]   = (QW > 14)? 1 : 0; (>14 vectors to move?)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   235
// then cr7[0:3] = (DST+BC)[0:27]  (How many bytes (iff <16) in last vector?)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   236
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   237
	.text
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   238
#ifdef __MWERKS__
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   239
	.align	32
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   240
#else
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   241
	.align	5
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   242
#endif
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   243
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   244
//------------------------------------------------------------------
// Entry points: memmove/memcpy when LIBMOTOVEC is defined, otherwise
// vec_memmove/_vec_memcpy.  The first label of each pair sits one nop
// ahead of the second, so both names run the same code.
// Inputs are the symbolic registers DST, SRC and BC, which are defined
// outside this section (presumably destination, source and byte count
// of the C call -- TODO confirm against the register #defines).
// Flow: return at once if DST==SRC or BC==0.  If DST-SRC>0 take the
// backward path (Cpy_bkwd), otherwise copy forward.  BC>MIN_VEC
// branches to the vector code (v_memcpy / v_memmove); anything else is
// moved one byte at a time with CTR loaded from BC.
// NOTE(review): cmpi/cmp are signed compares, so a BC with its top bit
// set would compare as negative -- confirm callers never pass that.
//------------------------------------------------------------------
#ifdef LIBMOTOVEC
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   245
	.globl	memmove     
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   246
memmove:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   247
	nop			// IU1 Keeps memmove and memcpy at distinct addresses (compilers forget first label)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   248
	.globl	memcpy     
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   249
memcpy:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   250
#else	// not LIBMOTOVEC: export the vec_-prefixed names instead
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   251
	.globl	vec_memmove     
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   252
vec_memmove:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   253
	nop			// IU1 Only way I know to preserve both labels
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   254
	.globl	_vec_memcpy     
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   255
_vec_memcpy:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   256
#endif	// LIBMOTOVEC
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   257
	subf.	DMS,SRC,DST	// IU1 DMS = DST-SRC; record form sets cr0 for the tests below
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   258
	cmpi	cr1,0,BC,0	// IU1 Eliminate zero byte count moves
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   259
	cmpi	cr7,0,BC,MIN_VEC	// IU1 Check for minimum byte count
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   260
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   261
	addi	SM1,SRC,-1	// IU1 Pre-bias and duplicate src for fwd
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   262
	addi	DM1,DST,-1	// IU1 Pre-bias and duplicate destination
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   263
	add	SBC,SRC,BC	// IU1 Pre-bias and duplicate src for bkwd
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   264
	beqlr			// return if DST = SRC (cr0 from subf. above)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   265
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   266
	add	DBC,DST,BC	// IU1 Pre-bias and duplicate destination for bkwd
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   267
	subf	Rt,DST,SRC	// IU1 Form |DST-SRC| if DST-SRC<0
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   268
	beqlr	cr1		// return if BC = 0
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   269
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   270
	bgt	Cpy_bkwd	// b if DST-SRC>0 (have to copy backward)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   271
	cmpi	cr5,0,Rt,MIN_OVL	// IU1 (|DST-SRC|>MIN_OVL)?1:0; for v_memcpy
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   272
	bgt	cr7,v_memcpy	// b if BC>MIN_VEC (okay to copy vectors fwd)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   273
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   274
// Copy byte-by-byte forwards if DST-SRC<0 and BC<=MIN_VEC	
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   275
	mtctr	BC		// i=BC; do ...;i--; while (i>0)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   276
Byte_cpy_fwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   277
	lbzu	Rt,1(SM1)	// LSU * ++(DST-1) = * ++(SRC-1)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   278
	stbu	Rt,1(DM1)	// LSU Store it, advancing DM1
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   279
	bdnz	Byte_cpy_fwd
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   280
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   281
	blr
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   282
	nop			// IU1 Improve next label as branch target	
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   283
Cpy_bkwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   284
	cmpi	cr5,0,DMS,MIN_OVL	// IU1 ((DST-SRC)>MIN_OVL)?1:0; for v_memcpy
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   285
	cmp	cr6,0,DMS,BC	// IU1 cr6[1,2]=(DST-SRC>=BC)?1:0;
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   286
	bgt	cr7,v_memmove	// b if BC>MIN_VEC (copy vectors bkwd)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   287
// Copy byte-by-byte backwards if DST-SRC>0 and BC<=MIN_VEC
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   288
	mtctr	BC		// i=BC; do ...;i--; while (i>0)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   289
Byte_cpy_bwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   290
	lbzu	Rt,-1(SBC)	// LSU * --(DST+BC) = * --(SRC+BC)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   291
	stbu	Rt,-1(DBC)	// LSU Store it
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   292
	bdnz	Byte_cpy_bwd
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   293
	blr
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   294
	
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   295
#ifdef __MWERKS__
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   296
	.align	16
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   297
#else
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   298
	.align	4
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   299
#endif
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   300
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   301
v_memmove:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   302
// Byte counts <= MIN_VEC will have been copied by scalar code above,
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   303
// so this will not deal with small block moves <= MIN_VEC.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   304
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   305
// For systems using VRSAVE, define VRSAVE=1 when compiling.  For systems
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   306
// that don't, make sure VRSAVE is undefined.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   307
#ifdef VRSAVE
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   308
	mfspr	RSV,VRSV	// IU2 Get current VRSAVE contents
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   309
#endif
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   310
	rlwinm	S,SRC,0,28,31	// IU1 Save src address bits s[28:31]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   311
	rlwinm	D,DST,0,28,31	// IU1 D = dst[28:31]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   312
	bge	cr6,MC_entry	// b to v_memcpy if DST-SRC>=BC (fwd copy OK)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   313
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   314
#ifdef VRSAVE
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   315
	oris	Rt,RSV,0xfff0	// IU1 Or in registers used by this routine
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   316
#endif	
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   317
	lis	BLL,0x010c	// IU1 Stream 12 blocks of 16 bytes
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   318
	subf.	SMD,D,S		// IU1 if S-D<0 essentially shifting right
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   319
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   320
#ifdef VRSAVE
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   321
	mtspr	VRSV,Rt		// IU2 Save in VRSAVE before first vec op
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   322
#endif
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   323
	lvsr	VP3,0,DMS	// LSU Permute vector for dst - src shft right
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   324
	ori	BLL,BLL,0xffe0	// IU1 Stream stride -32B
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   325
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   326
	STRM_B			// LSU Start data stream at SRC+BC
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   327
	addi	SBK,SBC,-1	// IU1 Address of last src byte
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   328
	bgt	Rt_shft		// Bytes from upper vector = (s-d>0)?s-d:16+s-d;
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   329
	addi	SMD,SMD,16	// IU1 Save 16-(d-s)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   330
Rt_shft:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   331
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   332
	rlwinm	SBR,SBK,0,0,27	// IU1 (SRC+BC-1)[0:27]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   333
	addi	BK,BC,-1	// IU1 Initialize byte index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   334
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   335
	subf	Rt,SBR,SBC	// IU1 How many bytes in first source?
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   336
	add	DBK,DST,BK	// IU1 Address of last dst byte
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   337
	addi	DR,DST,16	// IU1 Address of second dst vector
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   338
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   339
	subf.	SMD,Rt,SMD	// IU1 if bytes in 1st src>Bytes in 1st permute
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   340
	rlwinm	Rt,DBK,0,28,31	// IU1 (DST+BC-1)[28:31]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   341
	rlwinm	DR,DR,0,0,27	// IU1 (DST+16)[0:27]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   342
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   343
// If there are more useful bytes in the upper vector of a permute pair than we
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   344
// will get in the first permute, the first loaded vector needs to be in the
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   345
// lower half of the permute pair.  The upper half is a don't care then.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   346
	blt	Get_bytes_rt	// b if shifting left (D-S>=0)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   347
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   348
	lvx	VS1,SRC,BK	// LSU Get SN load started
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   349
// Comments numbering source and destination assume single path through the
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   350
// code executing each instruction once.  For vec_memmove, an example would
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   351
// be the call memmove(BASE+0x0F, BASE+0x2F, 82). N = 6 in that case.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   352
	addi	SRC,SRC,-16	// IU1 Decrement src base (to keep BK useful)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   353
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   354
Get_bytes_rt:	// Come here to get VS0 & Don't care what VS1 is	
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   355
	lvx	VS0,SRC,BK	// LSU Get SN-1 (SN if D-S<0) in lower vector
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   356
	subf	QW,DR,DBK	// IU1 Bytes of full vectors to move (-16)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   357
	cmpi	cr7,0,Rt,0xF	// IU1 Is Dn right justified?
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   358
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   359
	cmpi	cr1,0,D,0	// IU1 Is D0 left justified?
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   360
	rlwinm	QW,QW,28,4,31	// IU1 Quad words remaining
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   361
	add	Rt,DST,BC	// IU1 Refresh the value of DST+BC
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   362
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   363
	cmpi	cr6,0,QW,0	// IU1 Any full vectors to move?
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   364
	vperm	VPS0,VS0,VS1,VP3	// VPU Align SN-1 and SN to DN
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   365
	vor	VS1,VS0,VS0	// VIU1 Move lower vector to upper
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   366
	beq	cr7,Rt_just	// b if DN is right justified
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   367
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   368
	mtcrf	0x01,Rt		// IU2 Put final vector byte count in cr7
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   369
	rlwinm	DBK,DBK,0,0,27	// IU1 Address of first byte of final vector
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   370
	li	D,0		// IU1 Initialize an index pointer
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   371
	bnl	cr7,Only_1W_bkwd	// b if there was only one or zero words to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   372
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   373
	stvewx	VPS0,DBK,D	// LSU store word 1 of two or three
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   374
	addi	D,D,4		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   375
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   376
	stvewx	VPS0,DBK,D	// LSU store word 2 of two or three
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   377
	addi	D,D,4		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   378
Only_1W_bkwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   379
	bng	cr7,Only_2W_bkwd	// b if there were only two or zero words to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   380
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   381
	stvewx	VPS0,DBK,D	// LSU store word 3 of three if necessary
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   382
	addi	D,D,4		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   383
Only_2W_bkwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   384
	bne	cr7,Only_B_bkwd	// b if there are no half words to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   385
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   386
	stvehx	VPS0,DBK,D	// LSU store one halfword if necessary
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   387
	addi	D,D,2		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   388
Only_B_bkwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   389
	bns	cr7,All_done_bkwd	// b if there are no bytes to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   390
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   391
	stvebx	VPS0,DBK,D	// LSU store one byte if necessary
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   392
	b	All_done_bkwd
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   393
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   394
Rt_just:	
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   395
	stvx	VPS0,DST,BK	// LSU Store 16 bytes at DN
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   396
All_done_bkwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   397
	addi	BK,BK,-16	// IU1 Decrement destination byte count
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   398
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   399
	ble	cr6,Last_load	// b if no Quad words to do
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   400
	mtctr	QW		// IU2 for (i=0;i<=QW;i++)-execution serializing
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   401
	cmpi	cr6,0,QW,4	// IU1 Check QW>4
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   402
QW_loop:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   403
	lvx	VS0,SRC,BK	// LSU Get SN-2 (or SN-1 if ADJ==0)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   404
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   405
	vperm	VPS0,VS0,VS1,VP3	// VPU Align SN-2 and SN-1 to DN-1
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   406
	vor	VS1,VS0,VS0	// VIU1 Move lower vector to upper
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   407
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   408
	stvx	VPS0,DST,BK	// LSU Store 16 bytes at DN-1
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   409
	addi	BK,BK,-16	// IU1 Decrement byte kount
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   410
	bdnzf	25,QW_loop	// b if 4 or less quad words to do
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   411
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   412
	add	DNX,DST,BK	// IU1 address of next store (DST+BC-1-16)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   413
	bgt	cr6,GT_4QW	// b if >4 quad words left
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   414
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   415
Last_load:	// if D-S>=0, next load will be from same address as last
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   416
	blt	No_ld_bkwd	// b if shifting right (S-D>=0)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   417
	addi	SRC,SRC,16	// IU1 recorrect source if it was decremented
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   418
No_ld_bkwd:				
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   419
	lvx	VS0,0,SRC	// LSU Get last source SN-6 (guaranteed S0)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   420
// Current 16 bytes is the last; we're done.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   421
	dss	0		// Data stream stop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   422
	vperm	VPS0,VS0,VS1,VP3	// VPU Align SN-6 and SN-5 to DN-6
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   423
	subfic	D,DST,16	// IU1 How many bytes in first destination?
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   424
	beq	cr1,Lt_just	// b if last destination is left justified
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   425
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   426
	mtcrf	0x01,D		// IU2 Put byte count remaining in cr7
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   427
	li	D,0		// IU1 Initialize index pointer
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   428
	bns	cr7,No_B_bkwd	// b if only even number of bytes to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   429
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   430
	stvebx	VPS0,DST,D	// LSU store first byte at DST+0
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   431
	addi	D,D,1		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   432
No_B_bkwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   433
	bne	cr7,No_H_bkwd	// b if only words to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   434
	stvehx	VPS0,DST,D	// LSU store halfword at DST+0/1
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   435
	addi	D,D,2		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   436
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   437
No_H_bkwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   438
	bng	cr7,No_W1_bkwd	// b if exactly zero or two words to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   439
	stvewx	VPS0,DST,D	// LSU store word 1 of one or three
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   440
	addi	D,D,4		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   441
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   442
No_W1_bkwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   443
	bnl	cr7,No_W2_bkwd	// b if there was only one word to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   444
	stvewx	VPS0,DST,D	// LSU store word 1 of two or 2 of three
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   445
	addi	D,D,4		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   446
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   447
	stvewx	VPS0,DST,D	// LSU store word 2 of two or 3 of three
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   448
	b	No_W2_bkwd
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   449
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   450
Lt_just:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   451
	stvx	VPS0,0,DST	// LSU Store 16 bytes at final dst addr D0
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   452
No_W2_bkwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   453
#ifdef VRSAVE
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   454
	mtspr	VRSV,RSV	// IU1 Restore VRSAVE	
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   455
#endif
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   456
	blr			// Return destination address from entry
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   457
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   458
GT_4QW:	// Do once if next store is to even half of cache line, else twice
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   459
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   460
	lvx	VS0,SRC,BK	// LSU Get SN-3 (or SN-2)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   461
	mtcrf	0x02,DNX	// IU2 cr6[3]=((DST+BC-1)[27]==1)?1:0;
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   462
	
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   463
	vperm	VPS0,VS0,VS1,VP3	// VPU Align SN-3 and SN-2 to Dn-2
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   464
	vor	VS1,VS0,VS0	// VIU1 Move lower vector to upper
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   465
	addi	DNX,DNX,-16	// IU1 Prepare to update cr6 next loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   466
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   467
	stvx	VPS0,DST,BK	// LSU Store 16 bytes at DN-2
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   468
	vor	VS3,VS0,VS0	// VIU Make a copy of lower vector
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   469
	addi	BK,BK,-16	// IU1 Decrement byte count by 16
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   470
	bdnzt	27,GT_4QW	// b if next store is to upper (odd) half of CL
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   471
// At this point the next store will be to the even half of a cache line.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   472
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   473
	lis	STR,0x102	// IU1 Stream 2 blocks of 16 bytes
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   474
	mtcrf	0x02,DST	// IU2 cr6[3]=(DST[27]==1)?1:0; (DST odd?)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   475
	addi	BL,BK,-16	// IU1 Create an alternate byte count - 16
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   476
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   477
	ori	STR,STR,0xffe0	// IU1 Stream stride -32B
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   478
	addi	SP8,SRC,-64	// IU1 Starting address for data stream touch
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   479
	bso	cr6,B32_bkwd	// b if DST[27] == 1; i.e, final store is odd
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   480
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   481
	bdnz	B32_bkwd	// decrement counter for last odd QW store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   482
B32_bkwd:	// Should be at least 2 stores remaining and next 2 are cache aligned
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   483
	lvx	VS2,SRC,BK	// LSU Get SN-4 (or SN-3)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   484
	addi	SP8,SP8,-32	// IU1 Next starting address for data stream touch
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   485
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   486
	lvx	VS1,SRC,BL	// LSU Get SN-5 (or SN-4)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   487
	vperm	VPS0,VS2,VS3,VP3	// VPU Align SN-4 and SN-3 to DN-3
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   488
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   489
	STRM_1			// LSU Stream 64 byte blocks ahead of loads
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   490
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   491
	DCBL			// LSU allocate next cache line
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   492
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   493
	vperm	VPS1,VS1,VS2,VP3	// VPU Align SN-5 and SN-4 to DN-4
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   494
	vor	VS3,VS1,VS1	// VIU1 Move SN-5 to SN-3
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   495
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   496
	stvx	VPS0,DST,BK	// LSU Store 16 bytes at DN-3
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   497
	addi	BK,BL,-16	// IU1 Decrement byte count
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   498
	bdz	Nxt_loc_bkwd	// always decrement and branch to next instr		
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   499
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   500
Nxt_loc_bkwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   501
	stvx	VPS1,DST,BL	// LSU Store 16 bytes at DN-4
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   502
	addi	BL,BK,-16	// IU1 Decrement alternate byte count
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   503
	bdnz	B32_bkwd	// b if there are at least two more QWs to do
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   504
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   505
	bns	cr6,One_odd_QW	// b if there was one more odd QW to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   506
	b	Last_load
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   507
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   508
// Come here with two more loads and two stores to do
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   509
One_odd_QW:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   510
	lvx	VS1,SRC,BK	// LSU Get SN-6 (or SN-5)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   511
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   512
	vperm	VPS1,VS1,VS3,VP3	// VPU Align SN-6 and SN-5 to DN-5
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   513
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   514
	stvx	VPS1,DST,BK	// LSU Store 16 bytes at DN-5
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   515
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   516
	b	Last_load
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   517
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   518
// End of memmove in AltiVec
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   519
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   520
#ifdef __MWERKS__
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   521
	.align	16
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   522
#else
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   523
	.align	4
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   524
#endif
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   525
v_memcpy:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   526
// Byte count < MIN_VEC bytes will have been copied by scalar code above,
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   527
// so this will not deal with small block moves < MIN_VEC.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   528
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   529
#ifdef VRSAVE
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   530
	mfspr	RSV,VRSV	// IU2 Get current VRSAVE contents
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   531
#endif
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   532
	rlwinm	S,SRC,0,28,31	// IU1 Save src address bits s[28:31]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   533
	rlwinm	D,DST,0,28,31	// IU1 D = dst[28:31]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   534
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   535
MC_entry:	// enter here from memmove if DST-SRC>=BC; this should be faster
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   536
#ifdef VRSAVE
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   537
	oris	Rt,RSV,0xfff0	// IU1 Or in registers used by this routine
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   538
#endif	
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   539
	lis	BLK,0x010c	// IU1 Stream 12 blocks of 16 bytes
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   540
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   541
	subf.	S,S,D		// IU1 if D-S<0 essentially shifting left
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   542
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   543
#ifdef VRSAVE
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   544
	mtspr	VRSV,Rt		// IU2 Save in VRSAVE before first vec op
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   545
#endif
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   546
	lvsr	VP3,0,DMS	// LSU Permute vector for dst - src shift right
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   547
	ori	BLK,BLK,32	// IU1 Stream stride 32B
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   548
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   549
	STRM_F			// LSU Start data stream 0 at SRC
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   550
	addi	DR,DST,16	// IU1 Address of second dst vector
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   551
	addi	DBK,DBC,-1	// IU1 Address of last dst byte
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   552
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   553
// If D-S<0 we are "kinda" shifting left with the right shift permute vector
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   554
// loaded to VP3 and we need both S0 and S1 to permute.  If D-S>=0 then the
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   555
// first loaded vector needs to be in the upper half of the permute pair and
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   556
// the lower half is a don't care then.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   557
	bge	Ld_bytes_rt	// b if shifting right (D-S>=0)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   558
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   559
	lvx	VS0,0,SRC	// LSU Get S0 load started
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   560
// Comments numbering source and destination assume single path through the
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   561
// code executing each instruction once.  For vec_memcpy, an example would
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   562
// be the call memcpy(BASE+0x1E, BASE+0x1F, 259). N = 16 in that case.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   563
	addi	SRC,SRC,16	// IU1 Increment src base (to keep BK useful)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   564
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   565
Ld_bytes_rt:	// Come here to get VS1 & Don't care what VS0 is	
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   566
	lvx	VS1,0,SRC	// LSU Get S1 (or S0 if D-S>=0) in upper vector
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   567
	rlwinm	DR,DR,0,0,27	// IU1 (DST+16)[0:27]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   568
	cmpi	cr1,0,D,0	// IU1 Is D0 left justified?
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   569
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   570
	subf	Rt,DST,DR	// IU1 How many bytes in first destination?
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   571
	subf	QW,DR,DBK	// IU1 Bytes of full vectors to move (-16)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   572
	li	BK,0		// IU1 Initialize byte kount index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   573
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   574
	mtcrf	0x01,Rt		// IU2 Put bytes in 1st dst in cr7
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   575
	rlwinm	QW,QW,28,4,31	// IU1 Quad words remaining
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   576
	vperm	VPS0,VS0,VS1,VP3	// VPU Align S0 and S1 to D0
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   577
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   578
	vor	VS0,VS1,VS1	// VIU1 Move upper vector to lower
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   579
	beq	cr1,Left_just	// b if D0 is left justified
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   580
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   581
	bns	cr7,No_B_fwd	// b if only even number of bytes to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   582
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   583
	stvebx	VPS0,DST,BK	// LSU store first byte at DST+0
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   584
	addi	BK,BK,1		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   585
No_B_fwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   586
	bne	cr7,No_H_fwd	// b if only words to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   587
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   588
	stvehx	VPS0,DST,BK	// LSU store halfword at DST+0/1
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   589
	addi	BK,BK,2		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   590
No_H_fwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   591
	bng	cr7,No_W1_fwd	// b if exactly zero or two words to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   592
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   593
	stvewx	VPS0,DST,BK	// LSU store word 1 of one or three
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   594
	addi	BK,BK,4		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   595
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   596
No_W1_fwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   597
	bnl	cr7,No_W2_fwd	// b if there was only one word to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   598
	stvewx	VPS0,DST,BK	// LSU store word 1 of two or 2 of three
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   599
	addi	BK,BK,4		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   600
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   601
	stvewx	VPS0,DST,BK	// LSU store word 2 of two or 3 of three
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   602
	b	No_W2_fwd
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   603
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   604
Left_just:	
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   605
	stvx	VPS0,0,DST	// LSU Store 16 bytes at D0
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   606
No_W2_fwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   607
	rlwinm	Rt,DBK,0,28,31	// IU1 (DBK = DST+BC-1)[28:31]
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   608
	cmpi	cr6,0,QW,0	// IU1 Any full vectors to move?
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   609
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   610
	li	BK,16		// IU1 Re-initialize byte kount index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   611
	cmpi	cr1,0,Rt,0xF	// IU1 Is DN right justified?
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   612
	cmpi	cr7,0,QW,14	// IU1 Check QW>14
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   613
	ble	cr6,Last_ld_fwd	// b if no Quad words to do
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   614
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   615
	mtctr	QW		// IU2 for (i=0;i<=QW;i++)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   616
	cmpi	cr6,0,QW,4	// IU1 Check QW>4
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   617
QW_fwd_loop:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   618
	lvx	VS1,SRC,BK	// LSU Get S2 (or S1)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   619
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   620
	vperm	VPS0,VS0,VS1,VP3	// VPU Align S1 and S2 to D1
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   621
	vor	VS0,VS1,VS1	// VIU1 Move upper vector to lower
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   622
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   623
	stvx	VPS0,DST,BK	// LSU Store 16 bytes at D1(+n*16 where n<4)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   624
	addi	BK,BK,16	// IU1 Increment byte kount index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   625
	bdnzf	25,QW_fwd_loop	// b if 4 or less quad words to do
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   626
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   627
	add	DNX,DST,BK	// IU1 address of next store (DST+32 if QW>4)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   628
	addi	QW,QW,-1	// IU1 One more QW stored by now
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   629
	bgt	cr6,GT_4QW_fwd	// b if >4 quad words left
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   630
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   631
Last_ld_fwd:	// Next 16 bytes is the last; we're done.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   632
	add	DBC,DST,BC	// IU1 Recompute address of last dst byte + 1
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   633
	add	SBC,SRC,BC	// IU1 Recompute address of last src byte + 1
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   634
	bge	No_ld_fwd	// b if shifting right (D-S>=0)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   635
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   636
	addi	SBC,SBC,-16	// IU1 D-S<0: undo the 16 added to src (not added if D-S>=0)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   637
No_ld_fwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   638
	mtcrf	0x01,DBC	// IU2 Put final vector byte count in cr7
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   639
	addi	DBK,DBC,-1	// IU1 Recompute address of last dst byte
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   640
	addi	Rt,SBC,-1	// IU1 Recompute address of last src byte
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   641
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   642
// If D-S<0 we have already loaded all the source vectors.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   643
// If D-S>=0 then the first loaded vector went to the upper half of the permute
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   644
// pair and we need one more vector.  (This may be a duplicate.)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   645
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   646
	lvx	VS1,0,Rt	// LSU Get last source S14 (guaranteed SN)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   647
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   648
#ifndef NO_DST				
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   649
	dss	0		// Data stream 0 stop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   650
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   651
	dss	1		// Data stream 1 stop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   652
#endif
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   653
	vperm	VPS0,VS0,VS1,VP3	// VPU Align S13 and S14 to D14
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   654
	beq	cr1,Rt_just_fwd	// b if last destination is right justified
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   655
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   656
	rlwinm	DBK,DBK,0,0,27	// IU1 Round to QW addr of last byte
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   657
	li	D,0		// IU1 Initialize index pointer
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   658
	bnl	cr7,Only_1W_fwd	// b if there was only one or zero words to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   659
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   660
	stvewx	VPS0,DBK,D	// LSU store word 1 of two or three
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   661
	addi	D,D,4		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   662
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   663
	stvewx	VPS0,DBK,D	// LSU store word 2 of two or three
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   664
	addi	D,D,4		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   665
Only_1W_fwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   666
	bng	cr7,Only_2W_fwd	// b if there were only two or zero words to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   667
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   668
	stvewx	VPS0,DBK,D	// LSU store word 3 of three if necessary
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   669
	addi	D,D,4		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   670
Only_2W_fwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   671
	bne	cr7,Only_B_fwd	// b if there are no half words to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   672
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   673
	stvehx	VPS0,DBK,D	// LSU store one halfword if necessary
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   674
	addi	D,D,2		// IU1 increment index
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   675
Only_B_fwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   676
	bns	cr7,All_done_fwd	// b if there are no bytes to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   677
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   678
	stvebx	VPS0,DBK,D	// LSU store one byte if necessary
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   679
	b	All_done_fwd
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   680
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   681
Rt_just_fwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   682
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   683
	stvx	VPS0,DST,BK	// LSU Store 16 bytes at D14
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   684
All_done_fwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   685
#ifdef VRSAVE
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   686
	mtspr	VRSV,RSV	// IU1 Restore VRSAVE	
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   687
#endif
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   688
	blr			// Return destination address from entry
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   689
#ifdef __MWERKS__
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   690
	.align	16
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   691
#else
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   692
	.align	4
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   693
#endif
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   694
GT_4QW_fwd:	// Do once if nxt st is to odd half of cache line, else twice
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   695
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   696
	lvx	VS1,SRC,BK	// LSU Get S3 (or S2)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   697
	addi	QW,QW,-1	// IU1 Keeping track of QWs stored
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   698
	mtcrf	0x02,DNX	// IU2 cr6[3]=((DST+32)[27]==1)?1:0;
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   699
	
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   700
	addi	DNX,DNX,16	// IU1 Update cr6 for next loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   701
	addi	Rt,QW,-2	// IU1 Insure at least 2 QW left after big loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   702
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   703
	vperm	VPS0,VS0,VS1,VP3	// VPU Align S2 and S3 to D2
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   704
	vor	VS0,VS1,VS1	// VIU1 Move upper vector to lower
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   705
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   706
	stvx	VPS0,DST,BK	// LSU Store 16 bytes at D2
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   707
	addi	BK,BK,16	// IU1 Increment byte count by 16
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   708
	bdnzf	27,GT_4QW_fwd	// b if next store is to lower (even) half of CL
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   709
// At this point next store will be to even address.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   710
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   711
	mtcrf	0x02,DBK	// IU2 cr6[3]=((last store)[27]==1)?1:0; (odd?)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   712
	lis	STR,0x104	// IU1 Stream 4 blocks of 16 bytes
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   713
	addi	BL,BK,16	// IU1 Create an alternate byte kount + 32
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   714
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   715
	ori	STR,STR,32	// IU1 Stream stride 32B
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   716
#ifndef NO_BIG_LOOP
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   717
	rlwinm	BIG,Rt,29,3,31	// IU1 QW/8 big loops to do
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   718
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   719
	rlwinm	Rt,Rt,0,0,28	// IU1 How many QWs will be done in big loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   720
	bgt	cr7,Big_loop	// b if QW > 14
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   721
#endif
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   722
No_big_loop:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   723
// We need the ctr register to reflect an even byte count before entering
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   724
// the next block - faster to decrement than to reload.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   725
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   726
	addi	SP8,SRC,256	// IU1 Starting address for data stream touch
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   727
	xoris	STR,STR,0x6	// IU1 Reset stream to 2 blocks of 16 bytes
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   728
	bns	cr6,B32_fwd	// b if DST[27] == 0; i.e, final store is even
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   729
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   730
	bdnz	B32_fwd		// decrement counter for last QW store odd
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   731
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   732
B32_fwd:	// Should be at least 2 stores remaining and next 2 are cache aligned
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   733
	lvx	VS1,SRC,BK	// LSU Get S12
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   734
	addi	SP8,SP8,32	// IU1 Next starting address for data stream touch
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   735
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   736
	lvx	VS2,SRC,BL	// LSU Get S13
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   737
	vperm	VPS1,VS0,VS1,VP3	// VPU Align S11 and S12 to D11
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   738
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   739
	STRM_1			// LSU Stream 64 byte blocks ahead of loads
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   740
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   741
	DCBK			// LSU then Kill instead of RWITM
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   742
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   743
	vperm	VPS0,VS1,VS2,VP3	// VPU Align S12 and S13 to D12
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   744
	vor	VS0,VS2,VS2	// VIU1 Move S13 to S11
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   745
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   746
	stvx	VPS1,DST,BK	// LSU Store 16 bytes at D11
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   747
	addi	BK,BL,16	// IU1 Increment byte count
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   748
	bdz	Nxt_loc_fwd	// always decrement and branch to next instr		
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   749
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   750
Nxt_loc_fwd:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   751
	stvx	VPS0,DST,BL	// LSU Store 16 bytes at D12
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   752
	addi	BL,BK,16	// IU1 Increment alternate byte count
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   753
	bdnz	B32_fwd		// b if there are at least two more QWs to do
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   754
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   755
	bso	cr6,One_even_QW	// b if there is one even and one odd QW to store
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   756
	b	Last_ld_fwd	// b if last store is to even address
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   757
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   758
// Come here with two more loads and two stores to do
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   759
One_even_QW:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   760
	lvx	VS1,SRC,BK	// LSU Get S14 (or S13 if D-S>=0)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   761
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   762
	vperm	VPS0,VS0,VS1,VP3	// VPU Align S13 and S14 to D13
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   763
	vor	VS0,VS1,VS1	// VIU1 Move upper vector to lower
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   764
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   765
	stvx	VPS0,DST,BK	// LSU Store 16 bytes at D13
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   766
	addi	BK,BK,16	// IU1 Increment byte count
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   767
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   768
	b	Last_ld_fwd
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   769
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   770
#ifdef __MWERKS__
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   771
	.align	16
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   772
#else
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   773
	.align	4
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   774
#endif
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   775
// Big_loop: set-up for the 128-byte copy loop.  Subtracts Rt from QW, leaves
// for No_big_loop when cr5 reads "less than", otherwise loads CTR with BIG and
// points SP8 256 bytes past SRC for the data stream touch.
// NOTE(review): the subf executes before the blt, so QW has already been
// reduced by Rt when No_big_loop is reached from here -- confirm that
// No_big_loop expects the reduced value.
Big_loop:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   776
	subf	QW,Rt,QW	// IU1 QW = QW - Rt; should be 2-7 QWs left after big loop
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   777
	blt	cr5,No_big_loop	// b back if |DST-SRC|<128; Big_loop won't work.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   778
	mtctr	BIG		// IU2 loop for as many 128B loops as possible
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   779
	addi	SP8,SRC,256	// IU1 Starting address for data stream touch
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   780
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   781
// Loop_of_128B: each pass issues eight lvx loads (VS1..VS7, then VS0) at
// SRC+BK .. SRC+BK+112, merges adjacent pairs with vperm through VP3, and
// stores eight QWs (VPS0..VPS7) at DST+BK .. DST+BK+112, so BK advances by
// 128 per pass.  VS0 is reloaded last and carries over into the next pass.
// CTR (loaded from BIG in Big_loop) counts the passes.  In the offset
// comments below, "BK" means the value of BK on entry to the current pass.
Loop_of_128B:	// Come here with QW>=10 and next store even; VS0 last load
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   782
	lvx	VS1,SRC,BK	// LSU Get S4 (or S3 if D-S>=0)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   783
	addi	BL,BK,32	// IU1 BL = BK+32
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   784
	addi	SP8,SP8,128	// IU1 increment address for data stream touch
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   785
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   786
	lvx	VS3,SRC,BL	// LSU Get S6 (or S5)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   787
	addi	BL,BL,32	// IU1 BL = BK+64
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   788
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   789
	lvx	VS5,SRC,BL	// LSU Get S8 (or S7)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   790
	addi	BL,BL,32	// IU1 BL = BK+96
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   791
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   792
	lvx	VS7,SRC,BL	// LSU Get S10 (or S9)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   793
	addi	BL,BK,16	// IU1 BL = BK+16 (start of the second chain of loads)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   794
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   795
	lvx	VS2,SRC,BL	// LSU Get S5 (or S4)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   796
	addi	BL,BL,32	// IU1 BL = BK+48
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   797
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   798
	lvx	VS4,SRC,BL	// LSU Get S7 (or S6)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   799
	addi	BL,BL,32	// IU1 BL = BK+80
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   800
	
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   801
	lvx	VS6,SRC,BL	// LSU Get S9 (or S8)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   802
	addi	BL,BL,32	// IU1 BL = BK+112
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   803
	vperm	VPS0,VS0,VS1,VP3	// VPU Align data for D3
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   804
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   805
	lvx	VS0,SRC,BL	// LSU Get S11 (or S10); reused by the next pass
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   806
	vperm	VPS1,VS1,VS2,VP3	// VPU Align data for D4
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   807
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   808
	STRM_1			// LSU Stream 4 32B blocks, stride 32B
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   809
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   810
	DCBK			// LSU then Kill instead of RWITM
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   811
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   812
	stvx	VPS0,DST,BK	// LSU Store D3
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   813
	addi	BK,BK,16	// IU1 BK is now entry BK+16
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   814
	vperm	VPS2,VS2,VS3,VP3	// VPU Align data for D5
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   815
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   816
	stvx	VPS1,DST,BK	// LSU Store D4
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   817
	addi	BK,BK,16	// IU1 BK is now entry BK+32
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   818
	vperm	VPS3,VS3,VS4,VP3	// VPU Align data for D6
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   819
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   820
	DCBK			// LSU then Kill instead of RWITM
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   821
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   822
	stvx	VPS2,DST,BK	// LSU Store D5
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   823
	addi	BK,BK,16	// IU1 BK is now entry BK+48
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   824
	vperm	VPS4,VS4,VS5,VP3	// VPU Align data for D7
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   825
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   826
	stvx	VPS3,DST,BK	// LSU Store D6
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   827
	addi	BK,BK,16	// IU1 BK is now entry BK+64
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   828
	vperm	VPS5,VS5,VS6,VP3	// VPU Align data for D8
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   829
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   830
	DCBK			// LSU then Kill instead of RWITM
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   831
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   832
	stvx	VPS4,DST,BK	// LSU Store D7
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   833
	addi	BK,BK,16	// IU1 BK is now entry BK+80
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   834
	vperm	VPS6,VS6,VS7,VP3	// VPU Align data for D9
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   835
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   836
	stvx	VPS5,DST,BK	// LSU Store D8
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   837
	addi	BK,BK,16	// IU1 BK is now entry BK+96
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   838
	vperm	VPS7,VS7,VS0,VP3	// VPU Align data for D10
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   839
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   840
	DCBK			// LSU then Kill instead of RWITM
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   841
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   842
	stvx	VPS6,DST,BK	// LSU Store D9
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   843
	addi	BK,BK,16	// IU1 BK is now entry BK+112
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   844
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   845
	stvx	VPS7,DST,BK	// LSU Store D10
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   846
	addi	BK,BK,16	// IU1 BK is now entry BK+128, ready for the next pass
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   847
	bdnz	Loop_of_128B	// b if ctr > 0 (QW/8 still > 0)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   848
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   849
	mtctr	QW		// IU1 Restore QW remaining to counter
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   850
	addi	BL,BK,16	// IU1 Create an alternate byte kount + 16
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   851
	bns	cr6,B32_fwd	// b if DST[27] == 0; i.e., final store is even
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   852
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   853
	bdnz	B32_fwd		// b and decrement counter for last QW store odd
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   854
				// One of the above branches should have been taken;
				// if neither is, execution falls through into the
				// bcopy entry point that follows.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   855
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   856
// End of memcpy in AltiVec
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   857
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   858
// bcopy works like memcpy, but the source and destination operands are reversed.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   859
// Following will just reverse the operands and branch to memcpy.
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   860
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   861
// bcopy / vec_bcopy entry point (exported as bcopy when LIBMOTOVEC is defined,
// as vec_bcopy otherwise).
//   In:    DST register holds the caller's source address, SRC register holds
//          the caller's destination address (bcopy's operand order).
//   Out:   tail-branches to the memcpy entry with the two addresses exchanged;
//          no other argument register is modified here.
//   Clobb: Rt (used as the swap temporary).
#ifdef LIBMOTOVEC
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   862
	.globl	bcopy     
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   863
bcopy:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   864
#else
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   865
	.globl	vec_bcopy     
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   866
vec_bcopy:
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   867
#endif
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   868
	mr	Rt,DST		// temp storage for what is really source address (r3)
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   869
	mr	DST,SRC		// swap destination address to r3 to match memcpy dst
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   870
	mr	SRC,Rt		// Complete swap of destination and source for memcpy
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   871
#ifdef LIBMOTOVEC
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   872
	b	memcpy		// b to memcpy with correct args in r3 and r4
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   873
#else
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   874
// NOTE(review): this entry is exported as vec_bcopy (no leading underscore),
// but the branch target below is _vec_memcpy -- confirm that label is the one
// actually defined for the memcpy entry, otherwise this will not link.
	b	_vec_memcpy	// b to vec_memcpy with correct args in r3 and r4
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   875
#endif
47c74d1534e1 Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   876
// End of bcopy in AltiVec