author | hgs |
Fri, 17 Sep 2010 19:25:42 +0530 | |
changeset 67 | a1e347446159 |
parent 18 | 47c74d1534e1 |
permissions | -rw-r--r-- |
18
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
1 |
//------------------------------------------------------------------ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
2 |
// file: vec_memcpy.S |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
3 |
// AltiVec enabled version of memcpy and bcopy |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
4 |
//------------------------------------------------------------------ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
5 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
6 |
//------------------------------------------------------------------ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
7 |
// Copyright Motorola, Inc. 2003 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
8 |
// ALL RIGHTS RESERVED |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
9 |
// |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
10 |
// You are hereby granted a copyright license to use, modify, and |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
11 |
// distribute the SOFTWARE so long as this entire notice is retained |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
12 |
// without alteration in any modified and/or redistributed versions, |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
13 |
// and that such modified versions are clearly identified as such. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
14 |
// No licenses are granted by implication, estoppel or otherwise under |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
15 |
// any patents or trademarks of Motorola, Inc. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
16 |
// |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
17 |
// The SOFTWARE is provided on an "AS IS" basis and without warranty. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
18 |
// To the maximum extent permitted by applicable law, MOTOROLA DISCLAIMS |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
19 |
// ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, INCLUDING IMPLIED |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
20 |
// WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
21 |
// PURPOSE AND ANY WARRANTY AGAINST INFRINGEMENT WITH |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
22 |
// REGARD TO THE SOFTWARE (INCLUDING ANY MODIFIED VERSIONS |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
23 |
// THEREOF) AND ANY ACCOMPANYING WRITTEN MATERIALS. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
24 |
// |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
25 |
// To the maximum extent permitted by applicable law, IN NO EVENT SHALL |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
26 |
// MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
27 |
// (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
28 |
// BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
29 |
// INFORMATION, OR OTHER PECUNIARY LOSS) ARISING OF THE USE OR |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
30 |
// INABILITY TO USE THE SOFTWARE. Motorola assumes no responsibility |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
31 |
// for the maintenance and support of the SOFTWARE. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
32 |
//------------------------------------------------------------------ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
33 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
34 |
//------------------------------------------------------------------ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
35 |
// extern void * memcpy(void *dst, const void *src, size_t len); |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
36 |
// Returns: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
37 |
// void *dst |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
38 |
//------------------------------------------------------------------ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
39 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
40 |
//------------------------------------------------------------------ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
41 |
// extern void * memmove( void *dst, const void *src, size_t len ); |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
42 |
// Copies len characters from src to dst and returns the value of |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
43 |
// dst. Works correctly for overlapping memory regions. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
44 |
// - Harbison&Steele 4th ed (corrected as to return) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
45 |
// Returns: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
46 |
// void *dst |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
47 |
//------------------------------------------------------------------ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
48 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
49 |
//------------------------------------------------------------------ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
50 |
// extern void * bcopy(const void *src, void *dst, size_t len); |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
51 |
// Returns: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
52 |
// void *dst |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
53 |
//------------------------------------------------------------------ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
54 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
55 |
// memcpy and memmove are combined into one entry point here because of |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
56 |
// the similarity of operation and need to create fool-proof code. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
57 |
// The following conditions determine what is "fool proof": |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
58 |
// |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
59 |
// if: then single entry: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
60 |
// (DST-SRC)<0 && (SRC-DST)>=BC && BC>MIN_VEC will b to v_memcpy |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
61 |
// (DST-SRC)<0 && (SRC-DST)< BC && BC>MIN_VEC must b to v_memcpy |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
62 |
// (DST-SRC)<0 && BC<MIN_VEC copy fwd byte-by-byte |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
63 |
// (DST-SRC)==0 || BC==0 will just return |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
64 |
// (DST-SRC)>0 && BC<MIN_VEC copy bkwd byte-by-byte |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
65 |
// (DST-SRC)>0 && (DST-SRC)< BC && BC>MIN_VEC must b to v_memmove |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
66 |
// (DST-SRC)>0 && (SRC-DST)>=BC && BC>MIN_VEC will b to v_memmove |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
67 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
68 |
// If you call memmove (or vec_memmove) and |DST-SRC|>=BC, |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
69 |
// this code will branch to v_memcpy anyway for maximum performance. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
70 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
71 |
// Revision History: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
72 |
// Rev 0.0 Original Chuck Corley 02/03/03 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
73 |
// Can still add dst, 128B loop, and aligned option |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
74 |
// Rev 0.01 Fixed JY's seg-fault violation CJC 02/17/03 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
75 |
// Rev 0.1 Added 128B loop and dst; cndtnlzd dcbz CJC 02/18/03 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
76 |
// (Creating separate path for QW aligned didn't help much) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
77 |
// Rev 0.11 Small code schdling; chngd dst for memmove CJC 02/23/03 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
78 |
// Rev 0.20 Eliminated alternate entry and cleanup CJC 02/27/03 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
79 |
// Rev 0.21 Inproved loop branch targets for v_mempcy CJC 03/01/03 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
80 |
// Rev 0.22 Experimented with dst (sent to H.) CJC 03/02/03 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
81 |
// Rev 0.23 Substituted dcba for dcbz (sent to JY) CJC 03/08/03 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
82 |
// Rev 0.24 Use two dst streams CJC 03/12/03 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
83 |
// Rev 0.25 Fix for all compilers, cleanup, and release with |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
84 |
// libmotovec.a rev 0.10 CJC 03/14/03 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
85 |
// Rev 0.30 Fix for pre-empted destination (SNDF-DS) CJC 04/02/03 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
86 |
// |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
87 |
// Between Rev 0.25 and 0.30 the code was revised to store elements of |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
88 |
// source at destination when first and/or last vector are less than 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
89 |
// bytes. Areviewer at SNDF observed that loading the destination vector |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
90 |
// for merging exposed the "uninvolved" destination bytes to incoherency |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
91 |
// if an interrupt pre-empted this routine and modified the "uninvolved" |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
92 |
// destination vector(s) while held in register for merging. It seems |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
93 |
// like a low possibility but this revision is no longer subject to that |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
94 |
// possibility. (It is also slightly faster than Rev 0.25.) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
95 |
// This is beta quality code; users are encouraged to make it faster. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
96 |
// ASSUMPTIONS: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
97 |
// Code is highly likely to be in the cache; data is not (streaming data) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
98 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
99 |
#define VRSV 256 // VRSAVE spr |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
100 |
// Don't use vectors for BC <= MIN_VEC. Works only if MIN_VEC >= 16 bytes. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
101 |
#define MIN_VEC 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
102 |
// Don't use Big_loop in v_memcpy for |dst-src|<= minimum overlap. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
103 |
#define MIN_OVL 128 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
104 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
105 |
// Register useage |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
106 |
#define Rt r0 // r0 when used as a temporary register |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
107 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
108 |
#define DST r3 // entering: dst pointer; exiting: same dst pointer |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
109 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
110 |
#define SRC r4 // entering: src ptr; then end of src range index (SRC+BC) in memmove |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
111 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
112 |
#define BC r5 // entering: Byte_Count |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
113 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
114 |
#define PCS r6 // save for partial checksum entering |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
115 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
116 |
#define DMS r7 // dst - src initially |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
117 |
#define BK r7 // BC - 1 +/- (n*16) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
118 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
119 |
// Codewarrior will put an unwelcome space as "lbzu r0,1(r7 )" |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
120 |
// if you don't put the comment right after the r7. CJC 030314 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
121 |
#define SM1 r8// src -1 for byte-by-byte forwards initially |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
122 |
#define S r8 // src[28:31] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
123 |
#define SMD r8 // src[0:27]-dst[0:27] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
124 |
#define STR r8 // data stream touch block & stride info for Big_loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
125 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
126 |
#define DM1 r9// dst -1 for byte-by-byte forwards initially |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
127 |
#define D r9 // dst[28:31] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
128 |
#define DNX r9 // (dst+n*16)[28:31] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
129 |
#define BL r9 // second byte_kount index pointer |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
130 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
131 |
#define SBC r10// src + byte count initially then src[28:31] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
132 |
#define BLK r10 // temporary data stream touch block & stride info |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
133 |
#define DR r10 // (dst+16)[0:27] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
134 |
#define QW r10 // number of quad words (vectors) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
135 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
136 |
#define DBC r11// dst + byte count initially |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
137 |
#define BLL r11 // temporary data stream touch block & stride info |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
138 |
#define SBK r11 // (src+byte_count-1) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
139 |
#define SBR r11 // (src+byte_count-1)[0:27] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
140 |
#define DBK r11 // (dst+byte_count-1) then (dst+byte_count-1)[28:31] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
141 |
#define BIG r11 // QW/8 or 128 byte loop count |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
142 |
#define SP8 r11 // SRC + n*128 (8 QWs) for data streaming after first call |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
143 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
144 |
#define RSV r12 // storage for VRSAVE register if used |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
145 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
146 |
#define VS0 v0 // src vector for permuting |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
147 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
148 |
#define VS1 v1 // src vector for permuting |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
149 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
150 |
#define VP3 v2 // d - s permute register |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
151 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
152 |
#define VPS0 v3 // permuted source vector to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
153 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
154 |
#define VPS1 v4 // 2nd permuted source vector to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
155 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
156 |
#define VPS2 v5 // additional permuted src in Big loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
157 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
158 |
#define VS2 v6 // src vector for permuting |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
159 |
#define VPS3 v6 // additional permuted src in Big loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
160 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
161 |
#define VS3 v7 // additional src load in Big loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
162 |
#define VPS4 v7 // additional permuted src in Big loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
163 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
164 |
#define VS4 v8 // additional src load in Big loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
165 |
#define VPS5 v8 // additional permuted src in Big loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
166 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
167 |
#define VS5 v9 // additional src load in Big loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
168 |
#define VPS6 v9 // additional permuted src in Big loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
169 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
170 |
#define VS6 v10 // additional src load in Big loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
171 |
#define VPS7 v10 // additional permuted src in Big loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
172 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
173 |
#define VS7 v11 // additional src load in Big loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
174 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
175 |
// Conditionalize the use of dcba. It will help if the data is |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
176 |
// not in cache and hurt if it is. Generally, except for small |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
177 |
// benchmarks repeated many times, we assume data is not in cache |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
178 |
// (data streaming) and using dcbz is a performance boost. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
179 |
#ifndef NO_DCBA |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
180 |
#if defined(__GNUC__) || defined(__MWERKS__) || defined(_DIAB_TOOL) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
181 |
// gcc and codewarrior and diab don't assemble dcba |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
182 |
#define DCBK .long 0x7c033dec |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
183 |
// dcba r3,r7 or dcba DST,BK |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
184 |
#define DCBL .long 0x7c034dec |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
185 |
// dcba r3,r9 or dcba DST,BL |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
186 |
#else |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
187 |
#ifdef __ghs__ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
188 |
.macro DCBK |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
189 |
.long 0x7c033dec |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
190 |
.endm |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
191 |
.macro DCBL |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
192 |
.long 0x7c034dec |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
193 |
.endm |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
194 |
#else |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
195 |
#define DCBK dcba DST,BK |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
196 |
#define DCBL dcba DST,BL |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
197 |
#endif // __ghs__ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
198 |
#endif // __GNUC__ or __MWERKS__ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
199 |
#else |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
200 |
#define DCBK nop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
201 |
#define DCBL nop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
202 |
#endif // NO_DCBA |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
203 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
204 |
// Conditionalize the use of dst (data stream touch). It will help |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
205 |
// if the data is not in cache and hurt if it is (though not as badly |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
206 |
// as dcbz). Generally, except for small benchmarks repeated many times, |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
207 |
// we assume data is not in cache (data streaming) and using dst is a |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
208 |
// performance boost. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
209 |
#ifndef NO_DST |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
210 |
#define STRM_B dst SBC,BLL,0 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
211 |
#define STRM_F dst SRC,BLK,0 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
212 |
#define STRM_1 dst SP8,STR,1 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
213 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
214 |
#else |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
215 |
#define STRM_B nop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
216 |
#define STRM_F nop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
217 |
#define STRM_1 nop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
218 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
219 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
220 |
// Condition register use |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
221 |
// cr0[0:2] = (dst-src==0)? return: ((dst-src>0)? copy_bkwd, copy_fwd;); |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
222 |
// then cr0[0:2] = (dst[28:31]-src[28:31]<0)? "shifting left", "shifting right"; |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
223 |
// cr1[0,2] = (BC == 0)? 1 : 0; (nothing to move) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
224 |
// then cr1[2] = (DST[28:31] == 0)? 1 : 0; (D0 left justified) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
225 |
// then cr1[2] = ((DBK = DST+BC-1)[28:31] = 0xF)? 1 : 0; (DN right justified) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
226 |
// cr5[0,2] = (|DST-SRC|<=MIN_OVL)?1:0; (Overlap too small for Big loop?) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
227 |
// cr6[1,2] = (DST-SRC>=BC)?1:0; (Okay for v_memmove to copy forward?) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
228 |
// then cr6[2] = (QW == 0)? 1 : 0; (Any full vectors to move?) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
229 |
// then cr6[1] = (QW > 4)? 1 : 0; (>4 vectors to move?) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
230 |
// then cr6[3] = (third store[27] == 1)? 1: 0; (cache line alignment) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
231 |
// then cr6[3] = (last store[27] == 1)? 1: 0; (last store odd?) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
232 |
// cr7[2] = (BC>MIN_VEC)?1:0; (BC big enough to warrant vectors) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
233 |
// then cr7[0:3] = (DST+16)[0:27]-DST (How many bytes (iff <16) in first vector?) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
234 |
// then cr7[1] = (QW > 14)? 1 : 0; (>14 vectors to move?) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
235 |
// then cr7[0:3] = (DST+BC)[0:27] (How many bytes (iff <16) in last vector?) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
236 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
237 |
.text |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
238 |
#ifdef __MWERKS__ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
239 |
.align 32 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
240 |
#else |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
241 |
.align 5 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
242 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
243 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
244 |
#ifdef LIBMOTOVEC |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
245 |
.globl memmove |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
246 |
memmove: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
247 |
nop // IU1 Compilers forget first label |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
248 |
.globl memcpy |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
249 |
memcpy: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
250 |
#else |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
251 |
.globl vec_memmove |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
252 |
vec_memmove: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
253 |
nop // IU1 Only way I know to preserve both labels |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
254 |
.globl _vec_memcpy |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
255 |
_vec_memcpy: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
256 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
257 |
subf. DMS,SRC,DST // IU1 Compute dst-src difference |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
258 |
cmpi cr1,0,BC,0 // IU1 Eliminate zero byte count moves |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
259 |
cmpi cr7,0,BC,MIN_VEC // IU1 Check for minimum byte count |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
260 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
261 |
addi SM1,SRC,-1 // IU1 Pre-bias and duplicate src for fwd |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
262 |
addi DM1,DST,-1 // IU1 Pre-bias and duplicate destination |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
263 |
add SBC,SRC,BC // IU1 Pre-bias and duplicate src for bkwd |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
264 |
beqlr // return if DST = SRC |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
265 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
266 |
add DBC,DST,BC // IU1 Pre-bias and duplicate destination |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
267 |
subf Rt,DST,SRC // IU1 Form |DST-SRC| if DST-SRC<0 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
268 |
beqlr cr1 // return if BC = 0 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
269 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
270 |
bgt Cpy_bkwd // b if DST-SRC>0 (have to copy backward) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
271 |
cmpi cr5,0,Rt,MIN_OVL // IU1 (|DST-SRC|>128)?1:0; for v_memcpy |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
272 |
bgt cr7,v_memcpy // b if BC>MIN_VEC (okay to copy vectors fwd) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
273 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
274 |
// Copy byte-by-byte forwards if DST-SRC<0 and BC<=MIN_VEC |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
275 |
mtctr BC // i=BC; do ...;i--; while (i>0) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
276 |
Byte_cpy_fwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
277 |
lbzu Rt,1(SM1) // LSU * ++(DST-1) = * ++(SRC-1) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
278 |
stbu Rt,1(DM1) // LSU |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
279 |
bdnz Byte_cpy_fwd |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
280 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
281 |
blr |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
282 |
nop // IU1 Improve next label as branch target |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
283 |
Cpy_bkwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
284 |
cmpi cr5,0,DMS,MIN_OVL // IU1 ((DST-SRC)>128)?1:0; for v_memcpy |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
285 |
cmp cr6,0,DMS,BC // IU1 cr6[1,2]=(DST-SRC>=BC)?1:0; |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
286 |
bgt cr7,v_memmove // b if BC>MIN_VEC (copy vectors bkwd) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
287 |
// Copy byte-by-byte backwards if DST-SRC>0 and BC<=MIN_VEC |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
288 |
mtctr BC // i=BC; do ...;i--; while (i>0) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
289 |
Byte_cpy_bwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
290 |
lbzu Rt,-1(SBC) // LSU * --(DST+BC) = * --(SRC+BC) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
291 |
stbu Rt,-1(DBC) // LSU Store it |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
292 |
bdnz Byte_cpy_bwd |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
293 |
blr |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
294 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
295 |
#ifdef __MWERKS__ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
296 |
.align 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
297 |
#else |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
298 |
.align 4 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
299 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
300 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
301 |
v_memmove: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
302 |
// Byte count < MIN_VEC bytes will have been copied by scalar code above, |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
303 |
// so this will not deal with small block moves < MIN_VEC. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
304 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
305 |
// For systems using VRSAVE, define VRSAVE=1 when compiling. For systems |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
306 |
// that don't, make sure VRSAVE is undefined. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
307 |
#ifdef VRSAVE |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
308 |
mfspr RSV,VRSV // IU2 Get current VRSAVE contents |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
309 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
310 |
rlwinm S,SRC,0,28,31 // IU1 Save src address bits s[28:31] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
311 |
rlwinm D,DST,0,28,31 // IU1 D = dst[28:31] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
312 |
bge cr6,MC_entry // b to v_memcpy if DST-SRC>=BC (fwd copy OK) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
313 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
314 |
#ifdef VRSAVE |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
315 |
oris Rt,RSV,0xfff0 // IU1 Or in registers used by this routine |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
316 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
317 |
lis BLL,0x010c // IU1 Stream 12 blocks of 16 bytes |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
318 |
subf. SMD,D,S // IU1 if S-D<0 essentially shifting right |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
319 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
320 |
#ifdef VRSAVE |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
321 |
mtspr VRSV,Rt // IU2 Save in VRSAVE before first vec op |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
322 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
323 |
lvsr VP3,0,DMS // LSU Permute vector for dst - src shft right |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
324 |
ori BLL,BLL,0xffe0 // IU1 Stream stride -32B |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
325 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
326 |
STRM_B // LSU Start data stream at SRC+BC |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
327 |
addi SBK,SBC,-1 // IU1 Address of last src byte |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
328 |
bgt Rt_shft // Bytes from upper vector = (s-d>0)?s-d:16+s-d; |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
329 |
addi SMD,SMD,16 // IU1 Save 16-(d-s) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
330 |
Rt_shft: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
331 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
332 |
rlwinm SBR,SBK,0,0,27 // IU1 (SRC+BC-1)[0:27] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
333 |
addi BK,BC,-1 // IU1 Initialize byte index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
334 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
335 |
subf Rt,SBR,SBC // IU1 How many bytes in first source? |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
336 |
add DBK,DST,BK // IU1 Address of last dst byte |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
337 |
addi DR,DST,16 // IU1 Address of second dst vector |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
338 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
339 |
subf. SMD,Rt,SMD // IU1 if bytes in 1st src>Bytes in 1st permute |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
340 |
rlwinm Rt,DBK,0,28,31 // IU1 (DST+BC-1)[28:31] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
341 |
rlwinm DR,DR,0,0,27 // IU1 (DST+16)[0:27] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
342 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
343 |
// If there are more useful bytes in the upper vector of a permute pair than we |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
344 |
// will get in the first permute, the first loaded vector needs to be in the |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
345 |
// lower half of the permute pair. The upper half is a don't care then. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
346 |
blt Get_bytes_rt // b if shifting left (D-S>=0) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
347 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
348 |
lvx VS1,SRC,BK // LSU Get SN load started |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
349 |
// Comments numbering source and destination assume single path through the |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
350 |
// code executing each instruction once. For vec_memmove, an example would |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
351 |
// be the call memmove(BASE+0x0F, BASE+0x2F, 82). N = 6 in that case. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
352 |
addi SRC,SRC,-16 // IU1 Decrement src base (to keep BK useful) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
353 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
354 |
Get_bytes_rt: // Come here to get VS0 & Don't care what VS1 is |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
355 |
lvx VS0,SRC,BK // LSU Get SN-1 (SN if D-S<0) in lower vector |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
356 |
subf QW,DR,DBK // IU1 Bytes of full vectors to move (-16) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
357 |
cmpi cr7,0,Rt,0xF // IU1 Is Dn right justified? |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
358 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
359 |
cmpi cr1,0,D,0 // IU1 Is D0 left justified? |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
360 |
rlwinm QW,QW,28,4,31 // IU1 Quad words remaining |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
361 |
add Rt,DST,BC // IU1 Refresh the value of DST+BC |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
362 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
363 |
cmpi cr6,0,QW,0 // IU1 Any full vectors to move? |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
364 |
vperm VPS0,VS0,VS1,VP3 // VPU Align SN-1 and SN to DN |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
365 |
vor VS1,VS0,VS0 // VIU1 Move lower vector to upper |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
366 |
beq cr7,Rt_just // b if DN is right justified |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
367 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
368 |
mtcrf 0x01,Rt // IU2 Put final vector byte count in cr7 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
369 |
rlwinm DBK,DBK,0,0,27 // IU1 Address of first byte of final vector |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
370 |
li D,0 // IU1 Initialize an index pointer |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
371 |
bnl cr7,Only_1W_bkwd // b if there was only one or zero words to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
372 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
373 |
stvewx VPS0,DBK,D // LSU store word 1 of two or three |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
374 |
addi D,D,4 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
375 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
376 |
stvewx VPS0,DBK,D // LSU store word 2 of two or three |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
377 |
addi D,D,4 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
378 |
Only_1W_bkwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
379 |
bng cr7,Only_2W_bkwd // b if there were only two or zero words to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
380 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
381 |
stvewx VPS0,DBK,D // LSU store word 3 of three if necessary |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
382 |
addi D,D,4 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
383 |
Only_2W_bkwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
384 |
bne cr7,Only_B_bkwd // b if there are no half words to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
385 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
386 |
stvehx VPS0,DBK,D // LSU store one halfword if necessary |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
387 |
addi D,D,2 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
388 |
Only_B_bkwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
389 |
bns cr7,All_done_bkwd // b if there are no bytes to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
390 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
391 |
stvebx VPS0,DBK,D // LSU store one byte if necessary |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
392 |
b All_done_bkwd |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
393 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
394 |
Rt_just: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
395 |
stvx VPS0,DST,BK // LSU Store 16 bytes at DN |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
396 |
All_done_bkwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
397 |
addi BK,BK,-16 // IU1 Decrement destination byte count |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
398 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
399 |
ble cr6,Last_load // b if no Quad words to do |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
400 |
mtctr QW // IU2 for (i=0;i<=QW;i++)-execution serializng |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
401 |
cmpi cr6,0,QW,4 // IU1 Check QW>4 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
402 |
QW_loop: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
403 |
lvx VS0,SRC,BK // LSU Get SN-2 (or SN-1 if ADJ==0) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
404 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
405 |
vperm VPS0,VS0,VS1,VP3 // VPU Align SN-2 and SN-1 to DN-1 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
406 |
vor VS1,VS0,VS0 // VIU1 Move lower vector to upper |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
407 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
408 |
stvx VPS0,DST,BK // LSU Store 16 bytes at DN-1 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
409 |
addi BK,BK,-16 // IU1 Decrement byte kount |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
410 |
bdnzf 25,QW_loop // b if 4 or less quad words to do |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
411 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
412 |
add DNX,DST,BK // IU1 address of next store (DST+BC-1-16) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
413 |
bgt cr6,GT_4QW // b if >4 quad words left |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
414 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
415 |
Last_load: // if D-S>=0, next load will be from same address as last |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
416 |
blt No_ld_bkwd // b if shifting right (S-D>=0) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
417 |
addi SRC,SRC,16 // IU1 recorrect source if it was decremented |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
418 |
No_ld_bkwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
419 |
lvx VS0,0,SRC // LSU Get last source SN-6 (guaranteed S0) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
420 |
// Current 16 bytes is the last; we're done. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
421 |
dss 0 // Data stream stop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
422 |
vperm VPS0,VS0,VS1,VP3 // VPU Align SN-6 and SN-5 to DN-6 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
423 |
subfic D,DST,16 // IU1 How many bytes in first destination? |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
424 |
beq cr1,Lt_just // b if last destination is left justified |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
425 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
426 |
mtcrf 0x01,D // IU2 Put byte count remaining in cr7 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
427 |
li D,0 // IU1 Initialize index pointer |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
428 |
bns cr7,No_B_bkwd // b if only even number of bytes to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
429 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
430 |
stvebx VPS0,DST,D // LSU store first byte at DST+0 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
431 |
addi D,D,1 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
432 |
No_B_bkwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
433 |
bne cr7,No_H_bkwd // b if only words to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
434 |
stvehx VPS0,DST,D // LSU store halfword at DST+0/1 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
435 |
addi D,D,2 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
436 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
437 |
No_H_bkwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
438 |
bng cr7,No_W1_bkwd // b if exactly zero or two words to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
439 |
stvewx VPS0,DST,D // LSU store word 1 of one or three |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
440 |
addi D,D,4 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
441 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
442 |
No_W1_bkwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
443 |
bnl cr7,No_W2_bkwd // b if there was only one word to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
444 |
stvewx VPS0,DST,D // LSU store word 1 of two or 2 of three |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
445 |
addi D,D,4 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
446 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
447 |
stvewx VPS0,DST,D // LSU store word 2 of two or 3 of three |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
448 |
b No_W2_bkwd |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
449 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
450 |
Lt_just: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
451 |
stvx VPS0,0,DST // LSU Store 16 bytes at final dst addr D0 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
452 |
No_W2_bkwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
453 |
#ifdef VRSAVE |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
454 |
mtspr VRSV,RSV // IU1 Restore VRSAVE |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
455 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
456 |
blr // Return destination address from entry |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
457 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
458 |
GT_4QW: // Do once if next store is to even half of cache line, else twice |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
459 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
460 |
lvx VS0,SRC,BK // LSU Get SN-3 (or SN-2) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
461 |
mtcrf 0x02,DNX // IU2 cr6[3]=((DST+BC-1)[27]==1)?1:0; |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
462 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
463 |
vperm VPS0,VS0,VS1,VP3 // VPU Align SN-3 and SN-2 to Dn-2 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
464 |
vor VS1,VS0,VS0 // VIU1 Move lower vector to upper |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
465 |
addi DNX,DNX,-16 // IU1 Prepare to update cr6 next loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
466 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
467 |
stvx VPS0,DST,BK // LSU Store 16 bytes at DN-2 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
468 |
vor VS3,VS0,VS0 // VIU Make a copy of lower vector |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
469 |
addi BK,BK,-16 // IU1 Decrement byte count by 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
470 |
bdnzt 27,GT_4QW // b if next store is to upper (odd) half of CL |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
471 |
// At this point next store will be to even address. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
472 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
473 |
lis STR,0x102 // IU1 Stream 2 blocks of 16 bytes |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
474 |
mtcrf 0x02,DST // IU2 cr6[3]=(DST[27]==1)?1:0; (DST odd?) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
475 |
addi BL,BK,-16 // IU1 Create an alternate byte count - 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
476 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
477 |
ori STR,STR,0xffe0 // IU1 Stream stride -32B |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
478 |
addi SP8,SRC,-64 // IU1 Starting address for data stream touch |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
479 |
bso cr6,B32_bkwd // b if DST[27] == 1; i.e, final store is odd |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
480 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
481 |
bdnz B32_bkwd // decrement counter for last odd QW store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
482 |
B32_bkwd: // Should be at least 2 stores remaining and next 2 are cache aligned |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
483 |
lvx VS2,SRC,BK // LSU Get SN-4 (or SN-3) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
484 |
addi SP8,SP8,-32 // IU1 Next starting address for data stream touch |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
485 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
486 |
lvx VS1,SRC,BL // LSU Get SN-5 (or SN-4) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
487 |
vperm VPS0,VS2,VS3,VP3 // VPU Align SN-4 and SN-3 to DN-3 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
488 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
489 |
STRM_1 // LSU Stream 64 byte blocks ahead of loads |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
490 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
491 |
DCBL // LSU allocate next cache line |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
492 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
493 |
vperm VPS1,VS1,VS2,VP3 // VPU Align SN-5 and SN-4 to DN-4 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
494 |
vor VS3,VS1,VS1 // VIU1 Move SN-5 to SN-3 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
495 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
496 |
stvx VPS0,DST,BK // LSU Store 16 bytes at DN-3 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
497 |
addi BK,BL,-16 // IU1 Decrement byte count |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
498 |
bdz Nxt_loc_bkwd // always decrement and branch to next instr |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
499 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
500 |
Nxt_loc_bkwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
501 |
stvx VPS1,DST,BL // LSU Store 16 bytes at DN-4 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
502 |
addi BL,BK,-16 // IU1 Decrement alternate byte count |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
503 |
bdnz B32_bkwd // b if there are at least two more QWs to do |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
504 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
505 |
bns cr6,One_odd_QW // b if there was one more odd QW to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
506 |
b Last_load |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
507 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
508 |
// Come here with two more loads and two stores to do |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
509 |
One_odd_QW: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
510 |
lvx VS1,SRC,BK // LSU Get SN-6 (or SN-5) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
511 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
512 |
vperm VPS1,VS1,VS3,VP3 // VPU Align SN-6 and SN-5 to DN-5 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
513 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
514 |
stvx VPS1,DST,BK // LSU Store 16 bytes at DN-5 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
515 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
516 |
b Last_load |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
517 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
518 |
// End of memmove in AltiVec |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
519 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
520 |
#ifdef __MWERKS__ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
521 |
.align 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
522 |
#else |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
523 |
.align 4 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
524 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
525 |
v_memcpy: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
526 |
// Byte count < MIN_VEC bytes will have been copied by scalar code above, |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
527 |
// so this will not deal with small block moves < MIN_VEC. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
528 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
529 |
#ifdef VRSAVE |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
530 |
mfspr RSV,VRSV // IU2 Get current VRSAVE contents |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
531 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
532 |
rlwinm S,SRC,0,28,31 // IU1 Save src address bits s[28:31] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
533 |
rlwinm D,DST,0,28,31 // IU1 D = dst[28:31] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
534 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
535 |
MC_entry: // enter here from memmove if DST-SRC>=BC; this should be faster |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
536 |
#ifdef VRSAVE |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
537 |
oris Rt,RSV,0xfff0 // IU1 Or in registers used by this routine |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
538 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
539 |
lis BLK,0x010c // IU1 Stream 12 blocks of 16 bytes |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
540 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
541 |
subf. S,S,D // IU1 if D-S<0 essentially shifting left |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
542 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
543 |
#ifdef VRSAVE |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
544 |
mtspr VRSV,Rt // IU2 Save in VRSAVE before first vec op |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
545 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
546 |
lvsr VP3,0,DMS // LSU Permute vector for dst - src shft right |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
547 |
ori BLK,BLK,32 // IU1 Stream stride 32B |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
548 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
549 |
STRM_F // LSU Start data stream 0 at SRC |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
550 |
addi DR,DST,16 // IU1 Address of second dst vector |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
551 |
addi DBK,DBC,-1 // IU1 Address of last dst byte |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
552 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
553 |
// If D-S<0 we are "kinda" shifting left with the right shift permute vector |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
554 |
// loaded to VP3 and we need both S0 and S1 to permute. If D-S>=0 then the |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
555 |
// first loaded vector needs to be in the upper half of the permute pair and |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
556 |
// the lower half is a don't care then. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
557 |
bge Ld_bytes_rt // b if shifting right (D-S>=0) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
558 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
559 |
lvx VS0,0,SRC // LSU Get S0 load started |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
560 |
// Comments numbering source and destination assume single path through the |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
561 |
// code executing each instruction once. For vec_memcpy, an example would |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
562 |
// be the call memcpy(BASE+0x1E, BASE+0x1F, 259). N = 16 in that case. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
563 |
addi SRC,SRC,16 // IU1 Increment src base (to keep BK useful) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
564 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
565 |
Ld_bytes_rt: // Come here to get VS1 & Don't care what VS0 is |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
566 |
lvx VS1,0,SRC // LSU Get S1 (or S0 if D-S>=0) in upper vector |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
567 |
rlwinm DR,DR,0,0,27 // IU1 (DST+16)[0:27] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
568 |
cmpi cr1,0,D,0 // IU1 Is D0 left justified? |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
569 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
570 |
subf Rt,DST,DR // IU1 How many bytes in first destination? |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
571 |
subf QW,DR,DBK // IU1 Bytes of full vectors to move (-16) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
572 |
li BK,0 // IU1 Initialize byte kount index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
573 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
574 |
mtcrf 0x01,Rt // IU2 Put bytes in 1st dst in cr7 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
575 |
rlwinm QW,QW,28,4,31 // IU1 Quad words remaining |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
576 |
vperm VPS0,VS0,VS1,VP3 // VPU Align S0 and S1 to D0 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
577 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
578 |
vor VS0,VS1,VS1 // VIU1 Move upper vector to lower |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
579 |
beq cr1,Left_just // b if D0 is left justified |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
580 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
581 |
bns cr7,No_B_fwd // b if only even number of bytes to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
582 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
583 |
stvebx VPS0,DST,BK // LSU store first byte at DST+0 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
584 |
addi BK,BK,1 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
585 |
No_B_fwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
586 |
bne cr7,No_H_fwd // b if only words to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
587 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
588 |
stvehx VPS0,DST,BK // LSU store halfword at DST+0/1 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
589 |
addi BK,BK,2 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
590 |
No_H_fwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
591 |
bng cr7,No_W1_fwd // b if exactly zero or two words to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
592 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
593 |
stvewx VPS0,DST,BK // LSU store word 1 of one or three |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
594 |
addi BK,BK,4 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
595 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
596 |
No_W1_fwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
597 |
bnl cr7,No_W2_fwd // b if there was only one word to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
598 |
stvewx VPS0,DST,BK // LSU store word 1 of two or 2 of three |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
599 |
addi BK,BK,4 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
600 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
601 |
stvewx VPS0,DST,BK // LSU store word 2 of two or 3 of three |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
602 |
b No_W2_fwd |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
603 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
604 |
Left_just: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
605 |
stvx VPS0,0,DST // LSU Store 16 bytes at D0 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
606 |
No_W2_fwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
607 |
rlwinm Rt,DBK,0,28,31 // IU1 (DBK = DST+BC-1)[28:31] |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
608 |
cmpi cr6,0,QW,0 // IU1 Any full vectors to move? |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
609 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
610 |
li BK,16 // IU1 Re-initialize byte kount index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
611 |
cmpi cr1,0,Rt,0xF // IU1 Is DN right justified? |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
612 |
cmpi cr7,0,QW,14 // IU1 Check QW>14 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
613 |
ble cr6,Last_ld_fwd // b if no Quad words to do |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
614 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
615 |
mtctr QW // IU2 for (i=0;i<=QW;i++) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
616 |
cmpi cr6,0,QW,4 // IU1 Check QW>4 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
617 |
QW_fwd_loop: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
618 |
lvx VS1,SRC,BK // LSU Get S2 (or S1) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
619 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
620 |
vperm VPS0,VS0,VS1,VP3 // VPU Align S1 and S2 to D1 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
621 |
vor VS0,VS1,VS1 // VIU1 Move upper vector to lower |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
622 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
623 |
stvx VPS0,DST,BK // LSU Store 16 bytes at D1(+n*16 where n<4) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
624 |
addi BK,BK,16 // IU1 Increment byte kount index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
625 |
bdnzf 25,QW_fwd_loop // b if 4 or less quad words to do |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
626 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
627 |
add DNX,DST,BK // IU1 address of next store (DST+32 if QW>4) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
628 |
addi QW,QW,-1 // IU1 One more QW stored by now |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
629 |
bgt cr6,GT_4QW_fwd // b if >4 quad words left |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
630 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
631 |
Last_ld_fwd: // Next 16 bytes is the last; we're done. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
632 |
add DBC,DST,BC // IU1 Recompute address of last dst byte + 1 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
633 |
add SBC,SRC,BC // IU1 Recompute address of last src byte + 1 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
634 |
bge No_ld_fwd // b if shifting right (D-S>=0) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
635 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
636 |
addi SBC,SBC,-16 // IU1 if D-S>=0 we didn't add 16 to src |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
637 |
No_ld_fwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
638 |
mtcrf 0x01,DBC // IU2 Put final vector byte count in cr7 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
639 |
addi DBK,DBC,-1 // IU1 Recompute address of last dst byte |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
640 |
addi Rt,SBC,-1 // IU1 Recompute address of last src byte |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
641 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
642 |
// If D-S<0 we have already loaded all the source vectors. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
643 |
// If D-S>=0 then the first loaded vector went to the upper half of the permute |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
644 |
// pair and we need one more vector. (This may be a duplicate.) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
645 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
646 |
lvx VS1,0,Rt // LSU Get last source S14 (guaranteed SN) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
647 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
648 |
#ifndef NO_DST |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
649 |
dss 0 // Data stream 0 stop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
650 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
651 |
dss 1 // Data stream 1 stop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
652 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
653 |
vperm VPS0,VS0,VS1,VP3 // VPU Align S13 and S14 to D14 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
654 |
beq cr1,Rt_just_fwd // b if last destination is right justified |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
655 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
656 |
rlwinm DBK,DBK,0,0,27 // IU1 Round to QW addr of last byte |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
657 |
li D,0 // IU1 Initialize index pointer |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
658 |
bnl cr7,Only_1W_fwd // b if there was only one or zero words to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
659 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
660 |
stvewx VPS0,DBK,D // LSU store word 1 of two or three |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
661 |
addi D,D,4 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
662 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
663 |
stvewx VPS0,DBK,D // LSU store word 2 of two or three |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
664 |
addi D,D,4 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
665 |
Only_1W_fwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
666 |
bng cr7,Only_2W_fwd // b if there were only two or zero words to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
667 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
668 |
stvewx VPS0,DBK,D // LSU store word 3 of three if necessary |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
669 |
addi D,D,4 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
670 |
Only_2W_fwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
671 |
bne cr7,Only_B_fwd // b if there are no half words to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
672 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
673 |
stvehx VPS0,DBK,D // LSU store one halfword if necessary |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
674 |
addi D,D,2 // IU1 increment index |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
675 |
Only_B_fwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
676 |
bns cr7,All_done_fwd // b if there are no bytes to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
677 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
678 |
stvebx VPS0,DBK,D // LSU store one byte if necessary |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
679 |
b All_done_fwd |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
680 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
681 |
Rt_just_fwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
682 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
683 |
stvx VPS0,DST,BK // LSU Store 16 bytes at D14 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
684 |
All_done_fwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
685 |
#ifdef VRSAVE |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
686 |
mtspr VRSV,RSV // IU1 Restore VRSAVE |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
687 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
688 |
blr // Return destination address from entry |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
689 |
#ifdef __MWERKS__ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
690 |
.align 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
691 |
#else |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
692 |
.align 4 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
693 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
694 |
GT_4QW_fwd: // Do once if nxt st is to odd half of cache line, else twice |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
695 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
696 |
lvx VS1,SRC,BK // LSU Get S3 (or S2) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
697 |
addi QW,QW,-1 // IU1 Keeping track of QWs stored |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
698 |
mtcrf 0x02,DNX // IU2 cr6[3]=((DST+32)[27]==1)?1:0; |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
699 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
700 |
addi DNX,DNX,16 // IU1 Update cr6 for next loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
701 |
addi Rt,QW,-2 // IU1 Insure at least 2 QW left after big loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
702 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
703 |
vperm VPS0,VS0,VS1,VP3 // VPU Align S2 and S3 to D2 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
704 |
vor VS0,VS1,VS1 // VIU1 Move upper vector to lower |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
705 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
706 |
stvx VPS0,DST,BK // LSU Store 16 bytes at D2 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
707 |
addi BK,BK,16 // IU1 Increment byte count by 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
708 |
bdnzf 27,GT_4QW_fwd // b if next store is to lower (even) half of CL |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
709 |
// At this point next store will be to even address. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
710 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
711 |
mtcrf 0x02,DBK // IU2 cr6[3]=((last store)[27]==1)?1:0; (odd?) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
712 |
lis STR,0x104 // IU1 Stream 4 blocks of 16 bytes |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
713 |
addi BL,BK,16 // IU1 Create an alternate byte kount + 32 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
714 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
715 |
ori STR,STR,32 // IU1 Stream stride 32B |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
716 |
#ifndef NO_BIG_LOOP |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
717 |
rlwinm BIG,Rt,29,3,31 // IU1 QW/8 big loops to do |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
718 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
719 |
rlwinm Rt,Rt,0,0,28 // IU1 How many QWs will be done in big loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
720 |
bgt cr7,Big_loop // b if QW > 14 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
721 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
722 |
No_big_loop: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
723 |
// We need the ctr register to reflect an even byte count before entering |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
724 |
// the next block - faster to decrement than to reload. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
725 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
726 |
addi SP8,SRC,256 // IU1 Starting address for data stream touch |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
727 |
xoris STR,STR,0x6 // IU1 Reset stream to 2 blocks of 16 bytes |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
728 |
bns cr6,B32_fwd // b if DST[27] == 0; i.e, final store is even |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
729 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
730 |
bdnz B32_fwd // decrement counter for last QW store odd |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
731 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
732 |
B32_fwd: // Should be at least 2 stores remaining and next 2 are cache aligned |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
733 |
lvx VS1,SRC,BK // LSU Get S12 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
734 |
addi SP8,SP8,32 // IU1 Next starting address for data stream touch |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
735 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
736 |
lvx VS2,SRC,BL // LSU Get S13 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
737 |
vperm VPS1,VS0,VS1,VP3 // VPU Align S11 and S12 to D11 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
738 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
739 |
STRM_1 // LSU Stream 64 byte blocks ahead of loads |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
740 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
741 |
DCBK // LSU then Kill instead of RWITM |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
742 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
743 |
vperm VPS0,VS1,VS2,VP3 // VPU Align S12 and S13 to D12 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
744 |
vor VS0,VS2,VS2 // VIU1 Move S13 to S11 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
745 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
746 |
stvx VPS1,DST,BK // LSU Store 16 bytes at D11 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
747 |
addi BK,BL,16 // IU1 Increment byte count |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
748 |
bdz Nxt_loc_fwd // always decrement and branch to next instr |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
749 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
750 |
Nxt_loc_fwd: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
751 |
stvx VPS0,DST,BL // LSU Store 16 bytes at D12 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
752 |
addi BL,BK,16 // IU1 Increment alternate byte count |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
753 |
bdnz B32_fwd // b if there are at least two more QWs to do |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
754 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
755 |
bso cr6,One_even_QW // b if there is one even and one odd QW to store |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
756 |
b Last_ld_fwd // b if last store is to even address |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
757 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
758 |
// Come here with two more loads and two stores to do |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
759 |
One_even_QW: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
760 |
lvx VS1,SRC,BK // LSU Get S14 (or S13 if if D-S>=0) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
761 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
762 |
vperm VPS0,VS0,VS1,VP3 // VPU Align S13 and S14 to D13 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
763 |
vor VS0,VS1,VS1 // VIU1 Move upper vector to lower |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
764 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
765 |
stvx VPS0,DST,BK // LSU Store 16 bytes at D13 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
766 |
addi BK,BK,16 // IU1 Increment byte count |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
767 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
768 |
b Last_ld_fwd |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
769 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
770 |
#ifdef __MWERKS__ |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
771 |
.align 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
772 |
#else |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
773 |
.align 4 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
774 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
775 |
Big_loop: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
776 |
subf QW,Rt,QW // IU1 Should be 2-7 QWs left after big loop |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
777 |
blt cr5,No_big_loop // b back if |DST-SRC|<128; Big_loop won't work. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
778 |
mtctr BIG // IU2 loop for as many 128B loops as possible |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
779 |
addi SP8,SRC,256 // IU1 Starting address for data stream touch |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
780 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
781 |
Loop_of_128B: // Come here with QW>=10 and next store even; VS0 last load |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
782 |
lvx VS1,SRC,BK // LSU Get S4 (or S3 if D-S>=0) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
783 |
addi BL,BK,32 // IU1 Increment Byte_Kount+16 by 32 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
784 |
addi SP8,SP8,128 // IU1 increment address for data stream touch |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
785 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
786 |
lvx VS3,SRC,BL // LSU Get S6 (or S5) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
787 |
addi BL,BL,32 // IU1 Increment Byte_Kount+48 by 32 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
788 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
789 |
lvx VS5,SRC,BL // LSU Get S8 (or S7) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
790 |
addi BL,BL,32 // IU1 Increment Byte_Kount+80 by 32 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
791 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
792 |
lvx VS7,SRC,BL // LSU Get S10 (or S9) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
793 |
addi BL,BK,16 // IU1 Increment Byte_Kount+16 by 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
794 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
795 |
lvx VS2,SRC,BL // LSU Get S5 (or S4) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
796 |
addi BL,BL,32 // IU1 Increment Byte_Kount+32 by 32 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
797 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
798 |
lvx VS4,SRC,BL // LSU Get S7 (or S6) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
799 |
addi BL,BL,32 // IU1 Increment Byte_Kount+64 by 32 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
800 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
801 |
lvx VS6,SRC,BL // LSU Get S9 (or S8) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
802 |
addi BL,BL,32 // IU1 Increment Byte_Kount+96 by 32 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
803 |
vperm VPS0,VS0,VS1,VP3 // VPU |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
804 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
805 |
lvx VS0,SRC,BL // LSU Get S11 (or S10) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
806 |
vperm VPS1,VS1,VS2,VP3 // VPU |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
807 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
808 |
STRM_1 // LSU Stream 4 32B blocks, stride 32B |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
809 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
810 |
DCBK // LSU then Kill instead of RWITM |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
811 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
812 |
stvx VPS0,DST,BK // LSU Store D3 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
813 |
addi BK,BK,16 // IU1 Increment Byte_Kount+16 by 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
814 |
vperm VPS2,VS2,VS3,VP3 // VPU |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
815 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
816 |
stvx VPS1,DST,BK // LSU Store D4 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
817 |
addi BK,BK,16 // IU1 Increment Byte_Kount+32 by 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
818 |
vperm VPS3,VS3,VS4,VP3 // VPU |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
819 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
820 |
DCBK // LSU then Kill instead of RWITM |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
821 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
822 |
stvx VPS2,DST,BK // LSU Store D5 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
823 |
addi BK,BK,16 // IU1 Increment Byte_Kount+48 by 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
824 |
vperm VPS4,VS4,VS5,VP3 // VPU |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
825 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
826 |
stvx VPS3,DST,BK // LSU Store D6 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
827 |
addi BK,BK,16 // IU1 Increment Byte_Kount+64 by 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
828 |
vperm VPS5,VS5,VS6,VP3 // VPU |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
829 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
830 |
DCBK // LSU then Kill instead of RWITM |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
831 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
832 |
stvx VPS4,DST,BK // LSU Store D7 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
833 |
addi BK,BK,16 // IU1 Increment Byte_Kount+80 by 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
834 |
vperm VPS6,VS6,VS7,VP3 // VPU |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
835 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
836 |
stvx VPS5,DST,BK // LSU Store D8 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
837 |
addi BK,BK,16 // IU1 Increment Byte_Kount+96 by 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
838 |
vperm VPS7,VS7,VS0,VP3 // VPU |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
839 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
840 |
DCBK // LSU then Kill instead of RWITM |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
841 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
842 |
stvx VPS6,DST,BK // LSU Store D9 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
843 |
addi BK,BK,16 // IU1 Increment Byte_Kount+112 by 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
844 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
845 |
stvx VPS7,DST,BK // LSU Store D10 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
846 |
addi BK,BK,16 // IU1 Increment Byte_Kount+128 by 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
847 |
bdnz Loop_of_128B // b if ctr > 0 (QW/8 still > 0) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
848 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
849 |
mtctr QW // IU1 Restore QW remaining to counter |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
850 |
addi BL,BK,16 // IU1 Create an alternate byte kount + 16 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
851 |
bns cr6,B32_fwd // b if DST[27] == 0; i.e, final store is even |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
852 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
853 |
bdnz B32_fwd // b and decrement counter for last QW store odd |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
854 |
// One of the above branches should have taken |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
855 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
856 |
// End of memcpy in AltiVec |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
857 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
858 |
// bcopy works like memcpy, but the source and destination operands are reversed. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
859 |
// Following will just reverse the operands and branch to memcpy. |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
860 |
|
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
861 |
#ifdef LIBMOTOVEC |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
862 |
.globl bcopy |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
863 |
bcopy: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
864 |
#else |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
865 |
.globl vec_bcopy |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
866 |
vec_bcopy: |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
867 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
868 |
mr Rt,DST // temp storage for what is really source address (r3) |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
869 |
mr DST,SRC // swap destination address to r3 to match memcpy dst |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
870 |
mr SRC,Rt // Complete swap of destination and source for memcpy |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
871 |
#ifdef LIBMOTOVEC |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
872 |
b memcpy // b to memcpy with correct args in r3 and r4 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
873 |
#else |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
874 |
b _vec_memcpy // b to vec_memcpy with correct args in r3 and r4 |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
875 |
#endif |
47c74d1534e1
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
876 |
// End of bcopy in AltiVec |