|
1 /* |
|
2 SDL - Simple DirectMedia Layer |
|
3 Copyright (C) 1997-2006 Sam Lantinga |
|
4 |
|
5 This library is free software; you can redistribute it and/or |
|
6 modify it under the terms of the GNU Lesser General Public |
|
7 License as published by the Free Software Foundation; either |
|
8 version 2.1 of the License, or (at your option) any later version. |
|
9 |
|
10 This library is distributed in the hope that it will be useful, |
|
11 but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 Lesser General Public License for more details. |
|
14 |
|
15 You should have received a copy of the GNU Lesser General Public |
|
16 License along with this library; if not, write to the Free Software |
|
17 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
|
19 Sam Lantinga |
|
20 slouken@libsdl.org |
|
21 */ |
|
22 #include "SDL_config.h" |
|
23 |
|
/* This is a stretch blit implementation based on ideas given to me by
   Tomasz Cejner - thanks! :)

   April 27, 2000 - Sam Lantinga
*/
|
29 |
|
30 #include "SDL_video.h" |
|
31 #include "SDL_blit.h" |
|
32 |
|
33 /* This isn't ready for general consumption yet - it should be folded |
|
34 into the general blitting mechanism. |
|
35 */ |
|
36 |
|
/* Enable the run-time generated x86 row copier only for compilers for
   which inline assembly to call it is provided further down in this file,
   and only when SDL_ASSEMBLY_ROUTINES is set.

   NOTE(review): the first test is on _MFC_VER, while the call site in
   SDL_SoftStretch() tests _MSC_VER.  It is left untouched on purpose:
   the generated code is only made executable through mprotect() below,
   so switching the path on for more compilers should be confirmed first.
*/
#if ((defined(_MFC_VER) && defined(_M_IX86)/* && !defined(_WIN32_WCE) still needed? */) || \
     defined(__WATCOMC__) || \
     (defined(__GNUC__) && defined(__i386__))) && SDL_ASSEMBLY_ROUTINES
#define USE_ASM_STRETCH
#endif

#ifdef USE_ASM_STRETCH

#ifdef HAVE_MPROTECT
#include <sys/types.h>
#include <sys/mman.h>
#endif
#ifdef __GNUC__
#define PAGE_ALIGNED __attribute__((__aligned__(4096)))
#else
#define PAGE_ALIGNED
#endif

/* x86 opcodes used to build the row copier.
   __i386__ is tested as well as i386: the enabling condition above relies
   on __i386__, and GCC does not define the un-prefixed name in strict ISO
   mode, which would otherwise end in the #error below.
*/
#if defined(_M_IX86) || defined(i386) || defined(__i386__)
#define PREFIX16	0x66
#define STORE_BYTE	0xAA
#define STORE_WORD	0xAB
#define LOAD_BYTE	0xAC
#define LOAD_WORD	0xAD
#define RETURN		0xC3
#else
#error Need assembly opcodes for this architecture
#endif

/* Buffer receiving the generated machine code for one row copy */
static unsigned char copy_row[4096] PAGE_ALIGNED;

/* Fill copy_row[] with a straight-line sequence of load/store opcodes that
   stretches one row of src_w pixels to dst_w pixels, followed by RETURN.

   bpp is the pixel size in bytes; 1, 2 and 4 are supported (2 is emitted
   as the word opcode with a 16-bit operand-size prefix).

   The parameters and result of the previous call are cached, so calling
   again with the same arguments returns the cached status without
   regenerating anything.

   Returns 0 on success, or -1 (with the SDL error set) when the request is
   unsupported, the code does not fit into copy_row[], or the buffer could
   not be made executable.
*/
static int generate_rowbytes(int src_w, int dst_w, int bpp)
{
	static struct {
		int bpp;
		int src_w;
		int dst_w;
		int status;
	} last;

	int i;
	int pos, inc;
	int op_len;
	unsigned char *eip;
	unsigned char *limit;
	unsigned char load, store;

	/* Reject widths the fixed-point stepping cannot handle: dst_w is a
	   divisor, and (src_w << 16) must fit in an int.  Doing this before
	   the cache lookup also keeps the zero-initialized cache from ever
	   matching a request for which no code was generated. */
	if ( (dst_w <= 0) || (src_w < 0) || (src_w > 0x7FFF) ) {
		SDL_SetError("ASM stretch of %d -> %d pixels isn't supported",
		             src_w, dst_w);
		return(-1);
	}

	/* See if we need to regenerate the copy buffer */
	if ( (src_w == last.src_w) &&
	     (dst_w == last.dst_w) && (bpp == last.bpp) ) {
		return(last.status);
	}
	last.bpp = bpp;
	last.src_w = src_w;
	last.dst_w = dst_w;
	last.status = -1;

	switch (bpp) {
	    case 1:
		load = LOAD_BYTE;
		store = STORE_BYTE;
		break;
	    case 2:
	    case 4:
		load = LOAD_WORD;
		store = STORE_WORD;
		break;
	    default:
		SDL_SetError("ASM stretch of %d bytes isn't supported\n", bpp);
		return(-1);
	}

	/* Each emitted operation is one opcode, plus a prefix for 16-bit */
	op_len = (bpp == 2) ? 2 : 1;
	/* Keep the last byte of the buffer in reserve for RETURN */
	limit = copy_row + sizeof(copy_row) - 1;

	pos = 0x10000;
	inc = (src_w << 16) / dst_w;
	eip = copy_row;
	for ( i=0; i<dst_w; ++i ) {
		/* Advance the source by as many pixels as this step needs */
		while ( pos >= 0x10000L ) {
			if ( (limit - eip) < op_len ) {
				goto overflow;
			}
			if ( bpp == 2 ) {
				*eip++ = PREFIX16;
			}
			*eip++ = load;
			pos -= 0x10000L;
		}
		/* Write the most recently loaded pixel */
		if ( (limit - eip) < op_len ) {
			goto overflow;
		}
		if ( bpp == 2 ) {
			*eip++ = PREFIX16;
		}
		*eip++ = store;
		pos += inc;
	}
	*eip++ = RETURN;

#ifdef HAVE_MPROTECT
	/* Make the code executable */
	if ( mprotect(copy_row, sizeof(copy_row), PROT_READ|PROT_WRITE|PROT_EXEC) < 0 ) {
		SDL_SetError("Couldn't make copy buffer executable");
		return(-1);
	}
#endif
	last.status = 0;
	return(0);

overflow:
	/* Detected before anything was written outside copy_row[]; the
	   cached status stays -1 so the partial code is never executed. */
	SDL_SetError("Copy buffer overflow");
	return(-1);
}

#endif /* USE_ASM_STRETCH */
|
142 |
|
143 #define DEFINE_COPY_ROW(name, type) \ |
|
144 void name(type *src, int src_w, type *dst, int dst_w) \ |
|
145 { \ |
|
146 int i; \ |
|
147 int pos, inc; \ |
|
148 type pixel = 0; \ |
|
149 \ |
|
150 pos = 0x10000; \ |
|
151 inc = (src_w << 16) / dst_w; \ |
|
152 for ( i=dst_w; i>0; --i ) { \ |
|
153 while ( pos >= 0x10000L ) { \ |
|
154 pixel = *src++; \ |
|
155 pos -= 0x10000L; \ |
|
156 } \ |
|
157 *dst++ = pixel; \ |
|
158 pos += inc; \ |
|
159 } \ |
|
160 } |
|
161 DEFINE_COPY_ROW(copy_row1, Uint8) |
|
162 DEFINE_COPY_ROW(copy_row2, Uint16) |
|
163 DEFINE_COPY_ROW(copy_row4, Uint32) |
|
164 |
|
165 /* The ASM code doesn't handle 24-bpp stretch blits */ |
|
166 void copy_row3(Uint8 *src, int src_w, Uint8 *dst, int dst_w) |
|
167 { |
|
168 int i; |
|
169 int pos, inc; |
|
170 Uint8 pixel[3] = { 0, 0, 0 }; |
|
171 |
|
172 pos = 0x10000; |
|
173 inc = (src_w << 16) / dst_w; |
|
174 for ( i=dst_w; i>0; --i ) { |
|
175 while ( pos >= 0x10000L ) { |
|
176 pixel[0] = *src++; |
|
177 pixel[1] = *src++; |
|
178 pixel[2] = *src++; |
|
179 pos -= 0x10000L; |
|
180 } |
|
181 *dst++ = pixel[0]; |
|
182 *dst++ = pixel[1]; |
|
183 *dst++ = pixel[2]; |
|
184 pos += inc; |
|
185 } |
|
186 } |
|
187 |
|
188 /* Perform a stretch blit between two surfaces of the same format. |
|
189 NOTE: This function is not safe to call from multiple threads! |
|
190 */ |
|
191 int SDL_SoftStretch(SDL_Surface *src, SDL_Rect *srcrect, |
|
192 SDL_Surface *dst, SDL_Rect *dstrect) |
|
193 { |
|
194 int src_locked; |
|
195 int dst_locked; |
|
196 int pos, inc; |
|
197 int dst_width; |
|
198 int dst_maxrow; |
|
199 int src_row, dst_row; |
|
200 Uint8 *srcp = NULL; |
|
201 Uint8 *dstp; |
|
202 SDL_Rect full_src; |
|
203 SDL_Rect full_dst; |
|
204 #ifdef USE_ASM_STRETCH |
|
205 SDL_bool use_asm = SDL_TRUE; |
|
206 #ifdef __GNUC__ |
|
207 int u1, u2; |
|
208 #endif |
|
209 #endif /* USE_ASM_STRETCH */ |
|
210 const int bpp = dst->format->BytesPerPixel; |
|
211 |
|
212 if ( src->format->BitsPerPixel != dst->format->BitsPerPixel ) { |
|
213 SDL_SetError("Only works with same format surfaces"); |
|
214 return(-1); |
|
215 } |
|
216 |
|
217 /* Verify the blit rectangles */ |
|
218 if ( srcrect ) { |
|
219 if ( (srcrect->x < 0) || (srcrect->y < 0) || |
|
220 ((srcrect->x+srcrect->w) > src->w) || |
|
221 ((srcrect->y+srcrect->h) > src->h) ) { |
|
222 SDL_SetError("Invalid source blit rectangle"); |
|
223 return(-1); |
|
224 } |
|
225 } else { |
|
226 full_src.x = 0; |
|
227 full_src.y = 0; |
|
228 full_src.w = src->w; |
|
229 full_src.h = src->h; |
|
230 srcrect = &full_src; |
|
231 } |
|
232 if ( dstrect ) { |
|
233 if ( (dstrect->x < 0) || (dstrect->y < 0) || |
|
234 ((dstrect->x+dstrect->w) > dst->w) || |
|
235 ((dstrect->y+dstrect->h) > dst->h) ) { |
|
236 SDL_SetError("Invalid destination blit rectangle"); |
|
237 return(-1); |
|
238 } |
|
239 } else { |
|
240 full_dst.x = 0; |
|
241 full_dst.y = 0; |
|
242 full_dst.w = dst->w; |
|
243 full_dst.h = dst->h; |
|
244 dstrect = &full_dst; |
|
245 } |
|
246 |
|
247 /* Lock the destination if it's in hardware */ |
|
248 dst_locked = 0; |
|
249 if ( SDL_MUSTLOCK(dst) ) { |
|
250 if ( SDL_LockSurface(dst) < 0 ) { |
|
251 SDL_SetError("Unable to lock destination surface"); |
|
252 return(-1); |
|
253 } |
|
254 dst_locked = 1; |
|
255 } |
|
256 /* Lock the source if it's in hardware */ |
|
257 src_locked = 0; |
|
258 if ( SDL_MUSTLOCK(src) ) { |
|
259 if ( SDL_LockSurface(src) < 0 ) { |
|
260 if ( dst_locked ) { |
|
261 SDL_UnlockSurface(dst); |
|
262 } |
|
263 SDL_SetError("Unable to lock source surface"); |
|
264 return(-1); |
|
265 } |
|
266 src_locked = 1; |
|
267 } |
|
268 |
|
269 /* Set up the data... */ |
|
270 pos = 0x10000; |
|
271 inc = (srcrect->h << 16) / dstrect->h; |
|
272 src_row = srcrect->y; |
|
273 dst_row = dstrect->y; |
|
274 dst_width = dstrect->w*bpp; |
|
275 |
|
276 #ifdef USE_ASM_STRETCH |
|
277 /* Write the opcodes for this stretch */ |
|
278 if ( (bpp == 3) || |
|
279 (generate_rowbytes(srcrect->w, dstrect->w, bpp) < 0) ) { |
|
280 use_asm = SDL_FALSE; |
|
281 } |
|
282 #endif |
|
283 |
|
284 /* Perform the stretch blit */ |
|
285 for ( dst_maxrow = dst_row+dstrect->h; dst_row<dst_maxrow; ++dst_row ) { |
|
286 dstp = (Uint8 *)dst->pixels + (dst_row*dst->pitch) |
|
287 + (dstrect->x*bpp); |
|
288 while ( pos >= 0x10000L ) { |
|
289 srcp = (Uint8 *)src->pixels + (src_row*src->pitch) |
|
290 + (srcrect->x*bpp); |
|
291 ++src_row; |
|
292 pos -= 0x10000L; |
|
293 } |
|
294 #ifdef USE_ASM_STRETCH |
|
295 if (use_asm) { |
|
296 #ifdef __GNUC__ |
|
297 __asm__ __volatile__ ( |
|
298 "call *%4" |
|
299 : "=&D" (u1), "=&S" (u2) |
|
300 : "0" (dstp), "1" (srcp), "r" (copy_row) |
|
301 : "memory" ); |
|
302 #elif defined(_MSC_VER) || defined(__WATCOMC__) |
|
303 { void *code = copy_row; |
|
304 __asm { |
|
305 push edi |
|
306 push esi |
|
307 |
|
308 mov edi, dstp |
|
309 mov esi, srcp |
|
310 call dword ptr code |
|
311 |
|
312 pop esi |
|
313 pop edi |
|
314 } |
|
315 } |
|
316 #else |
|
317 #error Need inline assembly for this compiler |
|
318 #endif |
|
319 } else |
|
320 #endif |
|
321 switch (bpp) { |
|
322 case 1: |
|
323 copy_row1(srcp, srcrect->w, dstp, dstrect->w); |
|
324 break; |
|
325 case 2: |
|
326 copy_row2((Uint16 *)srcp, srcrect->w, |
|
327 (Uint16 *)dstp, dstrect->w); |
|
328 break; |
|
329 case 3: |
|
330 copy_row3(srcp, srcrect->w, dstp, dstrect->w); |
|
331 break; |
|
332 case 4: |
|
333 copy_row4((Uint32 *)srcp, srcrect->w, |
|
334 (Uint32 *)dstp, dstrect->w); |
|
335 break; |
|
336 } |
|
337 pos += inc; |
|
338 } |
|
339 |
|
340 /* We need to unlock the surfaces if they're locked */ |
|
341 if ( dst_locked ) { |
|
342 SDL_UnlockSurface(dst); |
|
343 } |
|
344 if ( src_locked ) { |
|
345 SDL_UnlockSurface(src); |
|
346 } |
|
347 return(0); |
|
348 } |
|
349 |