symbian-qemu-0.9.1-12/libsdl-trunk/src/video/SDL_yuv_sw.c
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 /*
       
     2     SDL - Simple DirectMedia Layer
       
     3     Copyright (C) 1997-2006 Sam Lantinga
       
     4 
       
     5     This library is free software; you can redistribute it and/or
       
     6     modify it under the terms of the GNU Lesser General Public
       
     7     License as published by the Free Software Foundation; either
       
     8     version 2.1 of the License, or (at your option) any later version.
       
     9 
       
    10     This library is distributed in the hope that it will be useful,
       
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    13     Lesser General Public License for more details.
       
    14 
       
    15     You should have received a copy of the GNU Lesser General Public
       
    16     License along with this library; if not, write to the Free Software
       
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
       
    18 
       
    19     Sam Lantinga
       
    20     slouken@libsdl.org
       
    21 */
       
    22 #include "SDL_config.h"
       
    23 
       
    24 /* This is the software implementation of the YUV video overlay support */
       
    25 
       
    26 /* This code was derived from code carrying the following copyright notices:
       
    27 
       
    28  * Copyright (c) 1995 The Regents of the University of California.
       
    29  * All rights reserved.
       
    30  * 
       
    31  * Permission to use, copy, modify, and distribute this software and its
       
    32  * documentation for any purpose, without fee, and without written agreement is
       
    33  * hereby granted, provided that the above copyright notice and the following
       
    34  * two paragraphs appear in all copies of this software.
       
    35  * 
       
    36  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
       
    37  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
       
    38  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
       
    39  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    40  * 
       
    41  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
       
    42  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
       
    43  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
       
    44  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
       
    45  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
       
    46 
       
    47  * Copyright (c) 1995 Erik Corry
       
    48  * All rights reserved.
       
    49  * 
       
    50  * Permission to use, copy, modify, and distribute this software and its
       
    51  * documentation for any purpose, without fee, and without written agreement is
       
    52  * hereby granted, provided that the above copyright notice and the following
       
    53  * two paragraphs appear in all copies of this software.
       
    54  * 
       
    55  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
       
    56  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
       
    57  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
       
    58  * OF THE POSSIBILITY OF SUCH DAMAGE.
       
    59  * 
       
    60  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
       
    61  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
       
    62  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
       
    63  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
       
    64  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
       
    65 
       
    66  * Portions of this software Copyright (c) 1995 Brown University.
       
    67  * All rights reserved.
       
    68  * 
       
    69  * Permission to use, copy, modify, and distribute this software and its
       
    70  * documentation for any purpose, without fee, and without written agreement
       
    71  * is hereby granted, provided that the above copyright notice and the
       
    72  * following two paragraphs appear in all copies of this software.
       
    73  * 
       
    74  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
       
    75  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
       
    76  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
       
    77  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    78  * 
       
    79  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
       
    80  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
       
    81  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
       
    82  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
       
    83  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
       
    84  */
       
    85 
       
    86 #include "SDL_video.h"
       
    87 #include "SDL_cpuinfo.h"
       
    88 #include "SDL_stretch_c.h"
       
    89 #include "SDL_yuvfuncs.h"
       
    90 #include "SDL_yuv_sw_c.h"
       
    91 
       
    92 /* The functions used to manipulate software video overlays */
       
    93 static struct private_yuvhwfuncs sw_yuvfuncs = {
       
    94 	SDL_LockYUV_SW,
       
    95 	SDL_UnlockYUV_SW,
       
    96 	SDL_DisplayYUV_SW,
       
    97 	SDL_FreeYUV_SW
       
    98 };
       
    99 
       
   100 /* RGB conversion lookup tables */
       
   101 struct private_yuvhwdata {
       
   102 	SDL_Surface *stretch;
       
   103 	SDL_Surface *display;
       
   104 	Uint8 *pixels;
       
   105 	int *colortab;
       
   106 	Uint32 *rgb_2_pix;
       
   107 	void (*Display1X)(int *colortab, Uint32 *rgb_2_pix,
       
   108                           unsigned char *lum, unsigned char *cr,
       
   109                           unsigned char *cb, unsigned char *out,
       
   110                           int rows, int cols, int mod );
       
   111 	void (*Display2X)(int *colortab, Uint32 *rgb_2_pix,
       
   112 	                  unsigned char *lum, unsigned char *cr,
       
   113                           unsigned char *cb, unsigned char *out,
       
   114                           int rows, int cols, int mod );
       
   115 
       
   116 	/* These are just so we don't have to allocate them separately */
       
   117 	Uint16 pitches[3];
       
   118 	Uint8 *planes[3];
       
   119 };
       
   120 
       
   121 
       
   122 /* The colorspace conversion functions */
       
   123 
       
   124 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
       
   125 extern void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
       
   126                                      unsigned char *lum, unsigned char *cr,
       
   127                                      unsigned char *cb, unsigned char *out,
       
   128                                      int rows, int cols, int mod );
       
   129 extern void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
       
   130                                      unsigned char *lum, unsigned char *cr,
       
   131                                      unsigned char *cb, unsigned char *out,
       
   132                                      int rows, int cols, int mod );
       
   133 #endif 
       
   134 
       
   135 static void Color16DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
       
   136                                     unsigned char *lum, unsigned char *cr,
       
   137                                     unsigned char *cb, unsigned char *out,
       
   138                                     int rows, int cols, int mod )
       
   139 {
       
   140     unsigned short* row1;
       
   141     unsigned short* row2;
       
   142     unsigned char* lum2;
       
   143     int x, y;
       
   144     int cr_r;
       
   145     int crb_g;
       
   146     int cb_b;
       
   147     int cols_2 = cols / 2;
       
   148 
       
   149     row1 = (unsigned short*) out;
       
   150     row2 = row1 + cols + mod;
       
   151     lum2 = lum + cols;
       
   152 
       
   153     mod += cols + mod;
       
   154 
       
   155     y = rows / 2;
       
   156     while( y-- )
       
   157     {
       
   158         x = cols_2;
       
   159         while( x-- )
       
   160         {
       
   161             register int L;
       
   162 
       
   163             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   164             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   165                                + colortab[ *cb + 2*256 ];
       
   166             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   167             ++cr; ++cb;
       
   168 
       
   169             L = *lum++;
       
   170             *row1++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
       
   171                                        rgb_2_pix[ L + crb_g ] |
       
   172                                        rgb_2_pix[ L + cb_b ]);
       
   173 
       
   174             L = *lum++;
       
   175             *row1++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
       
   176                                        rgb_2_pix[ L + crb_g ] |
       
   177                                        rgb_2_pix[ L + cb_b ]);
       
   178 
       
   179 
       
   180             /* Now, do second row.  */
       
   181 
       
   182             L = *lum2++;
       
   183             *row2++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
       
   184                                        rgb_2_pix[ L + crb_g ] |
       
   185                                        rgb_2_pix[ L + cb_b ]);
       
   186 
       
   187             L = *lum2++;
       
   188             *row2++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
       
   189                                        rgb_2_pix[ L + crb_g ] |
       
   190                                        rgb_2_pix[ L + cb_b ]);
       
   191         }
       
   192 
       
   193         /*
       
   194          * These values are at the start of the next line, (due
       
   195          * to the ++'s above),but they need to be at the start
       
   196          * of the line after that.
       
   197          */
       
   198         lum  += cols;
       
   199         lum2 += cols;
       
   200         row1 += mod;
       
   201         row2 += mod;
       
   202     }
       
   203 }
       
   204 
       
   205 static void Color24DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
       
   206                                     unsigned char *lum, unsigned char *cr,
       
   207                                     unsigned char *cb, unsigned char *out,
       
   208                                     int rows, int cols, int mod )
       
   209 {
       
   210     unsigned int value;
       
   211     unsigned char* row1;
       
   212     unsigned char* row2;
       
   213     unsigned char* lum2;
       
   214     int x, y;
       
   215     int cr_r;
       
   216     int crb_g;
       
   217     int cb_b;
       
   218     int cols_2 = cols / 2;
       
   219 
       
   220     row1 = out;
       
   221     row2 = row1 + cols*3 + mod*3;
       
   222     lum2 = lum + cols;
       
   223 
       
   224     mod += cols + mod;
       
   225     mod *= 3;
       
   226 
       
   227     y = rows / 2;
       
   228     while( y-- )
       
   229     {
       
   230         x = cols_2;
       
   231         while( x-- )
       
   232         {
       
   233             register int L;
       
   234 
       
   235             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   236             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   237                                + colortab[ *cb + 2*256 ];
       
   238             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   239             ++cr; ++cb;
       
   240 
       
   241             L = *lum++;
       
   242             value = (rgb_2_pix[ L + cr_r ] |
       
   243                      rgb_2_pix[ L + crb_g ] |
       
   244                      rgb_2_pix[ L + cb_b ]);
       
   245             *row1++ = (value      ) & 0xFF;
       
   246             *row1++ = (value >>  8) & 0xFF;
       
   247             *row1++ = (value >> 16) & 0xFF;
       
   248 
       
   249             L = *lum++;
       
   250             value = (rgb_2_pix[ L + cr_r ] |
       
   251                      rgb_2_pix[ L + crb_g ] |
       
   252                      rgb_2_pix[ L + cb_b ]);
       
   253             *row1++ = (value      ) & 0xFF;
       
   254             *row1++ = (value >>  8) & 0xFF;
       
   255             *row1++ = (value >> 16) & 0xFF;
       
   256 
       
   257 
       
   258             /* Now, do second row.  */
       
   259 
       
   260             L = *lum2++;
       
   261             value = (rgb_2_pix[ L + cr_r ] |
       
   262                      rgb_2_pix[ L + crb_g ] |
       
   263                      rgb_2_pix[ L + cb_b ]);
       
   264             *row2++ = (value      ) & 0xFF;
       
   265             *row2++ = (value >>  8) & 0xFF;
       
   266             *row2++ = (value >> 16) & 0xFF;
       
   267 
       
   268             L = *lum2++;
       
   269             value = (rgb_2_pix[ L + cr_r ] |
       
   270                      rgb_2_pix[ L + crb_g ] |
       
   271                      rgb_2_pix[ L + cb_b ]);
       
   272             *row2++ = (value      ) & 0xFF;
       
   273             *row2++ = (value >>  8) & 0xFF;
       
   274             *row2++ = (value >> 16) & 0xFF;
       
   275         }
       
   276 
       
   277         /*
       
   278          * These values are at the start of the next line, (due
       
   279          * to the ++'s above),but they need to be at the start
       
   280          * of the line after that.
       
   281          */
       
   282         lum  += cols;
       
   283         lum2 += cols;
       
   284         row1 += mod;
       
   285         row2 += mod;
       
   286     }
       
   287 }
       
   288 
       
   289 static void Color32DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
       
   290                                     unsigned char *lum, unsigned char *cr,
       
   291                                     unsigned char *cb, unsigned char *out,
       
   292                                     int rows, int cols, int mod )
       
   293 {
       
   294     unsigned int* row1;
       
   295     unsigned int* row2;
       
   296     unsigned char* lum2;
       
   297     int x, y;
       
   298     int cr_r;
       
   299     int crb_g;
       
   300     int cb_b;
       
   301     int cols_2 = cols / 2;
       
   302 
       
   303     row1 = (unsigned int*) out;
       
   304     row2 = row1 + cols + mod;
       
   305     lum2 = lum + cols;
       
   306 
       
   307     mod += cols + mod;
       
   308 
       
   309     y = rows / 2;
       
   310     while( y-- )
       
   311     {
       
   312         x = cols_2;
       
   313         while( x-- )
       
   314         {
       
   315             register int L;
       
   316 
       
   317             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   318             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   319                                + colortab[ *cb + 2*256 ];
       
   320             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   321             ++cr; ++cb;
       
   322 
       
   323             L = *lum++;
       
   324             *row1++ = (rgb_2_pix[ L + cr_r ] |
       
   325                        rgb_2_pix[ L + crb_g ] |
       
   326                        rgb_2_pix[ L + cb_b ]);
       
   327 
       
   328             L = *lum++;
       
   329             *row1++ = (rgb_2_pix[ L + cr_r ] |
       
   330                        rgb_2_pix[ L + crb_g ] |
       
   331                        rgb_2_pix[ L + cb_b ]);
       
   332 
       
   333 
       
   334             /* Now, do second row.  */
       
   335 
       
   336             L = *lum2++;
       
   337             *row2++ = (rgb_2_pix[ L + cr_r ] |
       
   338                        rgb_2_pix[ L + crb_g ] |
       
   339                        rgb_2_pix[ L + cb_b ]);
       
   340 
       
   341             L = *lum2++;
       
   342             *row2++ = (rgb_2_pix[ L + cr_r ] |
       
   343                        rgb_2_pix[ L + crb_g ] |
       
   344                        rgb_2_pix[ L + cb_b ]);
       
   345         }
       
   346 
       
   347         /*
       
   348          * These values are at the start of the next line, (due
       
   349          * to the ++'s above),but they need to be at the start
       
   350          * of the line after that.
       
   351          */
       
   352         lum  += cols;
       
   353         lum2 += cols;
       
   354         row1 += mod;
       
   355         row2 += mod;
       
   356     }
       
   357 }
       
   358 
       
   359 /*
       
   360  * In this function I make use of a nasty trick. The tables have the lower
       
   361  * 16 bits replicated in the upper 16. This means I can write ints and get
       
   362  * the horisontal doubling for free (almost).
       
   363  */
       
   364 static void Color16DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
       
   365                                     unsigned char *lum, unsigned char *cr,
       
   366                                     unsigned char *cb, unsigned char *out,
       
   367                                     int rows, int cols, int mod )
       
   368 {
       
   369     unsigned int* row1 = (unsigned int*) out;
       
   370     const int next_row = cols+(mod/2);
       
   371     unsigned int* row2 = row1 + 2*next_row;
       
   372     unsigned char* lum2;
       
   373     int x, y;
       
   374     int cr_r;
       
   375     int crb_g;
       
   376     int cb_b;
       
   377     int cols_2 = cols / 2;
       
   378 
       
   379     lum2 = lum + cols;
       
   380 
       
   381     mod = (next_row * 3) + (mod/2);
       
   382 
       
   383     y = rows / 2;
       
   384     while( y-- )
       
   385     {
       
   386         x = cols_2;
       
   387         while( x-- )
       
   388         {
       
   389             register int L;
       
   390 
       
   391             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   392             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   393                                + colortab[ *cb + 2*256 ];
       
   394             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   395             ++cr; ++cb;
       
   396 
       
   397             L = *lum++;
       
   398             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
       
   399                                         rgb_2_pix[ L + crb_g ] |
       
   400                                         rgb_2_pix[ L + cb_b ]);
       
   401             row1++;
       
   402 
       
   403             L = *lum++;
       
   404             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
       
   405                                         rgb_2_pix[ L + crb_g ] |
       
   406                                         rgb_2_pix[ L + cb_b ]);
       
   407             row1++;
       
   408 
       
   409 
       
   410             /* Now, do second row. */
       
   411 
       
   412             L = *lum2++;
       
   413             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
       
   414                                         rgb_2_pix[ L + crb_g ] |
       
   415                                         rgb_2_pix[ L + cb_b ]);
       
   416             row2++;
       
   417 
       
   418             L = *lum2++;
       
   419             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
       
   420                                         rgb_2_pix[ L + crb_g ] |
       
   421                                         rgb_2_pix[ L + cb_b ]);
       
   422             row2++;
       
   423         }
       
   424 
       
   425         /*
       
   426          * These values are at the start of the next line, (due
       
   427          * to the ++'s above),but they need to be at the start
       
   428          * of the line after that.
       
   429          */
       
   430         lum  += cols;
       
   431         lum2 += cols;
       
   432         row1 += mod;
       
   433         row2 += mod;
       
   434     }
       
   435 }
       
   436 
       
   437 static void Color24DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
       
   438                                     unsigned char *lum, unsigned char *cr,
       
   439                                     unsigned char *cb, unsigned char *out,
       
   440                                     int rows, int cols, int mod )
       
   441 {
       
   442     unsigned int value;
       
   443     unsigned char* row1 = out;
       
   444     const int next_row = (cols*2 + mod) * 3;
       
   445     unsigned char* row2 = row1 + 2*next_row;
       
   446     unsigned char* lum2;
       
   447     int x, y;
       
   448     int cr_r;
       
   449     int crb_g;
       
   450     int cb_b;
       
   451     int cols_2 = cols / 2;
       
   452 
       
   453     lum2 = lum + cols;
       
   454 
       
   455     mod = next_row*3 + mod*3;
       
   456 
       
   457     y = rows / 2;
       
   458     while( y-- )
       
   459     {
       
   460         x = cols_2;
       
   461         while( x-- )
       
   462         {
       
   463             register int L;
       
   464 
       
   465             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   466             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   467                                + colortab[ *cb + 2*256 ];
       
   468             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   469             ++cr; ++cb;
       
   470 
       
   471             L = *lum++;
       
   472             value = (rgb_2_pix[ L + cr_r ] |
       
   473                      rgb_2_pix[ L + crb_g ] |
       
   474                      rgb_2_pix[ L + cb_b ]);
       
   475             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
       
   476                      (value      ) & 0xFF;
       
   477             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
       
   478                      (value >>  8) & 0xFF;
       
   479             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
       
   480                      (value >> 16) & 0xFF;
       
   481             row1 += 2*3;
       
   482 
       
   483             L = *lum++;
       
   484             value = (rgb_2_pix[ L + cr_r ] |
       
   485                      rgb_2_pix[ L + crb_g ] |
       
   486                      rgb_2_pix[ L + cb_b ]);
       
   487             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
       
   488                      (value      ) & 0xFF;
       
   489             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
       
   490                      (value >>  8) & 0xFF;
       
   491             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
       
   492                      (value >> 16) & 0xFF;
       
   493             row1 += 2*3;
       
   494 
       
   495 
       
   496             /* Now, do second row. */
       
   497 
       
   498             L = *lum2++;
       
   499             value = (rgb_2_pix[ L + cr_r ] |
       
   500                      rgb_2_pix[ L + crb_g ] |
       
   501                      rgb_2_pix[ L + cb_b ]);
       
   502             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
       
   503                      (value      ) & 0xFF;
       
   504             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
       
   505                      (value >>  8) & 0xFF;
       
   506             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
       
   507                      (value >> 16) & 0xFF;
       
   508             row2 += 2*3;
       
   509 
       
   510             L = *lum2++;
       
   511             value = (rgb_2_pix[ L + cr_r ] |
       
   512                      rgb_2_pix[ L + crb_g ] |
       
   513                      rgb_2_pix[ L + cb_b ]);
       
   514             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
       
   515                      (value      ) & 0xFF;
       
   516             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
       
   517                      (value >>  8) & 0xFF;
       
   518             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
       
   519                      (value >> 16) & 0xFF;
       
   520             row2 += 2*3;
       
   521         }
       
   522 
       
   523         /*
       
   524          * These values are at the start of the next line, (due
       
   525          * to the ++'s above),but they need to be at the start
       
   526          * of the line after that.
       
   527          */
       
   528         lum  += cols;
       
   529         lum2 += cols;
       
   530         row1 += mod;
       
   531         row2 += mod;
       
   532     }
       
   533 }
       
   534 
       
   535 static void Color32DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
       
   536                                     unsigned char *lum, unsigned char *cr,
       
   537                                     unsigned char *cb, unsigned char *out,
       
   538                                     int rows, int cols, int mod )
       
   539 {
       
   540     unsigned int* row1 = (unsigned int*) out;
       
   541     const int next_row = cols*2+mod;
       
   542     unsigned int* row2 = row1 + 2*next_row;
       
   543     unsigned char* lum2;
       
   544     int x, y;
       
   545     int cr_r;
       
   546     int crb_g;
       
   547     int cb_b;
       
   548     int cols_2 = cols / 2;
       
   549 
       
   550     lum2 = lum + cols;
       
   551 
       
   552     mod = (next_row * 3) + mod;
       
   553 
       
   554     y = rows / 2;
       
   555     while( y-- )
       
   556     {
       
   557         x = cols_2;
       
   558         while( x-- )
       
   559         {
       
   560             register int L;
       
   561 
       
   562             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   563             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   564                                + colortab[ *cb + 2*256 ];
       
   565             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   566             ++cr; ++cb;
       
   567 
       
   568             L = *lum++;
       
   569             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
       
   570                                        (rgb_2_pix[ L + cr_r ] |
       
   571                                         rgb_2_pix[ L + crb_g ] |
       
   572                                         rgb_2_pix[ L + cb_b ]);
       
   573             row1 += 2;
       
   574 
       
   575             L = *lum++;
       
   576             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
       
   577                                        (rgb_2_pix[ L + cr_r ] |
       
   578                                         rgb_2_pix[ L + crb_g ] |
       
   579                                         rgb_2_pix[ L + cb_b ]);
       
   580             row1 += 2;
       
   581 
       
   582 
       
   583             /* Now, do second row. */
       
   584 
       
   585             L = *lum2++;
       
   586             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
       
   587                                        (rgb_2_pix[ L + cr_r ] |
       
   588                                         rgb_2_pix[ L + crb_g ] |
       
   589                                         rgb_2_pix[ L + cb_b ]);
       
   590             row2 += 2;
       
   591 
       
   592             L = *lum2++;
       
   593             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
       
   594                                        (rgb_2_pix[ L + cr_r ] |
       
   595                                         rgb_2_pix[ L + crb_g ] |
       
   596                                         rgb_2_pix[ L + cb_b ]);
       
   597             row2 += 2;
       
   598         }
       
   599 
       
   600         /*
       
   601          * These values are at the start of the next line, (due
       
   602          * to the ++'s above),but they need to be at the start
       
   603          * of the line after that.
       
   604          */
       
   605         lum  += cols;
       
   606         lum2 += cols;
       
   607         row1 += mod;
       
   608         row2 += mod;
       
   609     }
       
   610 }
       
   611 
       
   612 static void Color16DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
       
   613                                     unsigned char *lum, unsigned char *cr,
       
   614                                     unsigned char *cb, unsigned char *out,
       
   615                                     int rows, int cols, int mod )
       
   616 {
       
   617     unsigned short* row;
       
   618     int x, y;
       
   619     int cr_r;
       
   620     int crb_g;
       
   621     int cb_b;
       
   622     int cols_2 = cols / 2;
       
   623 
       
   624     row = (unsigned short*) out;
       
   625 
       
   626     y = rows;
       
   627     while( y-- )
       
   628     {
       
   629         x = cols_2;
       
   630         while( x-- )
       
   631         {
       
   632             register int L;
       
   633 
       
   634             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   635             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   636                                + colortab[ *cb + 2*256 ];
       
   637             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   638             cr += 4; cb += 4;
       
   639 
       
   640             L = *lum; lum += 2;
       
   641             *row++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
       
   642                                       rgb_2_pix[ L + crb_g ] |
       
   643                                       rgb_2_pix[ L + cb_b ]);
       
   644 
       
   645             L = *lum; lum += 2;
       
   646             *row++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
       
   647                                       rgb_2_pix[ L + crb_g ] |
       
   648                                       rgb_2_pix[ L + cb_b ]);
       
   649 
       
   650         }
       
   651 
       
   652         row += mod;
       
   653     }
       
   654 }
       
   655 
       
   656 static void Color24DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
       
   657                                     unsigned char *lum, unsigned char *cr,
       
   658                                     unsigned char *cb, unsigned char *out,
       
   659                                     int rows, int cols, int mod )
       
   660 {
       
   661     unsigned int value;
       
   662     unsigned char* row;
       
   663     int x, y;
       
   664     int cr_r;
       
   665     int crb_g;
       
   666     int cb_b;
       
   667     int cols_2 = cols / 2;
       
   668 
       
   669     row = (unsigned char*) out;
       
   670     mod *= 3;
       
   671     y = rows;
       
   672     while( y-- )
       
   673     {
       
   674         x = cols_2;
       
   675         while( x-- )
       
   676         {
       
   677             register int L;
       
   678 
       
   679             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   680             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   681                                + colortab[ *cb + 2*256 ];
       
   682             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   683             cr += 4; cb += 4;
       
   684 
       
   685             L = *lum; lum += 2;
       
   686             value = (rgb_2_pix[ L + cr_r ] |
       
   687                      rgb_2_pix[ L + crb_g ] |
       
   688                      rgb_2_pix[ L + cb_b ]);
       
   689             *row++ = (value      ) & 0xFF;
       
   690             *row++ = (value >>  8) & 0xFF;
       
   691             *row++ = (value >> 16) & 0xFF;
       
   692 
       
   693             L = *lum; lum += 2;
       
   694             value = (rgb_2_pix[ L + cr_r ] |
       
   695                      rgb_2_pix[ L + crb_g ] |
       
   696                      rgb_2_pix[ L + cb_b ]);
       
   697             *row++ = (value      ) & 0xFF;
       
   698             *row++ = (value >>  8) & 0xFF;
       
   699             *row++ = (value >> 16) & 0xFF;
       
   700 
       
   701         }
       
   702         row += mod;
       
   703     }
       
   704 }
       
   705 
       
   706 static void Color32DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
       
   707                                     unsigned char *lum, unsigned char *cr,
       
   708                                     unsigned char *cb, unsigned char *out,
       
   709                                     int rows, int cols, int mod )
       
   710 {
       
   711     unsigned int* row;
       
   712     int x, y;
       
   713     int cr_r;
       
   714     int crb_g;
       
   715     int cb_b;
       
   716     int cols_2 = cols / 2;
       
   717 
       
   718     row = (unsigned int*) out;
       
   719     y = rows;
       
   720     while( y-- )
       
   721     {
       
   722         x = cols_2;
       
   723         while( x-- )
       
   724         {
       
   725             register int L;
       
   726 
       
   727             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   728             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   729                                + colortab[ *cb + 2*256 ];
       
   730             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   731             cr += 4; cb += 4;
       
   732 
       
   733             L = *lum; lum += 2;
       
   734             *row++ = (rgb_2_pix[ L + cr_r ] |
       
   735                        rgb_2_pix[ L + crb_g ] |
       
   736                        rgb_2_pix[ L + cb_b ]);
       
   737 
       
   738             L = *lum; lum += 2;
       
   739             *row++ = (rgb_2_pix[ L + cr_r ] |
       
   740                        rgb_2_pix[ L + crb_g ] |
       
   741                        rgb_2_pix[ L + cb_b ]);
       
   742 
       
   743 
       
   744         }
       
   745         row += mod;
       
   746     }
       
   747 }
       
   748 
       
   749 /*
       
   750  * In this function I make use of a nasty trick. The tables have the lower
       
   751  * 16 bits replicated in the upper 16. This means I can write ints and get
       
   752  * the horisontal doubling for free (almost).
       
   753  */
       
   754 static void Color16DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
       
   755                                     unsigned char *lum, unsigned char *cr,
       
   756                                     unsigned char *cb, unsigned char *out,
       
   757                                     int rows, int cols, int mod )
       
   758 {
       
   759     unsigned int* row = (unsigned int*) out;
       
   760     const int next_row = cols+(mod/2);
       
   761     int x, y;
       
   762     int cr_r;
       
   763     int crb_g;
       
   764     int cb_b;
       
   765     int cols_2 = cols / 2;
       
   766 
       
   767     y = rows;
       
   768     while( y-- )
       
   769     {
       
   770         x = cols_2;
       
   771         while( x-- )
       
   772         {
       
   773             register int L;
       
   774 
       
   775             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   776             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   777                                + colortab[ *cb + 2*256 ];
       
   778             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   779             cr += 4; cb += 4;
       
   780 
       
   781             L = *lum; lum += 2;
       
   782             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
       
   783                                         rgb_2_pix[ L + crb_g ] |
       
   784                                         rgb_2_pix[ L + cb_b ]);
       
   785             row++;
       
   786 
       
   787             L = *lum; lum += 2;
       
   788             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
       
   789                                         rgb_2_pix[ L + crb_g ] |
       
   790                                         rgb_2_pix[ L + cb_b ]);
       
   791             row++;
       
   792 
       
   793         }
       
   794         row += next_row;
       
   795     }
       
   796 }
       
   797 
       
   798 static void Color24DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
       
   799                                     unsigned char *lum, unsigned char *cr,
       
   800                                     unsigned char *cb, unsigned char *out,
       
   801                                     int rows, int cols, int mod )
       
   802 {
       
   803     unsigned int value;
       
   804     unsigned char* row = out;
       
   805     const int next_row = (cols*2 + mod) * 3;
       
   806     int x, y;
       
   807     int cr_r;
       
   808     int crb_g;
       
   809     int cb_b;
       
   810     int cols_2 = cols / 2;
       
   811     y = rows;
       
   812     while( y-- )
       
   813     {
       
   814         x = cols_2;
       
   815         while( x-- )
       
   816         {
       
   817             register int L;
       
   818 
       
   819             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   820             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   821                                + colortab[ *cb + 2*256 ];
       
   822             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   823             cr += 4; cb += 4;
       
   824 
       
   825             L = *lum; lum += 2;
       
   826             value = (rgb_2_pix[ L + cr_r ] |
       
   827                      rgb_2_pix[ L + crb_g ] |
       
   828                      rgb_2_pix[ L + cb_b ]);
       
   829             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
       
   830                      (value      ) & 0xFF;
       
   831             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
       
   832                      (value >>  8) & 0xFF;
       
   833             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
       
   834                      (value >> 16) & 0xFF;
       
   835             row += 2*3;
       
   836 
       
   837             L = *lum; lum += 2;
       
   838             value = (rgb_2_pix[ L + cr_r ] |
       
   839                      rgb_2_pix[ L + crb_g ] |
       
   840                      rgb_2_pix[ L + cb_b ]);
       
   841             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
       
   842                      (value      ) & 0xFF;
       
   843             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
       
   844                      (value >>  8) & 0xFF;
       
   845             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
       
   846                      (value >> 16) & 0xFF;
       
   847             row += 2*3;
       
   848 
       
   849         }
       
   850         row += next_row;
       
   851     }
       
   852 }
       
   853 
       
   854 static void Color32DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
       
   855                                     unsigned char *lum, unsigned char *cr,
       
   856                                     unsigned char *cb, unsigned char *out,
       
   857                                     int rows, int cols, int mod )
       
   858 {
       
   859     unsigned int* row = (unsigned int*) out;
       
   860     const int next_row = cols*2+mod;
       
   861     int x, y;
       
   862     int cr_r;
       
   863     int crb_g;
       
   864     int cb_b;
       
   865     int cols_2 = cols / 2;
       
   866     mod+=mod;
       
   867     y = rows;
       
   868     while( y-- )
       
   869     {
       
   870         x = cols_2;
       
   871         while( x-- )
       
   872         {
       
   873             register int L;
       
   874 
       
   875             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
       
   876             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
       
   877                                + colortab[ *cb + 2*256 ];
       
   878             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
       
   879             cr += 4; cb += 4;
       
   880 
       
   881             L = *lum; lum += 2;
       
   882             row[0] = row[1] = row[next_row] = row[next_row+1] =
       
   883                                        (rgb_2_pix[ L + cr_r ] |
       
   884                                         rgb_2_pix[ L + crb_g ] |
       
   885                                         rgb_2_pix[ L + cb_b ]);
       
   886             row += 2;
       
   887 
       
   888             L = *lum; lum += 2;
       
   889             row[0] = row[1] = row[next_row] = row[next_row+1] =
       
   890                                        (rgb_2_pix[ L + cr_r ] |
       
   891                                         rgb_2_pix[ L + crb_g ] |
       
   892                                         rgb_2_pix[ L + cb_b ]);
       
   893             row += 2;
       
   894 
       
   895 
       
   896         }
       
   897 
       
   898         row += next_row;
       
   899     }
       
   900 }
       
   901 
       
   902 /*
       
   903  * How many 1 bits are there in the Uint32.
       
   904  * Low performance, do not call often.
       
   905  */
       
   906 static int number_of_bits_set( Uint32 a )
       
   907 {
       
   908     if(!a) return 0;
       
   909     if(a & 1) return 1 + number_of_bits_set(a >> 1);
       
   910     return(number_of_bits_set(a >> 1));
       
   911 }
       
   912 
       
   913 /*
       
   914  * How many 0 bits are there at least significant end of Uint32.
       
   915  * Low performance, do not call often.
       
   916  */
       
   917 static int free_bits_at_bottom( Uint32 a )
       
   918 {
       
   919       /* assume char is 8 bits */
       
   920     if(!a) return sizeof(Uint32) * 8;
       
   921     if(((Sint32)a) & 1l) return 0;
       
   922     return 1 + free_bits_at_bottom ( a >> 1);
       
   923 }
       
   924 
       
   925 
       
   926 SDL_Overlay *SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
       
   927 {
       
   928 	SDL_Overlay *overlay;
       
   929 	struct private_yuvhwdata *swdata;
       
   930 	int *Cr_r_tab;
       
   931 	int *Cr_g_tab;
       
   932 	int *Cb_g_tab;
       
   933 	int *Cb_b_tab;
       
   934 	Uint32 *r_2_pix_alloc;
       
   935 	Uint32 *g_2_pix_alloc;
       
   936 	Uint32 *b_2_pix_alloc;
       
   937 	int i;
       
   938 	int CR, CB;
       
   939 	Uint32 Rmask, Gmask, Bmask;
       
   940 
       
   941 	/* Only RGB packed pixel conversion supported */
       
   942 	if ( (display->format->BytesPerPixel != 2) &&
       
   943 	     (display->format->BytesPerPixel != 3) &&
       
   944 	     (display->format->BytesPerPixel != 4) ) {
       
   945 		SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces");
       
   946 		return(NULL);
       
   947 	}
       
   948 
       
   949 	/* Verify that we support the format */
       
   950 	switch (format) {
       
   951 	    case SDL_YV12_OVERLAY:
       
   952 	    case SDL_IYUV_OVERLAY:
       
   953 	    case SDL_YUY2_OVERLAY:
       
   954 	    case SDL_UYVY_OVERLAY:
       
   955 	    case SDL_YVYU_OVERLAY:
       
   956 		break;
       
   957 	    default:
       
   958 		SDL_SetError("Unsupported YUV format");
       
   959 		return(NULL);
       
   960 	}
       
   961 
       
   962 	/* Create the overlay structure */
       
   963 	overlay = (SDL_Overlay *)SDL_malloc(sizeof *overlay);
       
   964 	if ( overlay == NULL ) {
       
   965 		SDL_OutOfMemory();
       
   966 		return(NULL);
       
   967 	}
       
   968 	SDL_memset(overlay, 0, (sizeof *overlay));
       
   969 
       
   970 	/* Fill in the basic members */
       
   971 	overlay->format = format;
       
   972 	overlay->w = width;
       
   973 	overlay->h = height;
       
   974 
       
   975 	/* Set up the YUV surface function structure */
       
   976 	overlay->hwfuncs = &sw_yuvfuncs;
       
   977 
       
   978 	/* Create the pixel data and lookup tables */
       
   979 	swdata = (struct private_yuvhwdata *)SDL_malloc(sizeof *swdata);
       
   980 	overlay->hwdata = swdata;
       
   981 	if ( swdata == NULL ) {
       
   982 		SDL_OutOfMemory();
       
   983 		SDL_FreeYUVOverlay(overlay);
       
   984 		return(NULL);
       
   985 	}
       
   986 	swdata->stretch = NULL;
       
   987 	swdata->display = display;
       
   988 	swdata->pixels = (Uint8 *) SDL_malloc(width*height*2);
       
   989 	swdata->colortab = (int *)SDL_malloc(4*256*sizeof(int));
       
   990 	Cr_r_tab = &swdata->colortab[0*256];
       
   991 	Cr_g_tab = &swdata->colortab[1*256];
       
   992 	Cb_g_tab = &swdata->colortab[2*256];
       
   993 	Cb_b_tab = &swdata->colortab[3*256];
       
   994 	swdata->rgb_2_pix = (Uint32 *)SDL_malloc(3*768*sizeof(Uint32));
       
   995 	r_2_pix_alloc = &swdata->rgb_2_pix[0*768];
       
   996 	g_2_pix_alloc = &swdata->rgb_2_pix[1*768];
       
   997 	b_2_pix_alloc = &swdata->rgb_2_pix[2*768];
       
   998 	if ( ! swdata->pixels || ! swdata->colortab || ! swdata->rgb_2_pix ) {
       
   999 		SDL_OutOfMemory();
       
  1000 		SDL_FreeYUVOverlay(overlay);
       
  1001 		return(NULL);
       
  1002 	}
       
  1003 
       
  1004 	/* Generate the tables for the display surface */
       
  1005 	for (i=0; i<256; i++) {
       
  1006 		/* Gamma correction (luminescence table) and chroma correction
       
  1007 		   would be done here.  See the Berkeley mpeg_play sources.
       
  1008 		*/
       
  1009 		CB = CR = (i-128);
       
  1010 		Cr_r_tab[i] = (int) ( (0.419/0.299) * CR);
       
  1011 		Cr_g_tab[i] = (int) (-(0.299/0.419) * CR);
       
  1012 		Cb_g_tab[i] = (int) (-(0.114/0.331) * CB); 
       
  1013 		Cb_b_tab[i] = (int) ( (0.587/0.331) * CB);
       
  1014 	}
       
  1015 
       
  1016 	/* 
       
  1017 	 * Set up entries 0-255 in rgb-to-pixel value tables.
       
  1018 	 */
       
  1019 	Rmask = display->format->Rmask;
       
  1020 	Gmask = display->format->Gmask;
       
  1021 	Bmask = display->format->Bmask;
       
  1022 	for ( i=0; i<256; ++i ) {
       
  1023 		r_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Rmask));
       
  1024 		r_2_pix_alloc[i+256] <<= free_bits_at_bottom(Rmask);
       
  1025 		g_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Gmask));
       
  1026 		g_2_pix_alloc[i+256] <<= free_bits_at_bottom(Gmask);
       
  1027 		b_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Bmask));
       
  1028 		b_2_pix_alloc[i+256] <<= free_bits_at_bottom(Bmask);
       
  1029 	}
       
  1030 
       
  1031 	/*
       
  1032 	 * If we have 16-bit output depth, then we double the value
       
  1033 	 * in the top word. This means that we can write out both
       
  1034 	 * pixels in the pixel doubling mode with one op. It is 
       
  1035 	 * harmless in the normal case as storing a 32-bit value
       
  1036 	 * through a short pointer will lose the top bits anyway.
       
  1037 	 */
       
  1038 	if( display->format->BytesPerPixel == 2 ) {
       
  1039 		for ( i=0; i<256; ++i ) {
       
  1040 			r_2_pix_alloc[i+256] |= (r_2_pix_alloc[i+256]) << 16;
       
  1041 			g_2_pix_alloc[i+256] |= (g_2_pix_alloc[i+256]) << 16;
       
  1042 			b_2_pix_alloc[i+256] |= (b_2_pix_alloc[i+256]) << 16;
       
  1043 		}
       
  1044 	}
       
  1045 
       
  1046 	/*
       
  1047 	 * Spread out the values we have to the rest of the array so that
       
  1048 	 * we do not need to check for overflow.
       
  1049 	 */
       
  1050 	for ( i=0; i<256; ++i ) {
       
  1051 		r_2_pix_alloc[i] = r_2_pix_alloc[256];
       
  1052 		r_2_pix_alloc[i+512] = r_2_pix_alloc[511];
       
  1053 		g_2_pix_alloc[i] = g_2_pix_alloc[256];
       
  1054 		g_2_pix_alloc[i+512] = g_2_pix_alloc[511];
       
  1055 		b_2_pix_alloc[i] = b_2_pix_alloc[256];
       
  1056 		b_2_pix_alloc[i+512] = b_2_pix_alloc[511];
       
  1057 	}
       
  1058 
       
  1059 	/* You have chosen wisely... */
       
  1060 	switch (format) {
       
  1061 	    case SDL_YV12_OVERLAY:
       
  1062 	    case SDL_IYUV_OVERLAY:
       
  1063 		if ( display->format->BytesPerPixel == 2 ) {
       
  1064 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
       
  1065 			/* inline assembly functions */
       
  1066 			if ( SDL_HasMMX() && (Rmask == 0xF800) &&
       
  1067 			                     (Gmask == 0x07E0) &&
       
  1068 				             (Bmask == 0x001F) &&
       
  1069 			                     (width & 15) == 0) {
       
  1070 /*printf("Using MMX 16-bit 565 dither\n");*/
       
  1071 				swdata->Display1X = Color565DitherYV12MMX1X;
       
  1072 			} else {
       
  1073 /*printf("Using C 16-bit dither\n");*/
       
  1074 				swdata->Display1X = Color16DitherYV12Mod1X;
       
  1075 			}
       
  1076 #else
       
  1077 			swdata->Display1X = Color16DitherYV12Mod1X;
       
  1078 #endif
       
  1079 			swdata->Display2X = Color16DitherYV12Mod2X;
       
  1080 		}
       
  1081 		if ( display->format->BytesPerPixel == 3 ) {
       
  1082 			swdata->Display1X = Color24DitherYV12Mod1X;
       
  1083 			swdata->Display2X = Color24DitherYV12Mod2X;
       
  1084 		}
       
  1085 		if ( display->format->BytesPerPixel == 4 ) {
       
  1086 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
       
  1087 			/* inline assembly functions */
       
  1088 			if ( SDL_HasMMX() && (Rmask == 0x00FF0000) &&
       
  1089 			                     (Gmask == 0x0000FF00) &&
       
  1090 				             (Bmask == 0x000000FF) && 
       
  1091 			                     (width & 15) == 0) {
       
  1092 /*printf("Using MMX 32-bit dither\n");*/
       
  1093 				swdata->Display1X = ColorRGBDitherYV12MMX1X;
       
  1094 			} else {
       
  1095 /*printf("Using C 32-bit dither\n");*/
       
  1096 				swdata->Display1X = Color32DitherYV12Mod1X;
       
  1097 			}
       
  1098 #else
       
  1099 			swdata->Display1X = Color32DitherYV12Mod1X;
       
  1100 #endif
       
  1101 			swdata->Display2X = Color32DitherYV12Mod2X;
       
  1102 		}
       
  1103 		break;
       
  1104 	    case SDL_YUY2_OVERLAY:
       
  1105 	    case SDL_UYVY_OVERLAY:
       
  1106 	    case SDL_YVYU_OVERLAY:
       
  1107 		if ( display->format->BytesPerPixel == 2 ) {
       
  1108 			swdata->Display1X = Color16DitherYUY2Mod1X;
       
  1109 			swdata->Display2X = Color16DitherYUY2Mod2X;
       
  1110 		}
       
  1111 		if ( display->format->BytesPerPixel == 3 ) {
       
  1112 			swdata->Display1X = Color24DitherYUY2Mod1X;
       
  1113 			swdata->Display2X = Color24DitherYUY2Mod2X;
       
  1114 		}
       
  1115 		if ( display->format->BytesPerPixel == 4 ) {
       
  1116 			swdata->Display1X = Color32DitherYUY2Mod1X;
       
  1117 			swdata->Display2X = Color32DitherYUY2Mod2X;
       
  1118 		}
       
  1119 		break;
       
  1120 	    default:
       
  1121 		/* We should never get here (caught above) */
       
  1122 		break;
       
  1123 	}
       
  1124 
       
  1125 	/* Find the pitch and offset values for the overlay */
       
  1126 	overlay->pitches = swdata->pitches;
       
  1127 	overlay->pixels = swdata->planes;
       
  1128 	switch (format) {
       
  1129 	    case SDL_YV12_OVERLAY:
       
  1130 	    case SDL_IYUV_OVERLAY:
       
  1131 		overlay->pitches[0] = overlay->w;
       
  1132 		overlay->pitches[1] = overlay->pitches[0] / 2;
       
  1133 		overlay->pitches[2] = overlay->pitches[0] / 2;
       
  1134 	        overlay->pixels[0] = swdata->pixels;
       
  1135 	        overlay->pixels[1] = overlay->pixels[0] +
       
  1136 		                     overlay->pitches[0] * overlay->h;
       
  1137 	        overlay->pixels[2] = overlay->pixels[1] +
       
  1138 		                     overlay->pitches[1] * overlay->h / 2;
       
  1139 		overlay->planes = 3;
       
  1140 		break;
       
  1141 	    case SDL_YUY2_OVERLAY:
       
  1142 	    case SDL_UYVY_OVERLAY:
       
  1143 	    case SDL_YVYU_OVERLAY:
       
  1144 		overlay->pitches[0] = overlay->w*2;
       
  1145 	        overlay->pixels[0] = swdata->pixels;
       
  1146 		overlay->planes = 1;
       
  1147 		break;
       
  1148 	    default:
       
  1149 		/* We should never get here (caught above) */
       
  1150 		break;
       
  1151 	}
       
  1152 
       
  1153 	/* We're all done.. */
       
  1154 	return(overlay);
       
  1155 }
       
  1156 
       
  1157 int SDL_LockYUV_SW(_THIS, SDL_Overlay *overlay)
       
  1158 {
       
  1159 	return(0);
       
  1160 }
       
  1161 
       
  1162 void SDL_UnlockYUV_SW(_THIS, SDL_Overlay *overlay)
       
  1163 {
       
  1164 	return;
       
  1165 }
       
  1166 
       
  1167 int SDL_DisplayYUV_SW(_THIS, SDL_Overlay *overlay, SDL_Rect *src, SDL_Rect *dst)
       
  1168 {
       
  1169 	struct private_yuvhwdata *swdata;
       
  1170 	int stretch;
       
  1171 	int scale_2x;
       
  1172 	SDL_Surface *display;
       
  1173 	Uint8 *lum, *Cr, *Cb;
       
  1174 	Uint8 *dstp;
       
  1175 	int mod;
       
  1176 
       
  1177 	swdata = overlay->hwdata;
       
  1178 	stretch = 0;
       
  1179 	scale_2x = 0;
       
  1180 	if ( src->x || src->y || src->w < overlay->w || src->h < overlay->h ) {
       
  1181 		/* The source rectangle has been clipped.
       
  1182 		   Using a scratch surface is easier than adding clipped
       
  1183 		   source support to all the blitters, plus that would
       
  1184 		   slow them down in the general unclipped case.
       
  1185 		*/
       
  1186 		stretch = 1;
       
  1187 	} else if ( (src->w != dst->w) || (src->h != dst->h) ) {
       
  1188 		if ( (dst->w == 2*src->w) &&
       
  1189 		     (dst->h == 2*src->h) ) {
       
  1190 			scale_2x = 1;
       
  1191 		} else {
       
  1192 			stretch = 1;
       
  1193 		}
       
  1194 	}
       
  1195 	if ( stretch ) {
       
  1196 		if ( ! swdata->stretch ) {
       
  1197 			display = swdata->display;
       
  1198 			swdata->stretch = SDL_CreateRGBSurface(
       
  1199 				SDL_SWSURFACE,
       
  1200 				overlay->w, overlay->h,
       
  1201 				display->format->BitsPerPixel,
       
  1202 				display->format->Rmask,
       
  1203 				display->format->Gmask,
       
  1204 				display->format->Bmask, 0);
       
  1205 			if ( ! swdata->stretch ) {
       
  1206 				return(-1);
       
  1207 			}
       
  1208 		}
       
  1209 		display = swdata->stretch;
       
  1210 	} else {
       
  1211 		display = swdata->display;
       
  1212 	}
       
  1213 	switch (overlay->format) {
       
  1214 	    case SDL_YV12_OVERLAY:
       
  1215 		lum = overlay->pixels[0];
       
  1216 		Cr =  overlay->pixels[1];
       
  1217 		Cb =  overlay->pixels[2];
       
  1218 		break;
       
  1219 	    case SDL_IYUV_OVERLAY:
       
  1220 		lum = overlay->pixels[0];
       
  1221 		Cr =  overlay->pixels[2];
       
  1222 		Cb =  overlay->pixels[1];
       
  1223 		break;
       
  1224 	    case SDL_YUY2_OVERLAY:
       
  1225 		lum = overlay->pixels[0];
       
  1226 		Cr = lum + 3;
       
  1227 		Cb = lum + 1;
       
  1228 		break;
       
  1229 	    case SDL_UYVY_OVERLAY:
       
  1230 		lum = overlay->pixels[0]+1;
       
  1231 		Cr = lum + 1;
       
  1232 		Cb = lum - 1;
       
  1233 		break;
       
  1234 	    case SDL_YVYU_OVERLAY:
       
  1235 		lum = overlay->pixels[0];
       
  1236 		Cr = lum + 1;
       
  1237 		Cb = lum + 3;
       
  1238 		break;
       
  1239 	    default:
       
  1240 		SDL_SetError("Unsupported YUV format in blit");
       
  1241 		return(-1);
       
  1242 	}
       
  1243 	if ( SDL_MUSTLOCK(display) ) {
       
  1244         	if ( SDL_LockSurface(display) < 0 ) {
       
  1245 			return(-1);
       
  1246 		}
       
  1247 	}
       
  1248 	if ( stretch ) {
       
  1249 		dstp = (Uint8 *)swdata->stretch->pixels;
       
  1250 	} else {
       
  1251 		dstp = (Uint8 *)display->pixels
       
  1252 			+ dst->x * display->format->BytesPerPixel
       
  1253 			+ dst->y * display->pitch;
       
  1254 	}
       
  1255 	mod = (display->pitch / display->format->BytesPerPixel);
       
  1256 
       
  1257 	if ( scale_2x ) {
       
  1258 		mod -= (overlay->w * 2);
       
  1259 		swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
       
  1260 		                  lum, Cr, Cb, dstp, overlay->h, overlay->w, mod);
       
  1261 	} else {
       
  1262 		mod -= overlay->w;
       
  1263 		swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
       
  1264 		                  lum, Cr, Cb, dstp, overlay->h, overlay->w, mod);
       
  1265 	}
       
  1266 	if ( SDL_MUSTLOCK(display) ) {
       
  1267 		SDL_UnlockSurface(display);
       
  1268 	}
       
  1269 	if ( stretch ) {
       
  1270 		display = swdata->display;
       
  1271 		SDL_SoftStretch(swdata->stretch, src, display, dst);
       
  1272 	}
       
  1273 	SDL_UpdateRects(display, 1, dst);
       
  1274 
       
  1275 	return(0);
       
  1276 }
       
  1277 
       
  1278 void SDL_FreeYUV_SW(_THIS, SDL_Overlay *overlay)
       
  1279 {
       
  1280 	struct private_yuvhwdata *swdata;
       
  1281 
       
  1282 	swdata = overlay->hwdata;
       
  1283 	if ( swdata ) {
       
  1284 		if ( swdata->stretch ) {
       
  1285 			SDL_FreeSurface(swdata->stretch);
       
  1286 		}
       
  1287 		if ( swdata->pixels ) {
       
  1288 			SDL_free(swdata->pixels);
       
  1289 		}
       
  1290 		if ( swdata->colortab ) {
       
  1291 			SDL_free(swdata->colortab);
       
  1292 		}
       
  1293 		if ( swdata->rgb_2_pix ) {
       
  1294 			SDL_free(swdata->rgb_2_pix);
       
  1295 		}
       
  1296 		SDL_free(swdata);
       
  1297 	}
       
  1298 }