Fawkes API Fawkes Development Version

yuvrgb.cpp

00001 
00002 /****************************************************************************
00003  *  yuvrgb.h - YUV to RGB conversion - specific methods, macros and constants
00004  *
00005  *  Created: Sat Aug 12 15:02:41 2006
00006  *  based on colorspaces.h from Tue Feb 23 13:49:38 2005
00007  *  Copyright  2005-2006  Tim Niemueller [www.niemueller.de]
00008  *
00009  ****************************************************************************/
00010 
00011 /*  This program is free software; you can redistribute it and/or modify
00012  *  it under the terms of the GNU General Public License as published by
00013  *  the Free Software Foundation; either version 2 of the License, or
00014  *  (at your option) any later version. A runtime exception applies to
00015  *  this software (see LICENSE.GPL_WRE file mentioned below for details).
00016  *
00017  *  This program is distributed in the hope that it will be useful,
00018  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00019  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020  *  GNU Library General Public License for more details.
00021  *
00022  *  Read the full text in the LICENSE.GPL_WRE file in the doc directory.
00023  */
00024 
00025 #include <fvutils/color/yuvrgb.h>
00026 #include <core/macros.h>
00027 
00028 #include <fvutils/cpu/mmx.h>
00029 
00030 namespace firevision {
00031 #if 0 /* just to make Emacs auto-indent happy */
00032 }
00033 #endif
00034 
00035 /** YUV to RGB Conversion
00036  * B = 1.164(Y - 16)                  + 2.018(U - 128)
00037  * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
00038  * R = 1.164(Y - 16) + 1.596(V - 128)
00039  *
00040  * Values have to be clamped to keep them in the [0-255] range.
00041  * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range
00042  * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results.
00043  * @param YUV unsigned char array that contains the pixels, 4 pixels in 6 byte macro pixel, line after
00044  *            line
00045  * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel
00046  *            (thus this is a 24bit RGB with one byte per color) line by line.
00047  * @param width Width of the image contained in the YUV buffer
00048  * @param height Height of the image contained in the YUV buffer
00049  */
00050 void
00051 yuv411packed_to_rgb_plainc(const unsigned char *YUV, unsigned char *RGB,
00052                            unsigned int width, unsigned int height)
00053 {
00054   register int y0, y1, y2, y3, u, v;
00055   register unsigned int i = 0;
00056   while (i < (width * height)*3/2) {
00057     u  = YUV[i++] - 128;
00058     y0 = YUV[i++] -  16;
00059     y1 = YUV[i++] -  16;
00060     v  = YUV[i++] - 128;
00061     y2 = YUV[i++] -  16;
00062     y3 = YUV[i++] -  16;
00063 
00064     // Set red, green and blue bytes for pixel 0
00065     *RGB++ = clip( (76284 * y0 + 104595 * v             ) >> 16 );
00066     *RGB++ = clip( (76284 * y0 -  25625 * u - 53281 * v ) >> 16 );
00067     *RGB++ = clip( (76284 * y0 + 132252 * u             ) >> 16 );
00068 
00069     // Set red, green and blue bytes for pixel 1
00070     *RGB++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
00071     *RGB++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
00072     *RGB++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
00073 
00074     // Set red, green and blue bytes for pixel 2
00075     *RGB++ = clip( (76284 * y2 + 104595 * v             ) >> 16 );
00076     *RGB++ = clip( (76284 * y2 -  25625 * u - 53281 * v ) >> 16 );
00077     *RGB++ = clip( (76284 * y2 + 132252 * u             ) >> 16 );
00078 
00079     // Set red, green and blue bytes for pixel 3
00080     *RGB++ = clip( (76284 * y3 + 104595 * v             ) >> 16 );
00081     *RGB++ = clip( (76284 * y3 -  25625 * u - 53281 * v ) >> 16 );
00082     *RGB++ = clip( (76284 * y3 + 132252 * u             ) >> 16 );
00083 
00084   }
00085 }
00086 
00087 
00088 /** YUV to RGB Conversion
00089  * B = 1.164(Y - 16)                  + 2.018(U - 128)
00090  * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
00091  * R = 1.164(Y - 16) + 1.596(V - 128)
00092  *
00093  * Values have to be clamped to keep them in the [0-255] range.
00094  * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range
00095  * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results.
00096  * @param YUV unsigned char array that contains the pixels, 4 pixels in 6 byte macro pixel, line after
00097  *            line
00098  * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel
00099  *            (thus this is a 24bit RGB with one byte per color) line by line.
00100  * @param width Width of the image contained in the YUV buffer
00101  * @param height Height of the image contained in the YUV buffer
00102  */
00103 void
00104 yuv422planar_to_rgb_plainc(const unsigned char *planar, unsigned char *RGB, unsigned int width, unsigned int height)
00105 {
00106 
00107   register short y1, y2, u, v;
00108   register const unsigned char *yp, *up, *vp;
00109   register unsigned int i;
00110 
00111   yp = planar;
00112   up = planar + (width * height);
00113   vp = up + (width * height / 2);
00114 
00115   for (i = 0; i < (width * height / 2); ++i) {
00116 
00117     y1 = *yp++;
00118     y2 = *yp++;
00119     u  = *up++;
00120     v  = *vp++;
00121 
00122     y1 -=  16;
00123     y2 -=  16;
00124     u  -= 128;
00125     v  -= 128;
00126 
00127     // Set red, green and blue bytes for pixel 0
00128     *RGB++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
00129     *RGB++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
00130     *RGB++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
00131 
00132     // Set red, green and blue bytes for pixel 1
00133     *RGB++ = clip( (76284 * y2 + 104595 * v             ) >> 16 );
00134     *RGB++ = clip( (76284 * y2 -  25625 * u - 53281 * v ) >> 16 );
00135     *RGB++ = clip( (76284 * y2 + 132252 * u             ) >> 16 );
00136 
00137   }
00138 }
00139 
00140 
00141 
00142 /** YUV to RGB Conversion
00143  * B = 1.164(Y - 16)                  + 2.018(U - 128)
00144  * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
00145  * R = 1.164(Y - 16) + 1.596(V - 128)
00146  *
00147  * Values have to be clamped to keep them in the [0-255] range.
00148  * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range
00149  * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results.
00150  * @param YUV unsigned char array that contains the pixels, 4 pixels in 8 byte macro pixel, line after
00151  *            line
00152  * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel
00153  *            (thus this is a 24bit RGB with one byte per color) line by line.
00154  * @param width Width of the image contained in the YUV buffer
00155  * @param height Height of the image contained in the YUV buffer
00156  */
00157 void
00158 yuv422packed_to_rgb_plainc(const unsigned char *YUV, unsigned char *RGB,
00159                            unsigned int width, unsigned int height)
00160 {
00161   register int y0, y1, u, v;
00162   register unsigned int i = 0;
00163   while (i < (width * height)/2) {
00164     u  = YUV[i++] - 128;
00165     y0 = YUV[i++] -  16;
00166     v  = YUV[i++] - 128;
00167     y1 = YUV[i++] -  16;
00168 
00169     // Set red, green and blue bytes for pixel 0
00170     *RGB++ = clip( (76284 * y0 + 104595 * v             ) >> 16 );
00171     *RGB++ = clip( (76284 * y0 -  25625 * u - 53281 * v ) >> 16 );
00172     *RGB++ = clip( (76284 * y0 + 132252 * u             ) >> 16 );
00173 
00174     // Set red, green and blue bytes for pixel 1
00175     *RGB++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
00176     *RGB++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
00177     *RGB++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
00178   }
00179 }
00180 
00181 /** Convert YUV422 planar to BGR.
00182  * Use formula in aforementioned function.
00183  * @param YUV YUV422 planar buffer
00184  * @param BGR BGR buffer
00185  * @param width Width of the image contained in the YUV buffer
00186  * @param height Height of the image contained in the YUV buffer
00187  */
00188 void
00189 yuv422planar_to_bgr_plainc(const unsigned char *planar, unsigned char *BGR,
00190                            unsigned int width, unsigned int height)
00191 {
00192 
00193   register short y1, y2, u, v;
00194   register const unsigned char *yp, *up, *vp;
00195   register unsigned int i;
00196 
00197   yp = planar;
00198   up = planar + (width * height);
00199   vp = up + (width * height / 2);
00200 
00201   for (i = 0; i < (width * height / 2); ++i) {
00202 
00203     y1 = *yp++;
00204     y2 = *yp++;
00205     u  = *up++;
00206     v  = *vp++;
00207 
00208     y1 -=  16;
00209     y2 -=  16;
00210     u  -= 128;
00211     v  -= 128;
00212 
00213     // Set red, green and blue bytes for pixel 0
00214     *BGR++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
00215     *BGR++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
00216     *BGR++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
00217 
00218     // Set red, green and blue bytes for pixel 1
00219     *BGR++ = clip( (76284 * y2 + 132252 * u             ) >> 16 );
00220     *BGR++ = clip( (76284 * y2 -  25625 * u - 53281 * v ) >> 16 );
00221     *BGR++ = clip( (76284 * y2 + 104595 * v             ) >> 16 );
00222   }
00223 }
00224 
00225 
00226 void
00227 yuv422planar_to_rgb_with_alpha_plainc(const unsigned char *planar, unsigned char *RGB, unsigned int width, unsigned int height)
00228 {
00229 
00230   register short y1, y2, u, v;
00231   register const unsigned char *yp, *up, *vp;
00232   register unsigned int i;
00233 
00234   yp = planar;
00235   up = planar + (width * height);
00236   vp = up + (width * height / 2);
00237 
00238   for (i = 0; i < (width * height / 2); ++i) {
00239 
00240     y1 = *yp++;
00241     y2 = *yp++;
00242     u  = *up++;
00243     v  = *vp++;
00244 
00245     y1 -=  16;
00246     y2 -=  16;
00247     u  -= 128;
00248     v  -= 128;
00249 
00250     // Set red, green and blue bytes for pixel 0
00251     *RGB++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
00252     *RGB++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
00253     *RGB++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
00254     *RGB++ = 255;
00255 
00256     // Set red, green and blue bytes for pixel 1
00257     *RGB++ = clip( (76284 * y2 + 104595 * v             ) >> 16 );
00258     *RGB++ = clip( (76284 * y2 -  25625 * u - 53281 * v ) >> 16 );
00259     *RGB++ = clip( (76284 * y2 + 132252 * u             ) >> 16 );
00260     *RGB++ = 255;
00261 
00262   }
00263 
00264 }
00265 
00266 
00267 void
00268 yuv422planar_to_bgr_with_alpha_plainc(const unsigned char *planar, unsigned char *BGR, unsigned int width, unsigned int height)
00269 {
00270 
00271   register short y1, y2, u, v;
00272   register const unsigned char *yp, *up, *vp;
00273   register unsigned int i;
00274 
00275   yp = planar;
00276   up = planar + (width * height);
00277   vp = up + (width * height / 2);
00278 
00279   for (i = 0; i < (width * height / 2); ++i) {
00280 
00281     y1 = *yp++;
00282     y2 = *yp++;
00283     u  = *up++;
00284     v  = *vp++;
00285 
00286     y1 -=  16;
00287     y2 -=  16;
00288     u  -= 128;
00289     v  -= 128;
00290 
00291     // Set red, green and blue bytes for pixel 0
00292     *BGR++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
00293     *BGR++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
00294     *BGR++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
00295     *BGR++ = 255;
00296 
00297     // Set red, green and blue bytes for pixel 1
00298     *BGR++ = clip( (76284 * y2 + 132252 * u             ) >> 16 );
00299     *BGR++ = clip( (76284 * y2 -  25625 * u - 53281 * v ) >> 16 );
00300     *BGR++ = clip( (76284 * y2 + 104595 * v             ) >> 16 );
00301     *BGR++ = 255;
00302 
00303   }
00304 
00305 }
00306 
00307 
00308 void
00309 yuv422packed_to_bgr_with_alpha_plainc(const unsigned char *YUV, unsigned char *BGR,
00310                                       unsigned int width, unsigned int height)
00311 {
00312 
00313   register int y0, y1, u, v;
00314   register unsigned int i = 0;
00315   while (i < (width * height * 2)) {
00316     u  = YUV[i++] - 128;
00317     y0 = YUV[i++] -  16;
00318     v  = YUV[i++] - 128;
00319     y1 = YUV[i++] -  16;
00320 
00321     // Set red, green and blue bytes for pixel 0
00322     *BGR++ = clip( (76284 * y0 + 132252 * u             ) >> 16 );
00323     *BGR++ = clip( (76284 * y0 -  25625 * u - 53281 * v ) >> 16 );
00324     *BGR++ = clip( (76284 * y0 + 104595 * v             ) >> 16 );
00325     *BGR++ = 255;
00326 
00327     // Set red, green and blue bytes for pixel 1
00328     *BGR++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
00329     *BGR++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
00330     *BGR++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
00331     *BGR++ = 255;
00332 
00333   }
00334 }
00335 
00336 
00337 #if ( \
00338          defined __i386__ || \
00339          defined __386__ || \
00340          defined __X86__ || \
00341          defined _M_IX86 || \
00342          defined i386)
00343 
00344 #define CRV    104595
00345 #define CBU    132251
00346 #define CGU    25624
00347 #define CGV    53280
00348 #define YMUL   76283
00349 #define OFF    32768
00350 #define BITRES 16
00351 
00352 /* calculation float resolution in bits */
00353 /* ie RES = 6 is 10.6 fixed point */
00354 /*    RES = 8 is 8.8 fixed point */
00355 /*    RES = 4 is 12.4 fixed point */
00356 /* NB: going above 6 will lead to overflow... :( */
00357 #define RES    6
00358 
00359 #define RZ(i)  (i >> (BITRES - RES))
00360 #define FOUR(i) {i, i, i, i}
00361 
00362 __aligned(8) const volatile unsigned short _const_crvcrv[4] = FOUR(RZ(CRV));
00363 __aligned(8) const volatile unsigned short _const_cbucbu[4] = FOUR(RZ(CBU));
00364 __aligned(8) const volatile unsigned short _const_cgucgu[4] = FOUR(RZ(CGU));
00365 __aligned(8) const volatile unsigned short _const_cgvcgv[4] = FOUR(RZ(CGV));
00366 __aligned(8) const volatile unsigned short _const_ymul  [4] = FOUR(RZ(YMUL));
00367 __aligned(8) const volatile unsigned short _const_128   [4] = FOUR(128);
00368 __aligned(8) const volatile unsigned short _const_32    [4] = FOUR(RZ(OFF));
00369 __aligned(8) const volatile unsigned short _const_16    [4] = FOUR(16);
00370 
00371 #define CONST_CRVCRV *_const_crvcrv
00372 #define CONST_CBUCBU *_const_cbucbu
00373 #define CONST_CGUCGU *_const_cgucgu
00374 #define CONST_CGVCGV *_const_cgvcgv
00375 #define CONST_YMUL   *_const_ymul
00376 #define CONST_128    *_const_128
00377 #define CONST_32     *_const_32
00378 #define CONST_16     *_const_16
00379 
00380 void
00381 yuv411planar_to_rgb_mmx (const unsigned char *yuv, unsigned char *rgb,
00382                          unsigned int w, unsigned int h)
00383 {
00384   unsigned int xx, yy;
00385   register const unsigned char *yp1, *up, *vp;
00386   unsigned char *dp1;
00387 
00388   /* plane pointers */
00389   yp1 = yuv;
00390   up = yuv + (w * h);
00391   vp = up + (w * (h / 4));
00392   /* destination pointers */
00393   dp1 = rgb;
00394 
00395 
00396 
00397   yp1 = yuv;
00398   up = yuv + (w * h);
00399   vp = up + ((w / 2) * (h / 2));
00400   dp1 = rgb;
00401   for (yy = 0; yy < h; yy++)
00402     {
00403       for (xx = 0; xx < w; xx += 8)
00404         {
00405           movq_m2r(*yp1, mm0);
00406           movq_r2r(mm0, mm1);
00407           psrlw_i2r(8, mm0);
00408           psllw_i2r(8, mm1);
00409           psrlw_i2r(8, mm1);
00410 
00411           pxor_r2r(mm7, mm7);
00412           movd_m2r(*up, mm3);
00413           movd_m2r(*vp, mm2);
00414 
00415           punpcklbw_r2r(mm7, mm2);
00416           punpcklbw_r2r(mm7, mm3);
00417 
00418           movq_m2r(CONST_16, mm4);
00419           psubsw_r2r(mm4, mm0);
00420           psubsw_r2r(mm4, mm1);
00421 
00422           movq_m2r(CONST_128, mm5);
00423           psubsw_r2r(mm5, mm2);
00424           psubsw_r2r(mm5, mm3);
00425 
00426           movq_m2r(CONST_YMUL, mm4);
00427           pmullw_r2r(mm4, mm0);
00428           pmullw_r2r(mm4, mm1);
00429 
00430           movq_m2r(CONST_CRVCRV, mm7);
00431           pmullw_r2r(mm3, mm7);
00432 
00433           movq_m2r(CONST_CBUCBU, mm6);
00434           pmullw_r2r(mm2, mm6);
00435 
00436           movq_m2r(CONST_CGUCGU, mm5);
00437           pmullw_r2r(mm2, mm5);
00438 
00439           movq_m2r(CONST_CGVCGV, mm4);
00440           pmullw_r2r(mm3, mm4);
00441 
00442           movq_r2r(mm0, mm2);
00443           paddsw_r2r(mm7, mm2);
00444           paddsw_r2r(mm1, mm7);
00445 
00446           psraw_i2r(RES, mm2);
00447           psraw_i2r(RES, mm7);
00448           packuswb_r2r(mm7, mm2);
00449 
00450           pxor_r2r(mm7, mm7);
00451           movq_r2r(mm2, mm3);
00452           punpckhbw_r2r(mm7, mm2);
00453           punpcklbw_r2r(mm3, mm7);
00454           por_r2r(mm7, mm2);
00455 
00456           movq_r2r(mm0, mm3);
00457           psubsw_r2r(mm5, mm3);
00458           psubsw_r2r(mm4, mm3);
00459           paddsw_m2r(CONST_32, mm3);
00460 
00461           movq_r2r(mm1, mm7);
00462           psubsw_r2r(mm5, mm7);
00463           psubsw_r2r(mm4, mm7);
00464           paddsw_m2r(CONST_32, mm7);
00465 
00466           psraw_i2r(RES, mm3);
00467           psraw_i2r(RES, mm7);
00468           packuswb_r2r(mm7, mm3);
00469 
00470           pxor_r2r(mm7, mm7);
00471           movq_r2r(mm3, mm4);
00472           punpckhbw_r2r(mm7, mm3);
00473           punpcklbw_r2r(mm4, mm7);
00474           por_r2r(mm7, mm3);
00475 
00476           movq_m2r(CONST_32, mm4);
00477           paddsw_r2r(mm6, mm0);
00478           paddsw_r2r(mm6, mm1);
00479           paddsw_r2r(mm4, mm0);
00480           paddsw_r2r(mm4, mm1);
00481           psraw_i2r(RES, mm0);
00482           psraw_i2r(RES, mm1);
00483           packuswb_r2r(mm1, mm0);
00484 
00485           pxor_r2r(mm7, mm7);
00486           movq_r2r(mm0, mm5);
00487           punpckhbw_r2r(mm7, mm0);
00488           punpcklbw_r2r(mm5, mm7);
00489           por_r2r(mm7, mm0);
00490 
00491           pxor_r2r(mm1, mm1);
00492           movq_r2r(mm0, mm5);
00493           movq_r2r(mm3, mm6);
00494           movq_r2r(mm2, mm7);
00495           punpckhbw_r2r(mm3, mm2);
00496           punpcklbw_r2r(mm6, mm7);
00497           punpckhbw_r2r(mm1, mm0);
00498           punpcklbw_r2r(mm1, mm5);
00499 
00500           movq_r2r(mm7, mm1);
00501           punpckhwd_r2r(mm5, mm7);
00502           punpcklwd_r2r(mm5, mm1);
00503 
00504           movq_r2r(mm2, mm4);
00505           punpckhwd_r2r(mm0, mm2);
00506           punpcklwd_r2r(mm0, mm4);
00507 
00508           movntq_r2m(mm1, *(dp1));
00509           movntq_r2m(mm7, *(dp1 + 8));
00510           movntq_r2m(mm4, *(dp1 + 16));
00511           movntq_r2m(mm2, *(dp1 + 24));
00512 
00513           yp1 += 8;
00514           up += 4;
00515           vp += 4;
00516           dp1 += 8 * 4;
00517         }
00518       if (yy & 0x1)
00519         {
00520           up -= w / 2;
00521           vp -= w / 2;
00522         }
00523     }
00524   emms();
00525 }
00526 #endif
00527 
00528 } // end namespace firevision
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends