Fawkes API
Fawkes Development Version
|
00001 00002 /**************************************************************************** 00003 * yuvrgb.h - YUV to RGB conversion - specific methods, macros and constants 00004 * 00005 * Created: Sat Aug 12 15:02:41 2006 00006 * based on colorspaces.h from Tue Feb 23 13:49:38 2005 00007 * Copyright 2005-2006 Tim Niemueller [www.niemueller.de] 00008 * 00009 ****************************************************************************/ 00010 00011 /* This program is free software; you can redistribute it and/or modify 00012 * it under the terms of the GNU General Public License as published by 00013 * the Free Software Foundation; either version 2 of the License, or 00014 * (at your option) any later version. A runtime exception applies to 00015 * this software (see LICENSE.GPL_WRE file mentioned below for details). 00016 * 00017 * This program is distributed in the hope that it will be useful, 00018 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00019 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00020 * GNU Library General Public License for more details. 00021 * 00022 * Read the full text in the LICENSE.GPL_WRE file in the doc directory. 00023 */ 00024 00025 #include <fvutils/color/yuvrgb.h> 00026 #include <core/macros.h> 00027 00028 #include <fvutils/cpu/mmx.h> 00029 00030 namespace firevision { 00031 #if 0 /* just to make Emacs auto-indent happy */ 00032 } 00033 #endif 00034 00035 /** YUV to RGB Conversion 00036 * B = 1.164(Y - 16) + 2.018(U - 128) 00037 * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) 00038 * R = 1.164(Y - 16) + 1.596(V - 128) 00039 * 00040 * Values have to be clamped to keep them in the [0-255] range. 00041 * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range 00042 * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results. 00043 * @param YUV unsigned char array that contains the pixels, 4 pixels in 6 byte macro pixel, line after 00044 * line 00045 * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel 00046 * (thus this is a 24bit RGB with one byte per color) line by line. 00047 * @param width Width of the image contained in the YUV buffer 00048 * @param height Height of the image contained in the YUV buffer 00049 */ 00050 void 00051 yuv411packed_to_rgb_plainc(const unsigned char *YUV, unsigned char *RGB, 00052 unsigned int width, unsigned int height) 00053 { 00054 register int y0, y1, y2, y3, u, v; 00055 register unsigned int i = 0; 00056 while (i < (width * height)*3/2) { 00057 u = YUV[i++] - 128; 00058 y0 = YUV[i++] - 16; 00059 y1 = YUV[i++] - 16; 00060 v = YUV[i++] - 128; 00061 y2 = YUV[i++] - 16; 00062 y3 = YUV[i++] - 16; 00063 00064 // Set red, green and blue bytes for pixel 0 00065 *RGB++ = clip( (76284 * y0 + 104595 * v ) >> 16 ); 00066 *RGB++ = clip( (76284 * y0 - 25625 * u - 53281 * v ) >> 16 ); 00067 *RGB++ = clip( (76284 * y0 + 132252 * u ) >> 16 ); 00068 00069 // Set red, green and blue bytes for pixel 1 00070 *RGB++ = clip( (76284 * y1 + 104595 * v ) >> 16 ); 00071 *RGB++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 ); 00072 *RGB++ = clip( (76284 * y1 + 132252 * u ) >> 16 ); 00073 00074 // Set red, green and blue bytes for pixel 2 00075 *RGB++ = clip( (76284 * y2 + 104595 * v ) >> 16 ); 00076 *RGB++ = clip( (76284 * y2 - 25625 * u - 53281 * v ) >> 16 ); 00077 *RGB++ = clip( (76284 * y2 + 132252 * u ) >> 16 ); 00078 00079 // Set red, green and blue bytes for pixel 3 00080 *RGB++ = clip( (76284 * y3 + 104595 * v ) >> 16 ); 00081 *RGB++ = clip( (76284 * y3 - 25625 * u - 53281 * v ) >> 16 ); 00082 *RGB++ = clip( (76284 * y3 + 132252 * u ) >> 16 ); 00083 00084 } 00085 } 00086 00087 00088 /** YUV to RGB Conversion 00089 * B = 1.164(Y - 16) + 2.018(U - 128) 00090 * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) 00091 * R = 1.164(Y - 16) + 1.596(V - 128) 00092 * 00093 * Values have to be clamped to keep them in the [0-255] range. 00094 * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range 00095 * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results. 00096 * @param YUV unsigned char array that contains the pixels, 4 pixels in 6 byte macro pixel, line after 00097 * line 00098 * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel 00099 * (thus this is a 24bit RGB with one byte per color) line by line. 00100 * @param width Width of the image contained in the YUV buffer 00101 * @param height Height of the image contained in the YUV buffer 00102 */ 00103 void 00104 yuv422planar_to_rgb_plainc(const unsigned char *planar, unsigned char *RGB, unsigned int width, unsigned int height) 00105 { 00106 00107 register short y1, y2, u, v; 00108 register const unsigned char *yp, *up, *vp; 00109 register unsigned int i; 00110 00111 yp = planar; 00112 up = planar + (width * height); 00113 vp = up + (width * height / 2); 00114 00115 for (i = 0; i < (width * height / 2); ++i) { 00116 00117 y1 = *yp++; 00118 y2 = *yp++; 00119 u = *up++; 00120 v = *vp++; 00121 00122 y1 -= 16; 00123 y2 -= 16; 00124 u -= 128; 00125 v -= 128; 00126 00127 // Set red, green and blue bytes for pixel 0 00128 *RGB++ = clip( (76284 * y1 + 104595 * v ) >> 16 ); 00129 *RGB++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 ); 00130 *RGB++ = clip( (76284 * y1 + 132252 * u ) >> 16 ); 00131 00132 // Set red, green and blue bytes for pixel 1 00133 *RGB++ = clip( (76284 * y2 + 104595 * v ) >> 16 ); 00134 *RGB++ = clip( (76284 * y2 - 25625 * u - 53281 * v ) >> 16 ); 00135 *RGB++ = clip( (76284 * y2 + 132252 * u ) >> 16 ); 00136 00137 } 00138 } 00139 00140 00141 00142 /** YUV to RGB Conversion 00143 * B = 1.164(Y - 16) + 2.018(U - 128) 00144 * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) 00145 * R = 1.164(Y - 16) + 1.596(V - 128) 00146 * 00147 * Values have to be clamped to keep them in the [0-255] range. 00148 * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range 00149 * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results. 00150 * @param YUV unsigned char array that contains the pixels, 4 pixels in 8 byte macro pixel, line after 00151 * line 00152 * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel 00153 * (thus this is a 24bit RGB with one byte per color) line by line. 00154 * @param width Width of the image contained in the YUV buffer 00155 * @param height Height of the image contained in the YUV buffer 00156 */ 00157 void 00158 yuv422packed_to_rgb_plainc(const unsigned char *YUV, unsigned char *RGB, 00159 unsigned int width, unsigned int height) 00160 { 00161 register int y0, y1, u, v; 00162 register unsigned int i = 0; 00163 while (i < (width * height)/2) { 00164 u = YUV[i++] - 128; 00165 y0 = YUV[i++] - 16; 00166 v = YUV[i++] - 128; 00167 y1 = YUV[i++] - 16; 00168 00169 // Set red, green and blue bytes for pixel 0 00170 *RGB++ = clip( (76284 * y0 + 104595 * v ) >> 16 ); 00171 *RGB++ = clip( (76284 * y0 - 25625 * u - 53281 * v ) >> 16 ); 00172 *RGB++ = clip( (76284 * y0 + 132252 * u ) >> 16 ); 00173 00174 // Set red, green and blue bytes for pixel 1 00175 *RGB++ = clip( (76284 * y1 + 104595 * v ) >> 16 ); 00176 *RGB++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 ); 00177 *RGB++ = clip( (76284 * y1 + 132252 * u ) >> 16 ); 00178 } 00179 } 00180 00181 /** Convert YUV422 planar to BGR. 00182 * Use formula in aforementioned function. 00183 * @param YUV YUV422 planar buffer 00184 * @param BGR BGR buffer 00185 * @param width Width of the image contained in the YUV buffer 00186 * @param height Height of the image contained in the YUV buffer 00187 */ 00188 void 00189 yuv422planar_to_bgr_plainc(const unsigned char *planar, unsigned char *BGR, 00190 unsigned int width, unsigned int height) 00191 { 00192 00193 register short y1, y2, u, v; 00194 register const unsigned char *yp, *up, *vp; 00195 register unsigned int i; 00196 00197 yp = planar; 00198 up = planar + (width * height); 00199 vp = up + (width * height / 2); 00200 00201 for (i = 0; i < (width * height / 2); ++i) { 00202 00203 y1 = *yp++; 00204 y2 = *yp++; 00205 u = *up++; 00206 v = *vp++; 00207 00208 y1 -= 16; 00209 y2 -= 16; 00210 u -= 128; 00211 v -= 128; 00212 00213 // Set red, green and blue bytes for pixel 0 00214 *BGR++ = clip( (76284 * y1 + 132252 * u ) >> 16 ); 00215 *BGR++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 ); 00216 *BGR++ = clip( (76284 * y1 + 104595 * v ) >> 16 ); 00217 00218 // Set red, green and blue bytes for pixel 1 00219 *BGR++ = clip( (76284 * y2 + 132252 * u ) >> 16 ); 00220 *BGR++ = clip( (76284 * y2 - 25625 * u - 53281 * v ) >> 16 ); 00221 *BGR++ = clip( (76284 * y2 + 104595 * v ) >> 16 ); 00222 } 00223 } 00224 00225 00226 void 00227 yuv422planar_to_rgb_with_alpha_plainc(const unsigned char *planar, unsigned char *RGB, unsigned int width, unsigned int height) 00228 { 00229 00230 register short y1, y2, u, v; 00231 register const unsigned char *yp, *up, *vp; 00232 register unsigned int i; 00233 00234 yp = planar; 00235 up = planar + (width * height); 00236 vp = up + (width * height / 2); 00237 00238 for (i = 0; i < (width * height / 2); ++i) { 00239 00240 y1 = *yp++; 00241 y2 = *yp++; 00242 u = *up++; 00243 v = *vp++; 00244 00245 y1 -= 16; 00246 y2 -= 16; 00247 u -= 128; 00248 v -= 128; 00249 00250 // Set red, green and blue bytes for pixel 0 00251 *RGB++ = clip( (76284 * y1 + 104595 * v ) >> 16 ); 00252 *RGB++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 ); 00253 *RGB++ = clip( (76284 * y1 + 132252 * u ) >> 16 ); 00254 *RGB++ = 255; 00255 00256 // Set red, green and blue bytes for pixel 1 00257 *RGB++ = clip( (76284 * y2 + 104595 * v ) >> 16 ); 00258 *RGB++ = clip( (76284 * y2 - 25625 * u - 53281 * v ) >> 16 ); 00259 *RGB++ = clip( (76284 * y2 + 132252 * u ) >> 16 ); 00260 *RGB++ = 255; 00261 00262 } 00263 00264 } 00265 00266 00267 void 00268 yuv422planar_to_bgr_with_alpha_plainc(const unsigned char *planar, unsigned char *BGR, unsigned int width, unsigned int height) 00269 { 00270 00271 register short y1, y2, u, v; 00272 register const unsigned char *yp, *up, *vp; 00273 register unsigned int i; 00274 00275 yp = planar; 00276 up = planar + (width * height); 00277 vp = up + (width * height / 2); 00278 00279 for (i = 0; i < (width * height / 2); ++i) { 00280 00281 y1 = *yp++; 00282 y2 = *yp++; 00283 u = *up++; 00284 v = *vp++; 00285 00286 y1 -= 16; 00287 y2 -= 16; 00288 u -= 128; 00289 v -= 128; 00290 00291 // Set red, green and blue bytes for pixel 0 00292 *BGR++ = clip( (76284 * y1 + 132252 * u ) >> 16 ); 00293 *BGR++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 ); 00294 *BGR++ = clip( (76284 * y1 + 104595 * v ) >> 16 ); 00295 *BGR++ = 255; 00296 00297 // Set red, green and blue bytes for pixel 1 00298 *BGR++ = clip( (76284 * y2 + 132252 * u ) >> 16 ); 00299 *BGR++ = clip( (76284 * y2 - 25625 * u - 53281 * v ) >> 16 ); 00300 *BGR++ = clip( (76284 * y2 + 104595 * v ) >> 16 ); 00301 *BGR++ = 255; 00302 00303 } 00304 00305 } 00306 00307 00308 void 00309 yuv422packed_to_bgr_with_alpha_plainc(const unsigned char *YUV, unsigned char *BGR, 00310 unsigned int width, unsigned int height) 00311 { 00312 00313 register int y0, y1, u, v; 00314 register unsigned int i = 0; 00315 while (i < (width * height * 2)) { 00316 u = YUV[i++] - 128; 00317 y0 = YUV[i++] - 16; 00318 v = YUV[i++] - 128; 00319 y1 = YUV[i++] - 16; 00320 00321 // Set red, green and blue bytes for pixel 0 00322 *BGR++ = clip( (76284 * y0 + 132252 * u ) >> 16 ); 00323 *BGR++ = clip( (76284 * y0 - 25625 * u - 53281 * v ) >> 16 ); 00324 *BGR++ = clip( (76284 * y0 + 104595 * v ) >> 16 ); 00325 *BGR++ = 255; 00326 00327 // Set red, green and blue bytes for pixel 1 00328 *BGR++ = clip( (76284 * y1 + 132252 * u ) >> 16 ); 00329 *BGR++ = clip( (76284 * y1 - 25625 * u - 53281 * v ) >> 16 ); 00330 *BGR++ = clip( (76284 * y1 + 104595 * v ) >> 16 ); 00331 *BGR++ = 255; 00332 00333 } 00334 } 00335 00336 00337 #if ( \ 00338 defined __i386__ || \ 00339 defined __386__ || \ 00340 defined __X86__ || \ 00341 defined _M_IX86 || \ 00342 defined i386) 00343 00344 #define CRV 104595 00345 #define CBU 132251 00346 #define CGU 25624 00347 #define CGV 53280 00348 #define YMUL 76283 00349 #define OFF 32768 00350 #define BITRES 16 00351 00352 /* calculation float resolution in bits */ 00353 /* ie RES = 6 is 10.6 fixed point */ 00354 /* RES = 8 is 8.8 fixed point */ 00355 /* RES = 4 is 12.4 fixed point */ 00356 /* NB: going above 6 will lead to overflow... :( */ 00357 #define RES 6 00358 00359 #define RZ(i) (i >> (BITRES - RES)) 00360 #define FOUR(i) {i, i, i, i} 00361 00362 __aligned(8) const volatile unsigned short _const_crvcrv[4] = FOUR(RZ(CRV)); 00363 __aligned(8) const volatile unsigned short _const_cbucbu[4] = FOUR(RZ(CBU)); 00364 __aligned(8) const volatile unsigned short _const_cgucgu[4] = FOUR(RZ(CGU)); 00365 __aligned(8) const volatile unsigned short _const_cgvcgv[4] = FOUR(RZ(CGV)); 00366 __aligned(8) const volatile unsigned short _const_ymul [4] = FOUR(RZ(YMUL)); 00367 __aligned(8) const volatile unsigned short _const_128 [4] = FOUR(128); 00368 __aligned(8) const volatile unsigned short _const_32 [4] = FOUR(RZ(OFF)); 00369 __aligned(8) const volatile unsigned short _const_16 [4] = FOUR(16); 00370 00371 #define CONST_CRVCRV *_const_crvcrv 00372 #define CONST_CBUCBU *_const_cbucbu 00373 #define CONST_CGUCGU *_const_cgucgu 00374 #define CONST_CGVCGV *_const_cgvcgv 00375 #define CONST_YMUL *_const_ymul 00376 #define CONST_128 *_const_128 00377 #define CONST_32 *_const_32 00378 #define CONST_16 *_const_16 00379 00380 void 00381 yuv411planar_to_rgb_mmx (const unsigned char *yuv, unsigned char *rgb, 00382 unsigned int w, unsigned int h) 00383 { 00384 unsigned int xx, yy; 00385 register const unsigned char *yp1, *up, *vp; 00386 unsigned char *dp1; 00387 00388 /* plane pointers */ 00389 yp1 = yuv; 00390 up = yuv + (w * h); 00391 vp = up + (w * (h / 4)); 00392 /* destination pointers */ 00393 dp1 = rgb; 00394 00395 00396 00397 yp1 = yuv; 00398 up = yuv + (w * h); 00399 vp = up + ((w / 2) * (h / 2)); 00400 dp1 = rgb; 00401 for (yy = 0; yy < h; yy++) 00402 { 00403 for (xx = 0; xx < w; xx += 8) 00404 { 00405 movq_m2r(*yp1, mm0); 00406 movq_r2r(mm0, mm1); 00407 psrlw_i2r(8, mm0); 00408 psllw_i2r(8, mm1); 00409 psrlw_i2r(8, mm1); 00410 00411 pxor_r2r(mm7, mm7); 00412 movd_m2r(*up, mm3); 00413 movd_m2r(*vp, mm2); 00414 00415 punpcklbw_r2r(mm7, mm2); 00416 punpcklbw_r2r(mm7, mm3); 00417 00418 movq_m2r(CONST_16, mm4); 00419 psubsw_r2r(mm4, mm0); 00420 psubsw_r2r(mm4, mm1); 00421 00422 movq_m2r(CONST_128, mm5); 00423 psubsw_r2r(mm5, mm2); 00424 psubsw_r2r(mm5, mm3); 00425 00426 movq_m2r(CONST_YMUL, mm4); 00427 pmullw_r2r(mm4, mm0); 00428 pmullw_r2r(mm4, mm1); 00429 00430 movq_m2r(CONST_CRVCRV, mm7); 00431 pmullw_r2r(mm3, mm7); 00432 00433 movq_m2r(CONST_CBUCBU, mm6); 00434 pmullw_r2r(mm2, mm6); 00435 00436 movq_m2r(CONST_CGUCGU, mm5); 00437 pmullw_r2r(mm2, mm5); 00438 00439 movq_m2r(CONST_CGVCGV, mm4); 00440 pmullw_r2r(mm3, mm4); 00441 00442 movq_r2r(mm0, mm2); 00443 paddsw_r2r(mm7, mm2); 00444 paddsw_r2r(mm1, mm7); 00445 00446 psraw_i2r(RES, mm2); 00447 psraw_i2r(RES, mm7); 00448 packuswb_r2r(mm7, mm2); 00449 00450 pxor_r2r(mm7, mm7); 00451 movq_r2r(mm2, mm3); 00452 punpckhbw_r2r(mm7, mm2); 00453 punpcklbw_r2r(mm3, mm7); 00454 por_r2r(mm7, mm2); 00455 00456 movq_r2r(mm0, mm3); 00457 psubsw_r2r(mm5, mm3); 00458 psubsw_r2r(mm4, mm3); 00459 paddsw_m2r(CONST_32, mm3); 00460 00461 movq_r2r(mm1, mm7); 00462 psubsw_r2r(mm5, mm7); 00463 psubsw_r2r(mm4, mm7); 00464 paddsw_m2r(CONST_32, mm7); 00465 00466 psraw_i2r(RES, mm3); 00467 psraw_i2r(RES, mm7); 00468 packuswb_r2r(mm7, mm3); 00469 00470 pxor_r2r(mm7, mm7); 00471 movq_r2r(mm3, mm4); 00472 punpckhbw_r2r(mm7, mm3); 00473 punpcklbw_r2r(mm4, mm7); 00474 por_r2r(mm7, mm3); 00475 00476 movq_m2r(CONST_32, mm4); 00477 paddsw_r2r(mm6, mm0); 00478 paddsw_r2r(mm6, mm1); 00479 paddsw_r2r(mm4, mm0); 00480 paddsw_r2r(mm4, mm1); 00481 psraw_i2r(RES, mm0); 00482 psraw_i2r(RES, mm1); 00483 packuswb_r2r(mm1, mm0); 00484 00485 pxor_r2r(mm7, mm7); 00486 movq_r2r(mm0, mm5); 00487 punpckhbw_r2r(mm7, mm0); 00488 punpcklbw_r2r(mm5, mm7); 00489 por_r2r(mm7, mm0); 00490 00491 pxor_r2r(mm1, mm1); 00492 movq_r2r(mm0, mm5); 00493 movq_r2r(mm3, mm6); 00494 movq_r2r(mm2, mm7); 00495 punpckhbw_r2r(mm3, mm2); 00496 punpcklbw_r2r(mm6, mm7); 00497 punpckhbw_r2r(mm1, mm0); 00498 punpcklbw_r2r(mm1, mm5); 00499 00500 movq_r2r(mm7, mm1); 00501 punpckhwd_r2r(mm5, mm7); 00502 punpcklwd_r2r(mm5, mm1); 00503 00504 movq_r2r(mm2, mm4); 00505 punpckhwd_r2r(mm0, mm2); 00506 punpcklwd_r2r(mm0, mm4); 00507 00508 movntq_r2m(mm1, *(dp1)); 00509 movntq_r2m(mm7, *(dp1 + 8)); 00510 movntq_r2m(mm4, *(dp1 + 16)); 00511 movntq_r2m(mm2, *(dp1 + 24)); 00512 00513 yp1 += 8; 00514 up += 4; 00515 vp += 4; 00516 dp1 += 8 * 4; 00517 } 00518 if (yy & 0x1) 00519 { 00520 up -= w / 2; 00521 vp -= w / 2; 00522 } 00523 } 00524 emms(); 00525 } 00526 #endif 00527 00528 } // end namespace firevision