如何在 Android 上更快地将 RGB565 转换为 YUV420SP?

编程入门 行业动态 更新时间:2024-10-21 19:38:23
本文介绍了如何在 Android 上更快地将 RGB565 转换为 YUV420SP 的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧! 问题描述

我需要显示一张 JPEG 图片,并将其转换为 YUV420SP。首先,我用 SkBitmap 解析 JPEG 并显示它,然后用下面的代码在 Android 上把 RGB565 转换为 YUV420SP,但转换一幅 640*480 的 RGB565 图像要花费 75ms。有人知道在 Android 上把 RGB565 转换为 YUV420SP 的更快方法吗?或者在 Android 上把 JPEG 文件转换为 YUV420SP 的更快方法?

// 从 RGB 转换为 YUV420
int RGB2YUV_YR[256], RGB2YUV_YG[256], RGB2YUV_YB[256];
int RGB2YUV_UR[256], RGB2YUV_UG[256], RGB2YUV_UBVR[256];
int RGB2YUV_VG[256], RGB2YUV_VB[256];

//
// 用于 RGB 到 YUV420 转换的查找表
//
void InitLookupTable()
{
    static bool hasInited = false;
    if(hasInited)
        return ;
    hasInited = true;
    int i;
    for (i = 0; i < 256; i++)
        RGB2YUV_YR[i] = (float) 65.481 * (i << 8);
    for (i = 0; i < 256; i++)
        RGB2YUV_YG[i] = (float) 128.553 * (i << 8);
    for (i = 0; i < 256; i++)
        RGB2YUV_YB[i] = (float) 24.966 * (i << 8);
    for (i = 0; i < 256; i++)
        RGB2YUV_UR[i] = (float) 37.797 * (i << 8);
    for (i = 0; i < 256; i++)
        RGB2YUV_UG[i] = (float) 74.203 * (i << 8);
    for (i = 0; i < 256; i++)
        RGB2YUV_VG[i] = (float) 93.786 * (i << 8);
    for (i = 0; i < 256; i++)
        RGB2YUV_VB[i] = (float) 18.214 * (i << 8);
    for (i = 0; i < 256; i++)
        RGB2YUV_UBVR[i] = (float) 112 * (i << 8);
}

int ConvertRGB5652YUV420SP(int w, int h, unsigned char *bmp, unsigned char *yuv)
{
    unsigned char *u, *v, *y, *uu, *vv;
    unsigned char *pu1, *pu2, *pu3, *pu4;
    unsigned char *pv1, *pv2, *pv3, *pv4;
    unsigned char rValue = 0, gValue = 0, bValue = 0;
    uint16_t* bmpPtr;
    int i, j;
    printf("ConvertRGB5652YUV420SP begin,w=%d,h=%d,bmp=%p,yuv=%p\n", w, h, bmp, yuv);

    struct timeval tpstart,tpend;
    gettimeofday(&tpstart,NULL);
    InitLookupTable();
    gettimeofday(&tpend,NULL);
    float timeuse=1000000*(tpend.tv_sec-tpstart.tv_sec)+tpend.tv_usec-tpstart.tv_usec;
    timeuse/=1000;
    printf("InitLookupTable used time=%f\n", timeuse);

    gettimeofday(&tpstart,NULL);
    uu = new unsigned char[w * h];
    vv = new unsigned char[w * h];
    if (uu == NULL || vv == NULL || yuv == NULL)
        return 0;
    y = yuv;
    u = uu;
    v = vv;
    // 从 BMP 图像数据中获取 r、g、b 指针……
    bmpPtr = (uint16_t*)bmp;
    // 由 RGB 值计算 YUV 值……
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j++) {
            uint16_t color = *bmpPtr;
            unsigned int r = (color>>11) & 0x1f;
            unsigned int g = (color>> 5) & 0x3f;
            unsigned int b = (color    ) & 0x1f;
            rValue = (r<<3) | (r>>2);
            gValue = (g<<2) | (g>>4);
            bValue = (b<<3) | (b>>2);
            *y++ = (RGB2YUV_YR[rValue] + RGB2YUV_YG[gValue] + RGB2YUV_YB[bValue] +
                1048576) >> 16;
            *u++ = (-RGB2YUV_UR[rValue] - RGB2YUV_UG[gValue] + RGB2YUV_UBVR[bValue] +
                8388608) >> 16;
            *v++ = (RGB2YUV_UBVR[rValue] - RGB2YUV_VG[gValue] - RGB2YUV_VB[bValue] +
                8388608) >> 16;
            bmpPtr++;
        }
    }
    gettimeofday(&tpend,NULL);
    timeuse=1000000*(tpend.tv_sec-tpstart.tv_sec)+tpend.tv_usec-tpstart.tv_usec;
    timeuse/=1000;
    printf("Get YUV values used time=%f\n", timeuse);

    gettimeofday(&tpstart,NULL);
    // 现在对 U 和 V 进行采样,得到 YUV 4:2:0 格式
    // 获取正确的指针……
    u = yuv + w * h;
    v = u + 1;
    // 用于 U
    pu1 = uu;
    pu2 = pu1 + 1;
    pu3 = pu1 + w;
    pu4 = pu3 + 1;
    // 用于 V
    pv1 = vv;
    pv2 = pv1 + 1;
    pv3 = pv1 + w;
    pv4 = pv3 + 1;
    // 进行采样……
    for (i = 0; i < h; i += 2) {
        for (j = 0; j < w; j += 2) {
            *u = (*pu1 + *pu2 + *pu3 + *pu4) >> 2;
            u += 2;
            *v = (*pv1 + *pv2 + *pv3 + *pv4) >> 2;
            v += 2;
            pu1 += 2;
            pu2 += 2;
            pu3 += 2;
            pu4 += 2;
            pv1 += 2;
            pv2 += 2;
            pv3 += 2;
            pv4 += 2;
        }
        pu1 += w;
        pu2 += w;
        pu3 += w;
        pu4 += w;
        pv1 += w;
        pv2 += w;
        pv3 += w;
        pv4 += w;
    }
    gettimeofday(&tpend,NULL);
    timeuse=1000000*(tpend.tv_sec-tpstart.tv_sec)+tpend.tv_usec-tpstart.tv_usec;
    timeuse/=1000;
    printf("Do sampling used time=%f\n", timeuse);

    gettimeofday(&tpstart,NULL);
    delete uu;
    delete vv;
    return 1;
}

int main(int argc, char **argv)
{
    unsigned char bmp[640*480*2] = {0};
    unsigned char yuv[(640*480*3)/2] = {0};
    struct timeval tpstart,tpend;
    gettimeofday(&tpstart,NULL);
    ConvertRGB5652YUV420SP(640, 480, bmp, yuv);
    gettimeofday(&tpend,NULL);
    float timeuse=1000000*(tpend.tv_sec-tpstart.tv_sec)+tpend.tv_usec-tpstart.tv_usec;
    timeuse/=1000;
    printf("ConvertARGB2YUV420SP used time=%f\n", timeuse);
    return 0;
}

在 Android(ARMv6)上的输出:

ConvertRGB5652YUV420SP begin,w=640,h=480,bmp=0xbe7314fc,yuv=0xbe7c74fc InitLookupTable used time=0.383000 Get YUV values used time=61.394001 Do sampling used time=11.918000 ConvertARGB2YUV420SP used time=74.596001

CPU信息:

$ cat /proc/cpuinfo cat /proc/cpuinfo Processor : ARMv6-compatible processor rev 5 (v6l) BogoMIPS : 791.34 Features : swp half thumb fastmult vfp edsp java CPU implementer : 0x41 CPU architecture: 6TEJ CPU variant : 0x1 CPU part : 0xb36 CPU revision : 5 Hardware : IMAPX200 Revision : 0000 Serial : 0000000000000000

解决方案

在 ARMv7 上,请使用 NEON。它可以在不到 1 毫秒内完成这项工作(VGA 分辨率)。

如果你只能使用 ARMv6,就用 ARM 汇编来优化它(VGA 分辨率下大约 8 毫秒)。

使用定点运算来代替查找表,把查找表去掉。

准备两个掩码:

  • 0x001f001f:mask1
  • 0x003f003f:mask2

然后一次把两个像素加载到一个 32 位寄存器中(这比 16 位读取快得多)

and red, mask1, pixel, lsr #11
and grn, mask2, pixel, lsr #5
and blu, mask1, pixel

现在你有三个寄存器,每个都包含两个值——一个在低 16 位,另一个在高 16 位。

从这里开始,smulxy 指令会发挥神奇的作用(16 位乘法)。

祝你好运。

PS:你的查找表也不太好。为什么它们的长度都是 256?你可以把它们缩减到 32(与 R 和 B 相关的)和 64(与 G 相关的),这样可以提高缓存命中率。也许仅凭这一点就足以达到 40ms 的目标,而无需借助汇编。是的,缓存未命中的代价就是这么大。

I need to display a JPEG picture and convert it to YUV420SP. First I use SkBitmap to parse the JPEG and display it, then I use the code below to convert RGB565 to YUV420SP on Android, but it takes 75 ms to convert a 640*480 RGB565 picture. Does anybody know a faster way to convert RGB565 to YUV420SP on Android, or a faster way to convert a JPEG file to YUV420SP on Android?

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>

#include <new>

// Per-channel lookup tables for the RGB -> YUV conversion, indexed by an
// 8-bit channel value.  Entry i holds coefficient * (i << 8); the converter
// sums three entries plus an offset and shifts right by 16 to obtain the
// 8-bit Y, U or V component.
int RGB2YUV_YR[256], RGB2YUV_YG[256], RGB2YUV_YB[256];
int RGB2YUV_UR[256], RGB2YUV_UG[256], RGB2YUV_UBVR[256];
int RGB2YUV_VG[256], RGB2YUV_VB[256];

// Fills the lookup tables above.  Only the first call does any work; later
// calls return immediately.
void InitLookupTable()
{
    static bool hasInited = false;
    if (hasInited)
        return;
    hasInited = true;

    for (int i = 0; i < 256; i++) {
        const int scaled = i << 8;
        RGB2YUV_YR[i]   = (float) 65.481 * scaled;
        RGB2YUV_YG[i]   = (float) 128.553 * scaled;
        RGB2YUV_YB[i]   = (float) 24.966 * scaled;
        RGB2YUV_UR[i]   = (float) 37.797 * scaled;
        RGB2YUV_UG[i]   = (float) 74.203 * scaled;
        RGB2YUV_VG[i]   = (float) 93.786 * scaled;
        RGB2YUV_VB[i]   = (float) 18.214 * scaled;
        RGB2YUV_UBVR[i] = (float) 112 * scaled;
    }
}

// Returns the time between two gettimeofday() samples in milliseconds.
static double ElapsedMs(const struct timeval &start, const struct timeval &end)
{
    const double usec = 1000000.0 * (end.tv_sec - start.tv_sec)
                      + (end.tv_usec - start.tv_usec);
    return usec / 1000.0;
}

// Converts a w x h RGB565 image to a full-resolution Y plane followed by a
// 2x2-subsampled chroma plane with U and V bytes interleaved (U first).
//
//   w, h  image size in pixels; both must be positive and even, because the
//         chroma plane is produced by averaging 2x2 pixel blocks.
//   bmp   input, w*h 16-bit RGB565 pixels in native byte order.
//   yuv   output, at least w*h*3/2 bytes.
//
// Returns 1 on success and 0 if an argument is invalid or the scratch
// buffers cannot be allocated.  Timing information is printed to stdout.
int ConvertRGB5652YUV420SP(int w, int h, unsigned char *bmp, unsigned char *yuv)
{
    printf("ConvertRGB5652YUV420SP begin,w=%d,h=%d,bmp=%p,yuv=%p\n",
           w, h, (void *)bmp, (void *)yuv);

    // Validate before allocating so that no early return can leak, and so
    // that the 2x2 averaging below never reads or writes out of bounds.
    if (bmp == NULL || yuv == NULL)
        return 0;
    if (w <= 0 || h <= 0 || (w & 1) != 0 || (h & 1) != 0)
        return 0;

    struct timeval tpstart, tpend;
    gettimeofday(&tpstart, NULL);
    InitLookupTable();
    gettimeofday(&tpend, NULL);
    printf("InitLookupTable used time=%f\n", ElapsedMs(tpstart, tpend));

    gettimeofday(&tpstart, NULL);

    // Full-resolution U and V planes, subsampled in the second pass.
    const size_t pixelCount = (size_t)w * (size_t)h;
    unsigned char *uu = new (std::nothrow) unsigned char[pixelCount];
    unsigned char *vv = new (std::nothrow) unsigned char[pixelCount];
    if (uu == NULL || vv == NULL) {
        delete[] uu;   // delete[] on a null pointer is a no-op
        delete[] vv;
        return 0;
    }

    // Pass 1: per-pixel RGB565 -> Y (written to the output) and U, V
    // (written to the scratch planes).
    unsigned char *y = yuv;
    unsigned char *u = uu;
    unsigned char *v = vv;
    const unsigned char *src = bmp;
    for (size_t n = 0; n < pixelCount; n++, src += sizeof(uint16_t)) {
        // memcpy instead of a uint16_t* cast: bmp is a byte pointer and is
        // not guaranteed to be 2-byte aligned.
        uint16_t color;
        memcpy(&color, src, sizeof color);

        const unsigned int r = (color >> 11) & 0x1f;
        const unsigned int g = (color >> 5) & 0x3f;
        const unsigned int b = color & 0x1f;

        // Expand the 5/6/5-bit channels to 8 bits by replicating the top
        // bits into the low bits.
        const unsigned char rValue = (r << 3) | (r >> 2);
        const unsigned char gValue = (g << 2) | (g >> 4);
        const unsigned char bValue = (b << 3) | (b >> 2);

        // 1048576 == 16 << 16 and 8388608 == 128 << 16: the Y and chroma
        // offsets expressed in the tables' fixed-point scale.
        *y++ = (RGB2YUV_YR[rValue] + RGB2YUV_YG[gValue] + RGB2YUV_YB[bValue]
                + 1048576) >> 16;
        *u++ = (-RGB2YUV_UR[rValue] - RGB2YUV_UG[gValue] + RGB2YUV_UBVR[bValue]
                + 8388608) >> 16;
        *v++ = (RGB2YUV_UBVR[rValue] - RGB2YUV_VG[gValue] - RGB2YUV_VB[bValue]
                + 8388608) >> 16;
    }
    gettimeofday(&tpend, NULL);
    printf("Get YUV values used time=%f\n", ElapsedMs(tpstart, tpend));

    // Pass 2: average each 2x2 block of U and of V and store the results
    // interleaved (U, V, U, V, ...) right after the Y plane.
    gettimeofday(&tpstart, NULL);
    unsigned char *uv = yuv + pixelCount;
    for (int row = 0; row < h; row += 2) {
        const unsigned char *u0 = uu + (size_t)row * (size_t)w;
        const unsigned char *u1 = u0 + w;
        const unsigned char *v0 = vv + (size_t)row * (size_t)w;
        const unsigned char *v1 = v0 + w;
        for (int col = 0; col < w; col += 2) {
            *uv++ = (u0[col] + u0[col + 1] + u1[col] + u1[col + 1]) >> 2;
            *uv++ = (v0[col] + v0[col + 1] + v1[col] + v1[col + 1]) >> 2;
        }
    }
    gettimeofday(&tpend, NULL);
    printf("Do sampling used time=%f\n", ElapsedMs(tpstart, tpend));

    // The buffers came from new[], so they must be released with delete[].
    delete[] uu;
    delete[] vv;
    return 1;
}

// Converts one all-zero 640x480 frame and prints the total time taken.
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    // static: roughly 1 MB in total, kept off the stack.
    static unsigned char bmp[640 * 480 * 2] = {0};
    static unsigned char yuv[(640 * 480 * 3) / 2] = {0};

    struct timeval tpstart, tpend;
    gettimeofday(&tpstart, NULL);
    ConvertRGB5652YUV420SP(640, 480, bmp, yuv);
    gettimeofday(&tpend, NULL);
    printf("ConvertARGB2YUV420SP used time=%f\n", ElapsedMs(tpstart, tpend));
    return 0;
}

output on android(armv6):

ConvertRGB5652YUV420SP begin,w=640,h=480,bmp=0xbe7314fc,yuv=0xbe7c74fc InitLookupTable used time=0.383000 Get YUV values used time=61.394001 Do sampling used time=11.918000 ConvertARGB2YUV420SP used time=74.596001

cpu info:

$ cat /proc/cpuinfo cat /proc/cpuinfo Processor : ARMv6-compatible processor rev 5 (v6l) BogoMIPS : 791.34 Features : swp half thumb fastmult vfp edsp java CPU implementer : 0x41 CPU architecture: 6TEJ CPU variant : 0x1 CPU part : 0xb36 CPU revision : 5 Hardware : IMAPX200 Revision : 0000 Serial : 0000000000000000

解决方案

On ARMv7, use NEON. It will do the job in less than 1ms. (VGA)

If you are stuck with ARMv6, optimize it in ARM assembly. (about 8ms on VGA)

Use fixed-point arithmetic instead of the lookup tables. Get rid of them.

make two masks :

  • 0x001f001f : mask1
  • 0x003f003f : mask2

then load two pixels at once into a 32bit register (which is a lot faster than 16bit read)

and red, mask1, pixel, lsr #11
and grn, mask2, pixel, lsr #5
and blu, mask1, pixel

now you have three registers, each containing two values - one in the lower, and the other in the upper 16 bits.

smulxy instructions will do some miracles from here on. (16bit multiply)

Good luck.

PS: your lookup tables aren't that good either. Why are they all of length 256? You could reduce them to 32 entries (R- and B-related) and 64 entries (G-related), which will increase the cache hit rate. That alone will probably be enough to reach the targeted 40 ms without resorting to assembly. Yes, cache misses are THAT painful.

更多推荐

如何在 Android 上更快地将 RGB565 转换为 YUV420SP?

本文发布于:2023-11-30 20:42:25,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/1651415.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:更快   YUV420SP   Android

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!