一般在arm嵌入式平臺上,大多數板子都有硬解碼、硬件渲染的框架,直接使用即可。
在x86下比較麻煩了。
優化的思路一共有以下幾個方面:
1. 軟解碼變成硬解碼
2. 將YUV轉QImage的操作轉移到GPU
3. QWidget渲染QImage變成OpenGL渲染AVFrame
這三點優化中,2與3的優化效果是非常顯著的。
1的優化效果往往需要將硬解碼的數據copy至CPU再使用2-3的優化。
這樣一來,解碼效率提升了,但是數據copy的時候CPU使用率會上升。如果兩者抵消後CPU使用率還是上升,那就得不償失。如果能實現硬解碼的數據不經過CPU直接打到GPU進行渲染,那就是最完美的方案。這個在x86下需要研究opengl渲染硬件類型數據,難度未知;理論上如果用的是比較新的框架,資料會多一些。
本文主要是基于2-3的優化,在qt5.1下面基于opengl實現了這個方案,在多路1080P的使用場景下CPU使用率下降非常明顯。
#include "opengl_yuv_shader.h"
#include <QDebug>
#include <iostream>
#include <GL/gl.h>
#include <QGLShader>opengl_yuv_shader::opengl_yuv_shader(QWidget *parent) : QGLWidget(parent), useVBO(false),vboId(0),yuv420p_shaderProgram(0),yuvj422p_shaderProgram(0)
{textures[0]=0;textures[1]=0;textures[2]=0;av_frame = nullptr;connect(this,SIGNAL(render_frame()),this,SLOT(slot_render_frame()),Qt::QueuedConnection);//5 lu 60% cpu
}opengl_yuv_shader::~opengl_yuv_shader() {makeCurrent();glDeleteTextures(3, textures);if (yuv420p_shaderProgram) {glDeleteProgram(yuv420p_shaderProgram);}if (yuvj422p_shaderProgram) {glDeleteProgram(yuvj422p_shaderProgram);}doneCurrent();
}void opengl_yuv_shader::initTextures()
{glGenTextures(3, textures);for (int i = 0; i < 3; ++i) {glBindTexture(GL_TEXTURE_2D, textures[i]);glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);glBindTexture(GL_TEXTURE_2D, 0);}
}void opengl_yuv_shader::initShaders()
{QGLShader *vshader = new QGLShader(QGLShader::Vertex, this);const char *vsrc ="attribute vec4 vertex;\n""attribute vec2 texCoord;\n""varying vec2 texc;\n""void main(void)\n""{\n"" gl_Position = vertex;\n"" texc = texCoord;\n""}\n";vshader->compileSourceCode(vsrc);//編譯頂點著色器代碼QGLShader *fshader = new QGLShader(QGLShader::Fragment, this);//vec4(1.0,0,0,1.0);const char *fsrc ="uniform sampler2D texture;\n""varying vec2 texc;\n""void main(void)\n""{\n"" gl_FragColor = texture2D(texture,texc);\n""}\n";//本方案的核心點在于這個片段著色器,在GPU上完成YUV轉RGB的浮點運算。//由于測試的攝像機是基于YUV J420P轉換的所以算法上與YUV420P略有差別。// 實際使用需要根據具體的AVFrame格式,進行轉換。可初始化多個SHADER管理器、// 渲染時,根據像素格式選擇shader渲染const char* fragmentShaderSource = R"(varying vec2 texc;uniform sampler2D textureY;uniform sampler2D textureU;uniform sampler2D textureV;void main(){float y = texture2D(textureY, texc).r;float u = texture2D(textureU, texc).r;float v = texture2D(textureV, texc).r;float r = y + 1.402 * (v - 0.5);float g = y - 0.344136 * (u - 0.5) - 0.714136 * (v - 0.5);float b = y + 1.772 * (u - 0.5);// 確保 RGB 值在 0-1 范圍內r = clamp(r, 0.0, 1.0);g = clamp(g, 0.0, 1.0);b = clamp(b, 0.0, 1.0);gl_FragColor = vec4(r, g, b, 1.0);})";fshader->compileSourceCode(fragmentShaderSource); //編譯紋理著色器代碼program.addShader(vshader);//添加頂點著色器program.addShader(fshader);//添加紋理碎片著色器program.bindAttributeLocation("vertex", 0);//綁定頂點屬性位置program.bindAttributeLocation("texCoord", 1);//綁定紋理屬性位置// 鏈接著色器管道if (!program.link()){close();qDebug()<<"program.link() error"<<endl;}// 綁定著色器管道if (!program.bind()){close();qDebug()<<"program.bind() error"<<endl;}
}void opengl_yuv_shader::initializeGL()
{initializeOpenGLFunctions();glClearColor(0.0f, 0.0f, 0.0f, 1.0f);glEnable(GL_TEXTURE_2D);initTextures();initShaders();
// glDisable(GL_DEPTH_TEST);
// glDisable(GL_CULL_FACE);
// glDisable(GL_BLEND);const GLubyte* renderer = glGetString(GL_RENDERER);const GLubyte* vendor = glGetString(GL_VENDOR);const GLubyte* version = glGetString(GL_VERSION);const GLubyte* glslVersion = glGetString(GL_SHADING_LANGUAGE_VERSION);std::cout << "Renderer: " << renderer<<std::endl;std::cout << "Vendor: " << vendor<<std::endl;std::cout << "OpenGL Version: " << version<<std::endl;std::cout << "GLSL Version: " << glslVersion<<std::endl;texCoords.append(QVector2D(0, 1)); //左上texCoords.append(QVector2D(1, 1)); //右上texCoords.append(QVector2D(0, 0)); //左下texCoords.append(QVector2D(1, 0)); //右下//頂點坐標vertices.append(QVector3D(-1, -1, 1));//左下vertices.append(QVector3D(1, -1, 1)); //右下vertices.append(QVector3D(-1, 1, 1)); //左上vertices.append(QVector3D(1, 1, 1)); //右上
}void opengl_yuv_shader::resizeGL(int w, int h)
{qDebug() << "Oopengl_yuv_shader::resizeGL w=" << w<<endl;glViewport(0, 0, w, h);glMatrixMode(GL_PROJECTION);glLoadIdentity();glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0);glMatrixMode(GL_MODELVIEW);
}void opengl_yuv_shader::paintGL()
{glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);render_lock.lock();if (!av_frame) {render_lock.unlock();return;}glEnable(GL_TEXTURE_2D);program.enableAttributeArray(0);//啟用頂點屬性0,也就是渲染平面的頂點坐標program.enableAttributeArray(1);//啟用頂點屬性1,也就是渲染平面的紋理坐標//紋理坐標的和頂點的對應關系完成渲染program.setAttributeArray(0, vertices.constData() );program.setAttributeArray(1, texCoords.constData() );if(av_frame->format == AV_PIX_FMT_YUV420P || av_frame->format == AV_PIX_FMT_YUVJ420P ){if (av_frame&&av_frame->data[0]) {glActiveTexture(GL_TEXTURE0);glBindTexture(GL_TEXTURE_2D, textures[0]);glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, av_frame->width, av_frame->height, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, av_frame->data[0]);glActiveTexture(GL_TEXTURE1);glBindTexture(GL_TEXTURE_2D, textures[1]);glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, av_frame->width/2, av_frame->height/2, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, av_frame->data[1]);glActiveTexture(GL_TEXTURE2);glBindTexture(GL_TEXTURE_2D, textures[2]);glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, av_frame->width/2, av_frame->height/2, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, av_frame->data[2]);program.setUniformValue("textureY", 0);program.setUniformValue("textureU", 1);program.setUniformValue("textureV", 2);}}render_lock.unlock();// 繪制glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);}void opengl_yuv_shader::set_yuv_frame(AVFrame *frame)
{// 1. 如果 av_frame 已經存在,先釋放它render_lock.lock();if (av_frame) {av_frame_free(&av_frame);av_frame = nullptr;}// 2. 深拷貝 AVFrameav_frame = av_frame_clone(frame);if (!av_frame) {av_log(NULL, AV_LOG_ERROR, "Failed to clone frame\n");render_lock.unlock();return;}render_lock.unlock();emit render_frame();}void opengl_yuv_shader::slot_render_frame()
{update();
}