　　摘要：本文描述了FFmpeg中VideoToolbox解碼器如何進行解碼工作，如何將一個編碼的碼流解碼為最終的裸流。
　　關鍵字：VideoToolbox, decoder, FFmpeg
　　VideoToolbox 是一個低級框架，提供對硬件編碼器和解碼器的直接訪問。它提供視頻壓縮和解壓縮服務，以及存儲在 CoreVideo 像素緩沖區中的光柵圖像格式之間的轉換服務。這些服務以會話對象（壓縮、解壓縮和像素傳輸）的形式提供，并作為 Core Foundation (CF) 類型輸出。 VideoToolbox支持H.263, H.264, HEVC, MPEG-1, MPEG-2, MPEG-4 Part 2, ProRes解碼，H.264, HEVC, ProRes編碼，最新的版本似乎也支持了VP9解碼。

1 主流程

1.1 涉及的Context

　　FFmpeg中每個解碼器都有自己的Context描述，該描述按照約定的格式描述對應的解碼器參數和解碼器的處理函數指針。FFmpeg中的VideoToolbox解碼器主要實現代碼在libavcodec/videotoolbox.{h,c}中，其中針對每一種支持的解碼格式定義了一個獨立的Context，比如ff_h263_videotoolbox_hwaccel、ff_hevc_videotoolbox_hwaccel、ff_h264_videotoolbox_hwaccel等，只是實現上有差異，我們主要關注其中一個即可，這里主要關注ff_h264_videotoolbox_hwaccel。

const AVHWAccel ff_h264_videotoolbox_hwaccel = {.name           = "h264_videotoolbox",.type           = AVMEDIA_TYPE_VIDEO,.id             = AV_CODEC_ID_H264,.pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,.alloc_frame    = ff_videotoolbox_alloc_frame,.start_frame    = ff_videotoolbox_h264_start_frame,.decode_slice   = ff_videotoolbox_h264_decode_slice,.decode_params  = videotoolbox_h264_decode_params,.end_frame      = videotoolbox_h264_end_frame,.frame_params   = ff_videotoolbox_frame_params,.init           = ff_videotoolbox_common_init,.uninit         = ff_videotoolbox_uninit,.priv_data_size = sizeof(VTContext),
};

　　該結構中定義了：

解碼器的名稱；
解碼數據的類型；
解碼器ID；
硬件解碼的格式；
申請一個硬件相關的幀結構的函數指針；
解碼開始前針對幀進行內存拷貝之類的操作；
解碼數據；
解析解碼器需要的參數比如sps等；
送幀結束后的后處理；
將CodecContext中的參數傳遞給HWFramesContext（frame_params）；
初始化硬件解碼器；
銷毀硬件解碼器；
當前硬件解碼器私有描述結構（VTContext）的大小。

　　ff_h264_videotoolbox_hwaccel是存儲在ff_h264_decoder的hw_configs中的，運行時遍歷該列表尋找期望的硬件解碼器。所以解碼工作是先經過FFmpeg內的ff_h264_decoder解碼器再進入硬件解碼器的。

const AVCodec ff_h264_decoder = {.name                  = "h264",.long_name             = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),.type                  = AVMEDIA_TYPE_VIDEO,.id                    = AV_CODEC_ID_H264,.priv_data_size        = sizeof(H264Context),.init                  = h264_decode_init,.close                 = h264_decode_end,.decode                = h264_decode_frame,.capabilities          = /*AV_CODEC_CAP_DRAW_HORIZ_BAND |*/ AV_CODEC_CAP_DR1 |AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SLICE_THREADS |AV_CODEC_CAP_FRAME_THREADS,.hw_configs            = (const AVCodecHWConfigInternal *const []) {
#if CONFIG_H264_DXVA2_HWACCELHWACCEL_DXVA2(h264),
#endif
#if CONFIG_H264_D3D11VA_HWACCELHWACCEL_D3D11VA(h264),
#endif
#if CONFIG_H264_D3D11VA2_HWACCELHWACCEL_D3D11VA2(h264),
#endif
#if CONFIG_H264_NVDEC_HWACCELHWACCEL_NVDEC(h264),
#endif
#if CONFIG_H264_VAAPI_HWACCELHWACCEL_VAAPI(h264),
#endif
#if CONFIG_H264_VDPAU_HWACCELHWACCEL_VDPAU(h264),
#endif
#if CONFIG_H264_VIDEOTOOLBOX_HWACCELHWACCEL_VIDEOTOOLBOX(h264),
#endifNULL},.caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING |FF_CODEC_CAP_ALLOCATE_PROGRESS | FF_CODEC_CAP_INIT_CLEANUP,.flush                 = h264_decode_flush,.update_thread_context = ONLY_IF_THREADS_ENABLED(ff_h264_update_thread_context),.update_thread_context_for_user = ONLY_IF_THREADS_ENABLED(ff_h264_update_thread_context_for_user),.profiles              = NULL_IF_CONFIG_SMALL(ff_h264_profiles),.priv_class            = &h264_class,
};

　　VTContext：VT解碼過程中描述VT的Context。

typedef struct VTContext {// The current bitstream buffer.uint8_t                     *bitstream;// The current size of the bitstream.int                         bitstream_size;// The reference size used for fast reallocation.int                         allocated_size;// The core video bufferCVImageBufferRef            frame;// Current dummy frames context (depends on exact CVImageBufferRef params).struct AVBufferRef         *cached_hw_frames_ctx;// Non-NULL if the new hwaccel API is used. This is only a separate struct// to ease compatibility with the old API.struct AVVideotoolboxContext *vt_ctx;// Current H264 parameters (used to trigger decoder restart on SPS changes).uint8_t                     sps[3];bool                        reconfig_needed;void *logctx;
} VTContext;

1.2 主要流程

在這里插入圖片描述

2 每個步驟的具體實現

2.1 `ff_videotoolbox_common_init`

　　ff_videotoolbox_common_init在初始化解碼器時調用，一般是在avcodec_open2時初始化硬件解碼器。一般FFmpeg為了更加準確的探測當前視頻的媒體信息，在avformat_find_stream_info時就會初始化解碼器解碼少部分的幀來進行流媒體信息探測。
　　初始化時首先就是申請VT的Context內存，并設置一些參數，實際上只設置了VT的callback函數和PixFormat。之后就是根據需要初始化AVHWFramesContext，主要就是申請內存并設置幀格式，比如寬高、格式等等。
　　最后就是調用videotoolbox_start創建VT的Session，創建的過程比較簡單就是直接調用Apple的API創建Session，需要重點關注的是如何設置的。具體的實現函數為videotoolbox_decoder_config_create，其中設置硬件加速的配置是寫死的，無法進行配置。另外就是從當前的CodecContext中取出sps等信息送給解碼器，如果沒有這些信息，解碼器是無法準確識別出時間戳信息的。sps和pps的解析是由FFmpeg完成的。

    /*
     * Build the codec-specific extradata and, when one was produced, store it
     * in avc_info under the matching key ("esds", "avcC", "hvcC" or "vpcC").
     * The VP9 case exists only when CONFIG_VP9_VIDEOTOOLBOX_HWACCEL is set;
     * any other codec type adds nothing.
     */
    switch (codec_type) {
    case kCMVideoCodecType_MPEG4Video :
        /* MPEG-4 extradata is only built when the codec context carries some. */
        if (avctx->extradata_size)
            data = videotoolbox_esds_extradata_create(avctx);
        if (data)
            CFDictionarySetValue(avc_info, CFSTR("esds"), data);
        break;
    case kCMVideoCodecType_H264 :
        data = ff_videotoolbox_avcc_extradata_create(avctx);
        if (data)
            CFDictionarySetValue(avc_info, CFSTR("avcC"), data);
        break;
    case kCMVideoCodecType_HEVC :
        data = ff_videotoolbox_hvcc_extradata_create(avctx);
        if (data)
            CFDictionarySetValue(avc_info, CFSTR("hvcC"), data);
        break;
#if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
    case kCMVideoCodecType_VP9 :
        data = ff_videotoolbox_vpcc_extradata_create(avctx);
        if (data)
            CFDictionarySetValue(avc_info, CFSTR("vpcC"), data);
        break;
#endif
    default:
        break;
    }

　　解碼callback的實現比較簡單，就是Retain一下CVPixelBuffer。

/*
 * Decoder output callback.
 *
 * opaque is the VTContext. Any frame still held in vtctx->frame is released
 * first. If no image buffer was delivered, the status is logged at debug
 * level and vtctx->frame stays NULL; otherwise the delivered buffer is
 * retained and stored as the current frame.
 *
 * sourceFrameRefCon, flags, pts and duration are not used here.
 */
static void videotoolbox_decoder_callback(void *opaque,
                                          void *sourceFrameRefCon,
                                          OSStatus status,
                                          VTDecodeInfoFlags flags,
                                          CVImageBufferRef image_buffer,
                                          CMTime pts,
                                          CMTime duration)
{
    VTContext *vtctx = opaque;

    /* Drop the previous output before accepting a new one. */
    if (vtctx->frame) {
        CVPixelBufferRelease(vtctx->frame);
        vtctx->frame = NULL;
    }

    if (!image_buffer) {
        av_log(vtctx->logctx,  AV_LOG_DEBUG,
               "vt decoder cb: output image buffer is null: %i\n", status);
        return;
    }

    vtctx->frame = CVPixelBufferRetain(image_buffer);
}

2.2 `videotoolbox_h264_decode_params`和`ff_videotoolbox_frame_params`

　　videotoolbox_h264_decode_params主要的工作就是將上層解碼出來的sps和pps信息拷貝到VTContext中。

case H264_NAL_SPS: {GetBitContext tmp_gb = nal->gb;if (avctx->hwaccel && avctx->hwaccel->decode_params) {ret = avctx->hwaccel->decode_params(avctx,nal->type,nal->raw_data,nal->raw_size);if (ret < 0)goto end;}if (ff_h264_decode_seq_parameter_set(&tmp_gb, avctx, &h->ps, 0) >= 0)break;av_log(h->avctx, AV_LOG_DEBUG,"SPS decoding failure, trying again with the complete NAL\n");init_get_bits8(&tmp_gb, nal->raw_data + 1, nal->raw_size - 1);if (ff_h264_decode_seq_parameter_set(&tmp_gb, avctx, &h->ps, 0) >= 0)break;ff_h264_decode_seq_parameter_set(&nal->gb, avctx, &h->ps, 1);break;

　　ff_videotoolbox_frame_params比較簡單，就是將CodecContext中的參數傳遞給HWFramesContext。

2.3 `ff_videotoolbox_alloc_frame`、`ff_videotoolbox_h264_start_frame`、`ff_videotoolbox_h264_decode_slice`、`videotoolbox_h264_end_frame`

　　這幾個函數每一幀都會調用，順序是alloc_frame->start_frame->decode_slice->end_frame。
　　ff_videotoolbox_alloc_frame用來申請一塊內存，此時的內存只是一塊兒裸內存，只是將release函數指針設置成了VT的release指針，還未與CVPixelBuffer綁定，綁定是在解碼器的Callback中進行的。
　　ff_videotoolbox_h264_start_frame主要就是將上層傳下來的stream數據流拷貝到VTContext中。
　　videotoolbox_common_decode_slice也是拷貝數據流。
　　videotoolbox_h264_end_frame才是具體將數據送給解碼器的地方，核心的地方就是videotoolbox_session_decode_frame，這里送給解碼器的數據流就是上面拷貝的數據流。需要注意的是，初始化時設置的callback中只是Retain了CVPixelBuffer，其他什么也沒有做。這是因為在這里調用了VTDecompressionSessionWaitForAsynchronousFrames等待異步解碼完成，能夠保證上一幀解碼完成后才送下一幀數據。