[Audio/Video] FLV Audio/Video Muxing in Practice

The FFmpeg Muxing Workflow

This example program generates a synthetic audio stream and a synthetic video stream, encodes them, and muxes them into an output file. The output format is guessed automatically from the file extension.

The flow of the example is shown below.

[Figure: muxing flow diagram]

Muxing in ffmpeg boils down to three main steps, as sketched below:
  • avformat_write_header: write the file header
  • av_write_frame/av_interleaved_write_frame: write the packets
  • av_write_trailer: write the file trailer

A related helper is avcodec_parameters_from_context, which copies the codec parameters from an AVCodecContext into an AVCodecParameters structure; it is the exact inverse of avcodec_parameters_to_context.
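A minimal sketch of how the three steps line up (error handling trimmed; oc is assumed to be a fully configured AVFormatContext with streams already added, and get_next_packet() is a hypothetical packet source):

ret = avformat_write_header(oc, NULL);          // 1. write the file header
if (ret < 0)
    return ret;
while (get_next_packet(&pkt) == 0)              // hypothetical source of encoded packets
{
    // pkt.stream_index and timestamps must already be set in the stream time base
    ret = av_interleaved_write_frame(oc, &pkt); // 2. write the packets
    if (ret < 0)
        break;
}
av_write_trailer(oc);                           // 3. write the file trailer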

FFmpeg function: avformat_write_header

int avformat_write_header(AVFormatContext *s, AVDictionary **options)
{
    int ret = 0;
    int already_initialized = s->internal->initialized;
    int streams_already_initialized = s->internal->streams_initialized;

    if (!already_initialized)
        if ((ret = avformat_init_output(s, options)) < 0)
            return ret;

    if (!(s->oformat->flags & AVFMT_NOFILE) && s->pb)
        avio_write_marker(s->pb, AV_NOPTS_VALUE, AVIO_DATA_MARKER_HEADER);
    if (s->oformat->write_header) {
        ret = s->oformat->write_header(s);
        if (ret >= 0 && s->pb && s->pb->error < 0)
            ret = s->pb->error;
        if (ret < 0)
            goto fail;
        flush_if_needed(s);
    }
    if (!(s->oformat->flags & AVFMT_NOFILE) && s->pb)
        avio_write_marker(s->pb, AV_NOPTS_VALUE, AVIO_DATA_MARKER_UNKNOWN);

    if (!s->internal->streams_initialized) {
        if ((ret = init_pts(s)) < 0)
            goto fail;
    }

    return streams_already_initialized;

fail:
    if (s->oformat->deinit)
        s->oformat->deinit(s);
    return ret;
}

This ultimately calls into the muxer's write_header, for example:

AVOutputFormat ff_flv_muxer = {
    .name            = "flv",
    .long_name       = NULL_IF_CONFIG_SMALL("FLV (Flash Video)"),
    .mime_type       = "video/x-flv",
    .extensions      = "flv",
    .priv_data_size  = sizeof(FLVContext),
    .audio_codec     = CONFIG_LIBMP3LAME ? AV_CODEC_ID_MP3 : AV_CODEC_ID_ADPCM_SWF,
    .video_codec     = AV_CODEC_ID_FLV1,
    .init            = flv_init,
    .write_header    = flv_write_header,   // write the file header
    .write_packet    = flv_write_packet,
    .write_trailer   = flv_write_trailer,  // write the file trailer
    .check_bitstream = flv_check_bitstream,
    .codec_tag       = (const AVCodecTag* const []) {
                           flv_video_codec_ids, flv_audio_codec_ids, 0
                       },
    .flags           = AVFMT_GLOBALHEADER | AVFMT_VARIABLE_FPS |
                       AVFMT_TS_NONSTRICT,
    .priv_class      = &flv_muxer_class,
};

FFmpeg function: avformat_alloc_output_context2

The function is declared in libavformat/avformat.h as follows:

/**
* Allocate an AVFormatContext for an output format.
* avformat_free_context() can be used to free the context and
* everything allocated by the framework within it.
*
* @param *ctx is set to the created format context, or to NULL in
* case of failure
* @param oformat format to use for allocating the context, if NULL
* format_name and filename are used instead
* @param format_name the name of output format to use for allocating the
* context, if NULL filename is used instead
* @param filename the name of the filename to use for allocating the
* context, may be NULL
* @return >= 0 in case of success, a negative AVERROR code in case of
* failure
*/
int avformat_alloc_output_context2(AVFormatContext **ctx,
                                   ff_const59 AVOutputFormat *oformat,
                                   const char *format_name,
                                   const char *filename);

Parameter descriptions:

  • ctx: the context to be created; set to NULL on failure.
  • oformat: the AVOutputFormat to use. If not given, it can be specified via the following format_name and filename parameters, letting ffmpeg deduce it.
  • format_name: the name of the container format, e.g. "flv" or "mpeg". If NULL, the format is deduced from filename instead.
  • filename: the path of the output file. If both oformat and format_name are NULL, ffmpeg picks a suitable muxer from the filename suffix, e.g. xxx.flv selects the flv muxer.
int avformat_alloc_output_context2(AVFormatContext **avctx, ff_const59 AVOutputFormat *oformat,
                                   const char *format, const char *filename)
{
    AVFormatContext *s = avformat_alloc_context();
    int ret = 0;

    *avctx = NULL;
    if (!s)
        goto nomem;

    if (!oformat) {
        if (format) {
            oformat = av_guess_format(format, NULL, NULL);
            if (!oformat) {
                av_log(s, AV_LOG_ERROR, "Requested output format '%s' is not a suitable output format\n", format);
                ret = AVERROR(EINVAL);
                goto error;
            }
        } else {
            oformat = av_guess_format(NULL, filename, NULL);
            if (!oformat) {
                ret = AVERROR(EINVAL);
                av_log(s, AV_LOG_ERROR, "Unable to find a suitable output format for '%s'\n", filename);
                goto error;
            }
        }
    }

    s->oformat = oformat;
    if (s->oformat->priv_data_size > 0) {
        s->priv_data = av_mallocz(s->oformat->priv_data_size);
        if (!s->priv_data)
            goto nomem;
        if (s->oformat->priv_class) {
            *(const AVClass**)s->priv_data = s->oformat->priv_class;
            av_opt_set_defaults(s->priv_data);
        }
    } else {
        s->priv_data = NULL;
    }

    if (filename) {
#if FF_API_FORMAT_FILENAME
FF_DISABLE_DEPRECATION_WARNINGS
        av_strlcpy(s->filename, filename, sizeof(s->filename));
FF_ENABLE_DEPRECATION_WARNINGS
#endif
        if (!(s->url = av_strdup(filename)))
            goto nomem;
    }

    *avctx = s;
    return 0;
nomem:
    av_log(s, AV_LOG_ERROR, "Out of memory\n");
    ret = AVERROR(ENOMEM);
error:
    avformat_free_context(s);
    return ret;
}
  • As you can see, the two key calls are avformat_alloc_context and av_guess_format: one allocates the context, the other obtains the AVOutputFormat from the latter two parameters.
  • av_guess_format matches filename and short_name against all registered muxers and returns the closest match.
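A typical usage sketch ("out.flv" is an arbitrary example name; the same pattern appears again in the walkthrough later in this article):

AVFormatContext *oc = NULL;
// let av_guess_format pick the muxer from the ".flv" suffix
avformat_alloc_output_context2(&oc, NULL, NULL, "out.flv");
if (!oc)
{
    // no muxer matched the extension: force the flv muxer by name
    avformat_alloc_output_context2(&oc, NULL, "flv", "out.flv");
}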

FFmpeg structure: AVOutputFormat

1. Description

  • AVOutputFormat describes an output container format. It mainly holds: the muxer name and description, codec information (default audio/video codecs and the list of supported codecs), and the muxing operation callbacks (write_header, write_packet, write_trailer, etc.).

  • ffmpeg supports a wide range of output file formats: MP4, FLV, 3GP, and so on. The AVOutputFormat structure stores each format's information and common settings.

  • Each container format corresponds to one AVOutputFormat structure, and ffmpeg stores the AVOutputFormat instances in a linked list:

[Figure: AVOutputFormat linked list]

2. Structure definition

/**
 * @addtogroup lavf_encoding
 * @{
 */
typedef struct AVOutputFormat {
    const char *name;
    /**
     * Descriptive name for the format, meant to be more human-readable
     * than name. You should use the NULL_IF_CONFIG_SMALL() macro
     * to define it.
     */
    const char *long_name;
    const char *mime_type;
    const char *extensions; /**< comma-separated filename extensions */
    /* output support */
    enum AVCodecID audio_codec;    /**< default audio codec */
    enum AVCodecID video_codec;    /**< default video codec */
    enum AVCodecID subtitle_codec; /**< default subtitle codec */
    /**
     * can use flags: AVFMT_NOFILE, AVFMT_NEEDNUMBER,
     * AVFMT_GLOBALHEADER, AVFMT_NOTIMESTAMPS, AVFMT_VARIABLE_FPS,
     * AVFMT_NODIMENSIONS, AVFMT_NOSTREAMS, AVFMT_ALLOW_FLUSH,
     * AVFMT_TS_NONSTRICT, AVFMT_TS_NEGATIVE
     */
    int flags;

    /**
     * List of supported codec_id-codec_tag pairs, ordered by "better
     * choice first". The arrays are all terminated by AV_CODEC_ID_NONE.
     */
    const struct AVCodecTag * const *codec_tag;

    const AVClass *priv_class; ///< AVClass for the private context

    /*****************************************************************
     * No fields below this line are part of the public API. They
     * may not be used outside of libavformat and can be changed and
     * removed at will.
     * New public fields should be added right above.
     *****************************************************************
     */
    /**
     * The ff_const59 define is not part of the public API and will
     * be removed without further warning.
     */
#if FF_API_AVIOFORMAT
#define ff_const59
#else
#define ff_const59 const
#endif
    ff_const59 struct AVOutputFormat *next;
    /**
     * size of private data so that it can be allocated in the wrapper
     */
    int priv_data_size;

    int (*write_header)(struct AVFormatContext *);
    /**
     * Write a packet. If AVFMT_ALLOW_FLUSH is set in flags,
     * pkt can be NULL in order to flush data buffered in the muxer.
     * When flushing, return 0 if there still is more data to flush,
     * or 1 if everything was flushed and there is no more buffered
     * data.
     */
    int (*write_packet)(struct AVFormatContext *, AVPacket *pkt);
    int (*write_trailer)(struct AVFormatContext *);
    /**
     * Currently only used to set pixel format if not YUV420P.
     */
    int (*interleave_packet)(struct AVFormatContext *, AVPacket *out,
                             AVPacket *in, int flush);
    /**
     * Test if the given codec can be stored in this container.
     *
     * @return 1 if the codec is supported, 0 if it is not.
     *         A negative number if unknown.
     *         MKTAG('A', 'P', 'I', 'C') if the codec is only supported as AV_DISPOSITION_ATTACHED_PIC
     */
    int (*query_codec)(enum AVCodecID id, int std_compliance);

    void (*get_output_timestamp)(struct AVFormatContext *s, int stream,
                                 int64_t *dts, int64_t *wall);
    /**
     * Allows sending messages from application to device.
     */
    int (*control_message)(struct AVFormatContext *s, int type,
                           void *data, size_t data_size);

    /**
     * Write an uncoded AVFrame.
     *
     * See av_write_uncoded_frame() for details.
     *
     * The library will free *frame afterwards, but the muxer can prevent it
     * by setting the pointer to NULL.
     */
    int (*write_uncoded_frame)(struct AVFormatContext *, int stream_index,
                               AVFrame **frame, unsigned flags);
    /**
     * Returns device list with it properties.
     * @see avdevice_list_devices() for more details.
     */
    int (*get_device_list)(struct AVFormatContext *s, struct AVDeviceInfoList *device_list);
    /**
     * Initialize device capabilities submodule.
     * @see avdevice_capabilities_create() for more details.
     */
    int (*create_device_capabilities)(struct AVFormatContext *s, struct AVDeviceCapabilitiesQuery *caps);
    /**
     * Free device capabilities submodule.
     * @see avdevice_capabilities_free() for more details.
     */
    int (*free_device_capabilities)(struct AVFormatContext *s, struct AVDeviceCapabilitiesQuery *caps);
    enum AVCodecID data_codec; /**< default data codec */
    /**
     * Initialize format. May allocate data here, and set any
     * AVFormatContext or AVStream parameters that need to be set before
     * packets are sent. This method must not write output.
     *
     * Return 0 if streams were fully configured, 1 if not, negative AVERROR on failure
     *
     * Any allocations made here must be freed in deinit().
     */
    int (*init)(struct AVFormatContext *);
    /**
     * Deinitialize format. If present, this is called whenever the muxer
     * is being destroyed, regardless of whether or not the header has been
     * written.
     *
     * If a trailer is being written, this is called after write_trailer().
     *
     * This is called if init() fails as well.
     */
    void (*deinit)(struct AVFormatContext *);
    /**
     * Set up any necessary bitstream filtering and extract any extra
     * data needed for the global header.
     * Return 0 if more packets from this stream must be checked; 1 if not.
     */
    int (*check_bitstream)(struct AVFormatContext *, const AVPacket *pkt);
} AVOutputFormat;

3. Common fields and their roles

const char *name;              // muxer name
const char *long_name;         // descriptive, human-readable name of the format
enum AVCodecID audio_codec;    // default audio codec
enum AVCodecID video_codec;    // default video codec
enum AVCodecID subtitle_codec; // default subtitle codec

Most muxers come with default codecs, so if you want a different codec you must specify it explicitly yourself.

For example:

AVOutputFormat ff_flv_muxer = {
    .name        = "flv",
    .audio_codec = CONFIG_LIBMP3LAME ? AV_CODEC_ID_MP3 : AV_CODEC_ID_ADPCM_SWF, // MP3 by default
    .video_codec = AV_CODEC_ID_FLV1,
    ....
};

AVOutputFormat ff_mpegts_muxer = {
    .name        = "mpegts",
    .extensions  = "ts,m2t,m2ts,mts",
    .audio_codec = AV_CODEC_ID_MP2,
    .video_codec = AV_CODEC_ID_MPEG2VIDEO,
    ....
};
int (*write_header)(struct AVFormatContext *);
int (*write_packet)(struct AVFormatContext *, AVPacket *pkt);   // write a packet; pkt may be NULL if AVFMT_ALLOW_FLUSH is set in flags
int (*write_trailer)(struct AVFormatContext *);
int (*interleave_packet)(struct AVFormatContext *, AVPacket *out, AVPacket *in, int flush);
int (*control_message)(struct AVFormatContext *s, int type, void *data, size_t data_size); // allows sending messages from the application to the device
int (*write_uncoded_frame)(struct AVFormatContext *, int stream_index, AVFrame **frame, unsigned flags); // write an uncoded AVFrame
int (*init)(struct AVFormatContext *);    // initialize the format; may allocate data and set any AVFormatContext/AVStream parameters needed before packets are sent
void (*deinit)(struct AVFormatContext *); // deinitialize the format
int (*check_bitstream)(struct AVFormatContext *, const AVPacket *pkt); // set up any required bitstream filtering and extract extra data needed for the global header

FFmpeg function: avformat_new_stream

An AVStream is a stream channel. For example, when storing H264 and AAC streams in an MP4 file, two stream channels are added to the MP4 file: one for the video (H264) and one for the audio (AAC) (assuming the H264 and AAC inputs each contain a single stream).

/**
* Add a new stream to a media file.
*
* When demuxing, it is called by the demuxer in read_header(). If the
* flag AVFMTCTX_NOHEADER is set in s.ctx_flags, then it may also
* be called in read_packet().
*
* When muxing, should be called by the user before avformat_write_header().
*
* User is required to call avcodec_close() and avformat_free_context() to
* clean up the allocation by avformat_new_stream().
*
* @param s media file handle
* @param c If non-NULL, the AVCodecContext corresponding to the new stream
* will be initialized to use this codec. This is needed for e.g. codec-specific
* defaults to be set, so codec should be provided if it is known.
*
* @return newly created stream or NULL on error.
*/
AVStream *avformat_new_stream(AVFormatContext *s, const AVCodec *c);

avformat_new_stream creates a stream channel inside the AVFormatContext.

Related structures:
AVFormatContext:

  • unsigned int nb_streams; records the number of stream channels.
  • AVStream **streams; holds the stream channels.

AVStream:

  • int index; the index of this stream within the AVFormatContext.

After avformat_new_stream, a new AVStream channel exists in the AVFormatContext (its index is already set). We can then set parameters on the AVStream ourselves, e.g. codec_id, format, bit_rate, width, height.
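A minimal sketch of creating a stream before avformat_write_header (the parameter values here are illustrative; in the walkthrough below they are copied from the encoder context with avcodec_parameters_from_context):

AVStream *vst = avformat_new_stream(oc, NULL); // the index is set by the call
if (!vst)
    return AVERROR(ENOMEM);
vst->id = oc->nb_streams - 1;
vst->time_base = (AVRational){1, 25};          // a hint; the muxer may overwrite it
vst->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
vst->codecpar->codec_id   = AV_CODEC_ID_H264;
vst->codecpar->width      = 640;               // illustrative values
vst->codecpar->height     = 360;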

FFmpeg function: av_interleaved_write_frame

Prototype:

int av_interleaved_write_frame(AVFormatContext *s, AVPacket *pkt);

Description:

  • Writes a packet to the output media file, ensuring correct interleaving (keeping packet dts monotonically increasing).
  • The function buffers packets internally as needed so that packets in the output file are interleaved correctly, in increasing dts order. If you do the interleaving yourself, call av_write_frame() instead.

Parameters:

s: the media file handle.
pkt: the packet to write.
If the packet uses reference-counted memory, this function takes ownership of the reference (think of it as a move) and releases it internally at the appropriate time; after the call returns, the caller must not access the data through that reference. If the packet is not reference-counted, libavformat makes a copy.
This parameter can be NULL (at any time, not only at the end) to flush the interleaving queue.
The packet's stream_index field must be set to the index of the corresponding stream in s->streams.
The timestamps (pts, dts) must be set to correct values in the stream's timebase (unless the output format is flagged with AVFMT_NOTIMESTAMPS, in which case they can be set to AV_NOPTS_VALUE).
The dts of subsequent packets in the same stream must be strictly increasing (unless the output format is flagged with AVFMT_TS_NONSTRICT, in which case they only need to be non-decreasing). duration should also be set if known.

Return value:

  • 0 on success, a negative AVERROR on error. Libavformat will always free the packet, even if this function fails.
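A typical call site, sketched (the packet is assumed to come from an encoder whose time base is enc_time_base; the stream index and timestamps must be fixed up first, exactly as the write_frame helper later in this article does):

// rescale timestamps from the encoder time base to the stream time base
av_packet_rescale_ts(&pkt, enc_time_base, st->time_base);
pkt.stream_index = st->index;
ret = av_interleaved_write_frame(oc, &pkt); // takes ownership of the reference
if (ret < 0)
    fprintf(stderr, "write failed: %s\n", av_err2str(ret));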

FFmpeg function: av_compare_ts

/**
* Compare two timestamps each in its own time base.
*
* @return One of the following values:
* - -1 if `ts_a` is before `ts_b`
* - 1 if `ts_a` is after `ts_b`
* - 0 if they represent the same position
*
* @warning
* The result of the function is undefined if one of the timestamps is outside
* the `int64_t` range when represented in the other's timebase.
*/
int av_compare_ts(int64_t ts_a, AVRational tb_a, int64_t ts_b, AVRational tb_b);

Return value:

  • -1 if ts_a is before ts_b
  • 1 if ts_a is after ts_b
  • 0 if ts_a and ts_b are at the same position

In pseudo-code (after both timestamps have been rescaled to a common time base): return ts_a == ts_b ? 0 : ts_a < ts_b ? -1 : 1
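For example (a sketch), a video pts in a 1/25 time base can be compared against an audio pts in a 1/44100 time base without converting either by hand:

int64_t video_pts = 25;    // 25 * (1/25) s = 1.0 s
int64_t audio_pts = 44100; // 44100 * (1/44100) s = 1.0 s
int cmp = av_compare_ts(video_pts, (AVRational){1, 25},
                        audio_pts, (AVRational){1, 44100});
// cmp == 0: both timestamps refer to the same instant (1 second)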

Analyzing the written file with MediaInfo

This section only examines what avformat_write_header and av_write_trailer contribute to the file.

flv
  • Writing only avformat_write_header
  • Adding av_write_trailer after avformat_write_header changes nothing for FLV.
000 File Header (9 bytes)
000 FLV header (9 bytes)
000 Signature: FLV
003 Version: 1 (0x01)
004 Flags: 5 (0x05)
005 Video: Yes
005 Audio: Yes
005 Size: 9 (0x00000009)
009 -------------------------
009 --- FLV, accepted ---
009 -------------------------
009 Meta - onMetaData - 12 elements (288 bytes)
009 Header (15 bytes)
009 PreviousTagSize: 0 (0x00000000)
00D Type: 18 (0x12)
00E BodyLength: 273 (0x000111)
011 Timestamp_Base: 0 (0x000000)
014 Timestamp_Extended: 0 (0x00)
015 StreamID: 0 (0x000000)
018 Type: 2 (0x02) - SCRIPTDATASTRING
019 Value_Size: 10 (0x000A)
01B Value: onMetaData
025 Type: 8 (0x08) - SCRIPTDATAVARIABLE[ECMAArrayLength]
026 ECMAArrayLength: 12 (0x0000000C)
02A duration (19 bytes)
02A StringLength: 8 (0x0008)
02C StringData: duration
034 Type: 0 (0x00) - DOUBLE
035 Value: 0.000
03D width - 352 (16 bytes)
03D StringLength: 5 (0x0005)
03F StringData: width
044 Type: 0 (0x00) - DOUBLE
045 Value: 352.000
04D height - 288 (17 bytes)
04D StringLength: 6 (0x0006)
04F StringData: height
055 Type: 0 (0x00) - DOUBLE
056 Value: 288.000
05E videodatarate - 390625 (24 bytes)
05E StringLength: 13 (0x000D)
060 StringData: videodatarate
06D Type: 0 (0x00) - DOUBLE
06E Value: 390.625
076 videocodecid - 2 (23 bytes)
076 StringLength: 12 (0x000C)
078 StringData: videocodecid
084 Type: 0 (0x00) - DOUBLE
085 Value: 2.000
08D audiodatarate - 62500 (24 bytes)
08D StringLength: 13 (0x000D)
08F StringData: audiodatarate
09C Type: 0 (0x00) - DOUBLE
09D Value: 62.500
0A5 audiosamplerate - 44100 (26 bytes)
0A5 StringLength: 15 (0x000F)
0A7 StringData: audiosamplerate
0B6 Type: 0 (0x00) - DOUBLE
0B7 Value: 44100.000
0BF audiosamplesize - 16 (26 bytes)
0BF StringLength: 15 (0x000F)
0C1 StringData: audiosamplesize
0D0 Type: 0 (0x00) - DOUBLE
0D1 Value: 16.000
0D9 stereo - 1 (0x1) (10 bytes)
0D9 StringLength: 6 (0x0006)
0DB StringData: stereo
0E1 Type: 1 (0x01) - UI8
0E2 Value: 1 (0x01)
0E3 audiocodecid - 2 (23 bytes)
0E3 StringLength: 12 (0x000C)
0E5 StringData: audiocodecid
0F1 Type: 0 (0x00) - DOUBLE
0F2 Value: 2.000
0FA encoder - Lavf58.29.100 (25 bytes)
0FA StringLength: 7 (0x0007)
0FC StringData: encoder
103 Type: 2 (0x02) - SCRIPTDATASTRING
104 Value_Size: 13 (0x000D)
106 Value: Lavf58.29.100
113 filesize (19 bytes)
113 StringLength: 8 (0x0008)
115 StringData: filesize
11D Type: 0 (0x00) - DOUBLE
11E Value: 0.000
129 End Of File (4 bytes)
129 Header (4 bytes)
129 PreviousTagSize: 284 (0x0000011C)
12D ------------------------
12D --- FLV, filling ---
12D ------------------------
12D -------------------------
12D --- FLV, finished ---
12D -------------------------
mp4

avformat_write_header

00 File Type (32 bytes)
00 Header (8 bytes)
00 Size: 32 (0x00000020)
04 Name: ftyp
08 MajorBrand: isom
0C MajorBrandVersion: 512 (0x00000200)
10 CompatibleBrand: isom
14 CompatibleBrand: iso2
18 CompatibleBrand: avc1
1C CompatibleBrand: mp41
20 ----------------------------
20 --- MPEG-4, accepted ---
20 ----------------------------
20 Free space (8 bytes)
20 Header (8 bytes)
20 Size: 8 (0x00000008)
24 Name: free
28 Junk (4 bytes)
28 Header (4 bytes)
28 Size: 0 (0x00000000)
2C Problem (4 bytes)
2C Header (4 bytes)
2C Size: 1835295092 (0x6D646174)
30 Size is wrong: 0 (0x00000000)
30 ---------------------------
30 --- MPEG-4, filling ---
30 ---------------------------
30 ----------------------------
30 --- MPEG-4, finished ---
30 ----------------------------

avformat_write_header+av_write_trailer

000 File Type (32 bytes)
000 Header (8 bytes)
000 Size: 32 (0x00000020)
004 Name: ftyp
008 MajorBrand: isom
00C MajorBrandVersion: 512 (0x00000200)
010 CompatibleBrand: isom
014 CompatibleBrand: iso2
018 CompatibleBrand: avc1
01C CompatibleBrand: mp41
020 ----------------------------
020 --- MPEG-4, accepted ---
020 ----------------------------
020 Free space (8 bytes)
020 Header (8 bytes)
020 Size: 8 (0x00000008)
024 Name: free
028 Data (8 bytes)
028 Header (8 bytes)
028 Size: 8 (0x00000008)
02C Name: mdat
030 File header (214 bytes)
030 Header (8 bytes)
030 Size: 214 (0x000000D6)
034 Name: moov
038 Movie header (108 bytes)
038 Header (8 bytes)
038 Size: 108 (0x0000006C)
03C Name: mvhd
040 Version: 0 (0x00)
041 Flags: 0 (0x000000)
044 Creation time: 0 (0x00000000) -
048 Modification time: 0 (0x00000000) -
04C Time scale: 1000 (0x000003E8) - 1000 Hz
050 Duration: 0 (0x00000000) - 0 ms
054 Preferred rate: 65536 (0x00010000) - 1.000
058 Preferred volume: 256 (0x0100) - 1.000
05A Reserved: (10 bytes)
064 Matrix structure (36 bytes)
064 a (width scale): 1.000
068 b (width rotate): 0.000
06C u (width angle): 0.000
070 c (height rotate): 0.000
074 d (height scale): 1.000
078 v (height angle): 0.000
07C x (position left): 0.000
080 y (position top): 0.000
084 w (divider): 1.000
088 Preview time: 0 (0x00000000)
08C Preview duration: 0 (0x00000000)
090 Poster time: 0 (0x00000000)
094 Selection time: 0 (0x00000000)
098 Selection duration: 0 (0x00000000)
09C Current time: 0 (0x00000000)
0A0 Next track ID: 2 (0x00000002)
0A4 User Data (98 bytes)
0A4 Header (8 bytes)
0A4 Size: 98 (0x00000062)
0A8 Name: udta
0AC Metadata (90 bytes)
0AC Header (8 bytes)
0AC Size: 90 (0x0000005A)
0B0 Name: meta
0B4 Version: 0 (0x00)
0B5 Flags: 0 (0x000000)
0B8 Metadata Header (33 bytes)
0B8 Header (8 bytes)
0B8 Size: 33 (0x00000021)
0BC Name: hdlr
0C0 Version: 0 (0x00)
0C1 Flags: 0 (0x000000)
0C4 Type (Quicktime):
0C8 Metadata type: mdir
0CC Manufacturer: appl
0D0 Component reserved flags: 0 (0x00000000)
0D4 Component reserved flags mask: 0 (0x00000000)
0D8 Component type name:
0D9 List (45 bytes)
0D9 Header (8 bytes)
0D9 Size: 45 (0x0000002D)
0DD Name: ilst
0E1 Element (37 bytes)
0E1 Header (8 bytes)
0E1 Size: 37 (0x00000025)
0E5 Name: ©too
0E9 Data - Encoded_Application (29 bytes)
0E9 Header (8 bytes)
0E9 Size: 29 (0x0000001D)
0ED Name: data
0F1 Kind: 1 (0x00000001) - UTF8
0F5 Language: 0 (0x00000000)
0F9 Value: Lavf58.29.100
106 ---------------------------
106 --- MPEG-4, filling ---
106 ---------------------------
106 ----------------------------
106 --- MPEG-4, finished ---
106 ----------------------------

FFmpeg timestamps in detail

1. I-frames / P-frames / B-frames

  • I-frame: an I-frame (intra-coded picture, often called a keyframe) contains a complete image. It is intra-coded: it carries no motion vectors and needs no other frames as references when decoded. You can therefore switch channels at an I-frame without losing the picture or failing to decode, and I-frames stop errors from accumulating and spreading. In a closed GOP, the first frame of every GOP is an I-frame, and the current GOP's data does not reference data from neighboring GOPs.

  • P-frame: a P-frame (predictive-coded picture) is inter-coded, predicted from a previous I-frame or P-frame.

  • B-frame: a B-frame (bi-directionally predicted picture) is inter-coded with bidirectional prediction from preceding and/or following I- or P-frames. A B-frame cannot serve as a reference frame. B-frames achieve higher compression but require more buffering time and more CPU, so they suit local storage and video on demand rather than latency-sensitive live streaming.

2. DTS and PTS

  • DTS (Decoding Time Stamp): the time at which the compressed frame is decoded.
  • PTS (Presentation Time Stamp): the time at which the raw frame obtained by decoding is displayed. For audio, DTS and PTS are identical. For video, B-frames need bidirectional prediction and depend on frames before and after them, so in a video containing B-frames the decode order differs from the display order, i.e. DTS differs from PTS. In a video without B-frames, DTS and PTS are the same.

The figure below uses an open GOP as an example to illustrate the decode order and display order of a video stream.

![[Pasted image 20250406145605.png]]

  • Capture order: the order in which the image sensor captures frames from the raw signal.
  • Encode order: the order of frames after encoding. The frame order in a local video file on disk matches the encode order.
  • Transmission order: the order of frames as the encoded stream travels over the network.
  • Decode order: the order in which the decoder decodes frames. Display order: the order in which frames appear on screen.
  • Capture order equals display order. Encode order, transmission order, and decode order are all the same.

Take frame "B[1]" in the figure: decoding "B[1]" requires both "I[0]" and "P[3]", so "P[3]" must be decoded before "B[1]". This is what makes the decode order differ from the display order: a frame displayed later may need to be decoded earlier.

3. Time bases and timestamps in FFmpeg

3.1 The concepts of time base and timestamp

In FFmpeg, the time base (time_base) is the unit of the timestamp (timestamp): multiplying a timestamp by its time base yields the actual time value (in seconds, for instance). For example, if a video frame has dts 40 and pts 160 with a time_base of 1/1000 second, its decode time is 40 ms (40/1000 s) and its display time is 160 ms (160/1000 s).

Timestamps (pts/dts) in FFmpeg are of type int64_t. If you think of one time_base as one clock tick, then dts/pts is a count of clock ticks.
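As a quick sketch of that arithmetic:

AVRational tb = {1, 1000};            // time base: 1/1000 second
int64_t dts = 40, pts = 160;
double decode_s  = dts * av_q2d(tb);  // 40  * (1/1000) = 0.040 s
double display_s = pts * av_q2d(tb);  // 160 * (1/1000) = 0.160 s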

3.2 The three time bases: tbr, tbn, and tbc

Different container formats have different time bases, and FFmpeg also uses different time bases at different stages of its audio/video pipeline.

FFmpeg has three time bases; the tbr, tbn, and tbc values printed on the command line are the reciprocals of these time bases:

  • tbn: the container time base; the printed value is the reciprocal of AVStream.time_base
  • tbc: the codec time base; the printed value is the reciprocal of AVCodecContext.time_base
  • tbr: guessed from the video stream; may be the frame rate or the field rate (twice the frame rate)

Probing a media file with ffprobe:

think@opensuse> ffprobe tnmil3.flv
ffprobe version 4.1 Copyright (c) 2007-2018 the FFmpeg developers
Input #0, flv, from 'tnmil3.flv':
  Metadata:
    encoder         : Lavf58.20.100
  Duration: 00:00:03.60, start: 0.017000, bitrate: 513 kb/s
    Stream #0:0: Video: h264 (High), yuv420p(progressive), 784x480, 25 fps, 25 tbr, 1k tbn, 50 tbc
    Stream #0:1: Audio: aac (LC), 44100 Hz, stereo, fltp, 128 kb/s

The original explanation of tbr, tbn, and tbc, quoted from the FFmpeg mailing list:

There are three different time bases for time stamps in FFmpeg. The values printed are actually
reciprocals of these, i.e. 1/tbr, 1/tbn and 1/tbc.

  • tbn is the time base in AVStream that has come from the container, I think. It is used for all AVStream time stamps.

  • tbc is the time base in AVCodecContext for the codec used for a particular stream. It is used for all AVCodecContext and related time stamps.

  • tbr is guessed from the video stream and is the value users want to see when they look for the video frame rate, except sometimes it is twice what one would expect because of field rate versus frame rate

3.3 The internal time base AV_TIME_BASE

Besides the three time bases above, FFmpeg also has an internal time base AV_TIME_BASE (and its fractional form AV_TIME_BASE_Q):

// Internal time base represented as integer
#define AV_TIME_BASE           1000000    // microseconds

// Internal time base represented as fractional value
#define AV_TIME_BASE_Q         (AVRational){1, AV_TIME_BASE}

AV_TIME_BASE and AV_TIME_BASE_Q are used by FFmpeg's internal functions; time values computed with this time base are expressed in microseconds.
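For example (a sketch; ic is assumed to be an opened input AVFormatContext, st one of its streams): AVFormatContext.duration is expressed in AV_TIME_BASE units, and AV_TIME_BASE_Q is handy for rescaling a stream timestamp into microseconds:

double duration_s = ic->duration / (double)AV_TIME_BASE;               // container duration in seconds
int64_t pts_us = av_rescale_q(pkt.pts, st->time_base, AV_TIME_BASE_Q); // stream pts -> microseconds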

3.4 Converting the representation of time values

av_q2d() converts a time from AVRational form to double form. AVRational is a fraction type and double is a double-precision floating-point type; the result of the conversion is in seconds. The values before and after conversion are based on the same time base; only the numeric representation differs.

av_q2d() is implemented as follows:

/**
* Convert an AVRational to a `double`.
* @param a AVRational to convert
* @return `a` in floating-point form
* @see av_d2q()
*/
static inline double av_q2d(AVRational a)
{
    return a.num / (double) a.den;
}

av_q2d() is used like this:

AVStream stream;
AVPacket packet;
// presentation time of the packet, in seconds:
double timestamp = packet.pts * av_q2d(stream.time_base);
// duration of the packet, in seconds:
double duration = packet.duration * av_q2d(stream.time_base);

3.5 Time-base conversion functions

av_rescale_q

av_rescale_q() converts between time bases: it rescales a time value from one time base to another. It converts the value a from time base bq to time base cq; the return value is the new value expressed in time base cq.

/**
* Rescale a 64-bit integer by 2 rational numbers.
*
* The operation is mathematically equivalent to `a × bq / cq`.
*
* This function is equivalent to av_rescale_q_rnd() with #AV_ROUND_NEAR_INF.
*
* @see av_rescale(), av_rescale_rnd(), av_rescale_q_rnd()
*/
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq) av_const;
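For example (a sketch): one frame at 25 fps, converted from the 1/25 codec time base into the 1/1000 flv container time base:

int64_t pts = av_rescale_q(1, (AVRational){1, 25}, (AVRational){1, 1000});
// pts == 40: 1 * (1/25) s = 40 * (1/1000) s = 40 ms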

av_rescale_rnd

It computes "a * b / c" with a choice of five rounding modes:

int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd);

AV_ROUND_ZERO     = 0, // Round toward zero:      round(2.5) -> 2, round(-2.5) -> -2
AV_ROUND_INF      = 1, // Round away from zero:   round(3.5) -> 4, round(-3.5) -> -4
AV_ROUND_DOWN     = 2, // Round toward -infinity: [-2.9, -1.2, 2.4, 5.6, 7.0, 2.4] -> [-3, -2, 2, 5, 7, 2]
AV_ROUND_UP       = 3, // Round toward +infinity: [-2.9, -1.2, 2.4, 5.6, 7.0, 2.4] -> [-2, -1, 3, 6, 7, 3]
AV_ROUND_NEAR_INF = 5, // Round to nearest, halfway cases away from zero (ordinary rounding)

av_packet_rescale_ts

av_packet_rescale_ts() rescales all the timing fields of an AVPacket from one time base to another.

/**
 * Convert valid timing fields (timestamps / durations) in a packet from one
 * timebase to another. Timestamps with unknown values (AV_NOPTS_VALUE) will be
 * ignored.
 *
 * @param pkt packet on which the conversion will be performed
 * @param tb_src source timebase, in which the timing fields in pkt are
 *               expressed
 * @param tb_dst destination timebase, to which the timing fields will be
 *               converted
 */
void av_packet_rescale_ts(AVPacket *pkt, AVRational tb_src, AVRational tb_dst);

3.6 Time-base conversion during remuxing

The container time base (AVStream.time_base, the tbn of section 3.2) is defined as follows:

typedef struct AVStream {
    ......
    /**
     * This is the fundamental unit of time (in seconds) in terms
     * of which frame timestamps are represented.
     *
     * decoding: set by libavformat
     * encoding: May be set by the caller before avformat_write_header()
     * to provide a hint to the muxer about the desired timebase.
     * In avformat_write_header(), the muxer will overwrite this
     * field with the timebase that will actually be used for the
     * timestamps written into the file (which may or may not be related
     * to the user-provided one, depending on the format).
     */
    AVRational time_base;
    ......
} AVStream;

AVStream.time_base is the unit of pts and dts in AVPacket. The time_base of input and output streams is determined as follows:

  • Input streams: after opening the input file, calling avformat_find_stream_info() fills in the time_base of each stream.
  • Output streams: after opening the output file, calling avformat_write_header() determines each stream's time_base from the output container format and writes it into the output file.

Different containers have different time bases. When remuxing (converting one container format into another), the time-base conversion looks like this:

av_read_frame(ifmt_ctx, &pkt);
pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base,
                           AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base,
                           AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
pkt.duration = av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);

The following code has the same effect:

// read a packet from the input file
av_read_frame(ifmt_ctx, &pkt);
// rescale all timing fields in the packet from the input container time base to the output container time base
av_packet_rescale_ts(&pkt, in_stream->time_base, out_stream->time_base);

Here in_stream->time_base and out_stream->time_base are the container time bases, i.e. the tbn of section 3.2.

For example, the flv container's time_base is {1,1000}, while the mpegts container's time_base is {1,90000}.

We write a program that remuxes flv into ts, then grab the presentation timestamps of the first four frames of the original (flv) file:

think@opensuse> ffprobe -show_frames -select_streams v tnmil3.flv | grep pkt_pts
ffprobe version 4.1 Copyright (c) 2007-2018 the FFmpeg developers
Input #0, flv, from 'tnmil3.flv':
  Metadata:
    encoder         : Lavf58.20.100
  Duration: 00:00:03.60, start: 0.017000, bitrate: 513 kb/s
    Stream #0:0: Video: h264 (High), yuv420p(progressive), 784x480, 25 fps, 25 tbr, 1k tbn, 50 tbc
    Stream #0:1: Audio: aac (LC), 44100 Hz, stereo, fltp, 128 kb/s
pkt_pts=80
pkt_pts_time=0.080000
pkt_pts=120
pkt_pts_time=0.120000
pkt_pts=160
pkt_pts_time=0.160000
pkt_pts=200
pkt_pts_time=0.200000

Then grab the first four presentation timestamps of the converted (ts) file:

think@opensuse> ffprobe -show_frames -select_streams v tnmil3.ts | grep pkt_pts
ffprobe version 4.1 Copyright (c) 2007-2018 the FFmpeg developers
Input #0, mpegts, from 'tnmil3.ts':
  Duration: 00:00:03.58, start: 0.017000, bitrate: 619 kb/s
  Program 1
    Metadata:
      service_name    : Service01
      service_provider: FFmpeg
    Stream #0:0[0x100]: Video: h264 (High) ([27][0][0][0] / 0x001B), yuv420p(progressive), 784x480, 25 fps, 25 tbr, 90k tbn, 50 tbc
    Stream #0:1[0x101]: Audio: aac (LC) ([15][0][0][0] / 0x000F), 44100 Hz, stereo, fltp, 127 kb/s
pkt_pts=7200
pkt_pts_time=0.080000
pkt_pts=10800
pkt_pts_time=0.120000
pkt_pts=14400
pkt_pts_time=0.160000
pkt_pts=18000
pkt_pts_time=0.200000

Notice that for the same video frame the time bases (tbn) differ, so the timestamps (pkt_pts) differ, yet the computed time values (pkt_pts_time) are identical.

Looking at the first frame's timestamp, the relation is: 80*{1,1000} == 7200*{1,90000} == 0.080000.
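The same relation can be checked with av_rescale_q (a sketch):

int64_t ts = av_rescale_q(80, (AVRational){1, 1000}, (AVRational){1, 90000});
// ts == 7200: 80 * (1/1000) s == 7200 * (1/90000) s == 0.080000 s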

3.7 Time-base conversion during transcoding

The codec time base (AVCodecContext.time_base, the tbc of section 3.2) is defined as follows:

typedef struct AVCodecContext {
    ......
    /**
     * This is the fundamental unit of time (in seconds) in terms
     * of which frame timestamps are represented. For fixed-fps content,
     * timebase should be 1/framerate and timestamp increments should be
     * identically 1.
     * This often, but not always is the inverse of the frame rate or field rate
     * for video. 1/time_base is not the average frame rate if the frame rate is not
     * constant.
     *
     * Like containers, elementary streams also can store timestamps, 1/time_base
     * is the unit in which these timestamps are specified.
     * As example of such codec time base see ISO/IEC 14496-2:2001(E)
     * vop_time_increment_resolution and fixed_vop_rate
     * (fixed_vop_rate == 0 implies that it is different from the framerate)
     *
     * - encoding: MUST be set by user.
     * - decoding: the use of this field for decoding is deprecated.
     *             Use framerate instead.
     */
    AVRational time_base;
    ......
} AVCodecContext;

The comment above says that AVCodecContext.time_base is the reciprocal of the frame rate (for video) and that the timestamp increments by 1 per frame, which would make tbc equal to the frame rate. When encoding, the user must set this parameter; when decoding, this field is deprecated and the reciprocal of the frame rate should be used as the time base instead.

A question arises here: by that comment, a 25 fps video stream should have tbc 25, yet the actual value is 50. How to explain that? Has tbc become obsolete and lost its reference value? (A likely explanation: for codecs such as H.264 the time base counts fields, with AVCodecContext.ticks_per_frame == 2, so time_base is 1/(2 x framerate) and tbc prints as twice the frame rate.)

Following the advice in the comment, in practice we do not use AVCodecContext.time_base during video decoding but take the reciprocal of the frame rate as the time base; during video encoding we set AVCodecContext.time_base to the reciprocal of the frame rate.

3.7.1 Video streams

Video plays frame by frame, so the time base of decoded raw video frames is 1/framerate.

Time-base handling during video decoding: what a packet's pts actually means depends on the concrete situation. Packets read with av_read_frame carry timestamps in AVStream->time_base, and there is no need to convert them to AVCodecContext->time_base before sending them to the decoder; note that after avcodec_receive_frame the frame timestamps can simply be treated as being in AVStream->time_base.

AVFormatContext *ifmt_ctx;
AVStream *in_stream;
AVCodecContext *dec_ctx;
AVPacket packet;
AVFrame *frame;

// read an encoded packet from the input file
av_read_frame(ifmt_ctx, &packet);
// time-base conversion (optional here; see the note above)
AVRational raw_video_time_base = av_inv_q(dec_ctx->framerate);
av_packet_rescale_ts(&packet, in_stream->time_base, raw_video_time_base);
// decode
avcodec_send_packet(dec_ctx, &packet);
avcodec_receive_frame(dec_ctx, frame);

Time-base handling during video encoding: if frames are fed to the encoder with timestamps in the AVStream time_base, then the packets read back with avcodec_receive_packet are likewise interpreted against AVStream->time_base. In essence, analyze each concrete case; there is no need to force one fixed recipe:

AVFormatContext *ofmt_ctx;
AVStream *out_stream;
AVCodecContext *dec_ctx;
AVCodecContext *enc_ctx;
AVPacket packet;
AVFrame *frame;

// encode
avcodec_send_frame(enc_ctx, frame);
avcodec_receive_packet(enc_ctx, &packet);
// time-base conversion
packet.stream_index = out_stream_idx;
enc_ctx->time_base = av_inv_q(dec_ctx->framerate);
av_packet_rescale_ts(&packet, enc_ctx->time_base, out_stream->time_base);
// write the encoded packet to the output media file
av_interleaved_write_frame(ofmt_ctx, &packet);
3.7.2 Audio streams

The same applies to audio streams: analyze the concrete case instead of forcing one recipe. For example, when ffplay decodes and plays, packets enter the decoder with timestamps in the AVStream time_base, and the pts of the frames that come out is converted to seconds using that AVStream time_base.

Note, though, that ffplay makes one rather well-hidden setting: avctx->pkt_timebase = ic->streams[stream_index]->time_base; that is, it sets pkt_timebase on the codec context to the same time_base as the AVStream.

Audio plays by samples, so the time base of decoded raw audio frames is 1/sample_rate.

Time-base handling during audio decoding:

AVFormatContext *ifmt_ctx;
AVStream *in_stream;
AVCodecContext *dec_ctx;
AVPacket packet;
AVFrame *frame;

// read an encoded packet from the input file
av_read_frame(ifmt_ctx, &packet);
// time-base conversion
AVRational raw_audio_time_base = (AVRational){1, dec_ctx->sample_rate};
av_packet_rescale_ts(&packet, in_stream->time_base, raw_audio_time_base);
// decode
avcodec_send_packet(dec_ctx, &packet);
avcodec_receive_frame(dec_ctx, frame);

Time-base handling during audio encoding:

AVFormatContext *ofmt_ctx;
AVStream *out_stream;
AVCodecContext *dec_ctx;
AVCodecContext *enc_ctx;
AVPacket packet;
AVFrame *frame;

// encode
avcodec_send_frame(enc_ctx, frame);
avcodec_receive_packet(enc_ctx, &packet);
// time-base conversion
packet.stream_index = out_stream_idx;
enc_ctx->time_base = (AVRational){1, dec_ctx->sample_rate};
av_packet_rescale_ts(&packet, enc_ctx->time_base, out_stream->time_base);
// write the encoded packet to the output media file
av_interleaved_write_frame(ofmt_ctx, &packet);

Implementation walkthrough

Output file
  • main takes a command-line argument naming the output file, e.g. out.flv


  • Open the output file; ffmpeg's AVIOContext handles the related I/O
  • Testing fmt->flags & AVFMT_NOFILE checks whether we need to do the file I/O ourselves; the expression is 0 when the muxer does want a file
AVFormatContext *oc;
AVOutputFormat *fmt;
const char *filename;
filename = argv[1];
if (!(fmt->flags & AVFMT_NOFILE))  // AVFMT_NOFILE not set: we must do the file I/O
{
    // open the output file, creating it if it does not exist
    ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
    if (ret < 0)
    {
        fprintf(stderr, "Could not open '%s': %s\n", filename, av_err2str(ret));
        return 1;
    }
}
Initialize the format context
  • Allocate the format context from the file name suffix; if that fails, fall back to forcing flv

avformat_alloc_output_context2(&oc, NULL, NULL, filename);
if (!oc)
{
    // if no suitable format can be deduced from the file extension, default to flv
    printf("Could not deduce output format from file extension: using flv.\n");
    avformat_alloc_output_context2(&oc, NULL, "flv", filename);
}
  • Grab the output format: simply take the oformat pointer from the format context
  • Specify the codec IDs for the output context (the muxer)

AVOutputFormat *fmt;
fmt = oc->oformat;                      // get the bound AVOutputFormat
fmt->video_codec = AV_CODEC_ID_H264;    // select the video encoder
fmt->audio_codec = AV_CODEC_ID_AAC;     // select the audio encoder
Wrap the audio and video streams
  • We wrap the AVStream-related state in a struct of our own
  • It holds the output stream, the encoder context, the raw frames, and the scaling/resampling contexts

// wraps a single output AVStream
typedef struct OutputStream
{
    AVStream *st;               // one stream: one audio track or one video track is an independent stream
    AVCodecContext *enc;        // encoder context

    /* pts of the next frame that will be generated */
    int64_t next_pts;
    int samples_count;          // running count of audio samples

    AVFrame *frame;             // frame after resampling (for video: after scaling)
    AVFrame *tmp_frame;         // frame before resampling

    float t, tincr, tincr2;     // parameters used to generate the PCM and YUV test data

    struct SwsContext *sws_ctx; // image scaler
    struct SwrContext *swr_ctx; // audio resampler
} OutputStream;
  • Create the two output-stream structs

OutputStream video_st = { 0 }; // wraps the video encoding state
OutputStream audio_st = { 0 }; // wraps the audio encoding state
  • Add the video and audio streams

if (fmt->video_codec != AV_CODEC_ID_NONE)
{
    add_stream(&video_st, oc, &video_codec, fmt->video_codec);
    have_video = 1;
    encode_video = 1;
}
if (fmt->audio_codec != AV_CODEC_ID_NONE)
{
    add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);
    have_audio = 1;
    encode_audio = 1;
}
  • add_stream adds the corresponding audio/video stream to the output context:
ost->st = avformat_new_stream(oc, NULL);
  • Initialize the stream: ID, encoder, stream count, time base, and other parameters
  • For audio, the time base is the reciprocal of the sample rate; for video, the reciprocal of the frame rate

#define STREAM_FRAME_RATE 25

*codec = avcodec_find_encoder(codec_id);
codec_ctx = avcodec_alloc_context3(*codec);
ost->enc = codec_ctx;

ost->st->time_base = (AVRational){ 1, codec_ctx->sample_rate }; // audio
ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE };      // video
  • Then set the audio parameters (sample rate, bit rate, channel count, etc.) and the video parameters (resolution, bit rate, frame rate, etc.):

switch ((*codec)->type)
{
case AVMEDIA_TYPE_AUDIO:
    codec_ctx->codec_id = codec_id;
    codec_ctx->sample_fmt  = (*codec)->sample_fmts ?    // sample format
                             (*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
    codec_ctx->bit_rate    = 64000;     // bit rate
    codec_ctx->sample_rate = 44100;     // sample rate
    if ((*codec)->supported_samplerates)
    {
        codec_ctx->sample_rate = (*codec)->supported_samplerates[0];
        for (i = 0; (*codec)->supported_samplerates[i]; i++)
        {
            if ((*codec)->supported_samplerates[i] == 44100)
                codec_ctx->sample_rate = 44100;
        }
    }
    codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;
    codec_ctx->channels = av_get_channel_layout_nb_channels(codec_ctx->channel_layout);
    if ((*codec)->channel_layouts)
    {
        codec_ctx->channel_layout = (*codec)->channel_layouts[0];
        for (i = 0; (*codec)->channel_layouts[i]; i++)
        {
            if ((*codec)->channel_layouts[i] == AV_CH_LAYOUT_STEREO)
                codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;
        }
    }
    codec_ctx->channels = av_get_channel_layout_nb_channels(codec_ctx->channel_layout);
    // set the timebase from the sample rate
    ost->st->time_base = (AVRational){ 1, codec_ctx->sample_rate };
    codec_ctx->time_base = ost->st->time_base;
    break;

case AVMEDIA_TYPE_VIDEO:
    codec_ctx->codec_id = codec_id;
    codec_ctx->bit_rate = 256 * 1024; // 256k
    /* Resolution must be a multiple of two. */
    codec_ctx->width    = 640;        // resolution
    codec_ctx->height   = 360;
    codec_ctx->max_b_frames = 2;
    /* timebase: This is the fundamental unit of time (in seconds) in terms
     * of which frame timestamps are represented. For fixed-fps content,
     * timebase should be 1/framerate and timestamp increments should be
     * identical to 1. */
    ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE }; // timebase for 25 fps
    codec_ctx->time_base = ost->st->time_base;                 // note: set on the encoder as well
    codec_ctx->gop_size  = STREAM_FRAME_RATE;
    codec_ctx->pix_fmt   = STREAM_PIX_FMT;
    break;

default:
    break;
}
  • Since we write to a local file, header data such as sps/pps or adts can be written once as a global header instead of in front of every I-frame
  • oc->oformat->flags & AVFMT_GLOBALHEADER tells whether the container wants a global header; if it is set, put the encoder into global-header mode

if (oc->oformat->flags & AVFMT_GLOBALHEADER)
{
    codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
  • The full function:

static void add_stream(OutputStream *ost, AVFormatContext *oc,
                       AVCodec **codec, enum AVCodecID codec_id)
{
    AVCodecContext *codec_ctx;
    int i;

    /* find the encoder */
    *codec = avcodec_find_encoder(codec_id);    // look up the encoder by codec_id
    if (!(*codec))
    {
        fprintf(stderr, "Could not find encoder for '%s'\n",
                avcodec_get_name(codec_id));
        exit(1);
    }

    // create the stream, bound to the AVFormatContext; stream->index is set
    ost->st = avformat_new_stream(oc, NULL);
    if (!ost->st)
    {
        fprintf(stderr, "Could not allocate stream\n");
        exit(1);
    }
    /* Why -1? Each call to avformat_new_stream increments nb_streams,
       but ids start at 0:
       1st stream: id = nb_streams(1) - 1 = 0
       2nd stream: id = nb_streams(2) - 1 = 1 */
    ost->st->id = oc->nb_streams - 1;
    printf("nb_streams = %d\n", oc->nb_streams);

    codec_ctx = avcodec_alloc_context3(*codec); // create the encoder context
    if (!codec_ctx)
    {
        fprintf(stderr, "Could not alloc an encoding context\n");
        exit(1);
    }
    ost->enc = codec_ctx;

    // initialize the encoder parameters
    switch ((*codec)->type)
    {
    case AVMEDIA_TYPE_AUDIO:
        codec_ctx->codec_id = codec_id;
        codec_ctx->sample_fmt  = (*codec)->sample_fmts ?    // sample format
                                 (*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
        codec_ctx->bit_rate    = 64000;     // bit rate
        codec_ctx->sample_rate = 44100;     // sample rate
        if ((*codec)->supported_samplerates)
        {
            codec_ctx->sample_rate = (*codec)->supported_samplerates[0];
            for (i = 0; (*codec)->supported_samplerates[i]; i++)
            {
                if ((*codec)->supported_samplerates[i] == 44100)
                    codec_ctx->sample_rate = 44100;
            }
        }
        codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;
        codec_ctx->channels = av_get_channel_layout_nb_channels(codec_ctx->channel_layout);
        if ((*codec)->channel_layouts)
        {
            codec_ctx->channel_layout = (*codec)->channel_layouts[0];
            for (i = 0; (*codec)->channel_layouts[i]; i++)
            {
                if ((*codec)->channel_layouts[i] == AV_CH_LAYOUT_STEREO)
                    codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;
            }
        }
        codec_ctx->channels = av_get_channel_layout_nb_channels(codec_ctx->channel_layout);
        // set the timebase from the sample rate
        ost->st->time_base = (AVRational){ 1, codec_ctx->sample_rate };
        codec_ctx->time_base = ost->st->time_base;
        break;

    case AVMEDIA_TYPE_VIDEO:
        codec_ctx->codec_id = codec_id;
        codec_ctx->bit_rate = 256 * 1024; // 256k
        /* Resolution must be a multiple of two. */
        codec_ctx->width    = 640;        // resolution
        codec_ctx->height   = 360;
        codec_ctx->max_b_frames = 2;
        /* timebase: This is the fundamental unit of time (in seconds) in terms
         * of which frame timestamps are represented. For fixed-fps content,
         * timebase should be 1/framerate and timestamp increments should be
         * identical to 1. */
        ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE }; // timebase for 25 fps
        codec_ctx->time_base = ost->st->time_base;                 // note: set on the encoder as well
        codec_ctx->gop_size  = STREAM_FRAME_RATE;
        codec_ctx->pix_fmt   = STREAM_PIX_FMT;
        break;

    default:
        break;
    }

    /* Some formats want stream headers to be separate. */
    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
    {
        codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }
}
Set up the frames
  • Allocate frame memory according to the configured audio/video formats
  • Also copy the encoder parameters into the stream's codecpar

if (have_video)
    open_video(oc, video_codec, &video_st, opt);
if (have_audio)
    open_audio(oc, audio_codec, &audio_st, opt);

open_video does the following.

  • Bind the video encoder to the encoder context:

AVCodecContext *codec_ctx = ost->enc;
ret = avcodec_open2(codec_ctx, codec, &opt);
  • Allocate memory for the video frame using our own alloc_picture, which simply allocates a frame buffer from the video parameters:

static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
{
    AVFrame *picture;
    int ret;

    picture = av_frame_alloc();
    if (!picture)
        return NULL;

    picture->format = pix_fmt;
    picture->width  = width;
    picture->height = height;

    /* allocate the buffers for the frame data */
    ret = av_frame_get_buffer(picture, 32);
    if (ret < 0)
    {
        fprintf(stderr, "Could not allocate frame data.\n");
        exit(1);
    }
    return picture;
}
  • Copy the encoder parameters into the stream's codecpar:

ret = avcodec_parameters_from_context(ost->st->codecpar, codec_ctx);
if (ret < 0)
{
    fprintf(stderr, "Could not copy the stream parameters\n");
    exit(1);
}
  • tmp_frame is used for video conversion: if the encoder's pixel format is not YUV420P, we convert into the encoder format
  • This only sets things up; tmp_frame is non-NULL only when conversion is needed

if (codec_ctx->pix_fmt != AV_PIX_FMT_YUV420P)
{
    // image scaling is only needed when the encoder wants something other than AV_PIX_FMT_YUV420P
    ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, codec_ctx->width, codec_ctx->height);
    if (!ost->tmp_frame)
    {
        fprintf(stderr, "Could not allocate temporary picture\n");
        exit(1);
    }
}

The complete open_video function:

static void open_video(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg)
{
    int ret;
    AVCodecContext *codec_ctx = ost->enc;
    AVDictionary *opt = NULL;

    av_dict_copy(&opt, opt_arg, 0);
    /* open the codec */
    // 1. bind the encoder
    ret = avcodec_open2(codec_ctx, codec, &opt);
    av_dict_free(&opt);
    if (ret < 0)
    {
        fprintf(stderr, "Could not open video codec: %s\n", av_err2str(ret));
        exit(1);
    }

    // 2. allocate the frame buffer
    /* allocate and init a re-usable frame */
    ost->frame = alloc_picture(codec_ctx->pix_fmt, codec_ctx->width, codec_ctx->height);
    if (!ost->frame)
    {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }

    /* If the output format is not YUV420P, then a temporary YUV420P
     * picture is needed too. It is then converted to the required
     * output format. */
    ost->tmp_frame = NULL;
    if (codec_ctx->pix_fmt != AV_PIX_FMT_YUV420P)
    {
        // image scaling is only needed when the encoder wants something other than AV_PIX_FMT_YUV420P
        ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, codec_ctx->width, codec_ctx->height);
        if (!ost->tmp_frame)
        {
            fprintf(stderr, "Could not allocate temporary picture\n");
            exit(1);
        }
    }

    /* copy the stream parameters to the muxer */
    ret = avcodec_parameters_from_context(ost->st->codecpar, codec_ctx);
    if (ret < 0)
    {
        fprintf(stderr, "Could not copy the stream parameters\n");
        exit(1);
    }
}
  • Audio is handled similarly
  • When the audio encoder context is opened, the time base is set automatically to the reciprocal of the sample rate:

// 1. bind the encoder; this sets codec_ctx->time_base
ret = avcodec_open2(codec_ctx, codec, &opt);
  • Likewise, if the audio format is not fltp, we still have to convert
  • Since we generate the audio ourselves in s16 format, we allocate tmp_frame here for the later resampling:

ost->tmp_frame = alloc_audio_frame(AV_SAMPLE_FMT_S16, codec_ctx->channel_layout,
                                   codec_ctx->sample_rate, nb_samples);
  • The resampler context can be configured right here as well:

ost->swr_ctx = swr_alloc();
if (!ost->swr_ctx)
{
    fprintf(stderr, "Could not allocate resampler context\n");
    exit(1);
}

/* set options */
av_opt_set_int       (ost->swr_ctx, "in_channel_count",   codec_ctx->channels,       0);
av_opt_set_int       (ost->swr_ctx, "in_sample_rate",     codec_ctx->sample_rate,    0);
av_opt_set_sample_fmt(ost->swr_ctx, "in_sample_fmt",      AV_SAMPLE_FMT_S16,         0);
av_opt_set_int       (ost->swr_ctx, "out_channel_count",  codec_ctx->channels,       0);
av_opt_set_int       (ost->swr_ctx, "out_sample_rate",    codec_ctx->sample_rate,    0);
av_opt_set_sample_fmt(ost->swr_ctx, "out_sample_fmt",     codec_ctx->sample_fmt,     0);

/* initialize the resampling context */
if ((ret = swr_init(ost->swr_ctx)) < 0)
{
    fprintf(stderr, "Failed to initialize the resampling context\n");
    exit(1);
}
  • These parameters drive the PCM generator; they are essentially sine-wave parameters:

ost->t = 0;
ost->tincr = 2 * M_PI * 110.0 / codec_ctx->sample_rate;
/* increment frequency by 110 Hz per second */
ost->tincr2 = 2 * M_PI * 110.0 / codec_ctx->sample_rate / codec_ctx->sample_rate;
  • The complete open_audio function:

static void open_audio(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg)
{
    AVCodecContext *codec_ctx;
    int nb_samples;
    int ret;
    AVDictionary *opt = NULL;

    codec_ctx = ost->enc;

    /* open it */
    av_dict_copy(&opt, opt_arg, 0);
    // 1. bind the encoder; this sets codec_ctx->time_base
    ret = avcodec_open2(codec_ctx, codec, &opt);
    av_dict_free(&opt);
    if (ret < 0)
    {
        fprintf(stderr, "Could not open audio codec: %s\n", av_err2str(ret));
        exit(1);
    }

    /* init signal generator */
    // 2. initialize the PCM generator parameters
    ost->t      = 0;
    ost->tincr  = 2 * M_PI * 110.0 / codec_ctx->sample_rate;
    /* increment frequency by 110 Hz per second */
    ost->tincr2 = 2 * M_PI * 110.0 / codec_ctx->sample_rate / codec_ctx->sample_rate;

    // samples needed per frame
//    if (codec_ctx->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
//        nb_samples = 10000; // very few encoders support a variable frame size, so this stays commented out
//    else
    nb_samples = codec_ctx->frame_size;
    // printf("frame_size = %d\n", codec_ctx->frame_size);

    // signal generator -> PCM -> ost->tmp_frame -> swr_convert resampling -> ost->frame -> encoder
    // allocate the frame fed to the encoder, with its buffer
    ost->frame     = alloc_audio_frame(codec_ctx->sample_fmt, codec_ctx->channel_layout,
                                       codec_ctx->sample_rate, nb_samples);
    // allocate the frame fed by the PCM signal generator, with its buffer
    ost->tmp_frame = alloc_audio_frame(AV_SAMPLE_FMT_S16, codec_ctx->channel_layout,
                                       codec_ctx->sample_rate, nb_samples);

    /* copy the stream parameters to the muxer */
    ret = avcodec_parameters_from_context(ost->st->codecpar, codec_ctx);
    if (ret < 0)
    {
        fprintf(stderr, "Could not copy the stream parameters\n");
        exit(1);
    }

    /* create the resampler context */
    ost->swr_ctx = swr_alloc();
    if (!ost->swr_ctx)
    {
        fprintf(stderr, "Could not allocate resampler context\n");
        exit(1);
    }

    /* set options */
    av_opt_set_int       (ost->swr_ctx, "in_channel_count",   codec_ctx->channels,       0);
    av_opt_set_int       (ost->swr_ctx, "in_sample_rate",     codec_ctx->sample_rate,    0);
    av_opt_set_sample_fmt(ost->swr_ctx, "in_sample_fmt",      AV_SAMPLE_FMT_S16,         0);
    av_opt_set_int       (ost->swr_ctx, "out_channel_count",  codec_ctx->channels,       0);
    av_opt_set_int       (ost->swr_ctx, "out_sample_rate",    codec_ctx->sample_rate,    0);
    av_opt_set_sample_fmt(ost->swr_ctx, "out_sample_fmt",     codec_ctx->sample_fmt,     0);

    /* initialize the resampling context */
    if ((ret = swr_init(ost->swr_ctx)) < 0)
    {
        fprintf(stderr, "Failed to initialize the resampling context\n");
        exit(1);
    }
}
Write the container header
  • avformat_write_header writes the container header (the flv/mp4 file header, etc.)
  • Writing the header may change the streams' time bases; different containers use different time bases, e.g. for flv it becomes 1/1000

ret = avformat_write_header(oc, &opt);
Encode the audio/video data

The strategy is to always encode whichever stream has the smaller next timestamp; once both audio and video are done, the loop exits.

if (encode_video &&         // encode video when video_st.next_pts <= audio_st.next_pts
    (!encode_audio || av_compare_ts(video_st.next_pts, video_st.enc->time_base,
                                    audio_st.next_pts, audio_st.enc->time_base) <= 0))
{
    printf("\nwrite_video_frame\n");
    encode_video = !write_video_frame(oc, &video_st);
}
else
{
    printf("\nwrite_audio_frame\n");
    encode_audio = !write_audio_frame(oc, &audio_st);
}

write_video_frame encodes one video frame.

  • It uses the old avcodec_encode_video2 API; on success, got_packet is set to 1:

static int write_video_frame(AVFormatContext *oc, OutputStream *ost)
{
    int ret;
    AVCodecContext *codec_ctx;
    AVFrame *frame;
    int got_packet = 0;
    AVPacket pkt = { 0 };

    codec_ctx = ost->enc;
    frame = get_video_frame(ost);

    av_init_packet(&pkt);

    /* encode the image */
    ret = avcodec_encode_video2(codec_ctx, &pkt, frame, &got_packet);
    if (ret < 0)
    {
        fprintf(stderr, "Error encoding video frame: %s\n", av_err2str(ret));
        exit(1);
    }

    if (got_packet)
    {
        ret = write_frame(oc, &codec_ctx->time_base, ost->st, &pkt);
    }
    else
    {
        ret = 0;
    }

    if (ret < 0)
    {
        fprintf(stderr, "Error while writing video frame: %s\n", av_err2str(ret));
        exit(1);
    }
    // two conditions here:
    // frame non-NULL: we are still producing YUV frames
    // got_packet == 1: the encoder still has buffered frames
    return (frame || got_packet) ? 0 : 1;
}

get_video_frame converts the video frame if needed and returns the frame to be encoded.

  • Before writing into the frame, make sure it is writable by calling av_frame_make_writable
  • Convert (scale) the video if necessary:

if (codec_ctx->pix_fmt != AV_PIX_FMT_YUV420P)
{
    /* as we only generate a YUV420P picture, we must convert it
     * to the codec pixel format if needed */
    if (!ost->sws_ctx)
    {
        ost->sws_ctx = sws_getContext(codec_ctx->width, codec_ctx->height, AV_PIX_FMT_YUV420P,
                                      codec_ctx->width, codec_ctx->height, codec_ctx->pix_fmt,
                                      SCALE_FLAGS, NULL, NULL, NULL);
        if (!ost->sws_ctx)
        {
            fprintf(stderr, "Could not initialize the conversion context\n");
            exit(1);
        }
    }
    fill_yuv_image(ost->tmp_frame, ost->next_pts, codec_ctx->width, codec_ctx->height);
    sws_scale(ost->sws_ctx, (const uint8_t * const *) ost->tmp_frame->data,
              ost->tmp_frame->linesize, 0, codec_ctx->height,
              ost->frame->data, ost->frame->linesize);
}
else
{
    fill_yuv_image(ost->frame, ost->next_pts, codec_ctx->width, codec_ctx->height);
}
  • fill_yuv_image is shown below; it merely generates dummy data, so don't dwell on it:

/* Prepare a dummy image. */
static void fill_yuv_image(AVFrame *pict, int frame_index, int width, int height)
{
    int x, y, i;

    i = frame_index;

    /* Y */
    for (y = 0; y < height; y++)
        for (x = 0; x < width; x++)
            pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;

    /* Cb and Cr */
    for (y = 0; y < height / 2; y++)
    {
        for (x = 0; x < width / 2; x++)
        {
            pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
            pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
        }
    }
}
  • The timestamp must be set; here its unit is the video time base 1/25
  • Before generating another frame we check that we have not exceeded the configured duration; note the timestamp comparison across time bases:

if (av_compare_ts(ost->next_pts, codec_ctx->time_base,
                  STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)
    return NULL;

ost->frame->pts = ost->next_pts++;  // why increment by 1? the unit is 1/25 = 40 ms per tick
  • write_frame sets the packet's stream_index and must rescale the timestamps, because the encoder's time base differs from the stream's: the stream time base has already been rewritten to match the container:

static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base,
                       AVStream *st, AVPacket *pkt)
{
    /* rescale output packet timestamp values from codec to stream timebase */
    // e.g. an audio pts of -1024 in 1/44100 becomes
    // pts_after = -1024 * (1/44100) * 1000 = -23 in 1/1000
    av_packet_rescale_ts(pkt, *time_base, st->time_base);
    pkt->stream_index = st->index;

    /* Write the compressed frame to the media file. */
    log_packet(fmt_ctx, pkt);
    return av_interleaved_write_frame(fmt_ctx, pkt);
}
  • The complete get_video_frame function:

static AVFrame *get_video_frame(OutputStream *ost)
{
    AVCodecContext *codec_ctx = ost->enc;

    /* check if we want to generate more frames */
    // for this test we only produce STREAM_DURATION (here 5.0 seconds) of video
    if (av_compare_ts(ost->next_pts, codec_ctx->time_base,
                      STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)
        return NULL;

    /* when we pass a frame to the encoder, it may keep a reference to it
     * internally; make sure we do not overwrite it here */
    if (av_frame_make_writable(ost->frame) < 0)
        exit(1);

    if (codec_ctx->pix_fmt != AV_PIX_FMT_YUV420P)
    {
        /* as we only generate a YUV420P picture, we must convert it
         * to the codec pixel format if needed */
        if (!ost->sws_ctx)
        {
            ost->sws_ctx = sws_getContext(codec_ctx->width, codec_ctx->height, AV_PIX_FMT_YUV420P,
                                          codec_ctx->width, codec_ctx->height, codec_ctx->pix_fmt,
                                          SCALE_FLAGS, NULL, NULL, NULL);
            if (!ost->sws_ctx)
            {
                fprintf(stderr, "Could not initialize the conversion context\n");
                exit(1);
            }
        }
        fill_yuv_image(ost->tmp_frame, ost->next_pts, codec_ctx->width, codec_ctx->height);
        sws_scale(ost->sws_ctx, (const uint8_t * const *) ost->tmp_frame->data,
                  ost->tmp_frame->linesize, 0, codec_ctx->height,
                  ost->frame->data, ost->frame->linesize);
    }
    else
    {
        fill_yuv_image(ost->frame, ost->next_pts, codec_ctx->width, codec_ctx->height);
    }

    ost->frame->pts = ost->next_pts++;  // why increment by 1? the unit is 1/25 = 40 ms
                                        // 0 1 2 -> 0 ms, 40 ms, 80 ms
    return ost->frame;
}
  • Audio encoding is similar to video; note that the destination sample count for resampling must add the samples still buffered (delayed) inside the resampler:

if (frame)
{
    /* convert samples from native format to destination codec format, using the resampler */
    /* compute destination number of samples */
    dst_nb_samples = av_rescale_rnd(swr_get_delay(ost->swr_ctx, codec_ctx->sample_rate) + frame->nb_samples,
                                    codec_ctx->sample_rate, codec_ctx->sample_rate, AV_ROUND_UP);
    av_assert0(dst_nb_samples == frame->nb_samples);

    /* when we pass a frame to the encoder, it may keep a reference to it
     * internally; make sure we do not overwrite it here */
    ret = av_frame_make_writable(ost->frame);
    if (ret < 0)
        exit(1);

    /* convert to destination format */
    ret = swr_convert(ost->swr_ctx,
                      ost->frame->data, dst_nb_samples,
                      (const uint8_t **)frame->data, frame->nb_samples);
    if (ret < 0)
    {
        fprintf(stderr, "Error while converting\n");
        exit(1);
    }
    frame = ost->frame;

    // convert the time base
    frame->pts = av_rescale_q(ost->samples_count, (AVRational){1, codec_ctx->sample_rate},
                              codec_ctx->time_base);
    ost->samples_count += dst_nb_samples;
}
  • The packet is produced with avcodec_encode_audio2:

ret = avcodec_encode_audio2(codec_ctx, &pkt, frame, &got_packet);
  • The complete write_audio_frame function:

static int write_audio_frame(AVFormatContext *oc, OutputStream *ost)
{
    AVCodecContext *codec_ctx;
    AVPacket pkt = { 0 }; // data and size must be 0
    AVFrame *frame;
    int ret;
    int got_packet;
    int dst_nb_samples;

    av_init_packet(&pkt);
    codec_ctx = ost->enc;

    frame = get_audio_frame(ost);
    if (frame)
    {
        /* convert samples from native format to destination codec format, using the resampler */
        /* compute destination number of samples */
        dst_nb_samples = av_rescale_rnd(swr_get_delay(ost->swr_ctx, codec_ctx->sample_rate) + frame->nb_samples,
                                        codec_ctx->sample_rate, codec_ctx->sample_rate, AV_ROUND_UP);
        av_assert0(dst_nb_samples == frame->nb_samples);

        /* when we pass a frame to the encoder, it may keep a reference to it
         * internally; make sure we do not overwrite it here */
        ret = av_frame_make_writable(ost->frame);
        if (ret < 0)
            exit(1);

        /* convert to destination format */
        ret = swr_convert(ost->swr_ctx,
                          ost->frame->data, dst_nb_samples,
                          (const uint8_t **)frame->data, frame->nb_samples);
        if (ret < 0)
        {
            fprintf(stderr, "Error while converting\n");
            exit(1);
        }
        frame = ost->frame;

        // convert the time base
        frame->pts = av_rescale_q(ost->samples_count, (AVRational){1, codec_ctx->sample_rate},
                                  codec_ctx->time_base);
        ost->samples_count += dst_nb_samples;
    }

    ret = avcodec_encode_audio2(codec_ctx, &pkt, frame, &got_packet);
    if (ret < 0)
    {
        fprintf(stderr, "Error encoding audio frame: %s\n", av_err2str(ret));
        exit(1);
    }

    if (got_packet)
    {
        ret = write_frame(oc, &codec_ctx->time_base, ost->st, &pkt);
        if (ret < 0)
        {
            fprintf(stderr, "Error while writing audio frame: %s\n", av_err2str(ret));
            exit(1);
        }
    }

    // frame == NULL: no more frames to read (e.g. the 5 seconds are done);
    // got_packet == 0: no more packets
    return (frame || got_packet) ? 0 : 1;
}
Write the trailer

For containers such as mp4, the trailer must also be written, using av_write_trailer:

av_write_trailer(oc);
Cleanup
  • After encoding, close the file and release resources

  • close_stream releases the per-stream state:

static void close_stream(AVFormatContext *oc, OutputStream *ost)
{
    avcodec_free_context(&ost->enc);
    av_frame_free(&ost->frame);
    av_frame_free(&ost->tmp_frame);
    sws_freeContext(ost->sws_ctx);
    swr_free(&ost->swr_ctx);
}

if (have_video)
    close_stream(oc, &video_st);
if (have_audio)
    close_stream(oc, &audio_st);
  • Close the file:

if (!(fmt->flags & AVFMT_NOFILE))
    /* Close the output file. */
    avio_closep(&oc->pb);
  • Free the context:
avformat_free_context(oc);
Complete code

main.c

 /*** @file* libavformat API example.** Output a media file in any supported libavformat format. The default* codecs are used.* @example muxing.c*/#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>#include <libavutil/avassert.h>
#include <libavutil/channel_layout.h>
#include <libavutil/opt.h>
#include <libavutil/mathematics.h>
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>#define STREAM_DURATION   5.0  //流時長 單位秒
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT    AV_PIX_FMT_YUV420P /* default pix_fmt */#define SCALE_FLAGS SWS_BICUBIC     // scale flag// 封裝單個輸出AVStream
typedef struct OutputStream
{AVStream *st;               // 代表一個stream, 1路audio或1路video都代表獨立的steamAVCodecContext *enc;        // 編碼器上下文/* pts of the next frame that will be generated */int64_t next_pts;int samples_count;  // 音頻的采樣數量累計AVFrame *frame;     // 重采樣后的frame,  視頻叫scaleAVFrame *tmp_frame; // 重采樣前float t, tincr, tincr2; // 這幾個參數用來生成PCM和YUV用的struct SwsContext *sws_ctx;     // 圖像scalestruct SwrContext *swr_ctx;     // 音頻重采樣
} OutputStream;static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),pkt->stream_index);
}static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base,AVStream *st, AVPacket *pkt)
{/* rescale output packet timestamp values from codec to stream timebase */// 將packet的timestamp由codec to stream timebase pts_before = -1024av_packet_rescale_ts(pkt, *time_base, st->time_base);pkt->stream_index = st->index;  // pts_before * 1/44100 = pts_after *1/1000// pts_after = pts_before * 1/44100 * 1000 = -1024 * 1/44100 * 1000 = -23/* Write the compressed frame to the media file. */log_packet(fmt_ctx, pkt);return av_interleaved_write_frame(fmt_ctx, pkt);
}//增加輸出流,返回AVStream,并給codec賦值,但此時codec并未打開
static void add_stream(OutputStream *ost, AVFormatContext *oc,AVCodec **codec,enum AVCodecID codec_id)
{AVCodecContext *codec_ctx;int i;/* 查找編碼器 */*codec = avcodec_find_encoder(codec_id);    //通過codec_id找到編碼器if (!(*codec)){fprintf(stderr, "Could not find encoder for '%s'\n",avcodec_get_name(codec_id));exit(1);}// 新建碼流 綁定到 AVFormatContext stream->index 有設置ost->st = avformat_new_stream(oc, NULL);    // 創建一個流成分if (!ost->st){fprintf(stderr, "Could not allocate stream\n");exit(1);}/* 為什么要 -1呢?每次調用avformat_new_stream的時候nb_streams+1但id是從0開始, 比如第1個流:對應流id = nb_streams(1) -1 = 0第2個流:對應流id = nb_streams(2) -1 = 1*/ost->st->id = oc->nb_streams - 1;printf("nb_streams = %d\n",oc->nb_streams);codec_ctx = avcodec_alloc_context3(*codec); // 創建編碼器上下文if (!codec_ctx){fprintf(stderr, "Could not alloc an encoding context\n");exit(1);}ost->enc = codec_ctx;// 初始化編碼器參數switch ((*codec)->type){case AVMEDIA_TYPE_AUDIO:codec_ctx->codec_id = codec_id;codec_ctx->sample_fmt  = (*codec)->sample_fmts ?    // 采樣格式(*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;codec_ctx->bit_rate    = 64000;     // 碼率codec_ctx->sample_rate = 44100;     // 采樣率if ((*codec)->supported_samplerates){codec_ctx->sample_rate = (*codec)->supported_samplerates[0];for (i = 0; (*codec)->supported_samplerates[i]; i++){if ((*codec)->supported_samplerates[i] == 44100)codec_ctx->sample_rate = 44100;}}codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;codec_ctx->channels        = av_get_channel_layout_nb_channels(codec_ctx->channel_layout);if ((*codec)->channel_layouts){codec_ctx->channel_layout = (*codec)->channel_layouts[0];for (i = 0; (*codec)->channel_layouts[i]; i++) {if ((*codec)->channel_layouts[i] == AV_CH_LAYOUT_STEREO)codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;}}codec_ctx->channels        = av_get_channel_layout_nb_channels(codec_ctx->channel_layout);// 設置timebase, 使用采樣率ost->st->time_base = (AVRational){ 1, codec_ctx->sample_rate };codec_ctx->time_base = ost->st->time_base;break;case AVMEDIA_TYPE_VIDEO:codec_ctx->codec_id = codec_id;codec_ctx->bit_rate = 256 * 1024; //256k/* Resolution must be a multiple of two. */codec_ctx->width    = 640;      // 分辨率codec_ctx->height   = 360;codec_ctx->max_b_frames = 2;/* timebase: This is the fundamental unit of time (in seconds) in terms* of which frame timestamps are represented. For fixed-fps content,* timebase should be 1/framerate and timestamp increments should be* identical to 1. */ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE };  // 時基 25 frameratecodec_ctx->time_base       = ost->st->time_base;    // 為什么這里需要設置codec_ctx->gop_size      = STREAM_FRAME_RATE; //codec_ctx->pix_fmt       = STREAM_PIX_FMT;break;default:break;}/* Some formats want stream headers to be separate. */if (oc->oformat->flags & AVFMT_GLOBALHEADER){codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;    //}
}
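A small aside on av_get_channel_layout_nb_channels(), which add_stream uses twice: a channel layout is a bitmask of channel positions, and the helper simply counts the set bits. A standalone sketch:

#include <stdio.h>
#include <libavutil/channel_layout.h>

int main(void)
{
    /* AV_CH_LAYOUT_STEREO = front-left | front-right, so two channels. */
    printf("stereo channels = %d\n",
           av_get_channel_layout_nb_channels(AV_CH_LAYOUT_STEREO)); /* prints 2 */
    return 0;
}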
/**************************************************************/
/* audio output */

static AVFrame *alloc_audio_frame(enum AVSampleFormat sample_fmt,
                                  uint64_t channel_layout,
                                  int sample_rate, int nb_samples)
{
    AVFrame *frame = av_frame_alloc();
    int ret;

    if (!frame) {
        fprintf(stderr, "Error allocating an audio frame\n");
        exit(1);
    }

    frame->format         = sample_fmt;
    frame->channel_layout = channel_layout;
    frame->sample_rate    = sample_rate;
    frame->nb_samples     = nb_samples;

    if (nb_samples) {
        ret = av_frame_get_buffer(frame, 0);
        if (ret < 0) {
            fprintf(stderr, "Error allocating an audio buffer\n");
            exit(1);
        }
    }

    return frame;
}

static void open_audio(AVFormatContext *oc, AVCodec *codec,
                       OutputStream *ost, AVDictionary *opt_arg)
{
    AVCodecContext *codec_ctx;
    int nb_samples;
    int ret;
    AVDictionary *opt = NULL;

    codec_ctx = ost->enc;

    /* 1. Open the encoder; this also sets codec_ctx->time_base. */
    av_dict_copy(&opt, opt_arg, 0);
    ret = avcodec_open2(codec_ctx, codec, &opt);
    av_dict_free(&opt);
    if (ret < 0) {
        fprintf(stderr, "Could not open audio codec: %s\n", av_err2str(ret));
        exit(1);
    }

    /* 2. Initialize the parameters of the PCM signal generator. */
    ost->t     = 0;
    ost->tincr = 2 * M_PI * 110.0 / codec_ctx->sample_rate;
    /* increment frequency by 110 Hz per second */
    ost->tincr2 = 2 * M_PI * 110.0 / codec_ctx->sample_rate / codec_ctx->sample_rate;

    /* Samples needed per frame. Encoders that support a variable frame
     * size are rare, so that branch is commented out: */
//  if (codec_ctx->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
//      nb_samples = 10000;
//  else
    nb_samples = codec_ctx->frame_size;
    // printf("frame_size = %d\n", codec_ctx->frame_size);

    /* Pipeline: signal generator -> PCM -> ost->tmp_frame
     *           -> swr_convert resampling -> ost->frame -> encoder */
    /* The frame handed to the encoder, with its buffer. */
    ost->frame     = alloc_audio_frame(codec_ctx->sample_fmt, codec_ctx->channel_layout,
                                       codec_ctx->sample_rate, nb_samples);
    /* The frame filled by the signal generator, with its buffer. */
    ost->tmp_frame = alloc_audio_frame(AV_SAMPLE_FMT_S16, codec_ctx->channel_layout,
                                       codec_ctx->sample_rate, nb_samples);

    /* Copy the stream parameters to the muxer. */
    ret = avcodec_parameters_from_context(ost->st->codecpar, codec_ctx);
    if (ret < 0) {
        fprintf(stderr, "Could not copy the stream parameters\n");
        exit(1);
    }

    /* Create the resampler context. */
    ost->swr_ctx = swr_alloc();
    if (!ost->swr_ctx) {
        fprintf(stderr, "Could not allocate resampler context\n");
        exit(1);
    }

    /* Set options. */
    av_opt_set_int       (ost->swr_ctx, "in_channel_count",  codec_ctx->channels,    0);
    av_opt_set_int       (ost->swr_ctx, "in_sample_rate",    codec_ctx->sample_rate, 0);
    av_opt_set_sample_fmt(ost->swr_ctx, "in_sample_fmt",     AV_SAMPLE_FMT_S16,      0);
    av_opt_set_int       (ost->swr_ctx, "out_channel_count", codec_ctx->channels,    0);
    av_opt_set_int       (ost->swr_ctx, "out_sample_rate",   codec_ctx->sample_rate, 0);
    av_opt_set_sample_fmt(ost->swr_ctx, "out_sample_fmt",    codec_ctx->sample_fmt,  0);

    /* Initialize the resampling context. */
    if ((ret = swr_init(ost->swr_ctx)) < 0) {
        fprintf(stderr, "Failed to initialize the resampling context\n");
        exit(1);
    }
}
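In the FFmpeg versions this example targets, the option-by-option resampler setup above can also be written as a single call with swr_alloc_set_opts(). A hedged sketch (the helper name make_resampler is hypothetical; it takes channel layouts where the code above sets channel counts, which is equivalent here since the layout is known):

#include <stdio.h>
#include <stdlib.h>
#include <libavcodec/avcodec.h>
#include <libswresample/swresample.h>

/* One-call equivalent of the swr_alloc() + av_opt_set_*() sequence above. */
static SwrContext *make_resampler(const AVCodecContext *codec_ctx)
{
    SwrContext *swr = swr_alloc_set_opts(NULL,
            codec_ctx->channel_layout, codec_ctx->sample_fmt, codec_ctx->sample_rate, /* out */
            codec_ctx->channel_layout, AV_SAMPLE_FMT_S16,     codec_ctx->sample_rate, /* in  */
            0, NULL);
    if (!swr || swr_init(swr) < 0) {
        fprintf(stderr, "Failed to set up the resampler\n");
        exit(1);
    }
    return swr;
}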
/* Prepare a 16 bit dummy audio frame of 'frame_size' samples and
 * 'nb_channels' channels. */
static AVFrame *get_audio_frame(OutputStream *ost)
{
    AVFrame *frame = ost->tmp_frame;
    int j, i, v;
    int16_t *q = (int16_t*)frame->data[0];

    /* Check whether we want to generate more frames, e.g.
     *   44100   * {1, 44100} = 1 s
     *   44100*5 * {1, 44100} = 5 s, and 5 * {1, 1} = 5 s, so stop. */
    if (av_compare_ts(ost->next_pts, ost->enc->time_base,
                      STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)
        return NULL;

    for (j = 0; j < frame->nb_samples; j++) {
        v = (int)(sin(ost->t) * 10000);
        for (i = 0; i < ost->enc->channels; i++)
            *q++ = v;
        ost->t     += ost->tincr;
        ost->tincr += ost->tincr2;
    }

    /* The pts counts samples: 0, nb_samples (1024), 2048, ... */
    frame->pts = ost->next_pts;
    ost->next_pts += frame->nb_samples;  // audio pts advances by the sample count

    return frame;
}
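The stop condition above compares timestamps expressed in two different time bases; av_compare_ts() rescales them to a common base before comparing. A minimal standalone sketch with the numbers from the comment (STREAM_DURATION is 5 seconds in this example):

#include <stdio.h>
#include <libavutil/mathematics.h>

int main(void)
{
    AVRational sample_tb = { 1, 44100 };
    /* 44100*5 samples at 1/44100 equal 5 s at 1/1, so the result is 0
     * and the >= 0 test in get_audio_frame() stops the generator. */
    int cmp = av_compare_ts(44100 * 5, sample_tb, 5, (AVRational){ 1, 1 });
    printf("cmp = %d\n", cmp); /* prints 0 */
    return 0;
}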
/* Encode one audio frame and send it to the muxer.
 * Return 1 when encoding is finished, 0 otherwise. */
static int write_audio_frame(AVFormatContext *oc, OutputStream *ost)
{
    AVCodecContext *codec_ctx;
    AVPacket pkt = { 0 }; // data and size must be 0
    AVFrame *frame;
    int ret;
    int got_packet;
    int dst_nb_samples;

    av_init_packet(&pkt);
    codec_ctx = ost->enc;

    frame = get_audio_frame(ost);
    if (frame) {
        /* Convert samples from native format to destination codec format,
         * using the resampler. Compute the destination number of samples. */
        dst_nb_samples = av_rescale_rnd(swr_get_delay(ost->swr_ctx, codec_ctx->sample_rate) + frame->nb_samples,
                                        codec_ctx->sample_rate, codec_ctx->sample_rate, AV_ROUND_UP);
        av_assert0(dst_nb_samples == frame->nb_samples);

        /* When we pass a frame to the encoder, it may keep a reference to it
         * internally; make sure we do not overwrite it here. */
        ret = av_frame_make_writable(ost->frame);
        if (ret < 0)
            exit(1);

        /* Convert to the destination format. */
        ret = swr_convert(ost->swr_ctx,
                          ost->frame->data, dst_nb_samples,
                          (const uint8_t **)frame->data, frame->nb_samples);
        if (ret < 0) {
            fprintf(stderr, "Error while converting\n");
            exit(1);
        }
        frame = ost->frame;

        /* Convert the pts into the codec time base. */
        frame->pts = av_rescale_q(ost->samples_count, (AVRational){1, codec_ctx->sample_rate},
                                  codec_ctx->time_base);
        ost->samples_count += dst_nb_samples;
    }

    ret = avcodec_encode_audio2(codec_ctx, &pkt, frame, &got_packet);
    if (ret < 0) {
        fprintf(stderr, "Error encoding audio frame: %s\n", av_err2str(ret));
        exit(1);
    }

    if (got_packet) {
        ret = write_frame(oc, &codec_ctx->time_base, ost->st, &pkt);
        if (ret < 0) {
            fprintf(stderr, "Error while writing audio frame: %s\n",
                    av_err2str(ret));
            exit(1);
        }
    }

    /* frame == NULL: no more frames to generate (the 5 s are done);
     * got_packet == 0: the encoder has been fully drained. */
    return (frame || got_packet) ? 0 : 1;
}

/**************************************************************/
/* video output */

static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
{
    AVFrame *picture;
    int ret;

    picture = av_frame_alloc();
    if (!picture)
        return NULL;

    picture->format = pix_fmt;
    picture->width  = width;
    picture->height = height;

    /* Allocate the buffers for the frame data. */
    ret = av_frame_get_buffer(picture, 32);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate frame data.\n");
        exit(1);
    }

    return picture;
}

static void open_video(AVFormatContext *oc, AVCodec *codec,
                       OutputStream *ost, AVDictionary *opt_arg)
{
    int ret;
    AVCodecContext *codec_ctx = ost->enc;
    AVDictionary *opt = NULL;

    av_dict_copy(&opt, opt_arg, 0);

    /* 1. Open the codec. */
    ret = avcodec_open2(codec_ctx, codec, &opt);
    av_dict_free(&opt);
    if (ret < 0) {
        fprintf(stderr, "Could not open video codec: %s\n", av_err2str(ret));
        exit(1);
    }

    /* 2. Allocate and init a re-usable frame buffer. */
    ost->frame = alloc_picture(codec_ctx->pix_fmt, codec_ctx->width, codec_ctx->height);
    if (!ost->frame) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }

    /* If the output format is not YUV420P, then a temporary YUV420P
     * picture is needed too. It is then converted to the required
     * output format. */
    ost->tmp_frame = NULL;
    if (codec_ctx->pix_fmt != AV_PIX_FMT_YUV420P) {
        /* Image scaling is only needed when the encoder does not take
         * AV_PIX_FMT_YUV420P directly. */
        ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, codec_ctx->width, codec_ctx->height);
        if (!ost->tmp_frame) {
            fprintf(stderr, "Could not allocate temporary picture\n");
            exit(1);
        }
    }

    /* Copy the stream parameters to the muxer. */
    ret = avcodec_parameters_from_context(ost->st->codecpar, codec_ctx);
    if (ret < 0) {
        fprintf(stderr, "Could not copy the stream parameters\n");
        exit(1);
    }
}

/* Prepare a dummy image. */
static void fill_yuv_image(AVFrame *pict, int frame_index,
                           int width, int height)
{
    int x, y, i;

    i = frame_index;

    /* Y */
    for (y = 0; y < height; y++)
        for (x = 0; x < width; x++)
            pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;

    /* Cb and Cr */
    for (y = 0; y < height / 2; y++) {
        for (x = 0; x < width / 2; x++) {
            pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
            pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
        }
    }
}

static AVFrame *get_video_frame(OutputStream *ost)
{
    AVCodecContext *codec_ctx = ost->enc;

    /* Check whether we want to generate more frames: this test run only
     * produces STREAM_DURATION (5.0 seconds here) of video data. */
    if (av_compare_ts(ost->next_pts, codec_ctx->time_base,
                      STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)
        return NULL;

    /* When we pass a frame to the encoder, it may keep a reference to it
     * internally; make sure we do not overwrite it here. */
    if (av_frame_make_writable(ost->frame) < 0)
        exit(1);

    if (codec_ctx->pix_fmt != AV_PIX_FMT_YUV420P) {
        /* As we only generate a YUV420P picture, we must convert it
         * to the codec pixel format if needed. */
        if (!ost->sws_ctx) {
            ost->sws_ctx = sws_getContext(codec_ctx->width, codec_ctx->height,
                                          AV_PIX_FMT_YUV420P,
                                          codec_ctx->width, codec_ctx->height,
                                          codec_ctx->pix_fmt,
                                          SCALE_FLAGS, NULL, NULL, NULL);
            if (!ost->sws_ctx) {
                fprintf(stderr, "Could not initialize the conversion context\n");
                exit(1);
            }
        }
        fill_yuv_image(ost->tmp_frame, ost->next_pts, codec_ctx->width, codec_ctx->height);
        sws_scale(ost->sws_ctx, (const uint8_t * const *) ost->tmp_frame->data,
                  ost->tmp_frame->linesize, 0, codec_ctx->height, ost->frame->data,
                  ost->frame->linesize);
    } else {
        fill_yuv_image(ost->frame, ost->next_pts, codec_ctx->width, codec_ctx->height);
    }

    /* Why ++? Each tick is 1/25 s = 40 ms:
     * pts 0, 1, 2 -> 0 ms, 40 ms, 80 ms. */
    ost->frame->pts = ost->next_pts++;

    return ost->frame;
}
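To see why incrementing the pts by 1 per frame gives fixed 25 fps timing, here is a small standalone sketch of the tick-to-milliseconds conversion via av_q2d():

#include <stdio.h>
#include <stdint.h>
#include <libavutil/rational.h>

int main(void)
{
    AVRational video_tb = { 1, 25 };        /* one tick per frame */
    for (int64_t pts = 0; pts < 3; pts++)   /* frames 0, 1, 2 */
        printf("pts %lld -> %.0f ms\n", (long long)pts,
               pts * av_q2d(video_tb) * 1000); /* 0, 40, 80 ms */
    return 0;
}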
/* Encode one video frame and send it to the muxer.
 * Return 1 when encoding is finished, 0 otherwise. */
static int write_video_frame(AVFormatContext *oc, OutputStream *ost)
{
    int ret;
    AVCodecContext *codec_ctx;
    AVFrame *frame;
    int got_packet = 0;
    AVPacket pkt = { 0 };

    codec_ctx = ost->enc;

    frame = get_video_frame(ost);
    av_init_packet(&pkt);

    /* Encode the image. */
    ret = avcodec_encode_video2(codec_ctx, &pkt, frame, &got_packet);
    if (ret < 0) {
        fprintf(stderr, "Error encoding video frame: %s\n", av_err2str(ret));
        exit(1);
    }

    if (got_packet)
        ret = write_frame(oc, &codec_ctx->time_base, ost->st, &pkt);
    else
        ret = 0;

    if (ret < 0) {
        fprintf(stderr, "Error while writing video frame: %s\n", av_err2str(ret));
        exit(1);
    }

    /* Two conditions here:
     * frame != NULL: YUV frames are still being generated;
     * got_packet == 1: the encoder still had buffered frames. */
    return (frame || got_packet) ? 0 : 1;
}
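avcodec_encode_video2()/avcodec_encode_audio2() are deprecated in newer FFmpeg releases; the same encode-and-drain behaviour (including the got_packet case for buffered B-frames) maps onto the send/receive API introduced in FFmpeg 3.1. A hedged sketch, reusing this example's write_frame() (the helper name encode_and_write is hypothetical, not part of the example):

/* Sketch only: send/receive equivalent of the deprecated encode calls. */
static int encode_and_write(AVFormatContext *oc, OutputStream *ost, AVFrame *frame)
{
    AVPacket *pkt = av_packet_alloc();
    int ret;

    if (!pkt)
        return AVERROR(ENOMEM);

    ret = avcodec_send_frame(ost->enc, frame);  /* frame == NULL starts draining */
    while (ret >= 0) {
        ret = avcodec_receive_packet(ost->enc, pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            ret = 0;    /* needs more input, or fully drained */
            break;
        }
        if (ret < 0)
            break;      /* real encoding error */
        ret = write_frame(oc, &ost->enc->time_base, ost->st, pkt);
    }

    av_packet_free(&pkt);
    return ret;
}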
static void close_stream(AVFormatContext *oc, OutputStream *ost)
{
    avcodec_free_context(&ost->enc);
    av_frame_free(&ost->frame);
    av_frame_free(&ost->tmp_frame);
    sws_freeContext(ost->sws_ctx);
    swr_free(&ost->swr_ctx);
}

/**************************************************************/
/* media file output */

int main(int argc, char **argv)
{
    OutputStream video_st = { 0 };  // video encoding state
    OutputStream audio_st = { 0 };  // audio encoding state
    const char *filename;           // output file
    /* e.g. the AVOutputFormat ff_flv_muxer shown earlier. */
    AVOutputFormat *fmt;    // output container format; encapsulates the muxing
                            // rules, as AVInputFormat encapsulates demuxing rules
    AVFormatContext *oc;
    AVCodec *audio_codec, *video_codec;
    int ret;
    int have_video = 0, have_audio = 0;
    int encode_video = 0, encode_audio = 0;
    AVDictionary *opt = NULL;
    int i;

    if (argc < 2) {
        printf("usage: %s output_file\n"
               "API example program to output a media file with libavformat.\n"
               "This program generates a synthetic audio and video stream, encodes and\n"
               "muxes them into a file named output_file.\n"
               "The output format is automatically guessed according to the file extension.\n"
               "Raw images can also be output by using '%%d' in the filename.\n"
               "\n", argv[0]);
        return 1;
    }

    filename = argv[1];
    for (i = 2; i + 1 < argc; i += 2) {
        if (!strcmp(argv[i], "-flags") || !strcmp(argv[i], "-fflags"))
            av_dict_set(&opt, argv[i] + 1, argv[i + 1], 0);
    }

    /* Allocate the AVFormatContext and bind a suitable AVOutputFormat
     * based on the filename. */
    avformat_alloc_output_context2(&oc, NULL, NULL, filename);
    if (!oc) {
        /* If no format can be deduced from the file extension, default to FLV. */
        printf("Could not deduce output format from file extension: using flv.\n");
        avformat_alloc_output_context2(&oc, NULL, "flv", filename);
    }
    if (!oc)
        return 1;

    fmt = oc->oformat; // the bound AVOutputFormat
    /* This course mainly covers H264 and AAC, so force H264 + AAC here. */
    fmt->video_codec = AV_CODEC_ID_H264;    // video encoder
    fmt->audio_codec = AV_CODEC_ID_AAC;     // audio encoder

    /* Add the audio and video streams using the chosen codecs. */
    if (fmt->video_codec != AV_CODEC_ID_NONE) {
        add_stream(&video_st, oc, &video_codec, fmt->video_codec);
        have_video = 1;
        encode_video = 1;
    }
    if (fmt->audio_codec != AV_CODEC_ID_NONE) {
        add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);
        have_audio = 1;
        encode_audio = 1;
    }

    /* Now that all the parameters are set, we can open the audio and
     * video codecs and allocate the necessary encode buffers. */
    if (have_video)
        open_video(oc, video_codec, &video_st, opt);
    if (have_audio)
        open_audio(oc, audio_codec, &audio_st, opt);

    av_dump_format(oc, 0, filename, 1);

    /* Open the output file, if needed. */
    if (!(fmt->flags & AVFMT_NOFILE)) {
        /* AVFMT_NOFILE is not set, so file I/O is required: open the
         * output file, creating it if necessary. */
        ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
        if (ret < 0) {
            fprintf(stderr, "Could not open '%s': %s\n", filename,
                    av_err2str(ret));
            return 1;
        }
    }

    /* Before this call: audio AVStream->time_base = 1/44100,
     * video AVStream->time_base = 1/25. What does writing the header
     * actually do? Depending on the container, the stream time_base may
     * be rewritten; the FLV muxer sets both streams to 1/1000. */
    ret = avformat_write_header(oc, &opt);
    /* After the call: audio time_base = 1/1000, video time_base = 1/1000. */
    printf("time_base = %d/%d\n", video_st.st->time_base.num, video_st.st->time_base.den);
    if (ret < 0) {
        fprintf(stderr, "Error occurred when opening output file: %s\n",
                av_err2str(ret));
        return 1;
    }

    while (encode_video || encode_audio) {
        /* Select the stream to encode: write video while
         * video_st.next_pts <= audio_st.next_pts (in real time). */
        if (encode_video &&
            (!encode_audio || av_compare_ts(video_st.next_pts, video_st.enc->time_base,
                                            audio_st.next_pts, audio_st.enc->time_base) <= 0)) {
            printf("\nwrite_video_frame\n");
            encode_video = !write_video_frame(oc, &video_st);
        } else {
            printf("\nwrite_audio_frame\n");
            encode_audio = !write_audio_frame(oc, &audio_st);
        }
    }

    /* Write the trailer, if any. The trailer must be written before you
     * close the CodecContexts open when you wrote the header; otherwise
     * av_write_trailer() may try to use memory that was freed on
     * av_codec_close(). */
    av_write_trailer(oc);

    /* Close each codec. */
    if (have_video)
        close_stream(oc, &video_st);
    if (have_audio)
        close_stream(oc, &audio_st);

    if (!(fmt->flags & AVFMT_NOFILE))
        /* Close the output file. */
        avio_closep(&oc->pb);

    /* Free the format context and its streams. */
    avformat_free_context(oc);

    return 0;
}
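For reference, this example typically builds against the FFmpeg 3.x/4.x-era libraries it targets (where avcodec_encode_audio2/avcodec_encode_video2 still exist) with something along the lines of `gcc muxing_flv.c -o muxing_flv $(pkg-config --cflags --libs libavformat libavcodec libswresample libswscale libavutil) -lm` (the source file name is hypothetical). Running it as `./muxing_flv out.flv` then produces about 5 seconds of synthetic H.264 + AAC in an FLV container.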

More resources: https://github.com/0voice
