FFmpeg: improve threading settings

Generalize threading settings in proxy building and use them for encoding
and decoding in general. Check codec capabilities, prefer FF_THREAD_FRAME
threading over FF_THREAD_SLICE and automatic thread count over setting it
explicitly.

The ffmpeg-codecs man page suggests that the `threads` option is global and is used by
codecs that support this option. From some tests I have done, it seems that

`av_dict_set_int(&codec_opts, "threads", BLI_system_thread_count(), 0)`

has the same effect as

```
pCodecCtx->thread_count = BLI_system_thread_count();
pCodecCtx->thread_type = FF_THREAD_FRAME;
```

However, looking at the `ff_frame_thread_encoder_init()` code, these cases are not
strictly equivalent. It is probably safer to leave the threading setup to libavcodec
than to set up threading for each codec individually.

From what I have read all over the internet, frame multithreading should be
faster than slice multithreading. Slice multithreading is mainly used for low
latency streaming.

When running Blender with `--debug-ffmpeg`, setting
`pCodecCtx->thread_count = BLI_system_thread_count()` produces a warning that using
a thread count above 16 is not recommended. Using too many threads can negatively
affect image quality, but I am not sure if this is the case for decoding as well - see
https://streaminglearningcenter.com/blogs/ffmpeg-command-threads-how-it-affects-quality-and-performance.html
This is fine for proxies but may be undesirable for final renders.

The number of threads is limited by image size, because of the size of motion vectors,
so where possible, let libavcodec determine the optimal thread count.

Performance difference:
Proxy building: None
Playback speed: 2x better on 1920x1080 sample h264 file
Scrubbing: Hard to quantify, but it's much more responsive
Rendering speed: None on 1920x1080 sample h264 file; there is an improvement with codecs that do support FF_THREAD_FRAME for encoding, such as MPNG

Reviewed By: sergey

Differential Revision: https://developer.blender.org/D10791
This commit is contained in:
Richard Antalik 2021-03-26 12:41:13 +01:00
parent 847002e1f8
commit 1614795ae2
Notes: blender-bot 2023-09-13 08:48:34 +02:00
Referenced by issue #70529, Verify ffmpeg and other image and video libraries use optimal build flags
3 changed files with 55 additions and 11 deletions

View File

@ -567,9 +567,6 @@ static AVStream *alloc_video_stream(FFMpegContext *context,
/* Set up the codec context */
c = st->codec;
c->thread_count = BLI_system_thread_count();
c->thread_type = FF_THREAD_SLICE;
c->codec_id = codec_id;
c->codec_type = AVMEDIA_TYPE_VIDEO;
@ -727,6 +724,20 @@ static AVStream *alloc_video_stream(FFMpegContext *context,
set_ffmpeg_properties(rd, c, "video", &opts);
if (codec->capabilities & AV_CODEC_CAP_AUTO_THREADS) {
c->thread_count = 0;
}
else {
c->thread_count = BLI_system_thread_count();
}
if (codec->capabilities & AV_CODEC_CAP_FRAME_THREADS) {
c->thread_type = FF_THREAD_FRAME;
}
else if (codec->capabilities & AV_CODEC_CAP_SLICE_THREADS) {
c->thread_type = FF_THREAD_SLICE;
}
if (avcodec_open2(c, codec, &opts) < 0) {
BLI_strncpy(error, IMB_ffmpeg_last_error(), error_size);
av_dict_free(&opts);

View File

@ -574,8 +574,19 @@ static int startffmpeg(struct anim *anim)
pCodecCtx->workaround_bugs = 1;
pCodecCtx->thread_count = BLI_system_thread_count();
pCodecCtx->thread_type = FF_THREAD_SLICE;
if (pCodec->capabilities & AV_CODEC_CAP_AUTO_THREADS) {
pCodecCtx->thread_count = 0;
}
else {
pCodecCtx->thread_count = BLI_system_thread_count();
}
if (pCodec->capabilities & AV_CODEC_CAP_FRAME_THREADS) {
pCodecCtx->thread_type = FF_THREAD_FRAME;
}
else if (pCodec->capabilities & AV_CODEC_CAP_SLICE_THREADS) {
pCodecCtx->thread_type = FF_THREAD_SLICE;
}
if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0) {
avformat_close_input(&pFormatCtx);

View File

@ -541,8 +541,20 @@ static struct proxy_output_ctx *alloc_proxy_output_ffmpeg(
av_dict_set_int(&codec_opts, "crf", crf, 0);
/* Prefer smaller file-size. */
av_dict_set(&codec_opts, "preset", "slow", 0);
/* Thread count. */
av_dict_set_int(&codec_opts, "threads", BLI_system_thread_count(), 0);
if (rv->codec->capabilities & AV_CODEC_CAP_AUTO_THREADS) {
rv->c->thread_count = 0;
}
else {
rv->c->thread_count = BLI_system_thread_count();
}
if (rv->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS) {
rv->c->thread_type = FF_THREAD_FRAME;
}
else if (rv->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS) {
rv->c->thread_type = FF_THREAD_SLICE;
}
if (rv->of->flags & AVFMT_GLOBALHEADER) {
rv->c->flags |= CODEC_FLAG_GLOBAL_HEADER;
@ -794,11 +806,21 @@ static IndexBuildContext *index_ffmpeg_create_context(struct anim *anim,
context->iCodecCtx->workaround_bugs = 1;
AVDictionary *codec_opts = NULL;
/* Thread count. */
av_dict_set_int(&codec_opts, "threads", BLI_system_thread_count(), 0);
if (context->iCodec->capabilities & AV_CODEC_CAP_AUTO_THREADS) {
context->iCodecCtx->thread_count = 0;
}
else {
context->iCodecCtx->thread_count = BLI_system_thread_count();
}
if (avcodec_open2(context->iCodecCtx, context->iCodec, &codec_opts) < 0) {
if (context->iCodec->capabilities & AV_CODEC_CAP_FRAME_THREADS) {
context->iCodecCtx->thread_type = FF_THREAD_FRAME;
}
else if (context->iCodec->capabilities & AV_CODEC_CAP_SLICE_THREADS) {
context->iCodecCtx->thread_type = FF_THREAD_SLICE;
}
if (avcodec_open2(context->iCodecCtx, context->iCodec, NULL) < 0) {
avformat_close_input(&context->iFormatCtx);
MEM_freeN(context);
return NULL;