Tracking: Improve multithreading of tracking many markers

This change solves a bottleneck which was caused by an attempt to cache
the postprocessed search areas used for tracking. It was a single cache
shared by all threads, which required a synchronization mechanism. This
synchronization turned out to make all threads idle while one thread
was accessing the cache. The access was not cheap, so the
multi-threading did not provide the expected speedup.

The current solution is to remove the cache of search areas. This avoids
any threading synchronization overhead because there is no need for
it anymore. The downside is that for certain configurations tracking
became slower compared to the master branch. There is no expected
slowdown compared to the 2.91 release.

The slowdown is mainly experienced when using a big search area and the
keyframe matching strategy. Other cases should still be within the
ballpark of the performance of the single-threaded code prior to this
change. The reason is that, while this change makes the image accessors
process images multiple times, the complexity of this processing is
almost the same as all the overhead of the cache lookup and
maintenance.

Here are some numbers gathered on different configurations.

CPU: Intel Xeon CPU E5-2699 v4
OS: Linux
Footage: Old_Factory MVI_4005.mov from the first part of Track Match
Blend training which can be found on the Blender Cloud.

Tracking 443 markers across 250 frames. The unit is seconds.
File: F9433209

  2.91: 401.520874
before: 358.650055
 after:  14.966302

Tracking single marker across 250 frames. The unit is seconds.
File: F9433211

                         2.91      before     after
        Big keyframe   1.307203   1.005324   1.227300
  Big previous frame   1.144055   0.881139   0.944044
      Small keyframe   0.434015   0.197760   0.224982
Small previous frame   0.463207   0.218058   0.234172
         All at once   2.338268   1.481220   1.518060
This commit is contained in:
Sergey Sharybin 2020-12-03 14:32:57 +01:00
parent a6c4e39876
commit 5d13082622
3 changed files with 4 additions and 114 deletions

View File

@ -34,7 +34,9 @@
#include "BLI_task.h"
#include "BLI_threads.h"
#include "BLI_utildefines.h"
#include "PIL_time.h"
#include "BKE_global.h"
#include "BKE_movieclip.h"
#include "BKE_tracking.h"

View File

@ -615,88 +615,6 @@ MovieTrackingMarker *tracking_get_keyframed_marker(MovieTrackingTrack *track,
/*********************** Frame accessor *************************/
/* Key describing one fully post-processed frame buffer in the frame access cache.
 * Its size is handed to IMB_moviecache_create() as the key size. */
typedef struct AccessCacheKey {
  /* Clip index and frame number; these are the only fields used by the hash callback. */
  int clip_index;
  int frame;

  /* Requested downscale and input mode of the buffer. */
  int downscale;
  libmv_InputMode input_mode;

  /* Region the buffer was cut to. The min/max values are only filled in and only compared
   * when has_region is true. */
  bool has_region;
  float region_min[2];
  float region_max[2];

  /* Key of the frame transform applied to the buffer; callers pass 0 when there is none. */
  int64_t transform_key;
} AccessCacheKey;
/* Hash callback for the frame access cache.
 *
 * Combines the clip index (shifted into the upper bits) with the frame number. Only these two
 * key fields take part in the hash; keys which differ in other fields are told apart by
 * accesscache_hashcmp(). */
static unsigned int accesscache_hashhash(const void *key_v)
{
  const AccessCacheKey *key = (const AccessCacheKey *)key_v;
  /* TODO(sergey): Need better hashing here for faster frame access. */
  /* Do the arithmetic on unsigned values: left-shifting a signed int is undefined when the
   * value is negative or the result does not fit. The explicit parentheses keep the
   * shift-then-or order obvious. */
  return ((unsigned int)key->clip_index << 16) | (unsigned int)key->frame;
}
/* Comparison callback for the frame access cache.
 *
 * Returns true when the two keys DIFFER and false when they describe the same buffer
 * (i.e. it is a "not equal" predicate). The region bounds are only compared when the keys
 * carry a region. */
static bool accesscache_hashcmp(const void *a_v, const void *b_v)
{
  const AccessCacheKey *lhs = (const AccessCacheKey *)a_v;
  const AccessCacheKey *rhs = (const AccessCacheKey *)b_v;

  const bool same_settings = lhs->clip_index == rhs->clip_index && lhs->frame == rhs->frame &&
                             lhs->downscale == rhs->downscale &&
                             lhs->input_mode == rhs->input_mode &&
                             lhs->has_region == rhs->has_region &&
                             lhs->transform_key == rhs->transform_key;
  if (!same_settings) {
    return true;
  }

  /* Both keys agree on has_region at this point; without a region there is nothing left
   * to compare. */
  if (!lhs->has_region) {
    return false;
  }

  const bool same_region = equals_v2v2(lhs->region_min, rhs->region_min) &&
                           equals_v2v2(lhs->region_max, rhs->region_max);
  return !same_region;
}
/* Fill in `key` from the parameters of a frame request.
 *
 * When `region` is NULL the key is marked as having no region and its region_min/region_max
 * are left untouched. */
static void accesscache_construct_key(AccessCacheKey *key,
                                      int clip_index,
                                      int frame,
                                      libmv_InputMode input_mode,
                                      int downscale,
                                      const libmv_Region *region,
                                      int64_t transform_key)
{
  key->clip_index = clip_index;
  key->frame = frame;
  key->downscale = downscale;
  key->input_mode = input_mode;
  key->transform_key = transform_key;

  if (region != NULL) {
    key->has_region = true;
    copy_v2_v2(key->region_min, region->min);
    copy_v2_v2(key->region_max, region->max);
  }
  else {
    key->has_region = false;
  }
}
/* Store `ibuf` in the accessor's cache under a key built from the request parameters.
 *
 * No locking is done here; the visible caller takes accessor->cache_lock around this call. */
static void accesscache_put(TrackingImageAccessor *accessor,
                            int clip_index,
                            int frame,
                            libmv_InputMode input_mode,
                            int downscale,
                            const libmv_Region *region,
                            int64_t transform_key,
                            ImBuf *ibuf)
{
  AccessCacheKey cache_key;
  accesscache_construct_key(
      &cache_key, clip_index, frame, input_mode, downscale, region, transform_key);

  IMB_moviecache_put(accessor->cache, &cache_key, ibuf);
}
/* Look up a buffer in the accessor's cache using a key built from the request parameters.
 *
 * Returns the result of IMB_moviecache_get() unchanged; the visible caller treats NULL as a
 * cache miss. No locking is done here; that caller takes accessor->cache_lock around the call. */
static ImBuf *accesscache_get(TrackingImageAccessor *accessor,
                              int clip_index,
                              int frame,
                              libmv_InputMode input_mode,
                              int downscale,
                              const libmv_Region *region,
                              int64_t transform_key)
{
  AccessCacheKey cache_key;
  accesscache_construct_key(
      &cache_key, clip_index, frame, input_mode, downscale, region, transform_key);

  ImBuf *cached_ibuf = IMB_moviecache_get(accessor->cache, &cache_key);
  return cached_ibuf;
}
static ImBuf *accessor_get_preprocessed_ibuf(TrackingImageAccessor *accessor,
int clip_index,
int frame)
@ -776,33 +694,14 @@ static ImBuf *accessor_get_ibuf(TrackingImageAccessor *accessor,
const libmv_Region *region,
const libmv_FrameTransform *transform)
{
ImBuf *ibuf, *orig_ibuf, *final_ibuf;
int64_t transform_key = 0;
if (transform != NULL) {
transform_key = libmv_frameAccessorgetTransformKey(transform);
}
/* First try to get fully processed image from the cache. */
BLI_spin_lock(&accessor->cache_lock);
ibuf = accesscache_get(
accessor, clip_index, frame, input_mode, downscale, region, transform_key);
BLI_spin_unlock(&accessor->cache_lock);
if (ibuf != NULL) {
CACHE_PRINTF("Used cached buffer for frame %d\n", frame);
/* This is a little heuristic here: if we re-used image once, this is
* a high probability of the image to be related to a keyframe matched
* reference image. Those images we don't want to be thrown away because
* if we toss them out we'll be re-calculating them at the next
* iteration.
*/
ibuf->userflags |= IB_PERSISTENT;
return ibuf;
}
CACHE_PRINTF("Calculate new buffer for frame %d\n", frame);
/* And now we do postprocessing of the original frame. */
orig_ibuf = accessor_get_preprocessed_ibuf(accessor, clip_index, frame);
ImBuf *orig_ibuf = accessor_get_preprocessed_ibuf(accessor, clip_index, frame);
if (orig_ibuf == NULL) {
return NULL;
}
ImBuf *final_ibuf;
/* Cut a region if requested. */
if (region != NULL) {
int width = region->max[0] - region->min[0], height = region->max[1] - region->min[1];
@ -902,11 +801,6 @@ static ImBuf *accessor_get_ibuf(TrackingImageAccessor *accessor,
final_ibuf = IMB_dupImBuf(orig_ibuf);
}
IMB_freeImBuf(orig_ibuf);
BLI_spin_lock(&accessor->cache_lock);
/* Put final buffer to cache. */
accesscache_put(
accessor, clip_index, frame, input_mode, downscale, region, transform_key, final_ibuf);
BLI_spin_unlock(&accessor->cache_lock);
return final_ibuf;
}
@ -1016,9 +910,6 @@ TrackingImageAccessor *tracking_image_accessor_new(MovieClip *clips[MAX_ACCESSOR
BLI_assert(num_clips <= MAX_ACCESSOR_CLIP);
accessor->cache = IMB_moviecache_create(
"frame access cache", sizeof(AccessCacheKey), accesscache_hashhash, accesscache_hashcmp);
memcpy(accessor->clips, clips, num_clips * sizeof(MovieClip *));
accessor->num_clips = num_clips;
@ -1040,7 +931,6 @@ TrackingImageAccessor *tracking_image_accessor_new(MovieClip *clips[MAX_ACCESSOR
void tracking_image_accessor_destroy(TrackingImageAccessor *accessor)
{
IMB_moviecache_free(accessor->cache);
libmv_FrameAccessorDestroy(accessor->libmv_accessor);
BLI_spin_end(&accessor->cache_lock);
MEM_freeN(accessor->tracks);

View File

@ -131,8 +131,6 @@ struct libmv_FrameAccessor;
#define MAX_ACCESSOR_CLIP 64
typedef struct TrackingImageAccessor {
struct MovieCache *cache;
struct MovieClip *clips[MAX_ACCESSOR_CLIP];
int num_clips;