Tracking: Improve multithreading of tracking many markers

This change solves a bottleneck which was caused by an attempt to cache postprocessed search areas used for tracking. It was a single cache used by all threads, which required some synchronization mechanism. This synchronization turned out to make all threads idle while one thread was accessing the cache. The access was not cheap, so the multi-threading did not provide the expected speedup. The current solution is to remove the cache of search areas. This avoids any threading synchronization overhead because there is no need for it anymore. The downside is that for certain configurations tracking became slower compared to the master branch. There is no expected slowdown compared to the 2.91 release. The slowdown is mainly experienced when using a big search area and the keyframe matching strategy. Other cases should still be within a ballpark of the performance of single-threaded code prior to this change. The reason is that, while this change means the image accessors need to process images multiple times, the complexity of this process is almost the same as all the overhead of the cache lookup and maintenance. Here are some numbers gained on different configurations. CPU: Intel Xeon CPU E5-2699 v4. OS: Linux. Footage: Old_Factory MVI_4005.mov from the first part of the Track Match Blend training, which can be found on the Blender Cloud. Tracking 443 markers across 250 frames. The unit is seconds. File: F9433209. 2.91: 401.520874, before: 358.650055, after: 14.966302. Tracking a single marker across 250 frames. The unit is seconds. File: F9433211. Columns: 2.91 / before / after. Big keyframe: 1.307203 / 1.005324 / 1.227300. Big previous frame: 1.144055 / 0.881139 / 0.944044. Small keyframe: 0.434015 / 0.197760 / 0.224982. Small previous frame: 0.463207 / 0.218058 / 0.234172. All at once: 2.338268 / 1.481220 / 1.518060.
2020-12-03 14:32:57 +01:00 · 2020-12-03 14:32:57 +01:00 · 5d13082622
parent a6c4e39876
commit 5d13082622
3 changed files with 4 additions and 114 deletions
--- a/source/blender/blenkernel/intern/tracking_auto.c
+++ b/source/blender/blenkernel/intern/tracking_auto.c
@ -34,7 +34,9 @@
 #include "BLI_task.h"
 #include "BLI_threads.h"
 #include "BLI_utildefines.h"
+#include "PIL_time.h"

+#include "BKE_global.h"
 #include "BKE_movieclip.h"
 #include "BKE_tracking.h"

--- a/source/blender/blenkernel/intern/tracking_util.c
+++ b/source/blender/blenkernel/intern/tracking_util.c
@ -615,88 +615,6 @@ MovieTrackingMarker *tracking_get_keyframed_marker(MovieTrackingTrack *track,

 /*********************** Frame accessr *************************/

-typedef struct AccessCacheKey {
-  int clip_index;
-  int frame;
-  int downscale;
-  libmv_InputMode input_mode;
-  bool has_region;
-  float region_min[2], region_max[2];
-  int64_t transform_key;
-} AccessCacheKey;
-
-static unsigned int accesscache_hashhash(const void *key_v)
-{
-  const AccessCacheKey *key = (const AccessCacheKey *)key_v;
-  /* TODP(sergey): Need better hashing here for faster frame access. */
-  return key->clip_index << 16 | key->frame;
-}
-
-static bool accesscache_hashcmp(const void *a_v, const void *b_v)
-{
-  const AccessCacheKey *a = (const AccessCacheKey *)a_v;
-  const AccessCacheKey *b = (const AccessCacheKey *)b_v;
-  if (a->clip_index != b->clip_index || a->frame != b->frame || a->downscale != b->downscale ||
-      a->input_mode != b->input_mode || a->has_region != b->has_region ||
-      a->transform_key != b->transform_key) {
-    return true;
-  }
-  /* If there is region applied, compare it. */
-  if (a->has_region) {
-    if (!equals_v2v2(a->region_min, b->region_min) || !equals_v2v2(a->region_max, b->region_max)) {
-      return true;
-    }
-  }
-  return false;
-}
-
-static void accesscache_construct_key(AccessCacheKey *key,
-                                      int clip_index,
-                                      int frame,
-                                      libmv_InputMode input_mode,
-                                      int downscale,
-                                      const libmv_Region *region,
-                                      int64_t transform_key)
-{
-  key->clip_index = clip_index;
-  key->frame = frame;
-  key->input_mode = input_mode;
-  key->downscale = downscale;
-  key->has_region = (region != NULL);
-  if (key->has_region) {
-    copy_v2_v2(key->region_min, region->min);
-    copy_v2_v2(key->region_max, region->max);
-  }
-  key->transform_key = transform_key;
-}
-
-static void accesscache_put(TrackingImageAccessor *accessor,
-                            int clip_index,
-                            int frame,
-                            libmv_InputMode input_mode,
-                            int downscale,
-                            const libmv_Region *region,
-                            int64_t transform_key,
-                            ImBuf *ibuf)
-{
-  AccessCacheKey key;
-  accesscache_construct_key(&key, clip_index, frame, input_mode, downscale, region, transform_key);
-  IMB_moviecache_put(accessor->cache, &key, ibuf);
-}
-
-static ImBuf *accesscache_get(TrackingImageAccessor *accessor,
-                              int clip_index,
-                              int frame,
-                              libmv_InputMode input_mode,
-                              int downscale,
-                              const libmv_Region *region,
-                              int64_t transform_key)
-{
-  AccessCacheKey key;
-  accesscache_construct_key(&key, clip_index, frame, input_mode, downscale, region, transform_key);
-  return IMB_moviecache_get(accessor->cache, &key);
-}
-
 static ImBuf *accessor_get_preprocessed_ibuf(TrackingImageAccessor *accessor,
                                             int clip_index,
                                             int frame)
@ -776,33 +694,14 @@ static ImBuf *accessor_get_ibuf(TrackingImageAccessor *accessor,
                                const libmv_Region *region,
                                const libmv_FrameTransform *transform)
 {
-  ImBuf *ibuf, *orig_ibuf, *final_ibuf;
-  int64_t transform_key = 0;
-  if (transform != NULL) {
-    transform_key = libmv_frameAccessorgetTransformKey(transform);
-  }
  /* First try to get fully processed image from the cache. */
-  BLI_spin_lock(&accessor->cache_lock);
-  ibuf = accesscache_get(
-      accessor, clip_index, frame, input_mode, downscale, region, transform_key);
-  BLI_spin_unlock(&accessor->cache_lock);
-  if (ibuf != NULL) {
-    CACHE_PRINTF("Used cached buffer for frame %d\n", frame);
-    /* This is a little heuristic here: if we re-used image once, this is
-     * a high probability of the image to be related to a keyframe matched
-     * reference image. Those images we don't want to be thrown away because
-     * if we toss them out we'll be re-calculating them at the next
-     * iteration.
-     */
-    ibuf->userflags |= IB_PERSISTENT;
-    return ibuf;
-  }
  CACHE_PRINTF("Calculate new buffer for frame %d\n", frame);
  /* And now we do postprocessing of the original frame. */
-  orig_ibuf = accessor_get_preprocessed_ibuf(accessor, clip_index, frame);
+  ImBuf *orig_ibuf = accessor_get_preprocessed_ibuf(accessor, clip_index, frame);
  if (orig_ibuf == NULL) {
    return NULL;
  }
+  ImBuf *final_ibuf;
  /* Cut a region if requested. */
  if (region != NULL) {
    int width = region->max[0] - region->min[0], height = region->max[1] - region->min[1];
@ -902,11 +801,6 @@ static ImBuf *accessor_get_ibuf(TrackingImageAccessor *accessor,
    final_ibuf = IMB_dupImBuf(orig_ibuf);
  }
  IMB_freeImBuf(orig_ibuf);
-  BLI_spin_lock(&accessor->cache_lock);
-  /* Put final buffer to cache. */
-  accesscache_put(
-      accessor, clip_index, frame, input_mode, downscale, region, transform_key, final_ibuf);
-  BLI_spin_unlock(&accessor->cache_lock);
  return final_ibuf;
 }

@ -1016,9 +910,6 @@ TrackingImageAccessor *tracking_image_accessor_new(MovieClip *clips[MAX_ACCESSOR

  BLI_assert(num_clips <= MAX_ACCESSOR_CLIP);

-  accessor->cache = IMB_moviecache_create(
-      "frame access cache", sizeof(AccessCacheKey), accesscache_hashhash, accesscache_hashcmp);
-
  memcpy(accessor->clips, clips, num_clips * sizeof(MovieClip *));
  accessor->num_clips = num_clips;

@ -1040,7 +931,6 @@ TrackingImageAccessor *tracking_image_accessor_new(MovieClip *clips[MAX_ACCESSOR

 void tracking_image_accessor_destroy(TrackingImageAccessor *accessor)
 {
-  IMB_moviecache_free(accessor->cache);
  libmv_FrameAccessorDestroy(accessor->libmv_accessor);
  BLI_spin_end(&accessor->cache_lock);
  MEM_freeN(accessor->tracks);
--- a/source/blender/blenkernel/tracking_private.h
+++ b/source/blender/blenkernel/tracking_private.h
@ -131,8 +131,6 @@ struct libmv_FrameAccessor;

 #define MAX_ACCESSOR_CLIP 64
 typedef struct TrackingImageAccessor {
-  struct MovieCache *cache;
-
  struct MovieClip *clips[MAX_ACCESSOR_CLIP];
  int num_clips;