Realtime Compositor: Implement Streaks Glare node

This patch implements the Streaks Glare node, which is an approximation of the existing implementation in the CPU compositor. The difference due to the approximation is barely visible in artificial test cases, and is even less visible in actual use cases. Since the difference is rather similar to the one we discussed for the Simple Star mode, the decision to allow that difference would probably hold here. For the future, we can look into approximating this further using a closed form IIR recursive filter with parallel interconnection and block-based parallelism. That's because the streak filter is already very similar to the causal pass of a fourth order recursive filter, just with exponential steps. Differential Revision: https://developer.blender.org/D16789 Reviewed By: Clement Foucault
Referenced by issue #106855, Final composite and viewport/realtime composite are not the same image
2022-12-19 10:08:59 +02:00 · 2022-12-19 10:08:59 +02:00 · c3cc8d2f6a · 2023-04-18 18:08:51 +02:00
parent 67318b1977
commit c3cc8d2f6a
5 changed files with 267 additions and 36 deletions
--- a/source/blender/compositor/realtime_compositor/CMakeLists.txt
+++ b/source/blender/compositor/realtime_compositor/CMakeLists.txt
@ -106,6 +106,8 @@ set(GLSL_SRC
  shaders/compositor_glare_simple_star_diagonal_pass.glsl
  shaders/compositor_glare_simple_star_horizontal_pass.glsl
  shaders/compositor_glare_simple_star_vertical_pass.glsl
+  shaders/compositor_glare_streaks_accumulate.glsl
+  shaders/compositor_glare_streaks_filter.glsl
  shaders/compositor_image_crop.glsl
  shaders/compositor_morphological_distance.glsl
  shaders/compositor_morphological_distance_feather.glsl
--- a/source/blender/compositor/realtime_compositor/shaders/compositor_glare_streaks_accumulate.glsl
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_glare_streaks_accumulate.glsl
@ -0,0 +1,9 @@
+#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
+
+void main() /* Adds the attenuated streak texel to the accumulated streaks image. */
+{
+  ivec2 texel = ivec2(gl_GlobalInvocationID.xy); /* One invocation per texel. */
+  vec4 attenuated_streak = texture_load(streak_tx, texel) * attenuation_factor;
+  vec4 current_accumulated_streaks = imageLoad(accumulated_streaks_img, texel);
+  imageStore(accumulated_streaks_img, texel, current_accumulated_streaks + attenuated_streak);
+}
--- a/source/blender/compositor/realtime_compositor/shaders/compositor_glare_streaks_filter.glsl
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_glare_streaks_filter.glsl
@ -0,0 +1,41 @@
+#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
+
+void main()
+{
+  /* Applies a single iteration of the streak filter. Each output texel is the average of the
+   * input at that texel and a weighted sum of three color modulated neighbours that are sampled
+   * along the streak vector. */
+  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+  ivec2 input_size = texture_size(input_streak_tx);
+
+  /* Add 0.5 to evaluate the input sampler at the center of the pixel and divide by the image size
+   * to get the coordinates into the sampler's expected [0, 1] range. Similarly, transform the
+   * vector into the sampler's space by dividing by the input size. The integer size is converted
+   * to a float vector explicitly, because implicit integer to float vector conversions are not
+   * available in every shading language. */
+  vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(input_size);
+  vec2 vector = streak_vector / vec2(input_size);
+
+  /* Load three equally spaced neighbours to the current pixel in the direction of the streak
+   * vector. */
+  vec4 neighbours[3];
+  neighbours[0] = texture(input_streak_tx, coordinates + vector);
+  neighbours[1] = texture(input_streak_tx, coordinates + vector * 2.0);
+  neighbours[2] = texture(input_streak_tx, coordinates + vector * 3.0);
+
+  /* Attenuate the value of two of the channels for each of the neighbours by multiplying by the
+   * color modulator. The particular channels for each neighbour were chosen to be visually similar
+   * to the modulation pattern of chromatic aberration. */
+  neighbours[0].gb *= color_modulator;
+  neighbours[1].rg *= color_modulator;
+  neighbours[2].rb *= color_modulator;
+
+  /* Compute the weighted sum of all neighbours using the given fade factors as weights. The
+   * weights are expected to be lower for neighbours that are further away. */
+  vec4 weighted_neighbours_sum = vec4(0.0);
+  for (int i = 0; i < 3; i++) {
+    weighted_neighbours_sum += fade_factors[i] * neighbours[i];
+  }
+
+  /* The output is the average of the center color and the weighted sum of the neighbours, which
+   * intuitively means that highlights will spread in the direction of the streak, which is the
+   * desired result. */
+  vec4 center_color = texture(input_streak_tx, coordinates);
+  vec4 output_color = (center_color + weighted_neighbours_sum) / 2.0;
+  imageStore(output_streak_img, texel, output_color);
+}
--- a/source/blender/compositor/realtime_compositor/shaders/infos/compositor_glare_info.hh
+++ b/source/blender/compositor/realtime_compositor/shaders/infos/compositor_glare_info.hh
@ -82,3 +82,25 @@ GPU_SHADER_CREATE_INFO(compositor_glare_simple_star_anti_diagonal_pass)
    .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "anti_diagonal_img")
    .compute_source("compositor_glare_simple_star_anti_diagonal_pass.glsl")
    .do_static_compilation(true);
+
+/* -------
+ * Streaks
+ * ------- */
+
+GPU_SHADER_CREATE_INFO(compositor_glare_streaks_filter) /* Streak filter compute shader. */
+    .local_group_size(16, 16)
+    .push_constant(Type::FLOAT, "color_modulator")
+    .push_constant(Type::VEC3, "fade_factors") /* One weight for each of the 3 neighbours. */
+    .push_constant(Type::VEC2, "streak_vector")
+    .sampler(0, ImageType::FLOAT_2D, "input_streak_tx")
+    .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_streak_img")
+    .compute_source("compositor_glare_streaks_filter.glsl")
+    .do_static_compilation(true);
+
+GPU_SHADER_CREATE_INFO(compositor_glare_streaks_accumulate) /* Streak accumulation shader. */
+    .local_group_size(16, 16)
+    .push_constant(Type::FLOAT, "attenuation_factor")
+    .sampler(0, ImageType::FLOAT_2D, "streak_tx")
+    /* The image is both read and written because the shader adds into its existing content. */
+    .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "accumulated_streaks_img")
+    .compute_source("compositor_glare_streaks_accumulate.glsl")
+    .do_static_compilation(true);
--- a/source/blender/nodes/composite/nodes/node_composite_glare.cc
+++ b/source/blender/nodes/composite/nodes/node_composite_glare.cc
@ -9,6 +9,7 @@

 #include "BLI_assert.h"
 #include "BLI_index_range.hh"
+#include "BLI_math_base.h"
 #include "BLI_math_base.hh"
 #include "BLI_math_vec_types.hh"

@ -31,6 +32,8 @@

 #include "node_composite_util.hh"

+#define MAX_GLARE_ITERATIONS 5 /* Iterations count at which streaks are not attenuated. */
+
 namespace blender::nodes::node_composite_glare_cc {

 NODE_STORAGE_FUNCS(NodeGlare)
@ -128,19 +131,9 @@ class GlareOperation : public NodeOperation {
      return true;
    }

-    /* Only the ghost and simple star operations are currently supported. */
-    switch (node_storage(bnode()).type) {
-      case CMP_NODE_GLARE_SIMPLE_STAR:
-        return false;
-      case CMP_NODE_GLARE_FOG_GLOW:
-        return true;
-      case CMP_NODE_GLARE_STREAKS:
-        return true;
-      case CMP_NODE_GLARE_GHOST:
-        return false;
-      default:
-        BLI_assert_unreachable();
-        return true;
+    /* The fog glow mode is currently unsupported. */
+    if (node_storage(bnode()).type == CMP_NODE_GLARE_FOG_GLOW) {
+      return true;
    }

    return false;
@ -334,26 +327,174 @@ class GlareOperation : public NodeOperation {
    return size.x + size.y - 1;
  }

-  /* ---------------
-   * Fog Glow Glare.
-   * --------------- */
-
-  /* Not yet implemented. Unreachable code due to the is_identity method. */
-  Result execute_fog_glow(Result & /*highlights_result*/)
-  {
-    BLI_assert_unreachable();
-    return Result(ResultType::Color, texture_pool());
-  }
-
  /* --------------
   * Streaks Glare.
   * -------------- */

-  /* Not yet implemented. Unreachable code due to the is_identity method. */
-  Result execute_streaks(Result & /*highlights_result*/)
+  Result execute_streaks(Result &highlights_result)
  {
-    BLI_assert_unreachable();
-    return Result(ResultType::Color, texture_pool());
+    /* Compute the streaks glare from the given highlights and return it as a new temporary color
+     * result of the glare size. Start by creating an initially zero image where the streaks will
+     * be accumulated. */
+    const float4 zero_color = float4(0.0f);
+    const int2 glare_size = get_glare_size();
+    Result accumulated_streaks_result = Result::Temporary(ResultType::Color, texture_pool());
+    accumulated_streaks_result.allocate_texture(glare_size);
+    GPU_texture_clear(accumulated_streaks_result.texture(), GPU_DATA_FLOAT, zero_color);
+
+    /* For each streak, compute its direction and apply a streak filter in that direction, then
+     * accumulate the result into the accumulated streaks result. Note that apply_streak_filter
+     * binds and unbinds its own shader, so the accumulate shader is bound only after it returns.
+     * The filtered streak is released at the end of each iteration since it is no longer needed
+     * once it has been accumulated. */
+    for (const int streak_index : IndexRange(get_number_of_streaks())) {
+      const float2 streak_direction = compute_streak_direction(streak_index);
+      Result streak_result = apply_streak_filter(highlights_result, streak_direction);
+
+      GPUShader *shader = shader_manager().get("compositor_glare_streaks_accumulate");
+      GPU_shader_bind(shader);
+
+      const float attenuation_factor = compute_streak_attenuation_factor();
+      GPU_shader_uniform_1f(shader, "attenuation_factor", attenuation_factor);
+
+      streak_result.bind_as_texture(shader, "streak_tx");
+      accumulated_streaks_result.bind_as_image(shader, "accumulated_streaks_img", true);
+
+      compute_dispatch_threads_at_least(shader, glare_size);
+
+      streak_result.unbind_as_texture();
+      accumulated_streaks_result.unbind_as_image();
+
+      streak_result.release();
+      GPU_shader_unbind();
+    }
+
+    return accumulated_streaks_result;
+  }
+
+  Result apply_streak_filter(Result &highlights_result, const float2 &streak_direction)
+  {
+    GPUShader *shader = shader_manager().get("compositor_glare_streaks_filter");
+    GPU_shader_bind(shader);
+
+    /* Apply the streak filter repeatedly along the given direction and return the filtered
+     * streak as a new temporary result. Copy the highlights result into a new image because the
+     * output will be copied to the input after each iteration and the highlights result is still
+     * needed to compute other streaks. */
+    const int2 glare_size = get_glare_size();
+    Result input_streak_result = Result::Temporary(ResultType::Color, texture_pool());
+    input_streak_result.allocate_texture(glare_size);
+    GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
+    GPU_texture_copy(input_streak_result.texture(), highlights_result.texture());
+
+    Result output_streak_result = Result::Temporary(ResultType::Color, texture_pool());
+    output_streak_result.allocate_texture(glare_size);
+
+    /* For the given number of iterations, apply the streak filter in the given direction. The
+     * result of the previous iteration is used as the input of the current iteration. The
+     * uniforms are recomputed for every iteration since they depend on the iteration index. The
+     * filter and wrap modes of the input are set right after binding it, presumably to enable
+     * interpolation and disable repetition for the off-center samples that the shader takes,
+     * confirm against the GPU texture API. */
+    const IndexRange iterations_range = IndexRange(get_number_of_iterations());
+    for (const int iteration : iterations_range) {
+      const float color_modulator = compute_streak_color_modulator(iteration);
+      const float iteration_magnitude = compute_streak_iteration_magnitude(iteration);
+      const float3 fade_factors = compute_streak_fade_factors(iteration_magnitude);
+      const float2 streak_vector = streak_direction * iteration_magnitude;
+
+      GPU_shader_uniform_1f(shader, "color_modulator", color_modulator);
+      GPU_shader_uniform_3fv(shader, "fade_factors", fade_factors);
+      GPU_shader_uniform_2fv(shader, "streak_vector", streak_vector);
+
+      input_streak_result.bind_as_texture(shader, "input_streak_tx");
+      GPU_texture_filter_mode(input_streak_result.texture(), true);
+      GPU_texture_wrap_mode(input_streak_result.texture(), false, false);
+
+      output_streak_result.bind_as_image(shader, "output_streak_img");
+
+      compute_dispatch_threads_at_least(shader, glare_size);
+
+      input_streak_result.unbind_as_texture();
+      output_streak_result.unbind_as_image();
+
+      /* The output of this iteration serves as the input for the next iteration, so copy it to
+       * the input result since a result can't be used for reading and writing simultaneously.
+       * Skip copying for the last iteration since it is not needed. */
+      if (iteration != iterations_range.last()) {
+        GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
+        GPU_texture_copy(input_streak_result.texture(), output_streak_result.texture());
+      }
+    }
+
+    input_streak_result.release();
+    GPU_shader_unbind();
+
+    return output_streak_result;
+  }
+
+  /* As the number of iterations increases, the streaks spread farther and their intensity
+   * decreases. To maintain similar intensities regardless of the number of iterations, streaks
+   * with a lower number of iterations are attenuated more. When the number of iterations is
+   * maximum, we need not attenuate, so the denominator should be one, and when the number of
+   * iterations is one, we need the attenuation to be maximum. The denominator can be modeled as
+   * a simple decreasing linear equation by substituting the two aforementioned cases.
+   *
+   * NOTE: This assumes the number of iterations never exceeds MAX_GLARE_ITERATIONS, otherwise
+   * the denominator would be zero or negative. Presumably the node limits guarantee that,
+   * confirm against the property definition. */
+  float compute_streak_attenuation_factor()
+  {
+    return 1.0f / (MAX_GLARE_ITERATIONS + 1 - get_number_of_iterations());
+  }
+
+  /* Given the index of the streak in the [0, Number Of Streaks - 1] range, compute the unit
+   * direction vector defining the streak. The streak directions make angles with the x-axis
+   * that are equally spaced and cover the whole two pi range, starting with the user supplied
+   * angle. The returned vector is the cosine and sine of that angle. */
+  float2 compute_streak_direction(int streak_index)
+  {
+    const int number_of_streaks = get_number_of_streaks();
+    const float start_angle = get_streaks_start_angle();
+    const float angle = start_angle + (float(streak_index) / number_of_streaks) * (M_PI * 2.0f);
+    return float2(math::cos(angle), math::sin(angle));
+  }
+
+  /* Different color channels of the streaks can be modulated by being multiplied by the color
+   * modulator computed by this method. The color modulation is expected to be maximum when the
+   * modulation factor is 1 and nonexistent when it is zero. But since the color modulator is
+   * multiplied by the channel and the multiplicative identity is 1, we invert the modulation
+   * factor. Moreover, color modulation should be less visible on higher iterations because they
+   * produce the farther and more faded parts of the streaks. To achieve that, the modulation
+   * factor is raised to the power of the iteration, noting that the modulation value is in the
+   * [0, 1] range so the higher the iteration the lower the resulting modulation factor. The plus
+   * one makes sure the power starts at one, since the iteration index starts at zero. */
+  float compute_streak_color_modulator(int iteration)
+  {
+    return 1.0f - std::pow(get_color_modulation_factor(), iteration + 1);
+  }
+
+  /* Streaks are computed by iteratively applying a filter that samples 3 neighbouring pixels in
+   * the direction of the streak. Those neighbouring pixels are then combined using a weighted sum.
+   * The weights of the neighbours are the fade factors computed by this method. Farther neighbours
+   * are expected to have lower weights because they contribute less to the combined result. Since
+   * the iteration magnitude represents how far the neighbours are, as noted in the description of
+   * the compute_streak_iteration_magnitude method, the fade factor for the closest neighbour is
+   * computed as the user supplied fade parameter raised to the power of the magnitude, noting that
+   * the fade value is in the [0, 1] range while the magnitude is greater than or equal to one, so
+   * the higher the power the lower the resulting fade factor. Furthermore, the factors of the
+   * other two neighbours are just the squared and cubed versions of the fade factor of the closest
+   * neighbour, which gives even lower fade factors for those farther neighbours. */
+  float3 compute_streak_fade_factors(float iteration_magnitude)
+  {
+    const float fade_factor = std::pow(node_storage(bnode()).fade, iteration_magnitude);
+    return float3(fade_factor, std::pow(fade_factor, 2.0f), std::pow(fade_factor, 3.0f));
+  }
+
+  /* Streaks are computed by iteratively applying a filter that samples the neighbouring pixels in
+   * the direction of the streak. Each higher iteration samples pixels that are farther away, the
+   * magnitude computed by this method describes how far away the neighbours are sampled. The
+   * magnitude increases exponentially with the iteration. A base of 4 was chosen as a compromise
+   * between better quality and performance, since a lower base corresponds to more tightly spaced
+   * neighbours but would require more iterations to produce a streak of the same length. */
+  float compute_streak_iteration_magnitude(int iteration)
+  {
+    return std::pow(4.0f, iteration);
+  }
+
+  float get_streaks_start_angle()
+  {
+    return node_storage(bnode()).angle_ofs; /* Angle of the first streak, from node storage. */
+  }
+
+  int get_number_of_streaks()
+  {
+    return node_storage(bnode()).streaks; /* Streaks count as stored in the node storage. */
  }

  /* ------------
@ -377,9 +518,9 @@ class GlareOperation : public NodeOperation {
    /* Create an initially zero image where ghosts will be accumulated. */
    const float4 zero_color = float4(0.0f);
    const int2 glare_size = get_glare_size();
-    Result accumulated_ghost_result = Result::Temporary(ResultType::Color, texture_pool());
-    accumulated_ghost_result.allocate_texture(glare_size);
-    GPU_texture_clear(accumulated_ghost_result.texture(), GPU_DATA_FLOAT, zero_color);
+    Result accumulated_ghosts_result = Result::Temporary(ResultType::Color, texture_pool());
+    accumulated_ghosts_result.allocate_texture(glare_size);
+    GPU_texture_clear(accumulated_ghosts_result.texture(), GPU_DATA_FLOAT, zero_color);

    /* For the given number of iterations, accumulate four ghosts with different scales and color
     * modulators. The result of the previous iteration is used as the input of the current
@ -392,26 +533,26 @@ class GlareOperation : public NodeOperation {
      GPU_shader_uniform_4fv(shader, "scales", scales.data());

      input_ghost_result.bind_as_texture(shader, "input_ghost_tx");
-      accumulated_ghost_result.bind_as_image(shader, "accumulated_ghost_img", true);
+      accumulated_ghosts_result.bind_as_image(shader, "accumulated_ghost_img", true);

      compute_dispatch_threads_at_least(shader, glare_size);

      input_ghost_result.unbind_as_texture();
-      accumulated_ghost_result.unbind_as_image();
+      accumulated_ghosts_result.unbind_as_image();

      /* The accumulated result serves as the input for the next iteration, so copy the result to
       * the input result since it can't be used for reading and writing simultaneously. Skip
       * copying for the last iteration since it is not needed. */
      if (i != iterations_range.last()) {
        GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
-        GPU_texture_copy(input_ghost_result.texture(), accumulated_ghost_result.texture());
+        GPU_texture_copy(input_ghost_result.texture(), accumulated_ghosts_result.texture());
      }
    }

    GPU_shader_unbind();
    input_ghost_result.release();

-    return accumulated_ghost_result;
+    return accumulated_ghosts_result;
  }

  /* Computes two ghosts by blurring the highlights with two different radii, then adds them into a
@ -544,7 +685,18 @@ class GlareOperation : public NodeOperation {
   * subtract from one. */
  float get_ghost_color_modulation_factor()
  {
-    return 1.0f - node_storage(bnode()).colmod;
+    return 1.0f - get_color_modulation_factor();
+  }
+
+  /* ---------------
+   * Fog Glow Glare.
+   * --------------- */
+
+  /* Not yet implemented. Expected to be unreachable because the is_identity method reports the
+   * operation as an identity when the fog glow mode is selected. The returned color result is
+   * only a placeholder to satisfy the return type. */
+  Result execute_fog_glow(Result & /*highlights_result*/)
+  {
+    BLI_assert_unreachable();
+    return Result(ResultType::Color, texture_pool());
  }

  /* ----------
@ -595,6 +747,11 @@ class GlareOperation : public NodeOperation {
    return node_storage(bnode()).iter;
  }

+  float get_color_modulation_factor()
+  {
+    return node_storage(bnode()).colmod; /* Used by both the streaks and the ghost modes. */
+  }
+
  /* The glare node can compute the glare on a fraction of the input image size to improve
   * performance. The quality values and their corresponding quality factors are as follows:
   *