Realtime Compositor: Implement dilate erode node

This patch implements the dilate/erode node for the realtime compositor. Differential Revision: https://developer.blender.org/D15790 Reviewed By: Clement Foucault
2022-09-02 14:47:39 +02:00 · 2022-09-02 14:47:39 +02:00 · 633117669b
parent 8cfca8e1bd
commit 633117669b
15 changed files with 809 additions and 18 deletions
--- a/source/blender/compositor/nodes/COM_DilateErodeNode.cc
+++ b/source/blender/compositor/nodes/COM_DilateErodeNode.cc
@ -28,7 +28,7 @@ void DilateErodeNode::convert_to_operations(NodeConverter &converter,
                                            const CompositorContext &context) const
 {
  const bNode *editor_node = this->get_bnode();
-  if (editor_node->custom1 == CMP_NODE_DILATEERODE_DISTANCE_THRESH) {
+  if (editor_node->custom1 == CMP_NODE_DILATE_ERODE_DISTANCE_THRESHOLD) {
    DilateErodeThresholdOperation *operation = new DilateErodeThresholdOperation();
    operation->set_distance(editor_node->custom2);
    operation->set_inset(editor_node->custom3);
@ -47,7 +47,7 @@ void DilateErodeNode::convert_to_operations(NodeConverter &converter,
      converter.map_output_socket(get_output_socket(0), operation->get_output_socket(0));
    }
  }
-  else if (editor_node->custom1 == CMP_NODE_DILATEERODE_DISTANCE) {
+  else if (editor_node->custom1 == CMP_NODE_DILATE_ERODE_DISTANCE) {
    if (editor_node->custom2 > 0) {
      DilateDistanceOperation *operation = new DilateDistanceOperation();
      operation->set_distance(editor_node->custom2);
@ -65,7 +65,7 @@ void DilateErodeNode::convert_to_operations(NodeConverter &converter,
      converter.map_output_socket(get_output_socket(0), operation->get_output_socket(0));
    }
  }
-  else if (editor_node->custom1 == CMP_NODE_DILATEERODE_DISTANCE_FEATHER) {
+  else if (editor_node->custom1 == CMP_NODE_DILATE_ERODE_DISTANCE_FEATHER) {
    /* this uses a modified gaussian blur function otherwise its far too slow */
    eCompositorQuality quality = context.get_quality();

--- a/source/blender/draw/intern/shaders/common_math_lib.glsl
+++ b/source/blender/draw/intern/shaders/common_math_lib.glsl
@ -17,6 +17,7 @@
 #define M_SQRT2 1.41421356237309504880   /* sqrt(2) */
 #define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
 #define FLT_MAX 3.402823e+38
+#define FLT_MIN 1.175494e-38

 vec3 mul(mat3 m, vec3 v)
 {
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@ -340,6 +340,10 @@ set(GLSL_SRC
  shaders/compositor/compositor_filter.glsl
  shaders/compositor/compositor_flip.glsl
  shaders/compositor/compositor_image_crop.glsl
+  shaders/compositor/compositor_morphological_distance.glsl
+  shaders/compositor/compositor_morphological_distance_feather.glsl
+  shaders/compositor/compositor_morphological_distance_threshold.glsl
+  shaders/compositor/compositor_morphological_step.glsl
  shaders/compositor/compositor_projector_lens_distortion.glsl
  shaders/compositor/compositor_realize_on_domain.glsl
  shaders/compositor/compositor_screen_lens_distortion.glsl
@ -616,6 +620,10 @@ set(SRC_SHADER_CREATE_INFOS
  shaders/compositor/infos/compositor_filter_info.hh
  shaders/compositor/infos/compositor_flip_info.hh
  shaders/compositor/infos/compositor_image_crop_info.hh
+  shaders/compositor/infos/compositor_morphological_distance_feather_info.hh
+  shaders/compositor/infos/compositor_morphological_distance_info.hh
+  shaders/compositor/infos/compositor_morphological_distance_threshold_info.hh
+  shaders/compositor/infos/compositor_morphological_step_info.hh
  shaders/compositor/infos/compositor_projector_lens_distortion_info.hh
  shaders/compositor/infos/compositor_realize_on_domain_info.hh
  shaders/compositor/infos/compositor_screen_lens_distortion_info.hh
--- a/source/blender/gpu/shaders/compositor/compositor_morphological_distance.glsl
+++ b/source/blender/gpu/shaders/compositor/compositor_morphological_distance.glsl
@ -0,0 +1,24 @@
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
+
+void main()
+{
+  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+
+  /* Find the minimum/maximum value in the circular window of the given radius around the pixel. By
+   * circular window, we mean that pixels in the window whose distance to the center of the window
+   * is larger than the given radius are skipped and not considered. Consequently, the dilation or
+   * erosion that takes place produces round results as opposed to squarish ones. This is
+   * essentially a morphological operator with a circular structuring element.
+   *
+   * LIMIT is used both as the initial value of the search and as the fallback for texels that lie
+   * outside of the input, so it should be the identity of OPERATOR: FLT_MAX if OPERATOR is min,
+   * and a value that is no larger than any possible input if OPERATOR is max. Note that FLT_MIN
+   * is the smallest positive float and not the most negative one, so it only acts as an identity
+   * for max as long as the input is non-negative. */
+  float value = LIMIT;
+  for (int y = -radius; y <= radius; y++) {
+    for (int x = -radius; x <= radius; x++) {
+      if (x * x + y * y <= radius * radius) {
+        value = OPERATOR(value, texture_load(input_tx, texel + ivec2(x, y), vec4(LIMIT)).x);
+      }
+    }
+  }
+
+  imageStore(output_img, texel, vec4(value));
+}
--- a/source/blender/gpu/shaders/compositor/compositor_morphological_distance_feather.glsl
+++ b/source/blender/gpu/shaders/compositor/compositor_morphological_distance_feather.glsl
@ -0,0 +1,101 @@
+#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
+
+/* The Morphological Distance Feather operation is a linear combination between the result of two
+ * operations. The first operation is a Gaussian blur with a radius equivalent to the dilate/erode
+ * distance, which is straightforward and implemented as a separable filter similar to the blur
+ * operation.
+ *
+ * The second operation is an approximation of a morphological inverse distance operation evaluated
+ * at a distance falloff function. The result of a morphological inverse distance operation is a
+ * narrow band distance field that starts at its maximum value at boundaries where a difference in
+ * values took place and linearly decreases until it reaches zero in the span of a number of
+ * pixels equivalent to the erode/dilate distance. Additionally, instead of linearly decreasing,
+ * the user may choose a different falloff which is evaluated at the computed distance. For
+ * dilation, the distance field decreases outwards, and for erosion, the distance field decreases
+ * inwards.
+ *
+ * The reason why the result of a Gaussian blur is mixed in with the distance field is because the
+ * distance field is merely approximated and not accurately computed, the defects of which are
+ * more apparent away from boundaries and especially at corners where the distance field should
+ * take a circular shape. That's why the Gaussian blur is mostly mixed only further from
+ * boundaries.
+ *
+ * The morphological inverse distance operation is approximated using a separable implementation
+ * and intertwined with the Gaussian blur implementation as follows. A search window of a radius
+ * equivalent to the dilate/erode distance is applied on the image to find either the minimum or
+ * maximum pixel value multiplied by its corresponding falloff value in the window. For dilation,
+ * we try to find the maximum, and for erosion, we try to find the minimum. Additionally, we also
+ * save the falloff value where the minimum or maximum was found. The found value will be that of
+ * the narrow band distance field and the saved falloff value will be used as the mixing factor
+ * with the Gaussian blur.
+ *
+ * To make sense of the aforementioned algorithm, assume we are dilating a binary image by 5 pixels
+ * whose half has a value of 1 and the other half has a value of zero. Consider the following:
+ *
+ * - A pixel of value 1 already has the maximum possible value, so its value will remain unchanged
+ *   regardless of its position.
+ * - A pixel of value 0 that is right at the boundary of the 1's region will have a maximum value
+ *   of around 0.8 depending on the falloff. That's because the search window intersects the 1's
+ *   region, which when multiplied by the falloff gives the first value of the falloff, which is
+ *   larger than the initially zero value computed at the center of the search window.
+ * - A pixel of value 0 that is 3 pixels away from the boundary will have a maximum value of around
+ *   0.4 depending on the falloff. That's because the search window intersects the 1's region,
+ *   which when multiplied by the falloff gives the third value of the falloff, which is larger
+ *   than the initially zero value computed at the center of the search window.
+ * - Finally, a pixel of value 0 that is 6 pixels away from the boundary will have a maximum value
+ *   of 0, because the search window doesn't intersect the 1's region and only spans zero values.
+ *
+ * The previous example demonstrates how the distance field naturally arises, and the same goes for
+ * the erode case, except the minimum value is computed instead.
+ */
+void main()
+{
+  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+
+  /* A value for accumulating the blur result. */
+  float accumulated_value = 0.0;
+
+  /* Compute the contribution of the center pixel to the blur result. */
+  float center_value = texture_load(input_tx, texel).x;
+  accumulated_value += center_value * texture_load(weights_tx, 0).x;
+
+  /* Start with the center value as the maximum/minimum distance and reassign to the true maximum
+   * or minimum in the search loop below. Additionally, the center falloff is always 1.0, so start
+   * with that. */
+  float limit_distance = center_value;
+  float limit_distance_falloff = 1.0;
+
+  /* Compute the contributions of the pixels to the right and left, noting that the weights and
+   * falloffs textures only store the weights and falloffs for the positive half, but since they
+   * are both symmetric, the same weights and falloffs are used for the negative half and we
+   * compute both of their contributions. The loop starts at 1 because the center pixel was
+   * already handled above. */
+  for (int i = 1; i < texture_size(weights_tx); i++) {
+    float weight = texture_load(weights_tx, i).x;
+    float falloff = texture_load(falloffs_tx, i).x;
+
+    /* Loop for two iterations, where s takes the value of -1 and 1, which is used as the sign
+     * needed to evaluate the positive and negative sides as explained above. */
+    for (int s = -1; s < 2; s += 2) {
+      /* Compute the contribution of the pixel to the blur result. */
+      float value = texture_load(input_tx, texel + ivec2(s * i, 0)).x;
+      accumulated_value += value * weight;
+
+      /* The distance is computed such that its highest value is the pixel value itself, so
+       * multiply the distance falloff by the pixel value. */
+      float falloff_distance = value * falloff;
+
+      /* Find either the maximum or the minimum for the dilate and erode cases respectively.
+       * COMPARE is a macro supplied by the shader variant, it is a greater-than comparison for
+       * the dilate variant and a less-than comparison for the erode variant. */
+      if (COMPARE(falloff_distance, limit_distance)) {
+        limit_distance = falloff_distance;
+        limit_distance_falloff = falloff;
+      }
+    }
+  }
+
+  /* Mix between the limit distance and the blurred accumulated value such that the limit distance
+   * is used for pixels closer to the boundary and the blurred value is used for pixels away from
+   * the boundary. */
+  float value = mix(accumulated_value, limit_distance, limit_distance_falloff);
+
+  /* Write the value using the transposed texel. See the execute_distance_feather_horizontal_pass
+   * method for more information on the rationale behind this. */
+  imageStore(output_img, texel.yx, vec4(value));
+}
--- a/source/blender/gpu/shaders/compositor/compositor_morphological_distance_threshold.glsl
+++ b/source/blender/gpu/shaders/compositor/compositor_morphological_distance_threshold.glsl
@ -0,0 +1,88 @@
+#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
+
+/* The Morphological Distance Threshold operation is effectively three consecutive operations
+ * implemented as a single operation. The three operations are as follows:
+ *
+ * .-----------.   .--------------.   .----------------.
+ * | Threshold |-->| Dilate/Erode |-->| Distance Inset |
+ * '-----------'   '--------------'   '----------------'
+ *
+ * The threshold operation just converts the input into a binary image, where the pixel is 1 if it
+ * is larger than 0.5 and 0 otherwise. Pixels that are 1 in the output of the threshold operation
+ * are said to be masked. The dilate/erode operation is a dilate or erode morphological operation
+ * with a circular structuring element depending on the sign of the distance, where it is a dilate
+ * operation if the distance is positive and an erode operation otherwise. This is equivalent to
+ * the Morphological Distance operation, see its implementation for more information. Finally, the
+ * distance inset is an operation that converts the binary image into a narrow band distance field.
+ * That is, pixels that are unmasked will remain 0, while pixels that are masked will start from
+ * zero at the boundary of the masked region and linearly increase until reaching 1 in the span of
+ * a number of pixels given by the inset value.
+ *
+ * As a performance optimization, the dilate/erode operation is omitted and its effective result is
+ * achieved by slightly adjusting the distance inset operation. The base distance inset operation
+ * works by computing the signed distance from the current center pixel to the nearest pixel with a
+ * different value. Since our image is a binary image, that means that if the pixel is masked, we
+ * compute the signed distance to the nearest unmasked pixel, and if the pixel is unmasked, we
+ * compute the signed distance to the nearest masked pixel. The distance is positive if the pixel
+ * is masked and negative otherwise. The distance is then normalized by dividing by the given inset
+ * value and clamped to the [0, 1] range. Since distances larger than the inset value are
+ * eventually clamped, the distance search window is limited to a radius equivalent to the inset
+ * value.
+ *
+ * To achieve the effective result of the omitted dilate/erode operation, we adjust the distance
+ * inset operation as follows. First, we increase the radius of the distance search window by the
+ * radius of the dilate/erode operation. Then we adjust the resulting narrow band signed distance
+ * field as follows.
+ *
+ * For the erode case, we merely subtract the erode distance, which makes the outermost erode
+ * distance number of pixels zero due to clamping, consequently achieving the result of the erode,
+ * while retaining the needed inset because we increased the distance search window by the same
+ * amount we subtracted.
+ *
+ * Similarly, for the dilate case, we add the dilate distance, which makes the dilate distance
+ * number of pixels just outside of the masked region positive and part of the narrow band distance
+ * field, consequently achieving the result of the dilate, while at the same time, the innermost
+ * dilate distance number of pixels become 1 due to clamping, retaining the needed inset because we
+ * increased the distance search window by the same amount we added.
+ *
+ * Since the erode/dilate distance is already signed appropriately as described before, we just add
+ * it in both cases. */
+void main()
+{
+  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+
+  /* Apply a threshold operation on the center pixel, where the threshold is currently hard-coded
+   * at 0.5. The pixels with values larger than the threshold are said to be masked. */
+  bool is_center_masked = texture_load(input_tx, texel).x > 0.5;
+
+  /* Since the distance search window will access pixels outside of the bounds of the image, we use
+   * a texture loader with a fallback value. And since we don't want those values to affect the
+   * result, the fallback value is chosen such that the inner condition fails, which is when the
+   * sampled pixel and the center pixel are the same, so choose a fallback that will be considered
+   * masked if the center pixel is masked and unmasked otherwise. */
+  vec4 fallback = vec4(is_center_masked ? 1.0 : 0.0);
+
+  /* Since the distance search window is limited to the given radius, the maximum possible squared
+   * distance to the center is double the squared radius. This also serves as the result when no
+   * differing pixel is found in the window. */
+  int minimum_squared_distance = radius * radius * 2;
+
+  /* Find the squared distance to the nearest different pixel in the search window of the given
+   * radius. Unlike the Morphological Distance shader, the whole square window is searched. */
+  for (int y = -radius; y <= radius; y++) {
+    for (int x = -radius; x <= radius; x++) {
+      bool is_sample_masked = texture_load(input_tx, texel + ivec2(x, y), fallback).x > 0.5;
+      if (is_center_masked != is_sample_masked) {
+        minimum_squared_distance = min(minimum_squared_distance, x * x + y * y);
+      }
+    }
+  }
+
+  /* Compute the actual distance from the squared distance and assign it an appropriate sign
+   * depending on whether it lies in a masked region or not. */
+  float signed_minimum_distance = sqrt(minimum_squared_distance) * (is_center_masked ? 1.0 : -1.0);
+
+  /* Add the erode/dilate distance and divide by the inset amount as described in the discussion,
+   * then clamp to the [0, 1] range.
+   * NOTE(review): inset is used as a divisor, so an inset of zero would divide by zero here.
+   * Presumably the caller guarantees a non-zero inset - confirm. */
+  float value = clamp((signed_minimum_distance + distance) / inset, 0.0, 1.0);
+
+  imageStore(output_img, texel, vec4(value));
+}
--- a/source/blender/gpu/shaders/compositor/compositor_morphological_step.glsl
+++ b/source/blender/gpu/shaders/compositor/compositor_morphological_step.glsl
@ -0,0 +1,19 @@
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
+
+void main()
+{
+  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+
+  /* Find the minimum/maximum value in the window of the given radius around the pixel. This is
+   * essentially a morphological operator with a square structuring element. Note that this pass
+   * only searches along the horizontal axis of the input.
+   *
+   * LIMIT is used both as the initial value of the search and as the fallback for texels that lie
+   * outside of the input, so it should be the identity of OPERATOR: FLT_MAX if OPERATOR is min,
+   * and a value that is no larger than any possible input if OPERATOR is max. Note that FLT_MIN
+   * is the smallest positive float and not the most negative one, so it only acts as an identity
+   * for max as long as the input is non-negative. */
+  float value = LIMIT;
+  for (int i = -radius; i <= radius; i++) {
+    value = OPERATOR(value, texture_load(input_tx, texel + ivec2(i, 0), vec4(LIMIT)).x);
+  }
+
+  /* Write the value using the transposed texel. See the execute_step_horizontal_pass method for
+   * more information on the rationale behind this. */
+  imageStore(output_img, texel.yx, vec4(value));
+}
--- a/source/blender/gpu/shaders/compositor/infos/compositor_morphological_distance_feather_info.hh
+++ b/source/blender/gpu/shaders/compositor/infos/compositor_morphological_distance_feather_info.hh
@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "gpu_shader_create_info.hh"
+
+/* Base info shared by the dilate and erode variants of the Morphological Distance Feather shader.
+ * The shader reads the x channel of the input, reads the 1D weights and falloffs textures, and
+ * writes a single scalar broadcast into a vec4. The output is consequently declared as a single
+ * channel image, matching the output of the other morphological shaders of the same node. */
+GPU_SHADER_CREATE_INFO(compositor_morphological_distance_feather_shared)
+    .local_group_size(16, 16)
+    .sampler(0, ImageType::FLOAT_2D, "input_tx")
+    .sampler(1, ImageType::FLOAT_1D, "weights_tx")
+    .sampler(2, ImageType::FLOAT_1D, "falloffs_tx")
+    .image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
+    .compute_source("compositor_morphological_distance_feather.glsl");
+
+/* Dilate variant, searches for the maximum falloff distance. The expansion of COMPARE is
+ * parenthesized so that it stays correct regardless of the expressions passed as arguments. */
+GPU_SHADER_CREATE_INFO(compositor_morphological_distance_feather_dilate)
+    .additional_info("compositor_morphological_distance_feather_shared")
+    .define("COMPARE(x, y)", "((x) > (y))")
+    .do_static_compilation(true);
+
+/* Erode variant, searches for the minimum falloff distance. */
+GPU_SHADER_CREATE_INFO(compositor_morphological_distance_feather_erode)
+    .additional_info("compositor_morphological_distance_feather_shared")
+    .define("COMPARE(x, y)", "((x) < (y))")
+    .do_static_compilation(true);
--- a/source/blender/gpu/shaders/compositor/infos/compositor_morphological_distance_info.hh
+++ b/source/blender/gpu/shaders/compositor/infos/compositor_morphological_distance_info.hh
@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "gpu_shader_create_info.hh"
+
+/* Base info shared by the dilate and erode variants of the Morphological Distance shader. The
+ * variants supply the OPERATOR and LIMIT macros used by the shader source. */
+GPU_SHADER_CREATE_INFO(compositor_morphological_distance_shared)
+    .local_group_size(16, 16)
+    .push_constant(Type::INT, "radius")
+    .sampler(0, ImageType::FLOAT_2D, "input_tx")
+    .image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
+    .compute_source("compositor_morphological_distance.glsl");
+
+/* Dilate variant, computes the maximum in the window. LIMIT is the initial value of the search
+ * and the fallback for out-of-bound texels, so it has to be the identity of max, which is the
+ * most negative float. FLT_MIN is not suitable because it is the smallest positive float, which
+ * would clamp negative inputs to it. */
+GPU_SHADER_CREATE_INFO(compositor_morphological_distance_dilate)
+    .additional_info("compositor_morphological_distance_shared")
+    .define("OPERATOR(a, b)", "max(a, b)")
+    .define("LIMIT", "(-FLT_MAX)")
+    .do_static_compilation(true);
+
+/* Erode variant, computes the minimum in the window, for which FLT_MAX is the identity. */
+GPU_SHADER_CREATE_INFO(compositor_morphological_distance_erode)
+    .additional_info("compositor_morphological_distance_shared")
+    .define("OPERATOR(a, b)", "min(a, b)")
+    .define("LIMIT", "FLT_MAX")
+    .do_static_compilation(true);
--- a/source/blender/gpu/shaders/compositor/infos/compositor_morphological_distance_threshold_info.hh
+++ b/source/blender/gpu/shaders/compositor/infos/compositor_morphological_distance_threshold_info.hh
@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "gpu_shader_create_info.hh"
+
+GPU_SHADER_CREATE_INFO(compositor_morphological_distance_threshold)
+    .local_group_size(16, 16)
+    .push_constant(Type::INT, "radius")   /* Radius of the distance search window. */
+    .push_constant(Type::INT, "distance") /* Dilate/erode distance added to the signed distance. */
+    .push_constant(Type::FLOAT, "inset")  /* Divisor that normalizes the adjusted distance. */
+    .sampler(0, ImageType::FLOAT_2D, "input_tx")
+    .image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
+    .compute_source("compositor_morphological_distance_threshold.glsl")
+    .do_static_compilation(true);
--- a/source/blender/gpu/shaders/compositor/infos/compositor_morphological_step_info.hh
+++ b/source/blender/gpu/shaders/compositor/infos/compositor_morphological_step_info.hh
@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "gpu_shader_create_info.hh"
+
+/* Base info shared by the dilate and erode variants of the Morphological Step shader. The
+ * variants supply the OPERATOR and LIMIT macros used by the shader source. */
+GPU_SHADER_CREATE_INFO(compositor_morphological_step_shared)
+    .local_group_size(16, 16)
+    .push_constant(Type::INT, "radius")
+    .sampler(0, ImageType::FLOAT_2D, "input_tx")
+    .image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
+    .compute_source("compositor_morphological_step.glsl");
+
+/* Dilate variant, computes the maximum in the window. LIMIT is the initial value of the search
+ * and the fallback for out-of-bound texels, so it has to be the identity of max, which is the
+ * most negative float. FLT_MIN is not suitable because it is the smallest positive float, which
+ * would clamp negative inputs to it. */
+GPU_SHADER_CREATE_INFO(compositor_morphological_step_dilate)
+    .additional_info("compositor_morphological_step_shared")
+    .define("OPERATOR(a, b)", "max(a, b)")
+    .define("LIMIT", "(-FLT_MAX)")
+    .do_static_compilation(true);
+
+/* Erode variant, computes the minimum in the window, for which FLT_MAX is the identity. */
+GPU_SHADER_CREATE_INFO(compositor_morphological_step_erode)
+    .additional_info("compositor_morphological_step_shared")
+    .define("OPERATOR(a, b)", "min(a, b)")
+    .define("LIMIT", "FLT_MAX")
+    .do_static_compilation(true);
--- a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_texture_utilities.glsl
+++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_texture_utilities.glsl
@ -23,3 +23,13 @@ vec4 texture_load(sampler2D sampler, ivec2 texel)
  const ivec2 texture_bounds = texture_size(sampler) - ivec2(1);
  return texelFetch(sampler, clamp(texel, ivec2(0), texture_bounds), 0);
 }
+
+/* Fetch the texel at the given integer coordinates from LOD zero of the sampler, or return the
+ * given fallback value if the coordinates lie outside of the bounds of the texture. */
+vec4 texture_load(sampler2D sampler, ivec2 texel, vec4 fallback)
+{
+  const ivec2 last_texel = texture_size(sampler) - ivec2(1);
+  bool is_below_bounds = any(lessThan(texel, ivec2(0)));
+  bool is_above_bounds = any(greaterThan(texel, last_texel));
+  if (!is_below_bounds && !is_above_bounds) {
+    return texelFetch(sampler, texel, 0);
+  }
+  return fallback;
+}
--- a/source/blender/makesdna/DNA_node_types.h
+++ b/source/blender/makesdna/DNA_node_types.h
@ -765,12 +765,12 @@ typedef enum CMPNodeMaskType {
  CMP_NODE_MASKTYPE_NOT = 3,
 } CMPNodeMaskType;

-enum {
-  CMP_NODE_DILATEERODE_STEP = 0,
-  CMP_NODE_DILATEERODE_DISTANCE_THRESH = 1,
-  CMP_NODE_DILATEERODE_DISTANCE = 2,
-  CMP_NODE_DILATEERODE_DISTANCE_FEATHER = 3,
-};
+typedef enum CMPNodeDilateErodeMethod { /* Method of the Dilate/Erode node, read from custom1. */
+  CMP_NODE_DILATE_ERODE_STEP = 0,               /* Exposed as "Step" in RNA. */
+  CMP_NODE_DILATE_ERODE_DISTANCE_THRESHOLD = 1, /* Exposed as "Threshold" in RNA. */
+  CMP_NODE_DILATE_ERODE_DISTANCE = 2,           /* Exposed as "Distance" in RNA. */
+  CMP_NODE_DILATE_ERODE_DISTANCE_FEATHER = 3,   /* Exposed as "Feather" in RNA. */
+} CMPNodeDilateErodeMethod;

 enum {
  CMP_NODE_INPAINT_SIMPLE = 0,
--- a/source/blender/makesrna/intern/rna_nodetree.c
+++ b/source/blender/makesrna/intern/rna_nodetree.c
@ -7108,10 +7108,10 @@ static void def_cmp_dilate_erode(StructRNA *srna)
  PropertyRNA *prop;

  static const EnumPropertyItem mode_items[] = {
-      {CMP_NODE_DILATEERODE_STEP, "STEP", 0, "Step", ""},
-      {CMP_NODE_DILATEERODE_DISTANCE_THRESH, "THRESHOLD", 0, "Threshold", ""},
-      {CMP_NODE_DILATEERODE_DISTANCE, "DISTANCE", 0, "Distance", ""},
-      {CMP_NODE_DILATEERODE_DISTANCE_FEATHER, "FEATHER", 0, "Feather", ""},
+      {CMP_NODE_DILATE_ERODE_STEP, "STEP", 0, "Step", ""},
+      {CMP_NODE_DILATE_ERODE_DISTANCE_THRESHOLD, "THRESHOLD", 0, "Threshold", ""},
+      {CMP_NODE_DILATE_ERODE_DISTANCE, "DISTANCE", 0, "Distance", ""},
+      {CMP_NODE_DILATE_ERODE_DISTANCE_FEATHER, "FEATHER", 0, "Feather", ""},
      {0, NULL, 0, NULL, NULL},
  };

@ -7128,7 +7128,7 @@ static void def_cmp_dilate_erode(StructRNA *srna)
  RNA_def_property_ui_text(prop, "Distance", "Distance to grow/shrink (number of iterations)");
  RNA_def_property_update(prop, NC_NODE | NA_EDITED, "rna_Node_update");

-  /* CMP_NODE_DILATEERODE_DISTANCE_THRESH only */
+  /* CMP_NODE_DILATE_ERODE_DISTANCE_THRESHOLD only */
  prop = RNA_def_property(srna, "edge", PROP_FLOAT, PROP_NONE);
  RNA_def_property_float_sdna(prop, NULL, "custom3");
  RNA_def_property_range(prop, -100, 100);
@ -7137,7 +7137,7 @@ static void def_cmp_dilate_erode(StructRNA *srna)

  RNA_def_struct_sdna_from(srna, "NodeDilateErode", "storage");

-  /* CMP_NODE_DILATEERODE_DISTANCE_FEATHER only */
+  /* CMP_NODE_DILATE_ERODE_DISTANCE_FEATHER only */
  prop = RNA_def_property(srna, "falloff", PROP_ENUM, PROP_NONE);
  RNA_def_property_enum_sdna(prop, NULL, "falloff");
  RNA_def_property_enum_items(prop, rna_enum_proportional_falloff_curve_only_items);
--- a/source/blender/nodes/composite/nodes/node_composite_dilate.cc
+++ b/source/blender/nodes/composite/nodes/node_composite_dilate.cc
@ -5,12 +5,27 @@
 * \ingroup cmpnodes
 */

+#include <cmath>
+
+#include "BLI_array.hh"
+#include "BLI_assert.h"
+#include "BLI_math_base.hh"
+
+#include "DNA_scene_types.h"
+
 #include "RNA_access.h"

 #include "UI_interface.h"
 #include "UI_resources.h"

+#include "RE_pipeline.h"
+
+#include "GPU_shader.h"
+#include "GPU_state.h"
+#include "GPU_texture.h"
+
 #include "COM_node_operation.hh"
+#include "COM_utilities.hh"

 #include "node_composite_util.hh"

@ -18,6 +33,8 @@

 namespace blender::nodes::node_composite_dilate_cc {

+NODE_STORAGE_FUNCS(NodeDilateErode)
+
 static void cmp_node_dilate_declare(NodeDeclarationBuilder &b)
 {
  b.add_input<decl::Float>(N_("Mask")).default_value(0.0f).min(0.0f).max(1.0f);
@ -36,10 +53,10 @@ static void node_composit_buts_dilateerode(uiLayout *layout, bContext *UNUSED(C)
  uiItemR(layout, ptr, "mode", UI_ITEM_R_SPLIT_EMPTY_NAME, nullptr, ICON_NONE);
  uiItemR(layout, ptr, "distance", UI_ITEM_R_SPLIT_EMPTY_NAME, nullptr, ICON_NONE);
  switch (RNA_enum_get(ptr, "mode")) {
-    case CMP_NODE_DILATEERODE_DISTANCE_THRESH:
+    case CMP_NODE_DILATE_ERODE_DISTANCE_THRESHOLD:
      uiItemR(layout, ptr, "edge", UI_ITEM_R_SPLIT_EMPTY_NAME, nullptr, ICON_NONE);
      break;
-    case CMP_NODE_DILATEERODE_DISTANCE_FEATHER:
+    case CMP_NODE_DILATE_ERODE_DISTANCE_FEATHER:
      uiItemR(layout, ptr, "falloff", UI_ITEM_R_SPLIT_EMPTY_NAME, nullptr, ICON_NONE);
      break;
  }
@ -47,13 +64,458 @@ static void node_composit_buts_dilateerode(uiLayout *layout, bContext *UNUSED(C)

 using namespace blender::realtime_compositor;

+/* Evaluates the falloff curve identified by falloff_type at the normalized distance x. Every
+ * supported curve yields 1 at x = 0 and 0 at x = 1, the falloff type only changes how fast the
+ * value decays in between. An unknown falloff type asserts and falls back to the linear curve. */
+static float compute_distance_falloff(float x, int falloff_type)
+{
+  /* All curves below are expressed in terms of the complement of the distance. */
+  const float t = 1.0f - x;
+
+  if (falloff_type == PROP_SMOOTH) {
+    return 3.0f * t * t - 2.0f * t * t * t;
+  }
+  if (falloff_type == PROP_SPHERE) {
+    return std::sqrt(2.0f * t - t * t);
+  }
+  if (falloff_type == PROP_ROOT) {
+    return std::sqrt(t);
+  }
+  if (falloff_type == PROP_SHARP) {
+    return t * t;
+  }
+  if (falloff_type == PROP_INVSQUARE) {
+    return t * (2.0f - t);
+  }
+  if (falloff_type == PROP_LIN) {
+    return t;
+  }
+
+  BLI_assert_unreachable();
+  return t;
+}
+
+/* A helper class that computes and caches 1D GPU textures containing the weights of the separable
+ * Gaussian filter of the given radius as well as an inverse distance falloff of the given type and
+ * radius. The weights and falloffs are symmetric, because the Gaussian and falloff functions are
+ * all even functions. Consequently, only the positive half of the filter is computed and the
+ * shader takes that into consideration.
+ *
+ * The class owns the two textures and frees them in its destructor, consequently, it is not
+ * copyable, since a copy would free the same textures a second time. */
+class SymmetricSeparableMorphologicalDistanceFeatherWeights {
+ private:
+  /* The radius and falloff type that the cached textures were computed for. */
+  int radius_ = 1;
+  int falloff_type_ = PROP_SMOOTH;
+  /* The cached textures, null until the first call to the update method. */
+  GPUTexture *weights_texture_ = nullptr;
+  GPUTexture *distance_falloffs_texture_ = nullptr;
+
+ public:
+  /* Declaring the copy operations below suppresses the implicit default constructor, so it is
+   * explicitly defaulted to keep the class default constructible. */
+  SymmetricSeparableMorphologicalDistanceFeatherWeights() = default;
+
+  /* Copying would duplicate the owned texture pointers and lead to a double free. */
+  SymmetricSeparableMorphologicalDistanceFeatherWeights(
+      const SymmetricSeparableMorphologicalDistanceFeatherWeights &) = delete;
+  SymmetricSeparableMorphologicalDistanceFeatherWeights &operator=(
+      const SymmetricSeparableMorphologicalDistanceFeatherWeights &) = delete;
+
+  ~SymmetricSeparableMorphologicalDistanceFeatherWeights()
+  {
+    if (weights_texture_) {
+      GPU_texture_free(weights_texture_);
+    }
+
+    if (distance_falloffs_texture_) {
+      GPU_texture_free(distance_falloffs_texture_);
+    }
+  }
+
+  /* Check if textures containing the weights and distance falloffs were already computed for the
+   * given distance falloff type and radius. If such textures exist, do nothing, otherwise, free
+   * the already computed textures and recompute them with the given distance falloff type and
+   * radius. */
+  void update(int radius, int falloff_type)
+  {
+    if (weights_texture_ && distance_falloffs_texture_ && falloff_type == falloff_type_ &&
+        radius == radius_) {
+      return;
+    }
+
+    radius_ = radius;
+    falloff_type_ = falloff_type;
+
+    compute_weights();
+    compute_distance_falloffs();
+  }
+
+  /* Recompute the weights texture for the current radius, freeing the previous texture if any.
+   * The texture stores radius + 1 normalized weights, starting with the center weight. */
+  void compute_weights()
+  {
+    if (weights_texture_) {
+      GPU_texture_free(weights_texture_);
+    }
+
+    /* The size of filter is double the radius plus 1, but since the filter is symmetric, we only
+     * compute half of it and no doubling happens. We add 1 to make sure the filter size is always
+     * odd and there is a center weight. */
+    const int size = radius_ + 1;
+    Array<float> weights(size);
+
+    float sum = 0.0f;
+
+    /* First, compute the center weight. */
+    const float center_weight = RE_filter_value(R_FILTER_GAUSS, 0.0f);
+    weights[0] = center_weight;
+    sum += center_weight;
+
+    /* Second, compute the other weights in the positive direction, making sure to add double the
+     * weight to the sum of weights because the filter is symmetric and we only loop over half of
+     * it. Skip the center weight already computed by dropping the front index. The scale maps
+     * the index range to the [0, 1] range and is zero for a zero radius to avoid a division by
+     * zero. */
+    const float scale = radius_ > 0 ? 1.0f / radius_ : 0.0f;
+    for (const int i : weights.index_range().drop_front(1)) {
+      const float weight = RE_filter_value(R_FILTER_GAUSS, i * scale);
+      weights[i] = weight;
+      sum += weight * 2.0f;
+    }
+
+    /* Finally, normalize the weights. */
+    for (const int i : weights.index_range()) {
+      weights[i] /= sum;
+    }
+
+    weights_texture_ = GPU_texture_create_1d("Weights", size, 1, GPU_R16F, weights.data());
+  }
+
+  /* Recompute the distance falloffs texture for the current radius and falloff type, freeing the
+   * previous texture if any. The texture stores radius + 1 falloff values, starting with the
+   * falloff at a distance of zero. */
+  void compute_distance_falloffs()
+  {
+    if (distance_falloffs_texture_) {
+      GPU_texture_free(distance_falloffs_texture_);
+    }
+
+    /* The size of the distance falloffs is double the radius plus 1, but since the falloffs are
+     * symmetric, we only compute half of them and no doubling happens. We add 1 to make sure the
+     * falloffs size is always odd and there is a center falloff. */
+    const int size = radius_ + 1;
+    Array<float> falloffs(size);
+
+    /* Compute the distance falloffs in the positive direction only, because the falloffs are
+     * symmetric. The scale maps the index range to the [0, 1] range and is zero for a zero radius
+     * to avoid a division by zero. */
+    const float scale = radius_ > 0 ? 1.0f / radius_ : 0.0f;
+    for (const int i : falloffs.index_range()) {
+      falloffs[i] = compute_distance_falloff(i * scale, falloff_type_);
+    }
+
+    distance_falloffs_texture_ = GPU_texture_create_1d(
+        "Distance Factors", size, 1, GPU_R16F, falloffs.data());
+  }
+
+  /* Bind the weights texture to the texture unit of the sampler of the given name in the given
+   * shader. The update method is expected to have been called beforehand. */
+  void bind_weights_as_texture(GPUShader *shader, const char *texture_name)
+  {
+    const int texture_image_unit = GPU_shader_get_texture_binding(shader, texture_name);
+    GPU_texture_bind(weights_texture_, texture_image_unit);
+  }
+
+  void unbind_weights_as_texture()
+  {
+    GPU_texture_unbind(weights_texture_);
+  }
+
+  /* Bind the distance falloffs texture to the texture unit of the sampler of the given name in
+   * the given shader. The update method is expected to have been called beforehand. */
+  void bind_distance_falloffs_as_texture(GPUShader *shader, const char *texture_name)
+  {
+    const int texture_image_unit = GPU_shader_get_texture_binding(shader, texture_name);
+    GPU_texture_bind(distance_falloffs_texture_, texture_image_unit);
+  }
+
+  void unbind_distance_falloffs_as_texture()
+  {
+    GPU_texture_unbind(distance_falloffs_texture_);
+  }
+};
+
 class DilateErodeOperation : public NodeOperation {
+ private:
+  /* Cached symmetric blur weights and distance falloffs for the distance feather method. */
+  SymmetricSeparableMorphologicalDistanceFeatherWeights distance_feather_weights_;
+
 public:
  using NodeOperation::NodeOperation;

  void execute() override
  {
-    get_input("Mask").pass_through(get_result("Mask"));
+    /* Nothing to compute, so forward the input mask to the output as is. */
+    if (is_identity()) {
+      get_input("Mask").pass_through(get_result("Mask"));
+      return;
+    }
+
+    /* Dispatch to the implementation of the method selected on the node. */
+    switch (get_method()) {
+      case CMP_NODE_DILATE_ERODE_STEP:
+        execute_step();
+        break;
+      case CMP_NODE_DILATE_ERODE_DISTANCE:
+        execute_distance();
+        break;
+      case CMP_NODE_DILATE_ERODE_DISTANCE_THRESHOLD:
+        execute_distance_threshold();
+        break;
+      case CMP_NODE_DILATE_ERODE_DISTANCE_FEATHER:
+        execute_distance_feather();
+        break;
+      default:
+        BLI_assert_unreachable();
+        break;
+    }
+  }
+
+  /* ----------------------------
+   * Step Morphological Operator.
+   * ---------------------------- */
+
+  /* Runs the step operator as two passes, the vertical pass consumes the intermediate texture
+   * produced by the horizontal pass. */
+  void execute_step()
+  {
+    execute_step_vertical_pass(execute_step_horizontal_pass());
+  }
+
+  /* Runs the step shader on the input mask and returns a texture acquired from the texture pool
+   * that holds the result. The returned texture has the transposed size of the domain. */
+  GPUTexture *execute_step_horizontal_pass()
+  {
+    GPUShader *step_shader = shader_manager().get(get_morphological_step_shader_name());
+    GPU_shader_bind(step_shader);
+
+    /* The sign of the distance already selected the shader variant, so only its magnitude is
+     * passed as the radius. */
+    const int radius = math::abs(get_distance());
+    GPU_shader_uniform_1i(step_shader, "radius", radius);
+
+    const Result &mask = get_input("Mask");
+    mask.bind_as_texture(step_shader, "input_tx");
+
+    /* The intermediate result is allocated with its width and height swapped relative to the
+     * input. The shader processes the image horizontally and writes its output transposed. The
+     * vertical pass then runs the very same shader on that transposed image, which amounts to a
+     * vertical pass, and its transposed write restores the original orientation. This improves
+     * spatial cache locality in the shader and avoids needing a separate shader per pass. */
+    const Domain domain = compute_domain();
+    GPUTexture *transposed_result = texture_pool().acquire_color(
+        int2(domain.size.y, domain.size.x));
+    GPU_texture_image_bind(transposed_result,
+                           GPU_shader_get_texture_binding(step_shader, "output_img"));
+
+    compute_dispatch_threads_at_least(step_shader, domain.size);
+
+    GPU_shader_unbind();
+    mask.unbind_as_texture();
+    GPU_texture_image_unbind(transposed_result);
+
+    return transposed_result;
+  }
+
+  /* Runs the step shader on the transposed result of the horizontal pass and writes the final
+   * output to the Mask result.
+   * NOTE(review): the given texture was acquired from the texture pool and is only unbound here,
+   * it is not handed back to the pool in this method - confirm the pool reclaims it. */
+  void execute_step_vertical_pass(GPUTexture *horizontal_pass_result)
+  {
+    GPUShader *step_shader = shader_manager().get(get_morphological_step_shader_name());
+    GPU_shader_bind(step_shader);
+
+    /* The sign of the distance already selected the shader variant, so only its magnitude is
+     * passed as the radius. */
+    const int radius = math::abs(get_distance());
+    GPU_shader_uniform_1i(step_shader, "radius", radius);
+
+    /* Issue a texture fetch barrier before sampling the texture written by the previous pass. */
+    GPU_memory_barrier(GPU_BARRIER_TEXTURE_FETCH);
+    GPU_texture_bind(horizontal_pass_result,
+                     GPU_shader_get_texture_binding(step_shader, "input_tx"));
+
+    const Domain domain = compute_domain();
+    Result &result_mask = get_result("Mask");
+    result_mask.allocate_texture(domain);
+    result_mask.bind_as_image(step_shader, "output_img");
+
+    /* The dispatch covers the transposed size because the input of this pass is the transposed
+     * output of the horizontal pass. */
+    const int2 transposed_size = int2(domain.size.y, domain.size.x);
+    compute_dispatch_threads_at_least(step_shader, transposed_size);
+
+    GPU_shader_unbind();
+    result_mask.unbind_as_image();
+    GPU_texture_unbind(horizontal_pass_result);
+  }
+
+  /* Returns the name of the step shader variant, dilate for a positive distance and erode
+   * otherwise. */
+  const char *get_morphological_step_shader_name()
+  {
+    const bool is_dilation = get_distance() > 0;
+    return is_dilation ? "compositor_morphological_step_dilate" :
+                         "compositor_morphological_step_erode";
+  }
+
+  /* --------------------------------
+   * Distance Morphological Operator.
+   * -------------------------------- */
+
+  /* Runs the distance shader on the input mask in a single pass and writes the output to the
+   * Mask result. */
+  void execute_distance()
+  {
+    GPUShader *distance_shader = shader_manager().get(get_morphological_distance_shader_name());
+    GPU_shader_bind(distance_shader);
+
+    /* The sign of the distance already selected the shader variant, so only its magnitude is
+     * passed as the radius. */
+    const int radius = math::abs(get_distance());
+    GPU_shader_uniform_1i(distance_shader, "radius", radius);
+
+    const Result &mask = get_input("Mask");
+    mask.bind_as_texture(distance_shader, "input_tx");
+
+    const Domain domain = compute_domain();
+    Result &result_mask = get_result("Mask");
+    result_mask.allocate_texture(domain);
+    result_mask.bind_as_image(distance_shader, "output_img");
+
+    compute_dispatch_threads_at_least(distance_shader, domain.size);
+
+    GPU_shader_unbind();
+    result_mask.unbind_as_image();
+    mask.unbind_as_texture();
+  }
+
+  /* Returns the name of the distance shader variant, dilate for a positive distance and erode
+   * otherwise. */
+  const char *get_morphological_distance_shader_name()
+  {
+    const bool is_dilation = get_distance() > 0;
+    return is_dilation ? "compositor_morphological_distance_dilate" :
+                         "compositor_morphological_distance_erode";
+  }
+
+  /* ------------------------------------------
+   * Distance Threshold Morphological Operator.
+   * ------------------------------------------ */
+
+  /* Runs the distance threshold shader on the input mask in a single pass and writes the output
+   * to the Mask result. Unlike the other methods, a single shader handles both signs of the
+   * distance, so the signed distance is passed along with the inset and the search radius. */
+  void execute_distance_threshold()
+  {
+    GPUShader *threshold_shader = shader_manager().get(
+        "compositor_morphological_distance_threshold");
+    GPU_shader_bind(threshold_shader);
+
+    const float inset = get_inset();
+    GPU_shader_uniform_1f(threshold_shader, "inset", inset);
+    const int radius = get_morphological_distance_threshold_radius();
+    GPU_shader_uniform_1i(threshold_shader, "radius", radius);
+    const int distance = get_distance();
+    GPU_shader_uniform_1i(threshold_shader, "distance", distance);
+
+    const Result &mask = get_input("Mask");
+    mask.bind_as_texture(threshold_shader, "input_tx");
+
+    const Domain domain = compute_domain();
+    Result &result_mask = get_result("Mask");
+    result_mask.allocate_texture(domain);
+    result_mask.bind_as_image(threshold_shader, "output_img");
+
+    compute_dispatch_threads_at_least(threshold_shader, domain.size);
+
+    GPU_shader_unbind();
+    result_mask.unbind_as_image();
+    mask.unbind_as_texture();
+  }
+
+  /* Returns the radius passed to the distance threshold shader, which is the inset rounded up to
+   * an integer plus the absolute value of the distance. See the discussion in the implementation
+   * for more information. */
+  int get_morphological_distance_threshold_radius()
+  {
+    const int inset_extent = static_cast<int>(math::ceil(get_inset()));
+    const int distance_extent = math::abs(get_distance());
+    return inset_extent + distance_extent;
+  }
+
+  /* ----------------------------------------
+   * Distance Feather Morphological Operator.
+   * ---------------------------------------- */
+
+  /* Runs the distance feather operator as two passes, the vertical pass consumes the
+   * intermediate texture produced by the horizontal pass. */
+  void execute_distance_feather()
+  {
+    execute_distance_feather_vertical_pass(execute_distance_feather_horizontal_pass());
+  }
+
+  /* Runs the distance feather shader on the input mask and returns a texture acquired from the
+   * texture pool that holds the result. The returned texture has the transposed size of the
+   * domain. */
+  GPUTexture *execute_distance_feather_horizontal_pass()
+  {
+    GPUShader *feather_shader = shader_manager().get(
+        get_morphological_distance_feather_shader_name());
+    GPU_shader_bind(feather_shader);
+
+    const Result &mask = get_input("Mask");
+    mask.bind_as_texture(feather_shader, "input_tx");
+
+    /* Make sure the cached weights and falloffs match the current radius and falloff type
+     * before binding them. */
+    const int radius = math::abs(get_distance());
+    distance_feather_weights_.update(radius, node_storage(bnode()).falloff);
+    distance_feather_weights_.bind_weights_as_texture(feather_shader, "weights_tx");
+    distance_feather_weights_.bind_distance_falloffs_as_texture(feather_shader, "falloffs_tx");
+
+    /* The intermediate result is allocated with its width and height swapped relative to the
+     * input. The shader processes the image horizontally and writes its output transposed. The
+     * vertical pass then runs the very same shader on that transposed image, which amounts to a
+     * vertical pass, and its transposed write restores the original orientation. This improves
+     * spatial cache locality in the shader and avoids needing a separate shader per pass. */
+    const Domain domain = compute_domain();
+    GPUTexture *transposed_result = texture_pool().acquire_color(
+        int2(domain.size.y, domain.size.x));
+    GPU_texture_image_bind(transposed_result,
+                           GPU_shader_get_texture_binding(feather_shader, "output_img"));
+
+    compute_dispatch_threads_at_least(feather_shader, domain.size);
+
+    GPU_shader_unbind();
+    mask.unbind_as_texture();
+    distance_feather_weights_.unbind_weights_as_texture();
+    distance_feather_weights_.unbind_distance_falloffs_as_texture();
+    GPU_texture_image_unbind(transposed_result);
+
+    return transposed_result;
+  }
+
+  /* Runs the distance feather shader on the transposed result of the horizontal pass and writes
+   * the final output to the Mask result.
+   * NOTE(review): the given texture was acquired from the texture pool and is only unbound here,
+   * it is not handed back to the pool in this method - confirm the pool reclaims it. */
+  void execute_distance_feather_vertical_pass(GPUTexture *horizontal_pass_result)
+  {
+    GPUShader *feather_shader = shader_manager().get(
+        get_morphological_distance_feather_shader_name());
+    GPU_shader_bind(feather_shader);
+
+    /* Issue a texture fetch barrier before sampling the texture written by the previous pass. */
+    GPU_memory_barrier(GPU_BARRIER_TEXTURE_FETCH);
+    GPU_texture_bind(horizontal_pass_result,
+                     GPU_shader_get_texture_binding(feather_shader, "input_tx"));
+
+    /* Same radius and falloff type as the horizontal pass, so this returns early when the
+     * cached textures are still the ones computed there. */
+    const int radius = math::abs(get_distance());
+    distance_feather_weights_.update(radius, node_storage(bnode()).falloff);
+    distance_feather_weights_.bind_weights_as_texture(feather_shader, "weights_tx");
+    distance_feather_weights_.bind_distance_falloffs_as_texture(feather_shader, "falloffs_tx");
+
+    const Domain domain = compute_domain();
+    Result &result_mask = get_result("Mask");
+    result_mask.allocate_texture(domain);
+    result_mask.bind_as_image(feather_shader, "output_img");
+
+    /* The dispatch covers the transposed size because the input of this pass is the transposed
+     * output of the horizontal pass. */
+    const int2 transposed_size = int2(domain.size.y, domain.size.x);
+    compute_dispatch_threads_at_least(feather_shader, transposed_size);
+
+    GPU_shader_unbind();
+    result_mask.unbind_as_image();
+    distance_feather_weights_.unbind_weights_as_texture();
+    distance_feather_weights_.unbind_distance_falloffs_as_texture();
+    GPU_texture_unbind(horizontal_pass_result);
+  }
+
+  /* Returns the name of the distance feather shader variant, dilate for a positive distance and
+   * erode otherwise. */
+  const char *get_morphological_distance_feather_shader_name()
+  {
+    const bool is_dilation = get_distance() > 0;
+    return is_dilation ? "compositor_morphological_distance_feather_dilate" :
+                         "compositor_morphological_distance_feather_erode";
+  }
+
+  /* ---------------
+   * Common Methods.
+   * --------------- */
+
+  /* Returns true if the operation should just pass its input through. That is the case for
+   * single value inputs and for a zero distance, except that the distance threshold method with
+   * a non zero inset is never an identity for non single value inputs, even at zero distance. */
+  bool is_identity()
+  {
+    if (get_input("Mask").is_single_value()) {
+      return true;
+    }
+
+    const bool is_threshold_method = get_method() == CMP_NODE_DILATE_ERODE_DISTANCE_THRESHOLD;
+    if (is_threshold_method && get_inset() != 0.0f) {
+      return false;
+    }
+
+    return get_distance() == 0;
+  }
+
+  /* The signed distance of the operation, stored in the custom2 member of the node. */
+  int get_distance()
+  {
+    const int distance = bnode().custom2;
+    return distance;
+  }
+
+  /* The inset of the distance threshold method, stored in the custom3 member of the node. */
+  float get_inset()
+  {
+    const float inset = bnode().custom3;
+    return inset;
+  }
+
+  /* The morphological method of the operation, stored in the custom1 member of the node. */
+  CMPNodeDilateErodeMethod get_method()
+  {
+    return static_cast<CMPNodeDilateErodeMethod>(bnode().custom1);
  }
 };