Realtime Compositor: Implement blur node

This patch implements the blur node for the realtime compositor. The patch is
still missing the Variable Size option because it depends on the Erode/Dilate
node, which is yet to be implemented. Furthermore, there are a number of
optimizations that can be implemented, the most important of which is the IIR
implementation of the Fast Gaussian filter, as well as the use of hardware
filtering and thread local memory. The latter was attempted but was
not robust enough, so it will be submitted as a separate patch.

Differential Revision: https://developer.blender.org/D15663

Reviewed By: Clement Foucault
This commit is contained in:
Omar Emara 2022-09-09 13:57:04 +02:00
parent 1339fec22a
commit 04ae0fe46b
8 changed files with 596 additions and 2 deletions

View File

@ -345,8 +345,11 @@ set(GLSL_SRC
shaders/compositor/compositor_screen_lens_distortion.glsl
shaders/compositor/compositor_set_alpha.glsl
shaders/compositor/compositor_split_viewer.glsl
shaders/compositor/compositor_symmetric_blur.glsl
shaders/compositor/compositor_symmetric_separable_blur.glsl
shaders/compositor/library/gpu_shader_compositor_alpha_over.glsl
shaders/compositor/library/gpu_shader_compositor_blur_common.glsl
shaders/compositor/library/gpu_shader_compositor_bright_contrast.glsl
shaders/compositor/library/gpu_shader_compositor_channel_matte.glsl
shaders/compositor/library/gpu_shader_compositor_chroma_matte.glsl
@ -620,6 +623,8 @@ set(SRC_SHADER_CREATE_INFOS
shaders/compositor/infos/compositor_screen_lens_distortion_info.hh
shaders/compositor/infos/compositor_set_alpha_info.hh
shaders/compositor/infos/compositor_split_viewer_info.hh
shaders/compositor/infos/compositor_symmetric_blur_info.hh
shaders/compositor/infos/compositor_symmetric_separable_blur_info.hh
)
set(SRC_SHADER_CREATE_INFOS_MTL

View File

@ -0,0 +1,77 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_blur_common.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
/* Load the input color that corresponds to the given texel of the output, taking the
 * extend_bounds and gamma_correct options into account. */
vec4 load_input(ivec2 texel)
{
  vec4 input_color;
  if (!extend_bounds) {
    input_color = texture_load(input_tx, texel);
  }
  else {
    /* With extended bounds, the input is treated as if it was padded by a radius amount of
     * pixels, so the texel is shifted back by that amount and a transparent color is used as the
     * fallback for out of bounds loads. The weights texture stores an extra center weight along
     * each axis, hence the subtraction of 1, see SymmetricBlurWeights for more information. */
    ivec2 padding = texture_size(weights_tx) - 1;
    input_color = texture_load(input_tx, texel - padding, vec4(0.0));
  }

  return gamma_correct ? gamma_correct_blur_input(input_color) : input_color;
}
/* Blur the input using the 2D symmetric filter stored in weights_tx and write the result to the
 * texel of the output that corresponds to the current invocation. */
void main()
{
  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);

  /* The weights texture only stores the center weight, the positive halves of the x and y axes,
   * and the upper right quadrant of the filter. Since the filter is symmetric, every stored weight
   * is reused for its mirrored positions below. */
  ivec2 stored_size = texture_size(weights_tx);

  vec4 blurred_color = vec4(0.0);

  /* Contribution of the center pixel. */
  float center_weight = texture_load(weights_tx, ivec2(0)).x;
  blurred_color += load_input(texel) * center_weight;

  /* Contributions of the pixels along the x axis, the same weight serves both the positive and
   * the negative halves. */
  for (int dx = 1; dx < stored_size.x; dx++) {
    float weight = texture_load(weights_tx, ivec2(dx, 0)).x;
    blurred_color += load_input(texel + ivec2(dx, 0)) * weight;
    blurred_color += load_input(texel + ivec2(-dx, 0)) * weight;
  }

  /* Contributions of the pixels along the y axis, the same weight serves both the positive and
   * the negative halves. */
  for (int dy = 1; dy < stored_size.y; dy++) {
    float weight = texture_load(weights_tx, ivec2(0, dy)).x;
    blurred_color += load_input(texel + ivec2(0, dy)) * weight;
    blurred_color += load_input(texel + ivec2(0, -dy)) * weight;
  }

  /* Contributions of the pixels off the axes, the same weight serves all four quadrants. */
  for (int dy = 1; dy < stored_size.y; dy++) {
    for (int dx = 1; dx < stored_size.x; dx++) {
      float weight = texture_load(weights_tx, ivec2(dx, dy)).x;
      blurred_color += load_input(texel + ivec2(dx, dy)) * weight;
      blurred_color += load_input(texel + ivec2(-dx, dy)) * weight;
      blurred_color += load_input(texel + ivec2(dx, -dy)) * weight;
      blurred_color += load_input(texel + ivec2(-dx, -dy)) * weight;
    }
  }

  /* Undo the gamma correction that load_input applied to every loaded color. */
  if (gamma_correct) {
    blurred_color = gamma_uncorrect_blur_output(blurred_color);
  }

  imageStore(output_img, texel, blurred_color);
}

View File

@ -0,0 +1,53 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_blur_common.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
/* Load the input color that corresponds to the given texel of the output, taking the
 * extend_bounds and gamma_correct_input options into account. */
vec4 load_input(ivec2 texel)
{
  vec4 input_color;
  if (!extend_bounds) {
    input_color = texture_load(input_tx, texel);
  }
  else {
    /* With extended bounds, the input is treated as if it was padded by a radius amount of
     * pixels along the blur direction, so the texel is shifted back by that amount along x and a
     * transparent color is used as the fallback for out of bounds loads. The weights texture
     * stores an extra center weight, hence the subtraction of 1, see
     * SymmetricSeparableBlurWeights for more information. */
    int padding = texture_size(weights_tx) - 1;
    input_color = texture_load(input_tx, texel - ivec2(padding, 0), vec4(0.0));
  }

  return gamma_correct_input ? gamma_correct_blur_input(input_color) : input_color;
}
/* Blur the input along the x axis using the 1D symmetric filter stored in weights_tx and write
 * the result to the output at the transposed texel. */
void main()
{
  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);

  /* The weights texture only stores the center weight followed by the positive half of the
   * filter. Since the filter is symmetric, each stored weight is reused for the mirrored pixel. */
  int weights_count = texture_size(weights_tx);

  vec4 blurred_color = vec4(0.0);

  /* Contribution of the center pixel. */
  blurred_color += load_input(texel) * texture_load(weights_tx, 0).x;

  /* Contributions of the pixels to the right and to the left of the center pixel. */
  for (int offset = 1; offset < weights_count; offset++) {
    float weight = texture_load(weights_tx, offset).x;
    blurred_color += load_input(texel + ivec2(offset, 0)) * weight;
    blurred_color += load_input(texel - ivec2(offset, 0)) * weight;
  }

  if (gamma_uncorrect_output) {
    blurred_color = gamma_uncorrect_blur_output(blurred_color);
  }

  /* The color is written at the transposed texel, see the
   * execute_separable_blur_horizontal_pass method for the rationale behind this. */
  imageStore(output_img, texel.yx, blurred_color);
}

View File

@ -0,0 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
/* Create info of the compute shader whose source is compositor_symmetric_blur.glsl.
 *
 * - Work groups are 16x16 invocations.
 * - "extend_bounds" and "gamma_correct" are boolean push constants read by the shader source.
 * - "input_tx" (slot 0) is the 2D image to blur and "weights_tx" (slot 1) is a 2D texture of
 *   filter weights.
 * - "output_img" (image slot 0) is a write-only RGBA16F 2D image that receives the result.
 *
 * NOTE(review): do_static_compilation(true) presumably requests compilation ahead of first use,
 * confirm against the shader create info documentation. */
GPU_SHADER_CREATE_INFO(compositor_symmetric_blur)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "extend_bounds")
.push_constant(Type::BOOL, "gamma_correct")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "weights_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_symmetric_blur.glsl")
.do_static_compilation(true);

View File

@ -0,0 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
/* Create info of the compute shader whose source is compositor_symmetric_separable_blur.glsl.
 *
 * - Work groups are 16x16 invocations.
 * - "extend_bounds", "gamma_correct_input" and "gamma_uncorrect_output" are boolean push
 *   constants read by the shader source. Gamma correction of the input and gamma uncorrection of
 *   the output are controlled separately.
 * - "input_tx" (slot 0) is the 2D image to blur and "weights_tx" (slot 1) is a 1D texture of
 *   filter weights.
 * - "output_img" (image slot 0) is a write-only RGBA16F 2D image that receives the result.
 *
 * NOTE(review): do_static_compilation(true) presumably requests compilation ahead of first use,
 * confirm against the shader create info documentation. */
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "extend_bounds")
.push_constant(Type::BOOL, "gamma_correct_input")
.push_constant(Type::BOOL, "gamma_uncorrect_output")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_1D, "weights_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_symmetric_separable_blur.glsl")
.do_static_compilation(true);

View File

@ -0,0 +1,32 @@
/* Preprocess the input of the blur filter by squaring its color channels in alpha straight form.
 * The given color is assumed to be alpha premultiplied and the returned color is alpha
 * premultiplied as well, with its alpha left untouched. */
vec4 gamma_correct_blur_input(vec4 color)
{
  /* The divisor used to unpremultiply the color, which falls back to one when the alpha is not
   * positive. The same value is used to premultiply the color again at the end. */
  float alpha = color.a > 0.0 ? color.a : 1.0;

  vec3 straight_color = color.rgb / alpha;

  /* Square the channels that are not negative and zero the negative ones. */
  vec3 multiplier = mix(straight_color, vec3(0.0), lessThan(straight_color, vec3(0.0)));
  vec3 squared_color = straight_color * multiplier;

  return vec4(squared_color * alpha, color.a);
}
/* Postprocess the output of the blur filter by taking the square root of its color channels in
 * alpha straight form. The given color is assumed to be alpha premultiplied and the returned
 * color is alpha premultiplied as well, with its alpha left untouched. This essentially undoes
 * the processing done by the gamma_correct_blur_input function. */
vec4 gamma_uncorrect_blur_output(vec4 color)
{
  /* The divisor used to unpremultiply the color, which falls back to one when the alpha is not
   * positive. The same value is used to premultiply the color again at the end. */
  float alpha = color.a > 0.0 ? color.a : 1.0;

  vec3 straight_color = color.rgb / alpha;

  /* Take the square root of the channels that are not negative and zero the negative ones. */
  vec3 root_color = mix(sqrt(straight_color), vec3(0.0), lessThan(straight_color, vec3(0.0)));

  return vec4(root_color * alpha, color.a);
}

View File

@ -5,12 +5,27 @@
* \ingroup cmpnodes
*/
#include <cstdint>
#include "BLI_array.hh"
#include "BLI_assert.h"
#include "BLI_index_range.hh"
#include "BLI_math_base.hh"
#include "BLI_math_vec_types.hh"
#include "BLI_math_vector.hh"
#include "RNA_access.h"
#include "UI_interface.h"
#include "UI_resources.h"
#include "RE_pipeline.h"
#include "GPU_state.h"
#include "GPU_texture.h"
#include "COM_node_operation.hh"
#include "COM_utilities.hh"
#include "node_composite_util.hh"
@ -18,6 +33,8 @@
namespace blender::nodes::node_composite_blur_cc {
NODE_STORAGE_FUNCS(NodeBlurData)
static void cmp_node_blur_declare(NodeDeclarationBuilder &b)
{
b.add_input<decl::Color>(N_("Image")).default_value({1.0f, 1.0f, 1.0f, 1.0f});
@ -75,13 +92,395 @@ static void node_composit_buts_blur(uiLayout *layout, bContext *UNUSED(C), Point
using namespace blender::realtime_compositor;
/* Computes and caches a 1D GPU texture that stores the normalized weights of a separable filter
 * of a given type and radius. All filter functions are even, so the filter is symmetric and only
 * its center weight followed by its positive half are stored. The shader that samples the texture
 * is expected to mirror the weights for the negative half. */
class SymmetricSeparableBlurWeights {
 private:
  float radius_ = 1.0f;
  int type_ = R_FILTER_GAUSS;
  GPUTexture *texture_ = nullptr;

 public:
  ~SymmetricSeparableBlurWeights()
  {
    if (texture_ != nullptr) {
      GPU_texture_free(texture_);
    }
  }

  /* Make sure the cached texture stores the weights of the given filter type and radius. Nothing
   * is done if the cached texture was computed for the same type and radius, otherwise, it is
   * freed and a new one is computed. */
  void update(float radius, int type)
  {
    const bool is_up_to_date = texture_ != nullptr && type == type_ && radius == radius_;
    if (is_up_to_date) {
      return;
    }

    if (texture_ != nullptr) {
      GPU_texture_free(texture_);
    }

    /* Only the center weight and the positive half of the filter are stored, so the radius is not
     * doubled. The extra 1 is for the center weight. */
    const int size = math::ceil(radius) + 1;
    Array<float> weights(size);

    /* Maps the index of a weight into the distance that the filter function is evaluated at. */
    const float scale = radius > 0.0f ? 1.0f / radius : 0.0f;

    float sum = 0.0f;

    /* The center weight is only counted once. */
    weights[0] = RE_filter_value(type, 0.0f);
    sum += weights[0];

    /* Every other weight is counted twice because it is also used for the negative half of the
     * filter, which is not stored. */
    for (int i = 1; i < size; i++) {
      const float weight = RE_filter_value(type, i * scale);
      weights[i] = weight;
      sum += weight * 2.0f;
    }

    /* Normalize the weights by the sum of the weights of the full filter. */
    for (float &weight : weights) {
      weight /= sum;
    }

    texture_ = GPU_texture_create_1d("Weights", size, 1, GPU_R16F, weights.data());

    type_ = type;
    radius_ = radius;
  }

  /* Bind the weights texture to the texture unit of the sampler with the given name in the given
   * shader. */
  void bind_as_texture(GPUShader *shader, const char *texture_name)
  {
    GPU_texture_bind(texture_, GPU_shader_get_texture_binding(shader, texture_name));
  }

  void unbind_as_texture()
  {
    GPU_texture_unbind(texture_);
  }
};
/* Computes and caches a 2D GPU texture that stores the normalized weights of a filter of a given
 * type and radius. The filter functions are evaluated on the normalized distance to the center,
 * so the filter is symmetric and only its upper right quadrant is stored, including the center
 * weight and the positive halves of both axes. The shader that samples the texture is expected to
 * mirror the weights for the rest of the filter. */
class SymmetricBlurWeights {
 private:
  int type_ = R_FILTER_GAUSS;
  float2 radius_ = float2(1.0f);
  GPUTexture *texture_ = nullptr;

 public:
  ~SymmetricBlurWeights()
  {
    if (texture_ != nullptr) {
      GPU_texture_free(texture_);
    }
  }

  /* Make sure the cached texture stores the weights of the given filter type and radius. Nothing
   * is done if the cached texture was computed for the same type and radius, otherwise, it is
   * freed and a new one is computed. */
  void update(float2 radius, int type)
  {
    const bool is_up_to_date = texture_ != nullptr && type == type_ && radius == radius_;
    if (is_up_to_date) {
      return;
    }

    if (texture_ != nullptr) {
      GPU_texture_free(texture_);
    }

    /* Only a single quadrant of the filter is stored, so the radius is not doubled. The extra 1
     * along each axis is for the center weight. */
    const int2 size = int2(math::ceil(radius)) + int2(1);
    /* Maps the coordinates of a weight into the distance that the filter function is evaluated
     * at. */
    const float2 scale = math::safe_divide(float2(1.0f), radius);
    Array<float> weights(size.x * size.y);

    float sum = 0.0f;

    /* The center weight is only counted once. */
    weights[0] = RE_filter_value(type, 0.0f);
    sum += weights[0];

    /* The weights along the positive x axis are stored in the first row and are counted twice
     * because they are also used for the negative x axis. */
    for (int x = 1; x < size.x; x++) {
      const float weight = RE_filter_value(type, x * scale.x);
      weights[x] = weight;
      sum += weight * 2.0f;
    }

    /* The weights along the positive y axis are stored in the first column and are counted twice
     * because they are also used for the negative y axis. */
    for (int y = 1; y < size.y; y++) {
      const float weight = RE_filter_value(type, y * scale.y);
      weights[size.x * y] = weight;
      sum += weight * 2.0f;
    }

    /* The remaining weights of the upper right quadrant are counted four times because they are
     * also used for the other three quadrants. */
    for (int y = 1; y < size.y; y++) {
      for (int x = 1; x < size.x; x++) {
        const float weight = RE_filter_value(type, math::length(float2(x, y) * scale));
        weights[size.x * y + x] = weight;
        sum += weight * 4.0f;
      }
    }

    /* Normalize the weights by the sum of the weights of the full filter. */
    for (float &weight : weights) {
      weight /= sum;
    }

    texture_ = GPU_texture_create_2d("Weights", size.x, size.y, 1, GPU_R16F, weights.data());

    type_ = type;
    radius_ = radius;
  }

  /* Bind the weights texture to the texture unit of the sampler with the given name in the given
   * shader. */
  void bind_as_texture(GPUShader *shader, const char *texture_name)
  {
    GPU_texture_bind(texture_, GPU_shader_get_texture_binding(shader, texture_name));
  }

  void unbind_as_texture()
  {
    GPU_texture_unbind(texture_);
  }
};
/* The realtime compositor operation of the blur node. The input image is either passed through
 * unchanged, blurred in two passes using a separable filter, or blurred in a single pass using a
 * 2D filter, depending on the node options and inputs. */
class BlurOperation : public NodeOperation {
 private:
  /* Cached symmetric blur weights. */
  SymmetricBlurWeights blur_weights_;
  /* Cached symmetric blur weights for the separable horizontal pass. */
  SymmetricSeparableBlurWeights blur_horizontal_weights_;
  /* Cached symmetric blur weights for the separable vertical pass. */
  SymmetricSeparableBlurWeights blur_vertical_weights_;

 public:
  using NodeOperation::NodeOperation;

  void execute() override
  {
    /* Only pass the input through if no blurring is needed. In all other cases, the result is
     * allocated and written by the blur passes below, so the input must not be passed through to
     * it beforehand. */
    if (is_identity()) {
      get_input("Image").pass_through(get_result("Image"));
      return;
    }

    if (use_separable_filter()) {
      GPUTexture *horizontal_pass_result = execute_separable_blur_horizontal_pass();
      execute_separable_blur_vertical_pass(horizontal_pass_result);
    }
    else {
      execute_blur();
    }
  }

  /* Blur the input in a single pass using the 2D weights and write the output to the result. */
  void execute_blur()
  {
    GPUShader *shader = shader_manager().get("compositor_symmetric_blur");
    GPU_shader_bind(shader);

    GPU_shader_uniform_1b(shader, "extend_bounds", get_extend_bounds());
    GPU_shader_uniform_1b(shader, "gamma_correct", node_storage(bnode()).gamma);

    const Result &input_image = get_input("Image");
    input_image.bind_as_texture(shader, "input_tx");

    blur_weights_.update(compute_blur_radius(), node_storage(bnode()).filtertype);
    blur_weights_.bind_as_texture(shader, "weights_tx");

    Domain domain = compute_domain();
    if (get_extend_bounds()) {
      /* Add a radius amount of pixels in both sides of the image, hence the multiply by 2. */
      domain.size += int2(math::ceil(compute_blur_radius())) * 2;
    }

    Result &output_image = get_result("Image");
    output_image.allocate_texture(domain);
    output_image.bind_as_image(shader, "output_img");

    compute_dispatch_threads_at_least(shader, domain.size);

    GPU_shader_unbind();
    output_image.unbind_as_image();
    input_image.unbind_as_texture();
    blur_weights_.unbind_as_texture();
  }

  /* Blur the input horizontally and return an intermediate texture that stores the blurred image
   * transposed. The returned texture is acquired from the texture pool. */
  GPUTexture *execute_separable_blur_horizontal_pass()
  {
    GPUShader *shader = shader_manager().get("compositor_symmetric_separable_blur");
    GPU_shader_bind(shader);

    /* Gamma correction is applied on the input of this pass and undone on the output of the
     * vertical pass, so the output of this pass is left gamma corrected. */
    GPU_shader_uniform_1b(shader, "extend_bounds", get_extend_bounds());
    GPU_shader_uniform_1b(shader, "gamma_correct_input", node_storage(bnode()).gamma);
    GPU_shader_uniform_1b(shader, "gamma_uncorrect_output", false);

    const Result &input_image = get_input("Image");
    input_image.bind_as_texture(shader, "input_tx");

    blur_horizontal_weights_.update(compute_blur_radius().x, node_storage(bnode()).filtertype);
    blur_horizontal_weights_.bind_as_texture(shader, "weights_tx");

    Domain domain = compute_domain();
    if (get_extend_bounds()) {
      /* Only the width is extended in this pass, by a radius amount of pixels in both sides. */
      domain.size.x += static_cast<int>(math::ceil(compute_blur_radius().x)) * 2;
    }

    /* We allocate an output image of a transposed size, that is, with a height equivalent to the
     * width of the input and vice versa. This is done as a performance optimization. The shader
     * will blur the image horizontally and write it to the intermediate output transposed. Then
     * the vertical pass will execute the same horizontal blur shader, but since its input is
     * transposed, it will effectively do a vertical blur and write to the output transposed,
     * effectively undoing the transposition in the horizontal pass. This is done to improve
     * spatial cache locality in the shader and to avoid having two separate shaders for each blur
     * pass. */
    const int2 transposed_domain = int2(domain.size.y, domain.size.x);

    GPUTexture *horizontal_pass_result = texture_pool().acquire_color(transposed_domain);
    const int image_unit = GPU_shader_get_texture_binding(shader, "output_img");
    GPU_texture_image_bind(horizontal_pass_result, image_unit);

    compute_dispatch_threads_at_least(shader, domain.size);

    GPU_shader_unbind();
    input_image.unbind_as_texture();
    blur_horizontal_weights_.unbind_as_texture();
    GPU_texture_image_unbind(horizontal_pass_result);

    return horizontal_pass_result;
  }

  /* Blur the given transposed result of the horizontal pass along its x axis, which is the y axis
   * of the original image, and write the output to the result transposed back. */
  void execute_separable_blur_vertical_pass(GPUTexture *horizontal_pass_result)
  {
    GPUShader *shader = shader_manager().get("compositor_symmetric_separable_blur");
    GPU_shader_bind(shader);

    /* The input of this pass was already gamma corrected by the horizontal pass, so only the
     * output needs to be gamma uncorrected. */
    GPU_shader_uniform_1b(shader, "extend_bounds", get_extend_bounds());
    GPU_shader_uniform_1b(shader, "gamma_correct_input", false);
    GPU_shader_uniform_1b(shader, "gamma_uncorrect_output", node_storage(bnode()).gamma);

    /* The horizontal pass wrote its result as an image and it is now sampled as a texture. */
    GPU_memory_barrier(GPU_BARRIER_TEXTURE_FETCH);
    const int texture_image_unit = GPU_shader_get_texture_binding(shader, "input_tx");
    GPU_texture_bind(horizontal_pass_result, texture_image_unit);

    blur_vertical_weights_.update(compute_blur_radius().y, node_storage(bnode()).filtertype);
    blur_vertical_weights_.bind_as_texture(shader, "weights_tx");

    Domain domain = compute_domain();
    if (get_extend_bounds()) {
      /* Add a radius amount of pixels in both sides of the image, hence the multiply by 2. */
      domain.size += int2(math::ceil(compute_blur_radius())) * 2;
    }

    Result &output_image = get_result("Image");
    output_image.allocate_texture(domain);
    output_image.bind_as_image(shader, "output_img");

    /* Notice that the domain is transposed, see the note on the horizontal pass method for more
     * information on the reasoning behind this. */
    compute_dispatch_threads_at_least(shader, int2(domain.size.y, domain.size.x));

    GPU_shader_unbind();
    output_image.unbind_as_image();
    blur_vertical_weights_.unbind_as_texture();
    GPU_texture_unbind(horizontal_pass_result);
    /* NOTE(review): horizontal_pass_result was acquired from the texture pool and is not released
     * anywhere in this class. Confirm whether the pool reclaims it on its own or whether it should
     * be released here. */
  }

  /* Compute the blur radius along both axes in pixels. The Size input is clamped to the [0, 1]
   * range and acts as a multiplier. If the node is not in relative mode, the radius is the node
   * size multiplied by the Size input. Otherwise, it is a percentage of the size of the input
   * image, where one of the image dimensions can substitute the other based on the aspect option
   * of the node. */
  float2 compute_blur_radius()
  {
    const float size = math::clamp(get_input("Size").get_float_value_default(1.0f), 0.0f, 1.0f);

    if (!node_storage(bnode()).relative) {
      return float2(node_storage(bnode()).sizex, node_storage(bnode()).sizey) * size;
    }

    int2 image_size = get_input("Image").domain().size;
    switch (node_storage(bnode()).aspect) {
      case CMP_NODE_BLUR_ASPECT_Y:
        image_size.y = image_size.x;
        break;
      case CMP_NODE_BLUR_ASPECT_X:
        image_size.x = image_size.y;
        break;
      default:
        BLI_assert(node_storage(bnode()).aspect == CMP_NODE_BLUR_ASPECT_NONE);
        break;
    }

    return float2(image_size) * get_size_factor() * size;
  }

  /* Returns true if the operation does nothing and the input can be passed through. */
  bool is_identity()
  {
    const Result &input = get_input("Image");
    /* Single value inputs can't be blurred and are returned as is. */
    if (input.is_single_value()) {
      return true;
    }

    /* Zero blur radius. The operation does nothing and the input can be passed through. */
    if (compute_blur_radius() == float2(0.0)) {
      return true;
    }

    return false;
  }

  /* The blur node can operate with different filter types, evaluated on the normalized distance to
   * the center of the filter. Some of those filters are separable and can be computed as such. If
   * the bokeh member is disabled in the node, then the filter is always computed as separable even
   * if it is not in fact separable, in which case, the used filter is a cheaper approximation to
   * the actual filter. If the bokeh member is enabled, then the filter is computed as separable if
   * it is in fact separable and as a normal 2D filter otherwise. */
  bool use_separable_filter()
  {
    if (!node_storage(bnode()).bokeh) {
      return true;
    }

    /* Both Box and Gaussian filters are separable. The rest is not. */
    switch (node_storage(bnode()).filtertype) {
      case R_FILTER_BOX:
      case R_FILTER_GAUSS:
      case R_FILTER_FAST_GAUSS:
        return true;
      default:
        return false;
    }
  }

  /* The relative size of the blur along both axes as a factor, computed from the percentages
   * stored in the node. */
  float2 get_size_factor()
  {
    return float2(node_storage(bnode()).percentx, node_storage(bnode()).percenty) / 100.0f;
  }

  /* Returns true if the extend bounds flag is set in the node. */
  bool get_extend_bounds()
  {
    return bnode().custom1 & CMP_NODEFLAG_BLUR_EXTEND_BOUNDS;
  }
};

View File

@ -124,7 +124,8 @@ float RE_filter_value(int type, float x)
}
return 1.0f - x;
case R_FILTER_GAUSS: {
case R_FILTER_GAUSS:
case R_FILTER_FAST_GAUSS: {
const float two_gaussfac2 = 2.0f * gaussfac * gaussfac;
x *= 3.0f * gaussfac;
return 1.0f / sqrtf((float)M_PI * two_gaussfac2) * expf(-x * x / two_gaussfac2);