Realtime Compositor: Add evaluator and engine

This patch adds the core realtime compositor evaluator as well as a
compositor draw engine powered by the evaluator that operates in the
viewport. The realtime compositor is a new GPU accelerated compositor
that will be used to power the viewport compositor imminently as well as
the existing compositor in the future.

This patch only adds the evaluator and engine as an experimental
feature, the implementation of the nodes themselves will be committed
separately.

See T99210.

Differential Revision: https://developer.blender.org/D15206

Reviewed By: Clement Foucault
This commit is contained in:
Omar Emara 2022-08-10 09:14:22 +02:00
parent 169216684a
commit 624b0ac656
61 changed files with 5269 additions and 2 deletions

View File

@ -2258,6 +2258,7 @@ class USERPREF_PT_experimental_new_features(ExperimentalPanel, Panel):
({"property": "use_sculpt_tools_tilt"}, "T82877"),
({"property": "use_extended_asset_browser"}, ("project/view/130/", "Project Page")),
({"property": "use_override_templates"}, ("T73318", "Milestone 4")),
({"property": "use_realtime_compositor"}, "T99210"),
),
)

View File

@ -6128,6 +6128,24 @@ class VIEW3D_PT_shading_render_pass(Panel):
layout.prop(shading, "render_pass", text="")
class VIEW3D_PT_shading_compositor(Panel):
    # Child panel of VIEW3D_PT_shading that exposes the viewport compositor toggle.
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'HEADER'
    bl_label = "Compositor"
    bl_parent_id = 'VIEW3D_PT_shading'

    @classmethod
    def poll(cls, context):
        # The panel is only relevant for the 'MATERIAL' and 'RENDERED' shading types.
        if context.space_data.shading.type not in {'MATERIAL', 'RENDERED'}:
            return False
        # Beyond that, visibility follows the experimental realtime compositor preference.
        return context.preferences.experimental.use_realtime_compositor

    def draw(self, context):
        # A single property: whether the compositor is used for this viewport's shading.
        view_shading = context.space_data.shading
        self.layout.prop(view_shading, "use_compositor")
class VIEW3D_PT_gizmo_display(Panel):
bl_space_type = 'VIEW_3D'
bl_region_type = 'HEADER'
@ -7967,6 +7985,7 @@ classes = (
VIEW3D_PT_shading_options_shadow,
VIEW3D_PT_shading_options_ssao,
VIEW3D_PT_shading_render_pass,
VIEW3D_PT_shading_compositor,
VIEW3D_PT_gizmo_display,
VIEW3D_PT_overlay,
VIEW3D_PT_overlay_guides,

View File

@ -101,6 +101,7 @@ typedef struct bNodeSocketTemplate {
namespace blender {
class CPPType;
namespace nodes {
class DNode;
class NodeMultiFunctionBuilder;
class GeoNodeExecParams;
class NodeDeclarationBuilder;
@ -109,6 +110,11 @@ class GatherLinkSearchOpParams;
namespace fn {
class MFDataType;
} // namespace fn
namespace realtime_compositor {
class Context;
class NodeOperation;
class ShaderNode;
} // namespace realtime_compositor
} // namespace blender
using CPPTypeHandle = blender::CPPType;
@ -123,7 +129,14 @@ using SocketGetGeometryNodesCPPValueFunction = void (*)(const struct bNodeSocket
using NodeGatherSocketLinkOperationsFunction =
void (*)(blender::nodes::GatherLinkSearchOpParams &params);
using NodeGetCompositorOperationFunction = blender::realtime_compositor::NodeOperation
*(*)(blender::realtime_compositor::Context &context, blender::nodes::DNode node);
using NodeGetCompositorShaderNodeFunction =
blender::realtime_compositor::ShaderNode *(*)(blender::nodes::DNode node);
#else
typedef void *NodeGetCompositorOperationFunction;
typedef void *NodeGetCompositorShaderNodeFunction;
typedef void *NodeMultiFunctionBuildFunction;
typedef void *NodeGeometryExecFunction;
typedef void *NodeDeclareFunction;
@ -309,6 +322,14 @@ typedef struct bNodeType {
/* gpu */
NodeGPUExecFunction gpu_fn;
/* Get an instance of this node's compositor operation. Freeing the instance is the
* responsibility of the caller. */
NodeGetCompositorOperationFunction get_compositor_operation;
/* Get an instance of this node's compositor shader node. Freeing the instance is the
* responsibility of the caller. */
NodeGetCompositorShaderNodeFunction get_compositor_shader_node;
/* Build a multi-function for this node. */
NodeMultiFunctionBuildFunction build_multi_function;

View File

@ -1,8 +1,9 @@
# SPDX-License-Identifier: GPL-2.0-or-later
# Copyright 2011 Blender Foundation. All rights reserved.
if(WITH_COMPOSITOR_CPU)
add_subdirectory(realtime_compositor)
if(WITH_COMPOSITOR_CPU)
set(INC
.
intern

View File

@ -0,0 +1,66 @@
# SPDX-License-Identifier: GPL-2.0-or-later

# Include directories needed to compile the realtime compositor library.
set(INC
  .
  ../../gpu
  ../../nodes
  ../../imbuf
  ../../blenlib
  ../../makesdna
  ../../makesrna
  ../../blenkernel
  ../../gpu/intern
  ../../../../intern/guardedalloc
)

# No system include directories are required. The list is still defined explicitly so that the
# "${INC_SYS}" argument passed to blender_add_lib() below is always this (empty) list and never a
# value that happens to be inherited from the parent directory scope.
set(INC_SYS
)

# Implementation files followed by the public headers of the library.
set(SRC
  intern/compile_state.cc
  intern/context.cc
  intern/conversion_operation.cc
  intern/domain.cc
  intern/evaluator.cc
  intern/input_single_value_operation.cc
  intern/node_operation.cc
  intern/operation.cc
  intern/realize_on_domain_operation.cc
  intern/reduce_to_single_value_operation.cc
  intern/result.cc
  intern/scheduler.cc
  intern/shader_node.cc
  intern/shader_operation.cc
  intern/simple_operation.cc
  intern/static_shader_manager.cc
  intern/texture_pool.cc
  intern/utilities.cc

  COM_compile_state.hh
  COM_context.hh
  COM_conversion_operation.hh
  COM_domain.hh
  COM_evaluator.hh
  COM_input_descriptor.hh
  COM_input_single_value_operation.hh
  COM_node_operation.hh
  COM_operation.hh
  COM_realize_on_domain_operation.hh
  COM_reduce_to_single_value_operation.hh
  COM_result.hh
  COM_scheduler.hh
  COM_shader_node.hh
  COM_shader_operation.hh
  COM_simple_operation.hh
  COM_static_shader_manager.hh
  COM_texture_pool.hh
  COM_utilities.hh
)

# Libraries the realtime compositor links against.
set(LIB
  bf_gpu
  bf_nodes
  bf_imbuf
  bf_blenlib
  bf_blenkernel
)

blender_add_lib(bf_realtime_compositor "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")

View File

@ -0,0 +1,170 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_map.hh"
#include "NOD_derived_node_tree.hh"
#include "COM_domain.hh"
#include "COM_node_operation.hh"
#include "COM_scheduler.hh"
#include "COM_shader_operation.hh"
namespace blender::realtime_compositor {
using namespace nodes::derived_node_tree_types;
/* ------------------------------------------------------------------------------------------------
* Compile State
*
* The compile state is a utility class used to track the state of compilation when compiling the
* node tree. In particular, it tracks two important pieces of information, each of which is
* described in one of the following sections.
*
* First, it stores a mapping between all nodes and the operations they were compiled into. The
* mappings are stored independently depending on the type of the operation in the node_operations_
* and shader_operations_ maps. So those two maps are mutually exclusive. The compiler should call
* the map_node_to_node_operation and map_node_to_shader_operation methods to populate those maps
* as soon as it compiles a node or multiple nodes into an operation. Those maps are used to
* retrieve the results of outputs linked to the inputs of operations. For more details, see the
* get_result_from_output_socket method. For the node tree shown below, nodes 1, 2, and 6 are
* mapped to their compiled operations in the node_operations_ map. While nodes 3 and 4 are both
* mapped to the first shader operation, and node 5 is mapped to the second shader operation in the
* shader_operations_ map.
*
* Shader Operation 1 Shader Operation 2
* +-----------------------------------+ +------------------+
* .------------. | .------------. .------------. | | .------------. | .------------.
* | Node 1 | | | Node 3 | | Node 4 | | | | Node 5 | | | Node 6 |
* | |----|--| |--| |---|-----|--| |--|--| |
* | | .-|--| | | | | .--|--| | | | |
* '------------' | | '------------' '------------' | | | '------------' | '------------'
* | +-----------------------------------+ | +------------------+
* .------------. | |
* | Node 2 | | |
* | |--'----------------------------------------'
* | |
* '------------'
*
* Second, it stores the shader compile unit as well as its domain. One should first go over the
* discussion in COM_evaluator.hh for a high level description of the mechanism of the compile
* unit. The one important detail in this class is the should_compile_shader_compile_unit method,
* which implements the criteria of whether the compile unit should be compiled given the node
* currently being processed as an argument. Those criteria are described as follows. If the
* compile unit is empty as is the case when processing nodes 1, 2, and 3, then it plainly
* shouldn't be compiled. If the given node is not a shader node, then it can't be added to the
* compile unit and the unit is considered complete and should be compiled, as is the case when
* processing node 6. If the computed domain of the given node is not compatible with the domain of
* the compiled unit, then it can't be added to the unit and the unit is considered complete and
* should be compiled, as is the case when processing node 5, more on this in the next section.
* Otherwise, the given node is compatible with the compile unit and can be added to it, so the
* unit shouldn't be compiled just yet, as is the case when processing node 4.
*
* Special attention should be given to the aforementioned domain compatibility criterion. One
* should first go over the discussion in COM_domain.hh for more information on domains. When a
* compile unit gets eventually compiled to a shader operation, that operation will have a certain
* operation domain, and any node that gets added to the compile unit should itself have a computed
* node domain that is compatible with that operation domain, otherwise, had the node been compiled
* into its own operation separately, the result would have been different. For instance,
* consider the above node tree where node 1 outputs a 100x100 result, node 2 outputs a 50x50
* result, the first input in node 3 has the highest domain priority, and the second input in node
* 5 has the highest domain priority. In this case, shader operation 1 will output a 100x100
* result, and shader operation 2 will output a 50x50 result, because that's the computed operation
* domain for each of them. So node 6 will get a 50x50 result. Now consider the same node tree, but
* where all three nodes 3, 4, and 5 were compiled into a single shader operation as shown in the node
* tree below. In that case, shader operation 1 will output a 100x100 result, because that's its
* computed operation domain. So node 6 will get a 100x100 result. As can be seen, the final result
* is different even though the node tree is the same. That's why the compiler can decide to
* compile the compile unit early even though further nodes can still be technically added to it.
*
* Shader Operation 1
* +------------------------------------------------------+
* .------------. | .------------. .------------. .------------. | .------------.
* | Node 1 | | | Node 3 | | Node 4 | | Node 5 | | | Node 6 |
* | |----|--| |--| |------| |--|--| |
* | | .-|--| | | | .---| | | | |
* '------------' | | '------------' '------------' | '------------' | '------------'
* | +----------------------------------|-------------------+
* .------------. | |
* | Node 2 | | |
* | |--'------------------------------------'
* | |
* '------------'
*
* To check for the domain compatibility between the compile unit and the node being processed,
* the domain of the compile unit is assumed to be the domain of the first node whose computed
* domain is not an identity domain. Identity domains correspond to single value results, so those
* are always compatible with any domain. The domain of the compile unit is computed and set in
* the add_node_to_shader_compile_unit method. When processing a node, the computed domain of the node
* is compared to the compile unit domain in the should_compile_shader_compile_unit method, noting
* that identity domains are always compatible. Node domains are computed in the
* compute_shader_node_domain method, which is analogous to Operation::compute_domain for nodes
* that are not yet compiled. */
class CompileState {
/* Utility class that tracks, while the node execution schedule is being compiled, which operation
 * each node was compiled into, as well as the shader compile unit that is currently being
 * accumulated together with its domain. */
private:
/* A reference to the node execution schedule that is being compiled. */
const Schedule &schedule_;
/* Those two maps associate each node with the operation it was compiled into. Each node is
 * either compiled into a node operation and added to node_operations_, or compiled into a shader
 * operation and added to shader_operations_. Those maps are used to retrieve the results of
 * outputs linked to the inputs of operations. See the get_result_from_output_socket method for
 * more information. */
Map<DNode, NodeOperation *> node_operations_;
Map<DNode, ShaderOperation *> shader_operations_;
/* A contiguous subset of the node execution schedule that contains the group of nodes that will
 * be compiled together into a Shader Operation. See the discussion in COM_evaluator.hh for
 * more information. */
ShaderCompileUnit shader_compile_unit_;
/* The domain of the shader compile unit. It starts out as an identity domain. */
Domain shader_compile_unit_domain_ = Domain::identity();
public:
/* Construct a compile state from the node execution schedule being compiled. */
CompileState(const Schedule &schedule);
/* Get a reference to the node execution schedule being compiled. */
const Schedule &get_schedule();
/* Add an association between the given node and the given node operation that the node was
 * compiled into in the node_operations_ map. */
void map_node_to_node_operation(DNode node, NodeOperation *operation);
/* Add an association between the given node and the given shader operation that the node was
 * compiled into in the shader_operations_ map. */
void map_node_to_shader_operation(DNode node, ShaderOperation *operation);
/* Returns a reference to the result of the operation corresponding to the given output that the
 * given output's node was compiled to. */
Result &get_result_from_output_socket(DOutputSocket output);
/* Add the given node to the compile unit. And if the domain of the compile unit is not yet
 * determined or was determined to be an identity domain, update it to the computed domain for
 * the given node. */
void add_node_to_shader_compile_unit(DNode node);
/* Get a reference to the shader compile unit. */
ShaderCompileUnit &get_shader_compile_unit();
/* Clear the compile unit. This should be called once the compile unit is compiled to ready it to
 * track the next potential compile unit. */
void reset_shader_compile_unit();
/* Determines if the compile unit should be compiled based on a number of criteria given the node
 * currently being processed. Those criteria are as follows:
 * - If the compile unit is empty, then it can't and shouldn't be compiled.
 * - If the given node is not a shader node, then it can't be added to the compile unit
 *   and the unit is considered complete and should be compiled.
 * - If the computed domain of the given node is not compatible with the domain of the compile
 *   unit, then it can't be added to it and the unit is considered complete and should be
 *   compiled. */
bool should_compile_shader_compile_unit(DNode node);
private:
/* Compute the node domain of the given shader node. This is analogous to the
 * Operation::compute_domain method, except it is computed from the node itself as opposed to a
 * compiled operation. See the discussion in COM_domain.hh for more information. */
Domain compute_shader_node_domain(DNode node);
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,72 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_math_vec_types.hh"
#include "BLI_string_ref.hh"
#include "DNA_scene_types.h"
#include "GPU_texture.h"
#include "COM_static_shader_manager.hh"
#include "COM_texture_pool.hh"
namespace blender::realtime_compositor {
/* ------------------------------------------------------------------------------------------------
* Context
*
* A Context is an abstract class that is implemented by the caller of the evaluator to provide the
* necessary data and functionalities for the correct operation of the evaluator. This includes
* providing input data like render passes and the active scene, as well as references to the data
* where the output of the evaluator will be written. The class also provides a reference to the
* texture pool which should be implemented by the caller and provided during construction.
* Finally, the class has an instance of a static shader manager for convenient shader
* acquisition. */
class Context {
 private:
  /* A texture pool that can be used to allocate textures for the compositor efficiently. */
  TexturePool &texture_pool_;
  /* A static shader manager that can be used to acquire shaders for the compositor efficiently. */
  StaticShaderManager shader_manager_;

 public:
  /* Construct a context that allocates its textures from the given texture pool. Only a reference
   * to the pool is stored. */
  Context(TexturePool &texture_pool);

  /* Context is a polymorphic base class that is implemented by the caller of the evaluator, so
   * its destructor is virtual to make destruction through a pointer or reference to Context well
   * defined. */
  virtual ~Context() = default;

  /* Get the active compositing scene. */
  virtual const Scene *get_scene() const = 0;

  /* Get the dimensions of the output. */
  virtual int2 get_output_size() = 0;

  /* Get the texture representing the output where the result of the compositor should be
   * written. This should be called by output nodes to get their target texture. */
  virtual GPUTexture *get_output_texture() = 0;

  /* Get the texture where the given render pass is stored. This should be called by the Render
   * Layer node to populate its outputs. */
  virtual GPUTexture *get_input_texture(int view_layer, eScenePassType pass_type) = 0;

  /* Get the name of the view currently being rendered. */
  virtual StringRef get_view_name() = 0;

  /* Set an info message. This is called by the compositor evaluator to inform or warn the user
   * about something, typically an error. The implementation should display the message in an
   * appropriate place, which can be directly in the UI or just logged to the output stream. */
  virtual void set_info_message(StringRef message) const = 0;

  /* Get the current frame number of the active scene. */
  int get_frame_number() const;

  /* Get the current time in seconds of the active scene. */
  float get_time() const;

  /* Get a reference to the texture pool of this context. */
  TexturePool &texture_pool();

  /* Get a reference to the static shader manager of this context. */
  StaticShaderManager &shader_manager();
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,126 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "GPU_shader.h"
#include "COM_context.hh"
#include "COM_input_descriptor.hh"
#include "COM_result.hh"
#include "COM_simple_operation.hh"
namespace blender::realtime_compositor {
/* -------------------------------------------------------------------------------------------------
* Conversion Operation
*
* A simple operation that converts a result from a certain type to another. See the derived
* classes for more details. */
class ConversionOperation : public SimpleOperation {
/* Abstract base of the type conversion operations. Derived classes supply the single value
 * conversion and the conversion shader through the two pure virtual methods below. */
public:
/* Reuse the constructors of SimpleOperation. */
using SimpleOperation::SimpleOperation;
/* If the input result is a single value, execute_single is called. Otherwise, the shader
 * provided by get_conversion_shader is dispatched. */
void execute() override;
/* Determine if a conversion operation is needed for the input with the given result and
 * descriptor. If it is not needed, return a null pointer. If it is needed, return an instance of
 * the appropriate conversion operation. */
static SimpleOperation *construct_if_needed(Context &context,
const Result &input_result,
const InputDescriptor &input_descriptor);
protected:
/* Convert the input single value result to the output single value result. */
virtual void execute_single(const Result &input, Result &output) = 0;
/* Get the shader that will be used for conversion. */
virtual GPUShader *get_conversion_shader() const = 0;
};
/* -------------------------------------------------------------------------------------------------
* Convert Float To Vector Operation
*
* Takes a float result and outputs a vector result. All three components of the output are filled
* with the input float. */
class ConvertFloatToVectorOperation : public ConversionOperation {
public:
/* Construct the float to vector conversion operation for the given context. */
ConvertFloatToVectorOperation(Context &context);
/* Single value implementation of the float to vector conversion. */
void execute_single(const Result &input, Result &output) override;
/* Get the shader used for the float to vector conversion. */
GPUShader *get_conversion_shader() const override;
};
/* -------------------------------------------------------------------------------------------------
* Convert Float To Color Operation
*
* Takes a float result and outputs a color result. All three color channels of the output are
* filled with the input float and the alpha channel is set to 1. */
class ConvertFloatToColorOperation : public ConversionOperation {
public:
/* Construct the float to color conversion operation for the given context. */
ConvertFloatToColorOperation(Context &context);
/* Single value implementation of the float to color conversion. */
void execute_single(const Result &input, Result &output) override;
/* Get the shader used for the float to color conversion. */
GPUShader *get_conversion_shader() const override;
};
/* -------------------------------------------------------------------------------------------------
* Convert Color To Float Operation
*
* Takes a color result and outputs a float result. The output is the average of the three color
* channels, the alpha channel is ignored. */
class ConvertColorToFloatOperation : public ConversionOperation {
public:
/* Construct the color to float conversion operation for the given context. */
ConvertColorToFloatOperation(Context &context);
/* Single value implementation of the color to float conversion. */
void execute_single(const Result &input, Result &output) override;
/* Get the shader used for the color to float conversion. */
GPUShader *get_conversion_shader() const override;
};
/* -------------------------------------------------------------------------------------------------
* Convert Color To Vector Operation
*
* Takes a color result and outputs a vector result. The output is a copy of the three color
* channels to the three vector components. */
class ConvertColorToVectorOperation : public ConversionOperation {
public:
/* Construct the color to vector conversion operation for the given context. */
ConvertColorToVectorOperation(Context &context);
/* Single value implementation of the color to vector conversion. */
void execute_single(const Result &input, Result &output) override;
/* Get the shader used for the color to vector conversion. */
GPUShader *get_conversion_shader() const override;
};
/* -------------------------------------------------------------------------------------------------
* Convert Vector To Float Operation
*
* Takes a vector result and outputs a float result. The output is the average of the three
* components. */
class ConvertVectorToFloatOperation : public ConversionOperation {
public:
/* Construct the vector to float conversion operation for the given context. */
ConvertVectorToFloatOperation(Context &context);
/* Single value implementation of the vector to float conversion. */
void execute_single(const Result &input, Result &output) override;
/* Get the shader used for the vector to float conversion. */
GPUShader *get_conversion_shader() const override;
};
/* -------------------------------------------------------------------------------------------------
* Convert Vector To Color Operation
*
* Takes a vector result and outputs a color result. The output is a copy of the three vector
* components to the three color channels with the alpha channel set to 1. */
class ConvertVectorToColorOperation : public ConversionOperation {
public:
/* Construct the vector to color conversion operation for the given context. */
ConvertVectorToColorOperation(Context &context);
/* Single value implementation of the vector to color conversion. */
void execute_single(const Result &input, Result &output) override;
/* Get the shader used for the vector to color conversion. */
GPUShader *get_conversion_shader() const override;
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,166 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include <cstdint>
#include "BLI_float3x3.hh"
#include "BLI_math_vec_types.hh"
namespace blender::realtime_compositor {
/* Possible interpolations to use when realizing an input result of some domain on another domain.
* See the RealizationOptions struct for more information. */
enum class Interpolation : uint8_t {
/* The enumerators below are the possible values of RealizationOptions::interpolation. The
 * underlying type is a single byte. */
Nearest,
Bilinear,
Bicubic,
};
/* ------------------------------------------------------------------------------------------------
* Realization Options
*
* The options that describe how an input result prefers to be realized on some other domain. This
* is used by the Realize On Domain Operation to identify the appropriate method of realization.
* See the Domain class for more information. */
struct RealizationOptions {
/* Every member has a default, so a default constructed instance uses nearest interpolation and no
 * repetition along either axis. */
/* The interpolation method that should be used when performing realization. Since realizing a
 * result involves projecting it on a different domain, which in turn, involves sampling the
 * result at arbitrary locations, the interpolation identifies the method used for computing the
 * value at those arbitrary locations. */
Interpolation interpolation = Interpolation::Nearest;
/* If true, the result will be repeated infinitely along the horizontal axis when realizing the
 * result. If false, regions outside of bounds of the result along the horizontal axis will be
 * filled with zeros. */
bool repeat_x = false;
/* If true, the result will be repeated infinitely along the vertical axis when realizing the
 * result. If false, regions outside of bounds of the result along the vertical axis will be
 * filled with zeros. */
bool repeat_y = false;
};
/* ------------------------------------------------------------------------------------------------
* Domain
*
* The compositor is designed in such a way as to allow compositing in an infinite virtual
* compositing space. Consequently, any result of an operation is not only represented by its image
* output, but also by its transformation in that virtual space. The transformation of the result
* together with the dimension of its image is stored and represented by a Domain. In the figure
* below, two results of different domains are illustrated on the virtual compositing space. One of
* the results is centered in space with an image dimension of 800px × 600px, and the other result
* is scaled down and translated such that it lies in the upper right quadrant of the space with an
* image dimension of 800px × 600px. The position of the domain is in pixel space, and the domain
* is considered centered if it has an identity transformation. Note that both results have the
* same resolution, but occupy different areas of the virtual compositing space.
*
* y
* ^
* 800px x 600px |
* .---------------------|---------------------.
* | | 800px x 600px |
* | | .-------------. |
* | | | | |
* | | '-------------' |
* ------|---------------------|---------------------|------> x
* | | |
* | | |
* | | |
* | | |
* '---------------------|---------------------'
* |
*
* By default, results have domains of identity transformations, that is, they are centered in
* space, but a transformation operation like the rotate, translate, or transform operations will
* adjust the transformation to make the result reside somewhere different in space. The domain of
* a single value result is irrelevant and always set to an identity domain.
*
* An operation is typically only concerned about a subset of the virtual compositing space, this
* subset is represented by a domain which is called the Operation Domain. It follows that before
* the operation itself is executed, inputs will typically be realized on the operation domain to
* be in the same domain and have the same dimension as that of the operation domain. This process
* is called Domain Realization and is implemented using an operation called the Realize On Domain
* Operation. Realization involves projecting the result onto the target domain, copying the area
* of the result that intersects the target domain, and filling the rest with zeros or repetitions
* of the result depending on the realization options that can be set by the user. Consequently,
* operations can generally expect their inputs to have the same dimension and can operate on them
* directly and transparently. For instance, if an operation takes both results illustrated in
* the figure above, and the operation has an operation domain that matches the bigger domain, the
* result with the bigger domain will not need to be realized because it already has a domain that
* matches that of the operation domain, but the result with the smaller domain will have to be
* realized into a new result that has the same domain as the domain of the bigger result. Assuming
* no repetition, the output of the realization will be an all zeros image with dimension 800px ×
* 600px with a small scaled version of the smaller result copied into the upper right quadrant of
* the image. The following figure illustrates the realization process on a different set of
* results:
*
* Realized Result
* +-------------+ +-------------+
* | Operation | | |
* | Domain | | Zeros |
* | | ----> | |
* +-----|-----+ | |-----+ |
* | | C | | | C | |
* | +-----|-------+ +-----'-------+
* | Domain Of |
* | Input |
* +-----------+
*
* An operation can operate in an arbitrary operation domain, but in most cases, the operation
* domain is inferred from the inputs of the operation. In particular, one of the inputs is said to
* be the Domain Input of the operation and the operation domain is inferred from its domain. It
* follows that this particular input will not need realization, because it already has the correct
* domain. The domain input selection mechanism is as follows. Each of the inputs is assigned a
* value by the developer called the Domain Priority, the domain input is then chosen as the
* non-single value input with the highest domain priority, zero being the highest priority. See
* Operation::compute_domain for more information.
*
* The aforementioned logic for operation domain computation is only a default that works for most
* cases, but an operation can override the compute_domain method to implement a different logic.
* For instance, output nodes have an operation domain the same size as the viewport and with an
* identity transformation, their operation domain doesn't depend on the inputs at all.
*
* For instance, a filter operation has two inputs, a factor and a color, the latter of which is
* assigned a domain priority of 0 and the former is assigned a domain priority of 1. If the color
* input is not a single value input, then the color input is considered to be the domain input of
* the operation and the operation domain is computed to be the same domain as the color input,
* because it has the highest priority. It follows that if the factor input has a different domain
* than the computed domain of the operation, it will be projected and realized on it to have the
* same domain as described above. On the other hand, if the color input is a single value input,
* then the factor input is considered to be the domain input and the operation domain will be the
* same as the domain of the factor input, because it has the second highest domain priority.
* Finally, if both inputs are single value inputs, the operation domain will be an identity domain
* and is irrelevant, because the output will be a domain-less single value. */
class Domain {
/* Describes the area that a result occupies: its size in pixels, its transformation and the
 * options describing how it prefers to be realized on another domain. */
public:
/* The size of the domain in pixels. */
int2 size;
/* The 2D transformation of the domain defining its translation in pixels, rotation, and scale in
 * the virtual compositing space. */
float3x3 transformation;
/* The options that describe how this domain prefers to be realized on some other domain. See the
 * RealizationOptions struct for more information. */
RealizationOptions realization_options;
public:
/* A size only constructor that sets the transformation to identity. */
Domain(int2 size);
/* Construct a domain from both a size and a transformation. */
Domain(int2 size, float3x3 transformation);
/* Transform the domain by the given transformation. This effectively pre-multiplies the given
 * transformation by the current transformation of the domain. */
void transform(const float3x3 &transformation);
/* Returns a domain of size 1x1 and an identity transformation. */
static Domain identity();
};
/* Compare the size and transformation of the domain. The realization_options are not compared
* because they only describe the method of realization on another domain, which is not technically
* a property of the domain itself. */
bool operator==(const Domain &a, const Domain &b);
/* Inverse of the above equality operator. */
bool operator!=(const Domain &a, const Domain &b);
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,173 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include <memory>
#include "BLI_vector.hh"
#include "DNA_node_types.h"
#include "NOD_derived_node_tree.hh"
#include "COM_compile_state.hh"
#include "COM_context.hh"
#include "COM_node_operation.hh"
#include "COM_operation.hh"
#include "COM_shader_operation.hh"
namespace blender::realtime_compositor {
using namespace nodes::derived_node_tree_types;
/* ------------------------------------------------------------------------------------------------
* Evaluator
*
* The evaluator is the main class of the compositor and the entry point of its execution. The
* evaluator compiles the compositor node tree and evaluates it to compute its output. It is
* constructed from a compositor node tree and a compositor context. Upon calling the evaluate
* method, the evaluator will check if the node tree is already compiled into an operations stream,
* and if it is, it will go over it and evaluate the operations in order. It is then the
* responsibility of the caller to call the reset method when the node tree changes to invalidate
* the operations stream. A reset is also required if the resources used by the node tree change,
* for instance, when the dimensions of an image used by the node tree change. This is necessary
* because the evaluator compiles the node tree into an operations stream that is specifically
* optimized for the structure of the resources used by the node tree.
*
* Otherwise, if the node tree is not yet compiled, the evaluator will compile it into an
* operations stream, evaluating the operations in the process. It should be noted that operations
* are evaluated as soon as they are compiled, as opposed to compiling the whole operations stream
* and then evaluating it in a separate step. This is important because, as mentioned before, the
* operations stream is optimized specifically for the structure of the resources used by the node
* tree, which is only known after the operations are evaluated. In other words, the evaluator uses
* the evaluated results of previously compiled operations to compile the operations that follow
* them in an optimized manner.
*
* Compilation starts by computing an optimized node execution schedule by calling the
* compute_schedule function, see the discussion in COM_scheduler.hh for more details. For the node
* tree shown below, the execution schedule is denoted by the node numbers. The compiler then goes
* over the execution schedule in order and compiles each node into either a Node Operation or a
* Shader Operation, depending on the node type, see the is_shader_node function. A Shader
* operation is constructed from a group of nodes forming a contiguous subset of the node execution
* schedule. For instance, in the node tree shown below, nodes 3 and 4 are compiled together into a
* shader operation and node 5 is compiled into its own shader operation, both of which are
* contiguous subsets of the node execution schedule. This process is described in detail in the
* following section.
*
* Shader Operation 1 Shader Operation 2
* +-----------------------------------+ +------------------+
* .------------. | .------------. .------------. | | .------------. | .------------.
* | Node 1 | | | Node 3 | | Node 4 | | | | Node 5 | | | Node 6 |
* | |----|--| |--| |---|-----|--| |--|--| |
* | | .-|--| | | | | .--|--| | | | |
* '------------' | | '------------' '------------' | | | '------------' | '------------'
* | +-----------------------------------+ | +------------------+
* .------------. | |
* | Node 2 | | |
* | |--'----------------------------------------'
* | |
* '------------'
*
* For non shader nodes, the compilation process is straightforward: the compiler instantiates a
* node operation from the node, maps its inputs to the results of the outputs they are linked to,
* and evaluates the operation. However, for shader nodes, since a group of nodes can be compiled
* together into a shader operation, the compilation process is a bit involved. The compiler uses
* an instance of the Compile State class to keep track of the compilation process. The compiler
* state stores the so called "shader compile unit", which is the current group of nodes that will
* eventually be compiled together into a shader operation. While going over the schedule, the
* compiler adds the shader nodes to the compile unit until it decides that the compile unit is
* complete and should be compiled. This is typically decided when the current node is not
* compatible with the compile unit and can't be added to it, only then it compiles the compile
* unit into a shader operation and resets it to ready it to track the next potential group of
* nodes that will form a shader operation. This decision is made based on various criteria in the
* should_compile_shader_compile_unit function. See the discussion in COM_compile_state.hh for more
* details of those criteria, but perhaps the most evident of which is whether the node is actually
* a shader node, if it isn't, then it evidently can't be added to the compile unit and the compile
* unit should be compiled.
*
* For the node tree above, the compilation process is as follows. The compiler goes over the node
* execution schedule in order considering each node. Nodes 1 and 2 are not shader nodes, so they are
* compiled into node operations and added to the operations stream. The current compile unit is
* empty, so it is not compiled. Node 3 is a shader node, and since the compile unit is currently
* empty, it is unconditionally added to it. Node 4 is a shader node, it was decided---for the sake
* of the demonstration---that it is compatible with the compile unit and can be added to it. Node
* 5 is a shader node, but it was decided---for the sake of the demonstration---that it is not
* compatible with the compile unit, so the compile unit is considered complete and is compiled
* first, adding the first shader operation to the operations stream and resetting the compile
* unit. Node 5 is then added to the now empty compile unit similar to node 3. Node 6 is not a
* shader node, so the compile unit is considered complete and is compiled first, adding the second
* shader operation to the operations stream and resetting the compile unit. Finally, node 6 is
* compiled into a node operation similar to nodes 1 and 2 and added to the operations stream. */
class Evaluator {
private:
/* A reference to the compositor context. The context is held by reference and is not owned by
* the evaluator, so it must outlive the evaluator. */
Context &context_;
/* A reference to the compositor node tree. The node tree is held by reference and is not owned
* by the evaluator, so it must outlive the evaluator. */
bNodeTree &node_tree_;
/* The derived and reference node trees representing the compositor node tree. Those are
* initialized when the node tree is compiled and freed when the evaluator resets. */
NodeTreeRefMap node_tree_reference_map_;
std::unique_ptr<DerivedNodeTree> derived_node_tree_;
/* The compiled operations stream. This contains ordered pointers to the operations that were
* compiled. This is initialized when the node tree is compiled and freed when the evaluator
* resets. The is_compiled_ member indicates whether the operation stream can be used or needs to
* be compiled first. Note that the operations stream can be empty even when compiled, this can
* happen when the node tree is empty or invalid for instance. The stream owns its operations
* through unique pointers. */
Vector<std::unique_ptr<Operation>> operations_stream_;
/* True if the node tree is already compiled into an operations stream that can be evaluated
* directly. False if the node tree is not compiled yet and needs to be compiled. A newly
* constructed evaluator starts out as not compiled. */
bool is_compiled_ = false;
public:
/* Construct an evaluator from a compositor node tree and a context. Both are stored by
* reference, see the context_ and node_tree_ members. */
Evaluator(Context &context, bNodeTree &node_tree);
/* Evaluate the compositor node tree. If the node tree is already compiled into an operations
* stream, that stream will be evaluated directly. Otherwise, the node tree will be compiled and
* evaluated. */
void evaluate();
/* Invalidate the operations stream that was compiled for the node tree. This should be called
* when the node tree changes or the structure of any of the resources used by it changes. By
* structure, we mean things like the dimensions of the used images, while changes to their
* contents do not necessitate a reset. */
void reset();
private:
/* Check if the compositor node tree is valid by checking that it has none of the following:
* - Cyclic links.
* - Undefined nodes or sockets.
* - Unsupported nodes.
* If the node tree is valid, true is returned. Otherwise, false is returned, and an appropriate
* error message is set by calling the context's set_info_message method. */
bool validate_node_tree();
/* Compile the node tree into an operations stream and evaluate it. */
void compile_and_evaluate();
/* Compile the given node into a node operation, map each input to the result of the output
* linked to it, update the compile state, add the newly created operation to the operations
* stream, and evaluate the operation. */
void compile_and_evaluate_node(DNode node, CompileState &compile_state);
/* Map each input of the node operation to the result of the output linked to it. Unlinked inputs
* are mapped to the result of a newly created Input Single Value Operation, which is added to
* the operations stream and evaluated. Since this method might add operations to the operations
* stream, the actual node operation should only be added to the stream after this method has
* been called, so that the operations of its inputs precede it in the stream. */
void map_node_operation_inputs_to_their_results(DNode node,
NodeOperation *operation,
CompileState &compile_state);
/* Compile the shader compile unit into a shader operation, map each input of the operation to
* the result of the output linked to it, update the compile state, add the newly created
* operation to the operations stream, evaluate the operation, and finally reset the shader
* compile unit. */
void compile_and_evaluate_shader_compile_unit(CompileState &compile_state);
/* Map each input of the shader operation to the result of the output linked to it. */
void map_shader_operation_inputs_to_their_results(ShaderOperation *operation,
CompileState &compile_state);
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,34 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "COM_result.hh"
namespace blender::realtime_compositor {
/* ------------------------------------------------------------------------------------------------
* Input Descriptor
*
* A class that describes an input of an operation. */
class InputDescriptor {
 public:
  /* The type of input. This may be different than the type of result that the operation will
   * receive for the input, in which case, an implicit conversion operation will be added as an
   * input processor to convert it to the required type. Defaults to a float input so that a
   * default constructed descriptor never holds an indeterminate type, consistent with the other
   * members which all have default values. */
  ResultType type = ResultType::Float;
  /* If true, then the input does not need to be realized on the domain of the operation before its
   * execution. See the discussion in COM_domain.hh for more information. */
  bool skip_realization = false;
  /* The priority of the input for determining the operation domain. The non-single value input
   * with the highest priority will be used to infer the operation domain, the highest priority
   * being zero. See the discussion in COM_domain.hh for more information. */
  int domain_priority = 0;
  /* If true, the input expects a single value, and if a non-single value is provided, a default
   * single value will be used instead, see the get_<type>_value_default methods in the Result
   * class. It follows that this also implies skip_realization, because we don't need to realize a
   * result that will be discarded anyways. If false, the input can work with both single and
   * non-single values. */
  bool expects_single_value = false;
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,46 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_string_ref.hh"
#include "NOD_derived_node_tree.hh"
#include "COM_context.hh"
#include "COM_operation.hh"
#include "COM_result.hh"
namespace blender::realtime_compositor {
using namespace nodes::derived_node_tree_types;
/* ------------------------------------------------------------------------------------------------
* Input Single Value Operation
*
* An input single value operation is an operation that outputs a single value result whose value
* is the value of an unlinked input socket. This is typically used to initialize the values of
* unlinked node input sockets. */
class InputSingleValueOperation : public Operation {
private:
/* The identifier of the output. The operation has a single output, so the identifier is shared
* by all instances of the class. */
static const StringRef output_identifier_;
/* The input socket whose value will be computed as the operation's result. */
DInputSocket input_socket_;
public:
/* Construct the operation for the given input socket.
* NOTE(review): presumably this also populates the single output result whose type matches the
* type of the socket; confirm in the definition. */
InputSingleValueOperation(Context &context, DInputSocket input_socket);
/* Allocate a single value result and set its value to the default value of the input socket. */
void execute() override;
/* Get a reference to the output result of the operation, this essentially calls the super
* get_result with the output identifier of the operation. Note that this overload takes no
* identifier and hides the get_result method of the Operation base class when called through
* this class. */
Result &get_result();
private:
/* Populate the result of the operation, this essentially calls the super populate_result method
* with the output identifier of the operation. Like get_result, this hides the populate_result
* method of the Operation base class within this class. */
void populate_result(Result result);
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,56 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_string_ref.hh"
#include "DNA_node_types.h"
#include "NOD_derived_node_tree.hh"
#include "COM_context.hh"
#include "COM_operation.hh"
#include "COM_scheduler.hh"
namespace blender::realtime_compositor {
using namespace nodes::derived_node_tree_types;
/* ------------------------------------------------------------------------------------------------
* Node Operation
*
* A node operation is a subclass of operation that nodes should implement and instantiate in the
* get_compositor_operation function of bNodeType, passing the inputs given to that function to the
* constructor. This class essentially just implements a default constructor that populates output
* results for all outputs of the node as well as input descriptors for all inputs of the nodes
* based on their socket declaration. The class also provides some utility methods for easier
* implementation of nodes. */
class NodeOperation : public Operation {
private:
/* The derived node that this operation represents, stored by value. */
DNode node_;
public:
/* Populate the output results based on the node outputs and populate the input descriptors based
* on the node inputs. */
NodeOperation(Context &context, DNode node);
/* Compute and set the initial reference counts of all the results of the operation. The
* reference counts of the results are the number of operations that use those results, which is
* computed as the number of inputs whose node is part of the schedule and is linked to the
* output corresponding to each result. The node execution schedule is given as an input. */
void compute_results_reference_counts(const Schedule &schedule);
protected:
/* The following utilities are only accessible to the node operations deriving from this
* class. */
/* Returns a reference to the derived node that this operation represents. */
const DNode &node() const;
/* Returns a reference to the underlying bNode of the node that this operation represents. */
const bNode &bnode() const;
/* Returns true if the output identified by the given identifier is needed and should be
* computed, otherwise returns false. */
bool should_compute_output(StringRef identifier);
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,175 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include <memory>
#include <string>
#include "BLI_map.hh"
#include "BLI_string_ref.hh"
#include "BLI_vector.hh"
#include "COM_context.hh"
#include "COM_domain.hh"
#include "COM_input_descriptor.hh"
#include "COM_result.hh"
#include "COM_static_shader_manager.hh"
#include "COM_texture_pool.hh"
namespace blender::realtime_compositor {
class SimpleOperation;
/* A type representing a vector of simple operations that store the input processors for a
* particular input. */
using ProcessorsVector = Vector<std::unique_ptr<SimpleOperation>>;
/* ------------------------------------------------------------------------------------------------
* Operation
*
* The operation is the basic unit of the compositor. The evaluator compiles the compositor node
* tree into an ordered stream of operations which are then executed in order during evaluation.
* The operation class can be sub-classed to implement a new operation. Operations have a number of
* inputs and outputs that are declared during construction and are identified by string
* identifiers. Inputs are declared by calling declare_input_descriptor providing an appropriate
* descriptor. Those inputs are mapped to the results computed by other operations whose outputs
* are linked to the inputs. Such mappings are established by the compiler during compilation by
* calling the map_input_to_result method. Outputs are populated by calling the populate_result
* method, providing a result of an appropriate type. Upon execution, the operation allocates a
* result for each of its outputs and computes their value based on its inputs and options.
*
* Each input may have one or more input processors, which are simple operations that process the
* inputs before the operation is executed, see the discussion in COM_simple_operation.hh for more
* information. And thus the effective input of the operation is the result of the last input
* processor if one exists. Input processors are added and evaluated by calling the
* add_and_evaluate_input_processors method, which provides a default implementation that does
* things like implicit conversion, domain realization, and more. This default implementation can,
* however, be overridden, extended, or removed. Once the input processors are added and evaluated
* for the first time, they are stored in the operation and future evaluations can evaluate them
* directly without having to add them again.
*
* The operation is evaluated by calling the evaluate method, which first adds the input processors
* if they weren't added already and evaluates them, then it resets the results of the operation,
* then it calls the execute method of the operation, and finally it releases the results mapped to
* the inputs to declare that they are no longer needed. */
class Operation {
 private:
  /* A reference to the compositor context. This member references the same object in all
   * operations but is included in the class for convenience. The context is not owned by the
   * operation. */
  Context &context_;
  /* A mapping between each output of the operation identified by its identifier and the result for
   * that output. A result for each output of the operation should be constructed and added to the
   * map during operation construction by calling the populate_result method. The results should be
   * allocated and their contents should be computed in the execute method. */
  Map<std::string, Result> results_;
  /* A mapping between each input of the operation identified by its identifier and its input
   * descriptor. Those descriptors should be declared during operation construction by calling the
   * declare_input_descriptor method. */
  Map<std::string, InputDescriptor> input_descriptors_;
  /* A mapping between each input of the operation identified by its identifier and a pointer to
   * the computed result providing its data. The mapped result is either one that was computed by
   * another operation or one that was internally computed in the operation by the last input
   * processor for that input. It is the responsibility of the evaluator to map the inputs to their
   * linked results before evaluating the operation by calling the map_input_to_result method.
   * NOTE(review): unlike results_ and input_descriptors_, the keys here are non-owning StringRef
   * instances, so the identifier strings presumably outlive the operation; confirm against the
   * callers of map_input_to_result. */
  Map<StringRef, Result *> results_mapped_to_inputs_;
  /* A mapping between each input of the operation identified by its identifier and an ordered list
   * of simple operations to process that input. This is initialized the first time the input
   * processors are evaluated by calling the add_and_evaluate_input_processors method. Further
   * evaluations will evaluate the processors directly without the need to add them again. The
   * input_processors_added_ member indicates whether the processors were already added and can be
   * evaluated directly or need to be added and evaluated. The same NOTE(review) about the
   * non-owning StringRef keys of results_mapped_to_inputs_ applies here. */
  Map<StringRef, ProcessorsVector> input_processors_;
  /* True if the input processors were already added and can be evaluated directly. False if the
   * input processors are not yet added and need to be added. */
  bool input_processors_added_ = false;

 public:
  Operation(Context &context);

  /* Virtual because operations are sub-classed and stored through pointers to this base class. */
  virtual ~Operation();

  /* Evaluate the operation by:
   * 1. Evaluating the input processors.
   * 2. Resetting the results of the operation.
   * 3. Calling the execute method of the operation.
   * 4. Releasing the results mapped to the inputs. */
  void evaluate();

  /* Get a reference to the output result identified by the given identifier. */
  Result &get_result(StringRef identifier);

  /* Map the input identified by the given identifier to the result providing its data. See
   * results_mapped_to_inputs_ for more details. This should be called by the evaluator to
   * establish links between different operations. */
  void map_input_to_result(StringRef identifier, Result *result);

 protected:
  /* Compute the operation domain of this operation. By default, this implements a default logic
   * that infers the operation domain from the inputs, which may be overridden for a different
   * logic. See the discussion in COM_domain.hh for the inference logic and more information. */
  virtual Domain compute_domain();

  /* Add and evaluate any needed input processors, which essentially just involves calling the
   * add_and_evaluate_input_processor method with the needed processors. This is called before
   * executing the operation to prepare its inputs. The class defines a default implementation
   * which adds typically needed processors, but derived classes can override the method to have
   * a different implementation, extend the implementation, or remove it entirely. */
  virtual void add_and_evaluate_input_processors();

  /* Given the identifier of an input of the operation and a processor operation:
   * - Add the given processor to the list of input processors for the input.
   * - Map the input of the processor to be the result of the last input processor or the result
   *   mapped to the input if no previous processor exists.
   * - Switch the result mapped to the input to be the output result of the processor.
   * - Evaluate the processor.
   * NOTE(review): the processor is given as a raw pointer while ProcessorsVector stores unique
   * pointers, so this method presumably takes ownership of the processor; confirm in the
   * definition. */
  void add_and_evaluate_input_processor(StringRef identifier, SimpleOperation *processor);

  /* This method should allocate the operation results, execute the operation, and compute the
   * output results. */
  virtual void execute() = 0;

  /* Get a reference to the result connected to the input identified by the given identifier. */
  Result &get_input(StringRef identifier) const;

  /* Switch the result mapped to the input identified by the given identifier with the given
   * result. */
  void switch_result_mapped_to_input(StringRef identifier, Result *result);

  /* Add the given result to the results_ map identified by the given output identifier. This
   * should be called during operation construction for all outputs. The provided result shouldn't
   * be allocated or initialized, this will happen later during execution. */
  void populate_result(StringRef identifier, Result result);

  /* Declare the descriptor of the input identified by the given identifier to be the given
   * descriptor. Adds the given descriptor to the input_descriptors_ map identified by the given
   * input identifier. This should be called during operation construction for all inputs. */
  void declare_input_descriptor(StringRef identifier, InputDescriptor descriptor);

  /* Get a reference to the descriptor of the input identified by the given identifier. */
  InputDescriptor &get_input_descriptor(StringRef identifier);

  /* Returns a reference to the compositor context. */
  Context &context();

  /* Returns a reference to the texture pool of the compositor context. */
  TexturePool &texture_pool() const;

  /* Returns a reference to the shader manager of the compositor context. */
  StaticShaderManager &shader_manager() const;

 private:
  /* Evaluate the input processors. If the input processors were already added they will be
   * evaluated directly. Otherwise, the input processors will be added and evaluated. */
  void evaluate_input_processors();

  /* Resets the results of the operation. See the reset method in the Result class for more
   * information. */
  void reset_results();

  /* Release the results that are mapped to the inputs of the operation. This is called after the
   * evaluation of the operation to declare that the results are no longer needed by this
   * operation. */
  void release_inputs();
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,49 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "GPU_shader.h"
#include "COM_context.hh"
#include "COM_domain.hh"
#include "COM_input_descriptor.hh"
#include "COM_result.hh"
#include "COM_simple_operation.hh"
namespace blender::realtime_compositor {
/* ------------------------------------------------------------------------------------------------
* Realize On Domain Operation
*
* A simple operation that projects the input on a certain target domain, copies the area of the
* input that intersects the target domain, and fills the rest with zeros or repetitions of the
* input depending on the realization options of the target domain. See the discussion in
* COM_domain.hh for more information. */
class RealizeOnDomainOperation : public SimpleOperation {
 private:
  /* The target domain to realize the input on. */
  Domain domain_;

 public:
  /* Construct an operation that realizes its input on the given target domain.
   * NOTE(review): type is presumably the type of both the input and the output result; confirm in
   * the definition. */
  RealizeOnDomainOperation(Context &context, Domain domain, ResultType type);

  void execute() override;

  /* Determine if a realize on domain operation is needed for the input with the given result and
   * descriptor in an operation with the given operation domain. If it is not needed, return a null
   * pointer. If it is needed, return an instance of the operation.
   * NOTE(review): the instance is returned as a raw pointer, so the caller presumably takes
   * ownership of it; confirm against the callers. */
  static SimpleOperation *construct_if_needed(Context &context,
                                              const Result &input_result,
                                              const InputDescriptor &input_descriptor,
                                              const Domain &operation_domain);

 protected:
  /* The operation domain is just the target domain. */
  Domain compute_domain() override;

 private:
  /* Get the realization shader of the appropriate type. */
  GPUShader *get_realization_shader();
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,31 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "COM_context.hh"
#include "COM_result.hh"
#include "COM_simple_operation.hh"
namespace blender::realtime_compositor {
/* ------------------------------------------------------------------------------------------------
* Reduce To Single Value Operation
*
* A simple operation that reduces its input result into a single value output result. The input is
* assumed to be a texture result of size 1x1, that is, a texture composed of a single pixel, the
* value of which shall serve as the single value of the output result. */
class ReduceToSingleValueOperation : public SimpleOperation {
public:
/* Construct a reduce to single value operation.
* NOTE(review): type is presumably the type of both the input and the output result; confirm in
* the definition. */
ReduceToSingleValueOperation(Context &context, ResultType type);
/* Download the input pixel from the GPU texture and set its value to the value of the allocated
* single value output result. */
void execute() override;
/* Determine if a reduce to single value operation is needed for the input with the
* given result. If it is not needed, return a null pointer. If it is needed, return an instance
* of the operation.
* NOTE(review): the instance is returned as a raw pointer, so the caller presumably takes
* ownership of it; confirm against the callers. */
static SimpleOperation *construct_if_needed(Context &context, const Result &input_result);
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,234 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_float3x3.hh"
#include "BLI_math_vec_types.hh"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "COM_domain.hh"
#include "COM_texture_pool.hh"
namespace blender::realtime_compositor {
/* Possible data types that operations can operate on. They either represent the base type of the
* result texture or a single value result. */
enum class ResultType : uint8_t {
  /* A scalar result, single values of this type are held as a float. */
  Float = 0,
  /* A vector result, single values of this type are held as a float3. */
  Vector = 1,
  /* A color result, single values of this type are held as a float4. */
  Color = 2,
};
/* ------------------------------------------------------------------------------------------------
* Result
*
* A result represents the computed value of an output of an operation. A result can either
* represent an image or a single value. A result is typed, and can be of type color, vector, or
* float. Single value results are stored in 1x1 textures to make them easily accessible in
* shaders. But the same value is also stored in the value union member of the result for any
* host-side processing. The texture of the result is allocated from the texture pool referenced by
* the result.
*
* Results are reference counted and their textures are released once their reference count reaches
* zero. After constructing a result, the set_initial_reference_count method is called to declare
* the number of operations that need this result. Once each operation that needs the result no
* longer needs it, the release method is called and the reference count is decremented, until it
* reaches zero, where the result's texture is then released. Since results are eventually
* decremented to zero by the end of every evaluation, the reference count is restored before every
* evaluation to its initial reference count by calling the reset method, which is why a separate
* member initial_reference_count_ is stored to keep track of the initial value.
*
* A result not only represents an image, but also the area it occupies in the virtual compositing
* space. This area is called the Domain of the result, see the discussion in COM_domain.hh for
* more information.
*
* A result can be a proxy result that merely wraps another master result, in which case, it shares
* its values and delegates all reference counting to it. While a proxy result shares the value of
* the master result, it can have a different domain. Consequently, transformation operations are
* implemented using proxy results, where their results are proxy results of their inputs but with
* their domains transformed based on their options. Moreover, proxy results can also be used as
* the results of identity operations, that is, operations that do nothing to their inputs in
* certain configurations. In which case, the proxy result is left as is with no extra
* transformation on its domain whatsoever. Proxy results can be created by calling the
* pass_through method, see that method for more details. */
class Result {
private:
/* The base type of the texture or the type of the single value. */
ResultType type_;
/* If true, the result is a single value, otherwise, the result is a texture. Defaults to false
* so that the flag holds a defined value until one of the allocate methods declares what the
* result is. */
bool is_single_value_ = false;
/* A GPU texture storing the result data. This will be a 1x1 texture if the result is a single
* value, the value of which will be identical to that of the value member. See class description
* for more information. */
GPUTexture *texture_ = nullptr;
/* The texture pool used to allocate the texture of the result, this should be initialized during
* construction. */
TexturePool *texture_pool_ = nullptr;
/* The number of operations that currently need this result. At the time when the result is
* computed, this member will have a value that matches initial_reference_count_. Once each
* operation that needs the result no longer needs it, the release method is called and the
* reference count is decremented, until it reaches zero, where the result's texture is then
* released. If this result has a master result, then this reference count is irrelevant and
* shadowed by the reference count of the master result. Defaults to zero so that it holds a
* defined value before the first call to reset, in particular when the result is copied. */
int reference_count_ = 0;
/* The number of operations that reference and use this result at the time when it was initially
* computed. Since reference_count_ is decremented and always becomes zero at the end of the
* evaluation, this member is used to reset the reference count of the results for later
* evaluations by calling the reset method. This member is also used to determine if this result
* should be computed by calling the should_compute method. Defaults to zero, that is, no users,
* so that it holds a defined value even if set_initial_reference_count was never called. */
int initial_reference_count_ = 0;
/* If the result is a single value, this member stores the value of the result, the value of
* which will be identical to that stored in the texture member. The active union member depends
* on the type of the result. This member is uninitialized and should not be used if the result
* is a texture. */
union {
float float_value_;
float3 vector_value_;
float4 color_value_;
};
/* The domain of the result. This only matters if the result is a texture. See the discussion in
* COM_domain.hh for more information. */
Domain domain_ = Domain::identity();
/* If not nullptr, then this result wraps and shares the value of another master result. In this
* case, calls to texture-related methods like increment_reference_count and release should
* operate on the master result as opposed to this result. This member is typically set upon
* calling the pass_through method, which sets this result to be the master of a target result.
* See that method for more information. */
Result *master_ = nullptr;
public:
/* Construct a result of the given type with the given texture pool that will be used to allocate
* and release the result's texture. */
Result(ResultType type, TexturePool &texture_pool);
/* Declare the result to be a texture result, allocate a texture of an appropriate type with
* the size of the given domain from the result's texture pool, and set the domain of the result
* to the given domain. */
void allocate_texture(Domain domain);
/* Declare the result to be a single value result, allocate a texture of an appropriate
* type with size 1x1 from the result's texture pool, and set the domain to be an identity
* domain. See class description for more information. */
void allocate_single_value();
/* Allocate a single value result and set its value to zero. This is called for results whose
* value can't be computed and are considered invalid. */
void allocate_invalid();
/* Bind the texture of the result to the texture image unit with the given name in the given
* shader, which is expected to be currently bound. This also inserts a memory barrier for
* texture fetches to ensure any prior writes to the texture are reflected before reading from
* it. */
void bind_as_texture(GPUShader *shader, const char *texture_name) const;
/* Bind the texture of the result to the image unit with the given name in the given shader,
* which is expected to be currently bound. */
void bind_as_image(GPUShader *shader, const char *image_name) const;
/* Unbind the texture which was previously bound using bind_as_texture. */
void unbind_as_texture() const;
/* Unbind the texture which was previously bound using bind_as_image. */
void unbind_as_image() const;
/* Pass this result through to a target result, in which case, the target result becomes a proxy
* result with this result as its master result. This is done by making the target result a copy
* of this result, essentially having identical values between the two and consequently sharing
* the underlying texture. An exception is the initial reference count, whose value is retained
* and not copied, because it is a property of the original result and is needed for correctly
* resetting the result before the next evaluation. Additionally, this result is set to be the
* master of the target result, by setting the master member of the target. Finally, the
* reference count of the result is incremented by the reference count of the target result. See
* the discussion above for more information. */
void pass_through(Result &target);
/* Transform the result by the given transformation. This effectively pre-multiplies the given
* transformation by the current transformation of the domain of the result. */
void transform(const float3x3 &transformation);
/* Get a reference to the realization options of this result. See the RealizationOptions struct
* for more information. */
RealizationOptions &get_realization_options();
/* If the result is a single value result of type float, return its float value. Otherwise, an
* uninitialized value is returned. */
float get_float_value() const;
/* If the result is a single value result of type vector, return its vector value. Otherwise, an
* uninitialized value is returned. */
float3 get_vector_value() const;
/* If the result is a single value result of type color, return its color value. Otherwise, an
* uninitialized value is returned. */
float4 get_color_value() const;
/* Same as get_float_value but returns a default value if the result is not a single value. */
float get_float_value_default(float default_value) const;
/* Same as get_vector_value but returns a default value if the result is not a single value. */
float3 get_vector_value_default(const float3 &default_value) const;
/* Same as get_color_value but returns a default value if the result is not a single value. */
float4 get_color_value_default(const float4 &default_value) const;
/* If the result is a single value result of type float, set its float value and upload it to the
* texture. Otherwise, the behavior is undefined. */
void set_float_value(float value);
/* If the result is a single value result of type vector, set its vector value and upload it to
* the texture. Otherwise, the behavior is undefined. */
void set_vector_value(const float3 &value);
/* If the result is a single value result of type color, set its color value and upload it to the
* texture. Otherwise, the behavior is undefined. */
void set_color_value(const float4 &value);
/* Set the value of initial_reference_count_, see that member for more details. This should be
* called after constructing the result to declare the number of operations that need it. */
void set_initial_reference_count(int count);
/* Reset the result to prepare it for a new evaluation. This should be called before evaluating
* the operation that computes this result. First, set the value of reference_count_ to the value
* of initial_reference_count_ since reference_count_ may have already been decremented to zero
* in a previous evaluation. Second, set master_ to nullptr because the result may have been
* turned into a proxy result in a previous evaluation. Other fields don't need to be reset
* because they are runtime and overwritten during evaluation. */
void reset();
/* Increment the reference count of the result by the given count. If this result has a master
* result, the reference count of the master result is incremented instead. */
void increment_reference_count(int count = 1);
/* Decrement the reference count of the result and release its texture back into the texture
* pool if the reference count reaches zero. This should be called when an operation that used
* this result no longer needs it. If this result has a master result, the master result is
* released instead. */
void release();
/* Returns true if this result should be computed and false otherwise. The result should be
* computed if its reference count is not zero, that is, its result is used by at least one
* operation. */
bool should_compute();
/* Returns the type of the result. */
ResultType type() const;
/* Returns true if the result is a texture and false if it is a single value. */
bool is_texture() const;
/* Returns true if the result is a single value and false if it is a texture. */
bool is_single_value() const;
/* Returns the allocated GPU texture of the result. */
GPUTexture *texture() const;
/* Returns the reference count of the result. If this result has a master result, then the
* reference count of the master result is returned instead. */
int reference_count() const;
/* Returns a reference to the domain of the result. See the Domain class. */
const Domain &domain() const;
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,21 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_vector_set.hh"
#include "NOD_derived_node_tree.hh"
namespace blender::realtime_compositor {
using namespace nodes::derived_node_tree_types;
/* A type representing the ordered set of nodes defining the schedule of node execution. The order
* of the nodes in the set is the order of execution. */
using Schedule = VectorSet<DNode>;
/* Computes the execution schedule of the node tree. This is essentially a post-order depth first
* traversal of the node tree from the output node to the leaf input nodes, with informed order of
* traversal of dependencies based on a heuristic estimation of the number of needed buffers.
*
* The schedule is returned by value.
* NOTE(review): the tree is taken by non-const reference; whether it is modified is not visible
* from this declaration. */
Schedule compute_schedule(DerivedNodeTree &tree);
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,87 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_string_ref.hh"
#include "BLI_vector.hh"
#include "DNA_node_types.h"
#include "GPU_material.h"
#include "NOD_derived_node_tree.hh"
namespace blender::realtime_compositor {
using namespace nodes::derived_node_tree_types;
/* ------------------------------------------------------------------------------------------------
* Shader Node
*
* A shader node encapsulates a compositor node that is capable of being used together with
* other shader nodes to construct a Shader Operation using the GPU material compiler. A GPU node
* stack for each of the node inputs and outputs is stored and populated during construction in
* order to represent the node as a GPU node inside the GPU material graph, see GPU_material.h for
* more information. Derived classes should implement the compile method to add the node and link
* it to the GPU material given to the method. The compiler is expected to initialize the input
* links of the node before invoking the compile method. See the discussion in
* COM_shader_operation.hh for more information. */
class ShaderNode {
private:
/* The node that this shader node represents. */
DNode node_;
/* The GPU node stacks of the inputs of the node. Those are populated during construction in the
* populate_inputs method. The links of the inputs are initialized by the GPU material compiler
* prior to calling the compile method. There is an extra stack at the end to mark the end of the
* array, as this is what the GPU module functions expect. */
Vector<GPUNodeStack> inputs_;
/* The GPU node stacks of the outputs of the node. Those are populated during construction in the
* populate_outputs method. There is an extra stack at the end to mark the end of the array, as
* this is what the GPU module functions expect. */
Vector<GPUNodeStack> outputs_;
public:
/* Construct the node by populating both its inputs and outputs. */
ShaderNode(DNode node);
/* Virtual because shader nodes are meant to be derived from, see the compile method. */
virtual ~ShaderNode() = default;
/* Compile the node by adding the appropriate GPU material graph nodes and linking the
* appropriate resources. Must be implemented by derived classes. */
virtual void compile(GPUMaterial *material) = 0;
/* Returns a contiguous array containing the GPU node stacks of each input. See inputs_ for the
* extra stack that marks the end of the array. */
GPUNodeStack *get_inputs_array();
/* Returns a contiguous array containing the GPU node stacks of each output. See outputs_ for the
* extra stack that marks the end of the array. */
GPUNodeStack *get_outputs_array();
/* Returns the GPU node stack of the input with the given identifier. */
GPUNodeStack &get_input(StringRef identifier);
/* Returns the GPU node stack of the output with the given identifier. */
GPUNodeStack &get_output(StringRef identifier);
/* Returns the GPU node link of the input with the given identifier. If the input is not linked,
* a uniform link carrying the value of the input will be created and returned. It is expected
* that the caller will use the returned link in a GPU material, otherwise, the link may not be
* properly freed. */
GPUNodeLink *get_input_link(StringRef identifier);
protected:
/* Returns a reference to the derived node that this shader node represents. */
const DNode &node() const;
/* Returns a reference to the node that this shader node represents. */
bNode &bnode() const;
private:
/* Populate the inputs of the node. The input link is set to nullptr and is expected to be
* initialized by the GPU material compiler before calling the compile method. */
void populate_inputs();
/* Populate the outputs of the node. The output link is set to nullptr and is expected to be
* initialized by the compile method. */
void populate_outputs();
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,242 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include <memory>
#include "BLI_map.hh"
#include "BLI_string_ref.hh"
#include "BLI_vector_set.hh"
#include "GPU_material.h"
#include "GPU_shader.h"
#include "gpu_shader_create_info.hh"
#include "NOD_derived_node_tree.hh"
#include "COM_context.hh"
#include "COM_operation.hh"
#include "COM_scheduler.hh"
namespace blender::realtime_compositor {
using namespace nodes::derived_node_tree_types;
/* A type representing a contiguous subset of the node execution schedule that will be compiled
* together into a single Shader Operation. See the ShaderOperation class for how the unit is
* used. */
using ShaderCompileUnit = VectorSet<DNode>;
/* ------------------------------------------------------------------------------------------------
* Shader Operation
*
* An operation that evaluates a shader compiled from a contiguous subset of the node execution
* schedule using the GPU material compiler, see GPU_material.h for more information. The subset
* of the node execution schedule is called a shader compile unit, see the discussion in
* COM_compile_state.hh for more information.
*
* Consider the following node graph with a node execution schedule denoted by the number on each
* node. The compiler may decide to compile a subset of the execution schedule into a shader
* operation, in this case, the nodes from 3 to 5 were compiled together into a shader operation.
* This subset is called the shader compile unit. See the discussion in COM_evaluator.hh for more
* information on the compilation process. Each of the nodes inside the compile unit implements a
* Shader Node which is instantiated, stored in shader_nodes_, and used during compilation. See the
* discussion in COM_shader_node.hh for more information. Links that are internal to the shader
* operation are established between the input and outputs of the shader nodes, for instance, the
* links between nodes 3 and 4 as well as those between nodes 4 and 5. However, links that cross
* the boundary of the shader operation need special handling.
*
*                                       Shader Operation
*                   +------------------------------------------------------+
* .------------.    |  .------------.  .------------.      .------------.  |  .------------.
* |   Node 1   |    |  |   Node 3   |  |   Node 4   |      |   Node 5   |  |  |   Node 6   |
* |            |----|--|            |--|            |------|            |--|--|            |
* |            |  .-|--|            |  |            |  .---|            |  |  |            |
* '------------'  | |  '------------'  '------------'  |   '------------'  |  '------------'
*                 | +----------------------------------|-------------------+
* .------------.  |                                    |
* |   Node 2   |  |                                    |
* |            |--'------------------------------------'
* |            |
* '------------'
*
* Links from nodes that are not part of the shader operation to nodes that are part of the shader
* operation are considered inputs of the operation itself and are declared as such. For instance,
* the link from node 1 to node 3 is declared as an input to the operation, and the same applies
* for the links from node 2 to nodes 3 and 5. Note, however, that only one input is declared for
* each distinct output socket, so both links from node 2 share the same input of the operation.
* An input to the operation is declared for a distinct output socket as follows:
*
* - A texture is added to the shader, which will be bound to the result of the output socket
* during evaluation.
* - A GPU attribute is added to the GPU material for that output socket and is linked to the GPU
* input stack of the inputs linked to the output socket.
* - Code is emitted to initialize the values of the attributes by sampling the textures
* corresponding to each of the inputs.
* - The newly added attribute is mapped to the output socket in output_to_material_attribute_map_
* to share that same attribute with all inputs linked to the same output socket.
*
* Links from nodes that are part of the shader operation to nodes that are not part of the shader
* operation are considered outputs of the operation itself and are declared as such. For instance,
* the link from node 5 to node 6 is declared as an output to the operation. An output to the
* operation is declared for an output socket as follows:
*
* - An image is added in the shader where the output value will be written.
* - A storer GPU material node that stores the value of the output is added and linked to the GPU
* output stack of the output. The storer will store the value in the image identified by the
* index of the output given to the storer.
* - The storer functions are generated dynamically to map each index with its appropriate image.
*
* The GPU material code generator source is used to construct a compute shader that is then
* dispatched during operation evaluation after binding the inputs, outputs, and any necessary
* resources. */
class ShaderOperation : public Operation {
private:
/* The compile unit that will be compiled into this shader operation. */
ShaderCompileUnit compile_unit_;
/* The GPU material backing the operation. This is created and compiled during construction and
* freed during destruction. Defaults to nullptr so that the pointer is never indeterminate
* before the constructor assigns it. */
GPUMaterial *material_ = nullptr;
/* A map that associates each node in the compile unit with an instance of its shader node. */
Map<DNode, std::unique_ptr<ShaderNode>> shader_nodes_;
/* A map that associates the identifier of each input of the operation with the output socket it
* is linked to. This is needed to help the compiler establish links between operations. */
Map<std::string, DOutputSocket> inputs_to_linked_outputs_map_;
/* A map that associates the output socket that provides the result of an output of the operation
* with the identifier of that output. This is needed to help the compiler establish links
* between operations. */
Map<DOutputSocket, std::string> output_sockets_to_output_identifiers_map_;
/* A map that associates the output socket of a node that is not part of the shader operation to
* the attribute that was created for it. This is used to share the same attribute with all
* inputs that are linked to the same output socket. */
Map<DOutputSocket, GPUNodeLink *> output_to_material_attribute_map_;
public:
/* Construct and compile a GPU material from the given shader compile unit by calling
* GPU_material_from_callbacks with the appropriate callbacks. */
ShaderOperation(Context &context, ShaderCompileUnit &compile_unit);
/* Free the GPU material. */
~ShaderOperation();
/* Allocate the output results, bind the shader and all its needed resources, then dispatch the
* shader. */
void execute() override;
/* Get the identifier of the operation output corresponding to the given output socket. This is
* called by the compiler to identify the operation output that provides the result for an input
* by providing the output socket that the input is linked to. See
* output_sockets_to_output_identifiers_map_ for more information. */
StringRef get_output_identifier_from_output_socket(DOutputSocket output_socket);
/* Get a reference to the inputs to linked outputs map of the operation. This is called by the
* compiler to identify the output that each input of the operation is linked to for correct
* input mapping. See inputs_to_linked_outputs_map_ for more information. */
Map<std::string, DOutputSocket> &get_inputs_to_linked_outputs_map();
/* Compute and set the initial reference counts of all the results of the operation. The
* reference counts of the results are the number of operations that use those results, which is
* computed as the number of inputs whose node is part of the schedule and is linked to the
* output corresponding to each of the results of the operation. The node execution schedule is
* given as an input. */
void compute_results_reference_counts(const Schedule &schedule);
private:
/* Bind the uniform buffer of the GPU material as well as any color band textures needed by the
* GPU material. The compiled shader of the material is given as an argument and assumed to be
* bound. */
void bind_material_resources(GPUShader *shader);
/* Bind the input results of the operation to the appropriate textures in the GPU material. The
* attributes stored in output_to_material_attribute_map_ have names that match the texture
* samplers in the shader as well as the identifiers of the operation inputs that they correspond
* to. The compiled shader of the material is given as an argument and assumed to be bound. */
void bind_inputs(GPUShader *shader);
/* Bind the output results of the operation to the appropriate images in the GPU material. The
* names of the images in the shader match the identifiers of their corresponding outputs. The
* compiled shader of the material is given as an argument and assumed to be bound. */
void bind_outputs(GPUShader *shader);
/* A static callback method of interface ConstructGPUMaterialFn that is passed to
* GPU_material_from_callbacks to construct the GPU material graph. The thunk parameter will be a
* pointer to the instance of ShaderOperation that is being compiled. The method goes over the
* compile unit and does the following for each node:
*
* - Instantiate a ShaderNode from the node and add it to shader_nodes_.
* - Link the inputs of the node if needed. The inputs are either linked to other nodes in the
* GPU material graph or are exposed as inputs to the shader operation itself if they are
* linked to nodes that are not part of the shader operation.
* - Call the compile method of the shader node to actually add and link the GPU material graph
* nodes.
* - If any of the outputs of the node are linked to nodes that are not part of the shader
* operation, they are exposed as outputs to the shader operation itself. */
static void construct_material(void *thunk, GPUMaterial *material);
/* Link the inputs of the node if needed. Unlinked inputs are ignored as they will be linked by
* the node compile method. If the input is linked to a node that is not part of the shader
* operation, the input will be exposed as an input to the shader operation and linked to it.
* While if the input is linked to a node that is part of the shader operation, then it is linked
* to that node in the GPU material node graph. */
void link_node_inputs(DNode node, GPUMaterial *material);
/* Given the input socket of a node that is part of the shader operation which is linked to the
* given output socket of a node that is also part of the shader operation, just link the output
* link of the GPU node stack of the output socket to the input link of the GPU node stack of the
* input socket. This essentially establishes the needed links in the GPU material node graph. */
void link_node_input_internal(DInputSocket input_socket, DOutputSocket output_socket);
/* Given the input socket of a node that is part of the shader operation which is linked to the
* given output socket of a node that is not part of the shader operation, declare a new
* operation input and link it to the input link of the GPU node stack of the input socket. An
* operation input is only declared if no input was already declared for that same output socket
* before. */
void link_node_input_external(DInputSocket input_socket,
DOutputSocket output_socket,
GPUMaterial *material);
/* Given the input socket of a node that is part of the shader operation which is linked to the
* given output socket of a node that is not part of the shader operation, declare a new input to
* the operation that is represented in the GPU material by a newly created GPU attribute. It is
* assumed that no operation input was declared for this same output socket before. In the
* generate_code_for_inputs method, a texture will be added in the shader for each of the
* declared inputs, having the same name as the attribute. Additionally, code will be emitted to
* initialize the attributes by sampling their corresponding textures. */
void declare_operation_input(DInputSocket input_socket,
DOutputSocket output_socket,
GPUMaterial *material);
/* Populate the output results of the shader operation for output sockets of the given node that
* are linked to nodes outside of the shader operation. */
void populate_results_for_node(DNode node, GPUMaterial *material);
/* Given the output socket of a node that is part of the shader operation which is linked to an
* input socket of a node that is not part of the shader operation, declare a new output to the
* operation and link it to an output storer passing in the index of the output. In the
* generate_code_for_outputs method, an image will be added in the shader for each of the
* declared outputs. Additionally, code will be emitted to define the storer functions that store
* the value in the appropriate image identified by the given index. */
void populate_operation_result(DOutputSocket output_socket, GPUMaterial *material);
/* A static callback method of interface GPUCodegenCallbackFn that is passed to
* GPU_material_from_callbacks to create the shader create info of the GPU material. The thunk
* parameter will be a pointer to the instance of ShaderOperation that is being compiled.
*
* This method first generates the necessary code to load the inputs and store the outputs. Then,
* it creates a compute shader from the generated sources. Finally, it adds the necessary GPU
* resources to the shader. */
static void generate_code(void *thunk, GPUMaterial *material, GPUCodegenOutput *code_generator);
/* Add an image in the shader for each of the declared outputs. Additionally, emit code to define
* the storer functions that store the given value in the appropriate image identified by the
* given index. */
void generate_code_for_outputs(gpu::shader::ShaderCreateInfo &shader_create_info);
/* Add a texture in the shader for each of the declared inputs/attributes in the operation,
* having the same name as the attribute. Additionally, emit code to initialize the attributes by
* sampling their corresponding textures. */
void generate_code_for_inputs(GPUMaterial *material,
gpu::shader::ShaderCreateInfo &shader_create_info);
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,64 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_string_ref.hh"
#include "COM_operation.hh"
#include "COM_result.hh"
namespace blender::realtime_compositor {
/* ------------------------------------------------------------------------------------------------
* Simple Operation
*
* A simple operation is an operation that takes exactly one input and computes exactly one output.
* Moreover, the output is guaranteed to only have a single user, that is, its reference count will
* be one. Such operations can be attached to the inputs of operations to pre-process the inputs to
* prepare them before the operation is executed. */
class SimpleOperation : public Operation {
private:
/* The identifier of the output. This is constant for all operations. */
static const StringRef output_identifier_;
/* The identifier of the input. This is constant for all operations. */
static const StringRef input_identifier_;
public:
/* Inherit the constructors of Operation. */
using Operation::Operation;
/* Get a reference to the output result of the operation, this essentially calls the super
* get_result method with the output identifier of the operation. */
Result &get_result();
/* Map the input of the operation to the given result, this essentially calls the super
* map_input_to_result method with the input identifier of the operation. */
void map_input_to_result(Result *result);
protected:
/* Simple operations don't need input processors, so override with an empty implementation. */
void add_and_evaluate_input_processors() override;
/* Get a reference to the input result of the operation, this essentially calls the super
* get_result method with the input identifier of the operation.
* NOTE(review): the super method named above is presumably meant to be get_input rather than
* get_result, confirm against the Operation class. */
Result &get_input();
/* Switch the result mapped to the input with the given result, this essentially calls the super
* switch_result_mapped_to_input method with the input identifier of the operation. */
void switch_result_mapped_to_input(Result *result);
/* Populate the result of the operation, this essentially calls the super populate_result method
* with the output identifier of the operation and sets the initial reference count of the result
* to 1, since the result of a simple operation is guaranteed to have a single user. */
void populate_result(Result result);
/* Declare the descriptor of the input of the operation to be the given descriptor, this
* essentially calls the super declare_input_descriptor method with the input identifier of the
* operation. */
void declare_input_descriptor(InputDescriptor descriptor);
/* Get a reference to the descriptor of the input, this essentially calls the super
* get_input_descriptor method with the input identifier of the operation. */
InputDescriptor &get_input_descriptor();
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_map.hh"
#include "BLI_string_ref.hh"
#include "GPU_shader.h"
namespace blender::realtime_compositor {
/* -------------------------------------------------------------------------------------------------
* Static Shader Manager
*
* A static shader manager is a map of shaders identified by their info name that can be acquired
* and reused throughout the evaluation of the compositor and are only freed when the shader
* manager is destroyed. Once a shader is acquired for the first time, it will be cached in the
* manager to be potentially acquired later if needed without the shader creation overhead. */
class StaticShaderManager {
private:
/* The set of shaders identified by their info name that are currently available in the manager
* to be acquired.
* NOTE(review): the keys are StringRef, which presumably does not own its characters, so the
* info name strings passed to get likely need to outlive the manager, for instance by being
* string literals. Confirm against the callers. */
Map<StringRef, GPUShader *> shaders_;
public:
/* Free the cached shaders, see the class description.
* NOTE(review): no copy operations are declared or deleted here, presumably the manager is never
* copied since a copy would share the same shader pointers. Confirm. */
~StaticShaderManager();
/* Check if there is an available shader with the given info name in the manager, if such shader
* exists, return it, otherwise, return a newly created shader and add it to the manager. */
GPUShader *get(const char *info_name);
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,86 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include <cstdint>
#include "BLI_map.hh"
#include "BLI_math_vec_types.hh"
#include "BLI_vector.hh"
#include "GPU_texture.h"
namespace blender::realtime_compositor {
/* ------------------------------------------------------------------------------------------------
* Texture Pool Key
*
* A key used to identify a texture specification in a texture pool. Defines a hash and an equality
* operator for use in a hash map. */
class TexturePoolKey {
public:
/* The size of the texture that the key identifies. */
int2 size;
/* The format of the texture that the key identifies. */
eGPUTextureFormat format;
/* Construct a key from the given texture size and format. */
TexturePoolKey(int2 size, eGPUTextureFormat format);
/* Construct a key from the size and format of the given texture. */
TexturePoolKey(const GPUTexture *texture);
/* Returns the hash of the key for use in a hash map.
* NOTE(review): presumably computed from both size and format, the definition is not visible
* here. */
uint64_t hash() const;
};
/* The equality operator of texture pool keys for use in a hash map.
* NOTE(review): presumably two keys are equal when both their sizes and formats are equal, the
* definition is not visible here. */
bool operator==(const TexturePoolKey &a, const TexturePoolKey &b);
/* ------------------------------------------------------------------------------------------------
* Texture Pool
*
* A texture pool allows the allocation and reuse of textures throughout the execution of the
* compositor to avoid memory fragmentation and texture allocation overheads. The texture pool
* delegates the actual texture allocation to an allocate_texture method that should be implemented
* by the caller of the compositor evaluator, allowing a more agnostic and flexible execution that
* can be controlled by the caller. If the compositor is expected to execute frequently, like on
* every redraw, then the allocation method should use a persistent texture pool to allow
* cross-evaluation texture pooling, for instance, by using the DRWTexturePool. But if the
* evaluator is expected to execute infrequently, the allocated textures can just be freed when the
* evaluator is done, that is, when the pool is destructed. */
class TexturePool {
private:
/* The set of textures in the pool that are available to acquire for each distinct texture
* specification. */
Map<TexturePoolKey, Vector<GPUTexture *>> textures_;
public:
/* The pool is an abstract base class that is implemented by the caller of the evaluator, so the
* destructor is virtual to keep the destruction of a derived pool through a pointer or a
* reference to TexturePool well defined. The base class itself frees nothing, derived classes
* are responsible for freeing their textures if needed, see the class description. */
virtual ~TexturePool() = default;
/* Check if there is an available texture with the given specification in the pool, if such
* texture exists, return it, otherwise, return a newly allocated texture. Expect the texture to
* be uncleared and to possibly contain garbage data. */
GPUTexture *acquire(int2 size, eGPUTextureFormat format);
/* Shorthand for acquire with GPU_RGBA16F format. */
GPUTexture *acquire_color(int2 size);
/* Shorthand for acquire with GPU_RGBA16F format. Identical to acquire_color because vectors
* are stored in RGBA textures, due to the limited support for RGB textures. */
GPUTexture *acquire_vector(int2 size);
/* Shorthand for acquire with GPU_R16F format. */
GPUTexture *acquire_float(int2 size);
/* Put the texture back into the pool, potentially to be acquired later by another user. Expects
* the texture to be one that was acquired using the same texture pool. */
void release(GPUTexture *texture);
/* Reset the texture pool by clearing all available textures without freeing the textures. If the
* textures will no longer be needed, they should be freed in the destructor. This should be
* called after the compositor is done evaluating. */
void reset();
private:
/* Returns a newly allocated texture with the given specification. This method should be
* implemented by the caller of the compositor evaluator. See the class description for more
* information. */
virtual GPUTexture *allocate_texture(int2 size, eGPUTextureFormat format) = 0;
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,61 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_function_ref.hh"
#include "BLI_math_vec_types.hh"
#include "NOD_derived_node_tree.hh"
#include "GPU_shader.h"
#include "COM_input_descriptor.hh"
#include "COM_result.hh"
namespace blender::realtime_compositor {
using namespace nodes::derived_node_tree_types;
/* Get the origin socket of the given node input. If the input is not linked, the socket itself is
* returned. If the input is linked, the socket that is linked to it is returned, which could
* either be an input or an output. An input socket is returned when the given input is connected
* to an unlinked input of a group input node. */
DSocket get_input_origin_socket(DInputSocket input);
/* Get the output socket linked to the given node input. If the input is not linked to an output, a
* null output is returned. */
DOutputSocket get_output_linked_to_input(DInputSocket input);
/* Get the result type that corresponds to the type of the given socket. */
ResultType get_node_socket_result_type(const SocketRef *socket);
/* Returns true if any of the nodes linked to the given output satisfies the given condition, and
* false otherwise. */
bool is_output_linked_to_node_conditioned(DOutputSocket output,
FunctionRef<bool(DNode)> condition);
/* Returns the number of inputs linked to the given output that satisfy the given condition. */
int number_of_inputs_linked_to_output_conditioned(DOutputSocket output,
FunctionRef<bool(DInputSocket)> condition);
/* A node is a shader node if it defines a method to get a shader node operation. */
bool is_shader_node(DNode node);
/* Returns true if the given node is supported, that is, has an implementation. Returns false
* otherwise. */
bool is_node_supported(DNode node);
/* Get the input descriptor of the given input socket. */
InputDescriptor input_descriptor_from_input_socket(const InputSocketRef *socket);
/* Dispatch the given compute shader in a 2D compute space such that the number of threads in both
* dimensions is as small as possible but at least covers the entirety of threads_range assuming
* the shader has a local group size given by local_size. That means that the number of threads
* might be a bit larger than threads_range, so shaders have to take that into consideration. A
* default local size of 16x16 is assumed, which is the optimal local size for many image
* processing shaders. */
void compute_dispatch_threads_at_least(GPUShader *shader,
int2 threads_range,
int2 local_size = int2(16));
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,163 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <limits>
#include "BLI_math_vec_types.hh"
#include "DNA_node_types.h"
#include "NOD_derived_node_tree.hh"
#include "COM_compile_state.hh"
#include "COM_domain.hh"
#include "COM_input_descriptor.hh"
#include "COM_node_operation.hh"
#include "COM_result.hh"
#include "COM_scheduler.hh"
#include "COM_shader_operation.hh"
#include "COM_utilities.hh"
namespace blender::realtime_compositor {
using namespace nodes::derived_node_tree_types;
/* Construct a compile state for the given schedule, which can later be retrieved through
 * get_schedule(). */
CompileState::CompileState(const Schedule &schedule)
    : schedule_(schedule)
{
}
/* Get a reference to the schedule that the compile state was constructed with. */
const Schedule &CompileState::get_schedule()
{
  return schedule_;
}
/* Remember that the given node was compiled into the given node operation, so that the results
 * of its outputs can later be found by get_result_from_output_socket(). The mapping is inserted
 * with add_new, so the node is presumably expected to not be mapped already. */
void CompileState::map_node_to_node_operation(DNode node, NodeOperation *operations)
{
  node_operations_.add_new(node, operations);
}
/* Remember that the given node was compiled as part of the given shader operation, so that the
 * results of its outputs can later be found by get_result_from_output_socket(). The mapping is
 * inserted with add_new, so the node is presumably expected to not be mapped already. */
void CompileState::map_node_to_shader_operation(DNode node, ShaderOperation *operations)
{
  shader_operations_.add_new(node, operations);
}
/* Get a reference to the result of the operation that the node of the given output was compiled
 * into. The node is looked up among the node operations first and among the shader operations
 * otherwise. */
Result &CompileState::get_result_from_output_socket(DOutputSocket output)
{
  const DNode node = output.node();

  /* The node was not compiled into a standard node operation, so it was compiled into a shader
   * operation. Shader operations identify their results by internal identifiers, so translate
   * the output socket into its internal identifier before retrieving the result. */
  if (!node_operations_.contains(node)) {
    ShaderOperation *shader_operation = shader_operations_.lookup(node);
    return shader_operation->get_result(
        shader_operation->get_output_identifier_from_output_socket(output));
  }

  /* The node was compiled into a standard node operation, whose results are identified by the
   * identifiers of the output sockets themselves. */
  NodeOperation *node_operation = node_operations_.lookup(node);
  return node_operation->get_result(output->identifier());
}
/* Add the given node to the shader compile unit and, if the unit has no meaningful domain yet,
 * adopt the computed domain of the node as the domain of the unit. */
void CompileState::add_node_to_shader_compile_unit(DNode node)
{
  shader_compile_unit_.add_new(node);

  /* A domain that is not the identity domain was already determined by a previously added node,
   * so it is kept as is. */
  if (shader_compile_unit_domain_ != Domain::identity()) {
    return;
  }

  /* The domain is either not yet determined or was determined to be an identity domain, so
   * update it to be the computed domain of the node. */
  shader_compile_unit_domain_ = compute_shader_node_domain(node);
}
/* Get a reference to the shader compile unit that nodes are currently being accumulated into. */
ShaderCompileUnit &CompileState::get_shader_compile_unit()
{
  return shader_compile_unit_;
}
void CompileState::reset_shader_compile_unit()
{
return shader_compile_unit_.clear();
}
/* Decide whether the shader compile unit should be compiled before the given node is handled.
 * Returns true when the unit is not empty and the node can't be added to it, and false
 * otherwise. */
bool CompileState::should_compile_shader_compile_unit(DNode node)
{
  /* An empty shader compile unit has nothing to compile yet. */
  if (shader_compile_unit_.is_empty()) {
    return false;
  }

  /* A node that is not a shader node can't join the unit, so the unit is considered complete
   * and should be compiled. */
  if (!is_shader_node(node)) {
    return true;
  }

  /* Identity domains represent single values and are always compatible, so the node can join
   * the unit regardless of its own domain. Otherwise, the node can only join the unit if its
   * computed domain matches the domain of the unit, and a mismatch means the unit is complete
   * and should be compiled. */
  const bool is_identity_domain = shader_compile_unit_domain_ == Domain::identity();
  return !is_identity_domain && shader_compile_unit_domain_ != compute_shader_node_domain(node);
}
/* Compute the domain of the given shader node, which is the domain of its linked, non single
 * value input that has the highest domain priority. An identity domain is returned if no such
 * input exists. */
Domain CompileState::compute_shader_node_domain(DNode node)
{
  /* Start from an identity domain, which is what gets returned if no domain input is found,
   * most likely because all inputs are single values. */
  Domain domain = Domain::identity();
  /* Lower values denote higher priorities, so start from the lowest possible priority. */
  int best_priority = std::numeric_limits<int>::max();

  for (const InputSocketRef *input_ref : node->inputs()) {
    const DInputSocket input{node.context(), input_ref};

    /* A null output means the input is unlinked, so it can't be a domain input. */
    const DOutputSocket output = get_output_linked_to_input(input);
    if (!output) {
      continue;
    }

    const InputDescriptor input_descriptor = input_descriptor_from_input_socket(input_ref);

    /* The output belongs to a node that is part of the shader compile unit, so the domain of the
     * input is the domain of the compile unit itself. */
    if (shader_compile_unit_.contains(output.node())) {
      /* A unit domain of size one is treated as a single value, which can't be a domain
       * input. */
      const bool is_single_value = shader_compile_unit_domain_.size == int2(1);
      if (!is_single_value && input_descriptor.domain_priority < best_priority) {
        domain = shader_compile_unit_domain_;
        best_priority = input_descriptor.domain_priority;
      }
      continue;
    }

    /* Otherwise, the output belongs to an already compiled operation, so consider the domain of
     * its result, unless it is a single value or is expected to be one. */
    const Result &result = get_result_from_output_socket(output);
    if (result.is_single_value() || input_descriptor.expects_single_value) {
      continue;
    }

    /* Only inputs with a strictly higher priority, that is, a lower priority value, replace the
     * domain found so far. */
    if (input_descriptor.domain_priority >= best_priority) {
      continue;
    }
    domain = result.domain();
    best_priority = input_descriptor.domain_priority;
  }

  return domain;
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,36 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "COM_context.hh"
#include "COM_static_shader_manager.hh"
#include "COM_texture_pool.hh"
namespace blender::realtime_compositor {
/* Construct a context that hands out the given texture pool through texture_pool(). */
Context::Context(TexturePool &texture_pool)
    : texture_pool_(texture_pool)
{
}
/* Get the frame number stored in the render settings of the scene returned by get_scene(). */
int Context::get_frame_number() const
{
  return get_scene()->r.cfra;
}
float Context::get_time() const
{
const float frame_number = static_cast<float>(get_frame_number());
const float frame_rate = static_cast<float>(get_scene()->r.frs_sec) /
static_cast<float>(get_scene()->r.frs_sec_base);
return frame_number / frame_rate;
}
/* Get a reference to the texture pool that the context was constructed with. */
TexturePool &Context::texture_pool()
{
  return texture_pool_;
}
/* Get a reference to the static shader manager stored in the context. */
StaticShaderManager &Context::shader_manager()
{
  return shader_manager_;
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,220 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_math_vec_types.hh"
#include "GPU_shader.h"
#include "COM_context.hh"
#include "COM_conversion_operation.hh"
#include "COM_input_descriptor.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
namespace blender::realtime_compositor {
/* -------------------------------------------------------------------------------------------------
* Conversion Operation.
*/
void ConversionOperation::execute()
{
Result &result = get_result();
const Result &input = get_input();
if (input.is_single_value()) {
result.allocate_single_value();
execute_single(input, result);
return;
}
result.allocate_texture(input.domain());
GPUShader *shader = get_conversion_shader();
GPU_shader_bind(shader);
input.bind_as_texture(shader, "input_tx");
result.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, input.domain().size);
input.unbind_as_texture();
result.unbind_as_image();
GPU_shader_unbind();
}
/* Return a newly allocated conversion operation that converts the type of the given input
 * result into the type expected by the given input descriptor. A null pointer is returned if the
 * types already match or if no conversion exists between them. The returned operation is
 * allocated with new, so the caller is responsible for it. */
SimpleOperation *ConversionOperation::construct_if_needed(Context &context,
                                                          const Result &input_result,
                                                          const InputDescriptor &input_descriptor)
{
  const ResultType source_type = input_result.type();
  const ResultType target_type = input_descriptor.type;

  if (source_type == ResultType::Float) {
    if (target_type == ResultType::Vector) {
      return new ConvertFloatToVectorOperation(context);
    }
    if (target_type == ResultType::Color) {
      return new ConvertFloatToColorOperation(context);
    }
    return nullptr;
  }

  if (source_type == ResultType::Color) {
    if (target_type == ResultType::Float) {
      return new ConvertColorToFloatOperation(context);
    }
    if (target_type == ResultType::Vector) {
      return new ConvertColorToVectorOperation(context);
    }
    return nullptr;
  }

  if (source_type == ResultType::Vector) {
    if (target_type == ResultType::Float) {
      return new ConvertVectorToFloatOperation(context);
    }
    if (target_type == ResultType::Color) {
      return new ConvertVectorToColorOperation(context);
    }
    return nullptr;
  }

  return nullptr;
}
/* -------------------------------------------------------------------------------------------------
* Convert Float To Vector Operation.
*/
/* Declare a float input and populate a vector result. */
ConvertFloatToVectorOperation::ConvertFloatToVectorOperation(Context &context)
    : ConversionOperation(context)
{
  InputDescriptor float_input;
  float_input.type = ResultType::Float;
  declare_input_descriptor(float_input);

  populate_result(Result(ResultType::Vector, texture_pool()));
}
/* Set the output to a vector constructed from the single float value of the input. */
void ConvertFloatToVectorOperation::execute_single(const Result &input, Result &output)
{
  const float value = input.get_float_value();
  output.set_vector_value(float3(value));
}
/* Get the compositor_convert_float_to_vector shader from the shader manager. */
GPUShader *ConvertFloatToVectorOperation::get_conversion_shader() const
{
  return shader_manager().get("compositor_convert_float_to_vector");
}
/* -------------------------------------------------------------------------------------------------
* Convert Float To Color Operation.
*/
/* Declare a float input and populate a color result. */
ConvertFloatToColorOperation::ConvertFloatToColorOperation(Context &context)
    : ConversionOperation(context)
{
  InputDescriptor float_input;
  float_input.type = ResultType::Float;
  declare_input_descriptor(float_input);

  populate_result(Result(ResultType::Color, texture_pool()));
}
void ConvertFloatToColorOperation::execute_single(const Result &input, Result &output)
{
float4 color = float4(input.get_float_value());
color[3] = 1.0f;
output.set_color_value(color);
}
/* Get the compositor_convert_float_to_color shader from the shader manager. */
GPUShader *ConvertFloatToColorOperation::get_conversion_shader() const
{
  return shader_manager().get("compositor_convert_float_to_color");
}
/* -------------------------------------------------------------------------------------------------
* Convert Color To Float Operation.
*/
/* Declare a color input and populate a float result. */
ConvertColorToFloatOperation::ConvertColorToFloatOperation(Context &context)
    : ConversionOperation(context)
{
  InputDescriptor color_input;
  color_input.type = ResultType::Color;
  declare_input_descriptor(color_input);

  populate_result(Result(ResultType::Float, texture_pool()));
}
void ConvertColorToFloatOperation::execute_single(const Result &input, Result &output)
{
float4 color = input.get_color_value();
output.set_float_value((color[0] + color[1] + color[2]) / 3.0f);
}
/* Get the compositor_convert_color_to_float shader from the shader manager. */
GPUShader *ConvertColorToFloatOperation::get_conversion_shader() const
{
  return shader_manager().get("compositor_convert_color_to_float");
}
/* -------------------------------------------------------------------------------------------------
* Convert Color To Vector Operation.
*/
/* Declare a color input and populate a vector result. */
ConvertColorToVectorOperation::ConvertColorToVectorOperation(Context &context)
    : ConversionOperation(context)
{
  InputDescriptor color_input;
  color_input.type = ResultType::Color;
  declare_input_descriptor(color_input);

  populate_result(Result(ResultType::Vector, texture_pool()));
}
void ConvertColorToVectorOperation::execute_single(const Result &input, Result &output)
{
float4 color = input.get_color_value();
output.set_vector_value(float3(color));
}
/* Get the compositor_convert_color_to_vector shader from the shader manager. */
GPUShader *ConvertColorToVectorOperation::get_conversion_shader() const
{
  return shader_manager().get("compositor_convert_color_to_vector");
}
/* -------------------------------------------------------------------------------------------------
* Convert Vector To Float Operation.
*/
/* Declare a vector input and populate a float result. */
ConvertVectorToFloatOperation::ConvertVectorToFloatOperation(Context &context)
    : ConversionOperation(context)
{
  InputDescriptor vector_input;
  vector_input.type = ResultType::Vector;
  declare_input_descriptor(vector_input);

  populate_result(Result(ResultType::Float, texture_pool()));
}
void ConvertVectorToFloatOperation::execute_single(const Result &input, Result &output)
{
float3 vector = input.get_vector_value();
output.set_float_value((vector[0] + vector[1] + vector[2]) / 3.0f);
}
/* Get the compositor_convert_vector_to_float shader from the shader manager. */
GPUShader *ConvertVectorToFloatOperation::get_conversion_shader() const
{
  return shader_manager().get("compositor_convert_vector_to_float");
}
/* -------------------------------------------------------------------------------------------------
* Convert Vector To Color Operation.
*/
/* Declare a vector input and populate a color result. */
ConvertVectorToColorOperation::ConvertVectorToColorOperation(Context &context)
    : ConversionOperation(context)
{
  InputDescriptor vector_input;
  vector_input.type = ResultType::Vector;
  declare_input_descriptor(vector_input);

  populate_result(Result(ResultType::Color, texture_pool()));
}
void ConvertVectorToColorOperation::execute_single(const Result &input, Result &output)
{
output.set_color_value(float4(input.get_vector_value(), 1.0f));
}
/* Get the compositor_convert_vector_to_color shader from the shader manager. */
GPUShader *ConvertVectorToColorOperation::get_conversion_shader() const
{
  return shader_manager().get("compositor_convert_vector_to_color");
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,38 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_float3x3.hh"
#include "BLI_math_vec_types.hh"
#include "COM_domain.hh"
namespace blender::realtime_compositor {
/* Construct a domain of the given size with an identity transformation by delegating to the
 * constructor that takes an explicit transformation. */
Domain::Domain(int2 size) : Domain(size, float3x3::identity())
{
}
/* Construct a domain of the given size and transformation. */
Domain::Domain(int2 size, float3x3 transformation)
    : size(size), transformation(transformation)
{
}
void Domain::transform(const float3x3 &input_transformation)
{
transformation = input_transformation * transformation;
}
/* Get a domain of size one in both dimensions with an identity transformation. The single
 * argument constructor supplies the identity transformation. */
Domain Domain::identity()
{
  return Domain(int2(1));
}
/* Two domains are equal if both their sizes and their transformations compare equal. */
bool operator==(const Domain &a, const Domain &b)
{
  if (!(a.size == b.size)) {
    return false;
  }
  return a.transformation == b.transformation;
}
/* Two domains are unequal exactly when they don't compare equal. */
bool operator!=(const Domain &a, const Domain &b)
{
  const bool are_equal = a == b;
  return !are_equal;
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,187 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <string>
#include "DNA_node_types.h"
#include "NOD_derived_node_tree.hh"
#include "COM_compile_state.hh"
#include "COM_context.hh"
#include "COM_evaluator.hh"
#include "COM_input_single_value_operation.hh"
#include "COM_node_operation.hh"
#include "COM_operation.hh"
#include "COM_result.hh"
#include "COM_scheduler.hh"
#include "COM_shader_operation.hh"
#include "COM_utilities.hh"
namespace blender::realtime_compositor {
using namespace nodes::derived_node_tree_types;
/* Construct an evaluator that evaluates the given node tree using the given context. Nothing is
 * compiled until evaluate() is called. */
Evaluator::Evaluator(Context &context, bNodeTree &node_tree)
    : context_(context),
      node_tree_(node_tree)
{
}
/* Evaluate the node tree. The first call after construction or reset() compiles the tree and
 * evaluates the operations as they are compiled, while later calls only re-evaluate the
 * operations that were stored in the operations stream. The texture pool is reset first in
 * either case. */
void Evaluator::evaluate()
{
  context_.texture_pool().reset();

  /* Already compiled, so evaluate the stored operations in the order they were appended. */
  if (is_compiled_) {
    for (const std::unique_ptr<Operation> &operation : operations_stream_) {
      operation->evaluate();
    }
    return;
  }

  compile_and_evaluate();
  is_compiled_ = true;
}
/* Discard everything that was produced by compilation and mark the evaluator as not compiled,
 * such that the next call to evaluate() compiles the node tree again. */
void Evaluator::reset()
{
  /* The operations are cleared before the derived node tree and the reference map are
   * discarded. */
  operations_stream_.clear();
  derived_node_tree_.reset();
  node_tree_reference_map_.clear();

  is_compiled_ = false;
}
bool Evaluator::validate_node_tree()
{
if (derived_node_tree_->has_link_cycles()) {
context_.set_info_message("Compositor node tree has cyclic links!");
return false;
}
if (derived_node_tree_->has_undefined_nodes_or_sockets()) {
context_.set_info_message("Compositor node tree has undefined nodes or sockets!");
return false;
}
/* Find any of the unsupported nodes in the node tree. We only track one of them because we
* display a message for only one at a time to avoid long messages. */
DNode unsupported_node;
derived_node_tree_->foreach_node([&](DNode node) {
if (!is_node_supported(node)) {
unsupported_node = node;
}
});
/* unsupported_node is null if no unsupported node was found. */
if (unsupported_node) {
std::string message = "Compositor node tree has an unsupported node: ";
context_.set_info_message(message + unsupported_node->idname());
return false;
}
return true;
}
void Evaluator::compile_and_evaluate()
{
derived_node_tree_.reset(new DerivedNodeTree(node_tree_, node_tree_reference_map_));
if (!validate_node_tree()) {
return;
}
const Schedule schedule = compute_schedule(*derived_node_tree_);
CompileState compile_state(schedule);
for (const DNode &node : schedule) {
if (compile_state.should_compile_shader_compile_unit(node)) {
compile_and_evaluate_shader_compile_unit(compile_state);
}
if (is_shader_node(node)) {
compile_state.add_node_to_shader_compile_unit(node);
}
else {
compile_and_evaluate_node(node, compile_state);
}
}
}
/* Compile the given node into the node operation returned by its type info, map its inputs to
 * their results, append it to the operations stream, and evaluate it. */
void Evaluator::compile_and_evaluate_node(DNode node, CompileState &compile_state)
{
  NodeOperation *node_operation = node->typeinfo()->get_compositor_operation(context_, node);
  compile_state.map_node_to_node_operation(node, node_operation);

  map_node_operation_inputs_to_their_results(node, node_operation, compile_state);

  /* The operation is appended only after input mapping because the mapping may append Input
   * Single Value Operations to the operations stream, and those need to be evaluated before the
   * operation itself is evaluated. The stream takes ownership of the operation. */
  operations_stream_.append(std::unique_ptr<Operation>(node_operation));

  node_operation->compute_results_reference_counts(compile_state.get_schedule());
  node_operation->evaluate();
}
/* Map every input of the given node operation to the result that provides its value. Linked
 * inputs are mapped to the result of the output they are linked to, while all other inputs are
 * mapped to the result of a newly created and evaluated Input Single Value Operation. */
void Evaluator::map_node_operation_inputs_to_their_results(DNode node,
                                                           NodeOperation *operation,
                                                           CompileState &compile_state)
{
  for (const InputSocketRef *input_ref : node->inputs()) {
    const DInputSocket input{node.context(), input_ref};
    DSocket origin = get_input_origin_socket(input);

    /* The origin socket is an input, which either means the input is unlinked and the origin is
     * the input socket itself or the input is connected to an unlinked input of a group input
     * node and the origin is the input of the group input node. So map the input to the result
     * of a new Input Single Value Operation, which is owned by the operations stream. */
    if (!origin->is_output()) {
      auto *single_value_operation = new InputSingleValueOperation(context_,
                                                                   DInputSocket(origin));
      operation->map_input_to_result(input->identifier(), &single_value_operation->get_result());
      operations_stream_.append(
          std::unique_ptr<InputSingleValueOperation>(single_value_operation));
      single_value_operation->evaluate();
      continue;
    }

    /* The origin socket is an output, which means the input is linked. So map the input to the
     * result of that output. */
    Result &origin_result = compile_state.get_result_from_output_socket(DOutputSocket(origin));
    operation->map_input_to_result(input->identifier(), &origin_result);
  }
}
/* Compile the shader compile unit of the given compile state into a single shader operation,
 * map its inputs to their results, append it to the operations stream, evaluate it, and finally
 * reset the shader compile unit. */
void Evaluator::compile_and_evaluate_shader_compile_unit(CompileState &compile_state)
{
  ShaderCompileUnit &unit = compile_state.get_shader_compile_unit();
  ShaderOperation *shader_operation = new ShaderOperation(context_, unit);

  /* Every node in the unit is mapped to the same shader operation. */
  for (DNode unit_node : unit) {
    compile_state.map_node_to_shader_operation(unit_node, shader_operation);
  }

  map_shader_operation_inputs_to_their_results(shader_operation, compile_state);

  /* The operations stream takes ownership of the operation. */
  operations_stream_.append(std::unique_ptr<Operation>(shader_operation));

  shader_operation->compute_results_reference_counts(compile_state.get_schedule());
  shader_operation->evaluate();

  compile_state.reset_shader_compile_unit();
}
/* Map every input of the given shader operation to the result of the output it is linked to, as
 * described by the inputs to linked outputs map of the operation. */
void Evaluator::map_shader_operation_inputs_to_their_results(ShaderOperation *operation,
                                                             CompileState &compile_state)
{
  for (const auto &input_to_output : operation->get_inputs_to_linked_outputs_map().items()) {
    Result &linked_result = compile_state.get_result_from_output_socket(input_to_output.value);
    operation->map_input_to_result(input_to_output.key, &linked_result);
  }
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,57 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_math_vec_types.hh"
#include "COM_input_single_value_operation.hh"
#include "COM_operation.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
namespace blender::realtime_compositor {
/* The identifier of the single result of the operation, used by get_result() and
 * populate_result() below. */
const StringRef InputSingleValueOperation::output_identifier_ = StringRef("Output");
/* Construct an operation whose single result has the type that corresponds to the given input
 * socket. */
InputSingleValueOperation::InputSingleValueOperation(Context &context, DInputSocket input_socket)
    : Operation(context), input_socket_(input_socket)
{
  const ResultType socket_result_type = get_node_socket_result_type(input_socket_.socket_ref());
  Result single_value_result(socket_result_type, texture_pool());

  /* The result of an input single value operation is guaranteed to have a single user. */
  single_value_result.set_initial_reference_count(1);

  populate_result(single_value_result);
}
void InputSingleValueOperation::execute()
{
/* Allocate a single value for the result. */
Result &result = get_result();
result.allocate_single_value();
/* Set the value of the result to the default value of the input socket. */
switch (result.type()) {
case ResultType::Float:
result.set_float_value(input_socket_->default_value<bNodeSocketValueFloat>()->value);
break;
case ResultType::Vector:
result.set_vector_value(
float3(input_socket_->default_value<bNodeSocketValueVector>()->value));
break;
case ResultType::Color:
result.set_color_value(float4(input_socket_->default_value<bNodeSocketValueRGBA>()->value));
break;
}
}
/* Get the single result of the operation, which is stored under output_identifier_. */
Result &InputSingleValueOperation::get_result()
{
  return Operation::get_result(output_identifier_);
}
/* Populate the single result of the operation, storing it under output_identifier_. */
void InputSingleValueOperation::populate_result(Result result)
{
  Operation::populate_result(output_identifier_, result);
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,67 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <memory>
#include "BLI_map.hh"
#include "BLI_string_ref.hh"
#include "BLI_vector.hh"
#include "DNA_node_types.h"
#include "NOD_derived_node_tree.hh"
#include "NOD_node_declaration.hh"
#include "COM_context.hh"
#include "COM_input_descriptor.hh"
#include "COM_node_operation.hh"
#include "COM_operation.hh"
#include "COM_result.hh"
#include "COM_scheduler.hh"
#include "COM_utilities.hh"
namespace blender::realtime_compositor {
using namespace nodes::derived_node_tree_types;
/* Construct a node operation for the given node, populating a result for each of its outputs
 * and declaring an input descriptor for each of its inputs, both keyed by the identifier of the
 * socket. */
NodeOperation::NodeOperation(Context &context, DNode node) : Operation(context), node_(node)
{
  /* Each output gets a result of the type that corresponds to the output socket. */
  for (const OutputSocketRef *output_ref : node->outputs()) {
    const ResultType output_type = get_node_socket_result_type(output_ref);
    const Result output_result(output_type, texture_pool());
    populate_result(output_ref->identifier(), output_result);
  }

  /* Each input gets the descriptor derived from the input socket. */
  for (const InputSocketRef *input_ref : node->inputs()) {
    const InputDescriptor descriptor = input_descriptor_from_input_socket(input_ref);
    declare_input_descriptor(input_ref->identifier(), descriptor);
  }
}
void NodeOperation::compute_results_reference_counts(const Schedule &schedule)
{
for (const OutputSocketRef *output_ref : node()->outputs()) {
const DOutputSocket output{node().context(), output_ref};
const int reference_count = number_of_inputs_linked_to_output_conditioned(
output, [&](DInputSocket input) { return schedule.contains(input.node()); });
get_result(output->identifier()).set_initial_reference_count(reference_count);
}
}
/* Get a reference to the derived node that the operation was constructed with. */
const DNode &NodeOperation::node() const
{
  return node_;
}
/* Get a reference to the bNode returned by the derived node of the operation. */
const bNode &NodeOperation::bnode() const
{
  return *node_->bnode();
}
/* Returns whether the result with the given identifier reports that it should be computed. */
bool NodeOperation::should_compute_output(StringRef identifier)
{
  Result &output_result = get_result(identifier);
  return output_result.should_compute();
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,201 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <limits>
#include <memory>
#include "BLI_map.hh"
#include "BLI_string_ref.hh"
#include "BLI_vector.hh"
#include "COM_context.hh"
#include "COM_conversion_operation.hh"
#include "COM_domain.hh"
#include "COM_input_descriptor.hh"
#include "COM_operation.hh"
#include "COM_realize_on_domain_operation.hh"
#include "COM_reduce_to_single_value_operation.hh"
#include "COM_result.hh"
#include "COM_simple_operation.hh"
#include "COM_static_shader_manager.hh"
#include "COM_texture_pool.hh"
namespace blender::realtime_compositor {
/* Construct an operation that uses the given context. */
Operation::Operation(Context &context)
    : context_(context)
{
}
/* The destructor is defaulted here in the implementation file rather than in the class
 * definition. */
Operation::~Operation() = default;
/* Evaluate the operation in four steps that have to happen in this order. */
void Operation::evaluate()
{
  /* Prepare the inputs by evaluating their input processors, adding them on first evaluation. */
  evaluate_input_processors();

  /* Reset the results before they are computed. */
  reset_results();

  /* Compute the results. */
  execute();

  /* Release the results mapped to the inputs now that they were used. */
  release_inputs();
}
/* Get a reference to the result that was populated under the given identifier. */
Result &Operation::get_result(StringRef identifier)
{
  return results_.lookup(identifier);
}
/* Map the input with the given identifier to the given result, such that get_input() returns
 * that result. The mapping is inserted with add_new, so the input is presumably expected to not
 * be mapped already. */
void Operation::map_input_to_result(StringRef identifier, Result *result)
{
  results_mapped_to_inputs_.add_new(identifier, result);
}
/* Compute the domain of the operation, which is the domain of its non single value input that
 * has the highest domain priority. An identity domain is returned if no such input exists. */
Domain Operation::compute_domain()
{
  /* Start from an identity domain, which is what gets returned if no domain input is found,
   * most likely because all inputs are single values. */
  Domain domain = Domain::identity();
  /* Lower values denote higher priorities, so start from the lowest possible priority. */
  int best_priority = std::numeric_limits<int>::max();

  for (StringRef identifier : input_descriptors_.keys()) {
    const Result &input = get_input(identifier);
    const InputDescriptor &input_descriptor = get_input_descriptor(identifier);

    /* Inputs that are single values or that are expected to be single values can't be domain
     * inputs. */
    const bool is_single_value = input.is_single_value() || input_descriptor.expects_single_value;
    if (is_single_value) {
      continue;
    }

    /* Only inputs with a strictly higher priority, that is, a lower priority value, replace the
     * domain found so far. */
    if (input_descriptor.domain_priority >= best_priority) {
      continue;
    }
    domain = input.domain();
    best_priority = input_descriptor.domain_priority;
  }

  return domain;
}
/* Add and evaluate the input processors needed by each of the mapped inputs. The processors are
 * added in three passes, one per processor type. A pass is finished for all inputs before the
 * next pass starts because the construction of a processor may depend on the results of the
 * previous processors of all inputs. For instance, the realize on domain processor depends on
 * the computed domain of the operation, which considers all inputs. */
void Operation::add_and_evaluate_input_processors()
{
  /* First pass: Reduce to single value processors. */
  for (const StringRef &identifier : results_mapped_to_inputs_.keys()) {
    SimpleOperation *reduction = ReduceToSingleValueOperation::construct_if_needed(
        context(), get_input(identifier));
    add_and_evaluate_input_processor(identifier, reduction);
  }

  /* Second pass: Type conversion processors. */
  for (const StringRef &identifier : results_mapped_to_inputs_.keys()) {
    SimpleOperation *type_conversion = ConversionOperation::construct_if_needed(
        context(), get_input(identifier), get_input_descriptor(identifier));
    add_and_evaluate_input_processor(identifier, type_conversion);
  }

  /* Third pass: Realize on domain processors. The domain is recomputed for every input because
   * adding a processor switches the result mapped to its input. */
  for (const StringRef &identifier : results_mapped_to_inputs_.keys()) {
    SimpleOperation *realization = RealizeOnDomainOperation::construct_if_needed(
        context(), get_input(identifier), get_input_descriptor(identifier), compute_domain());
    add_and_evaluate_input_processor(identifier, realization);
  }
}
/* Append the given processor to the processors of the input with the given identifier, feed it
 * the result currently at the end of the chain of that input, remap the input to the result of
 * the processor, and evaluate the processor. Takes ownership of the processor. A null processor
 * is ignored. */
void Operation::add_and_evaluate_input_processor(StringRef identifier, SimpleOperation *processor)
{
  /* Null processors are allowed to facilitate the construct_if_needed pattern used by
   * add_and_evaluate_input_processors. */
  if (processor == nullptr) {
    return;
  }

  ProcessorsVector &input_processors = input_processors_.lookup_or_add_default(identifier);

  /* The input of the processor is the result of the last processor of the input if one exists,
   * and the result mapped to the input otherwise. */
  Result &processor_input = input_processors.is_empty() ?
                                get_input(identifier) :
                                input_processors.last()->get_result();
  processor->map_input_to_result(&processor_input);

  /* The processors vector owns the processor from now on. */
  input_processors.append(std::unique_ptr<SimpleOperation>(processor));

  /* From now on, the input of the operation is the output of the processor. */
  switch_result_mapped_to_input(identifier, &processor->get_result());

  processor->evaluate();
}
/* Get a reference to the result currently mapped to the input with the given identifier. */
Result &Operation::get_input(StringRef identifier) const
{
  Result *mapped_result = results_mapped_to_inputs_.lookup(identifier);
  return *mapped_result;
}
/* Replace the result mapped to the already mapped input with the given identifier by the given
 * result. */
void Operation::switch_result_mapped_to_input(StringRef identifier, Result *result)
{
  results_mapped_to_inputs_.lookup(identifier) = result;
}
/* Store the given result under the given identifier, such that get_result() returns it. The
 * result is inserted with add_new, so the identifier is presumably expected to be unused. */
void Operation::populate_result(StringRef identifier, Result result)
{
  results_.add_new(identifier, result);
}
/* Store the given descriptor for the input with the given identifier, such that
 * get_input_descriptor() returns it. The descriptor is inserted with add_new, so the identifier
 * is presumably expected to be unused. */
void Operation::declare_input_descriptor(StringRef identifier, InputDescriptor descriptor)
{
  input_descriptors_.add_new(identifier, descriptor);
}
/* Get a reference to the descriptor that was declared for the input with the given
 * identifier. */
InputDescriptor &Operation::get_input_descriptor(StringRef identifier)
{
  return input_descriptors_.lookup(identifier);
}
/* Get a reference to the context that the operation was constructed with. */
Context &Operation::context()
{
  return context_;
}
/* Get a reference to the texture pool of the context of the operation. */
TexturePool &Operation::texture_pool() const
{
  return context_.texture_pool();
}
/* Get a reference to the static shader manager of the context of the operation. */
StaticShaderManager &Operation::shader_manager() const
{
  return context_.shader_manager();
}
void Operation::evaluate_input_processors()
{
if (!input_processors_added_) {
add_and_evaluate_input_processors();
input_processors_added_ = true;
return;
}
for (const ProcessorsVector &processors : input_processors_.values()) {
for (const std::unique_ptr<SimpleOperation> &processor : processors) {
processor->evaluate();
}
}
}
void Operation::reset_results()
{
for (Result &result : results_.values()) {
result.reset();
}
}
void Operation::release_inputs()
{
for (Result *result : results_mapped_to_inputs_.values()) {
result->release();
}
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,130 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_float3x3.hh"
#include "BLI_math_vec_types.hh"
#include "BLI_utildefines.h"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "COM_context.hh"
#include "COM_domain.hh"
#include "COM_input_descriptor.hh"
#include "COM_realize_on_domain_operation.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
namespace blender::realtime_compositor {
/* Construct an operation that realizes its input on the given domain. The single input and the
 * single result of the operation both have the given type. */
RealizeOnDomainOperation::RealizeOnDomainOperation(Context &context,
                                                   Domain domain,
                                                   ResultType type)
    : SimpleOperation(context), domain_(domain)
{
  InputDescriptor descriptor;
  descriptor.type = type;
  declare_input_descriptor(descriptor);

  populate_result(Result(type, texture_pool()));
}
/* Allocate the result on the target domain and dispatch the realization shader that samples the
 * input into it. */
void RealizeOnDomainOperation::execute()
{
  Result &input = get_input();
  Result &result = get_result();
  result.allocate_texture(domain_);

  GPUShader *shader = get_realization_shader();
  GPU_shader_bind(shader);

  /* Express the input space in the space of the target domain. */
  const float3x3 input_to_domain = input.domain().transformation *
                                   domain_.transformation.inverted();

  /* Apply the transformation around the center of the target domain. */
  const float3x3 centered_transformation = float3x3::from_origin_transformation(
      input_to_domain, float2(domain_.size) / 2.0f);

  /* The shader transforms the coordinates of the domain rather than the input image, so it
   * expects the inverse of the transformation. */
  const float3x3 inverse_transformation = centered_transformation.inverted();
  GPU_shader_uniform_mat3_as_mat4(shader, "inverse_transformation", inverse_transformation.ptr());

  const auto &realization_options = input.get_realization_options();

  /* Bilinear filtering is enabled for both the bilinear and bicubic cases, because the bicubic
   * realization shader logic expects the texture to use bilinear interpolation. */
  const bool use_bilinear = realization_options.interpolation == Interpolation::Bilinear ||
                            realization_options.interpolation == Interpolation::Bicubic;
  GPU_texture_filter_mode(input.texture(), use_bilinear);

  /* Out-of-bound accesses return zero by clamping to the border color, unless the input repeats,
   * in which case the texture wraps. */
  const bool repeats = realization_options.repeat_x || realization_options.repeat_y;
  GPU_texture_wrap_mode(input.texture(), repeats, false);

  input.bind_as_texture(shader, "input_tx");
  result.bind_as_image(shader, "domain_img");

  compute_dispatch_threads_at_least(shader, domain_.size);

  input.unbind_as_texture();
  result.unbind_as_image();
  GPU_shader_unbind();
}
/* Get the realization shader that corresponds to the type of the result of the operation. */
GPUShader *RealizeOnDomainOperation::get_realization_shader()
{
  const char *shader_name = nullptr;
  switch (get_result().type()) {
    case ResultType::Color:
      shader_name = "compositor_realize_on_domain_color";
      break;
    case ResultType::Vector:
      shader_name = "compositor_realize_on_domain_vector";
      break;
    case ResultType::Float:
      shader_name = "compositor_realize_on_domain_float";
      break;
  }

  /* None of the known result types matched. */
  if (shader_name == nullptr) {
    BLI_assert_unreachable();
    return nullptr;
  }

  return shader_manager().get(shader_name);
}
/* The domain of the operation is the target domain that was given on construction. */
Domain RealizeOnDomainOperation::compute_domain()
{
return domain_;
}
/* Return a newly allocated realize on domain operation for the given input if the input needs to
 * be realized on the operation domain, otherwise, return a null pointer. */
SimpleOperation *RealizeOnDomainOperation::construct_if_needed(
    Context &context,
    const Result &input_result,
    const InputDescriptor &input_descriptor,
    const Domain &operation_domain)
{
  /* Realization is not needed in any of the following cases, checked in order:
   * - The input wants to skip realization.
   * - The input expects a single value, if no single value is provided, it will be ignored and a
   *   default value will be used instead.
   * - The input result is a single value.
   * - The input result has a domain that is identical to the operation domain. */
  const bool is_realization_unneeded = input_descriptor.skip_realization ||
                                       input_descriptor.expects_single_value ||
                                       input_result.is_single_value() ||
                                       input_result.domain() == operation_domain;
  if (is_realization_unneeded) {
    return nullptr;
  }

  return new RealizeOnDomainOperation(context, operation_domain, input_descriptor.type);
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,67 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "GPU_state.h"
#include "GPU_texture.h"
#include "MEM_guardedalloc.h"
#include "COM_context.hh"
#include "COM_input_descriptor.hh"
#include "COM_reduce_to_single_value_operation.hh"
#include "COM_result.hh"
namespace blender::realtime_compositor {
/* Construct an operation whose single input and single result both have the given type. */
ReduceToSingleValueOperation::ReduceToSingleValueOperation(Context &context, ResultType type)
    : SimpleOperation(context)
{
  InputDescriptor descriptor;
  descriptor.type = type;
  declare_input_descriptor(descriptor);

  populate_result(Result(type, texture_pool()));
}
void ReduceToSingleValueOperation::execute()
{
/* Make sure any prior writes to the texture are reflected before downloading it. */
GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
const Result &input = get_input();
float *pixel = static_cast<float *>(GPU_texture_read(input.texture(), GPU_DATA_FLOAT, 0));
Result &result = get_result();
result.allocate_single_value();
switch (result.type()) {
case ResultType::Color:
result.set_color_value(pixel);
break;
case ResultType::Vector:
result.set_vector_value(pixel);
break;
case ResultType::Float:
result.set_float_value(*pixel);
break;
}
MEM_freeN(pixel);
}
/* Return a newly allocated reduce to single value operation if the given input result is a
 * texture of a single pixel, otherwise, return a null pointer. */
SimpleOperation *ReduceToSingleValueOperation::construct_if_needed(Context &context,
                                                                   const Result &input_result)
{
  /* The operation is not needed if the input is already a single value, or if it is a texture
   * that is not 1x1 and thus can't be reduced to a single value. */
  if (input_result.is_single_value() || input_result.domain().size != int2(1)) {
    return nullptr;
  }

  return new ReduceToSingleValueOperation(context, input_result.type());
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,257 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_float3x3.hh"
#include "BLI_math_vec_types.hh"
#include "GPU_shader.h"
#include "GPU_state.h"
#include "GPU_texture.h"
#include "COM_domain.hh"
#include "COM_result.hh"
#include "COM_texture_pool.hh"
namespace blender::realtime_compositor {
/* Construct a result of the given type that keeps a pointer to the given texture pool. No texture
 * is allocated at construction. */
Result::Result(ResultType type, TexturePool &texture_pool)
: type_(type), texture_pool_(&texture_pool)
{
}
/* Mark the result as a texture result, acquire a texture of the size of the given domain and of a
 * format matching the type of the result from the texture pool, then store the given domain. */
void Result::allocate_texture(Domain domain)
{
  is_single_value_ = false;

  if (type_ == ResultType::Float) {
    texture_ = texture_pool_->acquire_float(domain.size);
  }
  else if (type_ == ResultType::Vector) {
    texture_ = texture_pool_->acquire_vector(domain.size);
  }
  else if (type_ == ResultType::Color) {
    texture_ = texture_pool_->acquire_color(domain.size);
  }

  domain_ = domain;
}
void Result::allocate_single_value()
{
is_single_value_ = true;
/* Single values are stored in 1x1 textures as well as the single value members. */
const int2 texture_size{1, 1};
switch (type_) {
case ResultType::Float:
texture_ = texture_pool_->acquire_float(texture_size);
break;
case ResultType::Vector:
texture_ = texture_pool_->acquire_vector(texture_size);
break;
case ResultType::Color:
texture_ = texture_pool_->acquire_color(texture_size);
break;
}
domain_ = Domain::identity();
}
/* Allocate the result as a single value and set that value to zero according to the type of the
 * result. */
void Result::allocate_invalid()
{
  allocate_single_value();

  if (type_ == ResultType::Float) {
    set_float_value(0.0f);
  }
  else if (type_ == ResultType::Vector) {
    set_vector_value(float3(0.0f));
  }
  else if (type_ == ResultType::Color) {
    set_color_value(float4(0.0f));
  }
}
/* Bind the texture of the result to the texture unit of the sampler with the given name in the
 * given shader. */
void Result::bind_as_texture(GPUShader *shader, const char *texture_name) const
{
  /* Prior writes to the texture have to be reflected before it is read from. */
  GPU_memory_barrier(GPU_BARRIER_TEXTURE_FETCH);

  GPU_texture_bind(texture_, GPU_shader_get_texture_binding(shader, texture_name));
}
/* Bind the texture of the result to the image unit of the image with the given name in the given
 * shader. */
void Result::bind_as_image(GPUShader *shader, const char *image_name) const
{
  GPU_texture_image_bind(texture_, GPU_shader_get_texture_binding(shader, image_name));
}
/* Unbind the texture of the result that was bound as a texture. */
void Result::unbind_as_texture() const
{
GPU_texture_unbind(texture_);
}
/* Unbind the texture of the result that was bound as an image. */
void Result::unbind_as_image() const
{
GPU_texture_image_unbind(texture_);
}
/* Make the given target result a copy of this result that has this result as its master, such
 * that reference counting on the target is forwarded to this result. */
void Result::pass_through(Result &target)
{
  /* The references to the target become references to this result, so add the reference count of
   * the target to that of this result. */
  increment_reference_count(target.reference_count());

  /* Copy this result into the target. The initial reference count is a property of the original
   * target that is needed to correctly reset it before the next evaluation, so it is saved before
   * the copy and restored after it. */
  const int target_initial_reference_count = target.initial_reference_count_;
  target = *this;
  target.initial_reference_count_ = target_initial_reference_count;

  target.master_ = this;
}
/* Apply the given transformation to the domain of the result. */
void Result::transform(const float3x3 &transformation)
{
domain_.transform(transformation);
}
/* Return a mutable reference to the realization options stored in the domain of the result. */
RealizationOptions &Result::get_realization_options()
{
return domain_.realization_options;
}
/* Return the stored single float value. No check is done on whether the result is a single
 * value. */
float Result::get_float_value() const
{
return float_value_;
}
/* Return the stored single vector value. No check is done on whether the result is a single
 * value. */
float3 Result::get_vector_value() const
{
return vector_value_;
}
/* Return the stored single color value. No check is done on whether the result is a single
 * value. */
float4 Result::get_color_value() const
{
return color_value_;
}
/* Return the single float value if the result is a single value, otherwise, return the given
 * default value. */
float Result::get_float_value_default(float default_value) const
{
  return is_single_value() ? get_float_value() : default_value;
}
/* Return the single vector value if the result is a single value, otherwise, return the given
 * default value. */
float3 Result::get_vector_value_default(const float3 &default_value) const
{
  return is_single_value() ? get_vector_value() : default_value;
}
/* Return the single color value if the result is a single value, otherwise, return the given
 * default value. */
float4 Result::get_color_value_default(const float4 &default_value) const
{
  return is_single_value() ? get_color_value() : default_value;
}
/* Store the given value in the single float value member and upload it to the texture of the
 * result. NOTE(review): this writes to texture_, so presumably the result has to be allocated
 * beforehand, for instance using allocate_single_value() - confirm against callers. */
void Result::set_float_value(float value)
{
float_value_ = value;
GPU_texture_update(texture_, GPU_DATA_FLOAT, &float_value_);
}
/* Store the given value in the single vector value member and upload it to the texture of the
 * result. NOTE(review): the upload source is a float3, confirm that the number of channels of the
 * texture acquired by acquire_vector() matches, otherwise the upload would read past the
 * member. */
void Result::set_vector_value(const float3 &value)
{
vector_value_ = value;
GPU_texture_update(texture_, GPU_DATA_FLOAT, vector_value_);
}
/* Store the given value in the single color value member and upload it to the texture of the
 * result. NOTE(review): this writes to texture_, so presumably the result has to be allocated
 * beforehand, for instance using allocate_single_value() - confirm against callers. */
void Result::set_color_value(const float4 &value)
{
color_value_ = value;
GPU_texture_update(texture_, GPU_DATA_FLOAT, color_value_);
}
/* Set the initial reference count, which is the value that the reference count is restored to
 * when reset() is called. */
void Result::set_initial_reference_count(int count)
{
initial_reference_count_ = count;
}
/* Reset the result by clearing its master and restoring its reference count to the initial
 * reference count. The texture and the stored values are not touched here. */
void Result::reset()
{
master_ = nullptr;
reference_count_ = initial_reference_count_;
}
/* Add the given count to the reference count of the result, or to that of its master result if
 * it has one. */
void Result::increment_reference_count(int count)
{
  if (master_) {
    /* Reference counting is delegated to the master result. */
    master_->increment_reference_count(count);
  }
  else {
    reference_count_ += count;
  }
}
/* Drop one reference to the result, or to its master result if it has one. The texture is given
 * back to the texture pool once the reference count reaches zero. */
void Result::release()
{
  /* Reference counting is delegated to the master result. */
  if (master_) {
    master_->release();
    return;
  }

  if (--reference_count_ == 0) {
    texture_pool_->release(texture_);
  }
}
/* Return true if the initial reference count of the result is not zero. */
bool Result::should_compute()
{
return initial_reference_count_ != 0;
}
/* Return the type of the result that was given on construction. */
ResultType Result::type() const
{
return type_;
}
/* Return true if the result is not a single value, this is the negation of is_single_value(). */
bool Result::is_texture() const
{
return !is_single_value_;
}
/* Return true if the result is a single value, which is set by allocate_single_value() and
 * cleared by allocate_texture(). */
bool Result::is_single_value() const
{
return is_single_value_;
}
/* Return the texture held by the result. */
GPUTexture *Result::texture() const
{
return texture_;
}
/* Return the reference count of the result, or that of its master result if it has one. */
int Result::reference_count() const
{
  return master_ ? master_->reference_count() : reference_count_;
}
/* Return a reference to the domain of the result, which is set when the result is allocated. */
const Domain &Result::domain() const
{
return domain_;
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,311 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_map.hh"
#include "BLI_set.hh"
#include "BLI_stack.hh"
#include "BLI_vector.hh"
#include "BLI_vector_set.hh"
#include "NOD_derived_node_tree.hh"
#include "COM_scheduler.hh"
#include "COM_utilities.hh"
namespace blender::realtime_compositor {
using namespace nodes::derived_node_tree_types;
/* Compute the output node whose result should be computed. The output node is the node marked as
* NODE_DO_OUTPUT. If multiple types of output nodes are marked, then the preference will be
* CMP_NODE_COMPOSITE > CMP_NODE_VIEWER > CMP_NODE_SPLITVIEWER. If no output node exists, a null
* node will be returned. */
static DNode compute_output_node(DerivedNodeTree &tree)
{
  const NodeTreeRef &root_tree = tree.root_context().tree();

  /* The output node types ordered from the most to the least preferred. The first node marked as
   * NODE_DO_OUTPUT of the most preferred type is the output node. */
  const char *output_node_types[] = {
      "CompositorNodeComposite", "CompositorNodeViewer", "CompositorNodeSplitViewer"};
  for (const char *output_node_type : output_node_types) {
    for (const NodeRef *node : root_tree.nodes_by_type(output_node_type)) {
      if (node->bnode()->flag & NODE_DO_OUTPUT) {
        return DNode(&tree.root_context(), node);
      }
    }
  }

  /* No node of any of the output types is marked as the output, so return a null node. */
  return DNode();
}
/* A type representing a mapping that associates each node with a heuristic estimation of the
 * number of intermediate buffers needed to compute it and all of its dependencies. The keys are
 * the nodes and the values are the estimated numbers of buffers. See the
 * compute_number_of_needed_buffers function for more information. */
using NeededBuffers = Map<DNode, int>;
/* Compute a heuristic estimation of the number of intermediate buffers needed to compute each node
* and all of its dependencies for all nodes that the given node depends on. The output is a map
* that maps each node with the number of intermediate buffers needed to compute it and all of its
* dependencies.
*
* Consider a node that takes n number of buffers as an input from a number of node dependencies,
* which we shall call the input nodes. The node also computes and outputs m number of buffers.
* In order for the node to compute its output, a number of intermediate buffers will be needed.
* Since the node takes n buffers and outputs m buffers, then the number of buffers directly
* needed by the node is (n + m). But each of the input buffers are computed by a node that, in
* turn, needs a number of buffers to compute its output. So the total number of buffers needed
* to compute the output of the node is max(n + m, d) where d is the number of buffers needed by
* the input node that needs the largest number of buffers. We only consider the input node that
* needs the largest number of buffers, because those buffers can be reused by any input node
* that needs a lesser number of buffers.
*
* Shader nodes, however, are a special case because links between two shader nodes inside the same
* shader operation don't pass a buffer, but a single value in the compiled shader. So for shader
* nodes, only inputs and outputs linked to nodes that are not shader nodes should be considered.
* Note that this might not actually be true, because the compiler may decide to split a shader
* operation into multiples ones that will pass buffers, but this is not something that can be
* known at scheduling-time. See the discussion in COM_compile_state.hh, COM_evaluator.hh, and
* COM_shader_operation.hh for more information. In the node tree shown below, node 4 will have
* exactly the same number of needed buffers by node 3, because its inputs and outputs are all
* internally linked in the shader operation.
*
* Shader Operation
* +------------------------------------------------------+
* .------------. | .------------. .------------. .------------. | .------------.
* | Node 1 | | | Node 3 | | Node 4 | | Node 5 | | | Node 6 |
* | |----|--| |--| |------| |--|--| |
* | | .-|--| | | | .---| | | | |
* '------------' | | '------------' '------------' | '------------' | '------------'
* | +----------------------------------|-------------------+
* .------------. | |
* | Node 2 | | |
* | |--'------------------------------------'
* | |
* '------------'
*
* Note that the computed output is not guaranteed to be accurate, and will not be in most cases.
* The computation is merely a heuristic estimation that works well in most cases. This is due to a
* number of reasons:
* - The node tree is actually a graph that allows output sharing, which is not something that was
* taken into consideration in this implementation because it is difficult to correctly consider.
* - Each node may allocate any number of internal buffers, which is not taken into account in this
* implementation because it rarely affects the output and is done by very few nodes.
* - The compiler may decide to compile the schedule differently depending on runtime information
* which we can merely speculate about at scheduling-time as described above. */
static NeededBuffers compute_number_of_needed_buffers(DNode output_node)
{
  NeededBuffers needed_buffers;

  /* A stack of nodes used to traverse the node tree starting from the output node. */
  Stack<DNode> node_stack = {output_node};

  /* Traverse the node tree in a post order depth first manner and compute the number of needed
   * buffers for each node. Post order traversal guarantees that all the node dependencies of each
   * node are computed before it. This is done by pushing all the uncomputed node dependencies to
   * the node stack first and only popping and computing the node when all its node dependencies
   * were computed. */
  while (!node_stack.is_empty()) {
    /* Do not pop the node immediately, as it may turn out that we can't compute its number of
     * needed buffers just yet because its dependencies weren't computed, it will be popped later
     * when needed. The node is copied instead of referenced because it is still used after it is
     * popped below, at which point a reference to the top of the stack would refer to an element
     * that was removed from the stack. */
    const DNode node = node_stack.peek();

    /* Go over the node dependencies connected to the inputs of the node and push them to the node
     * stack if they were not computed already. */
    Set<DNode> pushed_nodes;
    for (const InputSocketRef *input_ref : node->inputs()) {
      const DInputSocket input{node.context(), input_ref};

      /* Get the output linked to the input. If it is null, that means the input is unlinked and
       * has no dependency node. */
      const DOutputSocket output = get_output_linked_to_input(input);
      if (!output) {
        continue;
      }

      /* The node dependency was already computed or pushed before, so skip it. */
      if (needed_buffers.contains(output.node()) || pushed_nodes.contains(output.node())) {
        continue;
      }

      /* The node dependency needs to be computed, push it to the node stack and indicate that it
       * was pushed. */
      node_stack.push(output.node());
      pushed_nodes.add_new(output.node());
    }

    /* If any of the node dependencies were pushed, that means that not all of them were computed
     * and consequently we can't compute the number of needed buffers for this node just yet. */
    if (!pushed_nodes.is_empty()) {
      continue;
    }

    /* We don't need to store the result of the pop because we already copied the node before. */
    node_stack.pop();

    /* Compute the number of buffers that the node takes as an input as well as the number of
     * buffers needed to compute the most demanding of the node dependencies. */
    int number_of_input_buffers = 0;
    int buffers_needed_by_dependencies = 0;
    for (const InputSocketRef *input_ref : node->inputs()) {
      const DInputSocket input{node.context(), input_ref};

      /* Get the output linked to the input. If it is null, that means the input is unlinked.
       * Unlinked inputs do not take a buffer, so skip those inputs. */
      const DOutputSocket output = get_output_linked_to_input(input);
      if (!output) {
        continue;
      }

      /* Since this input is linked, if the link is not between two shader nodes, it means that the
       * node takes a buffer through this input and so we increment the number of input buffers. */
      if (!is_shader_node(node) || !is_shader_node(output.node())) {
        number_of_input_buffers++;
      }

      /* If the number of buffers needed by the node dependency is more than the total number of
       * buffers needed by the dependencies, then update the latter to be the former. This is
       * computing the "d" in the aforementioned equation "max(n + m, d)". */
      const int buffers_needed_by_dependency = needed_buffers.lookup(output.node());
      if (buffers_needed_by_dependency > buffers_needed_by_dependencies) {
        buffers_needed_by_dependencies = buffers_needed_by_dependency;
      }
    }

    /* Compute the number of buffers that will be computed/output by this node. */
    int number_of_output_buffers = 0;
    for (const OutputSocketRef *output_ref : node->outputs()) {
      const DOutputSocket output{node.context(), output_ref};

      /* The output is not linked, it outputs no buffer. */
      if (output->logically_linked_sockets().is_empty()) {
        continue;
      }

      /* If any of the links is not between two shader nodes, it means that the node outputs
       * a buffer through this output and so we increment the number of output buffers. */
      if (!is_output_linked_to_node_conditioned(output, is_shader_node) || !is_shader_node(node)) {
        number_of_output_buffers++;
      }
    }

    /* Compute the heuristic estimation of the number of needed intermediate buffers to compute
     * this node and all of its dependencies. This is computing the aforementioned equation
     * "max(n + m, d)". */
    const int total_buffers = MAX2(number_of_input_buffers + number_of_output_buffers,
                                   buffers_needed_by_dependencies);
    needed_buffers.add(node, total_buffers);
  }

  return needed_buffers;
}
/* There are multiple different possible orders of evaluating a node graph, each of which needs
* to allocate a number of intermediate buffers to store its intermediate results. It follows
* that we need to find the evaluation order which uses the least amount of intermediate buffers.
* For instance, consider a node that takes two input buffers A and B. Each of those buffers is
* computed through a number of nodes constituting a sub-graph whose root is the node that
* outputs that buffer. Suppose the number of intermediate buffers needed to compute A and B are
* N(A) and N(B) respectively and N(A) > N(B). Then evaluating the sub-graph computing A first would
* be a better option than that of B, because had B been computed first, its outputs would need to
* be stored in extra buffers in addition to the buffers needed by A. The number of buffers needed by
* each node is estimated as described in the compute_number_of_needed_buffers function.
*
* This is a heuristic generalization of the Sethi-Ullman algorithm, a generalization that
* doesn't always guarantee an optimal evaluation order, as the optimal evaluation order is very
* difficult to compute, however, this method works well in most cases. Moreover it assumes that
* all buffers will have roughly the same size, which may not always be the case. */
Schedule compute_schedule(DerivedNodeTree &tree)
{
  Schedule schedule;

  /* If there is no output node, the node tree has no effect, so the schedule is left empty. */
  const DNode output_node = compute_output_node(tree);
  if (!output_node) {
    return schedule;
  }

  /* The estimated number of buffers needed by each of the nodes connected to the output. */
  const NeededBuffers needed_buffers = compute_number_of_needed_buffers(output_node);

  /* The nodes are traversed in a post order depth first manner using an explicit stack that
   * starts at the output node. A node is only popped and scheduled once none of its dependencies
   * is left unscheduled, which guarantees that the dependencies of each node are scheduled before
   * the node itself. The order in which dependencies are visited is informed by their number of
   * needed buffers. */
  Stack<DNode> node_stack = {output_node};
  while (!node_stack.is_empty()) {
    /* Only peek at the node, because it can't be scheduled yet if any of its dependencies is not
     * scheduled. */
    DNode &node = node_stack.peek();

    /* Gather the unscheduled nodes that are directly connected to the inputs of the node, sorted
     * in ascending order by their number of needed buffers. The node with the highest number of
     * needed buffers should be scheduled first, but since the nodes are pushed to a stack, it has
     * to be pushed last, hence the ascending order. */
    Vector<DNode> sorted_dependency_nodes;
    for (const InputSocketRef *input_ref : node->inputs()) {
      const DInputSocket input{node.context(), input_ref};
      const DOutputSocket output = get_output_linked_to_input(input);

      /* Skip the input if it is unlinked and thus has no dependency node, if its dependency node
       * was already gathered, or if its dependency node was already scheduled. The number of
       * dependency nodes is very small, so a linear search in the vector is okay. */
      if (!output || sorted_dependency_nodes.contains(output.node()) ||
          schedule.contains(output.node())) {
        continue;
      }

      /* Insertion sort, find the position of the first gathered node that doesn't need fewer
       * buffers than the dependency node, and insert the dependency node there. */
      int insertion_position = 0;
      while (insertion_position < sorted_dependency_nodes.size() &&
             needed_buffers.lookup(output.node()) >
                 needed_buffers.lookup(sorted_dependency_nodes[insertion_position])) {
        insertion_position++;
      }
      sorted_dependency_nodes.insert(insertion_position, output.node());
    }

    /* Nothing was gathered, so the dependencies were all scheduled already or the node has none
     * in the first place, and the node can be popped and scheduled now. The node might have been
     * scheduled before, so add is used instead of add_new. */
    if (sorted_dependency_nodes.is_empty()) {
      schedule.add(node_stack.pop());
      continue;
    }

    /* Otherwise, visit the gathered dependencies first by pushing them in their sorted order. */
    for (const DNode &dependency_node : sorted_dependency_nodes) {
      node_stack.push(dependency_node);
    }
  }

  return schedule;
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,155 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_assert.h"
#include "BLI_math_vector.h"
#include "BLI_string_ref.hh"
#include "DNA_node_types.h"
#include "NOD_derived_node_tree.hh"
#include "GPU_material.h"
#include "COM_shader_node.hh"
#include "COM_utilities.hh"
namespace blender::realtime_compositor {
using namespace nodes::derived_node_tree_types;
/* Construct a shader node for the given node and populate its input and output GPU node stack
 * arrays from the sockets of that node. */
ShaderNode::ShaderNode(DNode node) : node_(node)
{
populate_inputs();
populate_outputs();
}
/* Return a pointer to the first element of the inputs stack array. The array is terminated by an
 * extra stack whose end member is true, see populate_inputs(). */
GPUNodeStack *ShaderNode::get_inputs_array()
{
return inputs_.data();
}
/* Return a pointer to the first element of the outputs stack array. The array is terminated by an
 * extra stack whose end member is true, see populate_outputs(). */
GPUNodeStack *ShaderNode::get_outputs_array()
{
return outputs_.data();
}
/* Return the GPU node stack of the input with the given identifier. The stack is found using the
 * index of the input socket in the node. */
GPUNodeStack &ShaderNode::get_input(StringRef identifier)
{
  const auto input_index = node_.input_by_identifier(identifier)->index();
  return inputs_[input_index];
}
/* Return the GPU node stack of the output with the given identifier. The stack is found using the
 * index of the output socket in the node. */
GPUNodeStack &ShaderNode::get_output(StringRef identifier)
{
  const auto output_index = node_.output_by_identifier(identifier)->index();
  return outputs_[output_index];
}
/* Return the GPU node link of the input with the given identifier. If the stack of the input has
 * no link, a uniform link that is created from the value of the stack is returned instead. */
GPUNodeLink *ShaderNode::get_input_link(StringRef identifier)
{
  GPUNodeStack &input_stack = get_input(identifier);
  return input_stack.link ? input_stack.link : GPU_uniform(input_stack.vec);
}
/* Return a reference to the derived node that this shader node was constructed from. */
const DNode &ShaderNode::node() const
{
return node_;
}
/* Return a reference to the bNode of the derived node that this shader node was constructed
 * from. */
bNode &ShaderNode::bnode() const
{
return *node_->bnode();
}
/* Map the given socket data type to the corresponding GPU type. Only float, vector, and color
 * sockets are supported, any other type is asserted as unreachable and maps to GPU_NONE. */
static eGPUType gpu_type_from_socket_type(eNodeSocketDatatype type)
{
  if (type == SOCK_FLOAT) {
    return GPU_FLOAT;
  }
  if (type == SOCK_VECTOR) {
    return GPU_VEC3;
  }
  if (type == SOCK_RGBA) {
    return GPU_VEC4;
  }

  BLI_assert_unreachable();
  return GPU_NONE;
}
/* Copy the default value of the given socket to the given vector. One component is written for
 * float sockets, three for vector sockets, and four for color sockets. Any other socket type is
 * asserted as unreachable and leaves the vector untouched. */
static void gpu_stack_vector_from_socket(float *vector, const SocketRef *socket)
{
  switch (socket->bsocket()->type) {
    case SOCK_FLOAT:
      vector[0] = socket->default_value<bNodeSocketValueFloat>()->value;
      break;
    case SOCK_VECTOR:
      copy_v3_v3(vector, socket->default_value<bNodeSocketValueVector>()->value);
      break;
    case SOCK_RGBA:
      copy_v4_v4(vector, socket->default_value<bNodeSocketValueRGBA>()->value);
      break;
    default:
      BLI_assert_unreachable();
      break;
  }
}
/* Initialize the given GPU node stack from the given socket. The type members are set for all
 * sockets. For outputs, only the hasoutput member is set in addition. For inputs, the hasinput
 * member is set and the value of the stack is initialized. */
static void populate_gpu_node_stack(DSocket socket, GPUNodeStack &stack)
{
  /* This stack is not the one that marks the end of the stack array. */
  stack.end = false;
  /* The link is initialized later by the GPU material compiler or the compile method. */
  stack.link = nullptr;

  stack.sockettype = socket->bsocket()->type;
  stack.type = gpu_type_from_socket_type((eNodeSocketDatatype)socket->bsocket()->type);

  if (!socket->is_input()) {
    stack.hasoutput = socket->is_logically_linked();
    return;
  }

  const DInputSocket input(socket);
  DSocket origin = get_input_origin_socket(input);

  /* The input is linked if its origin is an output socket. Had the origin been an input socket,
   * then it is an unlinked input of a group input node. */
  stack.hasinput = origin->is_output();

  /* If the origin is an input, then it is either an unlinked input or an unlinked input of a group
   * input node that the socket is linked to, so the value is taken from the origin. Otherwise, the
   * value is taken from the socket itself. */
  const SocketRef *value_socket = origin->is_input() ? origin.socket_ref() : socket.socket_ref();
  gpu_stack_vector_from_socket(stack.vec, value_socket);
}
/* Populate the inputs stack array with a stack for each of the inputs of the node. */
void ShaderNode::populate_inputs()
{
  const auto inputs_count = node_->inputs().size();

  /* The GPU module functions expect the array to end with an extra stack that is marked as the
   * end of the array, so reserve one more stack than there are inputs. */
  inputs_.resize(inputs_count + 1);
  inputs_.last().end = true;

  for (int input_index = 0; input_index < inputs_count; input_index++) {
    populate_gpu_node_stack(node_.input(input_index), inputs_[input_index]);
  }
}
/* Populate the outputs stack array with a stack for each of the outputs of the node. */
void ShaderNode::populate_outputs()
{
  const auto outputs_count = node_->outputs().size();

  /* The GPU module functions expect the array to end with an extra stack that is marked as the
   * end of the array, so reserve one more stack than there are outputs. */
  outputs_.resize(outputs_count + 1);
  outputs_.last().end = true;

  for (int output_index = 0; output_index < outputs_count; output_index++) {
    populate_gpu_node_stack(node_.output(output_index), outputs_[output_index]);
  }
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,522 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <memory>
#include <string>
#include "BLI_listbase.h"
#include "BLI_map.hh"
#include "BLI_string_ref.hh"
#include "BLI_utildefines.h"
#include "DNA_customdata_types.h"
#include "GPU_material.h"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "GPU_uniform_buffer.h"
#include "gpu_shader_create_info.hh"
#include "NOD_derived_node_tree.hh"
#include "NOD_node_declaration.hh"
#include "COM_context.hh"
#include "COM_operation.hh"
#include "COM_result.hh"
#include "COM_scheduler.hh"
#include "COM_shader_node.hh"
#include "COM_shader_operation.hh"
#include "COM_utilities.hh"
namespace blender::realtime_compositor {
using namespace nodes::derived_node_tree_types;
/* Construct a shader operation for the given compile unit. A GPU material is created from the
 * construct_material and generate_code callbacks, with this operation passed as their thunk, then
 * the material is marked as queued and compiled. */
ShaderOperation::ShaderOperation(Context &context, ShaderCompileUnit &compile_unit)
: Operation(context), compile_unit_(compile_unit)
{
material_ = GPU_material_from_callbacks(&construct_material, &generate_code, this);
GPU_material_status_set(material_, GPU_MAT_QUEUED);
GPU_material_compile(material_);
}
/* Free the GPU material that was created in the constructor. */
ShaderOperation::~ShaderOperation()
{
GPU_material_free_single(material_);
}
void ShaderOperation::execute()
{
const Domain domain = compute_domain();
for (StringRef identifier : output_sockets_to_output_identifiers_map_.values()) {
Result &result = get_result(identifier);
result.allocate_texture(domain);
}
GPUShader *shader = GPU_material_get_shader(material_);
GPU_shader_bind(shader);
bind_material_resources(shader);
bind_inputs(shader);
bind_outputs(shader);
compute_dispatch_threads_at_least(shader, domain.size);
GPU_texture_unbind_all();
GPU_texture_image_unbind_all();
GPU_uniformbuf_unbind_all();
GPU_shader_unbind();
}
/* Return the identifier of the operation output that is mapped to the given output socket. */
StringRef ShaderOperation::get_output_identifier_from_output_socket(DOutputSocket output_socket)
{
return output_sockets_to_output_identifiers_map_.lookup(output_socket);
}
/* Return a mutable reference to the map that associates the identifier of each input of the
 * operation with the output socket that it is linked to. */
Map<std::string, DOutputSocket> &ShaderOperation::get_inputs_to_linked_outputs_map()
{
return inputs_to_linked_outputs_map_;
}
/* Set the initial reference count of each result of the operation to the number of inputs that
 * are linked to its output socket and whose nodes are part of the given schedule. */
void ShaderOperation::compute_results_reference_counts(const Schedule &schedule)
{
  /* Only inputs of nodes that are scheduled count as references. */
  auto is_input_scheduled = [&](DInputSocket input) { return schedule.contains(input.node()); };

  for (const auto &output_item : output_sockets_to_output_identifiers_map_.items()) {
    const int scheduled_references_count = number_of_inputs_linked_to_output_conditioned(
        output_item.key, is_input_scheduled);
    get_result(output_item.value).set_initial_reference_count(scheduled_references_count);
  }
}
/* Bind the uniform buffer and the color band textures of the material to the given shader. */
void ShaderOperation::bind_material_resources(GPUShader *shader)
{
  /* The uniform buffer may not exist if the GPU material has no uniforms, so only bind it if it
   * exists. */
  if (GPUUniformBuf *uniform_buffer = GPU_material_uniform_buffer_get(material_)) {
    const int uniform_block_binding = GPU_shader_get_uniform_block_binding(shader,
                                                                           GPU_UBO_BLOCK_NAME);
    GPU_uniformbuf_bind(uniform_buffer, uniform_block_binding);
  }

  /* Bind the color band textures needed by curve and ramp nodes, skipping the material textures
   * that have no color band. */
  ListBase material_textures = GPU_material_textures(material_);
  LISTBASE_FOREACH (GPUMaterialTexture *, material_texture, &material_textures) {
    if (!material_texture->colorband) {
      continue;
    }
    const int texture_unit = GPU_shader_get_texture_binding(shader,
                                                            material_texture->sampler_name);
    GPU_texture_bind(*material_texture->colorband, texture_unit);
  }
}
void ShaderOperation::bind_inputs(GPUShader *shader)
{
  /* Each attribute of the GPU material represents an input of the operation. The name of the
   * attribute is both the identifier of the input and the name of its texture sampler in the
   * shader. */
  ListBase material_attributes = GPU_material_attributes(material_);
  LISTBASE_FOREACH (GPUMaterialAttribute *, material_attribute, &material_attributes) {
    const char *input_name = material_attribute->name;
    get_input(input_name).bind_as_texture(shader, input_name);
  }
}
/* Bind the result of each output of the operation as an image in the given shader. The name of the
 * image in the shader is the identifier of the output. */
void ShaderOperation::bind_outputs(GPUShader *shader)
{
for (StringRefNull output_identifier : output_sockets_to_output_identifiers_map_.values()) {
get_result(output_identifier).bind_as_image(shader, output_identifier.c_str());
}
}
/* Callback given to GPU_material_from_callbacks to construct the GPU material graph. The thunk is
 * the shader operation itself. The nodes of the compile unit are processed in order, and for each
 * node: a shader node is created and stored, its linked inputs are linked, it is compiled into the
 * material, and finally results are populated for its outputs that are used outside of the
 * operation. */
void ShaderOperation::construct_material(void *thunk, GPUMaterial *material)
{
ShaderOperation *operation = static_cast<ShaderOperation *>(thunk);
for (DNode node : operation->compile_unit_) {
ShaderNode *shader_node = node->typeinfo()->get_compositor_shader_node(node);
/* The map stores the shader node in a unique pointer, the raw pointer is only used locally. */
operation->shader_nodes_.add_new(node, std::unique_ptr<ShaderNode>(shader_node));
/* Inputs are linked before compiling the node and results are populated after it. */
operation->link_node_inputs(node, material);
shader_node->compile(material);
operation->populate_results_for_node(node, material);
}
}
void ShaderOperation::link_node_inputs(DNode node, GPUMaterial *material)
{
  for (const InputSocketRef *input_ref : node->inputs()) {
    const DInputSocket input{node.context(), input_ref};

    /* A null output means the input is unlinked. Unlinked inputs are handled by the compile
     * method of the node, so there is nothing to do for them here. */
    const DOutputSocket origin_output = get_output_linked_to_input(input);
    if (!origin_output) {
      continue;
    }

    if (compile_unit_.contains(origin_output.node())) {
      /* The origin node is part of the shader operation, so the link is internal to the GPU
       * material graph. */
      link_node_input_internal(input, origin_output);
    }
    else {
      /* The origin node is not part of the shader operation, so the link is external to the GPU
       * material graph. An input to the shader operation is declared, if needed, and linked to
       * the node input. */
      link_node_input_external(input, origin_output, material);
    }
  }
}
void ShaderOperation::link_node_input_internal(DInputSocket input_socket,
                                               DOutputSocket output_socket)
{
  /* Retrieve the stack of the output in the shader node of the origin. */
  GPUNodeStack &origin_stack = shader_nodes_.lookup(output_socket.node())
                                   ->get_output(output_socket->identifier());

  /* Retrieve the stack of the input in the shader node of the target. */
  GPUNodeStack &target_stack = shader_nodes_.lookup(input_socket.node())
                                   ->get_input(input_socket->identifier());

  /* Both stacks now share the same link, which connects them in the GPU material graph. */
  target_stack.link = origin_stack.link;
}
void ShaderOperation::link_node_input_external(DInputSocket input_socket,
                                               DOutputSocket output_socket,
                                               GPUMaterial *material)
{
  ShaderNode &shader_node = *shader_nodes_.lookup(input_socket.node());
  GPUNodeStack &input_stack = shader_node.get_input(input_socket->identifier());

  /* Multiple inputs may be linked to the same output socket, in which case, they all share the
   * same operation input. So only declare an operation input if none was declared before for
   * that output socket. */
  const bool is_input_declared = output_to_material_attribute_map_.contains(output_socket);
  if (!is_input_declared) {
    declare_operation_input(input_socket, output_socket, material);
  }

  /* Link the node input to the attribute that represents the operation input corresponding to
   * the output socket. */
  input_stack.link = output_to_material_attribute_map_.lookup(output_socket);
}
/* Return the name of the GPU function that operation input attributes of the given type are
 * linked to in declare_operation_input. Returns null, after asserting, if the type is not
 * handled. */
static const char *get_set_function_name(ResultType type)
{
switch (type) {
case ResultType::Float:
return "set_value";
case ResultType::Vector:
return "set_rgb";
case ResultType::Color:
return "set_rgba";
}
BLI_assert_unreachable();
return nullptr;
}
void ShaderOperation::declare_operation_input(DInputSocket input_socket,
                                              DOutputSocket output_socket,
                                              GPUMaterial *material)
{
  /* The identifier of the input is the word "input" followed by the number of inputs that were
   * declared before it. */
  const int index = output_to_material_attribute_map_.size();
  std::string identifier = "input" + std::to_string(index);

  /* The descriptor is derived from the input socket, except for its type, which is taken from the
   * output socket instead, because doing type conversion in the shader is much cheaper. */
  InputDescriptor descriptor = input_descriptor_from_input_socket(input_socket.socket_ref());
  descriptor.type = get_node_socket_result_type(output_socket.socket_ref());
  declare_input_descriptor(identifier, descriptor);

  /* The input is represented by a new GPU attribute in the GPU material. The attribute is not
   * used directly, rather, it is linked to an appropriate set function whose output link is used
   * instead. This is needed because the gputype member of the attribute is only initialized if
   * it is linked to a GPU node. */
  GPUNodeLink *raw_attribute_link = GPU_attribute(
      material, CD_AUTO_FROM_NAME, identifier.c_str());
  const char *set_function_name = get_set_function_name(descriptor.type);
  GPUNodeLink *attribute_link;
  GPU_link(material, set_function_name, raw_attribute_link, &attribute_link);

  /* Remember which attribute link was created for the output socket. */
  output_to_material_attribute_map_.add(output_socket, attribute_link);

  /* Remember which output socket the newly declared operation input is linked to. */
  inputs_to_linked_outputs_map_.add_new(identifier, output_socket);
}
void ShaderOperation::populate_results_for_node(DNode node, GPUMaterial *material)
{
  /* A result is only needed for outputs that are linked to at least one node that is not part of
   * the shader operation. */
  auto is_outside_of_operation = [&](DNode target_node) {
    return !compile_unit_.contains(target_node);
  };

  for (const OutputSocketRef *output_ref : node->outputs()) {
    const DOutputSocket output{node.context(), output_ref};
    if (is_output_linked_to_node_conditioned(output, is_outside_of_operation)) {
      populate_operation_result(output, material);
    }
  }
}
/* Return the name of the GPU function that is linked in populate_operation_result to store an
 * output of the given type. Returns null, after asserting, if the type is not handled. */
static const char *get_store_function_name(ResultType type)
{
switch (type) {
case ResultType::Float:
return "node_compositor_store_output_float";
case ResultType::Vector:
return "node_compositor_store_output_vector";
case ResultType::Color:
return "node_compositor_store_output_color";
}
BLI_assert_unreachable();
return nullptr;
}
/* Populate a new result of the operation for the given output socket and link the output of its
 * shader node to a storer function that writes it. The identifier of the result is the word
 * "output" followed by the number of results that were populated before it. */
void ShaderOperation::populate_operation_result(DOutputSocket output_socket, GPUMaterial *material)
{
const unsigned int output_id = output_sockets_to_output_identifiers_map_.size();
std::string output_identifier = "output" + std::to_string(output_id);
const ResultType result_type = get_node_socket_result_type(output_socket.socket_ref());
const Result result = Result(result_type, texture_pool());
populate_result(output_identifier, result);
/* Map the output socket to the identifier of the newly populated result. */
output_sockets_to_output_identifiers_map_.add_new(output_socket, output_identifier);
ShaderNode &node = *shader_nodes_.lookup(output_socket.node());
GPUNodeLink *output_link = node.get_output(output_socket->identifier()).link;
/* Link the output node stack to an output storer storing in the appropriate result. The result
 * is identified by its index in the operation and the index is encoded as a float to be passed
 * to the GPU function. Additionally, create an output link from the storer node to declare as an
 * output to the GPU material. This storer output link is a dummy link in the sense that its
 * value is ignored since it is already written in the output, but it is used to track nodes that
 * contribute to the output of the compositor node tree. */
GPUNodeLink *storer_output_link;
/* NOTE(review): the address of the unsigned integer ID is passed as a float pointer, so the ID is
 * not converted to a float value. Presumably the shader side reinterprets the bits back to an
 * unsigned integer, confirm against the store output GLSL functions. */
GPUNodeLink *id_link = GPU_constant((float *)&output_id);
const char *store_function_name = get_store_function_name(result_type);
GPU_link(material, store_function_name, id_link, output_link, &storer_output_link);
/* Declare the output link of the storer node as an output of the GPU material to help the GPU
 * code generator to track the nodes that contribute to the output of the shader. */
GPU_material_add_output_link_composite(material, storer_output_link);
}
using namespace gpu::shader;
/* Callback given to GPU_material_from_callbacks to fill the shader create info of the compute
 * shader of the operation. The thunk is the shader operation itself and the create info is
 * retrieved from the given code generator output. */
void ShaderOperation::generate_code(void *thunk,
GPUMaterial *material,
GPUCodegenOutput *code_generator_output)
{
ShaderOperation *operation = static_cast<ShaderOperation *>(thunk);
ShaderCreateInfo &shader_create_info = *reinterpret_cast<ShaderCreateInfo *>(
code_generator_output->create_info);
shader_create_info.local_group_size(16, 16);
/* The resources are added without explicit locations, so make sure it is done by the
 * shader creator. */
shader_create_info.auto_resource_location(true);
/* Add implementation for implicit conversion operations inserted by the code generator. This
 * file should include the functions [float|vec3|vec4]_from_[float|vec3|vec4]. */
shader_create_info.typedef_source("gpu_shader_compositor_type_conversion.glsl");
/* The source shader is a compute shader with a main function that calls the dynamically
 * generated evaluate function. The evaluate function includes the serialized GPU material graph
 * preceded by code that initializes the inputs of the operation. Additionally, the storer
 * functions that write the outputs are defined outside the evaluate function. */
shader_create_info.compute_source("gpu_shader_compositor_main.glsl");
/* The main function is emitted in the shader before the evaluate function, so the evaluate
 * function needs to be forward declared here. */
shader_create_info.typedef_source_generated += "void evaluate();\n";
/* The store functions are emitted before the evaluate function is opened, so they end up
 * outside of it. */
operation->generate_code_for_outputs(shader_create_info);
shader_create_info.compute_source_generated += "void evaluate()\n{\n";
operation->generate_code_for_inputs(material, shader_create_info);
shader_create_info.compute_source_generated += code_generator_output->composite;
shader_create_info.compute_source_generated += "}\n";
}
/* Return the texture format used to declare the output image of a result of the given type.
 * Vectors and colors both use a four channel format, while floats use a single channel one. Falls
 * back to the four channel format, after asserting, if the type is not handled. */
static eGPUTextureFormat texture_format_from_result_type(ResultType type)
{
switch (type) {
case ResultType::Float:
return GPU_R16F;
case ResultType::Vector:
return GPU_RGBA16F;
case ResultType::Color:
return GPU_RGBA16F;
}
BLI_assert_unreachable();
return GPU_RGBA16F;
}
/* Texture storers in the shader always take a vec4 as an argument, so encode each type in a vec4
 * appropriately. The returned expressions refer to the parameter names of the generated store
 * functions, which are "value", "vector", and "color" for floats, vectors, and colors
 * respectively. Returns null, after asserting, if the type is not handled. */
static const char *glsl_store_expression_from_result_type(ResultType type)
{
switch (type) {
case ResultType::Float:
return "vec4(value)";
case ResultType::Vector:
return "vec4(vector, 0.0)";
case ResultType::Color:
return "color";
}
BLI_assert_unreachable();
return nullptr;
}
/* Declare a write-only image for each output of the operation in the given shader create info and
 * generate the store_float, store_vector, and store_color functions. Each function is a switch
 * statement on the ID of the output that stores the given value in the image of that output. The
 * functions are forward declared in the generated typedef source and defined in the generated
 * compute source. */
void ShaderOperation::generate_code_for_outputs(ShaderCreateInfo &shader_create_info)
{
const std::string store_float_function_header = "void store_float(const uint id, float value)";
const std::string store_vector_function_header = "void store_vector(const uint id, vec3 vector)";
const std::string store_color_function_header = "void store_color(const uint id, vec4 color)";
/* The store functions are used by the node_compositor_store_output_[float|vector|color]
 * functions but are only defined later as part of the compute source, so they need to be forward
 * declared. */
shader_create_info.typedef_source_generated += store_float_function_header + ";\n";
shader_create_info.typedef_source_generated += store_vector_function_header + ";\n";
shader_create_info.typedef_source_generated += store_color_function_header + ";\n";
/* Each of the store functions is essentially a single switch case on the given ID, so start by
 * opening the function with a curly bracket followed by opening a switch statement in each of
 * the functions. */
std::stringstream store_float_function;
std::stringstream store_vector_function;
std::stringstream store_color_function;
const std::string store_function_start = "\n{\n switch (id) {\n";
store_float_function << store_float_function_header << store_function_start;
store_vector_function << store_vector_function_header << store_function_start;
store_color_function << store_color_function_header << store_function_start;
for (StringRefNull output_identifier : output_sockets_to_output_identifiers_map_.values()) {
const Result &result = get_result(output_identifier);
/* Add a write-only image for this output where its values will be written. */
shader_create_info.image(0,
texture_format_from_result_type(result.type()),
Qualifier::WRITE,
ImageType::FLOAT_2D,
output_identifier,
Frequency::BATCH);
/* Add a case for the index of this output followed by a break statement. The index is what
 * remains of the output identifier after dropping its "output" prefix. */
std::stringstream case_code;
const std::string store_expression = glsl_store_expression_from_result_type(result.type());
const std::string texel = ", ivec2(gl_GlobalInvocationID.xy), ";
case_code << " case " << StringRef(output_identifier).drop_known_prefix("output") << ":\n"
<< " imageStore(" << output_identifier << texel << store_expression << ");\n"
<< " break;\n";
/* Only add the case to the function with the matching type. */
switch (result.type()) {
case ResultType::Float:
store_float_function << case_code.str();
break;
case ResultType::Vector:
store_vector_function << case_code.str();
break;
case ResultType::Color:
store_color_function << case_code.str();
break;
}
}
/* Close the previously opened switch statement as well as the function itself. */
const std::string store_function_end = " }\n}\n\n";
store_float_function << store_function_end;
store_vector_function << store_function_end;
store_color_function << store_function_end;
shader_create_info.compute_source_generated += store_float_function.str() +
store_vector_function.str() +
store_color_function.str();
}
/* Return the name of the GLSL type used to declare the attribute of an operation input of the
 * given type. Returns null, after asserting, if the type is not handled. */
static const char *glsl_type_from_result_type(ResultType type)
{
switch (type) {
case ResultType::Float:
return "float";
case ResultType::Vector:
return "vec3";
case ResultType::Color:
return "vec4";
}
BLI_assert_unreachable();
return nullptr;
}
/* Texture loaders in the shader always return a vec4, so a swizzle is needed to retrieve the
 * actual value for each type. Returns null, after asserting, if the type is not handled. */
static const char *glsl_swizzle_from_result_type(ResultType type)
{
switch (type) {
case ResultType::Float:
return "x";
case ResultType::Vector:
return "xyz";
case ResultType::Color:
return "rgba";
}
BLI_assert_unreachable();
return nullptr;
}
/* Declare a texture sampler for each input of the operation in the given shader create info and
 * append to the generated compute source the code that declares the var_attrs struct and
 * initializes its members by loading the input textures. */
void ShaderOperation::generate_code_for_inputs(GPUMaterial *material,
ShaderCreateInfo &shader_create_info)
{
/* The attributes of the GPU material represent the inputs of the operation. */
ListBase attributes = GPU_material_attributes(material);
/* Add a texture sampler for each of the inputs with the same name as the attribute. */
LISTBASE_FOREACH (GPUMaterialAttribute *, attribute, &attributes) {
shader_create_info.sampler(0, ImageType::FLOAT_2D, attribute->name, Frequency::BATCH);
}
/* Declare a struct called var_attrs that includes an appropriately typed member for each of the
 * inputs. The names of the members should be the letter v followed by the ID of the attribute
 * corresponding to the input. Such names are expected by the code generator. */
std::stringstream declare_attributes;
declare_attributes << "struct {\n";
LISTBASE_FOREACH (GPUMaterialAttribute *, attribute, &attributes) {
const InputDescriptor &input_descriptor = get_input_descriptor(attribute->name);
const std::string type = glsl_type_from_result_type(input_descriptor.type);
declare_attributes << " " << type << " v" << attribute->id << ";\n";
}
declare_attributes << "} var_attrs;\n\n";
shader_create_info.compute_source_generated += declare_attributes.str();
/* The texture loader utilities are needed to sample the input textures and initialize the
 * attributes. */
shader_create_info.typedef_source("gpu_shader_compositor_texture_utilities.glsl");
/* Initialize each member of the previously declared struct by loading its corresponding texture
 * with an appropriate swizzle for its type. */
std::stringstream initialize_attributes;
LISTBASE_FOREACH (GPUMaterialAttribute *, attribute, &attributes) {
const InputDescriptor &input_descriptor = get_input_descriptor(attribute->name);
const std::string swizzle = glsl_swizzle_from_result_type(input_descriptor.type);
initialize_attributes << "var_attrs.v" << attribute->id << " = "
<< "texture_load(" << attribute->name
<< ", ivec2(gl_GlobalInvocationID.xy))." << swizzle << ";\n";
}
initialize_attributes << "\n";
shader_create_info.compute_source_generated += initialize_attributes.str();
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,55 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "COM_input_descriptor.hh"
#include "COM_operation.hh"
#include "COM_result.hh"
#include "COM_simple_operation.hh"
namespace blender::realtime_compositor {
/* The identifiers of the single input and the single output of simple operations. All the methods
 * of SimpleOperation in this file forward to the Operation methods using those identifiers. */
const StringRef SimpleOperation::input_identifier_ = StringRef("Input");
const StringRef SimpleOperation::output_identifier_ = StringRef("Output");
/* Return the result of the operation, which is the result identified by output_identifier_. */
Result &SimpleOperation::get_result()
{
return Operation::get_result(output_identifier_);
}
/* Map the input of the operation, identified by input_identifier_, to the given result. */
void SimpleOperation::map_input_to_result(Result *result)
{
Operation::map_input_to_result(input_identifier_, result);
}
/* Intentionally does nothing, so no input processors are added or evaluated for simple
 * operations. */
void SimpleOperation::add_and_evaluate_input_processors()
{
}
/* Return the result mapped to the input of the operation, identified by input_identifier_. */
Result &SimpleOperation::get_input()
{
return Operation::get_input(input_identifier_);
}
/* Switch the result mapped to the input of the operation, identified by input_identifier_, to the
 * given result. */
void SimpleOperation::switch_result_mapped_to_input(Result *result)
{
Operation::switch_result_mapped_to_input(input_identifier_, result);
}
/* Populate the given result as the result of the operation, identified by output_identifier_, and
 * set its initial reference count to one. */
void SimpleOperation::populate_result(Result result)
{
Operation::populate_result(output_identifier_, result);
/* The result of a simple operation is guaranteed to have a single user. */
get_result().set_initial_reference_count(1);
}
/* Declare the given descriptor as the descriptor of the input of the operation, identified by
 * input_identifier_. */
void SimpleOperation::declare_input_descriptor(InputDescriptor descriptor)
{
Operation::declare_input_descriptor(input_identifier_, descriptor);
}
/* Return a reference to the descriptor of the input of the operation, identified by
 * input_identifier_. */
InputDescriptor &SimpleOperation::get_input_descriptor()
{
return Operation::get_input_descriptor(input_identifier_);
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,24 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "GPU_shader.h"
#include "COM_static_shader_manager.hh"
namespace blender::realtime_compositor {
/* Free all the shaders that were created by the manager through the get method. */
StaticShaderManager::~StaticShaderManager()
{
for (GPUShader *shader : shaders_.values()) {
GPU_shader_free(shader);
}
}
GPUShader *StaticShaderManager::get(const char *info_name)
{
  /* Only invoked if no shader with the same info name was added to the manager before. */
  auto create_shader = [info_name]() { return GPU_shader_create_from_info_name(info_name); };

  /* Return the shader stored for the info name, creating and storing it first if needed. */
  return shaders_.lookup_or_add_cb(info_name, create_shader);
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,84 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <cstdint>
#include "BLI_hash.hh"
#include "BLI_map.hh"
#include "BLI_math_vec_types.hh"
#include "BLI_vector.hh"
#include "GPU_texture.h"
#include "COM_texture_pool.hh"
namespace blender::realtime_compositor {
/* --------------------------------------------------------------------
* Texture Pool Key.
*/
/* Construct a key from the given texture size and format. */
TexturePoolKey::TexturePoolKey(int2 size, eGPUTextureFormat format) : size(size), format(format)
{
}
/* Construct a key from the size and format of the given texture. */
TexturePoolKey::TexturePoolKey(const GPUTexture *texture)
    : size(GPU_texture_width(texture), GPU_texture_height(texture)),
      format(GPU_texture_format(texture))
{
}
/* Compute a hash from the same members that are compared in the equality operator, that is, the
 * width, the height, and the format. */
uint64_t TexturePoolKey::hash() const
{
return get_default_hash_3(size.x, size.y, format);
}
/* Two keys are equal if they have the same size and the same format. */
bool operator==(const TexturePoolKey &a, const TexturePoolKey &b)
{
return a.size == b.size && a.format == b.format;
}
/* --------------------------------------------------------------------
* Texture Pool.
*/
GPUTexture *TexturePool::acquire(int2 size, eGPUTextureFormat format)
{
  /* Retrieve the textures that are available for the required specification. An empty entry is
   * added to the pool if no entry exists for that specification. */
  Vector<GPUTexture *> &pooled_textures = textures_.lookup_or_add_default(
      TexturePoolKey(size, format));

  /* No texture is available for reuse, so allocate a new one. */
  if (pooled_textures.is_empty()) {
    return allocate_texture(size, format);
  }

  /* Otherwise, reuse the texture that was most recently made available. */
  return pooled_textures.pop_last();
}
/* Acquire a texture of the given size in the GPU_RGBA16F format, which is used for colors. */
GPUTexture *TexturePool::acquire_color(int2 size)
{
return acquire(size, GPU_RGBA16F);
}
/* Acquire a texture of the given size in the GPU_RGBA16F format, which is used for vectors. */
GPUTexture *TexturePool::acquire_vector(int2 size)
{
/* Vectors are stored in RGBA textures because RGB textures have limited support. */
return acquire(size, GPU_RGBA16F);
}
/* Acquire a texture of the given size in the GPU_R16F format, which is used for floats. */
GPUTexture *TexturePool::acquire_float(int2 size)
{
return acquire(size, GPU_R16F);
}
void TexturePool::release(GPUTexture *texture)
{
  /* Make the texture available again under the key that matches its size and format. The entry
   * of that key is looked up without being added, so it is presumably expected to exist because
   * the texture was acquired from this pool. */
  const TexturePoolKey key = TexturePoolKey(texture);
  Vector<GPUTexture *> &pooled_textures = textures_.lookup(key);
  pooled_textures.append(texture);
}
/* Clear the map of available textures. Note that the textures themselves are not freed here. */
void TexturePool::reset()
{
textures_.clear();
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,134 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_assert.h"
#include "BLI_function_ref.hh"
#include "BLI_math_vec_types.hh"
#include "BLI_math_vector.hh"
#include "BLI_utildefines.h"
#include "DNA_node_types.h"
#include "NOD_derived_node_tree.hh"
#include "NOD_node_declaration.hh"
#include "GPU_compute.h"
#include "GPU_shader.h"
#include "COM_operation.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
namespace blender::realtime_compositor {
using namespace nodes::derived_node_tree_types;
using TargetSocketPathInfo = DOutputSocket::TargetSocketPathInfo;
/* Return the origin socket of the given input. If the input has no logically linked sockets, the
 * input itself is returned. Otherwise, the socket visited by foreach_origin_socket is returned, and
 * if multiple sockets are visited, the last visited one is returned. */
DSocket get_input_origin_socket(DInputSocket input)
{
/* The input is unlinked. Return the socket itself. */
if (input->logically_linked_sockets().is_empty()) {
return input;
}
/* Only a single origin socket is guaranteed to exist. */
DSocket socket;
input.foreach_origin_socket([&](const DSocket origin) { socket = origin; });
return socket;
}
DOutputSocket get_output_linked_to_input(DInputSocket input)
{
  /* The origin socket of the input is an output socket if the input is linked to an output. */
  const DSocket origin = get_input_origin_socket(input);

  /* An origin that is itself an input means the input is unlinked, so a null output socket is
   * returned in that case. Otherwise, the origin is an output and a derived output is returned
   * from it. */
  return origin->is_input() ? DOutputSocket() : DOutputSocket(origin);
}
/* Return the result type that corresponds to the type of the given socket. Only float, vector, and
 * color sockets are handled, other socket types trigger an assert and fall back to the float
 * type. */
ResultType get_node_socket_result_type(const SocketRef *socket)
{
switch (socket->bsocket()->type) {
case SOCK_FLOAT:
return ResultType::Float;
case SOCK_VECTOR:
return ResultType::Vector;
case SOCK_RGBA:
return ResultType::Color;
default:
BLI_assert_unreachable();
return ResultType::Float;
}
}
bool is_output_linked_to_node_conditioned(DOutputSocket output, FunctionRef<bool(DNode)> condition)
{
  /* Becomes true once the node of any of the target sockets satisfies the condition. Note that
   * the condition is evaluated for the nodes of all target sockets, even after it was satisfied
   * by one of them. */
  bool is_any_node_satisfying = false;
  output.foreach_target_socket(
      [&](DInputSocket target, const TargetSocketPathInfo &UNUSED(path_info)) {
        const bool is_satisfied = condition(target.node());
        if (is_satisfied) {
          is_any_node_satisfying = true;
        }
      });
  return is_any_node_satisfying;
}
int number_of_inputs_linked_to_output_conditioned(DOutputSocket output,
                                                  FunctionRef<bool(DInputSocket)> condition)
{
  /* Count the target sockets of the output that satisfy the given condition. */
  int satisfying_inputs_count = 0;
  output.foreach_target_socket(
      [&](DInputSocket target, const TargetSocketPathInfo &UNUSED(path_info)) {
        satisfying_inputs_count += condition(target) ? 1 : 0;
      });
  return satisfying_inputs_count;
}
/* A node is a shader node if its type defines the get_compositor_shader_node callback. */
bool is_shader_node(DNode node)
{
  return node->typeinfo()->get_compositor_shader_node != nullptr;
}
/* A node is supported if its type defines either the get_compositor_operation callback or the
 * get_compositor_shader_node callback. */
bool is_node_supported(DNode node)
{
  const bool has_operation = node->typeinfo()->get_compositor_operation != nullptr;
  const bool has_shader_node = node->typeinfo()->get_compositor_shader_node != nullptr;
  return has_operation || has_shader_node;
}
InputDescriptor input_descriptor_from_input_socket(const InputSocketRef *socket)
{
  using namespace nodes;

  /* The type of the descriptor is always derived from the type of the socket. */
  InputDescriptor descriptor;
  descriptor.type = get_node_socket_result_type(socket);

  /* The rest of the properties are read from the declaration of the socket. Not every node has
   * a declaration, in which case, the default values of those properties are kept. */
  const NodeDeclaration *declaration = socket->node().declaration();
  if (declaration) {
    const SocketDeclarationPtr &socket_declaration = declaration->inputs()[socket->index()];
    descriptor.domain_priority = socket_declaration->compositor_domain_priority();
    descriptor.expects_single_value = socket_declaration->compositor_expects_single_value();
  }

  return descriptor;
}
/* Dispatch the given compute shader with enough work groups of the given local size to cover at
 * least the given two dimensional threads range. A single group is dispatched along the third
 * dimension. */
void compute_dispatch_threads_at_least(GPUShader *shader, int2 threads_range, int2 local_size)
{
/* If the threads range is divisible by the local size, dispatch the number of needed groups,
 * which is their division. If it is not divisible, then dispatch an extra group to cover the
 * remaining invocations, which means the actual threads range of the dispatch will be a bit
 * larger than the given one. */
const int2 groups_to_dispatch = math::divide_ceil(threads_range, local_size);
GPU_compute_dispatch(shader, groups_to_dispatch.x, groups_to_dispatch.y, 1);
}
} // namespace blender::realtime_compositor

View File

@ -23,6 +23,7 @@ set(INC
../nodes
../render
../render/intern
../compositor/realtime_compositor
../windowmanager
../../../intern/atomic
@ -103,6 +104,7 @@ set(SRC
intern/smaa_textures.c
engines/basic/basic_engine.c
engines/basic/basic_shader.c
engines/compositor/compositor_engine.cc
engines/image/image_engine.cc
engines/image/image_shader.cc
engines/eevee/eevee_bloom.c
@ -231,6 +233,7 @@ set(SRC
intern/smaa_textures.h
engines/basic/basic_engine.h
engines/basic/basic_private.h
engines/compositor/compositor_engine.h
engines/eevee/eevee_engine.h
engines/eevee/eevee_lightcache.h
engines/eevee/eevee_lut.h
@ -262,6 +265,7 @@ set(SRC
set(LIB
bf_blenkernel
bf_blenlib
bf_realtime_compositor
bf_windowmanager
)

View File

@ -0,0 +1,203 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_listbase.h"
#include "BLI_math_vec_types.hh"
#include "BLI_string_ref.hh"
#include "BLI_utildefines.h"
#include "BLT_translation.h"
#include "DNA_ID_enums.h"
#include "DNA_scene_types.h"
#include "DEG_depsgraph_query.h"
#include "DRW_render.h"
#include "IMB_colormanagement.h"
#include "COM_context.hh"
#include "COM_evaluator.hh"
#include "COM_texture_pool.hh"
#include "GPU_texture.h"
namespace blender::draw::compositor {
/* A texture pool that allocates its textures by querying the texture pool of the draw manager. */
class TexturePool : public realtime_compositor::TexturePool {
 public:
  GPUTexture *allocate_texture(int2 size, eGPUTextureFormat format) override
  {
    /* The address of this pool is passed as the owner of the queried texture. NOTE(review):
     * presumably the owner is only used as an identifier by the draw manager and never
     * dereferenced as a draw engine type, confirm against DRW_texture_pool_query_2d. */
    DrawEngineType *texture_owner = reinterpret_cast<DrawEngineType *>(this);
    return DRW_texture_pool_query_2d(size.x, size.y, format, texture_owner);
  }
};
/* A compositor context that retrieves its data from the state of the draw manager, that is, from
 * the draw context state and the viewport that is being drawn. */
class Context : public realtime_compositor::Context {
private:
/* A pointer to the info message of the compositor engine. This is a char array of size
 * GPU_INFO_SIZE. The message is cleared prior to updating or evaluating the compositor. */
char *info_message_;
public:
Context(realtime_compositor::TexturePool &texture_pool, char *info_message)
: realtime_compositor::Context(texture_pool), info_message_(info_message)
{
}
/* The scene of the draw context state. */
const Scene *get_scene() const override
{
return DRW_context_state_get()->scene;
}
/* The output size is the size of the viewport converted to integers. */
int2 get_output_size() override
{
return int2(float2(DRW_viewport_size_get()));
}
/* The output is written to the color texture of the viewport. */
GPUTexture *get_output_texture() override
{
return DRW_viewport_texture_list_get()->color;
}
/* The view layer and pass type are ignored, the output texture is returned for all of them. */
GPUTexture *get_input_texture(int UNUSED(view_layer), eScenePassType UNUSED(pass_type)) override
{
return get_output_texture();
}
/* The name of the render view of the scene whose index is the multi-view eye of the 3D
 * viewport. NOTE(review): the found view is dereferenced without a null check, so a view is
 * presumably expected to exist at that index, confirm. */
StringRef get_view_name() override
{
const SceneRenderView *view = static_cast<SceneRenderView *>(
BLI_findlink(&get_scene()->r.views, DRW_context_state_get()->v3d->multiview_eye));
return view->name;
}
/* Copy the given message into the info message buffer of the engine. */
void set_info_message(StringRef message) const override
{
message.copy(info_message_, GPU_INFO_SIZE);
}
};
class Engine {
private:
TexturePool texture_pool_;
Context context_;
realtime_compositor::Evaluator evaluator_;
/* Stores the viewport size at the time the last compositor evaluation happened. See the
* update_viewport_size method for more information. */
int2 last_viewport_size_;
public:
Engine(char *info_message)
: context_(texture_pool_, info_message),
evaluator_(context_, node_tree()),
last_viewport_size_(context_.get_output_size())
{
}
/* Update the viewport size and evaluate the compositor. */
void draw()
{
update_viewport_size();
evaluator_.evaluate();
}
/* If the size of the viewport changed from the last time the compositor was evaluated, update
* the viewport size and reset the evaluator. That's because the evaluator compiles the node tree
* in a manner that is specifically optimized for the size of the viewport. This should be called
* before evaluating the compositor. */
void update_viewport_size()
{
if (last_viewport_size_ == context_.get_output_size()) {
return;
}
last_viewport_size_ = context_.get_output_size();
evaluator_.reset();
}
/* If the compositor node tree changed, reset the evaluator. */
void update(const Depsgraph *depsgraph)
{
if (DEG_id_type_updated(depsgraph, ID_NT)) {
evaluator_.reset();
}
}
/* Get a reference to the compositor node tree. */
static bNodeTree &node_tree()
{
return *DRW_context_state_get()->scene->nodetree;
}
};
} // namespace blender::draw::compositor
using namespace blender::draw::compositor;
/* The viewport data of the compositor engine. The engine uses no framebuffers, textures, passes,
 * or storage, hence the empty lists. NOTE(review): the order of the members presumably has to
 * match the layout expected by the draw manager for viewport engine data, confirm before
 * reordering. */
typedef struct COMPOSITOR_Data {
DrawEngineType *engine_type;
DRWViewportEmptyList *fbl;
DRWViewportEmptyList *txl;
DRWViewportEmptyList *psl;
DRWViewportEmptyList *stl;
/* Created in compositor_engine_init and deleted in compositor_engine_free. */
Engine *instance_data;
/* The info message of the engine, see the Context class. */
char info[GPU_INFO_SIZE];
} COMPOSITOR_Data;
/* Create the engine instance of the given viewport data if it was not created already. */
static void compositor_engine_init(void *data)
{
  COMPOSITOR_Data *compositor_data = static_cast<COMPOSITOR_Data *>(data);
  if (compositor_data->instance_data) {
    return;
  }

  compositor_data->instance_data = new Engine(compositor_data->info);
}
/* Delete the engine instance that was created in compositor_engine_init. */
static void compositor_engine_free(void *instance_data)
{
  delete static_cast<Engine *>(instance_data);
}
/* Evaluate the compositor through the engine instance of the given viewport data. The instance is
 * used without a null check, so it is presumably expected to have been created already by
 * compositor_engine_init. */
static void compositor_engine_draw(void *data)
{
const COMPOSITOR_Data *compositor_data = static_cast<COMPOSITOR_Data *>(data);
compositor_data->instance_data->draw();
}
static void compositor_engine_update(void *data)
{
COMPOSITOR_Data *compositor_data = static_cast<COMPOSITOR_Data *>(data);
/* Clear any info message that was set in a previous update. */
compositor_data->info[0] = '\0';
if (compositor_data->instance_data) {
compositor_data->instance_data->update(DRW_context_state_get()->depsgraph);
}
}
extern "C" {
static const DrawEngineDataSize compositor_data_size = DRW_VIEWPORT_DATA_SIZE(COMPOSITOR_Data);
/* The draw engine type of the compositor. Only the init, instance free, draw scene, and view update
 * callbacks are implemented. The initializer is positional, so the order of the entries has to
 * match the order of the members of DrawEngineType. */
DrawEngineType draw_engine_compositor_type = {
nullptr, /* next */
nullptr, /* prev */
N_("Compositor"), /* idname */
&compositor_data_size, /* vedata_size */
&compositor_engine_init, /* engine_init */
nullptr, /* engine_free */
&compositor_engine_free, /* instance_free */
nullptr, /* cache_init */
nullptr, /* cache_populate */
nullptr, /* cache_finish */
&compositor_engine_draw, /* draw_scene */
&compositor_engine_update, /* view_update */
nullptr, /* id_update */
nullptr, /* render_to_image */
nullptr, /* store_metadata */
};
}

View File

@ -0,0 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
extern DrawEngineType draw_engine_compositor_type;
#ifdef __cplusplus
}
#endif

View File

@ -43,6 +43,7 @@
#include "DNA_camera_types.h"
#include "DNA_mesh_types.h"
#include "DNA_meshdata_types.h"
#include "DNA_userdef_types.h"
#include "DNA_world_types.h"
#include "ED_gpencil.h"
@ -84,6 +85,7 @@
#include "draw_cache_impl.h"
#include "engines/basic/basic_engine.h"
#include "engines/compositor/compositor_engine.h"
#include "engines/eevee/eevee_engine.h"
#include "engines/eevee_next/eevee_engine.h"
#include "engines/external/external_engine.h"
@ -1214,6 +1216,31 @@ static void drw_engines_enable_editors(void)
}
}
/**
 * Check whether the realtime compositor engine should be enabled for the 3D viewport that is
 * currently being drawn.
 *
 * \return true only if the experimental feature is enabled in the preferences, the compositor
 * option is enabled in the shading settings of the viewport, the viewport is in material preview
 * or rendered shading mode, and the scene uses nodes and has a compositor node tree.
 */
static bool is_compositor_enabled(void)
{
  /* The realtime compositor is an experimental feature that is enabled in the preferences. */
  if (!U.experimental.use_realtime_compositor) {
    return false;
  }

  /* The compositor is toggled per viewport in its shading settings. */
  if (!(DST.draw_ctx.v3d->shading.flag & V3D_SHADING_COMPOSITOR)) {
    return false;
  }

  /* The compositor option is exposed in the interface for both the material preview and the
   * rendered shading modes, so the check has to include #OB_MATERIAL itself, otherwise, enabling
   * the option in material preview mode would silently do nothing. */
  if (!(DST.draw_ctx.v3d->shading.type >= OB_MATERIAL)) {
    return false;
  }

  /* There is nothing to evaluate if the scene does not use nodes or has no node tree. */
  if (!DST.draw_ctx.scene->use_nodes) {
    return false;
  }

  if (!DST.draw_ctx.scene->nodetree) {
    return false;
  }

  return true;
}
static void drw_engines_enable(ViewLayer *UNUSED(view_layer),
RenderEngineType *engine_type,
bool gpencil_engine_needed)
@ -1226,6 +1253,11 @@ static void drw_engines_enable(ViewLayer *UNUSED(view_layer),
if (gpencil_engine_needed && ((drawtype >= OB_SOLID) || !use_xray)) {
use_drw_engine(&draw_engine_gpencil_type);
}
if (is_compositor_enabled()) {
use_drw_engine(&draw_engine_compositor_type);
}
drw_engines_enable_overlays();
#ifdef WITH_DRAW_DEBUG
@ -1597,7 +1629,6 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph,
GPUViewport *viewport,
const bContext *evil_C)
{
Scene *scene = DEG_get_evaluated_scene(depsgraph);
ViewLayer *view_layer = DEG_get_evaluated_view_layer(depsgraph);
RegionView3D *rv3d = region->regiondata;
@ -2948,6 +2979,7 @@ void DRW_engines_register(void)
DRW_engine_register(&draw_engine_overlay_type);
DRW_engine_register(&draw_engine_select_type);
DRW_engine_register(&draw_engine_basic_type);
DRW_engine_register(&draw_engine_compositor_type);
#ifdef WITH_DRAW_DEBUG
DRW_engine_register(&draw_engine_debug_select_type);
#endif

View File

@ -1210,6 +1210,9 @@ static void view3d_main_region_listener(const wmRegionListenerParams *params)
break;
}
break;
case NC_NODE:
ED_region_tag_redraw(region);
break;
case NC_WORLD:
switch (wmn->data) {
case ND_WORLD_DRAW:

View File

@ -323,6 +323,14 @@ set(GLSL_SRC
shaders/common/gpu_shader_common_math_utils.glsl
shaders/common/gpu_shader_common_mix_rgb.glsl
shaders/compositor/compositor_convert.glsl
shaders/compositor/compositor_realize_on_domain.glsl
shaders/compositor/library/gpu_shader_compositor_main.glsl
shaders/compositor/library/gpu_shader_compositor_store_output.glsl
shaders/compositor/library/gpu_shader_compositor_texture_utilities.glsl
shaders/compositor/library/gpu_shader_compositor_type_conversion.glsl
shaders/material/gpu_shader_material_add_shader.glsl
shaders/material/gpu_shader_material_ambient_occlusion.glsl
shaders/material/gpu_shader_material_anisotropic.glsl
@ -527,6 +535,9 @@ set(SRC_SHADER_CREATE_INFOS
shaders/infos/gpu_shader_simple_lighting_info.hh
shaders/infos/gpu_shader_text_info.hh
shaders/infos/gpu_srgb_to_framebuffer_space_info.hh
shaders/compositor/infos/compositor_convert_info.hh
shaders/compositor/infos/compositor_realize_on_domain_info.hh
)
set(SHADER_CREATE_INFOS_CONTENT "")

View File

@ -0,0 +1,8 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
/* Convert the input texel that corresponds to this invocation and write the result to the same
 * texel of the output image. The actual conversion is supplied by each shader variant through the
 * CONVERT_EXPRESSION macro. */
void main()
{
  ivec2 xy = ivec2(gl_GlobalInvocationID.xy);

  vec4 input_value = texture_load(input_tx, xy);
  vec4 converted_value = CONVERT_EXPRESSION(input_value);

  imageStore(output_img, xy, converted_value);
}

View File

@ -0,0 +1,25 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
/* Realize the input on the domain image: for every texel of the domain, find the corresponding
 * location in the transformed input and write the sampled value to the domain image. */
void main()
{
  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);

  /* Add 0.5 to evaluate the input sampler at the center of the pixel. Sampling at the integer
   * texel coordinates would evaluate the sampler at the pixel corners, which shifts the result by
   * half a pixel and mixes neighboring pixels when the sampler is filtered. */
  vec2 coordinates = vec2(texel) + vec2(0.5);

  /* Transform the input image by transforming the domain coordinates with the inverse of input
   * image's transformation. The inverse transformation is an affine matrix and thus the
   * coordinates should be in homogeneous coordinates. */
  coordinates = (mat3(inverse_transformation) * vec3(coordinates, 1.0)).xy;

  /* Since an input image with an identity transformation is supposed to be centered in the domain,
   * we subtract the offset between the lower left corners of the input image and the domain, which
   * is half the difference between their sizes, because the difference in size is on both sides of
   * the centered image. Additionally, we floor the offset to retain the 0.5 offset added above in
   * case the difference in sizes was odd. */
  ivec2 domain_size = imageSize(domain_img);
  ivec2 input_size = texture_size(input_tx);
  vec2 offset = floor(vec2(domain_size - input_size) / 2.0);

  /* Subtract the offset and divide by the input image size to get the relevant coordinates into
   * the sampler's expected [0, 1] range. */
  vec2 normalized_coordinates = (coordinates - offset) / vec2(input_size);

  imageStore(domain_img, texel, texture(input_tx, normalized_coordinates));
}

View File

@ -0,0 +1,63 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
/* Base info shared by all conversion shaders below. It declares the 16x16 compute group size, the
 * "input_tx" sampler, the type conversion library as a typedef source, and the
 * compositor_convert.glsl compute source. Variants add the "output_img" image and define the
 * CONVERT_EXPRESSION macro that the compute source applies to every loaded texel. */
GPU_SHADER_CREATE_INFO(compositor_convert_shared)
.local_group_size(16, 16)
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.typedef_source("gpu_shader_compositor_type_conversion.glsl")
.compute_source("compositor_convert.glsl");
/* Float to vector: the x component of the input is expanded with vec3_from_float and written to
 * an RGBA16F output with a zero fourth component. */
GPU_SHADER_CREATE_INFO(compositor_convert_float_to_vector)
.additional_info("compositor_convert_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("CONVERT_EXPRESSION(value)", "vec4(vec3_from_float(value.x), 0.0)")
.do_static_compilation(true);
/* Float to color: the x component of the input is converted with vec4_from_float and written to
 * an RGBA16F output. */
GPU_SHADER_CREATE_INFO(compositor_convert_float_to_color)
.additional_info("compositor_convert_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("CONVERT_EXPRESSION(value)", "vec4_from_float(value.x)")
.do_static_compilation(true);
/* Color to float: the input is reduced with float_from_vec4 and written to the first component
 * of an R16F output, the remaining components of the stored value are zero. */
GPU_SHADER_CREATE_INFO(compositor_convert_color_to_float)
.additional_info("compositor_convert_shared")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("CONVERT_EXPRESSION(value)", "vec4(float_from_vec4(value), vec3(0.0))")
.do_static_compilation(true);
/* Color to vector: the input is converted with vec3_from_vec4 and written to an RGBA16F output
 * with a zero fourth component. */
GPU_SHADER_CREATE_INFO(compositor_convert_color_to_vector)
.additional_info("compositor_convert_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("CONVERT_EXPRESSION(value)", "vec4(vec3_from_vec4(value), 0.0)")
.do_static_compilation(true);
/* Vector to float: the xyz components of the input are reduced with float_from_vec3 and written
 * to the first component of an R16F output, the remaining components of the stored value are
 * zero. */
GPU_SHADER_CREATE_INFO(compositor_convert_vector_to_float)
.additional_info("compositor_convert_shared")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("CONVERT_EXPRESSION(value)", "vec4(float_from_vec3(value.xyz), vec3(0.0))")
.do_static_compilation(true);
/* Vector to color: the xyz components of the input are converted with vec4_from_vec3 and written
 * to an RGBA16F output. */
GPU_SHADER_CREATE_INFO(compositor_convert_vector_to_color)
.additional_info("compositor_convert_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("CONVERT_EXPRESSION(value)", "vec4_from_vec3(value.xyz)")
.do_static_compilation(true);
/* Alpha extraction: the alpha component of the input is written to the first component of an
 * R16F output, the remaining components of the stored value are zero. */
GPU_SHADER_CREATE_INFO(compositor_extract_alpha_from_color)
.additional_info("compositor_convert_shared")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("CONVERT_EXPRESSION(value)", "vec4(value.a, vec3(0.0))")
.do_static_compilation(true);
/* Color to half color: the input value is written as is to an RGBA16F output, so the only change
 * is the storage format of the output image. */
GPU_SHADER_CREATE_INFO(compositor_convert_color_to_half_color)
.additional_info("compositor_convert_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("CONVERT_EXPRESSION(value)", "value")
.do_static_compilation(true);
/* Float to half float: the first component of the input is written as is to the first component
 * of an R16F output, the remaining components of the stored value are zero. */
GPU_SHADER_CREATE_INFO(compositor_convert_float_to_half_float)
.additional_info("compositor_convert_shared")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("CONVERT_EXPRESSION(value)", "vec4(value.r, vec3(0.0))")
.do_static_compilation(true);

View File

@ -0,0 +1,24 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
/* Base info shared by the realize on domain shaders below. It declares the 16x16 compute group
 * size, the "inverse_transformation" push constant, the "input_tx" sampler, and the
 * compositor_realize_on_domain.glsl compute source. The push constant is declared as a MAT4 but
 * the compute source only uses it through a mat3 conversion. Variants add the "domain_img" image
 * that the compute source writes to. */
GPU_SHADER_CREATE_INFO(compositor_realize_on_domain_shared)
.local_group_size(16, 16)
.push_constant(Type::MAT4, "inverse_transformation")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.compute_source("compositor_realize_on_domain.glsl");
/* Color variant: the domain image is a write-only RGBA16F image. */
GPU_SHADER_CREATE_INFO(compositor_realize_on_domain_color)
.additional_info("compositor_realize_on_domain_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "domain_img")
.do_static_compilation(true);
/* Vector variant: the domain image is a write-only RGBA16F image, the same declaration as the
 * color variant under a different name. */
GPU_SHADER_CREATE_INFO(compositor_realize_on_domain_vector)
.additional_info("compositor_realize_on_domain_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "domain_img")
.do_static_compilation(true);
/* Float variant: the domain image is a write-only single channel R16F image. */
GPU_SHADER_CREATE_INFO(compositor_realize_on_domain_float)
.additional_info("compositor_realize_on_domain_shared")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "domain_img")
.do_static_compilation(true);

View File

@ -0,0 +1,7 @@
/* The compute shader that will be dispatched by the compositor ShaderOperation. It just calls the
 * evaluate function that will be dynamically generated and appended to this shader in the
 * ShaderOperation::generate_code method. Consequently, the evaluate function is intentionally not
 * defined in this file. */
void main()
{
evaluate();
}

View File

@ -0,0 +1,26 @@
/* The following functions are called to store the given value in the output identified by the
* given ID. The ID is an unsigned integer that is encoded in a float, so floatBitsToUint is called
* to get the actual identifier. The functions have an output value as their last argument that is
* used to establish an output link that is then used to track the nodes that contribute to the
* output of the compositor node tree.
*
* The store_[float|vector|color] functions are dynamically generated in
* ShaderOperation::generate_code_for_outputs. */
/* Store the given float in the output identified by the bits of the given ID and forward the
 * value through the output argument. */
void node_compositor_store_output_float(const float id, float value, out float out_value)
{
  out_value = value;

  uint output_identifier = floatBitsToUint(id);
  store_float(output_identifier, value);
}
/* Store the given vector in the output identified by the bits of the given ID and forward the
 * vector through the output argument. */
void node_compositor_store_output_vector(const float id, vec3 vector, out vec3 out_vector)
{
  out_vector = vector;

  uint output_identifier = floatBitsToUint(id);
  store_vector(output_identifier, vector);
}
/* Store the given color in the output identified by the bits of the given ID and forward the
 * color through the output argument. */
void node_compositor_store_output_color(const float id, vec4 color, out vec4 out_color)
{
  out_color = color;

  uint output_identifier = floatBitsToUint(id);
  store_color(output_identifier, color);
}

View File

@ -0,0 +1,25 @@
/* Returns the size of the given 1D texture at LOD zero. */
int texture_size(sampler1D sampler)
{
  int size_at_base_lod = textureSize(sampler, 0);
  return size_at_base_lod;
}
/* Fetches a texel of the given 1D texture at LOD zero. The coordinate is clamped to the valid
 * range first, so out of bounds accesses return the nearest texel at the boundary. */
vec4 texture_load(sampler1D sampler, int x)
{
  int last_texel = textureSize(sampler, 0) - 1;
  int clamped_x = clamp(x, 0, last_texel);
  return texelFetch(sampler, clamped_x, 0);
}
/* Returns the size of the given 2D texture at LOD zero. */
ivec2 texture_size(sampler2D sampler)
{
  ivec2 size_at_base_lod = textureSize(sampler, 0);
  return size_at_base_lod;
}
/* Fetches a texel of the given 2D texture at LOD zero. The coordinates are clamped to the valid
 * range first, so out of bounds accesses return the nearest texel at the boundary. */
vec4 texture_load(sampler2D sampler, ivec2 texel)
{
  ivec2 last_texel = textureSize(sampler, 0) - ivec2(1);
  ivec2 clamped_texel = clamp(texel, ivec2(0), last_texel);
  return texelFetch(sampler, clamped_texel, 0);
}

View File

@ -0,0 +1,29 @@
/* Reduce a four component value to a float by averaging its first three components, the fourth
 * component is ignored. */
float float_from_vec4(vec4 vector)
{
  float components_sum = dot(vector.rgb, vec3(1.0));
  return components_sum / 3.0;
}
/* Reduce a three component value to a float by averaging its components. */
float float_from_vec3(vec3 vector)
{
  float components_sum = dot(vector, vec3(1.0));
  return components_sum / 3.0;
}
/* Drop the fourth component of the given value. */
vec3 vec3_from_vec4(vec4 vector)
{
  return vector.xyz;
}
/* Replicate the given float into all three components. */
vec3 vec3_from_float(float value)
{
  return vec3(value, value, value);
}
/* Extend the given value with a fourth component of one. */
vec4 vec4_from_vec3(vec3 vector)
{
  return vec4(vector.x, vector.y, vector.z, 1.0);
}
/* Replicate the given float into the first three components and set the fourth component to
 * one. */
vec4 vec4_from_float(float value)
{
  return vec4(value, value, value, 1.0);
}

View File

@ -653,6 +653,8 @@ typedef struct UserDef_Experimental {
char enable_eevee_next;
char use_sculpt_texture_paint;
char use_draw_manager_acquire_lock;
char use_realtime_compositor;
char _pad[7];
/** `makesdna` does not allow empty structs. */
} UserDef_Experimental;

View File

@ -486,6 +486,7 @@ enum {
V3D_SHADING_SCENE_LIGHTS_RENDER = (1 << 12),
V3D_SHADING_SCENE_WORLD_RENDER = (1 << 13),
V3D_SHADING_STUDIOLIGHT_VIEW_ROTATION = (1 << 14),
V3D_SHADING_COMPOSITOR = (1 << 15),
};
#define V3D_USES_SCENE_LIGHTS(v3d) \

View File

@ -4199,6 +4199,14 @@ static void rna_def_space_view3d_shading(BlenderRNA *brna)
RNA_def_property_ui_text(prop, "Shader AOV Name", "Name of the active Shader AOV");
RNA_def_property_flag(prop, PROP_HIDDEN);
RNA_def_property_update(prop, NC_SPACE | ND_SPACE_VIEW3D, NULL);
prop = RNA_def_property(srna, "use_compositor", PROP_BOOLEAN, PROP_NONE);
RNA_def_property_boolean_sdna(prop, NULL, "flag", V3D_SHADING_COMPOSITOR);
RNA_def_property_clear_flag(prop, PROP_ANIMATABLE);
RNA_def_property_boolean_default(prop, false);
RNA_def_property_ui_text(
prop, "Compositor", "Preview the compositor output inside the viewport");
RNA_def_property_update(prop, NC_SPACE | ND_SPACE_VIEW3D | NS_VIEW3D_SHADING, NULL);
}
static void rna_def_space_view3d_overlay(BlenderRNA *brna)

View File

@ -6338,6 +6338,10 @@ static void rna_def_userdef_experimental(BlenderRNA *brna)
RNA_def_property_ui_text(
prop, "Sculpt Mode Tilt Support", "Support for pen tablet tilt events in Sculpt Mode");
prop = RNA_def_property(srna, "use_realtime_compositor", PROP_BOOLEAN, PROP_NONE);
RNA_def_property_boolean_sdna(prop, NULL, "use_realtime_compositor", 1);
RNA_def_property_ui_text(prop, "Realtime Compositor", "Enable the new realtime compositor");
prop = RNA_def_property(srna, "use_sculpt_texture_paint", PROP_BOOLEAN, PROP_NONE);
RNA_def_property_boolean_sdna(prop, NULL, "use_sculpt_texture_paint", 1);
RNA_def_property_ui_text(prop, "Sculpt Texture Paint", "Use texture painting in Sculpt Mode");

View File

@ -88,6 +88,14 @@ class SocketDeclaration {
InputSocketFieldType input_field_type_ = InputSocketFieldType::None;
OutputFieldDependency output_field_dependency_;
/** The priority of the input for determining the domain of the node. See
* realtime_compositor::InputDescriptor for more information. */
int compositor_domain_priority_ = 0;
/** This input expects a single value and can't operate on non-single values. See
* realtime_compositor::InputDescriptor for more information. */
bool compositor_expects_single_value_ = false;
/** Utility method to make the socket available if there is a straightforward way to do so. */
std::function<void(bNode &)> make_available_fn_;
@ -124,6 +132,9 @@ class SocketDeclaration {
InputSocketFieldType input_field_type() const;
const OutputFieldDependency &output_field_dependency() const;
int compositor_domain_priority() const;
bool compositor_expects_single_value() const;
protected:
void set_common_flags(bNodeSocket &socket) const;
bool matches_common_data(const bNodeSocket &socket) const;
@ -238,6 +249,22 @@ class SocketDeclarationBuilder : public BaseSocketDeclarationBuilder {
return *(Self *)this;
}
/** Set the priority that this input has when the domain of the node is determined. See
 * realtime_compositor::InputDescriptor for more information. Returns the builder itself to
 * allow chaining. */
Self &compositor_domain_priority(int priority)
{
  Self &self = *(Self *)this;
  decl_->compositor_domain_priority_ = priority;
  return self;
}
/** Mark whether this input expects a single value and can't operate on non-single values. See
 * realtime_compositor::InputDescriptor for more information. Returns the builder itself to
 * allow chaining. */
Self &compositor_expects_single_value(bool value = true)
{
  Self &self = *(Self *)this;
  decl_->compositor_expects_single_value_ = value;
  return self;
}
/**
* Pass a function that sets properties on the node required to make the corresponding socket
* available, if it is not available on the default state of the node. The function is allowed to
@ -428,6 +455,16 @@ inline const OutputFieldDependency &SocketDeclaration::output_field_dependency()
return output_field_dependency_;
}
inline int SocketDeclaration::compositor_domain_priority() const
{
return compositor_domain_priority_;
}
inline bool SocketDeclaration::compositor_expects_single_value() const
{
return compositor_expects_single_value_;
}
inline void SocketDeclaration::make_available(bNode &node) const
{
if (make_available_fn_) {