EEVEE-Next: Depth Of Field: Port implementation to compute shader

This is a port of the previous implementation, but using compute
shaders instead of the raster pipeline for every step.

Only the scatter passes are kept as raster passes, for obvious performance
reasons.

Many steps have been rewritten to take advantage of LDS (local data share,
i.e. compute shared memory), which allows faster and simpler downsampling
and filtering for some passes.

A new stabilize phase has been separated from another setup pass in order
to improve it in the future with better stabilization.

The scatter pass shaders and pipeline also changed. We now use indirect
draw calls to draw quads using triangle strip primitives. This reduces
the fragment shader invocation count and overdraw compared to a bounding
triangle. This also drastically reduces the number of vertex shader
invocations to the bare minimum, instead of always having 3 verts per
4 pixels (for each of the foreground and background layers).
This commit is contained in:
Clément Foucault 2022-07-29 18:08:42 +02:00
parent 2ea4754109
commit f4fe3fb09b
34 changed files with 3605 additions and 17 deletions

View File

@ -201,7 +201,7 @@ class DATA_PT_camera(CameraButtonsPanel, Panel):
class DATA_PT_camera_dof(CameraButtonsPanel, Panel):
bl_label = "Depth of Field"
bl_options = {'DEFAULT_CLOSED'}
COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_WORKBENCH'}
COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_EEVEE_NEXT', 'BLENDER_WORKBENCH'}
def draw_header(self, context):
cam = context.camera
@ -228,7 +228,7 @@ class DATA_PT_camera_dof(CameraButtonsPanel, Panel):
class DATA_PT_camera_dof_aperture(CameraButtonsPanel, Panel):
bl_label = "Aperture"
bl_parent_id = "DATA_PT_camera_dof"
COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_WORKBENCH'}
COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_EEVEE_NEXT', 'BLENDER_WORKBENCH'}
def draw(self, context):
layout = self.layout

View File

@ -223,7 +223,7 @@ class RENDER_PT_motion_blur_curve(RenderButtonsPanel, Panel):
class RENDER_PT_eevee_depth_of_field(RenderButtonsPanel, Panel):
bl_label = "Depth of Field"
bl_options = {'DEFAULT_CLOSED'}
COMPAT_ENGINES = {'BLENDER_EEVEE'}
COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_EEVEE_NEXT'}
@classmethod
def poll(cls, context):

View File

@ -134,6 +134,7 @@ set(SRC
engines/eevee/eevee_temporal_sampling.c
engines/eevee/eevee_volumes.c
engines/eevee_next/eevee_camera.cc
engines/eevee_next/eevee_depth_of_field.cc
engines/eevee_next/eevee_engine.cc
engines/eevee_next/eevee_film.cc
engines/eevee_next/eevee_instance.cc
@ -362,6 +363,21 @@ set(GLSL_SRC
engines/eevee_next/shaders/eevee_attributes_lib.glsl
engines/eevee_next/shaders/eevee_camera_lib.glsl
engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl
engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl
engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl
engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl
engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl
engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl
engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl
engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl
engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl
engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl
engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl
engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl
engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl
engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl
engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl
engines/eevee_next/shaders/eevee_film_comp.glsl
engines/eevee_next/shaders/eevee_film_frag.glsl
engines/eevee_next/shaders/eevee_film_lib.glsl

View File

@ -82,7 +82,6 @@ class Camera {
private:
Instance &inst_;
/** Double buffered to detect changes and have history for re-projection. */
CameraDataBuf data_;
public:
@ -112,6 +111,10 @@ class Camera {
{
return data_.type == CAMERA_ORTHO;
}
/** Return true when the stored camera data type is `CAMERA_PERSP`. */
bool is_perspective() const
{
return data_.type == CAMERA_PERSP;
}
const float3 &position() const
{
return *reinterpret_cast<const float3 *>(data_.viewinv[3]);

View File

@ -44,8 +44,22 @@
/* Minimum visibility size. */
#define LIGHTPROBE_FILTER_VIS_GROUP_SIZE 16
/* Film. */
#define FILM_GROUP_SIZE 16
/* Motion Blur. */
#define MOTION_BLUR_GROUP_SIZE 32
#define MOTION_BLUR_DILATE_GROUP_SIZE 512
/* Depth Of Field. */
/* Divisor applied to the half resolution to get the tile grid resolution. */
#define DOF_TILES_SIZE 8
/* Work-group sizes: each one divides the matching pass resolution to get its dispatch count. */
#define DOF_TILES_FLATTEN_GROUP_SIZE DOF_TILES_SIZE
#define DOF_TILES_DILATE_GROUP_SIZE 8
/* Side length of the square gather and scatter bokeh look-up textures. */
#define DOF_BOKEH_LUT_SIZE 32
/* The resolve bokeh look-up texture is `DOF_MAX_SLIGHT_FOCUS_RADIUS * 2 + 1` texels wide. */
#define DOF_MAX_SLIGHT_FOCUS_RADIUS 5
#define DOF_REDUCE_GROUP_SIZE 8
/* Used for the setup, stabilize and downsample dispatches. */
#define DOF_DEFAULT_GROUP_SIZE 32
#define DOF_FILTER_GROUP_SIZE 8
#define DOF_GATHER_GROUP_SIZE DOF_TILES_SIZE
/* The resolve pass dispatch divides the full resolution extent by this. */
#define DOF_RESOLVE_GROUP_SIZE (DOF_TILES_SIZE * 2)
/* Number of mip levels allocated for the reduced color and CoC textures. */
#define DOF_MIP_MAX 4

View File

@ -0,0 +1,720 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2021 Blender Foundation.
*/
/** \file
* \ingroup eevee
*
* Depth of field post process effect.
*
* There are 2 methods to achieve this effect.
* - The first uses projection matrix offsetting and sample accumulation to give
* reference quality depth of field. But this needs many samples to hide the
* under-sampling.
* - The second one is a post-processing based one. It follows the
* implementation described in the presentation
* "Life of a Bokeh - Siggraph 2018" from Guillaume Abadie.
* There are some differences with our actual implementation, which prioritizes quality.
*/
#include "DRW_render.h"
#include "BKE_camera.h"
#include "DNA_camera_types.h"
#include "GPU_platform.h"
#include "GPU_texture.h"
#include "GPU_uniform_buffer.h"
#include "eevee_camera.hh"
#include "eevee_instance.hh"
#include "eevee_sampling.hh"
#include "eevee_shader.hh"
#include "eevee_shader_shared.hh"
#include "eevee_depth_of_field.hh"
namespace blender::eevee {
/* -------------------------------------------------------------------- */
/** \name Depth of field
* \{ */
void DepthOfField::init()
{
const SceneEEVEE &sce_eevee = inst_.scene->eevee;
const Object *camera_object_eval = inst_.camera_eval_object;
const ::Camera *camera = (camera_object_eval) ?
reinterpret_cast<const ::Camera *>(camera_object_eval->data) :
nullptr;
if (camera == nullptr) {
/* Set to invalid value for update detection */
data_.scatter_color_threshold = -1.0f;
return;
}
/* Reminder: These are parameters not interpolated by motion blur. */
int update = 0;
int sce_flag = sce_eevee.flag;
update += assign_if_different(do_hq_slight_focus_,
(sce_flag & SCE_EEVEE_DOF_HQ_SLIGHT_FOCUS) != 0);
update += assign_if_different(do_jitter_, (sce_flag & SCE_EEVEE_DOF_JITTER) != 0);
update += assign_if_different(user_overblur_, sce_eevee.bokeh_overblur / 100.0f);
update += assign_if_different(fx_max_coc_, sce_eevee.bokeh_max_size);
update += assign_if_different(data_.scatter_color_threshold, sce_eevee.bokeh_threshold);
update += assign_if_different(data_.scatter_neighbor_max_color, sce_eevee.bokeh_neighbor_max);
update += assign_if_different(data_.denoise_factor, sce_eevee.bokeh_denoise_fac);
update += assign_if_different(data_.bokeh_blades, float(camera->dof.aperture_blades));
if (update > 0) {
inst_.sampling.reset();
}
}
/**
 * Synchronize the camera dependent depth of field state and build the post-fx passes.
 *
 * - Computes the aperture from the camera settings and splits it between the jittered
 *   (accumulation) radius and the post-fx radius.
 * - Resets the sampling whenever one of the tracked parameters changed.
 * - When the post-fx radius is non-zero, allocates the reduced (mip-mapped) buffers and the
 *   scatter lists, then creates every pass.
 */
void DepthOfField::sync()
{
  const Camera &camera = inst_.camera;
  const Object *camera_object_eval = inst_.camera_eval_object;
  const ::Camera *camera_data = (camera_object_eval) ?
                                    reinterpret_cast<const ::Camera *>(camera_object_eval->data) :
                                    nullptr;

  int update = 0;

  /* No camera or depth of field disabled: zero both radii so that every stage is skipped. */
  if (camera_data == nullptr || (camera_data->dof.flag & CAM_DOF_ENABLED) == 0) {
    update += assign_if_different(jitter_radius_, 0.0f);
    update += assign_if_different(fx_radius_, 0.0f);
    if (update > 0) {
      inst_.sampling.reset();
    }
    return;
  }

  float2 anisotropic_scale = {clamp_f(1.0f / camera_data->dof.aperture_ratio, 1e-5f, 1.0f),
                              clamp_f(camera_data->dof.aperture_ratio, 1e-5f, 1.0f)};
  update += assign_if_different(data_.bokeh_anisotropic_scale, anisotropic_scale);
  update += assign_if_different(data_.bokeh_rotation, camera_data->dof.aperture_rotation);
  update += assign_if_different(focus_distance_,
                                BKE_camera_object_dof_distance(camera_object_eval));
  data_.bokeh_anisotropic_scale_inv = 1.0f / data_.bokeh_anisotropic_scale;

  /* Clamp to avoid a division by zero below. */
  float fstop = max_ff(camera_data->dof.aperture_fstop, 1e-5f);

  if (update) {
    inst_.sampling.reset();
  }

  float aperture = 1.0f / (2.0f * fstop);
  if (camera.is_perspective()) {
    aperture *= camera_data->lens * 1e-3f;
  }

  if (camera.is_orthographic()) {
    /* FIXME: Why is this needed? Some kind of implicit unit conversion? */
    aperture *= 0.04f;
    /* Really strange behavior from Cycles but replicating. */
    focus_distance_ += camera.data_get().clip_near;
  }

  if (camera.is_panoramic()) {
    /* FIXME: Eyeballed. */
    aperture *= 0.185f;
  }

  if (camera_data->dof.aperture_ratio < 1.0) {
    /* If ratio is scaling the bokeh outwards, we scale the aperture so that
     * the gather kernel size will encompass the maximum axis. */
    aperture /= max_ff(camera_data->dof.aperture_ratio, 1e-5f);
  }

  float jitter_radius, fx_radius;

  /* Balance blur radius between fx dof and jitter dof. */
  if (do_jitter_ && (inst_.sampling.dof_ring_count_get() > 0) && !camera.is_panoramic() &&
      !inst_.is_viewport()) {
    /* Compute a minimal overblur radius to fill the gaps between the samples.
     * This is just the simplified form of dividing the area of the bokeh by
     * the number of samples. */
    float minimal_overblur = 1.0f / sqrtf(inst_.sampling.dof_sample_count_get());

    fx_radius = (minimal_overblur + user_overblur_) * aperture;
    /* Avoid dilating the shape. Over-blur only soften. */
    jitter_radius = max_ff(0.0f, aperture - fx_radius);
  }
  else {
    jitter_radius = 0.0f;
    fx_radius = aperture;
  }

  /* Disable post fx if result wouldn't be noticeable. */
  if (fx_max_coc_ < 0.5f) {
    fx_radius = 0.0f;
  }

  update += assign_if_different(jitter_radius_, jitter_radius);
  update += assign_if_different(fx_radius_, fx_radius);
  if (update > 0) {
    inst_.sampling.reset();
  }

  /* Post-fx disabled: no buffers nor passes are needed. */
  if (fx_radius_ == 0.0f) {
    return;
  }

  /* TODO(fclem): Once we render into multiple view, we will need to use the maximum resolution. */
  int2 max_render_res = inst_.film.render_extent_get();
  int2 half_res = math::divide_ceil(max_render_res, int2(2));
  /* Pad the base level to a multiple of `2^(DOF_MIP_MAX - 1)` so that each of the DOF_MIP_MAX mip
   * levels halves evenly. This has to be a bit shift: a relational `<` here would evaluate to 1
   * and silently disable the padding. */
  int2 reduce_size = math::ceil_to_multiple(half_res, int2(1 << (DOF_MIP_MAX - 1)));

  data_.gather_uv_fac = 1.0f / float2(reduce_size);

  /* Now that we know the maximum render resolution of every view, using depth of field, allocate
   * the reduced buffers. Color needs to be signed format here. See note in shader for
   * explanation. Do not use texture pool because of needs mipmaps. */
  reduced_color_tx_.ensure_2d(GPU_RGBA16F, reduce_size, nullptr, DOF_MIP_MAX);
  reduced_coc_tx_.ensure_2d(GPU_R16F, reduce_size, nullptr, DOF_MIP_MAX);
  GPU_texture_wrap_mode(reduced_color_tx_, false, false);
  GPU_texture_wrap_mode(reduced_coc_tx_, false, false);
  reduced_color_tx_.ensure_mip_views();
  reduced_coc_tx_.ensure_mip_views();

  /* Resize the scatter list to contain enough entry to cover half the screen with sprites (which
   * is unlikely due to local contrast test). */
  data_.scatter_max_rect = (reduced_color_tx_.pixel_count() / 4) / 2;
  scatter_fg_list_buf_.resize(data_.scatter_max_rect);
  scatter_bg_list_buf_.resize(data_.scatter_max_rect);

  bokeh_lut_pass_sync();
  setup_pass_sync();
  stabilize_pass_sync();
  downsample_pass_sync();
  reduce_pass_sync();
  tiles_flatten_pass_sync();
  tiles_dilate_pass_sync();
  gather_pass_sync();
  filter_pass_sync();
  scatter_pass_sync();
  hole_fill_pass_sync();
  resolve_pass_sync();
}
/**
 * Offset the view origin and skew the projection matrix by one aperture sample.
 * Does nothing when the jitter radius is zero.
 *
 * \param winmat: Projection matrix, its Z axis is skewed in place.
 * \param viewmat: View matrix, its translation is offset in place.
 */
void DepthOfField::jitter_apply(float4x4 &winmat, float4x4 &viewmat)
{
  if (jitter_radius_ == 0.0f) {
    return;
  }

  /* Fetch the disk sample in polar coordinates. */
  float disk_radius, disk_angle;
  inst_.sampling.dof_disk_sample_get(&disk_radius, &disk_angle);

  /* Remap the disk to a polygon when the aperture has at least 3 blades. */
  const bool polygonal_aperture = data_.bokeh_blades >= 3.0f;
  if (polygonal_aperture) {
    disk_angle = circle_to_polygon_angle(data_.bokeh_blades, disk_angle);
    disk_radius *= circle_to_polygon_radius(data_.bokeh_blades, disk_angle);
  }
  disk_radius *= jitter_radius_;
  disk_angle += data_.bokeh_rotation;

  /* View space sample position. */
  float2 lens_offset = float2(disk_radius * cosf(disk_angle), disk_radius * sinf(disk_angle));
  lens_offset *= data_.bokeh_anisotropic_scale;

  /* Project the jittered and centered focus points to NDC space. */
  float3 focus_jittered = float3(UNPACK2(lens_offset), -focus_distance_);
  float3 focus_centered = float3(0.0f, 0.0f, -focus_distance_);
  mul_project_m4_v3(winmat.ptr(), focus_jittered);
  mul_project_m4_v3(winmat.ptr(), focus_centered);

  /* The projection is treated as orthographic when this term is not -1. */
  if (winmat[2][3] != -1.0f) {
    lens_offset *= focus_distance_;
  }

  /* Move the view origin. */
  sub_v2_v2(viewmat[3], lens_offset);
  /* Skew the Z axis of the projection matrix. */
  add_v2_v2(winmat[2], focus_centered - focus_jittered);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Passes setup.
* \{ */
void DepthOfField::bokeh_lut_pass_sync()
{
const bool has_anisotropy = data_.bokeh_anisotropic_scale != float2(1.0f);
if (!has_anisotropy && (data_.bokeh_blades == 0.0)) {
/* No need for LUTs in these cases. */
bokeh_lut_ps_ = nullptr;
return;
}
/* Precompute bokeh texture. */
bokeh_lut_ps_ = DRW_pass_create("Dof.bokeh_lut_ps_", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(DOF_BOKEH_LUT);
DRWShadingGroup *grp = DRW_shgroup_create(sh, bokeh_lut_ps_);
DRW_shgroup_uniform_block(grp, "dof_buf", data_);
DRW_shgroup_uniform_image_ref(grp, "out_gather_lut_img", &bokeh_gather_lut_tx_);
DRW_shgroup_uniform_image_ref(grp, "out_scatter_lut_img", &bokeh_scatter_lut_tx_);
DRW_shgroup_uniform_image_ref(grp, "out_resolve_lut_img", &bokeh_resolve_lut_tx_);
DRW_shgroup_call_compute(grp, 1, 1, 1);
}
void DepthOfField::setup_pass_sync()
{
RenderBuffers &render_buffers = inst_.render_buffers;
setup_ps_ = DRW_pass_create("Dof.setup_ps_", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(DOF_SETUP);
DRWShadingGroup *grp = DRW_shgroup_create(sh, setup_ps_);
DRW_shgroup_uniform_texture_ref_ex(grp, "color_tx", &input_color_tx_, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "depth_tx", &render_buffers.depth_tx, no_filter);
DRW_shgroup_uniform_block(grp, "dof_buf", data_);
DRW_shgroup_uniform_image_ref(grp, "out_color_img", &setup_color_tx_);
DRW_shgroup_uniform_image_ref(grp, "out_coc_img", &setup_coc_tx_);
DRW_shgroup_call_compute_ref(grp, dispatch_setup_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
}
void DepthOfField::stabilize_pass_sync()
{
stabilize_ps_ = DRW_pass_create("Dof.stabilize_ps_", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(DOF_STABILIZE);
DRWShadingGroup *grp = DRW_shgroup_create(sh, stabilize_ps_);
DRW_shgroup_uniform_texture_ref_ex(grp, "coc_tx", &setup_coc_tx_, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "color_tx", &setup_color_tx_, no_filter);
DRW_shgroup_uniform_block(grp, "dof_buf", data_);
DRW_shgroup_uniform_image(grp, "out_coc_img", reduced_coc_tx_.mip_view(0));
DRW_shgroup_uniform_image(grp, "out_color_img", reduced_color_tx_.mip_view(0));
DRW_shgroup_call_compute_ref(grp, dispatch_stabilize_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
}
void DepthOfField::downsample_pass_sync()
{
downsample_ps_ = DRW_pass_create("Dof.downsample_ps_", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(DOF_DOWNSAMPLE);
DRWShadingGroup *grp = DRW_shgroup_create(sh, downsample_ps_);
DRW_shgroup_uniform_texture_ex(grp, "color_tx", reduced_color_tx_.mip_view(0), no_filter);
DRW_shgroup_uniform_texture_ex(grp, "coc_tx", reduced_coc_tx_.mip_view(0), no_filter);
DRW_shgroup_uniform_image_ref(grp, "out_color_img", &downsample_tx_);
DRW_shgroup_call_compute_ref(grp, dispatch_downsample_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
}
void DepthOfField::reduce_pass_sync()
{
reduce_ps_ = DRW_pass_create("Dof.reduce_ps_", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(DOF_REDUCE);
DRWShadingGroup *grp = DRW_shgroup_create(sh, reduce_ps_);
DRW_shgroup_uniform_block(grp, "dof_buf", data_);
DRW_shgroup_uniform_texture_ref_ex(grp, "downsample_tx", &downsample_tx_, no_filter);
DRW_shgroup_storage_block(grp, "scatter_fg_list_buf", scatter_fg_list_buf_);
DRW_shgroup_storage_block(grp, "scatter_bg_list_buf", scatter_bg_list_buf_);
DRW_shgroup_storage_block(grp, "scatter_fg_indirect_buf", scatter_fg_indirect_buf_);
DRW_shgroup_storage_block(grp, "scatter_bg_indirect_buf", scatter_bg_indirect_buf_);
DRW_shgroup_uniform_image(grp, "inout_color_lod0_img", reduced_color_tx_.mip_view(0));
DRW_shgroup_uniform_image(grp, "out_color_lod1_img", reduced_color_tx_.mip_view(1));
DRW_shgroup_uniform_image(grp, "out_color_lod2_img", reduced_color_tx_.mip_view(2));
DRW_shgroup_uniform_image(grp, "out_color_lod3_img", reduced_color_tx_.mip_view(3));
DRW_shgroup_uniform_image(grp, "in_coc_lod0_img", reduced_coc_tx_.mip_view(0));
DRW_shgroup_uniform_image(grp, "out_coc_lod1_img", reduced_coc_tx_.mip_view(1));
DRW_shgroup_uniform_image(grp, "out_coc_lod2_img", reduced_coc_tx_.mip_view(2));
DRW_shgroup_uniform_image(grp, "out_coc_lod3_img", reduced_coc_tx_.mip_view(3));
/* Sync writes to inout_color_lod0_img from stabilize_ps_. */
DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_IMAGE_ACCESS);
DRW_shgroup_call_compute_ref(grp, dispatch_reduce_size_);
/* NOTE: Command buffer barrier is done automatically by the GPU backend. */
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_STORAGE);
}
void DepthOfField::tiles_flatten_pass_sync()
{
tiles_flatten_ps_ = DRW_pass_create("Dof.tiles_flatten_ps_", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(DOF_TILES_FLATTEN);
DRWShadingGroup *grp = DRW_shgroup_create(sh, tiles_flatten_ps_);
/* NOTE(fclem): We should use the reduced_coc_tx_ as it is stable, but we need the slight focus
* flag from the setup pass. A better way would be to do the brute-force in focus gather without
* this. */
DRW_shgroup_uniform_texture_ref_ex(grp, "coc_tx", &setup_coc_tx_, no_filter);
DRW_shgroup_uniform_image_ref(grp, "out_tiles_fg_img", &tiles_fg_tx_.current());
DRW_shgroup_uniform_image_ref(grp, "out_tiles_bg_img", &tiles_bg_tx_.current());
DRW_shgroup_call_compute_ref(grp, dispatch_tiles_flatten_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
void DepthOfField::tiles_dilate_pass_sync()
{
tiles_dilate_minmax_ps_ = DRW_pass_create("Dof.tiles_dilate_minmax_ps_", DRW_STATE_NO_DRAW);
tiles_dilate_minabs_ps_ = DRW_pass_create("Dof.tiles_dilate_minabs_ps_", DRW_STATE_NO_DRAW);
for (int pass = 0; pass < 2; pass++) {
DRWPass *drw_pass = (pass == 0) ? tiles_dilate_minmax_ps_ : tiles_dilate_minabs_ps_;
GPUShader *sh = inst_.shaders.static_shader_get((pass == 0) ? DOF_TILES_DILATE_MINMAX :
DOF_TILES_DILATE_MINABS);
DRWShadingGroup *grp = DRW_shgroup_create(sh, drw_pass);
DRW_shgroup_uniform_image_ref(grp, "in_tiles_fg_img", &tiles_fg_tx_.previous());
DRW_shgroup_uniform_image_ref(grp, "in_tiles_bg_img", &tiles_bg_tx_.previous());
DRW_shgroup_uniform_image_ref(grp, "out_tiles_fg_img", &tiles_fg_tx_.current());
DRW_shgroup_uniform_image_ref(grp, "out_tiles_bg_img", &tiles_bg_tx_.current());
DRW_shgroup_uniform_bool(grp, "dilate_slight_focus", &tiles_dilate_slight_focus_, 1);
DRW_shgroup_uniform_int(grp, "ring_count", &tiles_dilate_ring_count_, 1);
DRW_shgroup_uniform_int(grp, "ring_width_multiplier", &tiles_dilate_ring_width_mul_, 1);
DRW_shgroup_call_compute_ref(grp, dispatch_tiles_dilate_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
}
void DepthOfField::gather_pass_sync()
{
gather_fg_ps_ = DRW_pass_create("Dof.gather_fg_ps_", DRW_STATE_NO_DRAW);
gather_bg_ps_ = DRW_pass_create("Dof.gather_bg_ps_", DRW_STATE_NO_DRAW);
for (int pass = 0; pass < 2; pass++) {
SwapChain<TextureFromPool, 2> &color_chain = (pass == 0) ? color_fg_tx_ : color_bg_tx_;
SwapChain<TextureFromPool, 2> &weight_chain = (pass == 0) ? weight_fg_tx_ : weight_bg_tx_;
bool use_lut = bokeh_lut_ps_ != nullptr;
eShaderType sh_type = (pass == 0) ?
(use_lut ? DOF_GATHER_FOREGROUND_LUT : DOF_GATHER_FOREGROUND) :
(use_lut ? DOF_GATHER_BACKGROUND_LUT : DOF_GATHER_BACKGROUND);
GPUShader *sh = inst_.shaders.static_shader_get(sh_type);
DRWShadingGroup *grp = DRW_shgroup_create(sh, (pass == 0) ? gather_fg_ps_ : gather_bg_ps_);
inst_.sampling.bind_resources(grp);
DRW_shgroup_uniform_block(grp, "dof_buf", data_);
DRW_shgroup_uniform_texture_ex(grp, "color_bilinear_tx", reduced_color_tx_, gather_bilinear);
DRW_shgroup_uniform_texture_ex(grp, "color_tx", reduced_color_tx_, gather_nearest);
DRW_shgroup_uniform_texture_ex(grp, "coc_tx", reduced_coc_tx_, gather_nearest);
DRW_shgroup_uniform_image_ref(grp, "in_tiles_fg_img", &tiles_fg_tx_.current());
DRW_shgroup_uniform_image_ref(grp, "in_tiles_bg_img", &tiles_bg_tx_.current());
DRW_shgroup_uniform_image_ref(grp, "out_color_img", &color_chain.current());
DRW_shgroup_uniform_image_ref(grp, "out_weight_img", &weight_chain.current());
DRW_shgroup_uniform_image_ref(grp, "out_occlusion_img", &occlusion_tx_);
DRW_shgroup_uniform_texture_ref(grp, "bokeh_lut_tx", &bokeh_gather_lut_tx_);
DRW_shgroup_call_compute_ref(grp, dispatch_gather_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
}
}
void DepthOfField::filter_pass_sync()
{
filter_fg_ps_ = DRW_pass_create("Dof.filter_fg_ps_", DRW_STATE_NO_DRAW);
filter_bg_ps_ = DRW_pass_create("Dof.filter_bg_ps_", DRW_STATE_NO_DRAW);
for (int pass = 0; pass < 2; pass++) {
SwapChain<TextureFromPool, 2> &color_chain = (pass == 0) ? color_fg_tx_ : color_bg_tx_;
SwapChain<TextureFromPool, 2> &weight_chain = (pass == 0) ? weight_fg_tx_ : weight_bg_tx_;
GPUShader *sh = inst_.shaders.static_shader_get(DOF_FILTER);
DRWShadingGroup *grp = DRW_shgroup_create(sh, (pass == 0) ? filter_fg_ps_ : filter_bg_ps_);
DRW_shgroup_uniform_texture_ref(grp, "color_tx", &color_chain.previous());
DRW_shgroup_uniform_texture_ref(grp, "weight_tx", &weight_chain.previous());
DRW_shgroup_uniform_image_ref(grp, "out_color_img", &color_chain.current());
DRW_shgroup_uniform_image_ref(grp, "out_weight_img", &weight_chain.current());
DRW_shgroup_call_compute_ref(grp, dispatch_filter_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
}
}
void DepthOfField::scatter_pass_sync()
{
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ADD_FULL;
scatter_fg_ps_ = DRW_pass_create("Dof.scatter_fg_ps_", state);
scatter_bg_ps_ = DRW_pass_create("Dof.scatter_bg_ps_", state);
for (int pass = 0; pass < 2; pass++) {
GPUStorageBuf *scatter_buf = (pass == 0) ? scatter_fg_indirect_buf_ : scatter_bg_indirect_buf_;
GPUStorageBuf *rect_list_buf = (pass == 0) ? scatter_fg_list_buf_ : scatter_bg_list_buf_;
GPUShader *sh = inst_.shaders.static_shader_get(DOF_SCATTER);
DRWShadingGroup *grp = DRW_shgroup_create(sh, (pass == 0) ? scatter_fg_ps_ : scatter_bg_ps_);
DRW_shgroup_uniform_bool_copy(grp, "use_bokeh_lut", bokeh_lut_ps_ != nullptr);
DRW_shgroup_storage_block(grp, "scatter_list_buf", rect_list_buf);
DRW_shgroup_uniform_texture_ref(grp, "bokeh_lut_tx", &bokeh_scatter_lut_tx_);
DRW_shgroup_uniform_texture_ref(grp, "occlusion_tx", &occlusion_tx_);
DRW_shgroup_call_procedural_indirect(grp, GPU_PRIM_TRI_STRIP, nullptr, scatter_buf);
}
}
void DepthOfField::hole_fill_pass_sync()
{
hole_fill_ps_ = DRW_pass_create("Dof.hole_fill_ps_", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(DOF_GATHER_HOLE_FILL);
DRWShadingGroup *grp = DRW_shgroup_create(sh, hole_fill_ps_);
inst_.sampling.bind_resources(grp);
DRW_shgroup_uniform_block(grp, "dof_buf", data_);
DRW_shgroup_uniform_texture_ex(grp, "color_bilinear_tx", reduced_color_tx_, gather_bilinear);
DRW_shgroup_uniform_texture_ex(grp, "color_tx", reduced_color_tx_, gather_nearest);
DRW_shgroup_uniform_texture_ex(grp, "coc_tx", reduced_coc_tx_, gather_nearest);
DRW_shgroup_uniform_image_ref(grp, "in_tiles_fg_img", &tiles_fg_tx_.current());
DRW_shgroup_uniform_image_ref(grp, "in_tiles_bg_img", &tiles_bg_tx_.current());
DRW_shgroup_uniform_image_ref(grp, "out_color_img", &hole_fill_color_tx_);
DRW_shgroup_uniform_image_ref(grp, "out_weight_img", &hole_fill_weight_tx_);
DRW_shgroup_call_compute_ref(grp, dispatch_gather_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
}
void DepthOfField::resolve_pass_sync()
{
eGPUSamplerState with_filter = GPU_SAMPLER_FILTER;
RenderBuffers &render_buffers = inst_.render_buffers;
resolve_ps_ = DRW_pass_create("Dof.resolve_ps_", DRW_STATE_NO_DRAW);
bool use_lut = bokeh_lut_ps_ != nullptr;
eShaderType sh_type = do_hq_slight_focus_ ? (use_lut ? DOF_RESOLVE_LUT_HQ : DOF_RESOLVE_HQ) :
(use_lut ? DOF_RESOLVE_LUT : DOF_RESOLVE);
GPUShader *sh = inst_.shaders.static_shader_get(sh_type);
DRWShadingGroup *grp = DRW_shgroup_create(sh, resolve_ps_);
inst_.sampling.bind_resources(grp);
DRW_shgroup_uniform_block(grp, "dof_buf", data_);
DRW_shgroup_uniform_texture_ref_ex(grp, "depth_tx", &render_buffers.depth_tx, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "color_tx", &input_color_tx_, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "color_bg_tx", &color_bg_tx_.current(), with_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "color_fg_tx", &color_fg_tx_.current(), with_filter);
DRW_shgroup_uniform_image_ref(grp, "in_tiles_fg_img", &tiles_fg_tx_.current());
DRW_shgroup_uniform_image_ref(grp, "in_tiles_bg_img", &tiles_bg_tx_.current());
DRW_shgroup_uniform_texture_ref(grp, "weight_bg_tx", &weight_bg_tx_.current());
DRW_shgroup_uniform_texture_ref(grp, "weight_fg_tx", &weight_fg_tx_.current());
DRW_shgroup_uniform_texture_ref(grp, "color_hole_fill_tx", &hole_fill_color_tx_);
DRW_shgroup_uniform_texture_ref(grp, "weight_hole_fill_tx", &hole_fill_weight_tx_);
DRW_shgroup_uniform_texture_ref(grp, "bokeh_lut_tx", &bokeh_resolve_lut_tx_);
DRW_shgroup_uniform_image_ref(grp, "out_color_img", &output_color_tx_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
DRW_shgroup_call_compute_ref(grp, dispatch_resolve_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Post-FX Rendering.
* \{ */
void DepthOfField::render(GPUTexture **input_tx, GPUTexture **output_tx)
{
if (fx_radius_ == 0.0f) {
return;
}
input_color_tx_ = *input_tx;
output_color_tx_ = *output_tx;
extent_ = {GPU_texture_width(input_color_tx_), GPU_texture_height(input_color_tx_)};
{
const CameraData &cam_data = inst_.camera.data_get();
data_.camera_type = cam_data.type;
/* OPTI(fclem) Could be optimized. */
float3 jitter = float3(fx_radius_, 0.0f, -focus_distance_);
float3 center = float3(0.0f, 0.0f, -focus_distance_);
mul_project_m4_v3(cam_data.winmat.ptr(), jitter);
mul_project_m4_v3(cam_data.winmat.ptr(), center);
/* Simplify CoC calculation to a simple MADD. */
if (inst_.camera.is_orthographic()) {
data_.coc_mul = (center[0] - jitter[0]) * 0.5f * extent_[0];
data_.coc_bias = focus_distance_ * data_.coc_mul;
}
else {
data_.coc_bias = -(center[0] - jitter[0]) * 0.5f * extent_[0];
data_.coc_mul = focus_distance_ * data_.coc_bias;
}
float min_fg_coc = coc_radius_from_camera_depth(data_, -cam_data.clip_near);
float max_bg_coc = coc_radius_from_camera_depth(data_, -cam_data.clip_far);
if (data_.camera_type != CAMERA_ORTHO) {
/* Background is at infinity so maximum CoC is the limit of coc_radius_from_camera_depth
* at -inf. We only do this for perspective camera since orthographic coc limit is inf. */
max_bg_coc = data_.coc_bias;
}
/* Clamp with user defined max. */
data_.coc_abs_max = min_ff(max_ff(fabsf(min_fg_coc), fabsf(max_bg_coc)), fx_max_coc_);
/* TODO(fclem): Make this dependent of the quality of the gather pass. */
data_.scatter_coc_threshold = 4.0f;
data_.push_update();
}
int2 half_res = math::divide_ceil(extent_, int2(2));
int2 quarter_res = math::divide_ceil(extent_, int2(4));
int2 tile_res = math::divide_ceil(half_res, int2(DOF_TILES_SIZE));
dispatch_setup_size_ = int3(math::divide_ceil(half_res, int2(DOF_DEFAULT_GROUP_SIZE)), 1);
dispatch_stabilize_size_ = int3(math::divide_ceil(half_res, int2(DOF_DEFAULT_GROUP_SIZE)), 1);
dispatch_downsample_size_ = int3(math::divide_ceil(quarter_res, int2(DOF_DEFAULT_GROUP_SIZE)),
1);
dispatch_reduce_size_ = int3(math::divide_ceil(half_res, int2(DOF_REDUCE_GROUP_SIZE)), 1);
dispatch_tiles_flatten_size_ = int3(math::divide_ceil(half_res, int2(DOF_TILES_SIZE)), 1);
dispatch_tiles_dilate_size_ = int3(
math::divide_ceil(tile_res, int2(DOF_TILES_DILATE_GROUP_SIZE)), 1);
dispatch_gather_size_ = int3(math::divide_ceil(half_res, int2(DOF_GATHER_GROUP_SIZE)), 1);
dispatch_filter_size_ = int3(math::divide_ceil(half_res, int2(DOF_FILTER_GROUP_SIZE)), 1);
dispatch_resolve_size_ = int3(math::divide_ceil(extent_, int2(DOF_RESOLVE_GROUP_SIZE)), 1);
if (GPU_type_matches_ex(GPU_DEVICE_ATI, GPU_OS_UNIX, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
/* On Mesa, there is a sync bug which can make a portion of the main pass (usually one shader)
* leave blocks of un-initialized memory. Doing a flush seems to alleviate the issue. */
GPU_flush();
}
DRW_stats_group_start("Depth of Field");
{
DRW_stats_group_start("Setup");
bokeh_gather_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_RG16F);
bokeh_scatter_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_R16F);
bokeh_resolve_lut_tx_.acquire(int2(DOF_MAX_SLIGHT_FOCUS_RADIUS * 2 + 1), GPU_R16F);
DRW_draw_pass(bokeh_lut_ps_);
setup_color_tx_.acquire(half_res, GPU_RGBA16F);
setup_coc_tx_.acquire(half_res, GPU_RG16F);
DRW_draw_pass(setup_ps_);
/* Outputs to reduced_*_tx_ mip 0. */
DRW_draw_pass(stabilize_ps_);
/* Used by stabilize pass. */
setup_color_tx_.release();
{
DRW_stats_group_start("Tile Prepare");
/* WARNING: If format changes, make sure dof_tile_* GLSL constants are properly encoded. */
tiles_fg_tx_.previous().acquire(tile_res, GPU_RGBA16F);
tiles_bg_tx_.previous().acquire(tile_res, GPU_R11F_G11F_B10F);
tiles_fg_tx_.current().acquire(tile_res, GPU_RGBA16F);
tiles_bg_tx_.current().acquire(tile_res, GPU_R11F_G11F_B10F);
DRW_draw_pass(tiles_flatten_ps_);
/* Used by tile_flatten and stabilize_ps pass. */
setup_coc_tx_.release();
/* Error introduced by gather center jittering. */
const float error_multiplier = 1.0f + 1.0f / (DOF_GATHER_RING_COUNT + 0.5f);
int dilation_end_radius = ceilf((fx_max_coc_ * error_multiplier) / (DOF_TILES_SIZE * 2));
/* Run dilation twice. One for minmax and one for minabs. */
for (int pass = 0; pass < 2; pass++) {
/* This algorithm produce the exact dilation radius by dividing it in multiple passes. */
int dilation_radius = 0;
while (dilation_radius < dilation_end_radius) {
/* Dilate slight focus only on first iteration. */
tiles_dilate_slight_focus_ = (dilation_radius == 0) ? 1 : 0;
int remainder = dilation_end_radius - dilation_radius;
/* Do not step over any unvisited tile. */
int max_multiplier = dilation_radius + 1;
int ring_count = min_ii(DOF_DILATE_RING_COUNT, ceilf(remainder / (float)max_multiplier));
int multiplier = min_ii(max_multiplier, floorf(remainder / (float)ring_count));
dilation_radius += ring_count * multiplier;
tiles_dilate_ring_count_ = ring_count;
tiles_dilate_ring_width_mul_ = multiplier;
tiles_fg_tx_.swap();
tiles_bg_tx_.swap();
DRW_draw_pass((pass == 0) ? tiles_dilate_minmax_ps_ : tiles_dilate_minabs_ps_);
}
}
tiles_fg_tx_.previous().release();
tiles_bg_tx_.previous().release();
DRW_stats_group_end();
}
downsample_tx_.acquire(quarter_res, GPU_RGBA16F);
DRW_draw_pass(downsample_ps_);
scatter_fg_indirect_buf_.clear_to_zero();
scatter_bg_indirect_buf_.clear_to_zero();
DRW_draw_pass(reduce_ps_);
/* Used by reduce pass. */
downsample_tx_.release();
DRW_stats_group_end();
}
for (int is_background = 0; is_background < 2; is_background++) {
DRW_stats_group_start(is_background ? "Background Convolution" : "Foreground Convolution");
SwapChain<TextureFromPool, 2> &color_tx = is_background ? color_bg_tx_ : color_fg_tx_;
SwapChain<TextureFromPool, 2> &weight_tx = is_background ? weight_bg_tx_ : weight_fg_tx_;
DRWPass *gather_ps = is_background ? gather_bg_ps_ : gather_fg_ps_;
DRWPass *filter_ps = is_background ? filter_bg_ps_ : filter_fg_ps_;
DRWPass *scatter_ps = is_background ? scatter_bg_ps_ : scatter_fg_ps_;
color_tx.current().acquire(half_res, GPU_RGBA16F);
weight_tx.current().acquire(half_res, GPU_R16F);
occlusion_tx_.acquire(half_res, GPU_RG16F);
DRW_draw_pass(gather_ps);
{
/* Filtering pass. */
color_tx.swap();
weight_tx.swap();
color_tx.current().acquire(half_res, GPU_RGBA16F);
weight_tx.current().acquire(half_res, GPU_R16F);
DRW_draw_pass(filter_ps);
color_tx.previous().release();
weight_tx.previous().release();
}
GPU_memory_barrier(GPU_BARRIER_FRAMEBUFFER);
scatter_fb_.ensure(GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(color_tx.current()));
GPU_framebuffer_bind(scatter_fb_);
DRW_draw_pass(scatter_ps);
/* Used by scatter pass. */
occlusion_tx_.release();
DRW_stats_group_end();
}
{
DRW_stats_group_start("Hole Fill");
bokeh_gather_lut_tx_.release();
bokeh_scatter_lut_tx_.release();
hole_fill_color_tx_.acquire(half_res, GPU_RGBA16F);
hole_fill_weight_tx_.acquire(half_res, GPU_R16F);
DRW_draw_pass(hole_fill_ps_);
/* NOTE: We do not filter the hole-fill pass as effect is likely to not be noticeable. */
DRW_stats_group_end();
}
{
DRW_stats_group_start("Resolve");
DRW_draw_pass(resolve_ps_);
color_bg_tx_.current().release();
color_fg_tx_.current().release();
weight_bg_tx_.current().release();
weight_fg_tx_.current().release();
tiles_fg_tx_.current().release();
tiles_bg_tx_.current().release();
hole_fill_color_tx_.release();
hole_fill_weight_tx_.release();
bokeh_resolve_lut_tx_.release();
DRW_stats_group_end();
}
DRW_stats_group_end();
/* Swap buffers so that next effect has the right input. */
SWAP(GPUTexture *, *input_tx, *output_tx);
}
/** \} */
} // namespace blender::eevee

View File

@ -0,0 +1,183 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2021 Blender Foundation.
*/
/** \file
* \ingroup eevee
*
* Depth of field post process effect.
*
* There are 2 methods to achieve this effect.
* - The first uses projection matrix offsetting and sample accumulation to give
* reference quality depth of field. But this needs many samples to hide the
* under-sampling.
* - The second one is a post-processing based one. It follows the
* implementation described in the presentation
* "Life of a Bokeh - Siggraph 2018" from Guillaume Abadie.
* There are some difference with our actual implementation that prioritize quality.
*/
#pragma once
#include "eevee_shader_shared.hh"
namespace blender::eevee {
class Instance;
/* -------------------------------------------------------------------- */
/** \name Depth of field
* \{ */
class DepthOfField {
private:
class Instance &inst_;
/** Samplers */
static constexpr eGPUSamplerState gather_bilinear = GPU_SAMPLER_MIPMAP | GPU_SAMPLER_FILTER;
static constexpr eGPUSamplerState gather_nearest = GPU_SAMPLER_MIPMAP;
/** Input/Output texture references. */
GPUTexture *input_color_tx_ = nullptr;
GPUTexture *output_color_tx_ = nullptr;
/** Bokeh LUT precompute pass. */
TextureFromPool bokeh_gather_lut_tx_ = {"dof_bokeh_gather_lut"};
TextureFromPool bokeh_resolve_lut_tx_ = {"dof_bokeh_resolve_lut"};
TextureFromPool bokeh_scatter_lut_tx_ = {"dof_bokeh_scatter_lut"};
DRWPass *bokeh_lut_ps_ = nullptr;
/** Outputs half-resolution color and Circle Of Confusion. */
TextureFromPool setup_coc_tx_ = {"dof_setup_coc"};
TextureFromPool setup_color_tx_ = {"dof_setup_color"};
int3 dispatch_setup_size_ = int3(-1);
DRWPass *setup_ps_ = nullptr;
/** Allocated because we need mip chain. Which isn't supported by TextureFromPool. */
Texture reduced_coc_tx_ = {"dof_reduced_coc"};
Texture reduced_color_tx_ = {"dof_reduced_color"};
/** Stabilization (flicker attenuation) of Color and CoC output of the setup pass. */
int3 dispatch_stabilize_size_ = int3(-1);
DRWPass *stabilize_ps_ = nullptr;
/** 1/4th res color buffer used to speedup the local contrast test in the first reduce pass. */
TextureFromPool downsample_tx_ = {"dof_downsample"};
int3 dispatch_downsample_size_ = int3(-1);
DRWPass *downsample_ps_ = nullptr;
/** Create mip-mapped color & COC textures for gather passes as well as scatter rect list. */
DepthOfFieldScatterListBuf scatter_fg_list_buf_;
DepthOfFieldScatterListBuf scatter_bg_list_buf_;
DrawIndirectBuf scatter_fg_indirect_buf_;
DrawIndirectBuf scatter_bg_indirect_buf_;
int3 dispatch_reduce_size_ = int3(-1);
DRWPass *reduce_ps_ = nullptr;
/** Outputs min & max COC in each 8x8 half res pixel tiles (so 1/16th of full resolution). */
SwapChain<TextureFromPool, 2> tiles_fg_tx_;
SwapChain<TextureFromPool, 2> tiles_bg_tx_;
int3 dispatch_tiles_flatten_size_ = int3(-1);
DRWPass *tiles_flatten_ps_ = nullptr;
/** Dilates the min & max CoCs to cover maximum COC values. */
bool1 tiles_dilate_slight_focus_ = false;
int tiles_dilate_ring_count_ = -1;
int tiles_dilate_ring_width_mul_ = -1;
int3 dispatch_tiles_dilate_size_ = int3(-1);
DRWPass *tiles_dilate_minmax_ps_ = nullptr;
DRWPass *tiles_dilate_minabs_ps_ = nullptr;
/** Gather convolution for low intensity pixels and low contrast areas. */
SwapChain<TextureFromPool, 2> color_bg_tx_;
SwapChain<TextureFromPool, 2> color_fg_tx_;
SwapChain<TextureFromPool, 2> weight_bg_tx_;
SwapChain<TextureFromPool, 2> weight_fg_tx_;
TextureFromPool occlusion_tx_ = {"dof_occlusion"};
int3 dispatch_gather_size_ = int3(-1);
DRWPass *gather_fg_ps_ = nullptr;
DRWPass *gather_bg_ps_ = nullptr;
/** Hole-fill convolution: Gather pass meant to fill areas of foreground dis-occlusion. */
TextureFromPool hole_fill_color_tx_ = {"dof_color_hole_fill"};
TextureFromPool hole_fill_weight_tx_ = {"dof_weight_hole_fill"};
DRWPass *hole_fill_ps_ = nullptr;
/** Small Filter pass to reduce noise out of gather passes. */
int3 dispatch_filter_size_ = int3(-1);
DRWPass *filter_fg_ps_ = nullptr;
DRWPass *filter_bg_ps_ = nullptr;
/** Scatter convolution: A quad is emitted for every 4 bright enough half pixels. */
Framebuffer scatter_fb_ = {"dof_scatter"};
DRWPass *scatter_fg_ps_ = nullptr;
DRWPass *scatter_bg_ps_ = nullptr;
/** Recombine the results and also perform a slight out of focus gather. */
int3 dispatch_resolve_size_ = int3(-1);
DRWPass *resolve_ps_ = nullptr;
DepthOfFieldDataBuf data_;
/** Scene settings that are immutable. */
float user_overblur_;
float fx_max_coc_;
/** Use Hiqh Quality (expensive) in-focus gather pass. */
bool do_hq_slight_focus_;
/** Use jittered depth of field where we randomize camera location. */
bool do_jitter_;
/** Circle of Confusion radius for FX DoF passes. Is in view X direction in [0..1] range. */
float fx_radius_;
/** Circle of Confusion radius for jittered DoF. Is in view X direction in [0..1] range. */
float jitter_radius_;
/** Focus distance in view space. */
float focus_distance_;
/** Extent of the input buffer. */
int2 extent_;
/** Reduce pass info. */
int reduce_steps_;
public:
DepthOfField(Instance &inst) : inst_(inst){};
~DepthOfField(){};
void init();
void sync();
/**
* Apply Depth Of Field jittering to the view and projection matrices..
*/
void jitter_apply(float4x4 &winmat, float4x4 &viewmat);
/**
* Will swap input and output texture if rendering happens. The actual output of this function
* is in input_tx.
*/
void render(GPUTexture **input_tx, GPUTexture **output_tx);
bool postfx_enabled() const
{
return fx_radius_ > 0.0f;
}
private:
void bokeh_lut_pass_sync();
void setup_pass_sync();
void stabilize_pass_sync();
void downsample_pass_sync();
void reduce_pass_sync();
void tiles_flatten_pass_sync();
void tiles_dilate_pass_sync();
void gather_pass_sync();
void filter_pass_sync();
void scatter_pass_sync();
void hole_fill_pass_sync();
void resolve_pass_sync();
};
/** \} */
} // namespace blender::eevee

View File

@ -79,6 +79,7 @@ class Film {
float *read_pass(eViewLayerEEVEEPassType pass_type);
float *read_aov(ViewLayerAOV *aov);
/** Returns shading views internal resolution. */
int2 render_extent_get() const
{
return data_.render_extent;

View File

@ -61,6 +61,7 @@ void Instance::init(const int2 &output_res,
camera.init();
film.init(output_res, output_rect);
velocity.init();
depth_of_field.init();
motion_blur.init();
main_view.init();
}
@ -98,6 +99,7 @@ void Instance::begin_sync()
gpencil_engine_enabled = false;
depth_of_field.sync();
motion_blur.sync();
pipelines.sync();
main_view.sync();

View File

@ -16,6 +16,7 @@
#include "DRW_render.h"
#include "eevee_camera.hh"
#include "eevee_depth_of_field.hh"
#include "eevee_film.hh"
#include "eevee_material.hh"
#include "eevee_motion_blur.hh"
@ -44,6 +45,7 @@ class Instance {
PipelineModule pipelines;
VelocityModule velocity;
MotionBlurModule motion_blur;
DepthOfField depth_of_field;
Sampling sampling;
Camera camera;
Film film;
@ -80,6 +82,7 @@ class Instance {
pipelines(*this),
velocity(*this),
motion_blur(*this),
depth_of_field(*this),
sampling(*this),
camera(*this),
film(*this),

View File

@ -27,11 +27,11 @@ class Sampling {
Instance &inst_;
/* Number of samples in the first ring of jittered depth of field. */
constexpr static uint64_t dof_web_density_ = 6;
static constexpr uint64_t dof_web_density_ = 6;
/* High number of sample for viewport infinite rendering. */
constexpr static uint64_t infinite_sample_count_ = 0xFFFFFFu;
static constexpr uint64_t infinite_sample_count_ = 0xFFFFFFu;
/* During interactive rendering, loop over the first few samples. */
constexpr static uint64_t interactive_sample_max_ = 8;
static constexpr uint64_t interactive_sample_max_ = 8;
/** 0 based current sample. Might not increase sequentially in viewport. */
uint64_t sample_ = 0;

View File

@ -90,6 +90,44 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_
return "eevee_motion_blur_tiles_flatten_render";
case MOTION_BLUR_TILE_FLATTEN_VIEWPORT:
return "eevee_motion_blur_tiles_flatten_viewport";
case DOF_BOKEH_LUT:
return "eevee_depth_of_field_bokeh_lut";
case DOF_DOWNSAMPLE:
return "eevee_depth_of_field_downsample";
case DOF_FILTER:
return "eevee_depth_of_field_filter";
case DOF_GATHER_FOREGROUND_LUT:
return "eevee_depth_of_field_gather_foreground_lut";
case DOF_GATHER_FOREGROUND:
return "eevee_depth_of_field_gather_foreground";
case DOF_GATHER_BACKGROUND_LUT:
return "eevee_depth_of_field_gather_background_lut";
case DOF_GATHER_BACKGROUND:
return "eevee_depth_of_field_gather_background";
case DOF_GATHER_HOLE_FILL:
return "eevee_depth_of_field_hole_fill";
case DOF_REDUCE:
return "eevee_depth_of_field_reduce";
case DOF_RESOLVE:
return "eevee_depth_of_field_resolve_lq";
case DOF_RESOLVE_HQ:
return "eevee_depth_of_field_resolve_hq";
case DOF_RESOLVE_LUT:
return "eevee_depth_of_field_resolve_lq_lut";
case DOF_RESOLVE_LUT_HQ:
return "eevee_depth_of_field_resolve_hq_lut";
case DOF_SETUP:
return "eevee_depth_of_field_setup";
case DOF_SCATTER:
return "eevee_depth_of_field_scatter";
case DOF_STABILIZE:
return "eevee_depth_of_field_stabilize";
case DOF_TILES_DILATE_MINABS:
return "eevee_depth_of_field_tiles_dilate_minabs";
case DOF_TILES_DILATE_MINMAX:
return "eevee_depth_of_field_tiles_dilate_minmax";
case DOF_TILES_FLATTEN:
return "eevee_depth_of_field_tiles_flatten";
/* To avoid compiler warning about missing case. */
case MAX_SHADER_TYPE:
return "";

View File

@ -29,6 +29,26 @@ enum eShaderType {
FILM_FRAG = 0,
FILM_COMP,
DOF_BOKEH_LUT,
DOF_DOWNSAMPLE,
DOF_FILTER,
DOF_GATHER_BACKGROUND_LUT,
DOF_GATHER_BACKGROUND,
DOF_GATHER_FOREGROUND_LUT,
DOF_GATHER_FOREGROUND,
DOF_GATHER_HOLE_FILL,
DOF_REDUCE,
DOF_RESOLVE_HQ,
DOF_RESOLVE_LUT_HQ,
DOF_RESOLVE_LUT,
DOF_RESOLVE,
DOF_SCATTER,
DOF_SETUP,
DOF_STABILIZE,
DOF_TILES_DILATE_MINABS,
DOF_TILES_DILATE_MINMAX,
DOF_TILES_FLATTEN,
MOTION_BLUR_GATHER,
MOTION_BLUR_TILE_DILATE,
MOTION_BLUR_TILE_FLATTEN_RENDER,

View File

@ -23,6 +23,9 @@ using draw::SwapChain;
using draw::Texture;
using draw::TextureFromPool;
constexpr eGPUSamplerState no_filter = GPU_SAMPLER_DEFAULT;
constexpr eGPUSamplerState with_filter = GPU_SAMPLER_FILTER;
#endif
#define UBO_MIN_MAX_SUPPORTED_SIZE 1 << 14
@ -345,6 +348,116 @@ BLI_STATIC_ASSERT_ALIGN(MotionBlurTileIndirection, 16)
/** \} */
/* -------------------------------------------------------------------- */
/** \name Depth of field
* \{ */
/* 5% error threshold. */
#define DOF_FAST_GATHER_COC_ERROR 0.05
#define DOF_GATHER_RING_COUNT 5
#define DOF_DILATE_RING_COUNT 3
/**
 * Depth of field parameters shared between host code and shaders.
 * This struct is wrapped in a uniform buffer (`DepthOfFieldDataBuf`), so the member order defines
 * the buffer layout. The static assert following the struct checks the 16 alignment: keep the
 * trailing `_pad*` members in sync when adding or removing members.
 */
struct DepthOfFieldData {
/** Size of the render targets for gather & scatter passes. */
int2 extent;
/** Size of a pixel in uv space (1.0 / extent). */
float2 texel_size;
/** Scale factor for anisotropic bokeh. */
float2 bokeh_anisotropic_scale;
/** Presumably the reciprocal of `bokeh_anisotropic_scale` (filled by host code, not shown). */
float2 bokeh_anisotropic_scale_inv;
/* Correction factor to align main target pixels with the filtered mipmap chain texture. */
float2 gather_uv_fac;
/** Scatter parameters. */
float scatter_coc_threshold;
float scatter_color_threshold;
float scatter_neighbor_max_color;
int scatter_sprite_per_row;
/** Firefly removing factor. */
float denoise_factor;
/** Number of side the bokeh shape has. */
float bokeh_blades;
/** Rotation of the bokeh shape. */
float bokeh_rotation;
/**
 * Multiplier and bias to apply to linear depth to Circle of confusion (CoC).
 * See `coc_radius_from_camera_depth()` for how they are applied.
 */
float coc_mul, coc_bias;
/** Maximum absolute allowed Circle of confusion (CoC). Min of computed max and user max. */
float coc_abs_max;
/** Copy of camera type. */
eCameraType camera_type;
/** Max number of sprite in the scatter pass for each ground. */
int scatter_max_rect;
/** Explicit padding. Unused. */
int _pad0, _pad1;
};
BLI_STATIC_ASSERT_ALIGN(DepthOfFieldData, 16)
/**
 * One scatter sprite. Elements of this type are stored in the scatter list storage buffers
 * (`DepthOfFieldScatterListBuf`), so the member order defines the buffer layout.
 */
struct ScatterRect {
/** Color and CoC of the 4 pixels the scatter sprite represents. */
float4 color_and_coc[4];
/** Rect center position in half pixel space. */
float2 offset;
/** Rect half extent in half pixel space. */
float2 half_extent;
};
BLI_STATIC_ASSERT_ALIGN(ScatterRect, 16)
/** WORKAROUND(@fclem): This is because this file is included before common_math_lib.glsl. */
#ifndef M_PI
# define EEVEE_PI
# define M_PI 3.14159265358979323846 /* pi */
#endif
/* Convert a camera depth to a signed Circle of Confusion radius using the multiplier and bias
 * stored in `dof`. The CoC is linear in `depth` for orthographic cameras and linear in
 * `1 / depth` for every other camera type. */
static inline float coc_radius_from_camera_depth(DepthOfFieldData dof, float depth)
{
  if (dof.camera_type != CAMERA_ORTHO) {
    depth = 1.0f / depth;
  }
  return dof.coc_mul * depth + dof.coc_bias;
}
/* Length of one side of a regular polygon with `sides_count` sides inscribed in the unit
 * circle. */
static inline float regular_polygon_side_length(float sides_count)
{
  /* Half of the angle covered by one side, seen from the center. */
  float half_side_angle = M_PI / sides_count;
  return 2.0f * sinf(half_side_angle);
}
/* Returns intersection ratio between the radius edge at theta and the regular polygon edge.
 * Start first corners at theta == 0.
 * From Graphics Gems from CryENGINE 3 (Siggraph 2013) by Tiago Sousa (slide 36). */
static inline float circle_to_polygon_radius(float sides_count, float theta)
{
  float side_angle = (2.0f * M_PI) / sides_count;
  /* Index of the polygon side whose middle is the closest to theta. */
  float nearest_side = floorf((sides_count * theta + M_PI) / (2.0f * M_PI));
  /* Distance from the center to the middle of a side. */
  float side_middle_dist = cosf(side_angle * 0.5f);
  /* Angle between theta and the middle of the nearest side. */
  float angle_to_middle = theta - side_angle * nearest_side;
  return side_middle_dist / cosf(angle_to_middle);
}
/* Remap input angle to have homogeneous spacing of points along a polygon edge.
 * Expects theta to be in [0..2pi] range. */
static inline float circle_to_polygon_angle(float sides_count, float theta)
{
  float side_angle = (2.0f * M_PI) / sides_count;
  float halfside_angle = side_angle * 0.5f;
  /* Angle at which the side containing theta starts. */
  float side_index = floorf(theta / side_angle);
  float side_start = side_index * side_angle;
  /* Relative position of the sample on the polygon half side, in [-1..1]. */
  float ratio = ((theta - side_start) - halfside_angle) / halfside_angle;
  /* Right triangle formed by the center, the middle of the side and the sample on the side. */
  float halfside_len = regular_polygon_side_length(sides_count) * 0.5f;
  /* Length of segment from center to the middle of polygon side. */
  float apothem = circle_to_polygon_radius(sides_count, 0.0f);
  /* NOTE: atan(y_over_x) has output range [-M_PI_2..M_PI_2]. */
  float remapped_local_theta = atanf((ratio * halfside_len) / apothem);
  return side_start + remapped_local_theta;
}
#ifdef EEVEE_PI
# undef M_PI
#endif
/** \} */
/* -------------------------------------------------------------------- */
/** \name Ray-Tracing
* \{ */
@ -404,13 +517,16 @@ float4 utility_tx_sample(sampler2DArray util_tx, float2 uv, float layer)
using AOVsInfoDataBuf = draw::StorageBuffer<AOVsInfoData>;
using CameraDataBuf = draw::UniformBuffer<CameraData>;
using DepthOfFieldDataBuf = draw::UniformBuffer<DepthOfFieldData>;
using DepthOfFieldScatterListBuf = draw::StorageArrayBuffer<ScatterRect, 16, true>;
using DrawIndirectBuf = draw::StorageBuffer<DrawCommand, true>;
using FilmDataBuf = draw::UniformBuffer<FilmData>;
using MotionBlurDataBuf = draw::UniformBuffer<MotionBlurData>;
using MotionBlurTileIndirectionBuf = draw::StorageBuffer<MotionBlurTileIndirection, true>;
using SamplingDataBuf = draw::StorageBuffer<SamplingData>;
using VelocityGeometryBuf = draw::StorageArrayBuffer<float4, 16, true>;
using VelocityIndexBuf = draw::StorageArrayBuffer<VelocityIndex, 16>;
using VelocityObjectBuf = draw::StorageArrayBuffer<float4x4, 16>;
using MotionBlurDataBuf = draw::UniformBuffer<MotionBlurData>;
using MotionBlurTileIndirectionBuf = draw::StorageBuffer<MotionBlurTileIndirection, true>;
} // namespace blender::eevee
#endif

View File

@ -143,7 +143,7 @@ void ShadingView::render()
GPUTexture *ShadingView::render_postfx(GPUTexture *input_tx)
{
if (/*!dof_.postfx_enabled() &&*/ !inst_.motion_blur.postfx_enabled()) {
if (!inst_.depth_of_field.postfx_enabled() && !inst_.motion_blur.postfx_enabled()) {
return input_tx;
}
postfx_tx_.acquire(extent_, GPU_RGBA16F);
@ -151,7 +151,7 @@ GPUTexture *ShadingView::render_postfx(GPUTexture *input_tx)
GPUTexture *output_tx = postfx_tx_;
/* Swapping is done internally. Actual output is set to the next input. */
// dof_.render(depth_tx_, &input_tx, &output_tx);
inst_.depth_of_field.render(&input_tx, &output_tx);
inst_.motion_blur.render(&input_tx, &output_tx);
return input_tx;
@ -178,7 +178,7 @@ void ShadingView::update_view()
/* FIXME(fclem): The offset may be is noticeably large and the culling might make object pop
* out of the blurring radius. To fix this, use custom enlarged culling matrix. */
// dof_.jitter_apply(winmat, viewmat);
inst_.depth_of_field.jitter_apply(winmat, viewmat);
DRW_view_update_sub(render_view_, viewmat.ptr(), winmat.ptr());
// inst_.lightprobes.set_view(render_view_, extent_);

View File

@ -41,10 +41,6 @@ class ShadingView {
/** Matrix to apply to the viewmat. */
const float (*face_matrix_)[4];
/** Post-FX modules. */
// DepthOfField dof_;
// MotionBlur mb_;
/** Raytracing persistent buffers. Only opaque and refraction can have surface tracing. */
// RaytraceBuffer rt_buffer_opaque_;
// RaytraceBuffer rt_buffer_refract_;

View File

@ -0,0 +1,681 @@
/**
* Depth of Field Gather accumulator.
* We currently have only 2 which are very similar.
* One is for the halfres gather passes and the other one for slight in focus regions.
**/
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
/* -------------------------------------------------------------------- */
/** \name Options.
* \{ */
/* Quality options.
 * - gather_ring_count: number of concentric sample rings visited by the gather loop.
 * - gather_ring_density: each ring samples `gather_ring_density * ring_index` pairs.
 * - gather_max_density_change: see note on its declaration (not read in this part of the file).
 * - gather_density_change_ring: number of rings subtracted from the kernel when computing the
 *   reduced kernel constants and the per density change sample count below. */
#ifdef DOF_HOLEFILL_PASS
/* No need for very high density for hole_fill. */
const int gather_ring_count = 3;
const int gather_ring_density = 3;
const int gather_max_density_change = 0;
const int gather_density_change_ring = 1;
#else
/* Ring count comes from the define shared with host code. */
const int gather_ring_count = DOF_GATHER_RING_COUNT;
const int gather_ring_density = 3;
const int gather_max_density_change = 50; /* Dictates the maximum good quality blur. */
const int gather_density_change_ring = 1;
#endif
/** \} */
/* -------------------------------------------------------------------- */
/** \name Constants.
* \{ */
/* Reciprocal of the ring count. */
const float unit_ring_radius = 1.0 / float(gather_ring_count);
/* Spacing between two consecutive rings for a kernel of radius 1. The extra 0.5 keeps half a ring
 * of margin past the outermost ring (ring N sits at `N * unit_sample_radius`). */
const float unit_sample_radius = 1.0 / float(gather_ring_count + 0.5);
/* Radius, in ring units, of the full kernel and of the kernel with `gather_density_change_ring`
 * fewer rings. */
const float large_kernel_radius = 0.5 + float(gather_ring_count);
const float smaller_kernel_radius = 0.5 + float(gather_ring_count - gather_density_change_ring);
/* NOTE(fclem) the bias is reducing issues with density change visible transition. */
const float radius_downscale_factor = smaller_kernel_radius / large_kernel_radius;
/* Ring index derived from the two counts above. Not read in this part of the file. */
const int change_density_at_ring = (gather_ring_count - gather_density_change_ring + 1);
/* Extra margin, in ring spacing units, added to the bordering radius of each ring. */
const float coc_radius_error = 2.0;
/** \} */
/* -------------------------------------------------------------------- */
/** \name Gather common.
* \{ */
/* Accumulator used both for individual samples and for ring / total sums.
 * NOTE: `GATHER_DATA_INIT` below uses a positional constructor. Keep it in sync with the member
 * order and count. */
struct DofGatherData {
/* Sample color, or weighted sum of colors once accumulated. */
vec4 color;
/* Sum of the weights of the accumulated samples. */
float weight;
/* Distance passed to the intersection test against the sample CoC. */
float dist; /* TODO remove */
/* For scatter occlusion. */
/* CoC of the sample, or weighted sum of CoC (and of squared CoC) once accumulated. */
float coc;
float coc_sqr;
/* For ring bucket merging. */
float transparency;
float layer_opacity;
};
/* Zero initialized accumulator. */
#define GATHER_DATA_INIT DofGatherData(vec4(0.0), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
/* Intersection with the center of the kernel.
 * Returns 1 when the sample CoC fully covers the kernel center, 0 when it does not reach it.
 * The transition is either a hard step or a ramp whose slope is `intersection_multiplier`. */
float dof_intersection_weight(float coc, float distance_from_center, float intersection_multiplier)
{
  /* Positive when the sample disk reaches past the kernel center. */
  float coverage = abs(coc) - distance_from_center;
  if (!no_smooth_intersection) {
    /* (Slide 64). */
    return saturate(coverage * intersection_multiplier + 0.5);
  }
  return step(0.0, coverage);
}
/* Returns weight of the sample for the outer bucket (containing previous rings).
 * The remainder `1.0 - weight` is meant for the current ring bucket. */
float dof_gather_accum_weight(float coc, float bordering_radius, bool first_ring)
{
  /* First ring has nothing to be mixed against: everything goes to the ring bucket. */
  return first_ring ? 0.0 : saturate(coc - bordering_radius);
}
/* Rescale the weighted sums stored in `sample_data` (color, CoC, squared CoC and weight) by
 * `weight`. Other members are left untouched. */
void dof_gather_ammend_weight(inout DofGatherData sample_data, float weight)
{
  sample_data.weight *= weight;
  sample_data.color *= weight;
  sample_data.coc_sqr *= weight;
  sample_data.coc *= weight;
}
/* Add one sample to `accum_data` with the given weight.
 * Only color, CoC, squared CoC and weight are accumulated. */
void dof_gather_accumulate_sample(DofGatherData sample_data,
                                  float weight,
                                  inout DofGatherData accum_data)
{
  float weighted_coc = sample_data.coc * weight;

  accum_data.weight += weight;
  accum_data.color += sample_data.color * weight;
  accum_data.coc += weighted_coc;
  accum_data.coc_sqr += sample_data.coc * weighted_coc;
}
/**
 * Accumulate the two samples of a pair.
 *
 * With `do_fast_gather`, both samples go straight into `accum_data` with a weight of 1.
 * Otherwise each sample weight is split between `accum_data` and `ring_data` depending on how
 * its CoC compares to `bordering_radius`, and the ring transparency is updated.
 **/
void dof_gather_accumulate_sample_pair(DofGatherData pair_data[2],
                                       float bordering_radius,
                                       float intersection_multiplier,
                                       bool first_ring,
                                       const bool do_fast_gather,
                                       const bool is_foreground,
                                       inout DofGatherData ring_data,
                                       inout DofGatherData accum_data)
{
  if (do_fast_gather) {
    /* No intersection nor layer test: every sample fully contributes. */
    dof_gather_accumulate_sample(pair_data[0], 1.0, accum_data);
    accum_data.layer_opacity += 1.0;
    dof_gather_accumulate_sample(pair_data[1], 1.0, accum_data);
    accum_data.layer_opacity += 1.0;
    return;
  }

#if 0
  const float mirroring_threshold = -dof_layer_threshold - dof_layer_offset;
  /* TODO(fclem) Promote to parameter? dither with Noise? */
  const float mirroring_min_distance = 15.0;
  if (pair_data[0].coc < mirroring_threshold &&
      (pair_data[1].coc - mirroring_min_distance) > pair_data[0].coc) {
    pair_data[1].coc = pair_data[0].coc;
  }
  else if (pair_data[1].coc < mirroring_threshold &&
           (pair_data[0].coc - mirroring_min_distance) > pair_data[1].coc) {
    pair_data[0].coc = pair_data[1].coc;
  }
#endif

  for (int side = 0; side < 2; side++) {
    float sample_coc = pair_data[side].coc;

    float layer_weight = dof_layer_weight(sample_coc, is_foreground);
    float inter_weight = dof_intersection_weight(
        sample_coc, pair_data[side].dist, intersection_multiplier);
    float weight = inter_weight * layer_weight * dof_sample_weight(sample_coc);

    /**
     * If a CoC is larger than bordering radius we accumulate it to the general accumulator.
     * If not, we accumulate to the ring bucket. This is to have more consistent sample occlusion.
     **/
    float outer_fac = dof_gather_accum_weight(sample_coc, bordering_radius, first_ring);
    dof_gather_accumulate_sample(pair_data[side], weight * outer_fac, accum_data);
    dof_gather_accumulate_sample(pair_data[side], weight * (1.0 - outer_fac), ring_data);

    accum_data.layer_opacity += layer_weight;

    if (is_foreground) {
      ring_data.transparency += 1.0 - inter_weight * layer_weight;
    }
    else {
      /* Background: the CoC is used with its own sign. */
      ring_data.transparency += saturate(sample_coc - bordering_radius);
    }
  }
}
/**
 * Merge the bucket of a finished ring (`ring_data`, made of `sample_count` samples) into
 * `accum_data`, which holds the merged result of the previously processed rings.
 * Statement order matters: opacities and occlusion factors are read from `accum_data` before it
 * is modified.
 * NOTE(review): `is_resolve` and `no_gather_occlusion` are not parameters of this function.
 * They are presumably constants provided by the including shader -- confirm.
 **/
void dof_gather_accumulate_sample_ring(DofGatherData ring_data,
int sample_count,
bool first_ring,
const bool do_fast_gather,
/* accum_data occludes the ring_data if true. */
const bool reversed_occlusion,
inout DofGatherData accum_data)
{
if (do_fast_gather) {
/* Do nothing as ring_data contains nothing. All samples are already in
 * accum_data. */
return;
}
if (first_ring) {
/* Layer opacity is directly accumulated into accum_data data. */
accum_data.color = ring_data.color;
accum_data.coc = ring_data.coc;
accum_data.coc_sqr = ring_data.coc_sqr;
accum_data.weight = ring_data.weight;
/* Transparency is stored as an average over the ring samples. */
accum_data.transparency = ring_data.transparency / float(sample_count);
return;
}
/* Empty ring: nothing to merge. Also avoids the division by zero below. */
if (ring_data.weight == 0.0) {
return;
}
float ring_avg_coc = ring_data.coc / ring_data.weight;
/* NOTE: `accum_data.weight` can be 0 here. The `alpha` selection below ignores the occlusion
 * factors in that case. */
float accum_avg_coc = accum_data.coc / accum_data.weight;
/* Smooth test to set opacity to see if the ring average coc occludes the
 * accumulation. Test is reversed to be multiplied against opacity. */
float ring_occlu = saturate(accum_avg_coc - ring_avg_coc);
/* The bias here is arbitrary. Seems to avoid weird looking foreground in most
 * cases. We might need to make it a parameter or find a relative bias. */
float accum_occlu = saturate((ring_avg_coc - accum_avg_coc) * 0.1 - 1.0);
/* Both switches below disable occlusion between the ring and the accumulation. */
if (is_resolve) {
ring_occlu = accum_occlu = 0.0;
}
if (no_gather_occlusion) {
ring_occlu = 0.0;
accum_occlu = 0.0;
}
/* (Slide 40) */
float ring_opacity = saturate(1.0 - ring_data.transparency / float(sample_count));
float accum_opacity = 1.0 - accum_data.transparency;
if (reversed_occlusion) {
/* Accum_data occludes the ring. */
float alpha = (accum_data.weight == 0.0) ? 0.0 : accum_opacity * accum_occlu;
float one_minus_alpha = 1.0 - alpha;
/* The ring contribution is attenuated, the accumulation is kept as is. */
accum_data.color += ring_data.color * one_minus_alpha;
accum_data.coc += ring_data.coc * one_minus_alpha;
accum_data.coc_sqr += ring_data.coc_sqr * one_minus_alpha;
accum_data.weight += ring_data.weight * one_minus_alpha;
accum_data.transparency *= 1.0 - ring_opacity;
}
else {
/* Ring occludes the accum_data (Same as reference). */
float alpha = (accum_data.weight == 0.0) ? 1.0 : (ring_opacity * ring_occlu);
float one_minus_alpha = 1.0 - alpha;
/* The accumulation is attenuated, the ring contribution is added as is. */
accum_data.color = accum_data.color * one_minus_alpha + ring_data.color;
accum_data.coc = accum_data.coc * one_minus_alpha + ring_data.coc;
accum_data.coc_sqr = accum_data.coc_sqr * one_minus_alpha + ring_data.coc_sqr;
accum_data.weight = accum_data.weight * one_minus_alpha + ring_data.weight;
}
}
/* FIXME(fclem) Seems to be wrong since it needs ringcount+1 as input for
 * slightfocus gather. */
/* This should be replaced by web_sample_count_get() but doing so is breaking other things. */
/* Returns `ring_count * (ring_count - 1) * ring_density + 1`. */
int dof_gather_total_sample_count(const int ring_count, const int ring_density)
{
  int ring_pairs = ring_count * (ring_count - 1);
  return ring_pairs * ring_density + 1;
}
/**
 * Accumulate the sample located at the kernel center.
 * With `do_fast_gather` the sample is simply added to `accum_data` with a weight of 1.
 * Otherwise its weight is split like for regular samples and the remainder is merged as a ring
 * containing a single sample.
 * NOTE: Here `is_resolve` is a parameter, unlike in `dof_gather_accumulate_sample_ring()`.
 **/
void dof_gather_accumulate_center_sample(DofGatherData center_data,
float bordering_radius,
int i_radius,
const bool do_fast_gather,
const bool is_foreground,
const bool is_resolve,
inout DofGatherData accum_data)
{
float layer_weight = dof_layer_weight(center_data.coc, is_foreground);
float sample_weight = dof_sample_weight(center_data.coc);
float weight = layer_weight * sample_weight;
/* The center is never treated as the first ring. */
float accum_weight = dof_gather_accum_weight(center_data.coc, bordering_radius, false);
if (do_fast_gather) {
/* Hope for the compiler to optimize the above. */
layer_weight = 1.0;
sample_weight = 1.0;
accum_weight = 1.0;
weight = 1.0;
}
center_data.transparency = 1.0 - weight;
/* Part of the sample that goes to the outer bucket. */
dof_gather_accumulate_sample(center_data, weight * accum_weight, accum_data);
if (!do_fast_gather) {
if (is_resolve) {
/* NOTE(fclem): Hack to smooth transition to full in-focus opacity. */
int total_sample_count = dof_gather_total_sample_count(i_radius + 1,
DOF_SLIGHT_FOCUS_DENSITY);
float fac = saturate(1.0 - abs(center_data.coc) / float(dof_layer_threshold));
accum_data.layer_opacity += float(total_sample_count) * fac * fac;
}
accum_data.layer_opacity += layer_weight;
/* Logic of dof_gather_accumulate_sample(). */
/* Turn `center_data` into a weighted ring bucket holding the remaining part of the sample.
 * `coc_sqr` must be computed before `coc` is pre-multiplied by the weight. */
weight *= (1.0 - accum_weight);
center_data.coc_sqr = center_data.coc * (center_data.coc * weight);
center_data.color *= weight;
center_data.coc *= weight;
center_data.weight = weight;
if (is_foreground && !is_resolve) {
/* Reduce issue with closer foreground over distant foreground. */
float ring_area = sqr(bordering_radius);
dof_gather_ammend_weight(center_data, ring_area);
}
/* Accumulate center as its own ring. */
/* `is_foreground` is passed as the `reversed_occlusion` argument. */
dof_gather_accumulate_sample_ring(
center_data, 1, false, do_fast_gather, is_foreground, accum_data);
}
}
/* Sample count of a full kernel plus, for each density change, the number of samples that
 * separates the full kernel from the kernel with `gather_density_change_ring` fewer rings. */
int dof_gather_total_sample_count_with_density_change(const int ring_count,
                                                      const int ring_density,
                                                      int density_change)
{
  int full_kernel_count = dof_gather_total_sample_count(ring_count, ring_density);
  int reduced_kernel_count = dof_gather_total_sample_count(
      ring_count - gather_density_change_ring, ring_density);
  int count_per_density_change = full_kernel_count - reduced_kernel_count;
  return full_kernel_count + count_per_density_change * density_change;
}
/**
 * Convert the accumulated sums into the final gather outputs.
 * - `out_col`: weighted average color.
 * - `out_occlusion`: weighted average of absolute CoC (x) and of squared CoC (y).
 * - `out_weight`: layer opacity, snapped to exactly 0 or 1 when within 0.01 of them.
 * NOTE(review): `is_foreground` is not a parameter of this function. It is presumably a constant
 * provided by the including shader -- confirm.
 **/
void dof_gather_accumulate_resolve(int total_sample_count,
                                   DofGatherData accum_data,
                                   out vec4 out_col,
                                   out float out_weight,
                                   out vec2 out_occlusion)
{
  float weight_inv = safe_rcp(accum_data.weight);
  out_occlusion = vec2(abs(accum_data.coc), accum_data.coc_sqr) * weight_inv;

  float opacity = 0.0;
  if (is_foreground) {
    opacity = 1.0 - accum_data.transparency;
  }
  else if (accum_data.weight > 0.0) {
    opacity = accum_data.layer_opacity / float(total_sample_count);
  }
  /* Gathering may not accumulate to 1.0 alpha because of float precision. */
  if (opacity > 0.99) {
    opacity = 1.0;
  }
  else if (opacity < 0.01) {
    opacity = 0.0;
  }
  out_weight = opacity;

  vec4 color = accum_data.color * weight_inv;
  /* Same thing for alpha channel. */
  if (color.a > 0.99) {
    color.a = 1.0;
  }
  else if (color.a < 0.01) {
    color.a = 0.0;
  }
  out_col = color;
}
/* Fetch the CoC stored in the red channel of `gather_input_coc_tx` at the given mip level and
 * return it scaled for the half resolution gather. */
float dof_load_gather_coc(sampler2D gather_input_coc_tx, vec2 uv, float lod)
{
  /* We gather at halfres. CoC must be divided by 2 to be compared against radii. */
  return 0.5 * textureLod(gather_input_coc_tx, uv, lod).r;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Common Gather accumulator.
* \{ */
/* Radii needs to be halfres CoC sizes.
 * Returns true when the ring spacing of a kernel of `base_radius` is too coarse compared to the
 * smallest intersectable radius and the kernel is still large enough to be reduced. */
bool dof_do_density_change(float base_radius, float min_intersectable_radius)
{
  /* Reduce artifact for very large blur. */
  float intersectable_limit = min_intersectable_radius * 0.1;

  bool spacing_too_coarse = (base_radius * unit_ring_radius > intersectable_limit);
  bool above_min_density = (base_radius * radius_downscale_factor > float(gather_ring_count));
  return above_min_density && spacing_too_coarse;
}
/**
 * Compute the per invocation gather parameters.
 * - `center_co`: jittered kernel center, in pixels.
 * - `lod`: mip level to sample for this kernel radius.
 * - `intersection_multiplier`: slope of the smooth intersection test for this mip level.
 **/
void dof_gather_init(float base_radius,
                     vec2 noise,
                     out vec2 center_co,
                     out float lod,
                     out float intersection_multiplier)
{
  vec2 pixel_center = vec2(gl_GlobalInvocationID.xy) + 0.5;

  /* Jitter center half a ring to reduce undersampling. */
  vec2 center_jitter = sample_disk(noise) * 0.499;
  if (DOF_BOKEH_TEXTURE) {
    center_jitter *= dof_buf.bokeh_anisotropic_scale;
  }
  center_co = pixel_center + center_jitter * base_radius * unit_sample_radius;

  if (no_gather_mipmaps) {
    lod = 0.0;
  }
  else {
    /* TODO(fclem) Seems like the default lod selection is too big. Bias to avoid blocky moving
     * out of focus shapes. */
    const float lod_bias = -2.0;
    lod = max(floor(log2(base_radius * unit_sample_radius) + 0.5) + lod_bias, 0.0);
  }

  /* (Slide 64). */
  intersection_multiplier = pow(0.5, lod);
}
/**
 * Gather the neighborhood of the current invocation ring by ring and resolve it into a color,
 * a weight and an occlusion value.
 *
 * Rings are processed from the outermost (`gather_ring_count`) to the innermost (1), each ring
 * holding `gather_ring_density * ring` mirrored sample pairs, followed by one center sample.
 *
 * \param color_tx: Color input sampled when `do_fast_gather` is false.
 * \param color_bilinear_tx: Color input sampled when `do_fast_gather` is true.
 * \param coc_tx: CoC input, read through `dof_load_gather_coc()`.
 * \param bkh_lut_tx: Bokeh LUT, only sampled when `DOF_BOKEH_TEXTURE` is enabled.
 * \param base_radius: Kernel radius (halfres CoC size). Jittered locally before use.
 * \param min_intersectable_radius: Forwarded to `dof_do_density_change()`.
 * \param do_fast_gather: Selects the bilinear color input and the fast accumulation path.
 * \param do_density_change: Allows shrinking the kernel while iterating over the rings.
 * \param out_color: Resolved color, multiplied by `out_weight` before returning.
 * \param out_weight: Resolved weight.
 * \param out_occlusion: Resolved occlusion as written by `dof_gather_accumulate_resolve()`.
 */
void dof_gather_accumulator(sampler2D color_tx,
sampler2D color_bilinear_tx,
sampler2D coc_tx,
sampler2D bkh_lut_tx, /* Renamed because of ugly macro. */
float base_radius,
float min_intersectable_radius,
const bool do_fast_gather,
const bool do_density_change,
out vec4 out_color,
out float out_weight,
out vec2 out_occlusion)
{
vec2 frag_coord = vec2(gl_GlobalInvocationID.xy);
vec2 noise_offset = sampling_rng_2D_get(SAMPLING_LENS_U);
/* Per-pixel noise, disabled by the `no_gather_random` debug option. */
vec2 noise = no_gather_random ? vec2(0.0, 0.0) :
vec2(interlieved_gradient_noise(frag_coord, 0, noise_offset.x),
interlieved_gradient_noise(frag_coord, 1, noise_offset.y));
if (!do_fast_gather) {
/* Jitter the radius to reduce noticeable density changes. */
base_radius += noise.x * unit_ring_radius * base_radius;
}
else {
/* Jittering the radius more than we need means we are going to feather the bokeh shape half a
 * ring. So we need to compensate for fast gather that does not check CoC intersection. */
base_radius += (0.5 - noise.x) * 1.5 * unit_ring_radius * base_radius;
}
/* TODO(fclem) another seed? For now Cranley-Patterson rotation with golden ratio. */
noise.x = fract(noise.x * 6.1803398875);
float lod, isect_mul;
vec2 center_co;
dof_gather_init(base_radius, noise, center_co, lod, isect_mul);
bool first_ring = true;
DofGatherData accum_data = GATHER_DATA_INIT;
/* Number of density changes applied so far. */
int density_change = 0;
for (int ring = gather_ring_count; ring > 0; ring--) {
int sample_pair_count = gather_ring_density * ring;
/* Pairs are mirrored, so only half a turn needs to be covered by the rotation steps. */
float step_rot = M_PI / float(sample_pair_count);
mat2 step_rot_mat = rot2_from_angle(step_rot);
/* Random rotation of the ring, at most one step. */
float angle_offset = noise.y * step_rot;
vec2 offset = vec2(cos(angle_offset), sin(angle_offset));
float ring_radius = float(ring) * unit_sample_radius * base_radius;
/* Slide 38. */
float bordering_radius = ring_radius +
(0.5 + coc_radius_error) * base_radius * unit_sample_radius;
DofGatherData ring_data = GATHER_DATA_INIT;
for (int sample_pair = 0; sample_pair < sample_pair_count; sample_pair++) {
offset = step_rot_mat * offset;
DofGatherData pair_data[2];
/* Sample at `offset` and at its mirror `-offset`. */
for (int i = 0; i < 2; i++) {
vec2 offset_co = ((i == 0) ? offset : -offset);
if (DOF_BOKEH_TEXTURE) {
/* Scaling to 0.25 for speed. Improves texture cache hit. */
offset_co = texture(bkh_lut_tx, offset_co * 0.25 + 0.5).rg;
offset_co *= (is_foreground) ? -dof_buf.bokeh_anisotropic_scale :
dof_buf.bokeh_anisotropic_scale;
}
vec2 sample_co = center_co + offset_co * ring_radius;
vec2 sample_uv = sample_co * dof_buf.gather_uv_fac;
if (do_fast_gather) {
pair_data[i].color = textureLod(color_bilinear_tx, sample_uv, lod);
}
else {
pair_data[i].color = textureLod(color_tx, sample_uv, lod);
}
pair_data[i].coc = dof_load_gather_coc(coc_tx, sample_uv, lod);
pair_data[i].dist = ring_radius;
}
dof_gather_accumulate_sample_pair(pair_data,
bordering_radius,
isect_mul,
first_ring,
do_fast_gather,
is_foreground,
ring_data,
accum_data);
}
if (is_foreground) {
/* Reduce issue with closer foreground over distant foreground. */
/* TODO(fclem) this seems to not be completely correct as the issue remains. */
float ring_area = (sqr(float(ring) + 0.5 + coc_radius_error) -
sqr(float(ring) - 0.5 + coc_radius_error)) *
sqr(base_radius * unit_sample_radius);
dof_gather_ammend_weight(ring_data, ring_area);
}
dof_gather_accumulate_sample_ring(
ring_data, sample_pair_count * 2, first_ring, do_fast_gather, is_foreground, accum_data);
first_ring = false;
if (do_density_change && (ring == change_density_at_ring) &&
(density_change < gather_max_density_change)) {
if (dof_do_density_change(base_radius, min_intersectable_radius)) {
/* Shrink the kernel and rewind the ring counter so the smaller kernel gets gathered. */
base_radius *= radius_downscale_factor;
ring += gather_density_change_ring;
/* We need to account for the density change in the weights (slide 62).
 * For that multiply old kernel data by its area divided by the new kernel area. */
const float outer_rings_weight = 1.0 / (radius_downscale_factor * radius_downscale_factor);
/* Samples are already weighted per ring in foreground pass. */
if (!is_foreground) {
dof_gather_ammend_weight(accum_data, outer_rings_weight);
}
/* Re-init kernel position & sampling parameters. */
dof_gather_init(base_radius, noise, center_co, lod, isect_mul);
density_change++;
}
}
}
{
/* Center sample. */
vec2 sample_uv = center_co * dof_buf.gather_uv_fac;
DofGatherData center_data;
if (do_fast_gather) {
center_data.color = textureLod(color_bilinear_tx, sample_uv, lod);
}
else {
center_data.color = textureLod(color_tx, sample_uv, lod);
}
center_data.coc = dof_load_gather_coc(coc_tx, sample_uv, lod);
center_data.dist = 0.0;
/* Slide 38. */
float bordering_radius = (0.5 + coc_radius_error) * base_radius * unit_sample_radius;
dof_gather_accumulate_center_sample(
center_data, bordering_radius, 0, do_fast_gather, is_foreground, false, accum_data);
}
int total_sample_count = dof_gather_total_sample_count_with_density_change(
gather_ring_count, gather_ring_density, density_change);
dof_gather_accumulate_resolve(
total_sample_count, accum_data, out_color, out_weight, out_occlusion);
/* Debug visualizations: tint the output depending on the path taken. */
if (debug_gather_perf && density_change > 0) {
float fac = saturate(float(density_change) / float(10.0));
out_color.rgb = avg(out_color.rgb) * neon_gradient(fac);
}
if (debug_gather_perf && do_fast_gather) {
out_color.rgb = avg(out_color.rgb) * vec3(0.0, 1.0, 0.0);
}
if (debug_scatter_perf) {
out_color.rgb = avg(out_color.rgb) * vec3(0.0, 1.0, 0.0);
}
/* Output premultiplied color so we can use bilinear sampler in resolve pass. */
out_color *= out_weight;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Slight focus accumulator.
*
* The full pixel neighborhood is gathered.
* \{ */
/**
 * Gather the full pixel neighborhood around the current invocation using square rings and
 * composite the foreground result over the background result.
 *
 * \param depth_tx: Depth input, converted to CoC per sample with `dof_coc_from_depth()`.
 * \param color_tx: Color input, sampled at mip 0 and passed through `safe_color()`.
 * \param bkh_lut_tx: Bokeh LUT, only fetched when `DOF_BOKEH_TEXTURE` is enabled.
 * \param radius: Gather radius. Truncated to an integer ring count and clamped to
 * `[0..dof_layer_threshold]`.
 * \param out_color: Foreground over background color.
 * \param out_weight: Foreground over background weight.
 */
void dof_slight_focus_gather(sampler2D depth_tx,
sampler2D color_tx,
sampler2D bkh_lut_tx, /* Renamed because of ugly macro job. */
float radius,
out vec4 out_color,
out float out_weight)
{
vec2 frag_coord = vec2(gl_GlobalInvocationID.xy) + 0.5;
float noise_offset = sampling_rng_1D_get(SAMPLING_LENS_U);
float noise = no_gather_random ? 0.0 : interlieved_gradient_noise(frag_coord, 3, noise_offset);
/* Foreground and background are accumulated separately from the same samples. */
DofGatherData fg_accum = GATHER_DATA_INIT;
DofGatherData bg_accum = GATHER_DATA_INIT;
int i_radius = clamp(int(radius), 0, int(dof_layer_threshold));
const int resolve_ring_density = DOF_SLIGHT_FOCUS_DENSITY;
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
bool first_ring = true;
for (int ring = i_radius; ring > 0; ring--) {
DofGatherData fg_ring = GATHER_DATA_INIT;
DofGatherData bg_ring = GATHER_DATA_INIT;
int ring_distance = ring;
int ring_sample_count = resolve_ring_density * ring_distance;
for (int sample_id = 0; sample_id < ring_sample_count; sample_id++) {
/* Index of the sample on the square ring, shifted by a random amount. */
int s = sample_id * (4 / resolve_ring_density) +
int(noise * float((4 - resolve_ring_density) * ring_distance));
ivec2 offset = dof_square_ring_sample_offset(ring_distance, s);
float ring_dist = length(vec2(offset));
DofGatherData pair_data[2];
/* Sample at `offset` and at its mirror `-offset`. */
for (int i = 0; i < 2; i++) {
ivec2 sample_offset = ((i == 0) ? offset : -offset);
ivec2 sample_texel = texel + sample_offset;
/* OPTI: could precompute the factor. */
vec2 sample_uv = (vec2(sample_texel) + 0.5) / vec2(textureSize(depth_tx, 0));
float depth = textureLod(depth_tx, sample_uv, 0.0).r;
pair_data[i].coc = dof_coc_from_depth(dof_buf, sample_uv, depth);
pair_data[i].color = safe_color(textureLod(color_tx, sample_uv, 0.0));
pair_data[i].dist = ring_dist;
if (DOF_BOKEH_TEXTURE) {
/* Contains subpixel distance to bokeh shape. */
sample_offset += dof_max_slight_focus_radius;
pair_data[i].dist = texelFetch(bkh_lut_tx, sample_offset, 0).r;
}
pair_data[i].coc = clamp(pair_data[i].coc, -dof_buf.coc_abs_max, dof_buf.coc_abs_max);
}
float bordering_radius = ring_dist + 0.5;
const float isect_mul = 1.0;
/* Background accumulation. */
dof_gather_accumulate_sample_pair(
pair_data, bordering_radius, isect_mul, first_ring, false, false, bg_ring, bg_accum);
if (DOF_BOKEH_TEXTURE) {
/* Swap distances in order to flip bokeh shape for foreground. */
float tmp = pair_data[0].dist;
pair_data[0].dist = pair_data[1].dist;
pair_data[1].dist = tmp;
}
/* Foreground accumulation. */
dof_gather_accumulate_sample_pair(
pair_data, bordering_radius, isect_mul, first_ring, false, true, fg_ring, fg_accum);
}
dof_gather_accumulate_sample_ring(
bg_ring, ring_sample_count * 2, first_ring, false, false, bg_accum);
dof_gather_accumulate_sample_ring(
fg_ring, ring_sample_count * 2, first_ring, false, true, fg_accum);
first_ring = false;
}
/* Center sample. */
vec2 sample_uv = frag_coord / vec2(textureSize(depth_tx, 0));
DofGatherData center_data;
center_data.color = safe_color(textureLod(color_tx, sample_uv, 0.0));
center_data.coc = dof_coc_from_depth(dof_buf, sample_uv, textureLod(depth_tx, sample_uv, 0.0).r);
center_data.coc = clamp(center_data.coc, -dof_buf.coc_abs_max, dof_buf.coc_abs_max);
center_data.dist = 0.0;
/* Slide 38. */
float bordering_radius = 0.5;
dof_gather_accumulate_center_sample(
center_data, bordering_radius, i_radius, false, true, true, fg_accum);
dof_gather_accumulate_center_sample(
center_data, bordering_radius, i_radius, false, false, true, bg_accum);
vec4 bg_col, fg_col;
float bg_weight, fg_weight;
vec2 unused_occlusion;
int total_sample_count = dof_gather_total_sample_count(i_radius + 1, resolve_ring_density);
dof_gather_accumulate_resolve(total_sample_count, bg_accum, bg_col, bg_weight, unused_occlusion);
dof_gather_accumulate_resolve(total_sample_count, fg_accum, fg_col, fg_weight, unused_occlusion);
/* Fix weighting issues on perfectly focus > slight focus transitioning areas. */
if (abs(center_data.coc) < 0.5) {
bg_col = center_data.color;
bg_weight = 1.0;
}
/* Alpha Over */
float alpha = 1.0 - fg_weight;
out_weight = bg_weight * alpha + fg_weight;
out_color = bg_col * bg_weight * alpha + fg_col * fg_weight;
}
/** \} */

View File

@ -0,0 +1,55 @@
/**
* Bokeh Look Up Table: This outputs a radius multiplier to shape the sampling in gather pass or
* the scatter sprite appearance. This is only used if bokeh shape is either anamorphic or is not
* a perfect circle.
* We correct samples spacing for polygonal bokeh shapes. However, we do not for anamorphic bokeh
* as it is way more complex and expensive to do.
*/
#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
/**
 * Write one texel of each of the three bokeh look-up tables:
 * - `out_gather_lut_img`: normalized UV used to shape the gather sampling.
 * - `out_scatter_lut_img`: distance (radius) used by the scatter pass.
 * - `out_resolve_lut_img`: distance used by the slight focus gather.
 */
void main()
{
  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);

  vec2 gather_uv = ((vec2(gl_GlobalInvocationID.xy) + 0.5) / float(DOF_BOKEH_LUT_SIZE));
  /* Center uv in range [-1..1]. */
  gather_uv = gather_uv * 2.0 - 1.0;

  /* Texel offset relative to the center of the slight focus LUT. */
  vec2 slight_focus_texel = vec2(gl_GlobalInvocationID.xy) - float(dof_max_slight_focus_radius);

  float radius = length(gather_uv);

  if (dof_buf.bokeh_blades > 0.0) {
    /* NOTE: atan(y,x) has output range [-M_PI..M_PI], so add 2pi to avoid negative angles. */
    float theta = atan(gather_uv.y, gather_uv.x) + M_2PI;

    radius /= circle_to_polygon_radius(dof_buf.bokeh_blades, theta - dof_buf.bokeh_rotation);

    float theta_new = circle_to_polygon_angle(dof_buf.bokeh_blades, theta);
    float r_new = circle_to_polygon_radius(dof_buf.bokeh_blades, theta_new);

    theta_new -= dof_buf.bokeh_rotation;

    gather_uv = r_new * vec2(-cos(theta_new), sin(theta_new));

    /* Slight focus distance. Uses its own angle (mirrored on X, opposite rotation sign). */
    slight_focus_texel *= dof_buf.bokeh_anisotropic_scale_inv;
    float slight_focus_theta = atan(slight_focus_texel.y, -slight_focus_texel.x) + M_2PI;
    slight_focus_texel /= circle_to_polygon_radius(dof_buf.bokeh_blades,
                                                   slight_focus_theta + dof_buf.bokeh_rotation);
  }
  else {
    /* No blades: only normalize the gather UV. */
    gather_uv *= safe_rcp(length(gather_uv));
  }

  /* For gather store the normalized UV. */
  imageStore(out_gather_lut_img, texel, gather_uv.xyxy);
  /* For scatter store distance. LUT will be scaled by COC. */
  imageStore(out_scatter_lut_img, texel, vec4(radius));
  /* For slight focus gather store pixel perfect distance. */
  imageStore(out_resolve_lut_img, texel, vec4(length(slight_focus_texel)));
}

View File

@ -0,0 +1,32 @@
/**
* Downsample pass: CoC aware downsample to quarter resolution.
*
* Pretty much identical to the setup pass but get CoC from buffer.
* Also does not weight luma for the bilateral weights.
*/
#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
/**
 * Downsample one 2x2 quad of the half resolution color input into a single output texel,
 * weighting the four texels by their CoC (`dof_bilateral_coc_weights()`).
 */
void main()
{
  vec2 halfres_texel_size = 1.0 / vec2(textureSize(color_tx, 0).xy);
  /* Center uv around the 4 halfres pixels.
   * Only the XY invocation index is relevant: swizzle explicitly and stay in unsigned
   * arithmetic instead of relying on implicit conversions and component dropping. */
  vec2 quad_center = vec2(gl_GlobalInvocationID.xy * 2u + 1u) * halfres_texel_size;

  vec4 colors[4];
  vec4 cocs;
  for (int i = 0; i < 4; i++) {
    vec2 sample_uv = quad_center + quad_offsets[i] * halfres_texel_size;
    colors[i] = textureLod(color_tx, sample_uv, 0.0);
    cocs[i] = textureLod(coc_tx, sample_uv, 0.0).r;
  }

  vec4 weights = dof_bilateral_coc_weights(cocs);
  /* Normalize so that the sum is 1. */
  weights *= safe_rcp(sum(weights));

  vec4 out_color = weighted_sum_array(colors, weights);

  imageStore(out_color_img, ivec2(gl_GlobalInvocationID.xy), out_color);
}

View File

@ -0,0 +1,157 @@
/**
* Gather Filter pass: Filter the gather pass result to reduce noise.
*
* This is a simple 3x3 median filter to avoid dilating highlights with a 3x3 max filter even if
* cheaper.
*/
/* One texel of the gather result as seen by the filter: its color and its weight.
 * Min / max operations on this type treat both members independently. */
struct FilterSample {
vec4 color;
float weight;
};
/* -------------------------------------------------------------------- */
/** \name Pixel cache.
* \{ */
/* Shared (LDS) copies of the color and weight inputs, filled by `cache_init()`.
 * 10x10 entries: the 8x8 thread group plus a 1 texel border on each side.
 * Indexed as `[y][x]`. */
shared vec4 color_cache[10][10];
shared float weight_cache[10][10];
/**
 * Load enough values into LDS to perform the filter.
 *
 * The thread group is 8x8 but the 3x3 filter needs a 1 texel border, so 10x10 texels are
 * required. They are loaded by the 5x5 threads at the origin of the group, each of them
 * fetching 4 texels spaced 5 texels apart in X and Y. Loads are clamped to the texture bounds.
 */
void cache_init()
{
  /* Only the first 5x5 threads participate in the load. */
  if (all(lessThan(gl_LocalInvocationID.xy, uvec2(5)))) {
    ivec2 local_texel = ivec2(gl_LocalInvocationID.xy);
    /* Start one texel before the invocation texel to cover the border. */
    ivec2 first_texel = ivec2(gl_GlobalInvocationID.xy) - 1;

    for (int y = 0; y < 2; y++) {
      for (int x = 0; x < 2; x++) {
        ivec2 block_offset = ivec2(x, y) * 5;
        ivec2 cache_texel = local_texel + block_offset;
        ivec2 load_texel = clamp(
            first_texel + block_offset, ivec2(0), textureSize(color_tx, 0) - 1);

        color_cache[cache_texel.y][cache_texel.x] = texelFetch(color_tx, load_texel, 0);
        weight_cache[cache_texel.y][cache_texel.x] = texelFetch(weight_tx, load_texel, 0).r;
      }
    }
  }
  /* Make the cache visible to the whole thread group before filtering. */
  barrier();
}
/* Fetch the cached color and weight at cache coordinate (x, y). */
FilterSample cache_sample(int x, int y)
{
  FilterSample cached;
  cached.color = color_cache[y][x];
  cached.weight = weight_cache[y][x];
  return cached;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Median filter
* From:
* Implementing Median Filters in XC4000E FPGAs
* JOHN L. SMITH, Univision Technologies Inc., Billerica, MA
* http://users.utcluj.ro/~baruch/resources/Image/xl23_16.pdf
* Figure 1
* \{ */
/* Per-member (and per-channel) minimum of two samples. */
FilterSample filter_min(FilterSample a, FilterSample b)
{
  FilterSample lowest;
  lowest.color = min(a.color, b.color);
  lowest.weight = min(a.weight, b.weight);
  return lowest;
}
/* Per-member (and per-channel) maximum of two samples. */
FilterSample filter_max(FilterSample a, FilterSample b)
{
  FilterSample highest;
  highest.color = max(a.color, b.color);
  highest.weight = max(a.weight, b.weight);
  return highest;
}
/* Per-member (and per-channel) minimum of three samples. */
FilterSample filter_min(FilterSample a, FilterSample b, FilterSample c)
{
  FilterSample lowest;
  lowest.color = min(a.color, min(c.color, b.color));
  lowest.weight = min(a.weight, min(c.weight, b.weight));
  return lowest;
}
/* Per-member (and per-channel) maximum of three samples. */
FilterSample filter_max(FilterSample a, FilterSample b, FilterSample c)
{
  FilterSample highest;
  highest.color = max(a.color, max(c.color, b.color));
  highest.weight = max(a.weight, max(c.weight, b.weight));
  return highest;
}
/* Per-member median of three samples, built from min / max nodes only. */
FilterSample filter_median(FilterSample s1, FilterSample s2, FilterSample s3)
{
  /* From diagram, with nodes numbered from top to bottom. */
  FilterSample pair_low = filter_min(s2, s3);
  FilterSample pair_high = filter_max(s2, s3);
  /* The median is the smaller of: max(s1, pair_low) and pair_high. */
  return filter_min(filter_max(s1, pair_low), pair_high);
}
/* Result of `filter_lmh()`: the three input samples sorted per member
 * into lowest, median and highest. */
struct FilterLmhResult {
FilterSample low;
FilterSample median;
FilterSample high;
};
/* Sort three samples (per member) and return the lowest, median and highest values. */
FilterLmhResult filter_lmh(FilterSample s1, FilterSample s2, FilterSample s3)
{
  /* From diagram, with nodes numbered from top to bottom. */
  FilterSample pair_high = filter_max(s2, s3);
  FilterSample pair_low = filter_min(s2, s3);
  FilterSample mid_candidate = filter_max(s1, pair_low);

  FilterSample lowest = filter_min(s1, pair_low);
  FilterSample median = filter_min(mid_candidate, pair_high);
  FilterSample highest = filter_max(mid_candidate, pair_high);

  return FilterLmhResult(lowest, median, highest);
}
/** \} */
/**
 * Apply the 3x3 median filter to the color and weight of the current texel.
 * Each of the 3 cached rows is sorted first, then the sorted rows are combined.
 */
void main()
{
  /* OPTI(fclem) Could early return on some tiles. */
  cache_init();

  /* The cache has a 1 texel border: the local ID addresses the top-left of the 3x3 window. */
  int cache_x = int(gl_LocalInvocationID.x);
  int cache_y = int(gl_LocalInvocationID.y);

  FilterLmhResult rows[3];
  for (int row = 0; row < 3; row++) {
    FilterSample s0 = cache_sample(cache_x + 0, cache_y + row);
    FilterSample s1 = cache_sample(cache_x + 1, cache_y + row);
    FilterSample s2 = cache_sample(cache_x + 2, cache_y + row);
    rows[row] = filter_lmh(s0, s1, s2);
  }

  /* Left nodes. */
  FilterSample max_of_lows = filter_max(rows[0].low, rows[1].low, rows[2].low);
  /* Right nodes. */
  FilterSample min_of_highs = filter_min(rows[0].high, rows[1].high, rows[2].high);
  /* Center nodes. */
  FilterSample median_of_medians = filter_median(rows[0].median, rows[1].median, rows[2].median);
  /* Last bottom nodes. */
  FilterSample result = filter_median(min_of_highs, median_of_medians, max_of_lows);

  ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy);
  imageStore(out_color_img, out_texel, result.color);
  imageStore(out_weight_img, out_texel, vec4(result.weight));
}

View File

@ -0,0 +1,99 @@
/**
* Gather pass: Convolve foreground and background parts in separate passes.
*
 * Using the min&max CoC tile buffer, we select the most appropriate method to blur the scene color.
 * A fast gather path is taken if there is not much CoC variation inside the tile.
*
* We sample using an octaweb sampling pattern. We randomize the kernel center and each ring
* rotation to ensure maximum coverage.
*
* Outputs:
* - Color * Weight, Weight, Occlusion 'CoC' Depth (mean and variance)
**/
#pragma BLENDER_REQUIRE(eevee_depth_of_field_accumulator_lib.glsl)
/**
 * Gather pass entry point. Reads the CoC tile covering this texel, selects the gather variant
 * (early out, fast gather, gather with density change, or plain gather) and stores the color,
 * weight and occlusion results.
 */
void main()
{
  ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy);
  ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy / DOF_TILES_SIZE);

  CocTile coc_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, tile_co);
  CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile);

  /* Foreground tile values are negated to get positive radii. */
  float base_radius = (is_foreground) ? -coc_tile.fg_min_coc : coc_tile.bg_max_coc;
  float min_radius = (is_foreground) ? -coc_tile.fg_max_coc : coc_tile.bg_min_coc;
  float min_intersectable_radius = (is_foreground) ? -coc_tile.fg_max_intersectable_coc :
                                                     coc_tile.bg_min_intersectable_coc;
  bool layer_needed = (is_foreground) ? prediction.do_foreground : prediction.do_background;

  bool do_fast_gather = dof_do_fast_gather(base_radius, min_radius, is_foreground);

  /* Gather at half resolution. Divide CoC by 2. */
  base_radius *= 0.5;
  min_intersectable_radius *= 0.5;

  bool do_density_change = dof_do_density_change(base_radius, min_intersectable_radius);

  /* Values written when the tile can early out. */
  vec4 out_color = vec4(0.0);
  float out_weight = 0.0;
  vec2 out_occlusion = vec2(0.0, 0.0);

  if (layer_needed) {
    /* NOTE: The two boolean arguments are kept as literals in each call. */
    if (do_fast_gather) {
      dof_gather_accumulator(color_tx,
                             color_bilinear_tx,
                             coc_tx,
                             bokeh_lut_tx,
                             base_radius,
                             min_intersectable_radius,
                             true,
                             false,
                             out_color,
                             out_weight,
                             out_occlusion);
    }
    else if (do_density_change) {
      dof_gather_accumulator(color_tx,
                             color_bilinear_tx,
                             coc_tx,
                             bokeh_lut_tx,
                             base_radius,
                             min_intersectable_radius,
                             false,
                             true,
                             out_color,
                             out_weight,
                             out_occlusion);
    }
    else {
      dof_gather_accumulator(color_tx,
                             color_bilinear_tx,
                             coc_tx,
                             bokeh_lut_tx,
                             base_radius,
                             min_intersectable_radius,
                             false,
                             false,
                             out_color,
                             out_weight,
                             out_occlusion);
    }
  }

  imageStore(out_color_img, out_texel, out_color);
  imageStore(out_weight_img, out_texel, vec4(out_weight));
  imageStore(out_occlusion_img, out_texel, out_occlusion.xyxy);
}

View File

@ -0,0 +1,70 @@
/**
* Holefill pass: Gather background parts where foreground is present.
*
 * Using the min&max CoC tile buffer, we select the most appropriate method to blur the scene color.
 * A fast gather path is taken if there is not much CoC variation inside the tile.
*
* We sample using an octaweb sampling pattern. We randomize the kernel center and each ring
* rotation to ensure maximum coverage.
**/
#pragma BLENDER_REQUIRE(eevee_depth_of_field_accumulator_lib.glsl)
/**
 * Hole fill pass entry point. Reads the CoC tile covering this texel and, unless the tile
 * prediction allows an early out, gathers using the foreground tile radii. Stores the resulting
 * color and weight; the occlusion output of the accumulator is discarded.
 */
void main()
{
  ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy / DOF_TILES_SIZE);
  CocTile coc_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, tile_co);
  CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile);

  /* Foreground tile values are negated to get positive radii. */
  float base_radius = -coc_tile.fg_min_coc;
  float min_radius = -coc_tile.fg_max_coc;
  float min_intersectable_radius = dof_tile_large_coc;
  bool can_early_out = !prediction.do_hole_fill;

  bool do_fast_gather = dof_do_fast_gather(base_radius, min_radius, is_foreground);

  /* Gather at half resolution. Divide CoC by 2. */
  base_radius *= 0.5;
  min_intersectable_radius *= 0.5;

  /* NOTE: No density change is evaluated here: both gather calls below disable it. */

  vec4 out_color = vec4(0.0);
  float out_weight = 0.0;
  vec2 unused_occlusion = vec2(0.0, 0.0);

  if (can_early_out) {
    /* Early out. Outputs keep their zero initialization. */
  }
  else if (do_fast_gather) {
    /* `coc_tx` is also bound to the bokeh LUT slot of the accumulator. */
    dof_gather_accumulator(color_tx,
                           color_bilinear_tx,
                           coc_tx,
                           coc_tx,
                           base_radius,
                           min_intersectable_radius,
                           true,
                           false,
                           out_color,
                           out_weight,
                           unused_occlusion);
  }
  else {
    dof_gather_accumulator(color_tx,
                           color_bilinear_tx,
                           coc_tx,
                           coc_tx,
                           base_radius,
                           min_intersectable_radius,
                           false,
                           false,
                           out_color,
                           out_weight,
                           unused_occlusion);
  }

  ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy);
  imageStore(out_color_img, out_texel, out_color);
  imageStore(out_weight_img, out_texel, vec4(out_weight));
}

View File

@ -0,0 +1,346 @@
/**
* Depth of Field utils.
**/
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
/* -------------------------------------------------------------------- */
/** \name Constants.
* \{ */
/* Default value, only used when the macro was not defined before this point. */
#ifndef DOF_SLIGHT_FOCUS_DENSITY
# define DOF_SLIGHT_FOCUS_DENSITY 2
#endif
/* True only when this file is compiled with DOF_RESOLVE_PASS defined. */
#ifdef DOF_RESOLVE_PASS
const bool is_resolve = true;
#else
const bool is_resolve = false;
#endif
/* Takes the value of DOF_FOREGROUND_PASS when it is defined, false otherwise. */
#ifdef DOF_FOREGROUND_PASS
const bool is_foreground = DOF_FOREGROUND_PASS;
#else
const bool is_foreground = false;
#endif
/* Debug options */
const bool debug_gather_perf = false;
const bool debug_scatter_perf = false;
const bool debug_resolve_perf = false;
const bool no_smooth_intersection = false;
const bool no_gather_occlusion = false;
const bool no_gather_mipmaps = false;
const bool no_gather_random = false;
const bool no_gather_filtering = false;
const bool no_scatter_occlusion = false;
const bool no_scatter_pass = false;
const bool no_foreground_pass = false;
const bool no_background_pass = false;
const bool no_slight_focus_pass = false;
const bool no_focus_pass = false;
const bool no_hole_fill_pass = false;
/* Distribute weights between near/slightfocus/far fields (slide 117). */
const float dof_layer_threshold = 4.0;
/* Make sure it overlaps. */
const float dof_layer_offset_fg = 0.5 + 1.0;
/* Extra offset for convolution layers to avoid light leaking from background. */
const float dof_layer_offset = 0.5 + 0.5;
const int dof_max_slight_focus_radius = DOF_MAX_SLIGHT_FOCUS_RADIUS;
/* Offsets from the center of a 2x2 texel quad to each of its 4 texels, in texel units. */
const vec2 quad_offsets[4] = vec2[4](
vec2(-0.5, 0.5), vec2(0.5, 0.5), vec2(0.5, -0.5), vec2(-0.5, -0.5));
/** \} */
/* -------------------------------------------------------------------- */
/** \name Weighting and downsampling utils.
* \{ */
/* Weight that decreases as the (approximate) luminance of `color` increases. */
float dof_hdr_color_weight(vec4 color)
{
  /* TODO(fclem) Pass correct exposure. */
  const float exposure = 1.0;
  /* Very fast "luma" weighting. */
  float luma = (color.g * 2.0) + (color.r + color.b);
  float denominator = luma * exposure + 4.0;
  return 1.0 / denominator;
}
/* Return the (signed) component of `cocs` with the largest absolute value.
 * On ties the earliest component wins. */
float dof_coc_select(vec4 cocs)
{
  /* Select biggest coc. */
  float selected_coc = cocs.x;
  for (int i = 1; i < 4; i++) {
    if (abs(cocs[i]) > abs(selected_coc)) {
      selected_coc = cocs[i];
    }
  }
  return selected_coc;
}
/* NOTE: Do not forget to normalize weights afterwards. */
/* Per-sample weights based on the signed difference between the selected (biggest) CoC
 * and each CoC. */
vec4 dof_bilateral_coc_weights(vec4 cocs)
{
  const float scale = 4.0; /* TODO(fclem) revisit. */
  /* NOTE: The difference between the cocs should be inside a abs() function,
   * but we follow UE4 implementation to improve how dithered transparency looks (see slide 19). */
  vec4 coc_delta = vec4(dof_coc_select(cocs)) - cocs;
  return saturate(1.0 - coc_delta * scale);
}
/* NOTE: Do not forget to normalize weights afterwards. */
/* Per-sample weights computed with `dof_hdr_color_weight()` for each of the 4 colors. */
vec4 dof_bilateral_color_weights(vec4 colors[4])
{
  return vec4(dof_hdr_color_weight(colors[0]),
              dof_hdr_color_weight(colors[1]),
              dof_hdr_color_weight(colors[2]),
              dof_hdr_color_weight(colors[3]));
}
/* Returns signed Circle of confusion radius (in pixel) based on depth buffer value [0..1]. */
/* Convert a depth buffer value at `uv` into a camera depth, then into a CoC radius
 * using `coc_radius_from_camera_depth()`. */
float dof_coc_from_depth(DepthOfFieldData dof_data, vec2 uv, float depth)
{
  float camera_depth;
  if (is_panoramic(dof_data.camera_type)) {
    /* Use radial depth. */
    camera_depth = -length(get_view_space_from_depth(uv, depth));
  }
  else {
    camera_depth = get_view_z_from_depth(depth);
  }
  return coc_radius_from_camera_depth(dof_data, camera_depth);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Gather & Scatter Weighting
* \{ */
/* Weight of a sample with the given CoC for the foreground or background layer.
 * The formula depends on whether this is compiled for the resolve pass (`is_resolve`). */
float dof_layer_weight(float coc, const bool is_foreground)
{
  /* NOTE: These are fullres pixel CoC value. */
  if (!is_resolve) {
    coc *= 2.0; /* Account for half pixel gather. */
    float layer_offset = (is_foreground) ? dof_layer_offset_fg : dof_layer_offset;
    float threshold = dof_layer_threshold - layer_offset;
    float layer_coc = (is_foreground) ? -coc : coc;
    return saturate(layer_coc - threshold);
  }

  /* Resolve pass: fade out with the CoC size and reject samples from the other layer. */
  bool in_layer = is_foreground ? (coc <= 0.5) : (coc > -0.5);
  return saturate(-abs(coc) + dof_layer_threshold + dof_layer_offset) * float(in_layer);
}
/* Vector variant of the layer weight for 4 CoC values at once. */
vec4 dof_layer_weight(vec4 coc)
{
  /* NOTE: Used for scatter pass which already flipped the sign correctly. */
  /* Account for half pixel gather. */
  vec4 scaled_coc = coc * 2.0;
  return saturate(scaled_coc - dof_layer_threshold + dof_layer_offset);
}
/* NOTE: This is halfres CoC radius. */
/* Sample weight: inverse of the squared CoC, capped to 1. */
float dof_sample_weight(float coc)
{
#if 1 /* Optimized */
  float inv_coc_sqr = 1.0 / sqr(coc);
  return min(1.0, inv_coc_sqr);
#else
  /* Full intensity if CoC radius is below the pixel footprint. */
  const float min_coc = 1.0;
  coc = max(min_coc, abs(coc));
  return (M_PI * min_coc * min_coc) / (M_PI * coc * coc);
#endif
}
/* Vector variant: inverse of the squared CoC, capped to 1, for 4 CoC values at once. */
vec4 dof_sample_weight(vec4 coc)
{
#if 1 /* Optimized */
  vec4 inv_coc_sqr = 1.0 / sqr(coc);
  return min(vec4(1.0), inv_coc_sqr);
#else
  /* Full intensity if CoC radius is below the pixel footprint. */
  const float min_coc = 1.0;
  coc = max(vec4(min_coc), abs(coc));
  return (M_PI * min_coc * min_coc) / (M_PI * coc * coc);
#endif
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Circle of Confusion tiles
* \{ */
/* CoC statistics of one tile.
 * The first three foreground members are stored negated in the tile images
 * (see `dof_coc_tile_pack()` / `dof_coc_tile_unpack()`).
 * NOTE(review): foreground CoC values appear to be negative (callers negate them to get
 * radii) — confirm against the setup pass. */
struct CocTile {
float fg_min_coc;
float fg_max_coc;
float fg_max_intersectable_coc;
/* May hold one of the encoded `dof_tile_*` values instead of a CoC. */
float fg_slight_focus_max_coc;
float bg_min_coc;
float bg_max_coc;
float bg_min_intersectable_coc;
};
/* Which passes are needed for a tile, as computed by `dof_coc_tile_prediction_get()`. */
struct CocTilePrediction {
bool do_foreground;
bool do_slight_focus;
bool do_focus;
bool do_background;
bool do_hole_fill;
};
/* WATCH: Might have to change depending on the texture format. */
/* Encoded values stored in `CocTile.fg_slight_focus_max_coc` in place of an actual CoC. */
const float dof_tile_defocus = 0.25;
const float dof_tile_focus = 0.0;
/* Returned by `dof_coc_max_slight_focus()` when a focus and a defocus value are merged. */
const float dof_tile_mixed = 0.75;
/* Large CoC magnitude used to initialize tiles for reduction (see `dof_coc_tile_init()`). */
const float dof_tile_large_coc = 1024.0;
/* Init a CoC tile for reduction algorithms. */
/* Return a tile whose members are set to the starting values of the reduction. */
CocTile dof_coc_tile_init()
{
  return CocTile(
      /* fg_min_coc */ 0.0,
      /* fg_max_coc */ -dof_tile_large_coc,
      /* fg_max_intersectable_coc */ dof_tile_large_coc,
      /* fg_slight_focus_max_coc */ -1.0,
      /* bg_min_coc */ dof_tile_large_coc,
      /* bg_max_coc */ 0.0,
      /* bg_min_intersectable_coc */ dof_tile_large_coc);
}
/* Rebuild a tile from the values stored in the foreground and background tile images.
 * The first three foreground channels are stored negated. */
CocTile dof_coc_tile_unpack(vec4 fg, vec3 bg)
{
  return CocTile(
      /* fg_min_coc */ -fg.x,
      /* fg_max_coc */ -fg.y,
      /* fg_max_intersectable_coc */ -fg.z,
      /* fg_slight_focus_max_coc */ fg.w,
      /* bg_min_coc */ bg.x,
      /* bg_max_coc */ bg.y,
      /* bg_min_intersectable_coc */ bg.z);
}
/* WORKAROUND(fclem): GLSL compilers differ in what qualifiers are required to pass images as
 * parameters. Workaround by using defines.
 * Loads and unpacks the tile at `texel_`, clamping the coordinate to the bounds of each image.
 * NOTE: `texel_` is expanded several times. */
#define dof_coc_tile_load(tiles_fg_img_, tiles_bg_img_, texel_) \
dof_coc_tile_unpack( \
imageLoad(tiles_fg_img_, clamp(texel_, ivec2(0), imageSize(tiles_fg_img_) - 1)), \
imageLoad(tiles_bg_img_, clamp(texel_, ivec2(0), imageSize(tiles_bg_img_) - 1)).xyz)
/* Convert a tile into the values stored in the foreground and background tile images.
 * The first three foreground members are negated. */
void dof_coc_tile_pack(CocTile tile, out vec4 out_fg, out vec3 out_bg)
{
  out_fg = vec4(-tile.fg_min_coc,
                -tile.fg_max_coc,
                -tile.fg_max_intersectable_coc,
                tile.fg_slight_focus_max_coc);
  out_bg = vec3(tile.bg_min_coc, tile.bg_max_coc, tile.bg_min_intersectable_coc);
}
/* Pack `tile_data_` and store it at `texel_out_` in both tile images.
 * A macro for the same reason as `dof_coc_tile_load`. The `if (true)` block scopes the
 * temporaries; no clamping of `texel_out_` is done here. */
#define dof_coc_tile_store(tiles_fg_img_, tiles_bg_img_, texel_out_, tile_data_) \
if (true) { \
vec4 out_fg; \
vec3 out_bg; \
dof_coc_tile_pack(tile_data_, out_fg, out_bg); \
imageStore(tiles_fg_img_, texel_out_, out_fg); \
imageStore(tiles_bg_img_, texel_out_, out_bg.xyzz); \
}
/* Tell whether the fast gather path can be used for a tile, given the largest and smallest
 * absolute CoC of the layer. */
bool dof_do_fast_gather(float max_absolute_coc, float min_absolute_coc, const bool is_foreground)
{
  /* The smallest CoC must have full layer weight. */
  float signed_min_coc = (is_foreground) ? -min_absolute_coc : min_absolute_coc;
  if (dof_layer_weight(signed_min_coc, is_foreground) < 1.0) {
    return false;
  }

  /* FIXME(fclem): This is a workaround to fast gather triggering too early. Since we use custom
   * opacity mask, the opacity is not given to be 100% even for after normal threshold. */
  if (is_foreground && min_absolute_coc < dof_layer_threshold) {
    return false;
  }

  /* The CoC variation must stay within the allowed relative error. */
  float coc_spread = max_absolute_coc - min_absolute_coc;
  return coc_spread < (DOF_FAST_GATHER_COC_ERROR * max_absolute_coc);
}
/* Based on tile values, predict which passes are needed for this tile. */
CocTilePrediction dof_coc_tile_prediction_get(CocTile tile)
{
  CocTilePrediction predict;

  predict.do_foreground = (-tile.fg_min_coc > dof_layer_threshold - dof_layer_offset_fg);
  /* Foreground covers the tile entirely: layers behind it can be skipped. */
  bool fg_fully_opaque = predict.do_foreground &&
                         dof_do_fast_gather(-tile.fg_min_coc, -tile.fg_max_coc, true);

  predict.do_slight_focus = !fg_fully_opaque && (tile.fg_slight_focus_max_coc >= 0.5);
  predict.do_focus = !fg_fully_opaque && (tile.fg_slight_focus_max_coc == dof_tile_focus);

  predict.do_background = !predict.do_focus && !fg_fully_opaque &&
                          (tile.bg_max_coc > dof_layer_threshold - dof_layer_offset);

  /* NOTE: A "background fully opaque" flag used to be computed here but was never read. */

  predict.do_hole_fill = !predict.do_focus && !fg_fully_opaque && -tile.fg_min_coc > 0.0;

#if 0 /* Debug */
  predict.do_foreground = predict.do_background = predict.do_hole_fill = true;
#endif
  return predict;
}
/* Special function to return the correct max value of 2 slight focus coc. */
/* Maximum of two slight focus CoC values, with a special case for the encoded
 * `dof_tile_focus` / `dof_tile_defocus` pair. */
float dof_coc_max_slight_focus(float coc1, float coc2)
{
  /* Do not consider values below 0.5 for expansion as they are "encoded".
   * See setup pass shader for more infos. */
  bool defocus_then_focus = (coc1 == dof_tile_defocus && coc2 == dof_tile_focus);
  bool focus_then_defocus = (coc1 == dof_tile_focus && coc2 == dof_tile_defocus);

  /* Tile where completely out of focus and in focus are both present.
   * Consider as very slightly out of focus. */
  return (defocus_then_focus || focus_then_defocus) ? dof_tile_mixed : max(coc1, coc2);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Gathering
* \{ */
/**
* Generate samples in a square pattern with the ring radius. X is the center tile.
*
* Dist1 Dist2
* 6 5 4 3 2
* 3 2 1 7 1
* . X 0 . X 0
* . . . . .
* . . . . .
*
* Samples are expected to be mirrored to complete the pattern.
**/
/* Return the texel offset of sample `sample_id` on the square ring at `ring_distance`.
 * The ring is walked along the +X edge first, then the +Y edge, then the -X edge. */
ivec2 dof_square_ring_sample_offset(int ring_distance, int sample_id)
{
  if (sample_id < ring_distance) {
    /* +X edge, going up. */
    return ivec2(ring_distance, sample_id);
  }
  if (sample_id < ring_distance * 3) {
    /* +Y edge, going towards -X. */
    return ivec2(ring_distance - sample_id + ring_distance, ring_distance);
  }
  /* -X edge, going down. */
  return ivec2(-ring_distance, ring_distance - sample_id + 3 * ring_distance);
}
/** \} */

View File

@ -0,0 +1,246 @@
/**
* Reduce copy pass: filter fireflies and split color between scatter and gather input.
*
* NOTE: The texture can end up being too big because of the mipmap padding. We correct for
* that during the convolution phase.
*
* Inputs:
* - Output of setup pass (halfres) and reduce downsample pass (quarter res).
* Outputs:
* - Halfres padded to avoid mipmap mis-alignment (so possibly not matching input size).
* - Gather input color (whole mip chain), Scatter rect list, Signed CoC (whole mip chain).
**/
#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
/* NOTE: Do not compare alpha as it is not scattered by the scatter pass. */
/**
 * Compare `color` against the 4 surrounding texels of `downsample_tx` and return a factor in
 * [0..1] that grows with the largest per-channel difference found.
 * Both colors are clamped to `dof_buf.scatter_neighbor_max_color` before comparison.
 */
float dof_scatter_neighborhood_rejection(vec3 color)
{
  const float rejection_threshold = 0.7;

  color = min(vec3(dof_buf.scatter_neighbor_max_color), color);

  float validity = 0.0;

  /* Centered in the middle of 4 quarter res texel. */
  vec2 texel_size = 1.0 / vec2(textureSize(downsample_tx, 0).xy);
  vec2 uv = ((vec2(gl_GlobalInvocationID.xy) + 0.5) * 0.5) * texel_size;

  for (int i = 0; i < 4; i++) {
    vec2 sample_uv = uv + quad_offsets[i] * texel_size;
    vec3 ref = textureLod(downsample_tx, sample_uv, 0.0).rgb;

    ref = min(vec3(dof_buf.scatter_neighbor_max_color), ref);
    /* NOTE: The `max()` against 0 is kept as is on purpose: removing it could change the
     * result for non-finite inputs. */
    float diff = max_v3(max(vec3(0.0), abs(ref - color)));

    /* 0 up to the threshold, then ramps up to 1 at twice the threshold. */
    diff = saturate(diff / rejection_threshold - 1.0);
    validity = max(validity, diff);
  }

  return validity;
}
/**
 * Scatter factor fading out sprites whose radius reaches past the closest border of
 * `inout_color_lod0_img`. This avoids Bokeh sprites popping in and out at the screen border
 * and drawing Bokeh sprites larger than the screen.
 * `coc` is the fullres CoC of the texel, `texel` its coordinate in the halfres image.
 */
float dof_scatter_screen_border_rejection(float coc, ivec2 texel)
{
  vec2 extent = vec2(imageSize(inout_color_lod0_img));
  /* Texel center, going through normalized coordinates and back to pixels. */
  vec2 uv = (vec2(texel) + 0.5) / extent;
  vec2 pixel_pos = uv * extent;
  /* Distance to the nearest of the four borders. */
  float closest_border = min_v2(min(pixel_pos, extent - pixel_pos));
  /* Fullres to halfres CoC. */
  float halfres_radius = abs(coc * 0.5);
  /* Allow 10px transition. */
  const float transition_rcp = 1.0 / 10.0;
  return saturate((closest_border - halfres_radius) * transition_rcp + 1.0);
}
/* Scatter factor ramping from 0 to 1 as the brightest channel of `color` exceeds
 * `dof_buf.scatter_color_threshold`. */
float dof_scatter_luminosity_rejection(vec3 color)
{
  const float rejection_hardness = 1.0;
  vec3 excess = color - dof_buf.scatter_color_threshold;
  return saturate(max_v3(excess) * rejection_hardness);
}
/* Scatter factor ramping from 0 to 1 as the absolute CoC exceeds
 * `dof_buf.scatter_coc_threshold`. */
float dof_scatter_coc_radius_rejection(float coc)
{
  const float rejection_hardness = 0.3;
  float radius_excess = abs(coc) - dof_buf.scatter_coc_threshold;
  return saturate(radius_excess * rejection_hardness);
}
/* Cheap, non normalized luminance: green is counted twice, red and blue once. */
float fast_luma(vec3 color)
{
  float luma = 2.0 * color.g;
  luma += color.r;
  luma += color.b;
  return luma;
}
/* Work-group local caches indexed as [local y][local x]. `main()` fills one entry per thread
* with the level 0 data, then overwrites entries in place while building the lower mips.
* NOTE(review): the 8x8 size presumably matches DOF_REDUCE_GROUP_SIZE -- confirm. */
shared vec4 color_cache[8][8];
shared float coc_cache[8][8];
/* Per texel scatter factor: product of the rejection functions, 0.0 meaning gather only. */
shared float do_scatter[8][8];
/**
 * Reduce pass entry point. One thread per texel of `inout_color_lod0_img`:
 * - Load color and CoC of level 0 into the work-group local caches.
 * - Compute the scatter factor of the texel and, for every 2x2 quad with a non-zero factor,
 *   append a sprite to the foreground and / or background scatter list.
 * - Remove the scattered part from the gather color and write it back to level 0.
 * - Recursively downsample color and CoC inside the caches and store levels 1 to 3.
 */
void main()
{
  /* Clamp so that threads outside of the image still load valid data. */
  ivec2 texel = min(ivec2(gl_GlobalInvocationID.xy), imageSize(inout_color_lod0_img) - 1);
  uvec2 texel_local = gl_LocalInvocationID.xy;
  /* Increase readability. Both macros expand to the two subscripts of a cache access and
   * rely on a `texel_local` variable being in scope. */
#define LOCAL_INDEX texel_local.y][texel_local.x
#define LOCAL_OFFSET(x_, y_) texel_local.y + y_][texel_local.x + x_

  /* Load level 0 into cache. */
  color_cache[LOCAL_INDEX] = imageLoad(inout_color_lod0_img, texel);
  coc_cache[LOCAL_INDEX] = imageLoad(in_coc_lod0_img, texel).r;

  /* Only scatter if luminous enough. */
  do_scatter[LOCAL_INDEX] = dof_scatter_luminosity_rejection(color_cache[LOCAL_INDEX].rgb);
  /* Only scatter if CoC is big enough. */
  do_scatter[LOCAL_INDEX] *= dof_scatter_coc_radius_rejection(coc_cache[LOCAL_INDEX]);
  /* Only scatter if CoC is not too big to avoid performance issues. */
  do_scatter[LOCAL_INDEX] *= dof_scatter_screen_border_rejection(coc_cache[LOCAL_INDEX], texel);
  /* Only scatter if neighborhood is different enough. */
  do_scatter[LOCAL_INDEX] *= dof_scatter_neighborhood_rejection(color_cache[LOCAL_INDEX].rgb);
  /* For debugging. */
  if (no_scatter_pass) {
    do_scatter[LOCAL_INDEX] = 0.0;
  }

  /* The quad processing below reads cache entries written by neighbor threads. */
  barrier();

  /* Add a scatter sprite for each 2x2 pixel neighborhood passing the threshold.
   * Only the thread with even local coordinates handles its quad. */
  if (all(equal(texel_local & 1u, uvec2(0)))) {
    vec4 do_scatter4;
    /* Follows quad_offsets order. */
    do_scatter4.x = do_scatter[LOCAL_OFFSET(0, 1)];
    do_scatter4.y = do_scatter[LOCAL_OFFSET(1, 1)];
    do_scatter4.z = do_scatter[LOCAL_OFFSET(1, 0)];
    do_scatter4.w = do_scatter[LOCAL_OFFSET(0, 0)];
    if (any(greaterThan(do_scatter4, vec4(0.0)))) {
      /* Apply energy conservation to anamorphic scattered bokeh. */
      do_scatter4 *= max_v2(dof_buf.bokeh_anisotropic_scale_inv);

      /* Circle of Confusion. */
      vec4 coc4;
      coc4.x = coc_cache[LOCAL_OFFSET(0, 1)];
      coc4.y = coc_cache[LOCAL_OFFSET(1, 1)];
      coc4.z = coc_cache[LOCAL_OFFSET(1, 0)];
      coc4.w = coc_cache[LOCAL_OFFSET(0, 0)];
      /* We are scattering at half resolution, so divide CoC by 2. */
      coc4 *= 0.5;
      /* Sprite center position. Center sprite around the 4 texture taps. */
      vec2 offset = vec2(gl_GlobalInvocationID.xy) + 1;
      /* Add 2.5 to max_coc because the max_coc may not be centered on the sprite origin
       * and because we smooth the bokeh shape a bit in the pixel shader. */
      vec2 half_extent = max_v4(abs(coc4)) * dof_buf.bokeh_anisotropic_scale + 2.5;

      /* Issue a sprite for each field if any CoC matches.
       * Negative CoC with a non-zero scatter factor: foreground sprite. */
      if (any(lessThan(do_scatter4 * sign(coc4), vec4(0.0)))) {
        /* Same value for all threads. Not an issue if we don't sync access to it. */
        scatter_fg_indirect_buf.v_count = 4u;
        /* Issue 1 strip instance per sprite. */
        uint rect_id = atomicAdd(scatter_fg_indirect_buf.i_count, 1u);
        /* Sprites that do not fit into the list are not written. */
        if (rect_id < dof_buf.scatter_max_rect) {
          vec4 coc4_fg = max(vec4(0.0), -coc4);
          vec4 fg_weights = dof_layer_weight(coc4_fg) * dof_sample_weight(coc4_fg) * do_scatter4;
          /* Filter NaNs. */
          fg_weights = select(fg_weights, vec4(0.0), equal(coc4_fg, vec4(0.0)));

          ScatterRect rect_fg;
          rect_fg.offset = offset;
          /* Negate extent to flip the sprite. Mimics optical phenomenon. */
          rect_fg.half_extent = -half_extent;
          /* NOTE: Since we flipped the quad along (1,-1) line, we need to also swap the (1,1)
           * and (0,0) values so that quad_offsets is in the right order in the vertex shader. */

          /* Circle of Confusion absolute radius in halfres pixels. */
          rect_fg.color_and_coc[0].a = coc4_fg[0];
          rect_fg.color_and_coc[1].a = coc4_fg[3];
          rect_fg.color_and_coc[2].a = coc4_fg[2];
          rect_fg.color_and_coc[3].a = coc4_fg[1];
          /* Apply weights. */
          rect_fg.color_and_coc[0].rgb = color_cache[LOCAL_OFFSET(0, 1)].rgb * fg_weights[0];
          rect_fg.color_and_coc[1].rgb = color_cache[LOCAL_OFFSET(0, 0)].rgb * fg_weights[3];
          rect_fg.color_and_coc[2].rgb = color_cache[LOCAL_OFFSET(1, 0)].rgb * fg_weights[2];
          rect_fg.color_and_coc[3].rgb = color_cache[LOCAL_OFFSET(1, 1)].rgb * fg_weights[1];

          scatter_fg_list_buf[rect_id] = rect_fg;
        }
      }
      /* Positive CoC with a non-zero scatter factor: background sprite. */
      if (any(greaterThan(do_scatter4 * sign(coc4), vec4(0.0)))) {
        /* Same value for all threads. Not an issue if we don't sync access to it. */
        scatter_bg_indirect_buf.v_count = 4u;
        /* Issue 1 strip instance per sprite. */
        uint rect_id = atomicAdd(scatter_bg_indirect_buf.i_count, 1u);
        /* Sprites that do not fit into the list are not written. */
        if (rect_id < dof_buf.scatter_max_rect) {
          vec4 coc4_bg = max(vec4(0.0), coc4);
          vec4 bg_weights = dof_layer_weight(coc4_bg) * dof_sample_weight(coc4_bg) * do_scatter4;
          /* Filter NaNs. */
          bg_weights = select(bg_weights, vec4(0.0), equal(coc4_bg, vec4(0.0)));

          ScatterRect rect_bg;
          rect_bg.offset = offset;
          rect_bg.half_extent = half_extent;

          /* Circle of Confusion absolute radius in halfres pixels. */
          rect_bg.color_and_coc[0].a = coc4_bg[0];
          rect_bg.color_and_coc[1].a = coc4_bg[1];
          rect_bg.color_and_coc[2].a = coc4_bg[2];
          rect_bg.color_and_coc[3].a = coc4_bg[3];
          /* Apply weights. */
          rect_bg.color_and_coc[0].rgb = color_cache[LOCAL_OFFSET(0, 1)].rgb * bg_weights[0];
          rect_bg.color_and_coc[1].rgb = color_cache[LOCAL_OFFSET(1, 1)].rgb * bg_weights[1];
          rect_bg.color_and_coc[2].rgb = color_cache[LOCAL_OFFSET(1, 0)].rgb * bg_weights[2];
          rect_bg.color_and_coc[3].rgb = color_cache[LOCAL_OFFSET(0, 0)].rgb * bg_weights[3];

          scatter_bg_list_buf[rect_id] = rect_bg;
        }
      }
    }
  }

  /* Remove scatter color from gather. */
  color_cache[LOCAL_INDEX].rgb *= 1.0 - do_scatter[LOCAL_INDEX];
  imageStore(inout_color_lod0_img, texel, color_cache[LOCAL_INDEX]);

  /* Recursive downsample. */
  for (uint i = 1u; i < DOF_MIP_MAX; i++) {
    /* Make the cache writes of the previous level visible to the whole work-group. */
    barrier();
    /* Each level is computed by a quarter of the threads of the previous one. */
    if (all(lessThan(gl_LocalInvocationID.xy, uvec2(1u << (DOF_MIP_MAX - 1u - i))))) {
      /* Origin of the block of level 0 texels reduced by this thread. Shadows the outer
       * `texel_local` so that the cache macros address this block. */
      uvec2 texel_local = gl_LocalInvocationID.xy << i;
      /* The previous level was stored in place at the origin of each of its blocks, so its
       * texels are `1 << (i - 1)` cache entries apart. Using a fixed offset of 1 would mix
       * level 0 data into levels 2 and above. */
      uint ofs = 1u << (i - 1u);

      /* TODO(fclem): Could use wave shuffle intrinsics to avoid LDS as suggested by the paper. */
      vec4 coc4;
      coc4.x = coc_cache[LOCAL_OFFSET(0, ofs)];
      coc4.y = coc_cache[LOCAL_OFFSET(ofs, ofs)];
      coc4.z = coc_cache[LOCAL_OFFSET(ofs, 0)];
      coc4.w = coc_cache[LOCAL_OFFSET(0, 0)];

      vec4 colors[4];
      colors[0] = color_cache[LOCAL_OFFSET(0, ofs)];
      colors[1] = color_cache[LOCAL_OFFSET(ofs, ofs)];
      colors[2] = color_cache[LOCAL_OFFSET(ofs, 0)];
      colors[3] = color_cache[LOCAL_OFFSET(0, 0)];

      vec4 weights = dof_bilateral_coc_weights(coc4);
      weights *= dof_bilateral_color_weights(colors);
      /* Normalize so that the sum is 1. */
      weights *= safe_rcp(sum(weights));

      /* Store the result in place for the next level. */
      color_cache[LOCAL_INDEX] = weighted_sum_array(colors, weights);
      coc_cache[LOCAL_INDEX] = dot(coc4, weights);

      /* Texel coordinate in the image of level `i`. */
      ivec2 texel = ivec2(gl_WorkGroupID.xy * (gl_WorkGroupSize.xy >> i) +
                          gl_LocalInvocationID.xy);

      if (i == 1) {
        imageStore(out_color_lod1_img, texel, color_cache[LOCAL_INDEX]);
        imageStore(out_coc_lod1_img, texel, vec4(coc_cache[LOCAL_INDEX]));
      }
      else if (i == 2) {
        imageStore(out_color_lod2_img, texel, color_cache[LOCAL_INDEX]);
        imageStore(out_coc_lod2_img, texel, vec4(coc_cache[LOCAL_INDEX]));
      }
      else /* if (i == 3) */ {
        imageStore(out_color_lod3_img, texel, color_cache[LOCAL_INDEX]);
        imageStore(out_coc_lod3_img, texel, vec4(coc_cache[LOCAL_INDEX]));
      }
    }
  }
}

View File

@ -0,0 +1,87 @@
/**
* Recombine Pass: Load separate convolution layer and composite with self
* slight defocus convolution and in-focus fields.
*
* The halfres gather methods are fast but lack precision for small CoC areas.
* To fix this we do a bruteforce gather to have a smooth transition between
* in-focus and defocus regions.
*/
#pragma BLENDER_REQUIRE(eevee_depth_of_field_accumulator_lib.glsl)
/**
* Resolve entry point, one thread per pixel of `out_color_img`.
* Composites, in this order, the hole-fill, background, slight focus, in-focus and foreground
* layers. Each layer is skipped when its `no_*_pass` toggle is set or when the prediction
* computed from the CoC tile does not request it.
*/
void main()
{
/* Pixel center of this invocation. */
vec2 frag_coord = vec2(gl_GlobalInvocationID.xy) + 0.5;
/* Tile containing this pixel.
* NOTE(review): the `* 2` presumably converts the halfres tile size to fullres -- confirm. */
ivec2 tile_co = ivec2(frag_coord / float(DOF_TILES_SIZE * 2));
CocTile coc_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, tile_co);
CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile);
/* Running composite and its accumulated weight. */
vec4 out_color = vec4(0.0);
float weight = 0.0;
/* Scratch values reused for each layer. */
vec4 layer_color;
float layer_weight;
/* Normalized coordinates for `color_tx` and for the half resolution layer textures. */
vec2 uv = frag_coord / vec2(textureSize(color_tx, 0));
vec2 uv_halfres = (frag_coord * 0.5) / vec2(textureSize(color_bg_tx, 0));
if (!no_hole_fill_pass && prediction.do_hole_fill) {
layer_color = textureLod(color_hole_fill_tx, uv_halfres, 0.0);
layer_weight = textureLod(weight_hole_fill_tx, uv_halfres, 0.0).r;
/* Normalize the color and turn the weight into a 0 or 1 coverage. */
out_color = layer_color * safe_rcp(layer_weight);
weight = float(layer_weight > 0.0);
}
if (!no_background_pass && prediction.do_background) {
layer_color = textureLod(color_bg_tx, uv_halfres, 0.0);
layer_weight = textureLod(weight_bg_tx, uv_halfres, 0.0).r;
/* Always prefer background to hole_fill pass. */
layer_color *= safe_rcp(layer_weight);
layer_weight = float(layer_weight > 0.0);
/* Composite background. */
out_color = out_color * (1.0 - layer_weight) + layer_color;
weight = weight * (1.0 - layer_weight) + layer_weight;
/* Fill holes with the composited background. */
out_color *= safe_rcp(weight);
weight = float(weight > 0.0);
}
if (!no_slight_focus_pass && prediction.do_slight_focus) {
/* Outputs are written to `layer_color` and `layer_weight`. */
dof_slight_focus_gather(depth_tx,
color_tx,
bokeh_lut_tx,
coc_tile.fg_slight_focus_max_coc,
layer_color,
layer_weight);
/* Composite slight defocus. */
out_color = out_color * (1.0 - layer_weight) + layer_color;
weight = weight * (1.0 - layer_weight) + layer_weight;
}
if (!no_focus_pass && prediction.do_focus) {
layer_color = safe_color(textureLod(color_tx, uv, 0.0));
/* Full weight: replaces everything composited so far. */
layer_weight = 1.0;
/* Composite in focus. */
out_color = out_color * (1.0 - layer_weight) + layer_color;
weight = weight * (1.0 - layer_weight) + layer_weight;
}
if (!no_foreground_pass && prediction.do_foreground) {
layer_color = textureLod(color_fg_tx, uv_halfres, 0.0);
layer_weight = textureLod(weight_fg_tx, uv_halfres, 0.0).r;
/* Composite foreground. `weight` is not updated as it is not read afterwards. */
out_color = out_color * (1.0 - layer_weight) + layer_color;
}
/* Fix float precision issue in alpha compositing. */
if (out_color.a > 0.99) {
out_color.a = 1.0;
}
/* Debug view: tint the pixels of tiles whose slight focus max CoC is at least 0.5. */
if (debug_resolve_perf && coc_tile.fg_slight_focus_max_coc >= 0.5) {
out_color.rgb *= vec3(1.0, 0.1, 0.1);
}
imageStore(out_color_img, ivec2(gl_GlobalInvocationID.xy), out_color);
}

View File

@ -0,0 +1,62 @@
/**
* Scatter pass: Use sprites to scatter the color of very bright pixel to have higher quality blur.
*
* We only scatter one quad per sprite and one sprite per 4 pixels to reduce vertex shader
* invocations and overdraw.
*/
#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
/* Linear ramp of `v`: 0 at `p0`, clamped to [0..1]. The span is `abs(p1 - p0)`, so the ramp
* always increases with `v`, whatever the order of `p0` and `p1`. */
#define linearstep(p0, p1, v) (clamp(((v) - (p0)) / abs((p1) - (p0)), 0.0, 1.0))
/**
* Scatter fragment entry point. Evaluates the bokeh shape of the 4 texels packed in the
* sprite for this fragment and outputs the sum of their colors weighted by shape coverage
* and, unless disabled, by an occlusion term read from `occlusion_tx`.
*/
void main()
{
/* CoC radius of the 4 texels, stored in the alpha channel of each interpolant. */
vec4 coc4 = vec4(interp.color_and_coc1.w,
interp.color_and_coc2.w,
interp.color_and_coc3.w,
interp.color_and_coc4.w);
/* Distance from this fragment to the center of each of the 4 bokeh shapes. */
vec4 shapes;
if (use_bokeh_lut) {
shapes = vec4(texture(bokeh_lut_tx, interp.rect_uv1).r,
texture(bokeh_lut_tx, interp.rect_uv2).r,
texture(bokeh_lut_tx, interp.rect_uv3).r,
texture(bokeh_lut_tx, interp.rect_uv4).r);
}
else {
/* Circular bokeh: plain euclidean distance. */
shapes = vec4(length(interp.rect_uv1),
length(interp.rect_uv2),
length(interp.rect_uv3),
length(interp.rect_uv4));
}
shapes *= interp.distance_scale;
/* Becomes signed distance field in pixel units. */
shapes -= coc4;
/* Smooth the edges a bit to fade out the undersampling artifacts. */
shapes = saturate(1.0 - linearstep(-0.8, 0.8, shapes));
/* Outside of bokeh shape. Try to avoid overloading ROPs. */
if (max_v4(shapes) == 0.0) {
discard;
}
if (!no_scatter_occlusion) {
/* Works because target is the same size as occlusion_tx. */
vec2 uv = gl_FragCoord.xy / vec2(textureSize(occlusion_tx, 0).xy);
vec2 occlusion_data = texture(occlusion_tx, uv).rg;
/* Fix tiling artifacts. (Slide 90) */
const float correction_fac = 1.0 - DOF_FAST_GATHER_COC_ERROR;
/* Occlude the sprite with geometry from the same field using a Chebyshev test (slide 85). */
float mean = occlusion_data.x;
float variance = occlusion_data.y;
shapes *= variance * safe_rcp(variance + sqr(max(coc4 * correction_fac - mean, 0.0)));
}
/* Sum of the 4 texel colors weighted by their shape coverage. */
out_color = (interp.color_and_coc1 * shapes[0] + interp.color_and_coc2 * shapes[1] +
interp.color_and_coc3 * shapes[2] + interp.color_and_coc4 * shapes[3]);
/* Do not accumulate alpha. This has already been accumulated by the gather pass. */
out_color.a = 0.0;
/* Debug view: output the average intensity in the red channel only. */
if (debug_scatter_perf) {
out_color.rgb = avg(out_color.rgb) * vec3(1.0, 0.0, 0.0);
}
}

View File

@ -0,0 +1,45 @@
/**
* Scatter pass: Use sprites to scatter the color of very bright pixel to have higher quality blur.
*
* We only scatter one quad per sprite and one sprite per 4 pixels to reduce vertex shader
* invocations and overdraw.
**/
#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
/**
* Scatter vertex entry point. Each instance reads one `ScatterRect` from `scatter_list_buf`
* and expands it to a quad whose corner is selected by the two low bits of `gl_VertexID`.
* Outputs the 4 packed colors / CoCs and, for each of them, the coordinate used by the
* fragment shader to evaluate the bokeh shape.
*/
void main()
{
ScatterRect rect = scatter_list_buf[gl_InstanceID];
interp.color_and_coc1 = rect.color_and_coc[0];
interp.color_and_coc2 = rect.color_and_coc[1];
interp.color_and_coc3 = rect.color_and_coc[2];
interp.color_and_coc4 = rect.color_and_coc[3];
/* Quad corner in [-1..1], then scaled to pixels. A negative `half_extent` flips the quad. */
vec2 uv = vec2(gl_VertexID & 1, gl_VertexID >> 1) * 2.0 - 1.0;
uv = uv * rect.half_extent;
/* Position in pixels of the render target. */
gl_Position = vec4(uv + rect.offset, 0.0, 1.0);
/* NDC range [-1..1]. */
gl_Position.xy = (gl_Position.xy / vec2(textureSize(occlusion_tx, 0).xy)) * 2.0 - 1.0;
if (use_bokeh_lut) {
/* Bias scale to avoid sampling at the texture's border. */
interp.distance_scale = (float(DOF_BOKEH_LUT_SIZE) / float(DOF_BOKEH_LUT_SIZE - 1));
vec2 uv_div = 1.0 / (interp.distance_scale * abs(rect.half_extent));
/* LUT coordinates in [0..1], relative to each of the 4 texel centers. */
interp.rect_uv1 = ((uv + quad_offsets[0]) * uv_div) * 0.5 + 0.5;
interp.rect_uv2 = ((uv + quad_offsets[1]) * uv_div) * 0.5 + 0.5;
interp.rect_uv3 = ((uv + quad_offsets[2]) * uv_div) * 0.5 + 0.5;
interp.rect_uv4 = ((uv + quad_offsets[3]) * uv_div) * 0.5 + 0.5;
/* Only for sampling. */
interp.distance_scale *= max_v2(abs(rect.half_extent));
}
else {
/* No LUT: pass the pixel offset to each texel center, the fragment shader uses its length. */
interp.distance_scale = 1.0;
interp.rect_uv1 = uv + quad_offsets[0];
interp.rect_uv2 = uv + quad_offsets[1];
interp.rect_uv3 = uv + quad_offsets[2];
interp.rect_uv4 = uv + quad_offsets[3];
}
}

View File

@ -0,0 +1,68 @@
/**
* Setup pass: CoC and luma aware downsample to half resolution of the input scene color buffer.
*
* In addition to the downsampled CoC, we output the maximum slight out of focus CoC to be
* sure we don't miss a pixel.
*
* Input:
* Full-resolution color & depth buffer
* Output:
* Half-resolution Color, signed CoC (out_coc.x), and max slight focus abs CoC (out_coc.y).
**/
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
/**
 * Return the maximum absolute slight focus CoC of the 4 given CoCs.
 * CoCs at or below 0.5 and CoCs above the layer threshold are first replaced by tag values:
 * `dof_tile_mixed` for both when the quad contains both kinds, `dof_tile_focus` and
 * `dof_tile_defocus` respectively otherwise. The tags differentiate fully defocused tiles
 * from fully in-focus ones, which enables an optimization in the resolve pass.
 */
float dof_abs_max_slight_of_focus_coc(vec4 cocs)
{
  const vec4 defocus_limit = vec4(dof_layer_threshold + dof_layer_offset);
  vec4 abs_cocs = abs(cocs);
  bvec4 is_defocus = greaterThan(abs_cocs, defocus_limit);
  bvec4 is_focus = lessThanEqual(abs_cocs, vec4(0.5));
  /* For the same reason as in the flatten pass. This is a case we cannot optimize for. */
  bool is_mixed = any(is_defocus) && any(is_focus);
  float focus_tag = is_mixed ? dof_tile_mixed : dof_tile_focus;
  float defocus_tag = is_mixed ? dof_tile_mixed : dof_tile_defocus;
  abs_cocs = mix(abs_cocs, vec4(focus_tag), is_focus);
  abs_cocs = mix(abs_cocs, vec4(defocus_tag), is_defocus);
  return max_v4(abs_cocs);
}
/**
 * Setup entry point, one thread per half resolution texel.
 * Reads the 2x2 full resolution quad covered by the texel, computes a CoC per sample and
 * stores the bilaterally weighted color, the weighted CoC (x) and the maximum slight focus
 * CoC (y).
 */
void main()
{
  ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy);
  vec2 fullres_texel_size = 1.0 / vec2(textureSize(color_tx, 0).xy);
  /* Center uv around the 4 fullres pixels. */
  vec2 quad_center = vec2(gl_GlobalInvocationID.xy * 2 + 1) * fullres_texel_size;

  vec4 quad_colors[4];
  vec4 quad_cocs;
  for (int corner = 0; corner < 4; corner++) {
    vec2 corner_uv = quad_center + quad_offsets[corner] * fullres_texel_size;
    /* NOTE: We use samplers without filtering. */
    float depth = textureLod(depth_tx, corner_uv, 0.0).r;
    quad_colors[corner] = safe_color(textureLod(color_tx, corner_uv, 0.0));
    quad_cocs[corner] = dof_coc_from_depth(dof_buf, corner_uv, depth);
  }
  quad_cocs = clamp(quad_cocs, -dof_buf.coc_abs_max, dof_buf.coc_abs_max);

  vec4 weights = dof_bilateral_coc_weights(quad_cocs) * dof_bilateral_color_weights(quad_colors);
  /* Normalize so that the sum is 1. */
  weights *= safe_rcp(sum(weights));

  imageStore(out_color_img, out_texel, weighted_sum_array(quad_colors, weights));

  vec2 out_coc = vec2(dot(quad_cocs, weights), dof_abs_max_slight_of_focus_coc(quad_cocs));
  imageStore(out_coc_img, out_texel, out_coc.xyxy);
}

View File

@ -0,0 +1,64 @@
/**
* Temporal Stabilization of the Depth of field input.
* Corresponds to the TAA pass in the paper.
*
* TODO: This pass needs a cleanup / improvement using much better TAA.
*
* Inputs:
* - Output of setup pass (halfres).
* Outputs:
* - Stabilized Color and CoC (halfres).
**/
#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
/* Cheap, non normalized luminance: green is counted twice, red and blue once. */
float fast_luma(vec3 color)
{
  float luma = 2.0 * color.g;
  luma += color.r;
  luma += color.b;
  return luma;
}
/**
 * Lightweight version of neighborhood clamping found in TAA.
 * Scales `color` so that its luma does not exceed an upper bound derived from the average
 * and maximum luma of the 8 surrounding texels of `color_tx`. `dof_buf.denoise_factor`
 * controls how strongly the bound is pulled from the center luma towards the average.
 */
vec3 dof_neighborhood_clamping(vec3 color)
{
  vec2 texel_size = 1.0 / vec2(textureSize(color_tx, 0));
  vec2 center_uv = (vec2(gl_GlobalInvocationID.xy) + 0.5) * texel_size;
  /* (x, y) hold the horizontal offsets (-1, +1), (z, w) the vertical ones. */
  vec4 ofs = vec4(-1, 1, -1, 1) * texel_size.xxyy;
  /* Masks keeping a single axis of an offset. */
  const vec2 only_x = vec2(1.0, 0.0);
  const vec2 only_y = vec2(0.0, 1.0);

  /* Luma clamping. 3x3 square neighborhood, named l<column><row>. */
  float l11 = fast_luma(color);
  float l00 = fast_luma(textureLod(color_tx, center_uv + ofs.xz, 0.0).rgb);
  float l01 = fast_luma(textureLod(color_tx, center_uv + ofs.xz * only_x, 0.0).rgb);
  float l02 = fast_luma(textureLod(color_tx, center_uv + ofs.xw, 0.0).rgb);
  float l10 = fast_luma(textureLod(color_tx, center_uv + ofs.xz * only_y, 0.0).rgb);
  float l12 = fast_luma(textureLod(color_tx, center_uv + ofs.xw * only_y, 0.0).rgb);
  float l20 = fast_luma(textureLod(color_tx, center_uv + ofs.yz, 0.0).rgb);
  float l21 = fast_luma(textureLod(color_tx, center_uv + ofs.yz * only_x, 0.0).rgb);
  float l22 = fast_luma(textureLod(color_tx, center_uv + ofs.yw, 0.0).rgb);

  /* The center texel is excluded from both statistics. */
  float neighbors_avg = avg8(l00, l01, l02, l10, l12, l20, l21, l22);
  float neighbors_max = max8(l00, l01, l02, l10, l12, l20, l21, l22);

  float factor = dof_buf.denoise_factor;
  float upper_bound = mix(l11, mix(neighbors_max, neighbors_avg, factor), factor);
  /* Rescale the color to the clamped luma. */
  return color * min(upper_bound, l11) * safe_rcp(l11);
}
/**
 * Stabilize entry point, one thread per texel.
 * Copies the CoC unchanged and stores the input color after neighborhood luma clamping.
 */
void main()
{
  ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy);
  vec2 input_size = vec2(textureSize(color_tx, 0).xy);
  vec2 uv = (vec2(gl_GlobalInvocationID.xy) + 0.5) / input_size;

  vec4 stabilized_color = textureLod(color_tx, uv, 0.0);
  stabilized_color.rgb = dof_neighborhood_clamping(stabilized_color.rgb);
  /* TODO(fclem): Stabilize CoC. */
  float coc = textureLod(coc_tx, uv, 0.0).r;

  imageStore(out_color_img, out_texel, stabilized_color);
  imageStore(out_coc_img, out_texel, vec4(coc));
}

View File

@ -0,0 +1,108 @@
/**
* Tile dilate pass: Takes the 8x8 tiles buffer and dilates the tiles with large CoC to
* their neighborhood. This pass is repeated multiple times until the maximum CoC can be covered.
*
* Input & Output:
* - Separated foreground and background CoC. 1/8th of half-res resolution. So 1/16th of full-res.
**/
#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
/* Error introduced by the random offset of the gathering kernel's center.
* Used as a multiplier on the CoC before comparing it to a ring distance. */
const float bluring_radius_error = 1.0 + 1.0 / (float(DOF_GATHER_RING_COUNT) + 0.5);
/* Converts a distance in tiles to a distance in pixels.
* NOTE(review): the `* 2` presumably accounts for tiles being built from the halfres buffer
* -- confirm. */
const float tile_to_fullres_factor = float(DOF_TILES_SIZE * 2);
/**
* Tile dilate entry point, one thread per tile.
* First gathers the CoC extrema of the neighbor tiles into one bucket per ring, then merges
* each bucket into the center tile if a CoC is large enough to reach that ring.
* `DILATE_MODE_MIN_MAX` selects which fields of the tile are dilated.
*/
void main()
{
ivec2 center_tile_pos = ivec2(gl_GlobalInvocationID.xy);
/* One accumulator per ring. Only the first `ring_count` entries are initialized. */
CocTile ring_buckets[DOF_DILATE_RING_COUNT];
for (int ring = 0; ring < ring_count && ring < DOF_DILATE_RING_COUNT; ring++) {
ring_buckets[ring] = dof_coc_tile_init();
int ring_distance = ring + 1;
/* Half of the ring is enumerated, the other half comes from the mirrored offset below. */
for (int sample_id = 0; sample_id < 4 * ring_distance; sample_id++) {
ivec2 offset = dof_square_ring_sample_offset(ring_distance, sample_id);
offset *= ring_width_multiplier;
/* Sample both the offset and its mirror. */
for (int i = 0; i < 2; i++) {
ivec2 adj_tile_pos = center_tile_pos + ((i == 0) ? offset : -offset);
CocTile adj_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, adj_tile_pos);
if (DILATE_MODE_MIN_MAX) {
/* Actually gather the "absolute" biggest coc but keeping the sign. */
ring_buckets[ring].fg_min_coc = min(ring_buckets[ring].fg_min_coc, adj_tile.fg_min_coc);
ring_buckets[ring].bg_max_coc = max(ring_buckets[ring].bg_max_coc, adj_tile.bg_max_coc);
if (dilate_slight_focus) {
ring_buckets[ring].fg_slight_focus_max_coc = dof_coc_max_slight_focus(
ring_buckets[ring].fg_slight_focus_max_coc, adj_tile.fg_slight_focus_max_coc);
}
}
else { /* DILATE_MODE_MIN_ABS */
ring_buckets[ring].fg_max_coc = max(ring_buckets[ring].fg_max_coc, adj_tile.fg_max_coc);
ring_buckets[ring].bg_min_coc = min(ring_buckets[ring].bg_min_coc, adj_tile.bg_min_coc);
/* Should be tight as possible to reduce gather overhead (see slide 61). */
float closest_neighbor_distance = length(max(abs(vec2(offset)) - 1.0, 0.0)) *
tile_to_fullres_factor;
ring_buckets[ring].fg_max_intersectable_coc = max(
ring_buckets[ring].fg_max_intersectable_coc,
adj_tile.fg_max_intersectable_coc + closest_neighbor_distance);
ring_buckets[ring].bg_min_intersectable_coc = min(
ring_buckets[ring].bg_min_intersectable_coc,
adj_tile.bg_min_intersectable_coc + closest_neighbor_distance);
}
}
}
}
/* Load center tile. */
CocTile out_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, center_tile_pos);
/* Dilate once.
* NOTE(review): reads `ring_buckets[0]`, which is only initialized when `ring_count > 0`
* -- confirm callers never pass 0. */
if (dilate_slight_focus) {
out_tile.fg_slight_focus_max_coc = dof_coc_max_slight_focus(
out_tile.fg_slight_focus_max_coc, ring_buckets[0].fg_slight_focus_max_coc);
}
for (int ring = 0; ring < ring_count && ring < DOF_DILATE_RING_COUNT; ring++) {
/* Distance in pixels from the center tile to the closest edge of this ring. */
float ring_distance = float(ring + 1);
ring_distance = (ring_distance * ring_width_multiplier - 1) * tile_to_fullres_factor;
if (DILATE_MODE_MIN_MAX) {
/* Merge the ring only if its largest CoC is big enough to reach the center tile. */
/* NOTE(fclem): Unsure if both sides of the inequalities have the same unit. */
if (-ring_buckets[ring].fg_min_coc * bluring_radius_error > ring_distance) {
out_tile.fg_min_coc = min(out_tile.fg_min_coc, ring_buckets[ring].fg_min_coc);
}
if (ring_buckets[ring].bg_max_coc * bluring_radius_error > ring_distance) {
out_tile.bg_max_coc = max(out_tile.bg_max_coc, ring_buckets[ring].bg_max_coc);
}
}
else { /* DILATE_MODE_MIN_ABS */
/* Find minimum absolute CoC radii that will be intersected for the previously
* computed maximum CoC values. */
if (-out_tile.fg_min_coc * bluring_radius_error > ring_distance) {
out_tile.fg_max_coc = max(out_tile.fg_max_coc, ring_buckets[ring].fg_max_coc);
out_tile.fg_max_intersectable_coc = max(out_tile.fg_max_intersectable_coc,
ring_buckets[ring].fg_max_intersectable_coc);
}
if (out_tile.bg_max_coc * bluring_radius_error > ring_distance) {
out_tile.bg_min_coc = min(out_tile.bg_min_coc, ring_buckets[ring].bg_min_coc);
out_tile.bg_min_intersectable_coc = min(out_tile.bg_min_intersectable_coc,
ring_buckets[ring].bg_min_intersectable_coc);
}
}
}
ivec2 texel_out = ivec2(gl_GlobalInvocationID.xy);
dof_coc_tile_store(out_tiles_fg_img, out_tiles_bg_img, texel_out, out_tile);
}

View File

@ -0,0 +1,106 @@
/**
* Tile flatten pass: Takes the halfres CoC buffer and converts it to 8x8 tiles.
*
* Output min and max values for each tile and for both foreground & background.
* Also outputs min intersectable CoC for the background, which is the minimum CoC
* that comes from the background pixels.
*
* Input:
* - Half-resolution Circle of confusion. Out of setup pass.
* Output:
* - Separated foreground and background CoC. 1/8th of half-res resolution. So 1/16th of full-res.
*/
#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl)
/**
* In order to use atomic operations, we have to use uints. But this means having to deal with the
* negative number ourselves. Luckily, each ground have a nicely defined range of values we can
* remap to positive float.
*/
/* Per work-group accumulators, one set per output tile. They hold the bit pattern of
* non-negative floats so that the unsigned atomicMin / atomicMax order them like floats. */
shared uint fg_min_coc;
shared uint fg_max_coc;
shared uint fg_max_intersectable_coc;
shared uint bg_min_coc;
shared uint bg_max_coc;
shared uint bg_min_intersectable_coc;
/* Maximum of the slight focus CoC, stored with a +1.0 bias to keep it non-negative. */
shared uint fg_slight_focus_max_coc;
/* Bit-field of the `slight_focus_flag_*` values encountered in the tile. */
shared uint fg_slight_focus_flag;
const uint slight_focus_flag_defocus = 1u;
const uint slight_focus_flag_focus = 2u;
/* Bit pattern of `dof_tile_large_coc`: initial value of the minimum accumulators and value
* submitted by samples that do not belong to the accumulated field. */
const uint dof_tile_large_coc_uint = floatBitsToUint(dof_tile_large_coc);
/**
* Tile flatten entry point. One work-group per output tile, one thread per texel of `coc_tx`.
* Thread (0, 0) resets the shared accumulators, every thread merges its sample using atomics,
* then thread (0, 0) converts the result back to floats and stores the tile.
*/
void main()
{
if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) {
/* NOTE: Min/Max flipped because of inverted fg_coc sign. */
fg_min_coc = floatBitsToUint(0.0);
fg_max_coc = dof_tile_large_coc_uint;
fg_max_intersectable_coc = dof_tile_large_coc_uint;
bg_min_coc = dof_tile_large_coc_uint;
bg_max_coc = floatBitsToUint(0.0);
bg_min_intersectable_coc = dof_tile_large_coc_uint;
/* Should be -1.0 but we want to avoid the sign bit in float representation. */
fg_slight_focus_max_coc = floatBitsToUint(0.0);
fg_slight_focus_flag = 0u;
}
/* Wait for the accumulators to be initialized. */
barrier();
/* Clamp so that threads outside of the texture still read a valid texel. */
ivec2 sample_texel = min(ivec2(gl_GlobalInvocationID.xy), textureSize(coc_tx, 0).xy - 1);
vec2 sample_data = texelFetch(coc_tx, sample_texel, 0).rg;
float sample_coc = sample_data.x;
/* Foreground CoC is negative: accumulate its negation, clamped to 0 for other samples. */
uint fg_coc = floatBitsToUint(max(-sample_coc, 0.0));
/* NOTE: atomicMin/Max flipped because of inverted fg_coc sign. */
atomicMax(fg_min_coc, fg_coc);
atomicMin(fg_max_coc, fg_coc);
/* Only samples that are actually in the foreground take part in this one. */
atomicMin(fg_max_intersectable_coc, (sample_coc < 0.0) ? fg_coc : dof_tile_large_coc_uint);
uint bg_coc = floatBitsToUint(max(sample_coc, 0.0));
atomicMin(bg_min_coc, bg_coc);
atomicMax(bg_max_coc, bg_coc);
/* Only samples that are actually in the background take part in this one. */
atomicMin(bg_min_intersectable_coc, (sample_coc > 0.0) ? bg_coc : dof_tile_large_coc_uint);
/* Mimics logic of dof_coc_max_slight_focus(). */
float sample_slight_focus_coc = sample_data.y;
if (sample_slight_focus_coc == dof_tile_defocus) {
atomicOr(fg_slight_focus_flag, slight_focus_flag_defocus);
}
else if (sample_slight_focus_coc == dof_tile_focus) {
atomicOr(fg_slight_focus_flag, slight_focus_flag_focus);
}
/* Add 1 in order to compare signed floats in [-1..1] range. */
atomicMax(fg_slight_focus_max_coc, floatBitsToUint(sample_slight_focus_coc + 1.0));
/* Wait for every thread of the tile to have merged its sample. */
barrier();
if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) {
/* No foreground sample in the tile: output 0 instead of the large marker value. */
if (fg_max_intersectable_coc == dof_tile_large_coc_uint) {
fg_max_intersectable_coc = floatBitsToUint(0.0);
}
CocTile tile;
/* Foreground sign is flipped since we compare unsigned representation. */
tile.fg_min_coc = -uintBitsToFloat(fg_min_coc);
tile.fg_max_coc = -uintBitsToFloat(fg_max_coc);
tile.fg_max_intersectable_coc = -uintBitsToFloat(fg_max_intersectable_coc);
tile.bg_min_coc = uintBitsToFloat(bg_min_coc);
tile.bg_max_coc = uintBitsToFloat(bg_max_coc);
tile.bg_min_intersectable_coc = uintBitsToFloat(bg_min_intersectable_coc);
/* Mimics logic of dof_coc_max_slight_focus(). */
if (fg_slight_focus_flag == (slight_focus_flag_defocus | slight_focus_flag_focus)) {
/* Both fully focused and fully defocused samples were found. */
tile.fg_slight_focus_max_coc = dof_tile_mixed;
}
else {
/* Remove the 1 bias. */
tile.fg_slight_focus_max_coc = uintBitsToFloat(fg_slight_focus_max_coc) - 1.0;
}
/* One tile per work-group. */
ivec2 tile_co = ivec2(gl_WorkGroupID.xy);
dof_coc_tile_store(out_tiles_fg_img, out_tiles_bg_img, tile_co, tile);
}
}

View File

@ -0,0 +1,248 @@
#include "eevee_defines.hh"
#include "gpu_shader_create_info.hh"
/* -------------------------------------------------------------------- */
/** \name Setup
* \{ */
/* Bokeh LUT pass: compute shader writing the gather (RG16F), scatter (R16F) and resolve (R16F)
* look-up tables. The local group size is DOF_BOKEH_LUT_SIZE in both dimensions. */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_bokeh_lut)
.do_static_compilation(true)
.local_group_size(DOF_BOKEH_LUT_SIZE, DOF_BOKEH_LUT_SIZE)
.additional_info("eevee_shared", "draw_view")
.uniform_buf(1, "DepthOfFieldData", "dof_buf")
.image(0, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_gather_lut_img")
.image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_scatter_lut_img")
.image(2, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_resolve_lut_img")
.compute_source("eevee_depth_of_field_bokeh_lut_comp.glsl");
/* Setup pass: samples the scene color and depth and writes a color image (RGBA16F) and a two
* channel CoC image (RG16F). */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_setup)
.do_static_compilation(true)
.local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE)
.additional_info("eevee_shared", "draw_view")
.uniform_buf(1, "DepthOfFieldData", "dof_buf")
.sampler(0, ImageType::FLOAT_2D, "color_tx")
.sampler(1, ImageType::DEPTH_2D, "depth_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
.image(1, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_img")
.compute_source("eevee_depth_of_field_setup_comp.glsl");
/* Stabilize pass: samples the color and CoC textures and writes a stabilized color image
 * (RGBA16F) and a single channel CoC image (R16F).
 * `coc_tx` is a regular float texture: the CoC is written by the setup pass to a color image
 * and every other pass declares its `coc_tx` sampler as FLOAT_2D. */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_stabilize)
    .do_static_compilation(true)
    .local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE)
    .additional_info("eevee_shared", "draw_view")
    .uniform_buf(1, "DepthOfFieldData", "dof_buf")
    .sampler(0, ImageType::FLOAT_2D, "coc_tx")
    .sampler(1, ImageType::FLOAT_2D, "color_tx")
    // .sampler(2, ImageType::FLOAT_2D, "velocity_tx") /* TODO: TAA with reprojection. */
    .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
    .image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_img")
    .compute_source("eevee_depth_of_field_stabilize_comp.glsl");
/* Downsample pass: samples the color and CoC textures and writes a single color image
* (RGBA16F). It does not bind `dof_buf`. */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_downsample)
.do_static_compilation(true)
.local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE)
.additional_info("eevee_shared", "draw_view")
.sampler(0, ImageType::FLOAT_2D, "color_tx")
.sampler(1, ImageType::FLOAT_2D, "coc_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
.compute_source("eevee_depth_of_field_downsample_comp.glsl");
/* Reduce pass. Bindings:
* - `downsample_tx`: sampled by the scatter neighborhood rejection.
* - Storage buffers 0-1: scatter sprite lists of the foreground and background.
* - Storage buffers 2-3: matching indirect draw commands, read-write since the instance count
*   is incremented atomically by the shader.
* - Images 0-3: color mip chain, level 0 being read and written in place.
* - Images 4-7: CoC mip chain, level 0 being read only. */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_reduce)
.do_static_compilation(true)
.local_group_size(DOF_REDUCE_GROUP_SIZE, DOF_REDUCE_GROUP_SIZE)
.additional_info("eevee_shared", "draw_view")
.uniform_buf(1, "DepthOfFieldData", "dof_buf")
.sampler(0, ImageType::FLOAT_2D, "downsample_tx")
.storage_buf(0, Qualifier::WRITE, "ScatterRect", "scatter_fg_list_buf[]")
.storage_buf(1, Qualifier::WRITE, "ScatterRect", "scatter_bg_list_buf[]")
.storage_buf(2, Qualifier::READ_WRITE, "DrawCommand", "scatter_fg_indirect_buf")
.storage_buf(3, Qualifier::READ_WRITE, "DrawCommand", "scatter_bg_indirect_buf")
.image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "inout_color_lod0_img")
.image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_lod1_img")
.image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_lod2_img")
.image(3, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_lod3_img")
.image(4, GPU_R16F, Qualifier::READ, ImageType::FLOAT_2D, "in_coc_lod0_img")
.image(5, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_lod1_img")
.image(6, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_lod2_img")
.image(7, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_lod3_img")
.compute_source("eevee_depth_of_field_reduce_comp.glsl");
/** \} */
/* -------------------------------------------------------------------- */
/** \name Circle-Of-Confusion Tiles
* \{ */
/* Tile flatten pass: samples the CoC texture and writes the foreground (RGBA16F) and
* background (R11F_G11F_B10F) tile images. The outputs use image slots 2 and 3, the same
* slots as the outputs of the dilate pass. */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_flatten)
.do_static_compilation(true)
.local_group_size(DOF_TILES_FLATTEN_GROUP_SIZE, DOF_TILES_FLATTEN_GROUP_SIZE)
.additional_info("eevee_shared", "draw_view")
.sampler(0, ImageType::FLOAT_2D, "coc_tx")
.image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_fg_img")
.image(3, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_bg_img")
.compute_source("eevee_depth_of_field_tiles_flatten_comp.glsl");
/* Tile dilate pass, base info. It is not statically compiled itself: the `_minabs` and
* `_minmax` infos include it and provide the DILATE_MODE_MIN_MAX define.
* The input tile images (slots 0 and 1) come from `eevee_depth_of_field_tiles_common`. */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_dilate)
.additional_info("eevee_shared", "draw_view", "eevee_depth_of_field_tiles_common")
.local_group_size(DOF_TILES_DILATE_GROUP_SIZE, DOF_TILES_DILATE_GROUP_SIZE)
.image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_fg_img")
.image(3, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_bg_img")
.push_constant(Type::INT, "ring_count")
.push_constant(Type::INT, "ring_width_multiplier")
.push_constant(Type::BOOL, "dilate_slight_focus")
.compute_source("eevee_depth_of_field_tiles_dilate_comp.glsl");
/* Dilate variant with DILATE_MODE_MIN_MAX set to false (the "MIN_ABS" branch of the shader). */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_dilate_minabs)
.do_static_compilation(true)
.define("DILATE_MODE_MIN_MAX", "false")
.additional_info("eevee_depth_of_field_tiles_dilate");
/* Dilate variant with DILATE_MODE_MIN_MAX set to true. */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_dilate_minmax)
.do_static_compilation(true)
.define("DILATE_MODE_MIN_MAX", "true")
.additional_info("eevee_depth_of_field_tiles_dilate");
/* Read-only foreground and background tile images, shared by the passes that load CoC tiles. */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_common)
.image(0, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_fg_img")
.image(1, GPU_R11F_G11F_B10F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_bg_img");
/** \} */
/* -------------------------------------------------------------------- */
/** \name Variations
* \{ */
/* Variation without bokeh LUT: DOF_BOKEH_TEXTURE is false and no LUT sampler is declared. */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_no_lut)
.define("DOF_BOKEH_TEXTURE", "false")
/**
* WORKAROUND(@fclem): This is to keep the code as is for now. The bokeh_lut_tx is referenced
* even if not used after optimization. But we don't want to include it in the create infos.
* The define aliases the name to `color_tx` so that the shader still compiles.
*/
.define("bokeh_lut_tx", "color_tx");
/* Variation with bokeh LUT: DOF_BOKEH_TEXTURE is true and the LUT is bound to sampler 5. */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_lut)
.define("DOF_BOKEH_TEXTURE", "true")
.sampler(5, ImageType::FLOAT_2D, "bokeh_lut_tx");
/* Ground variations: select the field processed by a pass through DOF_FOREGROUND_PASS. */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_background).define("DOF_FOREGROUND_PASS", "false");
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_foreground).define("DOF_FOREGROUND_PASS", "true");
/* Quality variations: set DOF_SLIGHT_FOCUS_DENSITY to 4 (hq) or 2 (lq). */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_hq).define("DOF_SLIGHT_FOCUS_DENSITY", "4");
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_lq).define("DOF_SLIGHT_FOCUS_DENSITY", "2");
/**
 * Declare one statically compiled create-info called `variant_name` that includes every
 * info listed after it. The expansion carries its own terminating semicolon.
 */
#define EEVEE_DOF_FINAL_VARIATION(variant_name, ...) \
  GPU_SHADER_CREATE_INFO(variant_name).additional_info(__VA_ARGS__).do_static_compilation(true);

/**
 * Declare `<base>_lut` (includes "eevee_depth_of_field_lut") followed by `<base>`
 * (includes "eevee_depth_of_field_no_lut"). The remaining arguments are forwarded to both.
 */
#define EEVEE_DOF_LUT_VARIATIONS(base, ...) \
  EEVEE_DOF_FINAL_VARIATION(base##_lut, "eevee_depth_of_field_lut", __VA_ARGS__) \
  EEVEE_DOF_FINAL_VARIATION(base, "eevee_depth_of_field_no_lut", __VA_ARGS__)

/**
 * Declare the LUT / no-LUT pair for `<base>_background`, then for `<base>_foreground`,
 * each including the matching ground info.
 */
#define EEVEE_DOF_GROUND_VARIATIONS(base, ...) \
  EEVEE_DOF_LUT_VARIATIONS(base##_background, "eevee_depth_of_field_background", __VA_ARGS__) \
  EEVEE_DOF_LUT_VARIATIONS(base##_foreground, "eevee_depth_of_field_foreground", __VA_ARGS__)

/**
 * Declare the LUT / no-LUT pair for `<base>_hq`, then for `<base>_lq`, each including the
 * matching quality info.
 */
#define EEVEE_DOF_HQ_VARIATIONS(base, ...) \
  EEVEE_DOF_LUT_VARIATIONS(base##_hq, "eevee_depth_of_field_hq", __VA_ARGS__) \
  EEVEE_DOF_LUT_VARIATIONS(base##_lq, "eevee_depth_of_field_lq", __VA_ARGS__)
/** \} */
/* -------------------------------------------------------------------- */
/** \name Gather
* \{ */
/**
 * Resources shared by the compute infos that include this one (the gather and hole-fill
 * infos in this file): the `dof_buf` uniform buffer on slot 2, three input samplers, and two
 * write-only outputs (color and weight).
 * The output images start at slot 2 because `eevee_depth_of_field_tiles_common`, included
 * here, already uses image slots 0 and 1.
 */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_gather_common)
.additional_info("eevee_shared",
"draw_view",
"eevee_depth_of_field_tiles_common",
"eevee_sampling_data")
.uniform_buf(2, "DepthOfFieldData", "dof_buf")
.local_group_size(DOF_GATHER_GROUP_SIZE, DOF_GATHER_GROUP_SIZE)
.sampler(0, ImageType::FLOAT_2D, "color_tx")
.sampler(1, ImageType::FLOAT_2D, "color_bilinear_tx")
.sampler(2, ImageType::FLOAT_2D, "coc_tx")
.image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
.image(3, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_weight_img");
/**
 * Base info of the gather compute pass: the gather-common resources plus a write-only
 * occlusion image on slot 4 and the gather compute source.
 * It does not request static compilation itself; the compiled variants are the ones
 * generated from it by `EEVEE_DOF_GROUND_VARIATIONS`.
 */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_gather)
.image(4, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_occlusion_img")
.compute_source("eevee_depth_of_field_gather_comp.glsl")
.additional_info("eevee_depth_of_field_gather_common");
/**
 * Generates the four statically compiled gather variants:
 * `eevee_depth_of_field_gather_background_lut`, `eevee_depth_of_field_gather_background`,
 * `eevee_depth_of_field_gather_foreground_lut` and `eevee_depth_of_field_gather_foreground`.
 * No trailing semicolon is needed, since the macro expansion supplies it.
 */
EEVEE_DOF_GROUND_VARIATIONS(eevee_depth_of_field_gather, "eevee_depth_of_field_gather")
/**
 * Hole-fill compute pass: the gather-common resources with a dedicated compute source.
 * It is always built with the no-LUT variation and is statically compiled as a single shader
 * (no ground or LUT permutations).
 */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_hole_fill)
.do_static_compilation(true)
.compute_source("eevee_depth_of_field_hole_fill_comp.glsl")
.additional_info("eevee_depth_of_field_gather_common", "eevee_depth_of_field_no_lut");
/**
 * Filter compute pass: samples a color texture and a weight texture and writes a color
 * image (RGBA16F) and a weight image (R16F).
 * It does not include the tiles-common info, so its output images use slots 0 and 1.
 */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_filter)
.do_static_compilation(true)
.local_group_size(DOF_FILTER_GROUP_SIZE, DOF_FILTER_GROUP_SIZE)
.additional_info("eevee_shared")
.sampler(0, ImageType::FLOAT_2D, "color_tx")
.sampler(1, ImageType::FLOAT_2D, "weight_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
.image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_weight_img")
.compute_source("eevee_depth_of_field_filter_comp.glsl");
/** \} */
/* -------------------------------------------------------------------- */
/** \name Scatter
* \{ */
/**
 * Stage interface passed to `vertex_out()` by `eevee_depth_of_field_scatter`.
 * The per-pixel data is declared `flat`; only the rect UVs are declared `no_perspective`.
 * NOTE(review): the second macro argument ("interp") is presumably the interface instance
 * name used in the GLSL sources; confirm against the scatter shaders.
 */
GPU_SHADER_INTERFACE_INFO(eevee_depth_of_field_scatter_iface, "interp")
/** Colors, weights, and Circle of confusion radii for the 4 pixels to scatter. */
.flat(Type::VEC4, "color_and_coc1")
.flat(Type::VEC4, "color_and_coc2")
.flat(Type::VEC4, "color_and_coc3")
.flat(Type::VEC4, "color_and_coc4")
/** Sprite pixel position with origin at sprite center. In pixels. */
.no_perspective(Type::VEC2, "rect_uv1")
.no_perspective(Type::VEC2, "rect_uv2")
.no_perspective(Type::VEC2, "rect_uv3")
.no_perspective(Type::VEC2, "rect_uv4")
/** Scaling factor for the bokeh distance. */
.flat(Type::FLOAT, "distance_scale");
/**
 * Scatter pass: the only info in this section with vertex and fragment sources instead of
 * a compute source.
 * It reads `ScatterRect` entries from a read-only storage buffer on slot 0, samples the
 * occlusion texture and the bokeh LUT, and writes a single VEC4 fragment output.
 * `bokeh_lut_tx` is always declared here. NOTE(review): the `use_bokeh_lut` push constant
 * presumably selects at runtime whether it is sampled; confirm in the scatter shaders.
 */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_scatter)
.do_static_compilation(true)
.additional_info("eevee_shared", "draw_view")
.sampler(0, ImageType::FLOAT_2D, "occlusion_tx")
.sampler(1, ImageType::FLOAT_2D, "bokeh_lut_tx")
.storage_buf(0, Qualifier::READ, "ScatterRect", "scatter_list_buf[]")
.fragment_out(0, Type::VEC4, "out_color")
.push_constant(Type::BOOL, "use_bokeh_lut")
.vertex_out(eevee_depth_of_field_scatter_iface)
.vertex_source("eevee_depth_of_field_scatter_vert.glsl")
.fragment_source("eevee_depth_of_field_scatter_frag.glsl");
/** \} */
/* -------------------------------------------------------------------- */
/** \name Resolve
* \{ */
/**
 * Base info of the resolve compute pass. It defines `DOF_RESOLVE_PASS`, samples the depth
 * texture plus color and weight textures for the background, foreground and hole-fill
 * layers, and writes one RGBA16F color image.
 * Sampler slot 5 is left free here; it is the slot used by `bokeh_lut_tx` in
 * `eevee_depth_of_field_lut`, which the generated `_lut` variants include.
 * The output image is on slot 2 because the included tiles-common info uses slots 0 and 1.
 * It does not request static compilation itself; the compiled variants are the ones
 * generated from it by `EEVEE_DOF_HQ_VARIATIONS`.
 */
GPU_SHADER_CREATE_INFO(eevee_depth_of_field_resolve)
.define("DOF_RESOLVE_PASS", "true")
.local_group_size(DOF_RESOLVE_GROUP_SIZE, DOF_RESOLVE_GROUP_SIZE)
.additional_info("eevee_shared",
"draw_view",
"eevee_depth_of_field_tiles_common",
"eevee_sampling_data")
.uniform_buf(2, "DepthOfFieldData", "dof_buf")
.sampler(0, ImageType::DEPTH_2D, "depth_tx")
.sampler(1, ImageType::FLOAT_2D, "color_tx")
.sampler(2, ImageType::FLOAT_2D, "color_bg_tx")
.sampler(3, ImageType::FLOAT_2D, "color_fg_tx")
.sampler(4, ImageType::FLOAT_2D, "color_hole_fill_tx")
.sampler(7, ImageType::FLOAT_2D, "weight_bg_tx")
.sampler(8, ImageType::FLOAT_2D, "weight_fg_tx")
.sampler(9, ImageType::FLOAT_2D, "weight_hole_fill_tx")
.image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
.compute_source("eevee_depth_of_field_resolve_comp.glsl");
/**
 * Generates the four statically compiled resolve variants:
 * `eevee_depth_of_field_resolve_hq_lut`, `eevee_depth_of_field_resolve_hq`,
 * `eevee_depth_of_field_resolve_lq_lut` and `eevee_depth_of_field_resolve_lq`.
 * No trailing semicolon is needed, since the macro expansion supplies it.
 */
EEVEE_DOF_HQ_VARIATIONS(eevee_depth_of_field_resolve, "eevee_depth_of_field_resolve")
/** \} */

View File

@ -1,4 +1,5 @@
#include "eevee_defines.hh"
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_flatten)

View File

@ -454,6 +454,7 @@ list(APPEND INC ${CMAKE_CURRENT_BINARY_DIR})
set(SRC_SHADER_CREATE_INFOS
../draw/engines/basic/shaders/infos/basic_depth_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_film_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_material_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh