Draw: Improve performance image engine.

Image engine is used to draw the image inside the image editor, uv editor and node editor. The
performance during scrolling wasn't smooth when using larger textures on a dedicated GPU. Main
reason was the data transfers that happens when panning the image.

The original idea of the image engine was to have 4 textures that are as large as the editor.
Those textures would be used to simulate a larger canvas where if the texture is out of the
visible area the texture would be reused to contain the data of a new visible area. This would
reduce the data transfers to only on certain x/y coordinates. Between those coordinates no
data transfers would be needed.

This patch implements the mechanism described above. During development other areas to
improve have been detected (incorrect color management for float textures, using different
image formats to reduce data transfer bandwidths, using different render techniques for
images upto 8k). More improvements will follow.
This commit is contained in:
Jeroen Bakker 2022-12-07 15:09:25 +01:00
parent f423c4191f
commit 2ffc9b72ad
Notes: blender-bot 2023-04-02 22:43:35 +02:00
Referenced by issue #106092, Regression: Image Editor Speed & Quality Regression
Referenced by issue #106403, Regression: Blender 3.5 - scrolling in the rendered image got slower!
7 changed files with 183 additions and 104 deletions

View File

@ -49,12 +49,26 @@ struct FloatImageBuffer {
}
};
/**
* \brief Float buffer cache for image buffers.
*
* Image buffers might not have float buffers which are required for the image engine.
* Image buffers are not allowed to have both a float buffer and a byte buffer as some
* functionality doesn't know what to do.
*
* For this reason we store the float buffer in separate image buffers. The FloatBufferCache keep
* track of the cached buffers and if they are still used.
*
* TODO: When an image buffer has a float buffer but not stored in scene linear, it currently
* doesn't apply color management. In this case we still need to create another float buffer, but
* with the buffer converted to scene linear.
*/
struct FloatBufferCache {
private:
blender::Vector<FloatImageBuffer> cache_;
public:
ImBuf *ensure_float_buffer(ImBuf *image_buffer)
ImBuf *cached_float_buffer(ImBuf *image_buffer)
{
/* Check if we can use the float buffer of the given image_buffer. */
if (image_buffer->rect_float != nullptr) {

View File

@ -22,52 +22,117 @@ namespace blender::draw::image_engine {
constexpr float EPSILON_UV_BOUNDS = 0.00001f;
/**
* \brief Screen space method using a single texture spawning the whole screen.
* \brief Screen space method using a 4 textures spawning the whole screen.
*/
struct OneTextureMethod {
struct FullScreenTextures {
IMAGE_InstanceData *instance_data;
OneTextureMethod(IMAGE_InstanceData *instance_data) : instance_data(instance_data)
FullScreenTextures(IMAGE_InstanceData *instance_data) : instance_data(instance_data)
{
}
/** \brief Update the texture slot uv and screen space bounds. */
void update_screen_space_bounds(const ARegion *region)
/**
* \brief Update the uv and region bounds of all texture_infos of instance_data.
*/
void update_bounds(const ARegion *region)
{
/* Create a single texture that covers the visible screen space. */
// determine uv_area of the region.
float4x4 mat = float4x4(instance_data->ss_to_texture).inverted();
float2 region_uv_min = float2(mat * float3(0.0f, 0.0f, 0.0f));
float2 region_uv_max = float2(mat * float3(1.0f, 1.0f, 0.0f));
float2 region_uv_span = region_uv_max - region_uv_min;
rctf region_uv_bounds;
BLI_rctf_init(
&instance_data->texture_infos[0].clipping_bounds, 0, region->winx, 0, region->winy);
instance_data->texture_infos[0].visible = true;
&region_uv_bounds, region_uv_min.x, region_uv_max.x, region_uv_min.y, region_uv_max.y);
/* Mark the other textures as invalid. */
for (int i = 1; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
BLI_rctf_init_minmax(&instance_data->texture_infos[i].clipping_bounds);
instance_data->texture_infos[i].visible = false;
/* Calculate 9 coordinates that will be used as uv bounds of the 4 textures. */
float2 onscreen_multiple = (blender::math::floor(region_uv_min / region_uv_span) +
float2(1.0f)) *
region_uv_span;
BLI_assert(onscreen_multiple.x > region_uv_min.x);
BLI_assert(onscreen_multiple.y > region_uv_min.y);
BLI_assert(onscreen_multiple.x < region_uv_max.x);
BLI_assert(onscreen_multiple.y < region_uv_max.y);
float2 uv_coords[3][3];
uv_coords[0][0] = onscreen_multiple + float2(-region_uv_span.x, -region_uv_span.y);
uv_coords[0][1] = onscreen_multiple + float2(-region_uv_span.x, 0.0);
uv_coords[0][2] = onscreen_multiple + float2(-region_uv_span.x, region_uv_span.y);
uv_coords[1][0] = onscreen_multiple + float2(0.0f, -region_uv_span.y);
uv_coords[1][1] = onscreen_multiple + float2(0.0f, 0.0);
uv_coords[1][2] = onscreen_multiple + float2(0.0f, region_uv_span.y);
uv_coords[2][0] = onscreen_multiple + float2(region_uv_span.x, -region_uv_span.y);
uv_coords[2][1] = onscreen_multiple + float2(region_uv_span.x, 0.0);
uv_coords[2][2] = onscreen_multiple + float2(region_uv_span.x, region_uv_span.y);
/* Construct the uv bounds of the 4 textures that are needed to fill the region. */
Vector<TextureInfo *> unassigned_textures;
struct TextureInfoBounds {
TextureInfo *info = nullptr;
rctf uv_bounds;
};
TextureInfoBounds bottom_left;
TextureInfoBounds bottom_right;
TextureInfoBounds top_left;
TextureInfoBounds top_right;
BLI_rctf_init(&bottom_left.uv_bounds,
uv_coords[0][0].x,
uv_coords[1][1].x,
uv_coords[0][0].y,
uv_coords[1][1].y);
BLI_rctf_init(&bottom_right.uv_bounds,
uv_coords[1][0].x,
uv_coords[2][1].x,
uv_coords[1][0].y,
uv_coords[2][1].y);
BLI_rctf_init(&top_left.uv_bounds,
uv_coords[0][1].x,
uv_coords[1][2].x,
uv_coords[0][1].y,
uv_coords[1][2].y);
BLI_rctf_init(&top_right.uv_bounds,
uv_coords[1][1].x,
uv_coords[2][2].x,
uv_coords[1][1].y,
uv_coords[2][2].y);
Vector<TextureInfoBounds *> info_bounds;
info_bounds.append(&bottom_left);
info_bounds.append(&bottom_right);
info_bounds.append(&top_left);
info_bounds.append(&top_right);
/* Assign any existing texture that matches uv bounds. */
for (TextureInfo &info : instance_data->texture_infos) {
bool assigned = false;
for (TextureInfoBounds *info_bound : info_bounds) {
if (info_bound->info == nullptr &&
BLI_rctf_compare(&info_bound->uv_bounds, &info.clipping_uv_bounds, 0.001)) {
info_bound->info = &info;
assigned = true;
break;
}
}
if (!assigned) {
unassigned_textures.append(&info);
}
}
}
void update_screen_uv_bounds()
{
for (int i = 0; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
update_screen_uv_bounds(instance_data->texture_infos[0]);
/* Assign free textures to bounds that weren't found. */
for (TextureInfoBounds *info_bound : info_bounds) {
if (info_bound->info == nullptr) {
info_bound->info = unassigned_textures.pop_last();
info_bound->info->need_full_update = true;
info_bound->info->clipping_uv_bounds = info_bound->uv_bounds;
}
}
}
void update_screen_uv_bounds(TextureInfo &info)
{
/* Although this works, computing an inverted matrix adds some precision issues and leads to
* tearing artifacts. This should be modified to use the scaling and transformation from the
* not inverted matrix. */
float4x4 mat(instance_data->ss_to_texture);
float4x4 mat_inv = mat.inverted();
float3 min_uv = mat_inv * float3(0.0f, 0.0f, 0.0f);
float3 max_uv = mat_inv * float3(1.0f, 1.0f, 0.0f);
rctf new_clipping_bounds;
BLI_rctf_init(&new_clipping_bounds, min_uv[0], max_uv[0], min_uv[1], max_uv[1]);
if (!BLI_rctf_compare(&info.clipping_uv_bounds, &new_clipping_bounds, EPSILON_UV_BOUNDS)) {
info.clipping_uv_bounds = new_clipping_bounds;
info.dirty = true;
/* Calculate the region bounds from the uv bounds. */
rctf region_bounds;
BLI_rctf_init(&region_bounds, 0.0, region->winx, 0.0, region->winy);
float4x4 uv_to_screen;
BLI_rctf_transform_calc_m4_pivot_min(&region_uv_bounds, &region_bounds, uv_to_screen.ptr());
for (TextureInfo &info : instance_data->texture_infos) {
info.calc_region_bounds_from_uv_bounds(uv_to_screen);
}
}
};
@ -108,11 +173,8 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
unit_m4(image_mat);
for (int i = 0; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
const TextureInfo &info = instance_data->texture_infos[i];
if (!info.visible) {
continue;
}
DRWShadingGroup *shgrp_sub = DRW_shgroup_create_sub(shgrp);
DRW_shgroup_uniform_ivec2_copy(shgrp_sub, "offset", info.offset());
DRW_shgroup_uniform_texture_ex(shgrp_sub, "imageTexture", info.texture, GPU_SAMPLER_DEFAULT);
DRW_shgroup_call_obmat(shgrp_sub, info.batch, image_mat);
}
@ -140,10 +202,6 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
for (int i = 0; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
const TextureInfo &info = instance_data.texture_infos[i];
if (!info.visible) {
continue;
}
LISTBASE_FOREACH (ImageTile *, image_tile_ptr, &image->tiles) {
const ImageTileWrapper image_tile(image_tile_ptr);
const int tile_x = image_tile.get_tile_x_offset();
@ -238,7 +296,8 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
if (iterator.tile_data.tile_buffer == nullptr) {
continue;
}
ImBuf *tile_buffer = ensure_float_buffer(instance_data, iterator.tile_data.tile_buffer);
ImBuf *tile_buffer = instance_data.float_buffers.cached_float_buffer(
iterator.tile_data.tile_buffer);
if (tile_buffer != iterator.tile_data.tile_buffer) {
do_partial_update_float_buffer(tile_buffer, iterator);
}
@ -249,10 +308,7 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
for (int i = 0; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
const TextureInfo &info = instance_data.texture_infos[i];
/* Dirty images will receive a full update. No need to do a partial one now. */
if (info.dirty) {
continue;
}
if (!info.visible) {
if (info.need_full_update) {
continue;
}
GPUTexture *texture = info.texture;
@ -353,10 +409,7 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
{
for (int i = 0; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
TextureInfo &info = instance_data.texture_infos[i];
if (!info.dirty) {
continue;
}
if (!info.visible) {
if (!info.need_full_update) {
continue;
}
do_full_update_gpu_texture(info, instance_data, image_user);
@ -395,18 +448,9 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
}
/**
* \brief Ensure that the float buffer of the given image buffer is available.
*
* Returns true when a float buffer was created. Somehow the sequencer cache increases the ref
* counter, but might use a different mechanism for destructing the image, that doesn't free the
* rect_float as the reference-counter isn't 0. To work around this we destruct any created local
* buffers ourself.
* texture_buffer is the image buffer belonging to the texture_info.
* tile_buffer is the image buffer of the tile.
*/
ImBuf *ensure_float_buffer(IMAGE_InstanceData &instance_data, ImBuf *image_buffer) const
{
return instance_data.float_buffers.ensure_float_buffer(image_buffer);
}
void do_full_update_texture_slot(IMAGE_InstanceData &instance_data,
const TextureInfo &texture_info,
ImBuf &texture_buffer,
@ -415,24 +459,24 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
{
const int texture_width = texture_buffer.x;
const int texture_height = texture_buffer.y;
ImBuf *float_tile_buffer = ensure_float_buffer(instance_data, &tile_buffer);
ImBuf *float_tile_buffer = instance_data.float_buffers.cached_float_buffer(&tile_buffer);
/* IMB_transform works in a non-consistent space. This should be documented or fixed!.
* Construct a variant of the info_uv_to_texture that adds the texel space
* transformation. */
float uv_to_texel[4][4];
copy_m4_m4(uv_to_texel, instance_data.ss_to_texture);
float scale[3] = {float(texture_width) / float(tile_buffer.x),
float(texture_height) / float(tile_buffer.y),
1.0f};
rescale_m4(uv_to_texel, scale);
uv_to_texel[3][0] += image_tile.get_tile_x_offset() /
BLI_rctf_size_x(&texture_info.clipping_uv_bounds);
uv_to_texel[3][1] += image_tile.get_tile_y_offset() /
BLI_rctf_size_y(&texture_info.clipping_uv_bounds);
uv_to_texel[3][0] *= texture_width;
uv_to_texel[3][1] *= texture_height;
invert_m4(uv_to_texel);
float4x4 uv_to_texel;
rctf texture_area;
rctf tile_area;
BLI_rctf_init(&texture_area, 0.0, texture_width, 0.0, texture_height);
BLI_rctf_init(
&tile_area,
tile_buffer.x * (texture_info.clipping_uv_bounds.xmin - image_tile.get_tile_x_offset()),
tile_buffer.x * (texture_info.clipping_uv_bounds.xmax - image_tile.get_tile_x_offset()),
tile_buffer.y * (texture_info.clipping_uv_bounds.ymin - image_tile.get_tile_y_offset()),
tile_buffer.y * (texture_info.clipping_uv_bounds.ymax - image_tile.get_tile_y_offset()));
BLI_rctf_transform_calc_m4_pivot_min(&tile_area, &texture_area, uv_to_texel.ptr());
invert_m4(uv_to_texel.ptr());
rctf crop_rect;
rctf *crop_rect_ptr = nullptr;
@ -450,7 +494,7 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
&texture_buffer,
transform_mode,
IMB_FILTER_NEAREST,
uv_to_texel,
uv_to_texel.ptr(),
crop_rect_ptr);
}
@ -469,14 +513,13 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
TextureMethod method(instance_data);
instance_data->partial_update.ensure_image(image);
instance_data->clear_dirty_flag();
instance_data->clear_need_full_update_flag();
instance_data->float_buffers.reset_usage_flags();
/* Step: Find out which screen space textures are needed to draw on the screen. Remove the
* screen space textures that aren't needed. */
const ARegion *region = draw_ctx->region;
method.update_screen_space_bounds(region);
method.update_screen_uv_bounds();
method.update_bounds(region);
/* Check for changes in the image user compared to the last time. */
instance_data->update_image_usage(iuser);

View File

@ -53,7 +53,7 @@ template<
*
* Useful during development to switch between drawing implementations.
*/
typename DrawingMode = ScreenSpaceDrawingMode<OneTextureMethod>>
typename DrawingMode = ScreenSpaceDrawingMode<FullScreenTextures>>
class ImageEngine {
private:
const DRWContextState *draw_ctx;

View File

@ -21,8 +21,12 @@
/**
* \brief max allowed textures to use by the ScreenSpaceDrawingMode.
*
* The image engine uses 4 full screen textures to draw the image. With 4 textures it is possible
* to pan the screen where only the texture needs to be updated when they are not visible on the
* screen.
*/
constexpr int SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN = 1;
constexpr int SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN = 4;
struct IMAGE_InstanceData {
struct Image *image;
@ -60,13 +64,13 @@ struct IMAGE_InstanceData {
public:
virtual ~IMAGE_InstanceData() = default;
void clear_dirty_flag()
void clear_need_full_update_flag()
{
reset_dirty_flag(false);
reset_need_full_update(false);
}
void mark_all_texture_slots_dirty()
{
reset_dirty_flag(true);
reset_need_full_update(true);
}
void update_gpu_texture_allocations()
@ -74,11 +78,10 @@ struct IMAGE_InstanceData {
for (int i = 0; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
TextureInfo &info = texture_infos[i];
const bool is_allocated = info.texture != nullptr;
const bool is_visible = info.visible;
const bool resolution_changed = assign_if_different(info.last_viewport_size,
float2(DRW_viewport_size_get()));
const bool should_be_freed = is_allocated && (!is_visible || resolution_changed);
const bool should_be_created = is_visible && (!is_allocated || resolution_changed);
const bool should_be_freed = is_allocated && resolution_changed;
const bool should_be_created = !is_allocated || resolution_changed;
if (should_be_freed) {
GPU_texture_free(info.texture);
@ -89,7 +92,7 @@ struct IMAGE_InstanceData {
DRW_texture_ensure_fullscreen_2d(
&info.texture, GPU_RGBA16F, static_cast<DRWTextureFlag>(0));
}
info.dirty |= should_be_created;
info.need_full_update |= should_be_created;
}
}
@ -97,9 +100,6 @@ struct IMAGE_InstanceData {
{
for (int i = 0; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
TextureInfo &info = texture_infos[i];
if (!info.dirty) {
continue;
}
BatchUpdater batch_updater(info);
batch_updater.update_batch();
}
@ -110,17 +110,17 @@ struct IMAGE_InstanceData {
ImageUsage usage(image, image_user, flags.do_tile_drawing);
if (last_usage != usage) {
last_usage = usage;
reset_dirty_flag(true);
reset_need_full_update(true);
float_buffers.clear();
}
}
private:
/** \brief Set dirty flag of all texture slots to the given value. */
void reset_dirty_flag(bool new_value)
void reset_need_full_update(bool new_value)
{
for (int i = 0; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
texture_infos[i].dirty = new_value;
texture_infos[i].need_full_update = new_value;
}
}
};

View File

@ -7,25 +7,19 @@
#pragma once
#include "BLI_math_vec_types.hh"
#include "BLI_rect.h"
#include "GPU_batch.h"
#include "GPU_texture.h"
struct TextureInfo {
/**
* \brief Is the texture clipped.
*
* Resources of clipped textures are freed and ignored when performing partial updates.
*/
bool visible : 1;
/**
* \brief does this texture need a full update.
*
* When set to false the texture can be updated using a partial update.
*/
bool dirty : 1;
bool need_full_update : 1;
/** \brief area of the texture in screen space. */
rctf clipping_bounds;
@ -60,4 +54,31 @@ struct TextureInfo {
texture = nullptr;
}
}
/**
* \brief return the offset of the texture with the area.
*
* A texture covers only a part of the area. The offset if the offset in screen coordinates
* between the area and the part that the texture covers.
*/
int2 offset() const
{
return int2(clipping_bounds.xmin, clipping_bounds.ymin);
}
/**
* \brief Update the region bounds from the uv bounds by applying the given transform matrix.
*/
void calc_region_bounds_from_uv_bounds(const float4x4 &uv_to_region)
{
float3 bottom_left_uv = float3(clipping_uv_bounds.xmin, clipping_uv_bounds.ymin, 0.0f);
float3 top_right_uv = float3(clipping_uv_bounds.xmax, clipping_uv_bounds.ymax, 0.0f);
float3 bottom_left_region = uv_to_region * bottom_left_uv;
float3 top_right_region = uv_to_region * top_right_uv;
BLI_rctf_init(&clipping_bounds,
bottom_left_region.x,
top_right_region.x,
bottom_left_region.y,
top_right_region.y);
}
};

View File

@ -17,7 +17,7 @@ void main()
discard;
}
vec4 tex_color = texelFetch(imageTexture, uvs_clamped, 0);
vec4 tex_color = texelFetch(imageTexture, uvs_clamped - offset, 0);
if ((drawFlags & IMAGE_DRAW_FLAG_APPLY_ALPHA) != 0) {
if (!imgPremultiplied) {

View File

@ -10,6 +10,7 @@ GPU_SHADER_CREATE_INFO(image_engine_color_shader)
.fragment_out(0, Type::VEC4, "fragColor")
.push_constant(Type::VEC4, "shuffle")
.push_constant(Type::VEC2, "farNearDistances")
.push_constant(Type::IVEC2, "offset")
.push_constant(Type::INT, "drawFlags")
.push_constant(Type::BOOL, "imgPremultiplied")
.sampler(0, ImageType::FLOAT_2D, "imageTexture")