Fix T82591: Performance regression when rendering at very high resolution

This patch introduces partial updates of GPUTexture. When rendering
a large image the GPUTexture may be scaled down. The old implementation
rescaled the whole image on the CPU and created a new GPUTexture for
every change, which flooded the PCI bus.

The new solution scales and uploads only the parts of the GPUTexture
that have changed. It does this by tracking dirty areas in tiles of
256x256 pixels. When something changes, the tiles that cover the change
are rescaled and uploaded the next time the GPUTexture is requested.
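
The idea in miniature, as a simplified self-contained sketch (TILE_SIZE
and mark_dirty are illustrative names, not the code added below; the real
bookkeeping in image_gpu.c also deduplicates already-scheduled tiles with
a bitmap):

#include <stdio.h>

#define TILE_SIZE 256

/* Mark every TILE_SIZE x TILE_SIZE tile that overlaps the changed rect.
 * The inclusive (x + w) / TILE_SIZE end computation matches the patch;
 * it may mark one extra tile when the rect ends exactly on a tile
 * boundary, which only costs one needless tile refresh. */
static void mark_dirty(int x, int y, int w, int h)
{
  const int start_tile_x = x / TILE_SIZE;
  const int start_tile_y = y / TILE_SIZE;
  const int end_tile_x = (x + w) / TILE_SIZE;
  const int end_tile_y = (y + h) / TILE_SIZE;
  for (int tile_y = start_tile_y; tile_y <= end_tile_y; tile_y++) {
    for (int tile_x = start_tile_x; tile_x <= end_tile_x; tile_x++) {
      printf("schedule tile (%d, %d) for rescale + upload\n", tile_x, tile_y);
    }
  }
}

int main(void)
{
  /* A 300x300 change at (500, 100) schedules tiles x=1..3, y=0..1,
   * i.e. 6 tiles instead of a full rescale and re-upload. */
  mark_dirty(500, 100, 300, 300);
  return 0;
}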

Test situation: Default Cube, 4 samples, 19200x10800, tile size 512.

Blender 2.83.9: 4m27s
Blender 2.91:   20+m (regression)
This patch:     1m01s

(At this resolution the image is 19200 x 10800 = ~207 million pixels, and
the render delivers hundreds of 512x512 tile results; before this patch
each delivered tile could trigger a full CPU rescale and re-upload.)

There is still room for more optimizations:
* Reduce the time that an image is locked.
** Use task scheduling to update the tiles of an image.
** Generic optimization of the ImBuf scale method.

Maniphest Tasks: T82591

Differential Revision: https://developer.blender.org/D9591
Author: Jeroen Bakker 2020-12-18 09:15:55 +01:00
Parent: 095b693614
Commit: 4f9e21bdc9
Referenced by issue #83925, Crash when rendering with OptiX denoiser enabled
Referenced by issue #82591, Performance regression when rendering at very high resolution
7 changed files with 235 additions and 87 deletions

View File

@@ -382,6 +382,8 @@ struct GPUTexture *BKE_image_get_gpu_tilemap(struct Image *image,
bool BKE_image_has_gpu_texture_premultiplied_alpha(struct Image *image, struct ImBuf *ibuf);
void BKE_image_update_gputexture(
struct Image *ima, struct ImageUser *iuser, int x, int y, int w, int h);
void BKE_image_update_gputexture_delayed(
struct Image *ima, struct ImBuf *ibuf, int x, int y, int w, int h);
void BKE_image_paint_set_mipmap(struct Main *bmain, bool mipmap);
/* Delayed free of OpenGL buffers by main thread */

View File

@@ -190,6 +190,7 @@ static void image_free_data(ID *id)
BKE_previewimg_free(&image->preview);
BLI_freelistN(&image->tiles);
BLI_freelistN(&image->gpu_refresh_areas);
}
static void image_foreach_cache(ID *id,
@@ -298,6 +299,8 @@ static void image_blend_read_data(BlendDataReader *reader, ID *id)
LISTBASE_FOREACH (ImageTile *, tile, &ima->tiles) {
tile->ok = IMA_OK;
}
ima->gpuflag = 0;
BLI_listbase_clear(&ima->gpu_refresh_areas);
}
static void image_blend_read_lib(BlendLibReader *UNUSED(reader), ID *id)
@@ -3897,6 +3900,7 @@ RenderResult *BKE_image_acquire_renderresult(Scene *scene, Image *ima)
}
else {
rr = BKE_image_get_renderslot(ima, ima->render_slot)->render;
ima->gpuflag |= IMA_GPU_REFRESH;
}
/* set proper views */

View File

@@ -23,6 +23,7 @@
#include "MEM_guardedalloc.h"
#include "BLI_bitmap.h"
#include "BLI_boxpack_2d.h"
#include "BLI_linklist.h"
#include "BLI_listbase.h"
@@ -48,6 +49,16 @@
/* Prototypes. */
static void gpu_free_unused_buffers(void);
static void image_free_gpu(Image *ima, const bool immediate);
static void image_update_gputexture_ex(
Image *ima, ImageTile *tile, ImBuf *ibuf, int x, int y, int w, int h);
/* Internal structs. */
#define IMA_PARTIAL_REFRESH_TILE_SIZE 256
typedef struct ImagePartialRefresh {
struct ImagePartialRefresh *next, *prev;
int tile_x;
int tile_y;
} ImagePartialRefresh;
/* Is the alpha of the `GPUTexture` for a given image/ibuf premultiplied. */
bool BKE_image_has_gpu_texture_premultiplied_alpha(Image *image, ImBuf *ibuf)
@@ -299,19 +310,35 @@ static GPUTexture *image_get_gpu_texture(Image *ima,
* the current `pass` and `layer` should be 0. */
short requested_pass = iuser ? iuser->pass : 0;
short requested_layer = iuser ? iuser->layer : 0;
short requested_slot = ima->render_slot;
if (ima->gpu_pass != requested_pass || ima->gpu_layer != requested_layer ||
ima->gpu_slot != requested_slot) {
if (ima->gpu_pass != requested_pass || ima->gpu_layer != requested_layer) {
ima->gpu_pass = requested_pass;
ima->gpu_layer = requested_layer;
ima->gpu_slot = requested_slot;
ima->gpuflag |= IMA_GPU_REFRESH;
}
/* currently, gpu refresh tagging is used by ima sequences */
if (ima->gpuflag & IMA_GPU_REFRESH) {
/* Check if image has been updated and tagged to be updated (full or partial). */
ImageTile *tile = BKE_image_get_tile(ima, 0);
if (((ima->gpuflag & IMA_GPU_REFRESH) != 0) ||
((ibuf == NULL || tile == NULL || !tile->ok) &&
((ima->gpuflag & IMA_GPU_PARTIAL_REFRESH) != 0))) {
image_free_gpu(ima, true);
ima->gpuflag &= ~IMA_GPU_REFRESH;
BLI_freelistN(&ima->gpu_refresh_areas);
ima->gpuflag &= ~(IMA_GPU_REFRESH | IMA_GPU_PARTIAL_REFRESH);
}
else if (ima->gpuflag & IMA_GPU_PARTIAL_REFRESH) {
BLI_assert(ibuf);
BLI_assert(tile && tile->ok);
ImagePartialRefresh *refresh_area;
while ((refresh_area = BLI_pophead(&ima->gpu_refresh_areas))) {
const int tile_offset_x = refresh_area->tile_x * IMA_PARTIAL_REFRESH_TILE_SIZE;
const int tile_offset_y = refresh_area->tile_y * IMA_PARTIAL_REFRESH_TILE_SIZE;
const int tile_width = MIN2(IMA_PARTIAL_REFRESH_TILE_SIZE, ibuf->x - tile_offset_x);
const int tile_height = MIN2(IMA_PARTIAL_REFRESH_TILE_SIZE, ibuf->y - tile_offset_y);
image_update_gputexture_ex(
ima, tile, ibuf, tile_offset_x, tile_offset_y, tile_width, tile_height);
MEM_freeN(refresh_area);
}
ima->gpuflag &= ~IMA_GPU_PARTIAL_REFRESH;
}
/* Tag as in active use for garbage collector. */
@@ -328,7 +355,6 @@ static GPUTexture *image_get_gpu_texture(Image *ima,
/* Check if we have a valid image. If not, we return a dummy
* texture with zero bind-code so we don't keep trying. */
ImageTile *tile = BKE_image_get_tile(ima, 0);
if (tile == NULL || tile->ok == 0) {
*tex = image_gpu_texture_error_create(textarget);
return *tex;
@@ -590,8 +616,8 @@ static void gpu_texture_update_scaled(GPUTexture *tex,
}
else {
/* Partial update with scaling. */
int limit_w = smaller_power_of_2_limit(full_w);
int limit_h = smaller_power_of_2_limit(full_h);
int limit_w = GPU_texture_width(tex);
int limit_h = GPU_texture_height(tex);
ibuf = update_do_scale(rect, rect_float, &x, &y, &w, &h, limit_w, limit_h, full_w, full_h);
}
@@ -643,7 +669,7 @@ static void gpu_texture_update_from_ibuf(
scaled = (ibuf->x != tilesize[0]) || (ibuf->y != tilesize[1]);
}
else {
scaled = is_over_resolution_limit(ibuf->x, ibuf->y);
scaled = (GPU_texture_width(tex) != ibuf->x) || (GPU_texture_height(tex) != ibuf->y);
}
if (scaled) {
@@ -746,18 +772,9 @@ static void gpu_texture_update_from_ibuf(
GPU_texture_unbind(tex);
}
/* Partial update of texture for texture painting. This is often much
* quicker than fully updating the texture for high resolution images. */
void BKE_image_update_gputexture(Image *ima, ImageUser *iuser, int x, int y, int w, int h)
static void image_update_gputexture_ex(
Image *ima, ImageTile *tile, ImBuf *ibuf, int x, int y, int w, int h)
{
ImBuf *ibuf = BKE_image_acquire_ibuf(ima, iuser, NULL);
ImageTile *tile = BKE_image_get_tile_from_iuser(ima, iuser);
if ((ibuf == NULL) || (w == 0) || (h == 0)) {
/* Full reload of texture. */
BKE_image_free_gputextures(ima);
}
GPUTexture *tex = ima->gputexture[TEXTARGET_2D][0];
/* Check if we need to update the main gputexture. */
if (tex != NULL && tile == ima->tiles.first) {
@@ -769,10 +786,99 @@ void BKE_image_update_gputexture(Image *ima, ImageUser *iuser, int x, int y, int
if (tex != NULL) {
gpu_texture_update_from_ibuf(tex, ima, ibuf, tile, x, y, w, h);
}
}
/* Partial update of texture for texture painting. This is often much
* quicker than fully updating the texture for high resolution images. */
void BKE_image_update_gputexture(Image *ima, ImageUser *iuser, int x, int y, int w, int h)
{
ImBuf *ibuf = BKE_image_acquire_ibuf(ima, iuser, NULL);
ImageTile *tile = BKE_image_get_tile_from_iuser(ima, iuser);
if ((ibuf == NULL) || (w == 0) || (h == 0)) {
/* Full reload of texture. */
BKE_image_free_gputextures(ima);
}
image_update_gputexture_ex(ima, tile, ibuf, x, y, w, h);
BKE_image_release_ibuf(ima, ibuf, NULL);
}
/* Mark areas on the GPUTexture that need to be updated. The areas are marked in chunks.
 * The next time the GPUTexture is used these tiles will be refreshed. This saves time
 * when writing to the same place multiple times, which happens during foreground
 * rendering. */
void BKE_image_update_gputexture_delayed(
struct Image *ima, struct ImBuf *ibuf, int x, int y, int w, int h)
{
/* Check for full refresh. */
if (ibuf && x == 0 && y == 0 && w == ibuf->x && h == ibuf->y) {
ima->gpuflag |= IMA_GPU_REFRESH;
}
/* Check if we can promote partial refresh to a full refresh. */
if ((ima->gpuflag & (IMA_GPU_REFRESH | IMA_GPU_PARTIAL_REFRESH)) ==
(IMA_GPU_REFRESH | IMA_GPU_PARTIAL_REFRESH)) {
ima->gpuflag &= ~IMA_GPU_PARTIAL_REFRESH;
BLI_freelistN(&ima->gpu_refresh_areas);
}
/* Image is already marked for complete refresh. */
if (ima->gpuflag & IMA_GPU_REFRESH) {
return;
}
/* Schedule the tiles that cover the requested area. */
const int start_tile_x = x / IMA_PARTIAL_REFRESH_TILE_SIZE;
const int start_tile_y = y / IMA_PARTIAL_REFRESH_TILE_SIZE;
const int end_tile_x = (x + w) / IMA_PARTIAL_REFRESH_TILE_SIZE;
const int end_tile_y = (y + h) / IMA_PARTIAL_REFRESH_TILE_SIZE;
const int num_tiles_x = (end_tile_x + 1) - (start_tile_x);
const int num_tiles_y = (end_tile_y + 1) - (start_tile_y);
const int num_tiles = num_tiles_x * num_tiles_y;
const bool allocate_on_heap = BLI_BITMAP_SIZE(num_tiles) > 16;
BLI_bitmap *requested_tiles = NULL;
if (allocate_on_heap) {
requested_tiles = BLI_BITMAP_NEW(num_tiles, __func__);
}
else {
requested_tiles = BLI_BITMAP_NEW_ALLOCA(num_tiles);
}
/* Mark the tiles that have already been requested. They don't need to be requested again. */
int num_tiles_not_scheduled = num_tiles;
LISTBASE_FOREACH (ImagePartialRefresh *, area, &ima->gpu_refresh_areas) {
if (area->tile_x < start_tile_x || area->tile_x > end_tile_x || area->tile_y < start_tile_y ||
area->tile_y > end_tile_y) {
continue;
}
int requested_tile_index = (area->tile_x - start_tile_x) +
(area->tile_y - start_tile_y) * num_tiles_x;
BLI_BITMAP_ENABLE(requested_tiles, requested_tile_index);
num_tiles_not_scheduled--;
if (num_tiles_not_scheduled == 0) {
break;
}
}
/* Schedule the tiles that aren't requested yet. */
if (num_tiles_not_scheduled) {
int tile_index = 0;
for (int tile_y = start_tile_y; tile_y <= end_tile_y; tile_y++) {
for (int tile_x = start_tile_x; tile_x <= end_tile_x; tile_x++) {
if (!BLI_BITMAP_TEST_BOOL(requested_tiles, tile_index)) {
ImagePartialRefresh *area = MEM_mallocN(sizeof(ImagePartialRefresh), __func__);
area->tile_x = tile_x;
area->tile_y = tile_y;
BLI_addtail(&ima->gpu_refresh_areas, area);
}
tile_index++;
}
}
ima->gpuflag |= IMA_GPU_PARTIAL_REFRESH;
}
if (allocate_on_heap) {
MEM_freeN(requested_tiles);
}
}
/* these two functions are called on entering and exiting texture paint mode,
* temporary disabling/enabling mipmapping on all images for quick texture
* updates with glTexSubImage2D. images that didn't change don't have to be
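
Taken together, the intended flow during foreground rendering is roughly
the following (a sketch, not verbatim code from this patch; it assumes the
drawing code reaches the texture through BKE_image_get_gpu_texture(), the
public wrapper around image_get_gpu_texture() above):

/* Render thread: cheap bookkeeping only, no CPU rescale, no GPU upload. */
BKE_image_update_gputexture_delayed(ima, ibuf, offset_x, offset_y, width, height);

/* Later, on the drawing thread: the queued ImagePartialRefresh areas are
 * popped, and only those 256x256 tiles are rescaled and uploaded before
 * the texture is returned. */
GPUTexture *tex = BKE_image_get_gpu_texture(ima, iuser, ibuf);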

View File

@@ -29,6 +29,7 @@
#include "BLI_listbase.h"
#include "BLI_math.h"
#include "BLI_rect.h"
#include "BLI_threads.h"
#include "BLI_timecode.h"
#include "BLI_utildefines.h"
@@ -121,72 +122,90 @@ typedef struct RenderJob {
} RenderJob;
/* called inside thread! */
static bool image_buffer_calc_tile_rect(const RenderResult *rr,
const ImBuf *ibuf,
volatile rcti *renrect,
rcti *r_ibuf_rect,
int *r_offset_x,
int *r_offset_y)
{
int tile_y, tile_height, tile_x, tile_width;
/* if renrect argument, we only refresh scanlines */
if (renrect) {
/* if (tile_height == recty), rendering of layer is ready,
* we should not draw, other things happen... */
if (rr->renlay == NULL || renrect->ymax >= rr->recty) {
return false;
}
/* tile_x here is first subrect x coord, tile_width defines subrect width */
tile_x = renrect->xmin;
tile_width = renrect->xmax - tile_x;
if (tile_width < 2) {
return false;
}
tile_y = renrect->ymin;
tile_height = renrect->ymax - tile_y;
if (tile_height < 2) {
return false;
}
renrect->ymin = renrect->ymax;
}
else {
tile_x = tile_y = 0;
tile_width = rr->rectx;
tile_height = rr->recty;
}
/* tile_x tile_y is in tile coords. transform to ibuf */
int offset_x = rr->tilerect.xmin;
if (offset_x >= ibuf->x) {
return false;
}
int offset_y = rr->tilerect.ymin;
if (offset_y >= ibuf->y) {
return false;
}
if (offset_x + tile_width > ibuf->x) {
tile_width = ibuf->x - offset_x;
}
if (offset_y + tile_height > ibuf->y) {
tile_height = ibuf->y - offset_y;
}
if (tile_width < 1 || tile_height < 1) {
return false;
}
r_ibuf_rect->xmax = tile_x + tile_width;
r_ibuf_rect->ymax = tile_y + tile_height;
r_ibuf_rect->xmin = tile_x;
r_ibuf_rect->ymin = tile_y;
*r_offset_x = offset_x;
*r_offset_y = offset_y;
return true;
}
static void image_buffer_rect_update(RenderJob *rj,
RenderResult *rr,
ImBuf *ibuf,
ImageUser *iuser,
volatile rcti *renrect,
const rcti *tile_rect,
int offset_x,
int offset_y,
const char *viewname)
{
Scene *scene = rj->scene;
const float *rectf = NULL;
int ymin, ymax, xmin, xmax;
int rymin, rxmin;
int linear_stride, linear_offset_x, linear_offset_y;
ColorManagedViewSettings *view_settings;
ColorManagedDisplaySettings *display_settings;
if (ibuf->userflags & IB_DISPLAY_BUFFER_INVALID) {
/* The whole image buffer is to be color managed again anyway. */
return;
}
/* if renrect argument, we only refresh scanlines */
if (renrect) {
/* if (ymax == recty), rendering of layer is ready,
* we should not draw, other things happen... */
if (rr->renlay == NULL || renrect->ymax >= rr->recty) {
return;
}
/* xmin here is first subrect x coord, xmax defines subrect width */
xmin = renrect->xmin;
xmax = renrect->xmax - xmin;
if (xmax < 2) {
return;
}
ymin = renrect->ymin;
ymax = renrect->ymax - ymin;
if (ymax < 2) {
return;
}
renrect->ymin = renrect->ymax;
}
else {
xmin = ymin = 0;
xmax = rr->rectx;
ymax = rr->recty;
}
/* xmin ymin is in tile coords. transform to ibuf */
rxmin = rr->tilerect.xmin;
if (rxmin >= ibuf->x) {
return;
}
rymin = rr->tilerect.ymin;
if (rymin >= ibuf->y) {
return;
}
if (rxmin + xmax > ibuf->x) {
xmax = ibuf->x - rxmin;
}
if (rymin + ymax > ibuf->y) {
ymax = ibuf->y - rymin;
}
if (xmax < 1 || ymax < 1) {
/* The whole image buffer is to be color managed again anyway. */
return;
}
@@ -230,10 +249,10 @@ static void image_buffer_rect_update(RenderJob *rj,
return;
}
rectf += 4 * (rr->rectx * ymin + xmin);
rectf += 4 * (rr->rectx * tile_rect->ymin + tile_rect->xmin);
linear_stride = rr->rectx;
linear_offset_x = rxmin;
linear_offset_y = rymin;
linear_offset_x = offset_x;
linear_offset_y = offset_y;
}
else {
rectf = ibuf->rect_float;
@@ -253,10 +272,10 @@ static void image_buffer_rect_update(RenderJob *rj,
linear_offset_y,
view_settings,
display_settings,
rxmin,
rymin,
rxmin + xmax,
rymin + ymax);
offset_x,
offset_y,
offset_x + BLI_rcti_size_x(tile_rect),
offset_y + BLI_rcti_size_y(tile_rect));
}
/* ****************************** render invoking ***************** */
@@ -578,8 +597,16 @@ static void image_rect_update(void *rjv, RenderResult *rr, volatile rcti *renrec
/* update part of render */
render_image_update_pass_and_layer(rj, rr, &rj->iuser);
rcti tile_rect;
int offset_x;
int offset_y;
ibuf = BKE_image_acquire_ibuf(ima, &rj->iuser, &lock);
if (ibuf) {
if (!image_buffer_calc_tile_rect(rr, ibuf, renrect, &tile_rect, &offset_x, &offset_y)) {
BKE_image_release_ibuf(ima, ibuf, lock);
return;
}
/* Don't waste time on CPU side color management if
* image will be displayed using GLSL.
*
@@ -589,9 +616,10 @@ static void image_rect_update(void *rjv, RenderResult *rr, volatile rcti *renrec
*/
if (!rj->supports_glsl_draw || ibuf->channels == 1 ||
ED_draw_imbuf_method(ibuf) != IMAGE_DRAW_METHOD_GLSL) {
image_buffer_rect_update(rj, rr, ibuf, &rj->iuser, renrect, viewname);
image_buffer_rect_update(rj, rr, ibuf, &rj->iuser, &tile_rect, offset_x, offset_y, viewname);
}
ima->gpuflag |= IMA_GPU_REFRESH;
BKE_image_update_gputexture_delayed(
ima, ibuf, offset_x, offset_y, BLI_rcti_size_x(&tile_rect), BLI_rcti_size_y(&tile_rect));
/* make jobs timer to send notifier */
*(rj->do_update) = true;

View File

@@ -407,6 +407,9 @@ bool ED_image_slot_cycle(struct Image *image, int direction)
image->render_slot = ((cur == 1) ? 0 : 1);
}
if ((cur != image->render_slot)) {
image->gpuflag |= IMA_GPU_REFRESH;
}
return (cur != image->render_slot);
}

View File

@@ -151,12 +151,13 @@ typedef struct Image {
int lastframe;
/* GPU texture flag. */
/* Contains `ImagePartialRefresh`. */
ListBase gpu_refresh_areas;
int gpuframenr;
short gpuflag;
short gpu_pass;
short gpu_layer;
short gpu_slot;
char _pad2[4];
char _pad2[6];
/** Deprecated. */
struct PackedFile *packedfile DNA_DEPRECATED;
@@ -223,8 +224,10 @@ enum {
enum {
/** GPU texture needs to be refreshed. */
IMA_GPU_REFRESH = (1 << 0),
/** GPU texture needs to be partially refreshed. */
IMA_GPU_PARTIAL_REFRESH = (1 << 1),
/** All mipmap levels in OpenGL texture set? */
IMA_GPU_MIPMAP_COMPLETE = (1 << 1),
IMA_GPU_MIPMAP_COMPLETE = (1 << 2),
};
/* Image.source, where the image comes from */

View File

@@ -598,6 +598,7 @@ static void rna_render_slots_active_set(PointerRNA *ptr,
int index = BLI_findindex(&image->renderslots, slot);
if (index != -1) {
image->render_slot = index;
image->gpuflag |= IMA_GPU_REFRESH;
}
}
}
@@ -613,6 +614,7 @@ static void rna_render_slots_active_index_set(PointerRNA *ptr, int value)
Image *image = (Image *)ptr->owner_id;
int num_slots = BLI_listbase_count(&image->renderslots);
image->render_slot = value;
image->gpuflag |= IMA_GPU_REFRESH;
CLAMP(image->render_slot, 0, num_slots - 1);
}