ImBuf: Optimize GPU memory by using 1 component format for grayscale images

This is done by checking the number of bitplanes from the image buffer.
We assume that for float buffer to use the same bitplanes as it was a
byte buffer.

Then, the data of the image buffer is packed at the start of the `rect` or
`float_rect` before upload.

**Statistics - einar.v004.blend **

Note that not all grayscale textures have been stored as BW images so the
amount of memory that can be reduced would be more.

Without patch
```
104 Textures - 3294.99 MB (3294.47 MB over 32x32), 37 RTs - 192.52 MB.
Avg. tex dimension: 2201.88x1253.51 (2283.53x2202.13 over 32x32)
464 Buffers - 25.01 MB total 1.24 MB IBs 23.50 MB VBs.
3512.52 MB - Grand total GPU buffer + texture load
```

Patch applied
```
104 Textures - 2917.66 MB (2917.14 MB over 32x32), 39 RTs - 215.45 MB.
Avg. tex dimension: 2221.38x1252.75 (2323.28x2253.47 over 32x32)
467 Buffers - 25.01 MB total 1.24 MB IBs 23.51 MB VBs.
3158.13 MB - Grand total GPU buffer + texture load.
```

Reviewed By: fclem

Differential Revision: https://developer.blender.org/D15484
This commit is contained in:
Jeroen Bakker 2022-08-23 14:26:01 +02:00 committed by Jeroen Bakker
parent 13b2716e1c
commit 37533cd6cb
Notes: blender-bot 2023-03-22 10:52:38 +01:00
Referenced by issue #100749, Blender LTS: Maintenance Task 3.3
Referenced by issue #103075, Regression: crash when resizing grayscale textures (eg. setting "Limit Size" to a small value)
Referenced by issue #105976, Regression: Targa Files with Palettes/Indexed Colors are rendered in Grayscale w/ Eevee
7 changed files with 131 additions and 28 deletions

View File

@ -186,8 +186,14 @@ struct NodeData {
{
UDIMTilePixels *tile = find_tile_data(image_tile);
if (tile && tile->flags.dirty) {
BKE_image_partial_update_mark_region(
&image, image_tile.image_tile, &image_buffer, &tile->dirty_region);
if (image_buffer.planes == 8) {
image_buffer.planes = 32;
BKE_image_partial_update_mark_full_update(&image);
}
else {
BKE_image_partial_update_mark_region(
&image, image_tile.image_tile, &image_buffer, &tile->dirty_region);
}
tile->clear_dirty();
}
}

View File

@ -138,6 +138,8 @@ static GPUTexture *gpu_texture_create_tile_array(Image *ima, ImBuf *main_ibuf)
int arraywidth = 0, arrayheight = 0;
ListBase boxes = {nullptr};
int planes = 0;
LISTBASE_FOREACH (ImageTile *, tile, &ima->tiles) {
ImageUser iuser;
BKE_imageuser_default(&iuser);
@ -164,6 +166,7 @@ static GPUTexture *gpu_texture_create_tile_array(Image *ima, ImBuf *main_ibuf)
BKE_image_release_ibuf(ima, ibuf, nullptr);
BLI_addtail(&boxes, packtile);
planes = max_ii(planes, ibuf->planes);
}
}
@ -195,9 +198,15 @@ static GPUTexture *gpu_texture_create_tile_array(Image *ima, ImBuf *main_ibuf)
}
const bool use_high_bitdepth = (ima->flag & IMA_HIGH_BITDEPTH);
const bool use_grayscale = planes <= 8;
/* Create Texture without content. */
GPUTexture *tex = IMB_touch_gpu_texture(
ima->id.name + 2, main_ibuf, arraywidth, arrayheight, arraylayers, use_high_bitdepth);
GPUTexture *tex = IMB_touch_gpu_texture(ima->id.name + 2,
main_ibuf,
arraywidth,
arrayheight,
arraylayers,
use_high_bitdepth,
use_grayscale);
/* Upload each tile one by one. */
LISTBASE_FOREACH (ImageTile *, tile, &ima->tiles) {
@ -223,6 +232,7 @@ static GPUTexture *gpu_texture_create_tile_array(Image *ima, ImBuf *main_ibuf)
tilelayer,
UNPACK2(tilesize),
use_high_bitdepth,
use_grayscale,
store_premultiplied);
}

View File

@ -158,6 +158,16 @@ void imapaint_image_update(
imapaintpartial.dirty_region.xmax,
imapaintpartial.dirty_region.ymax);
/* When buffer is partial updated the planes should be set to a larger value than 8. This will
* make sure that partial updating is working but uses more GPU memory as the gpu texture will
* have 4 channels. When so the whole texture needs to be reuploaded to the GPU using the new
* texture format.*/
if (ibuf != nullptr && ibuf->planes == 8) {
ibuf->planes = 32;
BKE_image_partial_update_mark_full_update(image);
return;
}
/* TODO: should set_tpage create ->rect? */
if (texpaint || (sima && sima->lock)) {
const int w = BLI_rcti_size_x(&imapaintpartial.dirty_region);

View File

@ -1212,8 +1212,8 @@ void uiTemplateImageInfo(uiLayout *layout, bContext *C, Image *ima, ImageUser *i
ofs += BLI_strncpy_rlen(str + ofs, TIP_(" + Z"), len - ofs);
}
eGPUTextureFormat texture_format = IMB_gpu_get_texture_format(ibuf,
ima->flag & IMA_HIGH_BITDEPTH);
eGPUTextureFormat texture_format = IMB_gpu_get_texture_format(
ibuf, ima->flag & IMA_HIGH_BITDEPTH, ibuf->planes >= 8);
const char *texture_format_description = GPU_texture_format_description(texture_format);
ofs += BLI_snprintf_rlen(str + ofs, len - ofs, TIP_(", %s"), texture_format_description);

View File

@ -890,14 +890,22 @@ GPUTexture *IMB_create_gpu_texture(const char *name,
bool use_high_bitdepth,
bool use_premult);
eGPUTextureFormat IMB_gpu_get_texture_format(const struct ImBuf *ibuf, bool high_bitdepth);
eGPUTextureFormat IMB_gpu_get_texture_format(const struct ImBuf *ibuf,
bool high_bitdepth,
bool use_grayscale);
/**
* The `ibuf` is only here to detect the storage type. The produced texture will have undefined
* content. It will need to be populated by using #IMB_update_gpu_texture_sub().
*/
GPUTexture *IMB_touch_gpu_texture(
const char *name, struct ImBuf *ibuf, int w, int h, int layers, bool use_high_bitdepth);
GPUTexture *IMB_touch_gpu_texture(const char *name,
struct ImBuf *ibuf,
int w,
int h,
int layers,
bool use_high_bitdepth,
bool use_grayscale);
/**
* Will update a #GPUTexture using the content of the #ImBuf. Only one layer will be updated.
* Will resize the ibuf if needed.
@ -911,6 +919,7 @@ void IMB_update_gpu_texture_sub(GPUTexture *tex,
int w,
int h,
bool use_high_bitdepth,
bool use_grayscale,
bool use_premult);
/**

View File

@ -460,7 +460,7 @@ static int imb_read_tiff_pixels(ImBuf *ibuf, TIFF *image)
scanline_contig_16bit(tmpibuf->rect_float + ib_offset, sbuf, ibuf->x, spp);
}
}
/* separate channels: RRRGGGBBB */
/* Separate channels: RRRGGGBBB. */
}
else if (config == PLANARCONFIG_SEPARATE) {
@ -574,7 +574,7 @@ ImBuf *imb_loadtiff(const unsigned char *mem,
TIFFGetField(image, TIFFTAG_IMAGELENGTH, &height);
TIFFGetField(image, TIFFTAG_SAMPLESPERPIXEL, &spp);
ib_depth = (spp == 3) ? 24 : 32;
ib_depth = spp * 8;
ibuf = IMB_allocImBuf(width, height, ib_depth, 0);
if (ibuf) {

View File

@ -14,6 +14,7 @@
#include "BKE_global.h"
#include "GPU_capabilities.h"
#include "GPU_state.h"
#include "GPU_texture.h"
#include "IMB_colormanagement.h"
@ -22,39 +23,62 @@
/* gpu ibuf utils */
static bool imb_is_grayscale_texture_format_compatible(const ImBuf *ibuf)
{
if (ibuf->planes > 8) {
return false;
}
/* Only imbufs with colorspace that do not modify the chrominance of the texture data relative
* to the scene color space can be uploaded as single channel textures. */
if (IMB_colormanagement_space_is_data(ibuf->rect_colorspace) ||
IMB_colormanagement_space_is_srgb(ibuf->rect_colorspace) ||
IMB_colormanagement_space_is_scene_linear(ibuf->rect_colorspace)) {
return true;
};
return false;
}
static void imb_gpu_get_format(const ImBuf *ibuf,
bool high_bitdepth,
bool use_grayscale,
eGPUDataFormat *r_data_format,
eGPUTextureFormat *r_texture_format)
{
const bool float_rect = (ibuf->rect_float != NULL);
const bool is_grayscale = use_grayscale && imb_is_grayscale_texture_format_compatible(ibuf);
if (float_rect) {
/* Float. */
const bool use_high_bitdepth = (!(ibuf->flags & IB_halffloat) && high_bitdepth);
*r_data_format = GPU_DATA_FLOAT;
*r_texture_format = use_high_bitdepth ? GPU_RGBA32F : GPU_RGBA16F;
*r_texture_format = is_grayscale ? (use_high_bitdepth ? GPU_R32F : GPU_R16F) :
(use_high_bitdepth ? GPU_RGBA32F : GPU_RGBA16F);
}
else {
if (IMB_colormanagement_space_is_data(ibuf->rect_colorspace) ||
IMB_colormanagement_space_is_scene_linear(ibuf->rect_colorspace)) {
/* Non-color data or scene linear, just store buffer as is. */
*r_data_format = GPU_DATA_UBYTE;
*r_texture_format = GPU_RGBA8;
*r_texture_format = (is_grayscale) ? GPU_R8 : GPU_RGBA8;
}
else if (IMB_colormanagement_space_is_srgb(ibuf->rect_colorspace)) {
/* sRGB, store as byte texture that the GPU can decode directly. */
*r_data_format = GPU_DATA_UBYTE;
*r_texture_format = GPU_SRGB8_A8;
*r_data_format = (is_grayscale) ? GPU_DATA_FLOAT : GPU_DATA_UBYTE;
*r_texture_format = (is_grayscale) ? GPU_R16F : GPU_SRGB8_A8;
}
else {
/* Other colorspace, store as half float texture to avoid precision loss. */
*r_data_format = GPU_DATA_FLOAT;
*r_texture_format = GPU_RGBA16F;
*r_texture_format = (is_grayscale) ? GPU_R16F : GPU_RGBA16F;
}
}
}
static const char *imb_gpu_get_swizzle(const ImBuf *ibuf)
{
return imb_is_grayscale_texture_format_compatible(ibuf) ? "rrra" : "rgba";
}
/* Return false if no suitable format was found. */
#ifdef WITH_DDS
static bool IMB_gpu_get_compressed_format(const ImBuf *ibuf, eGPUTextureFormat *r_texture_format)
@ -90,7 +114,8 @@ static void *imb_gpu_get_data(const ImBuf *ibuf,
const bool store_premultiplied,
bool *r_freedata)
{
const bool is_float_rect = (ibuf->rect_float != NULL);
bool is_float_rect = (ibuf->rect_float != NULL);
const bool is_grayscale = imb_is_grayscale_texture_format_compatible(ibuf);
void *data_rect = (is_float_rect) ? (void *)ibuf->rect_float : (void *)ibuf->rect;
bool freedata = false;
@ -121,7 +146,8 @@ static void *imb_gpu_get_data(const ImBuf *ibuf,
else if (IMB_colormanagement_space_is_srgb(ibuf->rect_colorspace) ||
IMB_colormanagement_space_is_scene_linear(ibuf->rect_colorspace)) {
/* sRGB or scene linear, store as byte texture that the GPU can decode directly. */
data_rect = MEM_mallocN(sizeof(uchar[4]) * ibuf->x * ibuf->y, __func__);
data_rect = MEM_mallocN(
(is_grayscale ? sizeof(float[4]) : sizeof(uchar[4])) * ibuf->x * ibuf->y, __func__);
*r_freedata = freedata = true;
if (data_rect == NULL) {
@ -133,8 +159,16 @@ static void *imb_gpu_get_data(const ImBuf *ibuf,
* this allows us to use sRGB texture formats and preserves color values in
* zero alpha areas, and appears generally closer to what game engines that we
* want to be compatible with do. */
IMB_colormanagement_imbuf_to_byte_texture(
(uchar *)data_rect, 0, 0, ibuf->x, ibuf->y, ibuf, store_premultiplied);
if (is_grayscale) {
/* Convert to byte buffer to then pack as half floats reducing the buffer size by half. */
IMB_colormanagement_imbuf_to_float_texture(
(float *)data_rect, 0, 0, ibuf->x, ibuf->y, ibuf, store_premultiplied);
is_float_rect = true;
}
else {
IMB_colormanagement_imbuf_to_byte_texture(
(uchar *)data_rect, 0, 0, ibuf->x, ibuf->y, ibuf, store_premultiplied);
}
}
else {
/* Other colorspace, store as float texture to avoid precision loss. */
@ -167,21 +201,52 @@ static void *imb_gpu_get_data(const ImBuf *ibuf,
}
data_rect = (is_float_rect) ? (void *)scale_ibuf->rect_float : (void *)scale_ibuf->rect;
*r_freedata = true;
*r_freedata = freedata = true;
/* Steal the rescaled buffer to avoid double free. */
scale_ibuf->rect_float = NULL;
scale_ibuf->rect = NULL;
IMB_freeImBuf(scale_ibuf);
}
/* Pack first channel data manually at the start of the buffer. */
if (is_grayscale) {
void *src_rect = data_rect;
if (freedata == false) {
data_rect = MEM_mallocN((is_float_rect ? sizeof(float) : sizeof(uchar)) * ibuf->x * ibuf->y,
__func__);
*r_freedata = freedata = true;
}
if (data_rect == NULL) {
return NULL;
}
if (is_float_rect) {
for (uint64_t i = 0; i < ibuf->x * ibuf->y; i++) {
((float *)data_rect)[i] = ((float *)src_rect)[i * 4];
}
}
else {
for (uint64_t i = 0; i < ibuf->x * ibuf->y; i++) {
((uchar *)data_rect)[i] = ((uchar *)src_rect)[i * 4];
}
}
}
return data_rect;
}
GPUTexture *IMB_touch_gpu_texture(
const char *name, ImBuf *ibuf, int w, int h, int layers, bool use_high_bitdepth)
GPUTexture *IMB_touch_gpu_texture(const char *name,
ImBuf *ibuf,
int w,
int h,
int layers,
bool use_high_bitdepth,
bool use_grayscale)
{
eGPUDataFormat data_format;
eGPUTextureFormat tex_format;
imb_gpu_get_format(ibuf, use_high_bitdepth, &data_format, &tex_format);
imb_gpu_get_format(ibuf, use_high_bitdepth, use_grayscale, &data_format, &tex_format);
GPUTexture *tex;
if (layers > 0) {
@ -191,6 +256,7 @@ GPUTexture *IMB_touch_gpu_texture(
tex = GPU_texture_create_2d(name, w, h, 9999, tex_format, NULL);
}
GPU_texture_swizzle_set(tex, imb_gpu_get_swizzle(ibuf));
GPU_texture_anisotropic_filter(tex, true);
return tex;
}
@ -203,6 +269,7 @@ void IMB_update_gpu_texture_sub(GPUTexture *tex,
int w,
int h,
bool use_high_bitdepth,
bool use_grayscale,
bool use_premult)
{
const bool do_rescale = (ibuf->x != w || ibuf->y != h);
@ -210,7 +277,7 @@ void IMB_update_gpu_texture_sub(GPUTexture *tex,
eGPUDataFormat data_format;
eGPUTextureFormat tex_format;
imb_gpu_get_format(ibuf, use_high_bitdepth, &data_format, &tex_format);
imb_gpu_get_format(ibuf, use_high_bitdepth, use_grayscale, &data_format, &tex_format);
bool freebuf = false;
@ -266,7 +333,7 @@ GPUTexture *IMB_create_gpu_texture(const char *name,
eGPUDataFormat data_format;
eGPUTextureFormat tex_format;
imb_gpu_get_format(ibuf, use_high_bitdepth, &data_format, &tex_format);
imb_gpu_get_format(ibuf, use_high_bitdepth, true, &data_format, &tex_format);
bool freebuf = false;
@ -282,6 +349,7 @@ GPUTexture *IMB_create_gpu_texture(const char *name,
void *data = imb_gpu_get_data(ibuf, do_rescale, size, use_premult, &freebuf);
GPU_texture_update(tex, data_format, data);
GPU_texture_swizzle_set(tex, imb_gpu_get_swizzle(ibuf));
GPU_texture_anisotropic_filter(tex, true);
if (freebuf) {
@ -291,12 +359,12 @@ GPUTexture *IMB_create_gpu_texture(const char *name,
return tex;
}
eGPUTextureFormat IMB_gpu_get_texture_format(const ImBuf *ibuf, bool high_bitdepth)
eGPUTextureFormat IMB_gpu_get_texture_format(const ImBuf *ibuf, bool high_bitdepth, bool use_grayscale)
{
eGPUTextureFormat gpu_texture_format;
eGPUDataFormat gpu_data_format;
imb_gpu_get_format(ibuf, high_bitdepth, &gpu_data_format, &gpu_texture_format);
imb_gpu_get_format(ibuf, high_bitdepth, use_grayscale, &gpu_data_format, &gpu_texture_format);
return gpu_texture_format;
}