Fix T91641: crash rendering with 16k environment map in Cycles

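Protect against integer overflow: device memory offsets, sizes and
row/element counts are now passed as size_t instead of int, and the shader
evaluation chunk loop uses int64_t, so values above INT_MAX can be represented.
For scale, a 16384x8192 RGBA float image (presumably the "16k" case) is already
2^31 bytes, one past INT_MAX.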
This commit is contained in:
Brecht Van Lommel 2021-09-23 17:38:56 +02:00
parent 6279efbb78
commit d7f803f522
Notes: blender-bot 2023-02-14 11:28:39 +01:00
Referenced by issue #91641, Cycles Regression: error rendering 16k environment map in Blender 3.0
10 changed files with 71 additions and 68 deletions

View File

@@ -170,7 +170,7 @@ void CPUDevice::mem_copy_to(device_memory &mem)
}
void CPUDevice::mem_copy_from(
device_memory & /*mem*/, int /*y*/, int /*w*/, int /*h*/, int /*elem*/)
device_memory & /*mem*/, size_t /*y*/, size_t /*w*/, size_t /*h*/, size_t /*elem*/)
{
/* no-op */
}
@@ -204,7 +204,7 @@ void CPUDevice::mem_free(device_memory &mem)
}
}
device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
{
return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
}

View File

@@ -72,10 +72,13 @@ class CPUDevice : public Device {
virtual void mem_alloc(device_memory &mem) override;
virtual void mem_copy_to(device_memory &mem) override;
virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;
virtual void mem_copy_from(
device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
virtual void mem_zero(device_memory &mem) override;
virtual void mem_free(device_memory &mem) override;
virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;
virtual device_ptr mem_alloc_sub_ptr(device_memory &mem,
size_t offset,
size_t /*size*/) override;
virtual void const_copy_to(const char *name, void *host, size_t size) override;

View File

@@ -837,7 +837,7 @@ void CUDADevice::mem_copy_to(device_memory &mem)
}
}
void CUDADevice::mem_copy_from(device_memory &mem, int y, int w, int h, int elem)
void CUDADevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
{
if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
assert(!"mem_copy_from not supported for textures.");
@@ -891,7 +891,7 @@ void CUDADevice::mem_free(device_memory &mem)
}
}
device_ptr CUDADevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
device_ptr CUDADevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
{
return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
}

View File

@@ -120,13 +120,13 @@ class CUDADevice : public Device {
void mem_copy_to(device_memory &mem) override;
void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
void mem_zero(device_memory &mem) override;
void mem_free(device_memory &mem) override;
device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;
device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
virtual void const_copy_to(const char *name, void *host, size_t size) override;

View File

@@ -119,7 +119,7 @@ class Device {
string error_msg;
virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, int /*offset*/, int /*size*/)
virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, size_t /*offset*/, size_t /*size*/)
{
/* Only required for devices that implement denoising. */
assert(false);
@@ -273,7 +273,7 @@ class Device {
virtual void mem_alloc(device_memory &mem) = 0;
virtual void mem_copy_to(device_memory &mem) = 0;
virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) = 0;
virtual void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) = 0;
virtual void mem_zero(device_memory &mem) = 0;
virtual void mem_free(device_memory &mem) = 0;

View File

@@ -136,7 +136,7 @@ void device_memory::device_copy_to()
}
}
void device_memory::device_copy_from(int y, int w, int h, int elem)
void device_memory::device_copy_from(size_t y, size_t w, size_t h, size_t elem)
{
assert(type != MEM_TEXTURE && type != MEM_READ_ONLY && type != MEM_GLOBAL);
device->mem_copy_from(*this, y, w, h, elem);
@@ -181,7 +181,7 @@ bool device_memory::is_resident(Device *sub_device) const
/* Device Sub Ptr */
device_sub_ptr::device_sub_ptr(device_memory &mem, int offset, int size) : device(mem.device)
device_sub_ptr::device_sub_ptr(device_memory &mem, size_t offset, size_t size) : device(mem.device)
{
ptr = device->mem_alloc_sub_ptr(mem, offset, size);
}

View File

@@ -81,154 +81,154 @@ static constexpr size_t datatype_size(DataType datatype)
template<typename T> struct device_type_traits {
static const DataType data_type = TYPE_UNKNOWN;
static const int num_elements_cpu = sizeof(T);
static const int num_elements_gpu = sizeof(T);
static const size_t num_elements_cpu = sizeof(T);
static const size_t num_elements_gpu = sizeof(T);
};
template<> struct device_type_traits<uchar> {
static const DataType data_type = TYPE_UCHAR;
static const int num_elements_cpu = 1;
static const int num_elements_gpu = 1;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(uchar) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uchar2> {
static const DataType data_type = TYPE_UCHAR;
static const int num_elements_cpu = 2;
static const int num_elements_gpu = 2;
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static_assert(sizeof(uchar2) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uchar3> {
static const DataType data_type = TYPE_UCHAR;
static const int num_elements_cpu = 3;
static const int num_elements_gpu = 3;
static const size_t num_elements_cpu = 3;
static const size_t num_elements_gpu = 3;
static_assert(sizeof(uchar3) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uchar4> {
static const DataType data_type = TYPE_UCHAR;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 4;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(uchar4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint> {
static const DataType data_type = TYPE_UINT;
static const int num_elements_cpu = 1;
static const int num_elements_gpu = 1;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(uint) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint2> {
static const DataType data_type = TYPE_UINT;
static const int num_elements_cpu = 2;
static const int num_elements_gpu = 2;
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static_assert(sizeof(uint2) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint3> {
static const DataType data_type = TYPE_UINT;
static const int num_elements_cpu = 3;
static const int num_elements_gpu = 3;
static const size_t num_elements_cpu = 3;
static const size_t num_elements_gpu = 3;
static_assert(sizeof(uint3) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint4> {
static const DataType data_type = TYPE_UINT;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 4;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(uint4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<int> {
static const DataType data_type = TYPE_INT;
static const int num_elements_cpu = 1;
static const int num_elements_gpu = 1;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(int) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<int2> {
static const DataType data_type = TYPE_INT;
static const int num_elements_cpu = 2;
static const int num_elements_gpu = 2;
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static_assert(sizeof(int2) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<int3> {
static const DataType data_type = TYPE_INT;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 3;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 3;
static_assert(sizeof(int3) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<int4> {
static const DataType data_type = TYPE_INT;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 4;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(int4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<float> {
static const DataType data_type = TYPE_FLOAT;
static const int num_elements_cpu = 1;
static const int num_elements_gpu = 1;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(float) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<float2> {
static const DataType data_type = TYPE_FLOAT;
static const int num_elements_cpu = 2;
static const int num_elements_gpu = 2;
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static_assert(sizeof(float2) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<float3> {
static const DataType data_type = TYPE_FLOAT;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 3;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 3;
static_assert(sizeof(float3) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<float4> {
static const DataType data_type = TYPE_FLOAT;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 4;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(float4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<half> {
static const DataType data_type = TYPE_HALF;
static const int num_elements_cpu = 1;
static const int num_elements_gpu = 1;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(half) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<ushort4> {
static const DataType data_type = TYPE_UINT16;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 4;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(ushort4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint16_t> {
static const DataType data_type = TYPE_UINT16;
static const int num_elements_cpu = 1;
static const int num_elements_gpu = 1;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(uint16_t) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<half4> {
static const DataType data_type = TYPE_HALF;
static const int num_elements_cpu = 4;
static const int num_elements_gpu = 4;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(half4) == num_elements_cpu * datatype_size(data_type));
};
template<> struct device_type_traits<uint64_t> {
static const DataType data_type = TYPE_UINT64;
static const int num_elements_cpu = 1;
static const int num_elements_gpu = 1;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(uint64_t) == num_elements_cpu * datatype_size(data_type));
};
@@ -296,7 +296,7 @@ class device_memory {
void device_alloc();
void device_free();
void device_copy_to();
void device_copy_from(int y, int w, int h, int elem);
void device_copy_from(size_t y, size_t w, size_t h, size_t elem);
void device_zero();
bool device_is_cpu();
@@ -565,7 +565,7 @@ template<typename T> class device_vector : public device_memory {
device_copy_from(0, data_width, (data_height == 0) ? 1 : data_height, sizeof(T));
}
void copy_from_device(int y, int w, int h)
void copy_from_device(size_t y, size_t w, size_t h)
{
device_copy_from(y, w, h, sizeof(T));
}
@@ -601,7 +601,7 @@ template<typename T> class device_vector : public device_memory {
class device_sub_ptr {
public:
device_sub_ptr(device_memory &mem, int offset, int size);
device_sub_ptr(device_memory &mem, size_t offset, size_t size);
~device_sub_ptr();
device_ptr operator*() const

View File

@@ -48,7 +48,7 @@ class DummyDevice : public Device {
{
}
virtual void mem_copy_from(device_memory &, int, int, int, int) override
virtual void mem_copy_from(device_memory &, size_t, size_t, size_t, size_t) override
{
}

View File

@@ -315,14 +315,14 @@ class MultiDevice : public Device {
stats.mem_alloc(mem.device_size - existing_size);
}
void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override
{
device_ptr key = mem.device_pointer;
int i = 0, sub_h = h / devices.size();
size_t i = 0, sub_h = h / devices.size();
foreach (SubDevice &sub, devices) {
int sy = y + i * sub_h;
int sh = (i == (int)devices.size() - 1) ? h - sub_h * i : sub_h;
size_t sy = y + i * sub_h;
size_t sh = (i == (size_t)devices.size() - 1) ? h - sub_h * i : sub_h;
SubDevice *owner_sub = find_matching_mem_device(key, sub);
mem.device = owner_sub->device;

View File

@@ -149,14 +149,14 @@ bool ShaderEval::eval_gpu(Device *device,
/* Execute work on GPU in chunk, so we can cancel.
* TODO : query appropriate size from device.*/
const int chunk_size = 65536;
const int64_t chunk_size = 65536;
const int work_size = output.size();
const int64_t work_size = output.size();
void *d_input = (void *)input.device_pointer;
void *d_output = (void *)output.device_pointer;
for (int d_offset = 0; d_offset < work_size; d_offset += chunk_size) {
int d_work_size = min(chunk_size, work_size - d_offset);
for (int64_t d_offset = 0; d_offset < work_size; d_offset += chunk_size) {
int64_t d_work_size = std::min(chunk_size, work_size - d_offset);
void *args[] = {&d_input, &d_output, &d_offset, &d_work_size};
queue->enqueue(kernel, d_work_size, args);