Fix T91641: crash rendering with 16k environment map in Cycles
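Protect against integer overflow by widening device memory offsets, sizes and element counts from int to size_t, and shader evaluation work sizes from int to int64_t.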
This commit is contained in:
parent
6279efbb78
commit
d7f803f522
Notes:
blender-bot
2023-02-14 11:28:39 +01:00
Referenced by issue #91641, Cycles Regression: error rendering 16k environment map in Blender 3.0
|
@ -170,7 +170,7 @@ void CPUDevice::mem_copy_to(device_memory &mem)
|
|||
}
|
||||
|
||||
void CPUDevice::mem_copy_from(
|
||||
device_memory & /*mem*/, int /*y*/, int /*w*/, int /*h*/, int /*elem*/)
|
||||
device_memory & /*mem*/, size_t /*y*/, size_t /*w*/, size_t /*h*/, size_t /*elem*/)
|
||||
{
|
||||
/* no-op */
|
||||
}
|
||||
|
@ -204,7 +204,7 @@ void CPUDevice::mem_free(device_memory &mem)
|
|||
}
|
||||
}
|
||||
|
||||
device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
|
||||
device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
|
||||
{
|
||||
return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
|
||||
}
|
||||
|
|
|
@ -72,10 +72,13 @@ class CPUDevice : public Device {
|
|||
|
||||
virtual void mem_alloc(device_memory &mem) override;
|
||||
virtual void mem_copy_to(device_memory &mem) override;
|
||||
virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;
|
||||
virtual void mem_copy_from(
|
||||
device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
|
||||
virtual void mem_zero(device_memory &mem) override;
|
||||
virtual void mem_free(device_memory &mem) override;
|
||||
virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;
|
||||
virtual device_ptr mem_alloc_sub_ptr(device_memory &mem,
|
||||
size_t offset,
|
||||
size_t /*size*/) override;
|
||||
|
||||
virtual void const_copy_to(const char *name, void *host, size_t size) override;
|
||||
|
||||
|
|
|
@ -837,7 +837,7 @@ void CUDADevice::mem_copy_to(device_memory &mem)
|
|||
}
|
||||
}
|
||||
|
||||
void CUDADevice::mem_copy_from(device_memory &mem, int y, int w, int h, int elem)
|
||||
void CUDADevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
|
||||
{
|
||||
if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
|
||||
assert(!"mem_copy_from not supported for textures.");
|
||||
|
@ -891,7 +891,7 @@ void CUDADevice::mem_free(device_memory &mem)
|
|||
}
|
||||
}
|
||||
|
||||
device_ptr CUDADevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
|
||||
device_ptr CUDADevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
|
||||
{
|
||||
return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
|
||||
}
|
||||
|
|
|
@ -120,13 +120,13 @@ class CUDADevice : public Device {
|
|||
|
||||
void mem_copy_to(device_memory &mem) override;
|
||||
|
||||
void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;
|
||||
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
|
||||
|
||||
void mem_zero(device_memory &mem) override;
|
||||
|
||||
void mem_free(device_memory &mem) override;
|
||||
|
||||
device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;
|
||||
device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
|
||||
|
||||
virtual void const_copy_to(const char *name, void *host, size_t size) override;
|
||||
|
||||
|
|
|
@ -119,7 +119,7 @@ class Device {
|
|||
|
||||
string error_msg;
|
||||
|
||||
virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, int /*offset*/, int /*size*/)
|
||||
virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, size_t /*offset*/, size_t /*size*/)
|
||||
{
|
||||
/* Only required for devices that implement denoising. */
|
||||
assert(false);
|
||||
|
@ -273,7 +273,7 @@ class Device {
|
|||
|
||||
virtual void mem_alloc(device_memory &mem) = 0;
|
||||
virtual void mem_copy_to(device_memory &mem) = 0;
|
||||
virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) = 0;
|
||||
virtual void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) = 0;
|
||||
virtual void mem_zero(device_memory &mem) = 0;
|
||||
virtual void mem_free(device_memory &mem) = 0;
|
||||
|
||||
|
|
|
@ -136,7 +136,7 @@ void device_memory::device_copy_to()
|
|||
}
|
||||
}
|
||||
|
||||
void device_memory::device_copy_from(int y, int w, int h, int elem)
|
||||
void device_memory::device_copy_from(size_t y, size_t w, size_t h, size_t elem)
|
||||
{
|
||||
assert(type != MEM_TEXTURE && type != MEM_READ_ONLY && type != MEM_GLOBAL);
|
||||
device->mem_copy_from(*this, y, w, h, elem);
|
||||
|
@ -181,7 +181,7 @@ bool device_memory::is_resident(Device *sub_device) const
|
|||
|
||||
/* Device Sub Ptr */
|
||||
|
||||
device_sub_ptr::device_sub_ptr(device_memory &mem, int offset, int size) : device(mem.device)
|
||||
device_sub_ptr::device_sub_ptr(device_memory &mem, size_t offset, size_t size) : device(mem.device)
|
||||
{
|
||||
ptr = device->mem_alloc_sub_ptr(mem, offset, size);
|
||||
}
|
||||
|
|
|
@ -81,154 +81,154 @@ static constexpr size_t datatype_size(DataType datatype)
|
|||
|
||||
template<typename T> struct device_type_traits {
|
||||
static const DataType data_type = TYPE_UNKNOWN;
|
||||
static const int num_elements_cpu = sizeof(T);
|
||||
static const int num_elements_gpu = sizeof(T);
|
||||
static const size_t num_elements_cpu = sizeof(T);
|
||||
static const size_t num_elements_gpu = sizeof(T);
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uchar> {
|
||||
static const DataType data_type = TYPE_UCHAR;
|
||||
static const int num_elements_cpu = 1;
|
||||
static const int num_elements_gpu = 1;
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static_assert(sizeof(uchar) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uchar2> {
|
||||
static const DataType data_type = TYPE_UCHAR;
|
||||
static const int num_elements_cpu = 2;
|
||||
static const int num_elements_gpu = 2;
|
||||
static const size_t num_elements_cpu = 2;
|
||||
static const size_t num_elements_gpu = 2;
|
||||
static_assert(sizeof(uchar2) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uchar3> {
|
||||
static const DataType data_type = TYPE_UCHAR;
|
||||
static const int num_elements_cpu = 3;
|
||||
static const int num_elements_gpu = 3;
|
||||
static const size_t num_elements_cpu = 3;
|
||||
static const size_t num_elements_gpu = 3;
|
||||
static_assert(sizeof(uchar3) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uchar4> {
|
||||
static const DataType data_type = TYPE_UCHAR;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 4;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static_assert(sizeof(uchar4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint> {
|
||||
static const DataType data_type = TYPE_UINT;
|
||||
static const int num_elements_cpu = 1;
|
||||
static const int num_elements_gpu = 1;
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static_assert(sizeof(uint) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint2> {
|
||||
static const DataType data_type = TYPE_UINT;
|
||||
static const int num_elements_cpu = 2;
|
||||
static const int num_elements_gpu = 2;
|
||||
static const size_t num_elements_cpu = 2;
|
||||
static const size_t num_elements_gpu = 2;
|
||||
static_assert(sizeof(uint2) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint3> {
|
||||
static const DataType data_type = TYPE_UINT;
|
||||
static const int num_elements_cpu = 3;
|
||||
static const int num_elements_gpu = 3;
|
||||
static const size_t num_elements_cpu = 3;
|
||||
static const size_t num_elements_gpu = 3;
|
||||
static_assert(sizeof(uint3) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint4> {
|
||||
static const DataType data_type = TYPE_UINT;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 4;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static_assert(sizeof(uint4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<int> {
|
||||
static const DataType data_type = TYPE_INT;
|
||||
static const int num_elements_cpu = 1;
|
||||
static const int num_elements_gpu = 1;
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static_assert(sizeof(int) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<int2> {
|
||||
static const DataType data_type = TYPE_INT;
|
||||
static const int num_elements_cpu = 2;
|
||||
static const int num_elements_gpu = 2;
|
||||
static const size_t num_elements_cpu = 2;
|
||||
static const size_t num_elements_gpu = 2;
|
||||
static_assert(sizeof(int2) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<int3> {
|
||||
static const DataType data_type = TYPE_INT;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 3;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 3;
|
||||
static_assert(sizeof(int3) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<int4> {
|
||||
static const DataType data_type = TYPE_INT;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 4;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static_assert(sizeof(int4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<float> {
|
||||
static const DataType data_type = TYPE_FLOAT;
|
||||
static const int num_elements_cpu = 1;
|
||||
static const int num_elements_gpu = 1;
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static_assert(sizeof(float) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<float2> {
|
||||
static const DataType data_type = TYPE_FLOAT;
|
||||
static const int num_elements_cpu = 2;
|
||||
static const int num_elements_gpu = 2;
|
||||
static const size_t num_elements_cpu = 2;
|
||||
static const size_t num_elements_gpu = 2;
|
||||
static_assert(sizeof(float2) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<float3> {
|
||||
static const DataType data_type = TYPE_FLOAT;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 3;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 3;
|
||||
static_assert(sizeof(float3) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<float4> {
|
||||
static const DataType data_type = TYPE_FLOAT;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 4;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static_assert(sizeof(float4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<half> {
|
||||
static const DataType data_type = TYPE_HALF;
|
||||
static const int num_elements_cpu = 1;
|
||||
static const int num_elements_gpu = 1;
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static_assert(sizeof(half) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<ushort4> {
|
||||
static const DataType data_type = TYPE_UINT16;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 4;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static_assert(sizeof(ushort4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint16_t> {
|
||||
static const DataType data_type = TYPE_UINT16;
|
||||
static const int num_elements_cpu = 1;
|
||||
static const int num_elements_gpu = 1;
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static_assert(sizeof(uint16_t) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<half4> {
|
||||
static const DataType data_type = TYPE_HALF;
|
||||
static const int num_elements_cpu = 4;
|
||||
static const int num_elements_gpu = 4;
|
||||
static const size_t num_elements_cpu = 4;
|
||||
static const size_t num_elements_gpu = 4;
|
||||
static_assert(sizeof(half4) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<uint64_t> {
|
||||
static const DataType data_type = TYPE_UINT64;
|
||||
static const int num_elements_cpu = 1;
|
||||
static const int num_elements_gpu = 1;
|
||||
static const size_t num_elements_cpu = 1;
|
||||
static const size_t num_elements_gpu = 1;
|
||||
static_assert(sizeof(uint64_t) == num_elements_cpu * datatype_size(data_type));
|
||||
};
|
||||
|
||||
|
@ -296,7 +296,7 @@ class device_memory {
|
|||
void device_alloc();
|
||||
void device_free();
|
||||
void device_copy_to();
|
||||
void device_copy_from(int y, int w, int h, int elem);
|
||||
void device_copy_from(size_t y, size_t w, size_t h, size_t elem);
|
||||
void device_zero();
|
||||
|
||||
bool device_is_cpu();
|
||||
|
@ -565,7 +565,7 @@ template<typename T> class device_vector : public device_memory {
|
|||
device_copy_from(0, data_width, (data_height == 0) ? 1 : data_height, sizeof(T));
|
||||
}
|
||||
|
||||
void copy_from_device(int y, int w, int h)
|
||||
void copy_from_device(size_t y, size_t w, size_t h)
|
||||
{
|
||||
device_copy_from(y, w, h, sizeof(T));
|
||||
}
|
||||
|
@ -601,7 +601,7 @@ template<typename T> class device_vector : public device_memory {
|
|||
|
||||
class device_sub_ptr {
|
||||
public:
|
||||
device_sub_ptr(device_memory &mem, int offset, int size);
|
||||
device_sub_ptr(device_memory &mem, size_t offset, size_t size);
|
||||
~device_sub_ptr();
|
||||
|
||||
device_ptr operator*() const
|
||||
|
|
|
@ -48,7 +48,7 @@ class DummyDevice : public Device {
|
|||
{
|
||||
}
|
||||
|
||||
virtual void mem_copy_from(device_memory &, int, int, int, int) override
|
||||
virtual void mem_copy_from(device_memory &, size_t, size_t, size_t, size_t) override
|
||||
{
|
||||
}
|
||||
|
||||
|
|
|
@ -315,14 +315,14 @@ class MultiDevice : public Device {
|
|||
stats.mem_alloc(mem.device_size - existing_size);
|
||||
}
|
||||
|
||||
void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override
|
||||
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override
|
||||
{
|
||||
device_ptr key = mem.device_pointer;
|
||||
int i = 0, sub_h = h / devices.size();
|
||||
size_t i = 0, sub_h = h / devices.size();
|
||||
|
||||
foreach (SubDevice &sub, devices) {
|
||||
int sy = y + i * sub_h;
|
||||
int sh = (i == (int)devices.size() - 1) ? h - sub_h * i : sub_h;
|
||||
size_t sy = y + i * sub_h;
|
||||
size_t sh = (i == (size_t)devices.size() - 1) ? h - sub_h * i : sub_h;
|
||||
|
||||
SubDevice *owner_sub = find_matching_mem_device(key, sub);
|
||||
mem.device = owner_sub->device;
|
||||
|
|
|
@ -149,14 +149,14 @@ bool ShaderEval::eval_gpu(Device *device,
|
|||
|
||||
/* Execute work on GPU in chunk, so we can cancel.
|
||||
* TODO : query appropriate size from device.*/
|
||||
const int chunk_size = 65536;
|
||||
const int64_t chunk_size = 65536;
|
||||
|
||||
const int work_size = output.size();
|
||||
const int64_t work_size = output.size();
|
||||
void *d_input = (void *)input.device_pointer;
|
||||
void *d_output = (void *)output.device_pointer;
|
||||
|
||||
for (int d_offset = 0; d_offset < work_size; d_offset += chunk_size) {
|
||||
int d_work_size = min(chunk_size, work_size - d_offset);
|
||||
for (int64_t d_offset = 0; d_offset < work_size; d_offset += chunk_size) {
|
||||
int64_t d_work_size = std::min(chunk_size, work_size - d_offset);
|
||||
void *args[] = {&d_input, &d_output, &d_offset, &d_work_size};
|
||||
|
||||
queue->enqueue(kernel, d_work_size, args);
|
||||
|
|
Loading…
Reference in New Issue