Code refactor: device memory cleanups, preparing for mapped host memory.
This commit is contained in:
parent
5475314f49
commit
5801ef71e4
|
@ -102,17 +102,17 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dx, int d
|
|||
if(rgba.data_type == TYPE_HALF) {
|
||||
/* for multi devices, this assumes the inefficient method that we allocate
|
||||
* all pixels on the device even though we only render to a subset */
|
||||
GLhalf *data_pointer = (GLhalf*)rgba.data_pointer;
|
||||
GLhalf *host_pointer = (GLhalf*)rgba.host_pointer;
|
||||
float vbuffer[16], *basep;
|
||||
float *vp = NULL;
|
||||
|
||||
data_pointer += 4*y*w;
|
||||
host_pointer += 4*y*w;
|
||||
|
||||
/* draw half float texture, GLSL shader for display transform assumed to be bound */
|
||||
GLuint texid;
|
||||
glGenTextures(1, &texid);
|
||||
glBindTexture(GL_TEXTURE_2D, texid);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, GL_RGBA, GL_HALF_FLOAT, data_pointer);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, GL_RGBA, GL_HALF_FLOAT, host_pointer);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
|
||||
|
@ -194,7 +194,7 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dx, int d
|
|||
glPixelZoom((float)width/(float)w, (float)height/(float)h);
|
||||
glRasterPos2f(dx, dy);
|
||||
|
||||
uint8_t *pixels = (uint8_t*)rgba.data_pointer;
|
||||
uint8_t *pixels = (uint8_t*)rgba.host_pointer;
|
||||
|
||||
pixels += 4*y*w;
|
||||
|
||||
|
|
|
@ -297,10 +297,14 @@ public:
|
|||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
}
|
||||
|
||||
mem.device_pointer = mem.data_pointer;
|
||||
|
||||
if(!mem.device_pointer) {
|
||||
mem.device_pointer = (device_ptr)malloc(mem.memory_size());
|
||||
if(mem.type == MEM_DEVICE_ONLY) {
|
||||
assert(!mem.host_pointer);
|
||||
size_t alignment = mem_address_alignment();
|
||||
void *data = util_aligned_malloc(mem.memory_size(), alignment);
|
||||
mem.device_pointer = (device_ptr)data;
|
||||
}
|
||||
else {
|
||||
mem.device_pointer = (device_ptr)mem.host_pointer;
|
||||
}
|
||||
|
||||
mem.device_size = mem.memory_size();
|
||||
|
@ -350,8 +354,8 @@ public:
|
|||
tex_free(mem);
|
||||
}
|
||||
else if(mem.device_pointer) {
|
||||
if(!mem.data_pointer) {
|
||||
free((void*)mem.device_pointer);
|
||||
if(mem.type == MEM_DEVICE_ONLY) {
|
||||
util_aligned_free((void*)mem.device_pointer);
|
||||
}
|
||||
mem.device_pointer = 0;
|
||||
stats.mem_free(mem.device_size);
|
||||
|
@ -379,7 +383,7 @@ public:
|
|||
/* Data texture. */
|
||||
kernel_tex_copy(&kernel_globals,
|
||||
mem.name,
|
||||
mem.data_pointer,
|
||||
mem.host_pointer,
|
||||
mem.data_size);
|
||||
}
|
||||
else {
|
||||
|
@ -400,7 +404,7 @@ public:
|
|||
}
|
||||
|
||||
TextureInfo& info = texture_info[flat_slot];
|
||||
info.data = (uint64_t)mem.data_pointer;
|
||||
info.data = (uint64_t)mem.host_pointer;
|
||||
info.cl_buffer = 0;
|
||||
info.interpolation = mem.interpolation;
|
||||
info.extension = mem.extension;
|
||||
|
@ -411,7 +415,7 @@ public:
|
|||
need_texture_info = true;
|
||||
}
|
||||
|
||||
mem.device_pointer = mem.data_pointer;
|
||||
mem.device_pointer = (device_ptr)mem.host_pointer;
|
||||
mem.device_size = mem.memory_size();
|
||||
stats.mem_alloc(mem.device_size);
|
||||
}
|
||||
|
@ -457,7 +461,7 @@ public:
|
|||
|
||||
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
|
||||
{
|
||||
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
|
||||
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.host_pointer;
|
||||
for(int i = 0; i < 9; i++) {
|
||||
tiles->buffers[i] = buffers[i];
|
||||
}
|
||||
|
|
|
@ -128,20 +128,26 @@ public:
|
|||
CUdevice cuDevice;
|
||||
CUcontext cuContext;
|
||||
CUmodule cuModule, cuFilterModule;
|
||||
map<device_ptr, bool> tex_interp_map;
|
||||
map<device_ptr, CUtexObject> tex_bindless_map;
|
||||
int cuDevId;
|
||||
int cuDevArchitecture;
|
||||
bool first_error;
|
||||
CUDASplitKernel *split_kernel;
|
||||
|
||||
struct CUDAMem {
|
||||
CUDAMem()
|
||||
: texobject(0), array(0) {}
|
||||
|
||||
CUtexObject texobject;
|
||||
CUarray array;
|
||||
};
|
||||
map<device_memory*, CUDAMem> cuda_mem_map;
|
||||
|
||||
struct PixelMem {
|
||||
GLuint cuPBO;
|
||||
CUgraphicsResource cuPBOresource;
|
||||
GLuint cuTexId;
|
||||
int w, h;
|
||||
};
|
||||
|
||||
map<device_ptr, PixelMem> pixel_mem_map;
|
||||
|
||||
/* Bindless Textures */
|
||||
|
@ -615,7 +621,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
void generic_alloc(device_memory& mem, size_t padding = 0)
|
||||
CUDAMem *generic_alloc(device_memory& mem, size_t padding = 0)
|
||||
{
|
||||
CUDAContextScope scope(this);
|
||||
|
||||
|
@ -625,19 +631,28 @@ public:
|
|||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
}
|
||||
|
||||
CUdeviceptr device_pointer;
|
||||
/* Allocate memory on device. */
|
||||
CUdeviceptr device_pointer = 0;
|
||||
size_t size = mem.memory_size();
|
||||
cuda_assert(cuMemAlloc(&device_pointer, size + padding));
|
||||
mem.device_pointer = (device_ptr)device_pointer;
|
||||
mem.device_size = size;
|
||||
stats.mem_alloc(size);
|
||||
|
||||
if(!mem.device_pointer) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Insert into map of allocations. */
|
||||
CUDAMem *cmem = &cuda_mem_map[&mem];
|
||||
return cmem;
|
||||
}
|
||||
|
||||
void generic_copy_to(device_memory& mem)
|
||||
{
|
||||
if(mem.device_pointer) {
|
||||
CUDAContextScope scope(this);
|
||||
cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size()));
|
||||
cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -648,10 +663,11 @@ public:
|
|||
|
||||
cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)));
|
||||
|
||||
mem.device_pointer = 0;
|
||||
|
||||
stats.mem_free(mem.device_size);
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
|
||||
cuda_mem_map.erase(cuda_mem_map.find(&mem));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -700,11 +716,11 @@ public:
|
|||
size_t size = elem*w*h;
|
||||
|
||||
if(mem.device_pointer) {
|
||||
cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset,
|
||||
cuda_assert(cuMemcpyDtoH((uchar*)mem.host_pointer + offset,
|
||||
(CUdeviceptr)(mem.device_pointer + offset), size));
|
||||
}
|
||||
else {
|
||||
memset((char*)mem.data_pointer + offset, 0, size);
|
||||
memset((char*)mem.host_pointer + offset, 0, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -715,8 +731,8 @@ public:
|
|||
mem_alloc(mem);
|
||||
}
|
||||
|
||||
if(mem.data_pointer) {
|
||||
memset((void*)mem.data_pointer, 0, mem.memory_size());
|
||||
if(mem.host_pointer) {
|
||||
memset(mem.host_pointer, 0, mem.memory_size());
|
||||
}
|
||||
|
||||
if(mem.device_pointer) {
|
||||
|
@ -814,8 +830,6 @@ public:
|
|||
uint32_t ptr = (uint32_t)mem.device_pointer;
|
||||
cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
|
||||
}
|
||||
|
||||
tex_interp_map[mem.device_pointer] = false;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -851,7 +865,7 @@ public:
|
|||
default: assert(0); return;
|
||||
}
|
||||
|
||||
|
||||
CUDAMem *cmem = NULL;
|
||||
CUarray array_3d = NULL;
|
||||
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
|
||||
size_t dst_pitch = src_pitch;
|
||||
|
@ -878,7 +892,7 @@ public:
|
|||
param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
|
||||
param.dstArray = array_3d;
|
||||
param.srcMemoryType = CU_MEMORYTYPE_HOST;
|
||||
param.srcHost = (void*)mem.data_pointer;
|
||||
param.srcHost = mem.host_pointer;
|
||||
param.srcPitch = src_pitch;
|
||||
param.WidthInBytes = param.srcPitch;
|
||||
param.Height = mem.data_height;
|
||||
|
@ -889,6 +903,10 @@ public:
|
|||
mem.device_pointer = (device_ptr)array_3d;
|
||||
mem.device_size = size;
|
||||
stats.mem_alloc(size);
|
||||
|
||||
cmem = &cuda_mem_map[&mem];
|
||||
cmem->texobject = 0;
|
||||
cmem->array = array_3d;
|
||||
}
|
||||
else if(mem.data_height > 1) {
|
||||
/* 2D texture, using pitch aligned linear memory. */
|
||||
|
@ -897,7 +915,10 @@ public:
|
|||
dst_pitch = align_up(src_pitch, alignment);
|
||||
size_t dst_size = dst_pitch * mem.data_height;
|
||||
|
||||
generic_alloc(mem, dst_size - mem.memory_size());
|
||||
cmem = generic_alloc(mem, dst_size - mem.memory_size());
|
||||
if(!cmem) {
|
||||
return;
|
||||
}
|
||||
|
||||
CUDA_MEMCPY2D param;
|
||||
memset(&param, 0, sizeof(param));
|
||||
|
@ -905,7 +926,7 @@ public:
|
|||
param.dstDevice = mem.device_pointer;
|
||||
param.dstPitch = dst_pitch;
|
||||
param.srcMemoryType = CU_MEMORYTYPE_HOST;
|
||||
param.srcHost = (void*)mem.data_pointer;
|
||||
param.srcHost = mem.host_pointer;
|
||||
param.srcPitch = src_pitch;
|
||||
param.WidthInBytes = param.srcPitch;
|
||||
param.Height = mem.data_height;
|
||||
|
@ -914,8 +935,12 @@ public:
|
|||
}
|
||||
else {
|
||||
/* 1D texture, using linear memory. */
|
||||
generic_alloc(mem);
|
||||
cuda_assert(cuMemcpyHtoD(mem.device_pointer, (void*)mem.data_pointer, size));
|
||||
cmem = generic_alloc(mem);
|
||||
if(!cmem) {
|
||||
return;
|
||||
}
|
||||
|
||||
cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
|
||||
}
|
||||
|
||||
if(!has_fermi_limits) {
|
||||
|
@ -932,7 +957,7 @@ public:
|
|||
CUDA_RESOURCE_DESC resDesc;
|
||||
memset(&resDesc, 0, sizeof(resDesc));
|
||||
|
||||
if(mem.data_depth > 1) {
|
||||
if(array_3d) {
|
||||
resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
|
||||
resDesc.res.array.hArray = array_3d;
|
||||
resDesc.flags = 0;
|
||||
|
@ -962,13 +987,7 @@ public:
|
|||
texDesc.filterMode = filter_mode;
|
||||
texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
|
||||
|
||||
CUtexObject tex = 0;
|
||||
cuda_assert(cuTexObjectCreate(&tex, &resDesc, &texDesc, NULL));
|
||||
|
||||
/* Safety check */
|
||||
if((uint)tex > UINT_MAX) {
|
||||
assert(0);
|
||||
}
|
||||
cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
|
||||
|
||||
/* Resize once */
|
||||
if(flat_slot >= texture_info.size()) {
|
||||
|
@ -979,20 +998,18 @@ public:
|
|||
|
||||
/* Set Mapping and tag that we need to (re-)upload to device */
|
||||
TextureInfo& info = texture_info[flat_slot];
|
||||
info.data = (uint64_t)tex;
|
||||
info.data = (uint64_t)cmem->texobject;
|
||||
info.cl_buffer = 0;
|
||||
info.interpolation = mem.interpolation;
|
||||
info.extension = mem.extension;
|
||||
info.width = mem.data_width;
|
||||
info.height = mem.data_height;
|
||||
info.depth = mem.data_depth;
|
||||
|
||||
tex_bindless_map[mem.device_pointer] = tex;
|
||||
need_texture_info = true;
|
||||
}
|
||||
else {
|
||||
/* Fermi, fixed texture slots. */
|
||||
if(mem.data_depth > 1) {
|
||||
if(array_3d) {
|
||||
cuda_assert(cuTexRefSetArray(texref, array_3d, CU_TRSA_OVERRIDE_FORMAT));
|
||||
}
|
||||
else if(mem.data_height > 1) {
|
||||
|
@ -1017,38 +1034,27 @@ public:
|
|||
cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode));
|
||||
}
|
||||
}
|
||||
|
||||
/* Fermi and Kepler */
|
||||
tex_interp_map[mem.device_pointer] = true;
|
||||
}
|
||||
|
||||
void tex_free(device_memory& mem)
|
||||
{
|
||||
if(mem.device_pointer) {
|
||||
bool interp = tex_interp_map[mem.device_pointer];
|
||||
tex_interp_map.erase(tex_interp_map.find(mem.device_pointer));
|
||||
CUDAContextScope scope(this);
|
||||
const CUDAMem& cmem = cuda_mem_map[&mem];
|
||||
|
||||
if(interp) {
|
||||
CUDAContextScope scope(this);
|
||||
if(cmem.texobject) {
|
||||
/* Free bindless texture. */
|
||||
cuTexObjectDestroy(cmem.texobject);
|
||||
}
|
||||
|
||||
if(!info.has_fermi_limits) {
|
||||
/* Free bindless texture. */
|
||||
if(tex_bindless_map[mem.device_pointer]) {
|
||||
CUtexObject tex = tex_bindless_map[mem.device_pointer];
|
||||
cuTexObjectDestroy(tex);
|
||||
}
|
||||
}
|
||||
if(cmem.array) {
|
||||
/* Free array. */
|
||||
cuArrayDestroy(cmem.array);
|
||||
stats.mem_free(mem.device_size);
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
|
||||
if(mem.data_depth > 1) {
|
||||
/* Free array. */
|
||||
cuArrayDestroy((CUarray)mem.device_pointer);
|
||||
stats.mem_free(mem.device_size);
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
}
|
||||
else {
|
||||
generic_free(mem);
|
||||
}
|
||||
cuda_mem_map.erase(cuda_mem_map.find(&mem));
|
||||
}
|
||||
else {
|
||||
generic_free(mem);
|
||||
|
@ -1058,7 +1064,7 @@ public:
|
|||
|
||||
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
|
||||
{
|
||||
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
|
||||
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.host_pointer;
|
||||
for(int i = 0; i < 9; i++) {
|
||||
tiles->buffers[i] = buffers[i];
|
||||
}
|
||||
|
@ -1455,7 +1461,7 @@ public:
|
|||
/* Allocate work tile. */
|
||||
work_tiles.alloc(1);
|
||||
|
||||
WorkTile *wtile = work_tiles.get_data();
|
||||
WorkTile *wtile = work_tiles.data();
|
||||
wtile->x = rtile.x;
|
||||
wtile->y = rtile.y;
|
||||
wtile->w = rtile.w;
|
||||
|
@ -1716,7 +1722,7 @@ public:
|
|||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
|
||||
uchar *pixels = (uchar*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY);
|
||||
size_t offset = sizeof(uchar)*4*y*w;
|
||||
memcpy((uchar*)mem.data_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h);
|
||||
memcpy((uchar*)mem.host_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h);
|
||||
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||
}
|
||||
|
|
|
@ -24,7 +24,6 @@ CCL_NAMESPACE_BEGIN
|
|||
device_memory::device_memory(Device *device, const char *name, MemoryType type)
|
||||
: data_type(device_type_traits<uchar>::data_type),
|
||||
data_elements(device_type_traits<uchar>::num_elements),
|
||||
data_pointer(0),
|
||||
data_size(0),
|
||||
device_size(0),
|
||||
data_width(0),
|
||||
|
@ -35,7 +34,8 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type)
|
|||
interpolation(INTERPOLATION_NONE),
|
||||
extension(EXTENSION_REPEAT),
|
||||
device(device),
|
||||
device_pointer(0)
|
||||
device_pointer(0),
|
||||
host_pointer(0)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -43,14 +43,14 @@ device_memory::~device_memory()
|
|||
{
|
||||
}
|
||||
|
||||
device_ptr device_memory::host_alloc(size_t size)
|
||||
void *device_memory::host_alloc(size_t size)
|
||||
{
|
||||
if(!size) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t alignment = device->mem_address_alignment();
|
||||
device_ptr ptr = (device_ptr)util_aligned_malloc(size, alignment);
|
||||
void *ptr = util_aligned_malloc(size, alignment);
|
||||
|
||||
if(ptr) {
|
||||
util_guarded_mem_alloc(size);
|
||||
|
@ -62,11 +62,12 @@ device_ptr device_memory::host_alloc(size_t size)
|
|||
return ptr;
|
||||
}
|
||||
|
||||
void device_memory::host_free(device_ptr ptr, size_t size)
|
||||
void device_memory::host_free()
|
||||
{
|
||||
if(ptr) {
|
||||
util_guarded_mem_free(size);
|
||||
util_aligned_free((void*)ptr);
|
||||
if(host_pointer) {
|
||||
util_guarded_mem_free(memory_size());
|
||||
util_aligned_free((void*)host_pointer);
|
||||
host_pointer = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ class Device;
|
|||
enum MemoryType {
|
||||
MEM_READ_ONLY,
|
||||
MEM_READ_WRITE,
|
||||
MEM_DEVICE_ONLY,
|
||||
MEM_TEXTURE,
|
||||
MEM_PIXELS
|
||||
};
|
||||
|
@ -182,7 +183,6 @@ public:
|
|||
/* Data information. */
|
||||
DataType data_type;
|
||||
int data_elements;
|
||||
device_ptr data_pointer;
|
||||
size_t data_size;
|
||||
size_t device_size;
|
||||
size_t data_width;
|
||||
|
@ -193,9 +193,10 @@ public:
|
|||
InterpolationType interpolation;
|
||||
ExtensionType extension;
|
||||
|
||||
/* Device pointer. */
|
||||
/* Pointers. */
|
||||
Device *device;
|
||||
device_ptr device_pointer;
|
||||
void *host_pointer;
|
||||
|
||||
virtual ~device_memory();
|
||||
|
||||
|
@ -207,11 +208,11 @@ protected:
|
|||
device_memory(const device_memory&);
|
||||
device_memory& operator = (const device_memory&);
|
||||
|
||||
/* Host allocation on the device. All data_pointer memory should be
|
||||
/* Host allocation on the device. All host_pointer memory should be
|
||||
* allocated with these functions, for devices that support using
|
||||
* the same pointer for host and device. */
|
||||
device_ptr host_alloc(size_t size);
|
||||
void host_free(device_ptr ptr, size_t size);
|
||||
void *host_alloc(size_t size);
|
||||
void host_free();
|
||||
|
||||
/* Device memory allocation and copying. */
|
||||
void device_alloc();
|
||||
|
@ -231,7 +232,7 @@ class device_only_memory : public device_memory
|
|||
{
|
||||
public:
|
||||
device_only_memory(Device *device, const char *name)
|
||||
: device_memory(device, name, MEM_READ_WRITE)
|
||||
: device_memory(device, name, MEM_DEVICE_ONLY)
|
||||
{
|
||||
data_type = device_type_traits<T>::data_type;
|
||||
data_elements = max(device_type_traits<T>::num_elements, 1);
|
||||
|
@ -294,8 +295,8 @@ public:
|
|||
|
||||
if(new_size != data_size) {
|
||||
device_free();
|
||||
host_free(data_pointer, sizeof(T)*data_size);
|
||||
data_pointer = host_alloc(sizeof(T)*new_size);
|
||||
host_free();
|
||||
host_pointer = host_alloc(sizeof(T)*new_size);
|
||||
assert(device_pointer == 0);
|
||||
}
|
||||
|
||||
|
@ -304,7 +305,7 @@ public:
|
|||
data_height = height;
|
||||
data_depth = depth;
|
||||
|
||||
return get_data();
|
||||
return data();
|
||||
}
|
||||
|
||||
/* Host memory resize. Only use this if the original data needs to be
|
||||
|
@ -314,16 +315,16 @@ public:
|
|||
size_t new_size = size(width, height, depth);
|
||||
|
||||
if(new_size != data_size) {
|
||||
device_ptr new_ptr = host_alloc(sizeof(T)*new_size);
|
||||
void *new_ptr = host_alloc(sizeof(T)*new_size);
|
||||
|
||||
if(new_size && data_size) {
|
||||
size_t min_size = ((new_size < data_size)? new_size: data_size);
|
||||
memcpy((T*)new_ptr, (T*)data_pointer, sizeof(T)*min_size);
|
||||
memcpy((T*)new_ptr, (T*)host_pointer, sizeof(T)*min_size);
|
||||
}
|
||||
|
||||
device_free();
|
||||
host_free(data_pointer, sizeof(T)*data_size);
|
||||
data_pointer = new_ptr;
|
||||
host_free();
|
||||
host_pointer = new_ptr;
|
||||
assert(device_pointer == 0);
|
||||
}
|
||||
|
||||
|
@ -332,20 +333,20 @@ public:
|
|||
data_height = height;
|
||||
data_depth = depth;
|
||||
|
||||
return get_data();
|
||||
return data();
|
||||
}
|
||||
|
||||
/* Take over data from an existing array. */
|
||||
void steal_data(array<T>& from)
|
||||
{
|
||||
device_free();
|
||||
host_free(data_pointer, sizeof(T)*data_size);
|
||||
host_free();
|
||||
|
||||
data_size = from.size();
|
||||
data_width = 0;
|
||||
data_height = 0;
|
||||
data_depth = 0;
|
||||
data_pointer = (device_ptr)from.steal_pointer();
|
||||
host_pointer = from.steal_pointer();
|
||||
assert(device_pointer == 0);
|
||||
}
|
||||
|
||||
|
@ -353,13 +354,13 @@ public:
|
|||
void free()
|
||||
{
|
||||
device_free();
|
||||
host_free(data_pointer, sizeof(T)*data_size);
|
||||
host_free();
|
||||
|
||||
data_size = 0;
|
||||
data_width = 0;
|
||||
data_height = 0;
|
||||
data_depth = 0;
|
||||
data_pointer = 0;
|
||||
host_pointer = 0;
|
||||
assert(device_pointer == 0);
|
||||
}
|
||||
|
||||
|
@ -368,15 +369,15 @@ public:
|
|||
return data_size;
|
||||
}
|
||||
|
||||
T* get_data()
|
||||
T* data()
|
||||
{
|
||||
return (T*)data_pointer;
|
||||
return (T*)host_pointer;
|
||||
}
|
||||
|
||||
T& operator[](size_t i)
|
||||
{
|
||||
assert(i < data_size);
|
||||
return get_data()[i];
|
||||
return data()[i];
|
||||
}
|
||||
|
||||
void copy_to_device()
|
||||
|
@ -423,7 +424,7 @@ public:
|
|||
T *copy_from_device(int y, int w, int h)
|
||||
{
|
||||
device_memory::device_copy_from(y, w, h, sizeof(T));
|
||||
return device_vector<T>::get_data();
|
||||
return device_vector<T>::data();
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -112,7 +112,7 @@ public:
|
|||
|
||||
snd.add(mem);
|
||||
snd.write();
|
||||
snd.write_buffer((void*)mem.data_pointer, mem.memory_size());
|
||||
snd.write_buffer(mem.host_pointer, mem.memory_size());
|
||||
}
|
||||
|
||||
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
|
||||
|
@ -131,7 +131,7 @@ public:
|
|||
snd.write();
|
||||
|
||||
RPCReceive rcv(socket, &error_func);
|
||||
rcv.read_buffer((void*)mem.data_pointer, data_size);
|
||||
rcv.read_buffer(mem.host_pointer, data_size);
|
||||
}
|
||||
|
||||
void mem_zero(device_memory& mem)
|
||||
|
@ -439,7 +439,7 @@ protected:
|
|||
device_ptr client_pointer = mem.device_pointer;
|
||||
|
||||
DataVector &data_v = data_vector_insert(client_pointer, data_size);
|
||||
mem.data_pointer = (data_size)? (device_ptr)&(data_v[0]): 0;
|
||||
mem.host_pointer = (data_size)? (void*)&(data_v[0]): 0;
|
||||
|
||||
/* Perform the allocation on the actual device. */
|
||||
device->mem_alloc(mem);
|
||||
|
@ -459,7 +459,7 @@ protected:
|
|||
if(client_pointer) {
|
||||
/* Lookup existing host side data buffer. */
|
||||
DataVector &data_v = data_vector_find(client_pointer);
|
||||
mem.data_pointer = (device_ptr)&data_v[0];
|
||||
mem.host_pointer = (void*)&data_v[0];
|
||||
|
||||
/* Translate the client pointer to a real device pointer. */
|
||||
mem.device_pointer = device_ptr_from_client_pointer(client_pointer);
|
||||
|
@ -467,11 +467,11 @@ protected:
|
|||
else {
|
||||
/* Allocate host side data buffer. */
|
||||
DataVector &data_v = data_vector_insert(client_pointer, data_size);
|
||||
mem.data_pointer = (data_size)? (device_ptr)&(data_v[0]): 0;
|
||||
mem.host_pointer = (data_size)? (void*)&(data_v[0]): 0;
|
||||
}
|
||||
|
||||
/* Copy data from network into memory buffer. */
|
||||
rcv.read_buffer((uint8_t*)mem.data_pointer, data_size);
|
||||
rcv.read_buffer((uint8_t*)mem.host_pointer, data_size);
|
||||
|
||||
/* Copy the data from the memory buffer to the device buffer. */
|
||||
device->mem_copy_to(mem);
|
||||
|
@ -497,7 +497,7 @@ protected:
|
|||
|
||||
DataVector &data_v = data_vector_find(client_pointer);
|
||||
|
||||
mem.data_pointer = (device_ptr)&(data_v[0]);
|
||||
mem.host_pointer = (device_ptr)&(data_v[0]);
|
||||
|
||||
device->mem_copy_from(mem, y, w, h, elem);
|
||||
|
||||
|
@ -505,7 +505,7 @@ protected:
|
|||
|
||||
RPCSend snd(socket, &error_func, "mem_copy_from");
|
||||
snd.write();
|
||||
snd.write_buffer((uint8_t*)mem.data_pointer, data_size);
|
||||
snd.write_buffer((uint8_t*)mem.host_pointer, data_size);
|
||||
lock.unlock();
|
||||
}
|
||||
else if(rcv.name == "mem_zero") {
|
||||
|
@ -520,7 +520,7 @@ protected:
|
|||
if(client_pointer) {
|
||||
/* Lookup existing host side data buffer. */
|
||||
DataVector &data_v = data_vector_find(client_pointer);
|
||||
mem.data_pointer = (device_ptr)&data_v[0];
|
||||
mem.host_pointer = (void*)&data_v[0];
|
||||
|
||||
/* Translate the client pointer to a real device pointer. */
|
||||
mem.device_pointer = device_ptr_from_client_pointer(client_pointer);
|
||||
|
@ -528,7 +528,7 @@ protected:
|
|||
else {
|
||||
/* Allocate host side data buffer. */
|
||||
DataVector &data_v = data_vector_insert(client_pointer, data_size);
|
||||
mem.data_pointer = (data_size)? (device_ptr)&(data_v[0]): 0;
|
||||
mem.host_pointer = (void*)? (device_ptr)&(data_v[0]): 0;
|
||||
}
|
||||
|
||||
/* Zero memory. */
|
||||
|
|
|
@ -278,7 +278,7 @@ public:
|
|||
*archive & mem.device_pointer;
|
||||
|
||||
mem.name = name.c_str();
|
||||
mem.data_pointer = 0;
|
||||
mem.host_pointer = 0;
|
||||
|
||||
/* Can't transfer OpenGL texture over network. */
|
||||
if(mem.type == MEM_PIXELS) {
|
||||
|
|
|
@ -280,8 +280,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
|
|||
activeRaysAvailable = false;
|
||||
|
||||
for(int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) {
|
||||
if(!IS_STATE(ray_state.get_data(), rayStateIter, RAY_INACTIVE)) {
|
||||
if(IS_STATE(ray_state.get_data(), rayStateIter, RAY_INVALID)) {
|
||||
if(!IS_STATE(ray_state.data(), rayStateIter, RAY_INACTIVE)) {
|
||||
if(IS_STATE(ray_state.data(), rayStateIter, RAY_INVALID)) {
|
||||
/* Something went wrong, abort to avoid looping endlessly. */
|
||||
device->set_error("Split kernel error: invalid ray state");
|
||||
return false;
|
||||
|
|
|
@ -88,7 +88,7 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device)
|
|||
CL_FALSE,
|
||||
offset,
|
||||
allocation->mem->memory_size(),
|
||||
(void*)allocation->mem->data_pointer,
|
||||
allocation->mem->host_pointer,
|
||||
0, NULL, NULL
|
||||
));
|
||||
|
||||
|
@ -127,7 +127,7 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device)
|
|||
CL_FALSE,
|
||||
offset,
|
||||
allocation->mem->memory_size(),
|
||||
(void*)allocation->mem->data_pointer,
|
||||
allocation->mem->host_pointer,
|
||||
0, NULL, NULL
|
||||
));
|
||||
|
||||
|
|
|
@ -362,7 +362,7 @@ void OpenCLDeviceBase::mem_copy_to(device_memory& mem)
|
|||
CL_TRUE,
|
||||
0,
|
||||
size,
|
||||
(void*)mem.data_pointer,
|
||||
mem.host_pointer,
|
||||
0,
|
||||
NULL, NULL));
|
||||
}
|
||||
|
@ -379,7 +379,7 @@ void OpenCLDeviceBase::mem_copy_from(device_memory& mem, int y, int w, int h, in
|
|||
CL_TRUE,
|
||||
offset,
|
||||
size,
|
||||
(uchar*)mem.data_pointer + offset,
|
||||
(uchar*)mem.host_pointer + offset,
|
||||
0,
|
||||
NULL, NULL));
|
||||
}
|
||||
|
@ -426,14 +426,14 @@ void OpenCLDeviceBase::mem_zero(device_memory& mem)
|
|||
mem_zero_kernel(mem.device_pointer, mem.memory_size());
|
||||
}
|
||||
|
||||
if(mem.data_pointer) {
|
||||
memset((void*)mem.data_pointer, 0, mem.memory_size());
|
||||
if(mem.host_pointer) {
|
||||
memset(mem.host_pointer, 0, mem.memory_size());
|
||||
}
|
||||
|
||||
if(!base_program.is_loaded()) {
|
||||
void* zero = (void*)mem.data_pointer;
|
||||
void* zero = mem.host_pointer;
|
||||
|
||||
if(!mem.data_pointer) {
|
||||
if(!mem.host_pointer) {
|
||||
zero = util_aligned_malloc(mem.memory_size(), 16);
|
||||
memset(zero, 0, mem.memory_size());
|
||||
}
|
||||
|
@ -447,7 +447,7 @@ void OpenCLDeviceBase::mem_zero(device_memory& mem)
|
|||
0,
|
||||
NULL, NULL));
|
||||
|
||||
if(!mem.data_pointer) {
|
||||
if(!mem.host_pointer) {
|
||||
util_aligned_free(zero);
|
||||
}
|
||||
}
|
||||
|
@ -519,7 +519,7 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size)
|
|||
data = i->second;
|
||||
}
|
||||
|
||||
memcpy(data->get_data(), host, size);
|
||||
memcpy(data->data(), host, size);
|
||||
data->copy_to_device();
|
||||
}
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ bool kernel_osl_use(KernelGlobals *kg);
|
|||
void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size);
|
||||
void kernel_tex_copy(KernelGlobals *kg,
|
||||
const char *name,
|
||||
device_ptr mem,
|
||||
void *mem,
|
||||
size_t size);
|
||||
|
||||
#define KERNEL_ARCH cpu
|
||||
|
|
|
@ -74,7 +74,7 @@ void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t s
|
|||
|
||||
void kernel_tex_copy(KernelGlobals *kg,
|
||||
const char *name,
|
||||
device_ptr mem,
|
||||
void *mem,
|
||||
size_t size)
|
||||
{
|
||||
if(0) {
|
||||
|
|
|
@ -201,7 +201,7 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
|
|||
/* read result */
|
||||
int k = 0;
|
||||
|
||||
float4 *offset = (float4*)d_output.data_pointer;
|
||||
float4 *offset = d_output.data();
|
||||
|
||||
size_t depth = 4;
|
||||
for(size_t i=shader_offset; i < (shader_offset + shader_size); i++) {
|
||||
|
|
|
@ -173,8 +173,8 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp
|
|||
/* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
|
||||
* update does not work efficiently with atomics in the kernel. */
|
||||
int mean_offset = offset - components;
|
||||
float *mean = (float*)buffer.data_pointer + mean_offset;
|
||||
float *var = (float*)buffer.data_pointer + offset;
|
||||
float *mean = buffer.data() + mean_offset;
|
||||
float *var = buffer.data() + offset;
|
||||
assert(mean_offset >= 0);
|
||||
|
||||
if(components == 1) {
|
||||
|
@ -194,7 +194,7 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp
|
|||
}
|
||||
}
|
||||
else {
|
||||
float *in = (float*)buffer.data_pointer + offset;
|
||||
float *in = buffer.data() + offset;
|
||||
|
||||
if(components == 1) {
|
||||
for(int i = 0; i < size; i++, in += pass_stride, pixels++) {
|
||||
|
@ -228,7 +228,7 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int
|
|||
continue;
|
||||
}
|
||||
|
||||
float *in = (float*)buffer.data_pointer + pass_offset;
|
||||
float *in = buffer.data() + pass_offset;
|
||||
int pass_stride = params.get_passes_size();
|
||||
|
||||
float scale = (pass.filter)? 1.0f/(float)sample: 1.0f;
|
||||
|
@ -295,7 +295,7 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int
|
|||
pass_offset += color_pass.components;
|
||||
}
|
||||
|
||||
float *in_divide = (float*)buffer.data_pointer + pass_offset;
|
||||
float *in_divide = buffer.data() + pass_offset;
|
||||
|
||||
for(int i = 0; i < size; i++, in += pass_stride, in_divide += pass_stride, pixels += 3) {
|
||||
float3 f = make_float3(in[0], in[1], in[2]);
|
||||
|
@ -344,7 +344,7 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int
|
|||
pass_offset += color_pass.components;
|
||||
}
|
||||
|
||||
float *in_weight = (float*)buffer.data_pointer + pass_offset;
|
||||
float *in_weight = buffer.data() + pass_offset;
|
||||
|
||||
for(int i = 0; i < size; i++, in += pass_stride, in_weight += pass_stride, pixels += 4) {
|
||||
float4 f = make_float4(in[0], in[1], in[2], in[3]);
|
||||
|
|
|
@ -79,7 +79,7 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res
|
|||
|
||||
d_input.free();
|
||||
|
||||
float4 *d_output_data = reinterpret_cast<float4*>(d_output.data_pointer);
|
||||
float4 *d_output_data = d_output.data();
|
||||
|
||||
pixels.resize(width*height);
|
||||
|
||||
|
|
|
@ -149,7 +149,7 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me
|
|||
done.resize(num_verts, false);
|
||||
int k = 0;
|
||||
|
||||
float4 *offset = (float4*)d_output.data_pointer;
|
||||
float4 *offset = d_output.data();
|
||||
|
||||
Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
for(size_t i = 0; i < num_triangles; i++) {
|
||||
|
|
|
@ -589,7 +589,7 @@ void ObjectManager::device_update_flags(Device *,
|
|||
return;
|
||||
|
||||
/* object info flag */
|
||||
uint *object_flag = dscene->object_flag.get_data();
|
||||
uint *object_flag = dscene->object_flag.data();
|
||||
|
||||
vector<Object *> volume_objects;
|
||||
bool has_volume_objects = false;
|
||||
|
@ -647,7 +647,7 @@ void ObjectManager::device_update_patch_map_offsets(Device *, DeviceScene *dscen
|
|||
return;
|
||||
}
|
||||
|
||||
uint4* objects = (uint4*)dscene->objects.get_data();
|
||||
uint4* objects = (uint4*)dscene->objects.data();
|
||||
|
||||
bool update = false;
|
||||
|
||||
|
|
|
@ -87,7 +87,7 @@ size_t LookupTables::add_table(DeviceScene *dscene, vector<float>& data)
|
|||
}
|
||||
|
||||
/* copy table data and return offset */
|
||||
float *dtable = dscene->lookup_table.get_data();
|
||||
float *dtable = dscene->lookup_table.data();
|
||||
memcpy(dtable + new_table.offset, &data[0], sizeof(float) * data.size());
|
||||
|
||||
return new_table.offset;
|
||||
|
|
Loading…
Reference in New Issue