Code refactor: device memory cleanups, preparing for mapped host memory.

This commit is contained in:
Brecht Van Lommel 2017-11-05 00:34:30 +01:00
parent 5475314f49
commit 5801ef71e4
18 changed files with 152 additions and 140 deletions

View File

@ -102,17 +102,17 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dx, int d
if(rgba.data_type == TYPE_HALF) {
/* for multi devices, this assumes the inefficient method that we allocate
* all pixels on the device even though we only render to a subset */
GLhalf *data_pointer = (GLhalf*)rgba.data_pointer;
GLhalf *host_pointer = (GLhalf*)rgba.host_pointer;
float vbuffer[16], *basep;
float *vp = NULL;
data_pointer += 4*y*w;
host_pointer += 4*y*w;
/* draw half float texture, GLSL shader for display transform assumed to be bound */
GLuint texid;
glGenTextures(1, &texid);
glBindTexture(GL_TEXTURE_2D, texid);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, GL_RGBA, GL_HALF_FLOAT, data_pointer);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, GL_RGBA, GL_HALF_FLOAT, host_pointer);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@ -194,7 +194,7 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dx, int d
glPixelZoom((float)width/(float)w, (float)height/(float)h);
glRasterPos2f(dx, dy);
uint8_t *pixels = (uint8_t*)rgba.data_pointer;
uint8_t *pixels = (uint8_t*)rgba.host_pointer;
pixels += 4*y*w;

View File

@ -297,10 +297,14 @@ public:
<< string_human_readable_size(mem.memory_size()) << ")";
}
mem.device_pointer = mem.data_pointer;
if(!mem.device_pointer) {
mem.device_pointer = (device_ptr)malloc(mem.memory_size());
if(mem.type == MEM_DEVICE_ONLY) {
assert(!mem.host_pointer);
size_t alignment = mem_address_alignment();
void *data = util_aligned_malloc(mem.memory_size(), alignment);
mem.device_pointer = (device_ptr)data;
}
else {
mem.device_pointer = (device_ptr)mem.host_pointer;
}
mem.device_size = mem.memory_size();
@ -350,8 +354,8 @@ public:
tex_free(mem);
}
else if(mem.device_pointer) {
if(!mem.data_pointer) {
free((void*)mem.device_pointer);
if(mem.type == MEM_DEVICE_ONLY) {
util_aligned_free((void*)mem.device_pointer);
}
mem.device_pointer = 0;
stats.mem_free(mem.device_size);
@ -379,7 +383,7 @@ public:
/* Data texture. */
kernel_tex_copy(&kernel_globals,
mem.name,
mem.data_pointer,
mem.host_pointer,
mem.data_size);
}
else {
@ -400,7 +404,7 @@ public:
}
TextureInfo& info = texture_info[flat_slot];
info.data = (uint64_t)mem.data_pointer;
info.data = (uint64_t)mem.host_pointer;
info.cl_buffer = 0;
info.interpolation = mem.interpolation;
info.extension = mem.extension;
@ -411,7 +415,7 @@ public:
need_texture_info = true;
}
mem.device_pointer = mem.data_pointer;
mem.device_pointer = (device_ptr)mem.host_pointer;
mem.device_size = mem.memory_size();
stats.mem_alloc(mem.device_size);
}
@ -457,7 +461,7 @@ public:
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
{
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.host_pointer;
for(int i = 0; i < 9; i++) {
tiles->buffers[i] = buffers[i];
}

View File

@ -128,20 +128,26 @@ public:
CUdevice cuDevice;
CUcontext cuContext;
CUmodule cuModule, cuFilterModule;
map<device_ptr, bool> tex_interp_map;
map<device_ptr, CUtexObject> tex_bindless_map;
int cuDevId;
int cuDevArchitecture;
bool first_error;
CUDASplitKernel *split_kernel;
/* Per-allocation CUDA bookkeeping, stored in cuda_mem_map keyed by the
 * address of the owning device_memory. */
struct CUDAMem {
/* Both handles start out as 0, which the freeing code treats as "not set". */
CUDAMem()
: texobject(0), array(0) {}
/* Texture object created with cuTexObjectCreate for this memory, 0 if none. */
CUtexObject texobject;
/* CUDA array holding the texture data when it is array-backed, 0 otherwise. */
CUarray array;
};
map<device_memory*, CUDAMem> cuda_mem_map;
/* Per-buffer state for pixel memory, stored in pixel_mem_map keyed by
 * device pointer. */
struct PixelMem {
/* OpenGL buffer object; bound as GL_PIXEL_UNPACK_BUFFER when pixels are
 * mapped and read back to the host. */
GLuint cuPBO;
/* NOTE(review): presumably the CUDA graphics-interop registration of cuPBO;
 * confirm against the allocation code. */
CUgraphicsResource cuPBOresource;
/* NOTE(review): presumably the OpenGL texture used for display; confirm. */
GLuint cuTexId;
/* NOTE(review): presumably the buffer dimensions in pixels; confirm. */
int w, h;
};
map<device_ptr, PixelMem> pixel_mem_map;
/* Bindless Textures */
@ -615,7 +621,7 @@ public:
}
}
void generic_alloc(device_memory& mem, size_t padding = 0)
CUDAMem *generic_alloc(device_memory& mem, size_t padding = 0)
{
CUDAContextScope scope(this);
@ -625,19 +631,28 @@ public:
<< string_human_readable_size(mem.memory_size()) << ")";
}
CUdeviceptr device_pointer;
/* Allocate memory on device. */
CUdeviceptr device_pointer = 0;
size_t size = mem.memory_size();
cuda_assert(cuMemAlloc(&device_pointer, size + padding));
mem.device_pointer = (device_ptr)device_pointer;
mem.device_size = size;
stats.mem_alloc(size);
if(!mem.device_pointer) {
return NULL;
}
/* Insert into map of allocations. */
CUDAMem *cmem = &cuda_mem_map[&mem];
return cmem;
}
void generic_copy_to(device_memory& mem)
{
if(mem.device_pointer) {
CUDAContextScope scope(this);
cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size()));
cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));
}
}
@ -648,10 +663,11 @@ public:
cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)));
mem.device_pointer = 0;
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
/* Erase by key: identical when the entry exists, and a no-op instead of
 * erase(end()) (undefined behaviour) when mem was never registered. */
cuda_mem_map.erase(&mem);
}
}
@ -700,11 +716,11 @@ public:
size_t size = elem*w*h;
if(mem.device_pointer) {
cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset,
cuda_assert(cuMemcpyDtoH((uchar*)mem.host_pointer + offset,
(CUdeviceptr)(mem.device_pointer + offset), size));
}
else {
memset((char*)mem.data_pointer + offset, 0, size);
memset((char*)mem.host_pointer + offset, 0, size);
}
}
}
@ -715,8 +731,8 @@ public:
mem_alloc(mem);
}
if(mem.data_pointer) {
memset((void*)mem.data_pointer, 0, mem.memory_size());
if(mem.host_pointer) {
memset(mem.host_pointer, 0, mem.memory_size());
}
if(mem.device_pointer) {
@ -814,8 +830,6 @@ public:
uint32_t ptr = (uint32_t)mem.device_pointer;
cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
}
tex_interp_map[mem.device_pointer] = false;
return;
}
@ -851,7 +865,7 @@ public:
default: assert(0); return;
}
CUDAMem *cmem = NULL;
CUarray array_3d = NULL;
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
size_t dst_pitch = src_pitch;
@ -878,7 +892,7 @@ public:
param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
param.dstArray = array_3d;
param.srcMemoryType = CU_MEMORYTYPE_HOST;
param.srcHost = (void*)mem.data_pointer;
param.srcHost = mem.host_pointer;
param.srcPitch = src_pitch;
param.WidthInBytes = param.srcPitch;
param.Height = mem.data_height;
@ -889,6 +903,10 @@ public:
mem.device_pointer = (device_ptr)array_3d;
mem.device_size = size;
stats.mem_alloc(size);
cmem = &cuda_mem_map[&mem];
cmem->texobject = 0;
cmem->array = array_3d;
}
else if(mem.data_height > 1) {
/* 2D texture, using pitch aligned linear memory. */
@ -897,7 +915,10 @@ public:
dst_pitch = align_up(src_pitch, alignment);
size_t dst_size = dst_pitch * mem.data_height;
generic_alloc(mem, dst_size - mem.memory_size());
cmem = generic_alloc(mem, dst_size - mem.memory_size());
if(!cmem) {
return;
}
CUDA_MEMCPY2D param;
memset(&param, 0, sizeof(param));
@ -905,7 +926,7 @@ public:
param.dstDevice = mem.device_pointer;
param.dstPitch = dst_pitch;
param.srcMemoryType = CU_MEMORYTYPE_HOST;
param.srcHost = (void*)mem.data_pointer;
param.srcHost = mem.host_pointer;
param.srcPitch = src_pitch;
param.WidthInBytes = param.srcPitch;
param.Height = mem.data_height;
@ -914,8 +935,12 @@ public:
}
else {
/* 1D texture, using linear memory. */
generic_alloc(mem);
cuda_assert(cuMemcpyHtoD(mem.device_pointer, (void*)mem.data_pointer, size));
cmem = generic_alloc(mem);
if(!cmem) {
return;
}
cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
}
if(!has_fermi_limits) {
@ -932,7 +957,7 @@ public:
CUDA_RESOURCE_DESC resDesc;
memset(&resDesc, 0, sizeof(resDesc));
if(mem.data_depth > 1) {
if(array_3d) {
resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
resDesc.res.array.hArray = array_3d;
resDesc.flags = 0;
@ -962,13 +987,7 @@ public:
texDesc.filterMode = filter_mode;
texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
CUtexObject tex = 0;
cuda_assert(cuTexObjectCreate(&tex, &resDesc, &texDesc, NULL));
/* Safety check */
if((uint)tex > UINT_MAX) {
assert(0);
}
cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
/* Resize once */
if(flat_slot >= texture_info.size()) {
@ -979,20 +998,18 @@ public:
/* Set Mapping and tag that we need to (re-)upload to device */
TextureInfo& info = texture_info[flat_slot];
info.data = (uint64_t)tex;
info.data = (uint64_t)cmem->texobject;
info.cl_buffer = 0;
info.interpolation = mem.interpolation;
info.extension = mem.extension;
info.width = mem.data_width;
info.height = mem.data_height;
info.depth = mem.data_depth;
tex_bindless_map[mem.device_pointer] = tex;
need_texture_info = true;
}
else {
/* Fermi, fixed texture slots. */
if(mem.data_depth > 1) {
if(array_3d) {
cuda_assert(cuTexRefSetArray(texref, array_3d, CU_TRSA_OVERRIDE_FORMAT));
}
else if(mem.data_height > 1) {
@ -1017,38 +1034,27 @@ public:
cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode));
}
}
/* Fermi and Kepler */
tex_interp_map[mem.device_pointer] = true;
}
void tex_free(device_memory& mem)
{
if(mem.device_pointer) {
bool interp = tex_interp_map[mem.device_pointer];
tex_interp_map.erase(tex_interp_map.find(mem.device_pointer));
CUDAContextScope scope(this);
const CUDAMem& cmem = cuda_mem_map[&mem];
if(interp) {
CUDAContextScope scope(this);
if(cmem.texobject) {
/* Free bindless texture. */
cuTexObjectDestroy(cmem.texobject);
}
if(!info.has_fermi_limits) {
/* Free bindless texture. */
if(tex_bindless_map[mem.device_pointer]) {
CUtexObject tex = tex_bindless_map[mem.device_pointer];
cuTexObjectDestroy(tex);
}
}
if(cmem.array) {
/* Free array. */
cuArrayDestroy(cmem.array);
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
if(mem.data_depth > 1) {
/* Free array. */
cuArrayDestroy((CUarray)mem.device_pointer);
stats.mem_free(mem.device_size);
mem.device_pointer = 0;
mem.device_size = 0;
}
else {
generic_free(mem);
}
cuda_mem_map.erase(cuda_mem_map.find(&mem));
}
else {
generic_free(mem);
@ -1058,7 +1064,7 @@ public:
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
{
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.host_pointer;
for(int i = 0; i < 9; i++) {
tiles->buffers[i] = buffers[i];
}
@ -1455,7 +1461,7 @@ public:
/* Allocate work tile. */
work_tiles.alloc(1);
WorkTile *wtile = work_tiles.get_data();
WorkTile *wtile = work_tiles.data();
wtile->x = rtile.x;
wtile->y = rtile.y;
wtile->w = rtile.w;
@ -1716,7 +1722,7 @@ public:
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
uchar *pixels = (uchar*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY);
size_t offset = sizeof(uchar)*4*y*w;
memcpy((uchar*)mem.data_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h);
memcpy((uchar*)mem.host_pointer + offset, pixels + offset, sizeof(uchar)*4*w*h);
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}

View File

@ -24,7 +24,6 @@ CCL_NAMESPACE_BEGIN
device_memory::device_memory(Device *device, const char *name, MemoryType type)
: data_type(device_type_traits<uchar>::data_type),
data_elements(device_type_traits<uchar>::num_elements),
data_pointer(0),
data_size(0),
device_size(0),
data_width(0),
@ -35,7 +34,8 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type)
interpolation(INTERPOLATION_NONE),
extension(EXTENSION_REPEAT),
device(device),
device_pointer(0)
device_pointer(0),
host_pointer(0)
{
}
@ -43,14 +43,14 @@ device_memory::~device_memory()
{
}
device_ptr device_memory::host_alloc(size_t size)
void *device_memory::host_alloc(size_t size)
{
if(!size) {
return 0;
}
size_t alignment = device->mem_address_alignment();
device_ptr ptr = (device_ptr)util_aligned_malloc(size, alignment);
void *ptr = util_aligned_malloc(size, alignment);
if(ptr) {
util_guarded_mem_alloc(size);
@ -62,11 +62,12 @@ device_ptr device_memory::host_alloc(size_t size)
return ptr;
}
void device_memory::host_free(device_ptr ptr, size_t size)
void device_memory::host_free()
{
if(ptr) {
util_guarded_mem_free(size);
util_aligned_free((void*)ptr);
if(host_pointer) {
util_guarded_mem_free(memory_size());
util_aligned_free((void*)host_pointer);
host_pointer = 0;
}
}

View File

@ -34,6 +34,7 @@ class Device;
/* Kind of a device_memory allocation; passed to the device_memory
 * constructor and checked by device back-ends through mem.type. */
enum MemoryType {
/* NOTE(review): presumably memory the device only reads; confirm. */
MEM_READ_ONLY,
/* NOTE(review): presumably memory the device reads and writes; confirm. */
MEM_READ_WRITE,
/* Memory without a host-side buffer: used by device_only_memory, and the CPU
 * device allocates its own storage for it instead of aliasing host_pointer. */
MEM_DEVICE_ONLY,
/* NOTE(review): presumably memory uploaded through the texture path; confirm. */
MEM_TEXTURE,
/* Pixel memory tied to OpenGL; cannot be transferred over the network. */
MEM_PIXELS
};
@ -182,7 +183,6 @@ public:
/* Data information. */
DataType data_type;
int data_elements;
device_ptr data_pointer;
size_t data_size;
size_t device_size;
size_t data_width;
@ -193,9 +193,10 @@ public:
InterpolationType interpolation;
ExtensionType extension;
/* Device pointer. */
/* Pointers. */
Device *device;
device_ptr device_pointer;
void *host_pointer;
virtual ~device_memory();
@ -207,11 +208,11 @@ protected:
device_memory(const device_memory&);
device_memory& operator = (const device_memory&);
/* Host allocation on the device. All data_pointer memory should be
/* Host allocation on the device. All host_pointer memory should be
* allocated with these functions, for devices that support using
* the same pointer for host and device. */
device_ptr host_alloc(size_t size);
void host_free(device_ptr ptr, size_t size);
void *host_alloc(size_t size);
void host_free();
/* Device memory allocation and copying. */
void device_alloc();
@ -231,7 +232,7 @@ class device_only_memory : public device_memory
{
public:
device_only_memory(Device *device, const char *name)
: device_memory(device, name, MEM_READ_WRITE)
: device_memory(device, name, MEM_DEVICE_ONLY)
{
data_type = device_type_traits<T>::data_type;
data_elements = max(device_type_traits<T>::num_elements, 1);
@ -294,8 +295,8 @@ public:
if(new_size != data_size) {
device_free();
host_free(data_pointer, sizeof(T)*data_size);
data_pointer = host_alloc(sizeof(T)*new_size);
host_free();
host_pointer = host_alloc(sizeof(T)*new_size);
assert(device_pointer == 0);
}
@ -304,7 +305,7 @@ public:
data_height = height;
data_depth = depth;
return get_data();
return data();
}
/* Host memory resize. Only use this if the original data needs to be
@ -314,16 +315,16 @@ public:
size_t new_size = size(width, height, depth);
if(new_size != data_size) {
device_ptr new_ptr = host_alloc(sizeof(T)*new_size);
void *new_ptr = host_alloc(sizeof(T)*new_size);
if(new_size && data_size) {
size_t min_size = ((new_size < data_size)? new_size: data_size);
memcpy((T*)new_ptr, (T*)data_pointer, sizeof(T)*min_size);
memcpy((T*)new_ptr, (T*)host_pointer, sizeof(T)*min_size);
}
device_free();
host_free(data_pointer, sizeof(T)*data_size);
data_pointer = new_ptr;
host_free();
host_pointer = new_ptr;
assert(device_pointer == 0);
}
@ -332,20 +333,20 @@ public:
data_height = height;
data_depth = depth;
return get_data();
return data();
}
/* Take over data from an existing array. */
void steal_data(array<T>& from)
{
device_free();
host_free(data_pointer, sizeof(T)*data_size);
host_free();
data_size = from.size();
data_width = 0;
data_height = 0;
data_depth = 0;
data_pointer = (device_ptr)from.steal_pointer();
host_pointer = from.steal_pointer();
assert(device_pointer == 0);
}
@ -353,13 +354,13 @@ public:
void free()
{
device_free();
host_free(data_pointer, sizeof(T)*data_size);
host_free();
data_size = 0;
data_width = 0;
data_height = 0;
data_depth = 0;
data_pointer = 0;
host_pointer = 0;
assert(device_pointer == 0);
}
@ -368,15 +369,15 @@ public:
return data_size;
}
T* get_data()
T* data()
{
return (T*)data_pointer;
return (T*)host_pointer;
}
T& operator[](size_t i)
{
assert(i < data_size);
return get_data()[i];
return data()[i];
}
void copy_to_device()
@ -423,7 +424,7 @@ public:
T *copy_from_device(int y, int w, int h)
{
device_memory::device_copy_from(y, w, h, sizeof(T));
return device_vector<T>::get_data();
return device_vector<T>::data();
}
};

View File

@ -112,7 +112,7 @@ public:
snd.add(mem);
snd.write();
snd.write_buffer((void*)mem.data_pointer, mem.memory_size());
snd.write_buffer(mem.host_pointer, mem.memory_size());
}
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
@ -131,7 +131,7 @@ public:
snd.write();
RPCReceive rcv(socket, &error_func);
rcv.read_buffer((void*)mem.data_pointer, data_size);
rcv.read_buffer(mem.host_pointer, data_size);
}
void mem_zero(device_memory& mem)
@ -439,7 +439,7 @@ protected:
device_ptr client_pointer = mem.device_pointer;
DataVector &data_v = data_vector_insert(client_pointer, data_size);
mem.data_pointer = (data_size)? (device_ptr)&(data_v[0]): 0;
mem.host_pointer = (data_size)? (void*)&(data_v[0]): 0;
/* Perform the allocation on the actual device. */
device->mem_alloc(mem);
@ -459,7 +459,7 @@ protected:
if(client_pointer) {
/* Lookup existing host side data buffer. */
DataVector &data_v = data_vector_find(client_pointer);
mem.data_pointer = (device_ptr)&data_v[0];
mem.host_pointer = (void*)&data_v[0];
/* Translate the client pointer to a real device pointer. */
mem.device_pointer = device_ptr_from_client_pointer(client_pointer);
@ -467,11 +467,11 @@ protected:
else {
/* Allocate host side data buffer. */
DataVector &data_v = data_vector_insert(client_pointer, data_size);
mem.data_pointer = (data_size)? (device_ptr)&(data_v[0]): 0;
mem.host_pointer = (data_size)? (void*)&(data_v[0]): 0;
}
/* Copy data from network into memory buffer. */
rcv.read_buffer((uint8_t*)mem.data_pointer, data_size);
rcv.read_buffer((uint8_t*)mem.host_pointer, data_size);
/* Copy the data from the memory buffer to the device buffer. */
device->mem_copy_to(mem);
@ -497,7 +497,7 @@ protected:
DataVector &data_v = data_vector_find(client_pointer);
mem.data_pointer = (device_ptr)&(data_v[0]);
/* host_pointer is a void*, so cast to void* like the other handlers do. */
mem.host_pointer = (void*)&(data_v[0]);
device->mem_copy_from(mem, y, w, h, elem);
@ -505,7 +505,7 @@ protected:
RPCSend snd(socket, &error_func, "mem_copy_from");
snd.write();
snd.write_buffer((uint8_t*)mem.data_pointer, data_size);
snd.write_buffer((uint8_t*)mem.host_pointer, data_size);
lock.unlock();
}
else if(rcv.name == "mem_zero") {
@ -520,7 +520,7 @@ protected:
if(client_pointer) {
/* Lookup existing host side data buffer. */
DataVector &data_v = data_vector_find(client_pointer);
mem.data_pointer = (device_ptr)&data_v[0];
mem.host_pointer = (void*)&data_v[0];
/* Translate the client pointer to a real device pointer. */
mem.device_pointer = device_ptr_from_client_pointer(client_pointer);
@ -528,7 +528,7 @@ protected:
else {
/* Allocate host side data buffer. */
DataVector &data_v = data_vector_insert(client_pointer, data_size);
mem.data_pointer = (data_size)? (device_ptr)&(data_v[0]): 0;
/* Null when data_size is zero, same form as the other data_vector_insert
 * call sites. */
mem.host_pointer = (data_size)? (void*)&(data_v[0]): 0;
}
/* Zero memory. */

View File

@ -278,7 +278,7 @@ public:
*archive & mem.device_pointer;
mem.name = name.c_str();
mem.data_pointer = 0;
mem.host_pointer = 0;
/* Can't transfer OpenGL texture over network. */
if(mem.type == MEM_PIXELS) {

View File

@ -280,8 +280,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
activeRaysAvailable = false;
for(int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) {
if(!IS_STATE(ray_state.get_data(), rayStateIter, RAY_INACTIVE)) {
if(IS_STATE(ray_state.get_data(), rayStateIter, RAY_INVALID)) {
if(!IS_STATE(ray_state.data(), rayStateIter, RAY_INACTIVE)) {
if(IS_STATE(ray_state.data(), rayStateIter, RAY_INVALID)) {
/* Something went wrong, abort to avoid looping endlessly. */
device->set_error("Split kernel error: invalid ray state");
return false;

View File

@ -88,7 +88,7 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device)
CL_FALSE,
offset,
allocation->mem->memory_size(),
(void*)allocation->mem->data_pointer,
allocation->mem->host_pointer,
0, NULL, NULL
));
@ -127,7 +127,7 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device)
CL_FALSE,
offset,
allocation->mem->memory_size(),
(void*)allocation->mem->data_pointer,
allocation->mem->host_pointer,
0, NULL, NULL
));

View File

@ -362,7 +362,7 @@ void OpenCLDeviceBase::mem_copy_to(device_memory& mem)
CL_TRUE,
0,
size,
(void*)mem.data_pointer,
mem.host_pointer,
0,
NULL, NULL));
}
@ -379,7 +379,7 @@ void OpenCLDeviceBase::mem_copy_from(device_memory& mem, int y, int w, int h, in
CL_TRUE,
offset,
size,
(uchar*)mem.data_pointer + offset,
(uchar*)mem.host_pointer + offset,
0,
NULL, NULL));
}
@ -426,14 +426,14 @@ void OpenCLDeviceBase::mem_zero(device_memory& mem)
mem_zero_kernel(mem.device_pointer, mem.memory_size());
}
if(mem.data_pointer) {
memset((void*)mem.data_pointer, 0, mem.memory_size());
if(mem.host_pointer) {
memset(mem.host_pointer, 0, mem.memory_size());
}
if(!base_program.is_loaded()) {
void* zero = (void*)mem.data_pointer;
void* zero = mem.host_pointer;
if(!mem.data_pointer) {
if(!mem.host_pointer) {
zero = util_aligned_malloc(mem.memory_size(), 16);
memset(zero, 0, mem.memory_size());
}
@ -447,7 +447,7 @@ void OpenCLDeviceBase::mem_zero(device_memory& mem)
0,
NULL, NULL));
if(!mem.data_pointer) {
if(!mem.host_pointer) {
util_aligned_free(zero);
}
}
@ -519,7 +519,7 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size)
data = i->second;
}
memcpy(data->get_data(), host, size);
memcpy(data->data(), host, size);
data->copy_to_device();
}

View File

@ -40,7 +40,7 @@ bool kernel_osl_use(KernelGlobals *kg);
void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size);
void kernel_tex_copy(KernelGlobals *kg,
const char *name,
device_ptr mem,
void *mem,
size_t size);
#define KERNEL_ARCH cpu

View File

@ -74,7 +74,7 @@ void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t s
void kernel_tex_copy(KernelGlobals *kg,
const char *name,
device_ptr mem,
void *mem,
size_t size)
{
if(0) {

View File

@ -201,7 +201,7 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
/* read result */
int k = 0;
float4 *offset = (float4*)d_output.data_pointer;
float4 *offset = d_output.data();
size_t depth = 4;
for(size_t i=shader_offset; i < (shader_offset + shader_size); i++) {

View File

@ -173,8 +173,8 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp
/* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
* update does not work efficiently with atomics in the kernel. */
int mean_offset = offset - components;
float *mean = (float*)buffer.data_pointer + mean_offset;
float *var = (float*)buffer.data_pointer + offset;
float *mean = buffer.data() + mean_offset;
float *var = buffer.data() + offset;
assert(mean_offset >= 0);
if(components == 1) {
@ -194,7 +194,7 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp
}
}
else {
float *in = (float*)buffer.data_pointer + offset;
float *in = buffer.data() + offset;
if(components == 1) {
for(int i = 0; i < size; i++, in += pass_stride, pixels++) {
@ -228,7 +228,7 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int
continue;
}
float *in = (float*)buffer.data_pointer + pass_offset;
float *in = buffer.data() + pass_offset;
int pass_stride = params.get_passes_size();
float scale = (pass.filter)? 1.0f/(float)sample: 1.0f;
@ -295,7 +295,7 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int
pass_offset += color_pass.components;
}
float *in_divide = (float*)buffer.data_pointer + pass_offset;
float *in_divide = buffer.data() + pass_offset;
for(int i = 0; i < size; i++, in += pass_stride, in_divide += pass_stride, pixels += 3) {
float3 f = make_float3(in[0], in[1], in[2]);
@ -344,7 +344,7 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int
pass_offset += color_pass.components;
}
float *in_weight = (float*)buffer.data_pointer + pass_offset;
float *in_weight = buffer.data() + pass_offset;
for(int i = 0; i < size; i++, in += pass_stride, in_weight += pass_stride, pixels += 4) {
float4 f = make_float4(in[0], in[1], in[2], in[3]);

View File

@ -79,7 +79,7 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res
d_input.free();
float4 *d_output_data = reinterpret_cast<float4*>(d_output.data_pointer);
float4 *d_output_data = d_output.data();
pixels.resize(width*height);

View File

@ -149,7 +149,7 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me
done.resize(num_verts, false);
int k = 0;
float4 *offset = (float4*)d_output.data_pointer;
float4 *offset = d_output.data();
Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
for(size_t i = 0; i < num_triangles; i++) {

View File

@ -589,7 +589,7 @@ void ObjectManager::device_update_flags(Device *,
return;
/* object info flag */
uint *object_flag = dscene->object_flag.get_data();
uint *object_flag = dscene->object_flag.data();
vector<Object *> volume_objects;
bool has_volume_objects = false;
@ -647,7 +647,7 @@ void ObjectManager::device_update_patch_map_offsets(Device *, DeviceScene *dscen
return;
}
uint4* objects = (uint4*)dscene->objects.get_data();
uint4* objects = (uint4*)dscene->objects.data();
bool update = false;

View File

@ -87,7 +87,7 @@ size_t LookupTables::add_table(DeviceScene *dscene, vector<float>& data)
}
/* copy table data and return offset */
float *dtable = dscene->lookup_table.get_data();
float *dtable = dscene->lookup_table.data();
memcpy(dtable + new_table.offset, &data[0], sizeof(float) * data.size());
return new_table.offset;