Code refactor: avoid some unnecessary device memory copying.

This commit is contained in:
Brecht Van Lommel 2017-10-20 04:32:29 +02:00
parent 92ec4863c2
commit 57a0cb797d
Notes: blender-bot 2023-02-14 08:25:14 +01:00
Referenced by issue #53408, Updating Cycles Nodes via Drivers in Material View
Referenced by issue #53130, NLA Tweak (moved) Tracks + Alt+RMB Select does not work in Graph Editor
Referenced by issue #53134, Some tiles remain noisy with denoising in CPU+GPU render from master
12 changed files with 109 additions and 118 deletions

View File

@ -379,7 +379,7 @@ public:
texture_info.resize(flat_slot + 128);
}
TextureInfo& info = texture_info.get_data()[flat_slot];
TextureInfo& info = texture_info[flat_slot];
info.data = (uint64_t)mem.data_pointer;
info.cl_buffer = 0;
info.interpolation = interpolation;

View File

@ -840,7 +840,7 @@ public:
}
/* Set Mapping and tag that we need to (re-)upload to device */
TextureInfo& info = texture_info.get_data()[flat_slot];
TextureInfo& info = texture_info[flat_slot];
info.data = (uint64_t)tex;
info.cl_buffer = 0;
info.interpolation = interpolation;
@ -1911,9 +1911,10 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory
0, 0, (void**)&args, 0));
device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
size_t size = size_buffer[0];
device->mem_free(size_buffer);
return *size_buffer.get_data();
return size;
}
bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim,

View File

@ -270,31 +270,14 @@ public:
return &data[0];
}
T *copy(T *ptr, size_t width, size_t height = 0, size_t depth = 0)
void steal_data(array<T>& from)
{
T *mem = resize(width, height, depth);
if(mem != NULL) {
memcpy(mem, ptr, memory_size());
}
return mem;
}
void copy_at(T *ptr, size_t offset, size_t size)
{
if(size > 0) {
size_t mem_size = size*data_elements*datatype_size(data_type);
memcpy(&data[0] + offset, ptr, mem_size);
}
}
void reference(T *ptr, size_t width, size_t height = 0, size_t depth = 0)
{
data.clear();
data_size = width * ((height == 0)? 1: height) * ((depth == 0)? 1: depth);
data_pointer = (device_ptr)ptr;
data_width = width;
data_height = height;
data_depth = depth;
data.steal_data(from);
data_size = data.size();
data_pointer = (data_size)? (device_ptr)&data[0]: 0;
data_width = data_size;
data_height = 0;
data_depth = 0;
}
void clear()
@ -318,6 +301,11 @@ public:
return &data[0];
}
T& operator[](size_t i)
{
return data[i];
}
private:
array<T> data;
};

View File

@ -494,20 +494,21 @@ void OpenCLDeviceBase::mem_free_sub_ptr(device_ptr device_pointer)
void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size)
{
ConstMemMap::iterator i = const_mem_map.find(name);
device_vector<uchar> *data;
if(i == const_mem_map.end()) {
device_vector<uchar> *data = new device_vector<uchar>();
data->copy((uchar*)host, size);
data = new device_vector<uchar>();
data->resize(size);
mem_alloc(name, *data, MEM_READ_ONLY);
i = const_mem_map.insert(ConstMemMap::value_type(name, data)).first;
const_mem_map.insert(ConstMemMap::value_type(name, data));
}
else {
device_vector<uchar> *data = i->second;
data->copy((uchar*)host, size);
data = i->second;
}
mem_copy_to(*i->second);
memcpy(data->get_data(), host, size);
mem_copy_to(*data);
}
void OpenCLDeviceBase::tex_alloc(const char *name,

View File

@ -309,6 +309,7 @@ public:
device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");
device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
size_t size = size_buffer[0];
device->mem_free(size_buffer);
if(device->ciErr != CL_SUCCESS) {
@ -318,7 +319,7 @@ public:
return 0;
}
return *size_buffer.get_data();
return size;
}
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,

View File

@ -1127,14 +1127,12 @@ bool Mesh::is_instanced() const
MeshManager::MeshManager()
{
bvh = NULL;
need_update = true;
need_flags_update = true;
}
MeshManager::~MeshManager()
{
delete bvh;
}
void MeshManager::update_osl_attributes(Device *device, Scene *scene, vector<AttributeRequestSet>& mesh_attributes)
@ -1393,11 +1391,11 @@ static void update_attribute_element_size(Mesh *mesh,
}
static void update_attribute_element_offset(Mesh *mesh,
vector<float>& attr_float,
device_vector<float>& attr_float,
size_t& attr_float_offset,
vector<float4>& attr_float3,
device_vector<float4>& attr_float3,
size_t& attr_float3_offset,
vector<uchar4>& attr_uchar4,
device_vector<uchar4>& attr_uchar4,
size_t& attr_uchar4_offset,
Attribute *mattr,
AttributePrimitive prim,
@ -1425,7 +1423,7 @@ static void update_attribute_element_offset(Mesh *mesh,
uchar4 *data = mattr->data_uchar4();
offset = attr_uchar4_offset;
assert(attr_uchar4.capacity() >= offset + size);
assert(attr_uchar4.size() >= offset + size);
for(size_t k = 0; k < size; k++) {
attr_uchar4[offset+k] = data[k];
}
@ -1435,7 +1433,7 @@ static void update_attribute_element_offset(Mesh *mesh,
float *data = mattr->data_float();
offset = attr_float_offset;
assert(attr_float.capacity() >= offset + size);
assert(attr_float.size() >= offset + size);
for(size_t k = 0; k < size; k++) {
attr_float[offset+k] = data[k];
}
@ -1445,7 +1443,7 @@ static void update_attribute_element_offset(Mesh *mesh,
Transform *tfm = mattr->data_transform();
offset = attr_float3_offset;
assert(attr_float3.capacity() >= offset + size * 4);
assert(attr_float3.size() >= offset + size * 4);
for(size_t k = 0; k < size*4; k++) {
attr_float3[offset+k] = (&tfm->x)[k];
}
@ -1455,7 +1453,7 @@ static void update_attribute_element_offset(Mesh *mesh,
float4 *data = mattr->data_float4();
offset = attr_float3_offset;
assert(attr_float3.capacity() >= offset + size);
assert(attr_float3.size() >= offset + size);
for(size_t k = 0; k < size; k++) {
attr_float3[offset+k] = data[k];
}
@ -1556,9 +1554,9 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene,
}
}
vector<float> attr_float(attr_float_size);
vector<float4> attr_float3(attr_float3_size);
vector<uchar4> attr_uchar4(attr_uchar4_size);
dscene->attributes_float.resize(attr_float_size);
dscene->attributes_float3.resize(attr_float3_size);
dscene->attributes_uchar4.resize(attr_uchar4_size);
size_t attr_float_offset = 0;
size_t attr_float3_offset = 0;
@ -1577,27 +1575,27 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene,
Attribute *subd_mattr = mesh->subd_attributes.find(req);
update_attribute_element_offset(mesh,
attr_float, attr_float_offset,
attr_float3, attr_float3_offset,
attr_uchar4, attr_uchar4_offset,
dscene->attributes_float, attr_float_offset,
dscene->attributes_float3, attr_float3_offset,
dscene->attributes_uchar4, attr_uchar4_offset,
triangle_mattr,
ATTR_PRIM_TRIANGLE,
req.triangle_type,
req.triangle_desc);
update_attribute_element_offset(mesh,
attr_float, attr_float_offset,
attr_float3, attr_float3_offset,
attr_uchar4, attr_uchar4_offset,
dscene->attributes_float, attr_float_offset,
dscene->attributes_float3, attr_float3_offset,
dscene->attributes_uchar4, attr_uchar4_offset,
curve_mattr,
ATTR_PRIM_CURVE,
req.curve_type,
req.curve_desc);
update_attribute_element_offset(mesh,
attr_float, attr_float_offset,
attr_float3, attr_float3_offset,
attr_uchar4, attr_uchar4_offset,
dscene->attributes_float, attr_float_offset,
dscene->attributes_float3, attr_float3_offset,
dscene->attributes_uchar4, attr_uchar4_offset,
subd_mattr,
ATTR_PRIM_SUBD,
req.subd_type,
@ -1618,16 +1616,13 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene,
/* copy to device */
progress.set_status("Updating Mesh", "Copying Attributes to device");
if(attr_float.size()) {
dscene->attributes_float.copy(&attr_float[0], attr_float.size());
if(dscene->attributes_float.size()) {
device->tex_alloc("__attributes_float", dscene->attributes_float);
}
if(attr_float3.size()) {
dscene->attributes_float3.copy(&attr_float3[0], attr_float3.size());
if(dscene->attributes_float3.size()) {
device->tex_alloc("__attributes_float3", dscene->attributes_float3);
}
if(attr_uchar4.size()) {
dscene->attributes_uchar4.copy(&attr_uchar4[0], attr_uchar4.size());
if(dscene->attributes_uchar4.size()) {
device->tex_alloc("__attributes_uchar4", dscene->attributes_uchar4);
}
}
@ -1725,10 +1720,9 @@ void MeshManager::device_update_mesh(Device *device,
}
}
else {
PackedBVH& pack = bvh->pack;
for(size_t i = 0; i < pack.prim_index.size(); ++i) {
if((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
tri_prim_index[pack.prim_index[i]] = pack.prim_tri_index[i];
for(size_t i = 0; i < dscene->prim_index.size(); ++i) {
if((dscene->prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
tri_prim_index[dscene->prim_index[i]] = dscene->prim_tri_index[i];
}
}
}
@ -1832,11 +1826,13 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
VLOG(1) << (bparams.use_qbvh ? "Using QBVH optimization structure"
: "Using regular BVH optimization structure");
delete bvh;
bvh = BVH::create(bparams, scene->objects);
BVH *bvh = BVH::create(bparams, scene->objects);
bvh->build(progress);
if(progress.get_cancel()) return;
if(progress.get_cancel()) {
delete bvh;
return;
}
/* copy to device */
progress.set_status("Updating Scene BVH", "Copying BVH to device");
@ -1844,49 +1840,51 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
PackedBVH& pack = bvh->pack;
if(pack.nodes.size()) {
dscene->bvh_nodes.reference((float4*)&pack.nodes[0], pack.nodes.size());
dscene->bvh_nodes.steal_data(pack.nodes);
device->tex_alloc("__bvh_nodes", dscene->bvh_nodes);
}
if(pack.leaf_nodes.size()) {
dscene->bvh_leaf_nodes.reference((float4*)&pack.leaf_nodes[0], pack.leaf_nodes.size());
dscene->bvh_leaf_nodes.steal_data(pack.leaf_nodes);
device->tex_alloc("__bvh_leaf_nodes", dscene->bvh_leaf_nodes);
}
if(pack.object_node.size()) {
dscene->object_node.reference((uint*)&pack.object_node[0], pack.object_node.size());
dscene->object_node.steal_data(pack.object_node);
device->tex_alloc("__object_node", dscene->object_node);
}
if(pack.prim_tri_index.size()) {
dscene->prim_tri_index.reference((uint*)&pack.prim_tri_index[0], pack.prim_tri_index.size());
dscene->prim_tri_index.steal_data(pack.prim_tri_index);
device->tex_alloc("__prim_tri_index", dscene->prim_tri_index);
}
if(pack.prim_tri_verts.size()) {
dscene->prim_tri_verts.reference((float4*)&pack.prim_tri_verts[0], pack.prim_tri_verts.size());
dscene->prim_tri_verts.steal_data(pack.prim_tri_verts);
device->tex_alloc("__prim_tri_verts", dscene->prim_tri_verts);
}
if(pack.prim_type.size()) {
dscene->prim_type.reference((uint*)&pack.prim_type[0], pack.prim_type.size());
dscene->prim_type.steal_data(pack.prim_type);
device->tex_alloc("__prim_type", dscene->prim_type);
}
if(pack.prim_visibility.size()) {
dscene->prim_visibility.reference((uint*)&pack.prim_visibility[0], pack.prim_visibility.size());
dscene->prim_visibility.steal_data(pack.prim_visibility);
device->tex_alloc("__prim_visibility", dscene->prim_visibility);
}
if(pack.prim_index.size()) {
dscene->prim_index.reference((uint*)&pack.prim_index[0], pack.prim_index.size());
dscene->prim_index.steal_data(pack.prim_index);
device->tex_alloc("__prim_index", dscene->prim_index);
}
if(pack.prim_object.size()) {
dscene->prim_object.reference((uint*)&pack.prim_object[0], pack.prim_object.size());
dscene->prim_object.steal_data(pack.prim_object);
device->tex_alloc("__prim_object", dscene->prim_object);
}
if(pack.prim_time.size()) {
dscene->prim_time.reference((float2*)&pack.prim_time[0], pack.prim_time.size());
dscene->prim_time.steal_data(pack.prim_time);
device->tex_alloc("__prim_time", dscene->prim_time);
}
dscene->data.bvh.root = pack.root_index;
dscene->data.bvh.use_qbvh = bparams.use_qbvh;
dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
delete bvh;
}
void MeshManager::device_update_flags(Device * /*device*/,
@ -2168,6 +2166,7 @@ void MeshManager::device_free(Device *device, DeviceScene *dscene)
device->tex_free(dscene->attributes_uchar4);
dscene->bvh_nodes.clear();
dscene->bvh_leaf_nodes.clear();
dscene->object_node.clear();
dscene->prim_tri_verts.clear();
dscene->prim_tri_index.clear();

View File

@ -321,8 +321,6 @@ public:
class MeshManager {
public:
BVH *bvh;
bool need_update;
bool need_flags_update;

View File

@ -60,15 +60,15 @@ class BakeData;
class DeviceScene {
public:
/* BVH */
device_vector<float4> bvh_nodes;
device_vector<float4> bvh_leaf_nodes;
device_vector<uint> object_node;
device_vector<int4> bvh_nodes;
device_vector<int4> bvh_leaf_nodes;
device_vector<int> object_node;
device_vector<uint> prim_tri_index;
device_vector<float4> prim_tri_verts;
device_vector<uint> prim_type;
device_vector<int> prim_type;
device_vector<uint> prim_visibility;
device_vector<uint> prim_index;
device_vector<uint> prim_object;
device_vector<int> prim_index;
device_vector<int> prim_object;
device_vector<float2> prim_time;
/* mesh */
@ -103,7 +103,7 @@ public:
device_vector<float4> particles;
/* shaders */
device_vector<uint4> svm_nodes;
device_vector<int4> svm_nodes;
device_vector<uint> shader_flag;
device_vector<uint> object_flag;

View File

@ -48,15 +48,15 @@ void SVMShaderManager::reset(Scene * /*scene*/)
void SVMShaderManager::device_update_shader(Scene *scene,
Shader *shader,
Progress *progress,
vector<int4> *global_svm_nodes)
array<int4> *global_svm_nodes)
{
if(progress->get_cancel()) {
return;
}
assert(shader->graph);
vector<int4> svm_nodes;
svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
array<int4> svm_nodes;
svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
SVMCompiler::Summary summary;
SVMCompiler compiler(scene->shader_manager, scene->image_manager);
@ -79,12 +79,12 @@ void SVMShaderManager::device_update_shader(Scene *scene,
global_svm_nodes->resize(global_nodes_size + svm_nodes.size());
/* Offset local SVM nodes to a global address space. */
int4& jump_node = global_svm_nodes->at(shader->id);
int4& jump_node = (*global_svm_nodes)[shader->id];
jump_node.y = svm_nodes[0].y + global_nodes_size - 1;
jump_node.z = svm_nodes[0].z + global_nodes_size - 1;
jump_node.w = svm_nodes[0].w + global_nodes_size - 1;
/* Copy new nodes to global storage. */
memcpy(&global_svm_nodes->at(global_nodes_size),
memcpy(&(*global_svm_nodes)[global_nodes_size],
&svm_nodes[1],
sizeof(int4) * (svm_nodes.size() - 1));
nodes_lock_.unlock();
@ -106,11 +106,11 @@ void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene
device_update_shaders_used(scene);
/* svm_nodes */
vector<int4> svm_nodes;
array<int4> svm_nodes;
size_t i;
for(i = 0; i < scene->shaders.size(); i++) {
svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
}
TaskPool task_pool;
@ -129,7 +129,7 @@ void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene
return;
}
dscene->svm_nodes.copy((uint4*)&svm_nodes[0], svm_nodes.size());
dscene->svm_nodes.steal_data(svm_nodes);
device->tex_alloc("__svm_nodes", dscene->svm_nodes);
for(i = 0; i < scene->shaders.size(); i++) {
@ -366,17 +366,17 @@ uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
void SVMCompiler::add_node(int a, int b, int c, int d)
{
current_svm_nodes.push_back(make_int4(a, b, c, d));
current_svm_nodes.push_back_slow(make_int4(a, b, c, d));
}
void SVMCompiler::add_node(ShaderNodeType type, int a, int b, int c)
{
current_svm_nodes.push_back(make_int4(type, a, b, c));
current_svm_nodes.push_back_slow(make_int4(type, a, b, c));
}
void SVMCompiler::add_node(ShaderNodeType type, const float3& f)
{
current_svm_nodes.push_back(make_int4(type,
current_svm_nodes.push_back_slow(make_int4(type,
__float_as_int(f.x),
__float_as_int(f.y),
__float_as_int(f.z)));
@ -384,7 +384,7 @@ void SVMCompiler::add_node(ShaderNodeType type, const float3& f)
void SVMCompiler::add_node(const float4& f)
{
current_svm_nodes.push_back(make_int4(
current_svm_nodes.push_back_slow(make_int4(
__float_as_int(f.x),
__float_as_int(f.y),
__float_as_int(f.z),
@ -627,7 +627,7 @@ void SVMCompiler::generate_multi_closure(ShaderNode *root_node,
/* Add instruction to skip closure and its dependencies if mix
* weight is zero.
*/
current_svm_nodes.push_back(make_int4(NODE_JUMP_IF_ONE,
current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ONE,
0,
stack_assign(facin),
0));
@ -645,7 +645,7 @@ void SVMCompiler::generate_multi_closure(ShaderNode *root_node,
/* Add instruction to skip closure and its dependencies if mix
* weight is zero.
*/
current_svm_nodes.push_back(make_int4(NODE_JUMP_IF_ZERO,
current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ZERO,
0,
stack_assign(facin),
0));
@ -797,7 +797,7 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
void SVMCompiler::compile(Scene *scene,
Shader *shader,
vector<int4>& svm_nodes,
array<int4>& svm_nodes,
int index,
Summary *summary)
{
@ -839,9 +839,7 @@ void SVMCompiler::compile(Scene *scene,
scoped_timer timer((summary != NULL)? &summary->time_generate_bump: NULL);
compile_type(shader, shader->graph, SHADER_TYPE_BUMP);
svm_nodes[index].y = svm_nodes.size();
svm_nodes.insert(svm_nodes.end(),
current_svm_nodes.begin(),
current_svm_nodes.end());
svm_nodes.append(current_svm_nodes);
}
/* generate surface shader */
@ -852,9 +850,7 @@ void SVMCompiler::compile(Scene *scene,
if(!has_bump) {
svm_nodes[index].y = svm_nodes.size();
}
svm_nodes.insert(svm_nodes.end(),
current_svm_nodes.begin(),
current_svm_nodes.end());
svm_nodes.append(current_svm_nodes);
}
/* generate volume shader */
@ -862,9 +858,7 @@ void SVMCompiler::compile(Scene *scene,
scoped_timer timer((summary != NULL)? &summary->time_generate_volume: NULL);
compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
svm_nodes[index].z = svm_nodes.size();
svm_nodes.insert(svm_nodes.end(),
current_svm_nodes.begin(),
current_svm_nodes.end());
svm_nodes.append(current_svm_nodes);
}
/* generate displacement shader */
@ -872,9 +866,7 @@ void SVMCompiler::compile(Scene *scene,
scoped_timer timer((summary != NULL)? &summary->time_generate_displacement: NULL);
compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
svm_nodes[index].w = svm_nodes.size();
svm_nodes.insert(svm_nodes.end(),
current_svm_nodes.begin(),
current_svm_nodes.end());
svm_nodes.append(current_svm_nodes);
}
/* Fill in summary information. */

View File

@ -55,7 +55,7 @@ protected:
void device_update_shader(Scene *scene,
Shader *shader,
Progress *progress,
vector<int4> *global_svm_nodes);
array<int4> *global_svm_nodes);
};
/* Graph Compiler */
@ -98,7 +98,7 @@ public:
SVMCompiler(ShaderManager *shader_manager, ImageManager *image_manager);
void compile(Scene *scene,
Shader *shader,
vector<int4>& svm_nodes,
array<int4>& svm_nodes,
int index,
Summary *summary = NULL);
@ -207,7 +207,7 @@ protected:
/* compile */
void compile_type(Shader *shader, ShaderGraph *graph, ShaderType type);
vector<int4> current_svm_nodes;
array<int4> current_svm_nodes;
ShaderType current_type;
Shader *current_shader;
ShaderGraph *current_graph;

View File

@ -90,7 +90,9 @@ size_t LookupTables::add_table(DeviceScene *dscene, vector<float>& data)
}
/* copy table data and return offset */
dscene->lookup_table.copy_at(&data[0], new_table.offset, data.size());
float *dtable = dscene->lookup_table.get_data();
memcpy(dtable + new_table.offset, &data[0], sizeof(float) * data.size());
return new_table.offset;
}

View File

@ -273,6 +273,15 @@ public:
push_back_slow(t);
}
void append(const array<T>& from)
{
if(from.size()) {
size_t old_size = size();
resize(old_size + from.size());
memcpy(data_ + old_size, from.data(), sizeof(T) * from.size());
}
}
protected:
inline T* mem_allocate(size_t N)
{