Cleanup: refactor to make number of channels for shader evaluation variable
This commit is contained in:
parent
70376154a0
commit
2ba7c3aa65
|
@ -54,7 +54,7 @@ class CPUKernels {
|
|||
/* Shader evaluation. */
|
||||
|
||||
using ShaderEvalFunction = CPUKernelFunction<void (*)(
|
||||
const KernelGlobals *kg, const KernelShaderEvalInput *, float4 *, const int)>;
|
||||
const KernelGlobals *kg, const KernelShaderEvalInput *, float *, const int)>;
|
||||
|
||||
ShaderEvalFunction shader_eval_displace;
|
||||
ShaderEvalFunction shader_eval_background;
|
||||
|
|
|
@ -34,9 +34,10 @@ ShaderEval::ShaderEval(Device *device, Progress &progress) : device_(device), pr
|
|||
}
|
||||
|
||||
bool ShaderEval::eval(const ShaderEvalType type,
|
||||
const int max_num_points,
|
||||
const int max_num_inputs,
|
||||
const int num_channels,
|
||||
const function<int(device_vector<KernelShaderEvalInput> &)> &fill_input,
|
||||
const function<void(device_vector<float4> &)> &read_output)
|
||||
const function<void(device_vector<float> &)> &read_output)
|
||||
{
|
||||
bool first_device = true;
|
||||
bool success = true;
|
||||
|
@ -50,26 +51,27 @@ bool ShaderEval::eval(const ShaderEvalType type,
|
|||
first_device = false;
|
||||
|
||||
device_vector<KernelShaderEvalInput> input(device, "ShaderEval input", MEM_READ_ONLY);
|
||||
device_vector<float4> output(device, "ShaderEval output", MEM_READ_WRITE);
|
||||
device_vector<float> output(device, "ShaderEval output", MEM_READ_WRITE);
|
||||
|
||||
/* Allocate and copy device buffers. */
|
||||
DCHECK_EQ(input.device, device);
|
||||
DCHECK_EQ(output.device, device);
|
||||
DCHECK_LE(output.size(), input.size());
|
||||
|
||||
input.alloc(max_num_points);
|
||||
input.alloc(max_num_inputs);
|
||||
int num_points = fill_input(input);
|
||||
if (num_points == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
input.copy_to_device();
|
||||
output.alloc(num_points);
|
||||
output.alloc(num_points * num_channels);
|
||||
output.zero_to_device();
|
||||
|
||||
/* Evaluate on CPU or GPU. */
|
||||
success = (device->info.type == DEVICE_CPU) ? eval_cpu(device, type, input, output) :
|
||||
eval_gpu(device, type, input, output);
|
||||
success = (device->info.type == DEVICE_CPU) ?
|
||||
eval_cpu(device, type, input, output, num_points) :
|
||||
eval_gpu(device, type, input, output, num_points);
|
||||
|
||||
/* Copy data back from device if not canceled. */
|
||||
if (success) {
|
||||
|
@ -87,7 +89,8 @@ bool ShaderEval::eval(const ShaderEvalType type,
|
|||
bool ShaderEval::eval_cpu(Device *device,
|
||||
const ShaderEvalType type,
|
||||
device_vector<KernelShaderEvalInput> &input,
|
||||
device_vector<float4> &output)
|
||||
device_vector<float> &output,
|
||||
const int64_t work_size)
|
||||
{
|
||||
vector<CPUKernelThreadGlobals> kernel_thread_globals;
|
||||
device->get_cpu_kernel_thread_globals(kernel_thread_globals);
|
||||
|
@ -96,9 +99,8 @@ bool ShaderEval::eval_cpu(Device *device,
|
|||
const CPUKernels &kernels = *(device->get_cpu_kernels());
|
||||
|
||||
/* Simple parallel_for over all work items. */
|
||||
const int64_t work_size = output.size();
|
||||
KernelShaderEvalInput *input_data = input.data();
|
||||
float4 *output_data = output.data();
|
||||
float *output_data = output.data();
|
||||
bool success = true;
|
||||
|
||||
tbb::task_arena local_arena(device->info.cpu_threads);
|
||||
|
@ -130,7 +132,8 @@ bool ShaderEval::eval_cpu(Device *device,
|
|||
bool ShaderEval::eval_gpu(Device *device,
|
||||
const ShaderEvalType type,
|
||||
device_vector<KernelShaderEvalInput> &input,
|
||||
device_vector<float4> &output)
|
||||
device_vector<float> &output,
|
||||
const int64_t work_size)
|
||||
{
|
||||
/* Find required kernel function. */
|
||||
DeviceKernel kernel;
|
||||
|
@ -151,7 +154,6 @@ bool ShaderEval::eval_gpu(Device *device,
|
|||
* TODO : query appropriate size from device.*/
|
||||
const int64_t chunk_size = 65536;
|
||||
|
||||
const int64_t work_size = output.size();
|
||||
void *d_input = (void *)input.device_pointer;
|
||||
void *d_output = (void *)output.device_pointer;
|
||||
|
||||
|
|
|
@ -40,19 +40,22 @@ class ShaderEval {
|
|||
/* Evaluate shader at points specified by KernelShaderEvalInput and write out
|
||||
* num_channels float values per point to output. */
|
||||
bool eval(const ShaderEvalType type,
|
||||
const int max_num_points,
|
||||
const int max_num_inputs,
|
||||
const int num_channels,
|
||||
const function<int(device_vector<KernelShaderEvalInput> &)> &fill_input,
|
||||
const function<void(device_vector<float4> &)> &read_output);
|
||||
const function<void(device_vector<float> &)> &read_output);
|
||||
|
||||
protected:
|
||||
bool eval_cpu(Device *device,
|
||||
const ShaderEvalType type,
|
||||
device_vector<KernelShaderEvalInput> &input,
|
||||
device_vector<float4> &output);
|
||||
device_vector<float> &output,
|
||||
const int64_t work_size);
|
||||
bool eval_gpu(Device *device,
|
||||
const ShaderEvalType type,
|
||||
device_vector<KernelShaderEvalInput> &input,
|
||||
device_vector<float4> &output);
|
||||
device_vector<float> &output,
|
||||
const int64_t work_size);
|
||||
|
||||
Device *device_;
|
||||
Progress &progress_;
|
||||
|
|
|
@ -58,11 +58,11 @@ KERNEL_INTEGRATOR_SHADE_FUNCTION(megakernel);
|
|||
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobals *kg,
|
||||
const KernelShaderEvalInput *input,
|
||||
float4 *output,
|
||||
float *output,
|
||||
const int offset);
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobals *kg,
|
||||
const KernelShaderEvalInput *input,
|
||||
float4 *output,
|
||||
float *output,
|
||||
const int offset);
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
|
|
|
@ -114,7 +114,7 @@ DEFINE_INTEGRATOR_SHADE_KERNEL(megakernel)
|
|||
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobals *kg,
|
||||
const KernelShaderEvalInput *input,
|
||||
float4 *output,
|
||||
float *output,
|
||||
const int offset)
|
||||
{
|
||||
#ifdef KERNEL_STUB
|
||||
|
@ -126,7 +126,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobals *kg,
|
|||
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobals *kg,
|
||||
const KernelShaderEvalInput *input,
|
||||
float4 *output,
|
||||
float *output,
|
||||
const int offset)
|
||||
{
|
||||
#ifdef KERNEL_STUB
|
||||
|
|
|
@ -615,7 +615,7 @@ KERNEL_FILM_CONVERT_DEFINE(float4, rgba)
|
|||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
kernel_gpu_shader_eval_displace(KernelShaderEvalInput *input,
|
||||
float4 *output,
|
||||
float *output,
|
||||
const int offset,
|
||||
const int work_size)
|
||||
{
|
||||
|
@ -629,7 +629,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
kernel_gpu_shader_eval_background(KernelShaderEvalInput *input,
|
||||
float4 *output,
|
||||
float *output,
|
||||
const int offset,
|
||||
const int work_size)
|
||||
{
|
||||
|
|
|
@ -85,7 +85,8 @@ ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS,
|
|||
if (num_recorded_hits > 0) {
|
||||
sort_intersections(isect, num_recorded_hits);
|
||||
|
||||
/* Write intersection result into global integrator state memory. */
|
||||
/* Write intersection result into global integrator state memory.
|
||||
* It may be more efficient to do this directly from the intersection kernel. */
|
||||
for (int hit = 0; hit < num_recorded_hits; hit++) {
|
||||
integrator_state_write_shadow_isect(INTEGRATOR_STATE_PASS, &isect[hit], hit);
|
||||
}
|
||||
|
|
|
@ -26,7 +26,7 @@ CCL_NAMESPACE_BEGIN
|
|||
|
||||
ccl_device void kernel_displace_evaluate(ccl_global const KernelGlobals *kg,
|
||||
ccl_global const KernelShaderEvalInput *input,
|
||||
ccl_global float4 *output,
|
||||
ccl_global float *output,
|
||||
const int offset)
|
||||
{
|
||||
/* Setup shader data. */
|
||||
|
@ -53,12 +53,14 @@ ccl_device void kernel_displace_evaluate(ccl_global const KernelGlobals *kg,
|
|||
D = ensure_finite3(D);
|
||||
|
||||
/* Write output. */
|
||||
output[offset] += make_float4(D.x, D.y, D.z, 0.0f);
|
||||
output[offset * 3 + 0] += D.x;
|
||||
output[offset * 3 + 1] += D.y;
|
||||
output[offset * 3 + 2] += D.z;
|
||||
}
|
||||
|
||||
ccl_device void kernel_background_evaluate(ccl_global const KernelGlobals *kg,
|
||||
ccl_global const KernelShaderEvalInput *input,
|
||||
ccl_global float4 *output,
|
||||
ccl_global float *output,
|
||||
const int offset)
|
||||
{
|
||||
/* Setup ray */
|
||||
|
@ -88,7 +90,9 @@ ccl_device void kernel_background_evaluate(ccl_global const KernelGlobals *kg,
|
|||
color = ensure_finite3(color);
|
||||
|
||||
/* Write output. */
|
||||
output[offset] += make_float4(color.x, color.y, color.z, 0.0f);
|
||||
output[offset * 3 + 0] += color.x;
|
||||
output[offset * 3 + 1] += color.y;
|
||||
output[offset * 3 + 2] += color.z;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -50,6 +50,7 @@ static void shade_background_pixels(Device *device,
|
|||
device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));
|
||||
|
||||
const int size = width * height;
|
||||
const int num_channels = 3;
|
||||
pixels.resize(size);
|
||||
|
||||
/* Evaluate shader on device. */
|
||||
|
@ -57,6 +58,7 @@ static void shade_background_pixels(Device *device,
|
|||
shader_eval.eval(
|
||||
SHADER_EVAL_BACKGROUND,
|
||||
size,
|
||||
num_channels,
|
||||
[&](device_vector<KernelShaderEvalInput> &d_input) {
|
||||
/* Fill coordinates for shading. */
|
||||
KernelShaderEvalInput *d_input_data = d_input.data();
|
||||
|
@ -77,15 +79,15 @@ static void shade_background_pixels(Device *device,
|
|||
|
||||
return size;
|
||||
},
|
||||
[&](device_vector<float4> &d_output) {
|
||||
[&](device_vector<float> &d_output) {
|
||||
/* Copy output to pixel buffer. */
|
||||
float4 *d_output_data = d_output.data();
|
||||
float *d_output_data = d_output.data();
|
||||
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
pixels[y * width + x].x = d_output_data[y * width + x].x;
|
||||
pixels[y * width + x].y = d_output_data[y * width + x].y;
|
||||
pixels[y * width + x].z = d_output_data[y * width + x].z;
|
||||
pixels[y * width + x].x = d_output_data[(y * width + x) * num_channels + 0];
|
||||
pixels[y * width + x].y = d_output_data[(y * width + x) * num_channels + 1];
|
||||
pixels[y * width + x].z = d_output_data[(y * width + x) * num_channels + 2];
|
||||
}
|
||||
}
|
||||
});
|
||||
|
|
|
@ -115,7 +115,7 @@ static int fill_shader_input(const Scene *scene,
|
|||
/* Read back mesh displacement shader output. */
|
||||
static void read_shader_output(const Scene *scene,
|
||||
Mesh *mesh,
|
||||
const device_vector<float4> &d_output)
|
||||
const device_vector<float> &d_output)
|
||||
{
|
||||
const array<int> &mesh_shaders = mesh->get_shader();
|
||||
const array<Node *> &mesh_used_shaders = mesh->get_used_shaders();
|
||||
|
@ -125,7 +125,7 @@ static void read_shader_output(const Scene *scene,
|
|||
const int num_motion_steps = mesh->get_motion_steps();
|
||||
vector<bool> done(num_verts, false);
|
||||
|
||||
const float4 *d_output_data = d_output.data();
|
||||
const float *d_output_data = d_output.data();
|
||||
int d_output_index = 0;
|
||||
|
||||
Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
|
@ -144,7 +144,11 @@ static void read_shader_output(const Scene *scene,
|
|||
for (int j = 0; j < 3; j++) {
|
||||
if (!done[t.v[j]]) {
|
||||
done[t.v[j]] = true;
|
||||
float3 off = float4_to_float3(d_output_data[d_output_index++]);
|
||||
float3 off = make_float3(d_output_data[d_output_index + 0],
|
||||
d_output_data[d_output_index + 1],
|
||||
d_output_data[d_output_index + 2]);
|
||||
d_output_index += 3;
|
||||
|
||||
/* Avoid illegal vertex coordinates. */
|
||||
off = ensure_finite3(off);
|
||||
mesh_verts[t.v[j]] += off;
|
||||
|
@ -194,6 +198,7 @@ bool GeometryManager::displace(
|
|||
ShaderEval shader_eval(device, progress);
|
||||
if (!shader_eval.eval(SHADER_EVAL_DISPLACE,
|
||||
num_verts,
|
||||
3,
|
||||
function_bind(&fill_shader_input, scene, mesh, object_index, _1),
|
||||
function_bind(&read_shader_output, scene, mesh, _1))) {
|
||||
return false;
|
||||
|
|
Loading…
Reference in New Issue