Fix T41471 Cycles Bake: Setting small tile size results in wrong bake with stripes rather than the expected noise pattern

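This problem was introduced in 983cbafd18.

The issue is that the baking routine did not get a unique index for
seeding the RNG (random number generator): kernel_bake_evaluate() seeded
it with cmj_hash(i, 0), where i is only the index into the current task's
input. The task offset is now passed down to the kernel on every device
(CPU, CUDA and OpenCL) and the seed becomes cmj_hash(offset + i, 0).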

Reviewers: sergey

Differential Revision: https://developer.blender.org/D749
This commit is contained in:
Dalai Felinto 2014-08-19 11:39:40 +02:00
parent 37da1dadb6
commit 8d3cc431d7
Notes: blender-bot 2023-09-08 04:55:43 +02:00
Referenced by issue #41541, Cuda renders objects in black with MIS enabled in world setting
Referenced by issue #41471, Cycles Bake: Setting small tile size results in wrong bake with 'stripes' rather than the expected noise pattern
Referenced by issue #41471, Cycles Bake: Setting small tile size results in wrong bake with 'stripes' rather than the expected noise pattern
14 changed files with 43 additions and 31 deletions

View File

@ -435,7 +435,8 @@ public:
if(system_cpu_support_avx2()) {
for(int sample = 0; sample < task.num_samples; sample++) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++)
kernel_cpu_avx2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
kernel_cpu_avx2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output,
task.shader_eval_type, x, task.offset, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@ -449,7 +450,8 @@ public:
if(system_cpu_support_avx()) {
for(int sample = 0; sample < task.num_samples; sample++) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++)
kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output,
task.shader_eval_type, x, task.offset, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@ -463,7 +465,8 @@ public:
if(system_cpu_support_sse41()) {
for(int sample = 0; sample < task.num_samples; sample++) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++)
kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output,
task.shader_eval_type, x, task.offset, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@ -477,7 +480,8 @@ public:
if(system_cpu_support_sse3()) {
for(int sample = 0; sample < task.num_samples; sample++) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++)
kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output,
task.shader_eval_type, x, task.offset, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@ -491,7 +495,8 @@ public:
if(system_cpu_support_sse2()) {
for(int sample = 0; sample < task.num_samples; sample++) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++)
kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output,
task.shader_eval_type, x, task.offset, sample);
if(task.get_cancel() || task_pool.canceled())
break;
@ -504,7 +509,8 @@ public:
{
for(int sample = 0; sample < task.num_samples; sample++) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++)
kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample);
kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output,
task.shader_eval_type, x, task.offset, sample);
if(task.get_cancel() || task_pool.canceled())
break;

View File

@ -676,6 +676,7 @@ public:
const int shader_chunk_size = 65536;
const int start = task.shader_x;
const int end = task.shader_x + task.shader_w;
int offset = task.offset;
bool canceled = false;
for(int sample = 0; sample < task.num_samples && !canceled; sample++) {
@ -688,6 +689,7 @@ public:
&task.shader_eval_type,
&shader_x,
&shader_w,
&offset,
&sample};
/* launch kernel */

View File

@ -1004,6 +1004,7 @@ public:
cl_int d_shader_eval_type = task.shader_eval_type;
cl_int d_shader_x = task.shader_x;
cl_int d_shader_w = task.shader_w;
cl_int d_offset = task.offset;
/* sample arguments */
cl_uint narg = 0;
@ -1033,6 +1034,7 @@ public:
opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type));
opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x));
opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w));
opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_offset), (void*)&d_offset));
opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_sample), (void*)&d_sample));
enqueue_kernel(kernel, task.shader_w, 1);

View File

@ -115,7 +115,7 @@ __kernel void kernel_ocl_shader(
ccl_global type *name,
#include "kernel_textures.h"
int type, int sx, int sw, int sample)
int type, int sx, int sw, int offset, int sample)
{
KernelGlobals kglobals, *kg = &kglobals;
@ -140,7 +140,7 @@ __kernel void kernel_ocl_bake(
ccl_global type *name,
#include "kernel_textures.h"
int type, int sx, int sw, int sample)
int type, int sx, int sw, int offset, int sample)
{
KernelGlobals kglobals, *kg = &kglobals;
@ -153,6 +153,6 @@ __kernel void kernel_ocl_bake(
int x = sx + get_global_id(0);
if(x < sx + sw)
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, sample);
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x, offset, sample);
}

View File

@ -120,10 +120,10 @@ void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *bu
/* Shader Evaluation */
void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
if(type >= SHADER_EVAL_BAKE)
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
else
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}

View File

@ -156,12 +156,12 @@ kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw, int s
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int sample)
kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw, int offset, int sample)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
if(x < sx + sw)
kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, sample);
kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x, offset, sample);
}
#endif

View File

@ -41,7 +41,7 @@ void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i, int sample);
int type, int i, int offset, int sample);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
@ -51,7 +51,7 @@ void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf
void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i, int sample);
int type, int i, int offset, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
@ -62,7 +62,7 @@ void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf
void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i, int sample);
int type, int i, int offset, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
@ -73,7 +73,7 @@ void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *bu
void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i, int sample);
int type, int i, int offset, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
@ -84,7 +84,7 @@ void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buff
void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i, int sample);
int type, int i, int offset, int sample);
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
@ -95,7 +95,7 @@ void kernel_cpu_avx2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buf
void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i, int sample);
int type, int i, int offset, int sample);
#endif
CCL_NAMESPACE_END

View File

@ -68,10 +68,10 @@ void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float
/* Shader Evaluate */
void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
if(type >= SHADER_EVAL_BAKE)
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
else
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}

View File

@ -69,10 +69,10 @@ void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
/* Shader Evaluate */
void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
if(type >= SHADER_EVAL_BAKE)
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
else
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}

View File

@ -172,7 +172,8 @@ ccl_device_inline float bake_clamp_mirror_repeat(float u)
}
#endif
ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i, int sample)
ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output,
ShaderEvalType type, int i, int offset, int sample)
{
ShaderData sd;
uint4 in = input[i * 2];
@ -197,7 +198,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
int num_samples = kernel_data.integrator.aa_samples;
/* random number generator */
RNG rng = cmj_hash(i, 0);
RNG rng = cmj_hash(offset + i, 0);
#if 0
uint rng_state = cmj_hash(i, 0);

View File

@ -64,10 +64,10 @@ void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
/* Shader Evaluate */
void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
if(type >= SHADER_EVAL_BAKE)
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
else
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}

View File

@ -66,10 +66,10 @@ void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
/* Shader Evaluate */
void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
if(type >= SHADER_EVAL_BAKE)
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
else
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}

View File

@ -67,10 +67,10 @@ void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, flo
/* Shader Evaluate */
void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int sample)
void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample)
{
if(type >= SHADER_EVAL_BAKE)
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample);
else
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample);
}

View File

@ -179,6 +179,7 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
task.shader_output = d_output.device_pointer;
task.shader_eval_type = shader_type;
task.shader_x = 0;
task.offset = shader_offset;
task.shader_w = d_output.size();
task.num_samples = this->num_samples;
task.get_cancel = function_bind(&Progress::get_cancel, &progress);