Cycles: Speed up transparent shadows in split kernel

This commit enables record-all transparent shadow rays.

Performance results:

               R9 290 render time (without synchronization), seconds
                        Before    After   Change
BMW                      261.5    262.5   +0.4 %
Classroom                869.6    867.3   -0.3 %
Fishy Cat                657.4    639.8   -2.7 %
Koro                    1909.8    692.8  -63.7 %
Pabellon Barcelona      1633.3   1238.0  -24.2 %
Pabellon Barcelona(*)   1158.1    903.8  -22.0 %

(*) without glossy connected to volume
This commit is contained in:
Hristo Gueorguiev 2017-03-08 16:26:39 +01:00 committed by Sergey Sharybin
parent 57e26627c4
commit e8b5a5bf5b
2 changed files with 16 additions and 6 deletions

View File

@ -152,7 +152,13 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
int bounce = state->transparent_bounce;
Intersection *isect = hits;
# ifdef __VOLUME__
PathState ps = *state;
# ifdef __SPLIT_KERNEL__
ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
# else
PathState ps_object;
PathState *ps = &ps_object;
# endif
*ps = *state;
# endif
sort_intersections(hits, num_hits);
for(int hit = 0; hit < num_hits; hit++, isect++) {
@ -171,7 +177,7 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
shadow_sd,
state,
#ifdef __VOLUME__
&ps,
ps,
#endif
isect,
ray,
@ -188,8 +194,8 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
}
# ifdef __VOLUME__
/* Attenuation for last line segment towards light. */
if(ps.volume_stack[0].shader != SHADER_NONE) {
kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
if(ps->volume_stack[0].shader != SHADER_NONE) {
kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
}
# endif
*shadow = throughput;
@ -214,7 +220,10 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
uint max_hits,
float3 *shadow)
{
# ifdef __KERNEL_CUDA__
# ifdef __SPLIT_KERNEL__
Intersection hits_[SHADOW_STACK_MAX_HITS];
Intersection *hits = &hits_[0];
# elif defined(__KERNEL_CUDA__)
Intersection *hits = kg->hits_stack;
# else
Intersection hits_stack[SHADOW_STACK_MAX_HITS];

View File

@ -80,9 +80,9 @@ CCL_NAMESPACE_BEGIN
# define __CMJ__
# define __VOLUME__
# define __VOLUME_SCATTER__
# define __SHADOW_RECORD_ALL__
# ifndef __SPLIT_KERNEL__
# define __VOLUME_DECOUPLED__
# define __SHADOW_RECORD_ALL__
# define __VOLUME_RECORD_ALL__
# endif
#endif /* __KERNEL_CPU__ */
@ -131,6 +131,7 @@ CCL_NAMESPACE_BEGIN
# define __SUBSURFACE__
# define __VOLUME__
# define __VOLUME_SCATTER__
# define __SHADOW_RECORD_ALL__
# endif /* __KERNEL_OPENCL_AMD__ */
# ifdef __KERNEL_OPENCL_INTEL_CPU__