Cycles: Speed up transparent shadows in split kernel
This commit enables record-all transparent shadow rays.

Performance results: R9 290 render time (without synchronization), in seconds:

                          Before     After    Change
  BMW                      261.5     262.5    +0.4 %
  Classroom                869.6     867.3    -0.3 %
  Fishy Cat                657.4     639.8    -2.7 %
  Koro                    1909.8     692.8   -63.7 %
  Pabellon Barcelona      1633.3    1238.0   -24.2 %
  Pabellon Barcelona(*)   1158.1     903.8   -22.0 %

(*) without glossy connected to volume
This commit is contained in:
parent
57e26627c4
commit
e8b5a5bf5b
|
@ -152,7 +152,13 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
|
|||
int bounce = state->transparent_bounce;
|
||||
Intersection *isect = hits;
|
||||
# ifdef __VOLUME__
|
||||
PathState ps = *state;
|
||||
# ifdef __SPLIT_KERNEL__
|
||||
ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
|
||||
# else
|
||||
PathState ps_object;
|
||||
PathState *ps = &ps_object;
|
||||
# endif
|
||||
*ps = *state;
|
||||
# endif
|
||||
sort_intersections(hits, num_hits);
|
||||
for(int hit = 0; hit < num_hits; hit++, isect++) {
|
||||
|
@ -171,7 +177,7 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
|
|||
shadow_sd,
|
||||
state,
|
||||
#ifdef __VOLUME__
|
||||
&ps,
|
||||
ps,
|
||||
#endif
|
||||
isect,
|
||||
ray,
|
||||
|
@ -188,8 +194,8 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
|
|||
}
|
||||
# ifdef __VOLUME__
|
||||
/* Attenuation for last line segment towards light. */
|
||||
if(ps.volume_stack[0].shader != SHADER_NONE) {
|
||||
kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
|
||||
if(ps->volume_stack[0].shader != SHADER_NONE) {
|
||||
kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
|
||||
}
|
||||
# endif
|
||||
*shadow = throughput;
|
||||
|
@ -214,7 +220,10 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
|
|||
uint max_hits,
|
||||
float3 *shadow)
|
||||
{
|
||||
# ifdef __KERNEL_CUDA__
|
||||
# ifdef __SPLIT_KERNEL__
|
||||
Intersection hits_[SHADOW_STACK_MAX_HITS];
|
||||
Intersection *hits = &hits_[0];
|
||||
# elif defined(__KERNEL_CUDA__)
|
||||
Intersection *hits = kg->hits_stack;
|
||||
# else
|
||||
Intersection hits_stack[SHADOW_STACK_MAX_HITS];
|
||||
|
|
|
@ -80,9 +80,9 @@ CCL_NAMESPACE_BEGIN
|
|||
# define __CMJ__
|
||||
# define __VOLUME__
|
||||
# define __VOLUME_SCATTER__
|
||||
# define __SHADOW_RECORD_ALL__
|
||||
# ifndef __SPLIT_KERNEL__
|
||||
# define __VOLUME_DECOUPLED__
|
||||
# define __SHADOW_RECORD_ALL__
|
||||
# define __VOLUME_RECORD_ALL__
|
||||
# endif
|
||||
#endif /* __KERNEL_CPU__ */
|
||||
|
@ -131,6 +131,7 @@ CCL_NAMESPACE_BEGIN
|
|||
# define __SUBSURFACE__
|
||||
# define __VOLUME__
|
||||
# define __VOLUME_SCATTER__
|
||||
# define __SHADOW_RECORD_ALL__
|
||||
# endif /* __KERNEL_OPENCL_AMD__ */
|
||||
|
||||
# ifdef __KERNEL_OPENCL_INTEL_CPU__
|
||||
|
|
Loading…
Reference in New Issue