Page Menu
Home
Search
Configure Global Search
Log In
Paste
P2236
(An Untitled Masterwork)
Active
Public
Actions
Authored by
Brecht Van Lommel (brecht)
on Wed, Jul 7, 9:31 PM.
Edit Paste
Archive Paste
View Raw File
Subscribe
Mute Notifications
Award Token
Tags
None
Subscribers
None
From 68065750ef3ad9af7db60ae3a7bb43e081901d4f Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brecht@blender.org>
Date: Wed, 21 Jul 2021 19:00:12 +0200
Subject: [PATCH 1/7] Cycles X: copy volume phase functions to smaller
 ShaderVolumePhases struct

This will be important for reducing memory usage in upcoming commits.
---
intern/cycles/kernel/closure/bsdf.h  76 
.../kernel/closure/bsdf_ashikhmin_velvet.h  8 
intern/cycles/kernel/closure/bsdf_diffuse.h  8 
intern/cycles/kernel/closure/bsdf_hair.h  9 
.../cycles/kernel/closure/bsdf_microfacet.h  15 
.../cycles/kernel/closure/bsdf_oren_nayar.h  8 
.../kernel/closure/bsdf_principled_diffuse.h  8 
intern/cycles/kernel/closure/bsdf_toon.h  9 
intern/cycles/kernel/closure/volume.h  56 +++
.../integrator/integrator_shade_surface.h  2 +
.../integrator/integrator_shade_volume.h  25 ++++
intern/cycles/kernel/kernel_shader.h  105 +++++++++++
intern/cycles/kernel/kernel_types.h  17 +++
13 files changed, 117 insertions(+), 229 deletions(-)
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 8f03e0b..4eb8bca 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ 430,21 +430,6 @@ ccl_device_inline int bsdf_sample(const KernelGlobals *kg,
pdf);
break;
# endif /* __PRINCIPLED__ */
#endif
#ifdef __VOLUME__
 case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
 label = volume_henyey_greenstein_sample(sc,
 sd>I,
 sd>dI.dx,
 sd>dI.dy,
 randu,
 randv,
 eval,
 omega_in,
 &domega_in>dx,
 &domega_in>dy,
 pdf);
 break;
#endif
default:
label = LABEL_NONE;
@@ 572,11 +557,6 @@ ccl_device_inline
eval = bsdf_principled_sheen_eval_reflect(sc, sd>I, omega_in, pdf);
break;
# endif /* __PRINCIPLED__ */
#endif
#ifdef __VOLUME__
 case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
 eval = volume_henyey_greenstein_eval_phase(sc, sd>I, omega_in, pdf);
 break;
#endif
default:
break;
@@ 664,11 +644,6 @@ ccl_device_inline
eval = bsdf_principled_sheen_eval_transmit(sc, sd>I, omega_in, pdf);
break;
# endif /* __PRINCIPLED__ */
#endif
#ifdef __VOLUME__
 case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
 eval = volume_henyey_greenstein_eval_phase(sc, sd>I, omega_in, pdf);
 break;
#endif
default:
break;
@@ 716,55 +691,4 @@ ccl_device void bsdf_blur(const KernelGlobals *kg, ShaderClosure *sc, float roug
#endif
}
ccl_device bool bsdf_merge(ShaderClosure *a, ShaderClosure *b)
{
#ifdef __SVM__
 switch (a>type) {
 case CLOSURE_BSDF_TRANSPARENT_ID:
 return true;
 case CLOSURE_BSDF_DIFFUSE_ID:
 case CLOSURE_BSDF_BSSRDF_ID:
 case CLOSURE_BSDF_TRANSLUCENT_ID:
 return bsdf_diffuse_merge(a, b);
 case CLOSURE_BSDF_OREN_NAYAR_ID:
 return bsdf_oren_nayar_merge(a, b);
 case CLOSURE_BSDF_REFLECTION_ID:
 case CLOSURE_BSDF_REFRACTION_ID:
 case CLOSURE_BSDF_MICROFACET_GGX_ID:
 case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
 case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
 case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
 case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
 case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
 case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
 case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
 case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
 case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
 case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
 return bsdf_microfacet_merge(a, b);
 case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
 return bsdf_ashikhmin_velvet_merge(a, b);
 case CLOSURE_BSDF_DIFFUSE_TOON_ID:
 case CLOSURE_BSDF_GLOSSY_TOON_ID:
 return bsdf_toon_merge(a, b);
 case CLOSURE_BSDF_HAIR_REFLECTION_ID:
 case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
 return bsdf_hair_merge(a, b);
# ifdef __PRINCIPLED__
 case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
 case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
 return bsdf_principled_diffuse_merge(a, b);
# endif
# ifdef __VOLUME__
 case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
 return volume_henyey_greenstein_merge(a, b);
# endif
 default:
 return false;
 }
#else
 return false;
#endif
}

CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
index ffdaec55..f51027f 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
@@ 55,14 +55,6 @@ ccl_device int bsdf_ashikhmin_velvet_setup(VelvetBsdf *bsdf)
return SD_BSDF  SD_BSDF_HAS_EVAL;
}
ccl_device bool bsdf_ashikhmin_velvet_merge(const ShaderClosure *a, const ShaderClosure *b)
{
 const VelvetBsdf *bsdf_a = (const VelvetBsdf *)a;
 const VelvetBsdf *bsdf_b = (const VelvetBsdf *)b;

 return (isequal_float3(bsdf_a>N, bsdf_b>N)) && (bsdf_a>sigma == bsdf_b>sigma);
}

ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc,
const float3 I,
const float3 omega_in,
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h
index 657677d..1555aa3 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse.h
@@ 48,14 +48,6 @@ ccl_device int bsdf_diffuse_setup(DiffuseBsdf *bsdf)
return SD_BSDF  SD_BSDF_HAS_EVAL;
}
ccl_device bool bsdf_diffuse_merge(const ShaderClosure *a, const ShaderClosure *b)
{
 const DiffuseBsdf *bsdf_a = (const DiffuseBsdf *)a;
 const DiffuseBsdf *bsdf_b = (const DiffuseBsdf *)b;

 return (isequal_float3(bsdf_a>N, bsdf_b>N));
}

ccl_device float3 bsdf_diffuse_eval_reflect(const ShaderClosure *sc,
const float3 I,
const float3 omega_in,
diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h
index 51bfdb8..f56f78a 100644
--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ 61,15 +61,6 @@ ccl_device int bsdf_hair_transmission_setup(HairBsdf *bsdf)
return SD_BSDF  SD_BSDF_HAS_EVAL;
}
ccl_device bool bsdf_hair_merge(const ShaderClosure *a, const ShaderClosure *b)
{
 const HairBsdf *bsdf_a = (const HairBsdf *)a;
 const HairBsdf *bsdf_b = (const HairBsdf *)b;

 return (isequal_float3(bsdf_a>T, bsdf_b>T)) && (bsdf_a>roughness1 == bsdf_b>roughness1) &&
 (bsdf_a>roughness2 == bsdf_b>roughness2) && (bsdf_a>offset == bsdf_b>offset);
}

ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc,
const float3 I,
const float3 omega_in,
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index b94b421..dc09f65 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ 354,21 +354,6 @@ ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf, const S
return SD_BSDF  SD_BSDF_HAS_EVAL;
}
ccl_device bool bsdf_microfacet_merge(const ShaderClosure *a, const ShaderClosure *b)
{
 const MicrofacetBsdf *bsdf_a = (const MicrofacetBsdf *)a;
 const MicrofacetBsdf *bsdf_b = (const MicrofacetBsdf *)b;

 return (isequal_float3(bsdf_a>N, bsdf_b>N)) && (bsdf_a>alpha_x == bsdf_b>alpha_x) &&
 (bsdf_a>alpha_y == bsdf_b>alpha_y) && (isequal_float3(bsdf_a>T, bsdf_b>T)) &&
 (bsdf_a>ior == bsdf_b>ior) &&
 ((bsdf_a>extra == NULL && bsdf_b>extra == NULL) 
 ((bsdf_a>extra && bsdf_b>extra) &&
 (isequal_float3(bsdf_a>extra>color, bsdf_b>extra>color)) &&
 (isequal_float3(bsdf_a>extra>cspec0, bsdf_b>extra>cspec0)) &&
 (bsdf_a>extra>clearcoat == bsdf_b>extra>clearcoat)));
}

ccl_device int bsdf_microfacet_ggx_refraction_setup(MicrofacetBsdf *bsdf)
{
bsdf>extra = NULL;
diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
index 1936c74..be12d47 100644
--- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h
+++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
@@ 60,14 +60,6 @@ ccl_device int bsdf_oren_nayar_setup(OrenNayarBsdf *bsdf)
return SD_BSDF  SD_BSDF_HAS_EVAL;
}
ccl_device bool bsdf_oren_nayar_merge(const ShaderClosure *a, const ShaderClosure *b)
{
 const OrenNayarBsdf *bsdf_a = (const OrenNayarBsdf *)a;
 const OrenNayarBsdf *bsdf_b = (const OrenNayarBsdf *)b;

 return (isequal_float3(bsdf_a>N, bsdf_b>N)) && (bsdf_a>roughness == bsdf_b>roughness);
}

ccl_device float3 bsdf_oren_nayar_eval_reflect(const ShaderClosure *sc,
const float3 I,
const float3 omega_in,
diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
index 1bc8b3c..97923a6 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
@@ 62,14 +62,6 @@ ccl_device int bsdf_principled_diffuse_setup(PrincipledDiffuseBsdf *bsdf)
return SD_BSDF  SD_BSDF_HAS_EVAL;
}
ccl_device bool bsdf_principled_diffuse_merge(const ShaderClosure *a, const ShaderClosure *b)
{
 const PrincipledDiffuseBsdf *bsdf_a = (const PrincipledDiffuseBsdf *)a;
 const PrincipledDiffuseBsdf *bsdf_b = (const PrincipledDiffuseBsdf *)b;

 return (isequal_float3(bsdf_a>N, bsdf_b>N) && bsdf_a>roughness == bsdf_b>roughness);
}

ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc,
const float3 I,
const float3 omega_in,
diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h
index b3b601a..acdafe0 100644
--- a/intern/cycles/kernel/closure/bsdf_toon.h
+++ b/intern/cycles/kernel/closure/bsdf_toon.h
@@ 54,15 +54,6 @@ ccl_device int bsdf_diffuse_toon_setup(ToonBsdf *bsdf)
return SD_BSDF  SD_BSDF_HAS_EVAL;
}
ccl_device bool bsdf_toon_merge(const ShaderClosure *a, const ShaderClosure *b)
{
 const ToonBsdf *bsdf_a = (const ToonBsdf *)a;
 const ToonBsdf *bsdf_b = (const ToonBsdf *)b;

 return (isequal_float3(bsdf_a>N, bsdf_b>N)) && (bsdf_a>size == bsdf_b>size) &&
 (bsdf_a>smooth == bsdf_b>smooth);
}

ccl_device float3 bsdf_toon_get_intensity(float max_angle, float smooth, float angle)
{
float is;
diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h
index 745e37d..1d1b419 100644
--- a/intern/cycles/kernel/closure/volume.h
+++ b/intern/cycles/kernel/closure/volume.h
@@ 61,21 +61,12 @@ ccl_device int volume_henyey_greenstein_setup(HenyeyGreensteinVolume *volume)
return SD_SCATTER;
}
ccl_device bool volume_henyey_greenstein_merge(const ShaderClosure *a, const ShaderClosure *b)
{
 const HenyeyGreensteinVolume *volume_a = (const HenyeyGreensteinVolume *)a;
 const HenyeyGreensteinVolume *volume_b = (const HenyeyGreensteinVolume *)b;

 return (volume_a>g == volume_b>g);
}

ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderClosure *sc,
+ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderVolumeClosure *sc,
const float3 I,
float3 omega_in,
float *pdf)
{
 const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume *)sc;
 float g = volume>g;
+ float g = sc>g;
/* note that I points towards the viewer */
if (fabsf(g) < 1e3f) {
@@ 121,7 +112,7 @@ henyey_greenstrein_sample(float3 D, float g, float randu, float randv, float *pd
return dir;
}
ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc,
+ccl_device int volume_henyey_greenstein_sample(const ShaderVolumeClosure *sc,
float3 I,
float3 dIdx,
float3 dIdy,
@@ 133,8 +124,7 @@ ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc,
float3 *domega_in_dy,
float *pdf)
{
 const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume *)sc;
 float g = volume>g;
+ float g = sc>g;
/* note that I points towards the viewer and so is used negated */
*omega_in = henyey_greenstrein_sample(I, g, randu, randv, pdf);
@@ 152,7 +142,7 @@ ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc,
/* VOLUME CLOSURE */
ccl_device float3 volume_phase_eval(const ShaderData *sd,
 const ShaderClosure *sc,
+ const ShaderVolumeClosure *sc,
float3 omega_in,
float *pdf)
{
@@ 162,7 +152,7 @@ ccl_device float3 volume_phase_eval(const ShaderData *sd,
}
ccl_device int volume_phase_sample(const ShaderData *sd,
 const ShaderClosure *sc,
+ const ShaderVolumeClosure *sc,
float randu,
float randv,
float3 *eval,
@@ 170,29 +160,17 @@ ccl_device int volume_phase_sample(const ShaderData *sd,
differential3 *domega_in,
float *pdf)
{
 int label;

 switch (sc>type) {
 case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
 label = volume_henyey_greenstein_sample(sc,
 sd>I,
 sd>dI.dx,
 sd>dI.dy,
 randu,
 randv,
 eval,
 omega_in,
 &domega_in>dx,
 &domega_in>dy,
 pdf);
 break;
 default:
 *eval = make_float3(0.0f, 0.0f, 0.0f);
 label = LABEL_NONE;
 break;
 }

 return label;
+ return volume_henyey_greenstein_sample(sc,
+ sd>I,
+ sd>dI.dx,
+ sd>dI.dy,
+ randu,
+ randv,
+ eval,
+ omega_in,
+ &domega_in>dx,
+ &domega_in>dy,
+ pdf);
}
/* Volume sampling utilities. */
diff --git a/intern/cycles/kernel/integrator/integrator_shade_surface.h b/intern/cycles/kernel/integrator/integrator_shade_surface.h
index 5674420..7f6ded9 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_surface.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_surface.h
@@ 384,7 +384,7 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
}
#endif
 shader_prepare_closures(INTEGRATOR_STATE_PASS, &sd);
+ shader_prepare_surface_closures(INTEGRATOR_STATE_PASS, &sd);
#ifdef __HOLDOUT__
/* Evaluate holdout. */
diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h
index 6b86e5f..5f5322e 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_volume.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h
@@ 376,6 +376,7 @@ ccl_device VolumeIntegrateEvent
volume_integrate_heterogeneous(INTEGRATOR_STATE_ARGS,
Ray *ccl_restrict ray,
ShaderData *ccl_restrict sd,
+ ShaderVolumePhases *ccl_restrict phases,
ccl_addr_space float3 *ccl_restrict throughput,
const RNGState *rng_state,
ccl_global float *ccl_restrict render_buffer,
@@ 490,6 +491,8 @@ volume_integrate_heterogeneous(INTEGRATOR_STATE_ARGS,
/* prepare to scatter to new direction */
if (scatter) {
+ shader_copy_volume_phases(phases, sd);
+
/* adjust throughput and move to new location */
sd>P = ray>P + new_t * ray>D;
*throughput = tp;
@@ 518,6 +521,7 @@ volume_integrate_heterogeneous(INTEGRATOR_STATE_ARGS,
* queue shadow ray to be traced. */
ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
ShaderData *sd,
+ const ShaderVolumePhases *phases,
const RNGState *rng_state)
{
/* Test if there is a light or BSDF that needs direct light. */
@@ 560,7 +564,7 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
/* Evaluate BSDF. */
BsdfEval phase_eval ccl_optional_struct_init;
 const float phase_pdf = shader_volume_phase_eval(kg, sd, ls.D, &phase_eval);
+ const float phase_pdf = shader_volume_phase_eval(kg, sd, phases, ls.D, &phase_eval);
if (ls.shader & SHADER_USE_MIS) {
float mis_weight = power_heuristic(ls.pdf, phase_pdf);
@@ 617,6 +621,7 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
/* Path tracing: scatter in new direction using phase function. */
ccl_device_forceinline bool integrate_volume_phase_scatter(INTEGRATOR_STATE_ARGS,
ShaderData *sd,
+ const ShaderVolumePhases *phases,
const RNGState *rng_state)
{
float phase_u, phase_v;
@@ 628,8 +633,15 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(INTEGRATOR_STATE_ARGS
float3 phase_omega_in ccl_optional_struct_init;
differential3 phase_domega_in ccl_optional_struct_init;
 const int label = shader_volume_phase_sample(
 kg, sd, phase_u, phase_v, &phase_eval, &phase_omega_in, &phase_domega_in, &phase_pdf);
+ const int label = shader_volume_phase_sample(kg,
+ sd,
+ phases,
+ phase_u,
+ phase_v,
+ &phase_eval,
+ &phase_omega_in,
+ &phase_domega_in,
+ &phase_pdf);
if (phase_pdf == 0.0f  bsdf_eval_is_zero(&phase_eval)) {
return false;
@@ 685,8 +697,9 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
});
+ ShaderVolumePhases phases;
VolumeIntegrateEvent event = volume_integrate_heterogeneous(
 INTEGRATOR_STATE_PASS, ray, &sd, &throughput, &rng_state, render_buffer, step_size);
+ INTEGRATOR_STATE_PASS, ray, &sd, &phases, &throughput, &rng_state, render_buffer, step_size);
/* Perform path termination. The intersect_closest will have already marked this path
* to be terminated. That will shading evaluating to leave out any scattering closures,
@@ 711,10 +724,10 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
if (event == VOLUME_PATH_SCATTERED) {
/* Direct light. */
 integrate_volume_direct_light(INTEGRATOR_STATE_PASS, &sd, &rng_state);
+ integrate_volume_direct_light(INTEGRATOR_STATE_PASS, &sd, &phases, &rng_state);
/* Scatter. */
 if (!integrate_volume_phase_scatter(INTEGRATOR_STATE_PASS, &sd, &rng_state)) {
+ if (!integrate_volume_phase_scatter(INTEGRATOR_STATE_PASS, &sd, &phases, &rng_state)) {
return VOLUME_PATH_MISSED;
}
}
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index 146e2da..f6d8824 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ 37,19 +37,27 @@ CCL_NAMESPACE_BEGIN
/* Merging */
#if defined(__VOLUME__)
ccl_device_inline void shader_merge_closures(ShaderData *sd)
+ccl_device_inline void shader_merge_volume_closures(ShaderData *sd)
{
 /* merge identical closures, better when we sample a single closure at a time */
+ /* Merge identical closures to save closure space with stacked volumes. */
for (int i = 0; i < sd>num_closure; i++) {
ShaderClosure *sci = &sd>closure[i];
+ if (sci>type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
+ continue;
+ }
+
for (int j = i + 1; j < sd>num_closure; j++) {
ShaderClosure *scj = &sd>closure[j];

 if (sci>type != scj>type)
+ if (sci>type != scj>type) {
continue;
 if (!bsdf_merge(sci, scj))
+ }
+
+ const HenyeyGreensteinVolume *hgi = (const HenyeyGreensteinVolume *)sci;
+ const HenyeyGreensteinVolume *hgj = (const HenyeyGreensteinVolume *)scj;
+ if (!(hgi>g == hgj>g)) {
continue;
+ }
sci>weight += scj>weight;
sci>sample_weight += scj>sample_weight;
@@ 67,9 +75,27 @@ ccl_device_inline void shader_merge_closures(ShaderData *sd)
}
}
}
+
+ccl_device_inline void shader_copy_volume_phases(ShaderVolumePhases *ccl_restrict phases,
+ const ShaderData *ccl_restrict sd)
+{
+ phases>num_closure = sd>num_closure;
+
+ for (int i = 0; i < min(sd>num_closure, MAX_VOLUME_CLOSURE); i++) {
+ const ShaderClosure *from_sc = &sd>closure[i];
+ if (from_sc>type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
+ const HenyeyGreensteinVolume *hg = (const HenyeyGreensteinVolume *)from_sc;
+ ShaderVolumeClosure *to_sc = &phases>closure[i];
+
+ to_sc>weight = from_sc>weight;
+ to_sc>sample_weight = from_sc>sample_weight;
+ to_sc>g = hg>g;
+ }
+ }
+}
#endif /* __VOLUME__ */
ccl_device_inline void shader_prepare_closures(INTEGRATOR_STATE_CONST_ARGS, ShaderData *sd)
+ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_ARGS, ShaderData *sd)
{
/* Defensive sampling.
*
@@ 605,29 +631,27 @@ ccl_device void shader_eval_surface(INTEGRATOR_STATE_CONST_ARGS,
#ifdef __VOLUME__
ccl_device_inline float _shader_volume_phase_multi_eval(const ShaderData *sd,
+ const ShaderVolumePhases *phases,
const float3 omega_in,
int skip_phase,
BsdfEval *result_eval,
float sum_pdf,
float sum_sample_weight)
{
 for (int i = 0; i < sd>num_closure; i++) {
+ for (int i = 0; i < phases>num_closure; i++) {
if (i == skip_phase)
continue;
 const ShaderClosure *sc = &sd>closure[i];
+ const ShaderVolumeClosure *sc = &phases>closure[i];
+ float phase_pdf = 0.0f;
+ float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);
 if (CLOSURE_IS_PHASE(sc>type)) {
 float phase_pdf = 0.0f;
 float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf);

 if (phase_pdf != 0.0f) {
 bsdf_eval_accum(result_eval, false, eval, 1.0f);
 sum_pdf += phase_pdf * sc>sample_weight;
 }

 sum_sample_weight += sc>sample_weight;
+ if (phase_pdf != 0.0f) {
+ bsdf_eval_accum(result_eval, false, eval, 1.0f);
+ sum_pdf += phase_pdf * sc>sample_weight;
}
+
+ sum_sample_weight += sc>sample_weight;
}
return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
@@ 635,6 +659,7 @@ ccl_device_inline float _shader_volume_phase_multi_eval(const ShaderData *sd,
ccl_device float shader_volume_phase_eval(const KernelGlobals *kg,
const ShaderData *sd,
+ const ShaderVolumePhases *phases,
const float3 omega_in,
BsdfEval *phase_eval)
{
@@ 642,11 +667,12 @@ ccl_device float shader_volume_phase_eval(const KernelGlobals *kg,
bsdf_eval_init(phase_eval, false, zero_float3(), kernel_data.film.use_light_pass);
 return _shader_volume_phase_multi_eval(sd, omega_in, 1, phase_eval, 0.0f, 0.0f);
+ return _shader_volume_phase_multi_eval(sd, phases, omega_in, 1, phase_eval, 0.0f, 0.0f);
}
ccl_device int shader_volume_phase_sample(const KernelGlobals *kg,
const ShaderData *sd,
+ const ShaderVolumePhases *phases,
float randu,
float randv,
BsdfEval *phase_eval,
@@ 658,37 +684,32 @@ ccl_device int shader_volume_phase_sample(const KernelGlobals *kg,
int sampled = 0;
 if (sd>num_closure > 1) {
+ if (phases>num_closure > 1) {
/* pick a phase closure based on sample weights */
float sum = 0.0f;
 for (sampled = 0; sampled < sd>num_closure; sampled++) {
 const ShaderClosure *sc = &sd>closure[sampled];

 if (CLOSURE_IS_PHASE(sc>type))
 sum += sc>sample_weight;
+ for (sampled = 0; sampled < phases>num_closure; sampled++) {
+ const ShaderVolumeClosure *sc = &phases>closure[sampled];
+ sum += sc>sample_weight;
}
float r = randu * sum;
float partial_sum = 0.0f;
 for (sampled = 0; sampled < sd>num_closure; sampled++) {
 const ShaderClosure *sc = &sd>closure[sampled];
+ for (sampled = 0; sampled < phases>num_closure; sampled++) {
+ const ShaderVolumeClosure *sc = &phases>closure[sampled];
+ float next_sum = partial_sum + sc>sample_weight;
 if (CLOSURE_IS_PHASE(sc>type)) {
 float next_sum = partial_sum + sc>sample_weight;

 if (r <= next_sum) {
 /* Rescale to reuse for BSDF direction sample. */
 randu = (r  partial_sum) / sc>sample_weight;
 break;
 }

 partial_sum = next_sum;
+ if (r <= next_sum) {
+ /* Rescale to reuse for BSDF direction sample. */
+ randu = (r  partial_sum) / sc>sample_weight;
+ break;
}
+
+ partial_sum = next_sum;
}
 if (sampled == sd>num_closure) {
+ if (sampled == phases>num_closure) {
*pdf = 0.0f;
return LABEL_NONE;
}
@@ 696,7 +717,7 @@ ccl_device int shader_volume_phase_sample(const KernelGlobals *kg,
/* todo: this isn't quite correct, we don't weight anisotropy properly
* depending on color channels, even if this is perhaps not a common case */
 const ShaderClosure *sc = &sd>closure[sampled];
+ const ShaderVolumeClosure *sc = &phases>closure[sampled];
int label;
float3 eval = zero_float3();
@@ 712,7 +733,7 @@ ccl_device int shader_volume_phase_sample(const KernelGlobals *kg,
ccl_device int shader_phase_sample_closure(const KernelGlobals *kg,
const ShaderData *sd,
 const ShaderClosure *sc,
+ const ShaderVolumeClosure *sc,
float randu,
float randv,
BsdfEval *phase_eval,
@@ 800,9 +821,9 @@ ccl_device_inline void shader_eval_volume(INTEGRATOR_STATE_CONST_ARGS,
}
# endif
 /* merge closures to avoid exceeding number of closures limit */
+ /* Merge closures to avoid exceeding number of closures limit. */
if (i > 0)
 shader_merge_closures(sd);
+ shader_merge_volume_closures(sd);
}
}
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 67bde06..bdcafec 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ 593,6 +593,8 @@ typedef struct AttributeDescriptor {
# define MAX_CLOSURE __MAX_CLOSURE__
#endif
+#define MAX_VOLUME_CLOSURE 8
+
/* This struct is the base class for all closures. The common members are
* duplicated in all derived classes since we don't have C++ in the kernel
* yet, and because it lets us lay out the members to minimize padding. The
@@ 818,6 +820,21 @@ typedef ccl_addr_space struct ccl_align(16) ShaderDataTinyStorage
ShaderDataTinyStorage;
#define AS_SHADER_DATA(shader_data_tiny_storage) ((ShaderData *)shader_data_tiny_storage)
+/* Compact volume closures storage.
+ *
+ * Used for decoupled direct/indirect light closure storage. */
+
+ccl_addr_space struct ShaderVolumeClosure {
+ float3 weight;
+ float sample_weight;
+ float g;
+};
+
+ccl_addr_space struct ShaderVolumePhases {
+ ShaderVolumeClosure closure[MAX_VOLUME_CLOSURE];
+ int num_closure;
+};
+
/* Volume Stack */
#ifdef __VOLUME__

2.25.1
From a5d70f77424e4453f9a6aad366ff679cee1307a5 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brecht@blender.org>
Date: Wed, 7 Jul 2021 20:56:32 +0200
Subject: [PATCH 2/7] Cycles X: tweak light functions in preparation of
 equiangular sampling

* Add function to sample a new position on a given light
* Tweak shadow ray generation to be able to override the position
---
.../integrator/integrator_shade_surface.h  2 +
.../integrator/integrator_shade_volume.h  2 +
intern/cycles/kernel/kernel_emission.h  78 ++++++++++++
intern/cycles/kernel/kernel_light.h  17 ++++
4 files changed, 69 insertions(+), 30 deletions(-)
diff --git a/intern/cycles/kernel/integrator/integrator_shade_surface.h b/intern/cycles/kernel/integrator/integrator_shade_surface.h
index 7f6ded9..cf271ec 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_surface.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_surface.h
@@ 160,7 +160,7 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
/* Create shadow ray. */
Ray ray ccl_optional_struct_init;
 light_sample_to_shadow_ray(kg, sd, &ls, &ray);
+ light_sample_to_surface_shadow_ray(kg, sd, &ls, &ray);
const bool is_light = light_sample_is_light(&ls);
/* Copy volume stack and enter/exit volume. */
diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h
index 5f5322e..344f6cb 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_volume.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h
@@ 581,7 +581,7 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
/* Create shadow ray. */
Ray ray ccl_optional_struct_init;
 light_sample_to_shadow_ray<false>(kg, sd, &ls, &ray);
+ light_sample_to_volume_shadow_ray(kg, sd, &ls, sd>P, &ray);
const bool is_light = light_sample_is_light(&ls);
/* Write shadow ray and associated state to global memory. */
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 74322ae..90fd95e 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ 25,8 +25,8 @@ CCL_NAMESPACE_BEGIN
/* Evaluate shader on light. */
ccl_device_noinline_cpu float3 light_sample_shader_eval(INTEGRATOR_STATE_ARGS,
 ShaderData *emission_sd,
 LightSample *ls,
+ ShaderData *ccl_restrict emission_sd,
+ LightSample *ccl_restrict ls,
float time)
{
/* setup shading at emitter */
@@ 93,7 +93,7 @@ ccl_device_noinline_cpu float3 light_sample_shader_eval(INTEGRATOR_STATE_ARGS,
}
/* Test if light sample is from a light or emission from geometry. */
ccl_device_inline bool light_sample_is_light(const LightSample *ls)
+ccl_device_inline bool light_sample_is_light(const LightSample *ccl_restrict ls)
{
/* return if it's a lamp for shadow pass */
return (ls>prim == PRIM_NONE && ls>type != LIGHT_BACKGROUND);
@@ 101,8 +101,8 @@ ccl_device_inline bool light_sample_is_light(const LightSample *ls)
/* Early path termination of shadow rays. */
ccl_device_inline bool light_sample_terminate(const KernelGlobals *ccl_restrict kg,
 const LightSample *ls,
 BsdfEval *eval,
+ const LightSample *ccl_restrict ls,
+ BsdfEval *ccl_restrict eval,
const float rand_terminate)
{
if (bsdf_eval_is_zero(eval)) {
@@ 207,35 +207,57 @@ ccl_device_inline float3 shadow_ray_offset(const KernelGlobals *ccl_restrict kg,
return P;
}
+ccl_device_inline void shadow_ray_setup(const ShaderData *ccl_restrict sd,
+ const LightSample *ccl_restrict ls,
+ const float3 P,
+ Ray *ray)
+{
+ ray>P = P;
+
+ if (ls>t == FLT_MAX) {
+ /* distant light */
+ ray>D = ls>D;
+ ray>t = ls>t;
+ }
+ else {
+ /* other lights, avoid selfintersection */
+ ray>D = ray_offset(ls>P, ls>Ng)  P;
+ ray>D = normalize_len(ray>D, &ray>t);
+ }
+
+ ray>dP = differential_make_compact(sd>dP);
+ ray>dD = differential_zero_compact();
+}
+
/* Create shadow ray towards light sample. */
template<bool is_volume = false>
ccl_device_inline void light_sample_to_shadow_ray(const KernelGlobals *ccl_restrict kg,
 const ShaderData *sd,
 const LightSample *ls,
 Ray *ray)
+ccl_device_inline void light_sample_to_surface_shadow_ray(const KernelGlobals *ccl_restrict kg,
+ const ShaderData *ccl_restrict sd,
+ const LightSample *ccl_restrict ls,
+ Ray *ray)
{
if (ls>shader & SHADER_CAST_SHADOW) {
/* setup ray */
 if (is_volume) {
 ray>P = sd>P;
 }
 else {
 ray>P = shadow_ray_offset(kg, sd, ls>D);
 }
+ const float3 P = shadow_ray_offset(kg, sd, ls>D);
+ shadow_ray_setup(sd, ls, P, ray);
+ }
+ else {
+ /* signal to not cast shadow ray */
+ ray>t = 0.0f;
+ }
 if (ls>t == FLT_MAX) {
 /* distant light */
 ray>D = ls>D;
 ray>t = ls>t;
 }
 else {
 /* other lights, avoid selfintersection */
 ray>D = ray_offset(ls>P, ls>Ng)  ray>P;
 ray>D = normalize_len(ray>D, &ray>t);
 }
+ ray>time = sd>time;
+}
 ray>dP = differential_make_compact(sd>dP);
 ray>dD = differential_zero_compact();
+/* Create shadow ray towards light sample. */
+ccl_device_inline void light_sample_to_volume_shadow_ray(const KernelGlobals *ccl_restrict kg,
+ const ShaderData *ccl_restrict sd,
+ const LightSample *ccl_restrict ls,
+ const float3 P,
+ Ray *ray)
+{
+ if (ls>shader & SHADER_CAST_SHADOW) {
+ /* setup ray */
+ shadow_ray_setup(sd, ls, P, ray);
}
else {
/* signal to not cast shadow ray */
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 9ce9fe5..ad83f79 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ 790,4 +790,21 @@ ccl_device_noinline bool light_sample(const KernelGlobals *kg,
return light_sample_from_position(kg, lamp, randu, randv, P, ls);
}
+ccl_device_inline bool light_sample_new_position(const KernelGlobals *kg,
+ const float randu,
+ const float randv,
+ const float time,
+ const float3 P,
+ LightSample *ls)
+{
+ /* Sample a new position on the same light, for volume sampling. */
+ if (ls>type == LIGHT_TRIANGLE) {
+ triangle_light_sample(kg, ls>prim, ls>object, randu, randv, time, ls, P);
+ return (ls>pdf > 0.0f);
+ }
+ else {
+ return light_sample_from_position(kg, ls>lamp, randu, randv, P, ls);
+ }
+}
+
CCL_NAMESPACE_END

2.25.1
From 69885e5e9eeb32e8cd504172595cfbe22f31a31c Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brecht@blender.org>
Date: Wed, 7 Jul 2021 21:01:54 +0200
Subject: [PATCH 3/7] Cycles X: sample position on light before stepping
 through volume

In preparation for equiangular sampling.
---
.../integrator/integrator_shade_volume.h  77 +++++++++++++
1 file changed, 53 insertions(+), 24 deletions(-)
diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h
index 344f6cb..0845747 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_volume.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h
@@ 519,35 +519,56 @@ volume_integrate_heterogeneous(INTEGRATOR_STATE_ARGS,
# ifdef __EMISSION__
/* Path tracing: sample point on light and evaluate light shader, then
* queue shadow ray to be traced. */
ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
 ShaderData *sd,
 const ShaderVolumePhases *phases,
 const RNGState *rng_state)
+ccl_device_forceinline bool integrate_volume_sample_light(INTEGRATOR_STATE_ARGS,
+ const ShaderData *ccl_restrict sd,
+ const RNGState *ccl_restrict rng_state,
+ LightSample *ccl_restrict ls)
{
/* Test if there is a light or BSDF that needs direct light. */
if (!kernel_data.integrator.use_direct_light) {
 return;
+ return false;
}
/* Sample position on a light. */
 LightSample ls ccl_optional_struct_init;
+ const int path_flag = INTEGRATOR_STATE(path, flag);
+ const uint bounce = INTEGRATOR_STATE(path, bounce);
+ float light_u, light_v;
+ path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
+
+ light_sample(kg, light_u, light_v, sd>time, sd>P, bounce, path_flag, ls);
+
+ if (ls>shader & SHADER_EXCLUDE_SCATTER) {
+ return false;
+ }
+
+ return true;
+}
+
+/* Path tracing: sample point on light and evaluate light shader, then
+ * queue shadow ray to be traced. */
+ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
+ const ShaderData *ccl_restrict sd,
+ const ShaderVolumePhases *ccl_restrict
+ phases,
+ const RNGState *ccl_restrict rng_state,
+ LightSample *ccl_restrict ls)
+{
+ /* Sample position on the same light again, now from the shading
+ * point where we scattered.
+ *
+ * TODO: decorrelate random numbers and use light_sample_new_position to
+ * avoid resampling the CDF. */
{
const int path_flag = INTEGRATOR_STATE(path, flag);
const uint bounce = INTEGRATOR_STATE(path, bounce);
float light_u, light_v;
path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
 if (!light_sample(kg, light_u, light_v, sd>time, sd>P, bounce, path_flag, &ls)) {
+ if (!light_sample(kg, light_u, light_v, sd>time, sd>P, bounce, path_flag, ls)) {
return;
}
}
 if (ls.shader & SHADER_EXCLUDE_SCATTER) {
 return;
 }

 kernel_assert(ls.pdf != 0.0f);

/* Evaluate light shader.
*
* TODO: can we reuse sd memory? In theory we can move this after
@@ -557,32 +578,32 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
ShaderDataTinyStorage emission_sd_storage;
ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
const float3 light_eval = light_sample_shader_eval(
 INTEGRATOR_STATE_PASS, emission_sd, &ls, sd>time);
+ INTEGRATOR_STATE_PASS, emission_sd, ls, sd>time);
if (is_zero(light_eval)) {
return;
}
/* Evaluate BSDF. */
BsdfEval phase_eval ccl_optional_struct_init;
 const float phase_pdf = shader_volume_phase_eval(kg, sd, phases, ls.D, &phase_eval);
+ const float phase_pdf = shader_volume_phase_eval(kg, sd, phases, ls>D, &phase_eval);
 if (ls.shader & SHADER_USE_MIS) {
 float mis_weight = power_heuristic(ls.pdf, phase_pdf);
+ if (ls>shader & SHADER_USE_MIS) {
+ float mis_weight = power_heuristic(ls>pdf, phase_pdf);
bsdf_eval_mul(&phase_eval, mis_weight);
}
 bsdf_eval_mul3(&phase_eval, light_eval / ls.pdf);
+ bsdf_eval_mul3(&phase_eval, light_eval / ls>pdf);
/* Path termination. */
const float terminate = path_state_rng_light_termination(kg, rng_state);
 if (light_sample_terminate(kg, &ls, &phase_eval, terminate)) {
+ if (light_sample_terminate(kg, ls, &phase_eval, terminate)) {
return;
}
/* Create shadow ray. */
Ray ray ccl_optional_struct_init;
 light_sample_to_volume_shadow_ray(kg, sd, &ls, sd>P, &ray);
 const bool is_light = light_sample_is_light(&ls);
+ light_sample_to_volume_shadow_ray(kg, sd, ls, sd>P, &ray);
+ const bool is_light = light_sample_is_light(ls);
/* Write shadow ray and associated state to global memory. */
integrator_state_write_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
@@ -687,12 +708,20 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
ShaderData sd;
shader_setup_from_volume(kg, &sd, ray);
 float3 throughput = INTEGRATOR_STATE(path, throughput);

/* Load random number state. */
RNGState rng_state;
path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+ /* Sample light ahead of volume stepping. */
+ LightSample ls ccl_optional_struct_init;
+ const bool need_light_sample = !(INTEGRATOR_STATE(path, flag) & PATH_RAY_TERMINATE);
+ if (need_light_sample) {
+ integrate_volume_sample_light(INTEGRATOR_STATE_PASS, &sd, &rng_state, &ls);
+ }
+
+ /* Step through volume. */
+ float3 throughput = INTEGRATOR_STATE(path, throughput);
+
const float step_size = volume_stack_step_size(INTEGRATOR_STATE_PASS, [=](const int i) {
return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
});
@@ -724,7 +753,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
if (event == VOLUME_PATH_SCATTERED) {
/* Direct light. */
 integrate_volume_direct_light(INTEGRATOR_STATE_PASS, &sd, &phases, &rng_state);
+ integrate_volume_direct_light(INTEGRATOR_STATE_PASS, &sd, &phases, &rng_state, &ls);
/* Scatter. */
if (!integrate_volume_phase_scatter(INTEGRATOR_STATE_PASS, &sd, &phases, &rng_state)) {

2.25.1
From 4f77d4713558b2b837a7f6351fa9b3ccbc09a856 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brecht@blender.org>
Date: Wed, 7 Jul 2021 21:01:54 +0200
Subject: [PATCH 4/7] Cycles X: refactor to separately track direct and
indirect in volumes
So that we can scatter at two different positions.

.../integrator/integrator_shade_volume.h  334 +++++++++++
 1 file changed, 197 insertions(+), 137 deletions(-)
diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h
index 0845747..4e4b624 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_volume.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h
@@ -31,6 +31,21 @@ typedef enum VolumeIntegrateEvent {
VOLUME_PATH_MISSED = 2
} VolumeIntegrateEvent;
+typedef struct VolumeIntegrateResult { /* Output filled in while stepping through a volume. */
+ /* Throughput and offset for direct light scattering. */
+ VolumeSampleMethod direct_sample_method; /* NOTE(review): not assigned by code visible here. */
+ bool direct_scatter; /* Set once a scatter position for direct light has been found. */
+ float3 direct_throughput; /* Throughput at that position. */
+ float direct_t; /* Ray distance: the position is ray->P + direct_t * ray->D. */
+ ShaderVolumePhases direct_phases; /* Phase closures copied from the ShaderData at that step. */
+
+ /* Throughput and offset for indirect light scattering. */
+ bool indirect_scatter; /* Set once a scatter position for indirect light has been found. */
+ float3 indirect_throughput; /* Written back to the path state even when nothing scattered. */
+ float indirect_t; /* Ray distance: the position is ray->P + indirect_t * ray->D. */
+ ShaderVolumePhases indirect_phases; /* Phase closures copied from the ShaderData at that step. */
+} VolumeIntegrateResult;
+
/* Ignore paths that have volume throughput below this value, to avoid unnecessary work
* and precision issues.
* todo: this value could be tweaked or turned into a probability to avoid unnecessary
@@ -366,24 +381,88 @@ ccl_device float3 volume_emission_integrate(VolumeShaderCoefficients *coeff,
return emission;
}
/* Volume Path */
+/* Volume Integration */
+
+typedef struct VolumeIntegrateState { /* Working state kept while stepping through a volume. */
+ /* Volume segment extents. */
+ float start_t; /* Ray distance where the current step begins. */
+ float end_t; /* Ray distance where the current step ends, clamped to ray->t. */
+
+ /* Current throughput. */
+ float3 throughput; /* NOTE(review): unused by the stepping code visible in this patch. */
+
+ /* If volume is absorption-only up to this point, and no probabilistic
+ * scattering or termination has been used yet. */
+ bool absorption_only; /* NOTE(review): initialized to true; no visible code clears it. */
+
+ /* Random numbers for scattering. */
+ float rscatter; /* From PRNG_SCATTER_DISTANCE; remapped after each step that does not scatter. */
+ float rphase; /* From PRNG_PHASE_CHANNEL; passed to volume_sample_channel(). */
+} VolumeIntegrateState;
+
+ccl_device_forceinline void volume_integrate_step_scattering(
+    const ShaderData *sd,
+    const VolumeShaderCoefficients &ccl_restrict coeff,
+    const float3 transmittance,
+    VolumeIntegrateState &ccl_restrict vstate,
+    VolumeIntegrateResult &ccl_restrict result)
+{
+  /* Distance sampling for the step starting at vstate.start_t; updates `result` in place. */
+
+  /* Pick random color channel, we use the Veach one-sample
+   * model with balance heuristic for the channels. */
+  const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
+  float3 channel_pdf;
+  const int channel = volume_sample_channel(
+      albedo, result.indirect_throughput, vstate.rphase, &channel_pdf);
+
+  /* Decide if we will scatter or continue, based on the picked channel of `transmittance`. */
+  const float sample_transmittance = volume_channel_get(transmittance, channel);
+
+  if (1.0f - vstate.rscatter >= sample_transmittance) {
+    /* Compute sampling distance: new_dt solves exp(-sigma_t * new_dt) = 1 - rscatter. */
+    const float sample_sigma_t = volume_channel_get(coeff.sigma_t, channel);
+    const float new_dt = -logf(1.0f - vstate.rscatter) / sample_sigma_t;
+    const float new_t = vstate.start_t + new_dt;
+
+    /* Transmittance up to the scatter distance, and the per-channel pdf of that distance. */
+    const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
+    const float3 pdf = coeff.sigma_t * new_transmittance;
+
+    /* Throughput: record the scatter event used for indirect light. */
+    result.indirect_scatter = true;
+    result.indirect_t = new_t;
+    result.indirect_throughput *= coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf);
+    shader_copy_volume_phases(&result.indirect_phases, sd);
+
+    result.direct_scatter = true; /* Direct light scatters at the same point as indirect. */
+    result.direct_t = result.indirect_t;
+    result.direct_throughput = result.indirect_throughput;
+    shader_copy_volume_phases(&result.direct_phases, sd); /* TODO: only copy once? */
+  }
+  else {
+    /* Throughput: no scatter in this step, weight by the probability of passing through. */
+    const float pdf = dot(channel_pdf, transmittance);
+    result.indirect_throughput *= transmittance / pdf;
+
+    /* Remap rscatter so we can reuse it in the next step and keep things stratified. */
+    vstate.rscatter = 1.0f - (1.0f - vstate.rscatter) / sample_transmittance;
+  }
+}
/* heterogeneous volume distance sampling: integrate stepping through the
* volume until we reach the end, get absorbed entirely, or run out of
* iterations. this does probabilistically scatter or get transmitted through
* for path tracing where we don't want to branch. */
ccl_device VolumeIntegrateEvent
volume_integrate_heterogeneous(INTEGRATOR_STATE_ARGS,
 Ray *ccl_restrict ray,
 ShaderData *ccl_restrict sd,
 ShaderVolumePhases *ccl_restrict phases,
 ccl_addr_space float3 *ccl_restrict throughput,
 const RNGState *rng_state,
 ccl_global float *ccl_restrict render_buffer,
 const float object_step_size)
+ccl_device_forceinline void volume_integrate_heterogeneous(INTEGRATOR_STATE_ARGS,
+ Ray *ccl_restrict ray,
+ ShaderData *ccl_restrict sd,
+ const RNGState *rng_state,
+ ccl_global float *ccl_restrict
+ render_buffer,
+ const float object_step_size,
+ VolumeIntegrateResult &result)
{
 float3 tp = *throughput;

/* Prepare for stepping.
* Using a different step offset for the first step avoids banding artifacts. */
int max_steps;
@@ -397,123 +476,86 @@ volume_integrate_heterogeneous(INTEGRATOR_STATE_ARGS,
&steps_offset,
&max_steps);
 /* compute coefficients at the start */
 float t = 0.0f;
 float3 accum_transmittance = one_float3();
+ /* Initialize volume integration state. */
+ VolumeIntegrateState vstate ccl_optional_struct_init;
+ vstate.start_t = 0.0f;
+ vstate.end_t = 0.0f;
+ vstate.absorption_only = true;
+ vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE);
+ vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL);
 /* pick random color channel, we use the Veach onesample
 * model with balance heuristic for the channels */
 float xi = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE);
 const float rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL);
 bool has_scatter = false;
+ /* Initialize volume integration result. */
+ const float3 throughput = INTEGRATOR_STATE(path, throughput);
+ result.direct_throughput = throughput;
+ result.indirect_throughput = throughput;
for (int i = 0; i < max_steps; i++) {
 /* advance to new position */
 float new_t = min(ray>t, (i + steps_offset) * step_size);
 float dt = new_t  t;

 float3 new_P = ray>P + ray>D * (t + dt * step_shade_offset);
 VolumeShaderCoefficients coeff ccl_optional_struct_init;
+ /* Advance to new position */
+ vstate.end_t = min(ray>t, (i + steps_offset) * step_size);
+ const float shade_t = vstate.start_t + (vstate.end_t  vstate.start_t) * step_shade_offset;
+ sd>P = ray>P + ray>D * shade_t;
/* compute segment */
 sd>P = new_P;
+ VolumeShaderCoefficients coeff ccl_optional_struct_init;
if (volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &coeff)) {
 int closure_flag = sd>flag;
 float3 new_tp;
 float3 transmittance;
 bool scatter = false;

 /* distance sampling */
 if ((closure_flag & SD_SCATTER)  (has_scatter && (closure_flag & SD_EXTINCTION))) {
 has_scatter = true;

 /* Sample channel, use MIS with balance heuristic. */
 const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
 float3 channel_pdf;
 const int channel = volume_sample_channel(albedo, tp, rphase, &channel_pdf);

 /* compute transmittance over full step */
 transmittance = volume_color_transmittance(coeff.sigma_t, dt);

 /* decide if we will scatter or continue */
 const float sample_transmittance = volume_channel_get(transmittance, channel);

 if (1.0f  xi >= sample_transmittance) {
 /* compute sampling distance */
 const float sample_sigma_t = volume_channel_get(coeff.sigma_t, channel);
 const float new_dt = logf(1.0f  xi) / sample_sigma_t;
 new_t = t + new_dt;

 /* transmittance and pdf */
 const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
 const float3 pdf = coeff.sigma_t * new_transmittance;

 /* throughput */
 new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf);
 scatter = true;
 }
 else {
 /* throughput */
 const float pdf = dot(channel_pdf, transmittance);
 new_tp = tp * transmittance / pdf;
+ const int closure_flag = sd>flag;
 /* remap xi so we can reuse it and keep thing stratified */
 xi = 1.0f  (1.0f  xi) / sample_transmittance;
 }
 }
 else if (closure_flag & SD_EXTINCTION) {
 /* absorption only, no sampling needed */
 transmittance = volume_color_transmittance(coeff.sigma_t, dt);
 new_tp = tp * transmittance;
 }
 else {
 transmittance = zero_float3();
 new_tp = tp;
 }
+ /* Evaluate transmittance over segment. */
+ const float dt = (vstate.end_t  vstate.start_t);
+ const float3 transmittance = (closure_flag & SD_EXTINCTION) ?
+ volume_color_transmittance(coeff.sigma_t, dt) :
+ one_float3();
 /* integrate emission attenuated by absorption */
+ /* Emission. */
if (closure_flag & SD_EMISSION) {
 const float3 emission = volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
 kernel_accum_emission(INTEGRATOR_STATE_PASS, tp, emission, render_buffer);
+ /* Only write emission before indirect light scatter position, since we terminate
+ * stepping at that point if we have already found a direct light scatter position. */
+ if (!result.indirect_scatter) {
+ /* TODO: write only once to avoid overhead of atomics? */
+ const float3 emission = volume_emission_integrate(
+ &coeff, closure_flag, transmittance, dt);
+ kernel_accum_emission(
+ INTEGRATOR_STATE_PASS, result.indirect_throughput, emission, render_buffer);
+ }
}
 /* modify throughput */
if (closure_flag & SD_EXTINCTION) {
 tp = new_tp;
+ if ((closure_flag & SD_SCATTER)  !vstate.absorption_only) {
+ /* Scattering and absorption. */
+ volume_integrate_step_scattering(sd, coeff, transmittance, vstate, result);
+ }
+ else {
+ /* Absorption only. */
+ result.indirect_throughput *= transmittance;
+ result.direct_throughput *= transmittance;
+ }
 /* stop if nearly all light blocked */
 if (tp.x < VOLUME_THROUGHPUT_EPSILON && tp.y < VOLUME_THROUGHPUT_EPSILON &&
 tp.z < VOLUME_THROUGHPUT_EPSILON) {
 tp = zero_float3();
 break;
+ /* Stop if nearly all light blocked. */
+ if (!result.indirect_scatter) {
+ if (max3(result.indirect_throughput) < VOLUME_THROUGHPUT_EPSILON) {
+ result.indirect_throughput = zero_float3();
+ break;
+ }
+ }
+ else if (!result.direct_scatter) {
+ if (max3(result.direct_throughput) < VOLUME_THROUGHPUT_EPSILON) {
+ break;
+ }
}
}
 /* prepare to scatter to new direction */
 if (scatter) {
 shader_copy_volume_phases(phases, sd);

 /* adjust throughput and move to new location */
 sd>P = ray>P + new_t * ray>D;
 *throughput = tp;

 return VOLUME_PATH_SCATTERED;
 }
 else {
 /* accumulate transmittance */
 accum_transmittance *= transmittance;
+ /* If we have scattering data for both direct and indirect, we're done. */
+ if (result.direct_scatter && result.indirect_scatter) {
+ break;
}
}
 /* stop if at the end of the volume */
 t = new_t;
 if (t == ray>t)
+ /* Stop if at the end of the volume. */
+ vstate.start_t = vstate.end_t;
+ if (vstate.start_t == ray>t) {
break;
+ }
}

 *throughput = tp;

 return VOLUME_PATH_ATTENUATED;
}
# ifdef __EMISSION__
@@ -548,9 +590,11 @@ ccl_device_forceinline bool integrate_volume_sample_light(INTEGRATOR_STATE_ARGS,
* queue shadow ray to be traced. */
ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
const ShaderData *ccl_restrict sd,
+ const RNGState *ccl_restrict rng_state,
+ const float3 P,
const ShaderVolumePhases *ccl_restrict
phases,
 const RNGState *ccl_restrict rng_state,
+ const float3 throughput,
LightSample *ccl_restrict ls)
{
/* Sample position on the same light again, now from the shading
@@ -564,7 +608,7 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
float light_u, light_v;
path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
 if (!light_sample(kg, light_u, light_v, sd>time, sd>P, bounce, path_flag, ls)) {
+ if (!light_sample(kg, light_u, light_v, sd>time, P, bounce, path_flag, ls)) {
return;
}
}
@@ -602,7 +646,7 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
/* Create shadow ray. */
Ray ray ccl_optional_struct_init;
 light_sample_to_volume_shadow_ray(kg, sd, ls, sd>P, &ray);
+ light_sample_to_volume_shadow_ray(kg, sd, ls, P, &ray);
const bool is_light = light_sample_is_light(ls);
/* Write shadow ray and associated state to global memory. */
@@ -614,7 +658,7 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
uint32_t shadow_flag = INTEGRATOR_STATE(path, flag);
shadow_flag = (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
shadow_flag = PATH_RAY_VOLUME_PASS;
 const float3 throughput = INTEGRATOR_STATE(path, throughput) * bsdf_eval_sum(&phase_eval);
+ const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval);
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
const float3 diffuse_glossy_ratio = (bounce == 0) ?
@@ -626,7 +670,7 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
INTEGRATOR_STATE_WRITE(shadow_path, flag) = shadow_flag;
INTEGRATOR_STATE_WRITE(shadow_path, bounce) = bounce;
INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) = transparent_bounce;
 INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput;
+ INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput_phase;
if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) {
INTEGRATOR_STATE_WRITE(shadow_path, unshadowed_throughput) = throughput;
@@ -642,8 +686,8 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
/* Path tracing: scatter in new direction using phase function. */
ccl_device_forceinline bool integrate_volume_phase_scatter(INTEGRATOR_STATE_ARGS,
ShaderData *sd,
 const ShaderVolumePhases *phases,
 const RNGState *rng_state)
+ const RNGState *rng_state,
+ const ShaderVolumePhases *phases)
{
float phase_u, phase_v;
path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &phase_u, &phase_v);
@@ -679,9 +723,9 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(INTEGRATOR_STATE_ARGS
# endif
/* Update throughput. */
 float3 throughput = INTEGRATOR_STATE(path, throughput);
 throughput *= bsdf_eval_sum(&phase_eval) / phase_pdf;
 INTEGRATOR_STATE_WRITE(path, throughput) = throughput;
+ const float3 throughput = INTEGRATOR_STATE(path, throughput);
+ const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf;
+ INTEGRATOR_STATE_WRITE(path, throughput) = throughput_phase;
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
INTEGRATOR_STATE_WRITE(path, diffuse_glossy_ratio) = one_float3();
@@ -719,16 +763,16 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
integrate_volume_sample_light(INTEGRATOR_STATE_PASS, &sd, &rng_state, &ls);
}
 /* Step through volume. */
 float3 throughput = INTEGRATOR_STATE(path, throughput);
+ /* TODO: expensive to zero closures? */
+ VolumeIntegrateResult result = {};
+ /* Step through volume. */
const float step_size = volume_stack_step_size(INTEGRATOR_STATE_PASS, [=](const int i) {
return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
});
 ShaderVolumePhases phases;
 VolumeIntegrateEvent event = volume_integrate_heterogeneous(
 INTEGRATOR_STATE_PASS, ray, &sd, &phases, &throughput, &rng_state, render_buffer, step_size);
+ volume_integrate_heterogeneous(
+ INTEGRATOR_STATE_PASS, ray, &sd, &rng_state, render_buffer, step_size, result);
/* Perform path termination. The intersect_closest will have already marked this path
* to be terminated. That will shading evaluating to leave out any scattering closures,
@@ -741,27 +785,43 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
if (probability == 0.0f) {
return VOLUME_PATH_MISSED;
}
 else if (event == VOLUME_PATH_SCATTERED) {
 /* Only divide throughput by probability if we scatter. For the attenuation
 * case the next surface will already do this division. */
 if (probability != 1.0f) {
 throughput /= probability;
 }
+
+ /* Direct light. */
+ if (result.direct_scatter) {
+ const float3 direct_P = ray>P + result.direct_t * ray>D;
+ result.direct_throughput /= probability;
+ integrate_volume_direct_light(INTEGRATOR_STATE_PASS,
+ &sd,
+ &rng_state,
+ direct_P,
+ &result.direct_phases,
+ result.direct_throughput,
+ &ls);
}
 INTEGRATOR_STATE_WRITE(path, throughput) = throughput;
+ /* Indirect light.
+ *
+ * Only divide throughput by probability if we scatter. For the attenuation
+ * case the next surface will already do this division. */
+ if (result.indirect_scatter) {
+ result.indirect_throughput /= probability;
+ }
+ INTEGRATOR_STATE_WRITE(path, throughput) = result.indirect_throughput;
 if (event == VOLUME_PATH_SCATTERED) {
 /* Direct light. */
 integrate_volume_direct_light(INTEGRATOR_STATE_PASS, &sd, &phases, &rng_state, &ls);
+ if (result.indirect_scatter) {
+ sd.P = ray>P + result.indirect_t * ray>D;
 /* Scatter. */
 if (!integrate_volume_phase_scatter(INTEGRATOR_STATE_PASS, &sd, &phases, &rng_state)) {
+ if (integrate_volume_phase_scatter(
+ INTEGRATOR_STATE_PASS, &sd, &rng_state, &result.indirect_phases)) {
+ return VOLUME_PATH_SCATTERED;
+ }
+ else {
return VOLUME_PATH_MISSED;
}
}

 return event;
+ else {
+ return VOLUME_PATH_ATTENUATED;
+ }
}
#endif

2.25.1
From ef77e6fcc751f0c408be717483054dad4a5089b9 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brecht@blender.org>
Date: Wed, 7 Jul 2021 18:33:22 +0200
Subject: [PATCH 5/7] Cycles X: add equiangular volume sampling for direct
light
Indirect always uses distance sampling.

intern/cycles/blender/addon/ui.py  6 +
.../integrator/integrator_shade_volume.h  160 +++++++++++
 2 files changed, 103 insertions(+), 63 deletions(-)
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 5b515aa..8553d17 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -1449,8 +1449,7 @@ class CYCLES_WORLD_PT_settings_volume(CyclesButtonsPanel, Panel):
col = layout.column()
sub = col.column()
 sub.active = use_cpu(context)
 sub.prop(cworld, "volume_sampling", text="Sampling")
+ col.prop(cworld, "volume_sampling", text="Sampling")
col.prop(cworld, "volume_interpolation", text="Interpolation")
col.prop(cworld, "homogeneous_volume", text="Homogeneous")
sub = col.column()
@@ -1589,8 +1588,7 @@ class CYCLES_MATERIAL_PT_settings_volume(CyclesButtonsPanel, Panel):
col = layout.column()
sub = col.column()
 sub.active = use_cpu(context)
 sub.prop(cmat, "volume_sampling", text="Sampling")
+ col.prop(cmat, "volume_sampling", text="Sampling")
col.prop(cmat, "volume_interpolation", text="Interpolation")
col.prop(cmat, "homogeneous_volume", text="Homogeneous")
sub = col.column()
diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h
index 4e4b624..1482b0b 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_volume.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h
@@ -33,7 +33,6 @@ typedef enum VolumeIntegrateEvent {
typedef struct VolumeIntegrateResult {
/* Throughput and offset for direct light scattering. */
 VolumeSampleMethod direct_sample_method;
bool direct_scatter;
float3 direct_throughput;
float direct_t;
@@ -388,9 +387,6 @@ typedef struct VolumeIntegrateState {
float start_t;
float end_t;
 /* Current throughput. */
 float3 throughput;

/* If volume is absorptiononly up to this point, and no probabilistic
* scattering or termination has been used yet. */
bool absorption_only;
@@ -398,6 +394,9 @@ typedef struct VolumeIntegrateState {
/* Random numbers for scattering. */
float rscatter;
float rphase;
+
+ /* Sampling. */
+ VolumeSampleMethod direct_sample_method;
} VolumeIntegrateState;
ccl_device_forceinline void volume_integrate_step_scattering(
@@ -407,46 +406,66 @@ ccl_device_forceinline void volume_integrate_step_scattering(
VolumeIntegrateState &ccl_restrict vstate,
VolumeIntegrateResult &ccl_restrict result)
{
 /* Distance sampling */

 /* Pick random color channel, we use the Veach onesample
 * model with balance heuristic for the channels. */
 const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
 float3 channel_pdf;
 const int channel = volume_sample_channel(
 albedo, result.indirect_throughput, vstate.rphase, &channel_pdf);

 /* decide if we will scatter or continue */
 const float sample_transmittance = volume_channel_get(transmittance, channel);

 if (1.0f  vstate.rscatter >= sample_transmittance) {
 /* compute sampling distance */
 const float sample_sigma_t = volume_channel_get(coeff.sigma_t, channel);
 const float new_dt = logf(1.0f  vstate.rscatter) / sample_sigma_t;
 const float new_t = vstate.start_t + new_dt;

 /* transmittance and pdf */
 const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
 const float3 pdf = coeff.sigma_t * new_transmittance;

 /* throughput */
 result.indirect_scatter = true;
 result.indirect_t = new_t;
 result.indirect_throughput *= coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf);
 shader_copy_volume_phases(&result.indirect_phases, sd);

 result.direct_scatter = true;
 result.direct_t = result.indirect_t;
 result.direct_throughput = result.indirect_throughput;
 shader_copy_volume_phases(&result.direct_phases, sd); /* TODO: only copy once? */
+ /* Equiangular sampling for direct lighting. */
+ if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR && !result.direct_scatter) {
+ if (result.direct_t >= vstate.start_t && result.direct_t <= vstate.end_t) {
+ const float new_dt = result.direct_t  vstate.start_t;
+ const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
+
+ result.direct_scatter = true;
+ result.direct_throughput *= coeff.sigma_s * new_transmittance;
+ shader_copy_volume_phases(&result.direct_phases, sd);
+ }
+ else {
+ result.direct_throughput *= transmittance;
+ }
}
 else {
 /* throughput */
 const float pdf = dot(channel_pdf, transmittance);
 result.indirect_throughput *= transmittance / pdf;
 /* remap rscatter so we can reuse it and keep thing stratified */
 vstate.rscatter = 1.0f  (1.0f  vstate.rscatter) / sample_transmittance;
+ /* Distance sampling for indirect and optional direct lighting. */
+ if (!result.indirect_scatter) {
+ /* Pick random color channel, we use the Veach onesample
+ * model with balance heuristic for the channels. */
+ const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
+ float3 channel_pdf;
+ const int channel = volume_sample_channel(
+ albedo, result.indirect_throughput, vstate.rphase, &channel_pdf);
+
+ /* decide if we will scatter or continue */
+ const float sample_transmittance = volume_channel_get(transmittance, channel);
+
+ if (1.0f  vstate.rscatter >= sample_transmittance) {
+ /* compute sampling distance */
+ const float sample_sigma_t = volume_channel_get(coeff.sigma_t, channel);
+ const float new_dt = logf(1.0f  vstate.rscatter) / sample_sigma_t;
+ const float new_t = vstate.start_t + new_dt;
+
+ /* transmittance and pdf */
+ const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
+ const float distance_pdf = dot(channel_pdf, coeff.sigma_t * new_transmittance);
+
+ /* throughput */
+ result.indirect_scatter = true;
+ result.indirect_t = new_t;
+ result.indirect_throughput *= coeff.sigma_s * new_transmittance / distance_pdf;
+ shader_copy_volume_phases(&result.indirect_phases, sd);
+
+ if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) {
+ /* If using distance sampling for direct light, just copy parameters
+ * of indirect light since we scatter at the same point then. */
+ result.direct_scatter = true;
+ result.direct_t = result.indirect_t;
+ result.direct_throughput = result.indirect_throughput;
+ shader_copy_volume_phases(&result.direct_phases, sd);
+ }
+ }
+ else {
+ /* throughput */
+ const float pdf = dot(channel_pdf, transmittance);
+ result.indirect_throughput *= transmittance / pdf;
+
+ /* remap rscatter so we can reuse it and keep thing stratified */
+ vstate.rscatter = 1.0f  (1.0f  vstate.rscatter) / sample_transmittance;
+ }
}
}
@@ -454,14 +473,16 @@ ccl_device_forceinline void volume_integrate_step_scattering(
* volume until we reach the end, get absorbed entirely, or run out of
* iterations. this does probabilistically scatter or get transmitted through
* for path tracing where we don't want to branch. */
ccl_device_forceinline void volume_integrate_heterogeneous(INTEGRATOR_STATE_ARGS,
 Ray *ccl_restrict ray,
 ShaderData *ccl_restrict sd,
 const RNGState *rng_state,
 ccl_global float *ccl_restrict
 render_buffer,
 const float object_step_size,
 VolumeIntegrateResult &result)
+ccl_device_forceinline void volume_integrate_heterogeneous(
+ INTEGRATOR_STATE_ARGS,
+ Ray *ccl_restrict ray,
+ ShaderData *ccl_restrict sd,
+ const RNGState *rng_state,
+ ccl_global float *ccl_restrict render_buffer,
+ const float object_step_size,
+ const VolumeSampleMethod direct_sample_method,
+ const float3 equiangular_light_P,
+ VolumeIntegrateResult &result)
{
/* Prepare for stepping.
* Using a different step offset for the first step avoids banding artifacts. */
@@ -483,12 +504,21 @@ ccl_device_forceinline void volume_integrate_heterogeneous(INTEGRATOR_STATE_ARGS
vstate.absorption_only = true;
vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE);
vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL);
+ vstate.direct_sample_method = direct_sample_method;
/* Initialize volume integration result. */
const float3 throughput = INTEGRATOR_STATE(path, throughput);
result.direct_throughput = throughput;
result.indirect_throughput = throughput;
+ /* Equiangular sampling: compute distance and PDF in advance. */
+ if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR) {
+ float equiangular_pdf;
+ result.direct_t = volume_equiangular_sample(
+ ray, equiangular_light_P, vstate.rscatter, &equiangular_pdf);
+ result.direct_throughput /= equiangular_pdf;
+ }
+
for (int i = 0; i < max_steps; i++) {
/* Advance to new position */
vstate.end_t = min(ray>t, (i + steps_offset) * step_size);
@@ -683,7 +713,7 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
}
# endif
/* Path tracing: scatter in new direction using phase function. */
+/* Path tracing: scatter in new direction using phase function */
ccl_device_forceinline bool integrate_volume_phase_scatter(INTEGRATOR_STATE_ARGS,
ShaderData *sd,
const RNGState *rng_state,
@@ -756,23 +786,35 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
RNGState rng_state;
path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
 /* Sample light ahead of volume stepping. */
+ /* Sample light ahead of volume stepping, for equiangular sampling. */
+ /* TODO: distant lights are ignored now, but could instead use even distribution. */
LightSample ls ccl_optional_struct_init;
const bool need_light_sample = !(INTEGRATOR_STATE(path, flag) & PATH_RAY_TERMINATE);
 if (need_light_sample) {
 integrate_volume_sample_light(INTEGRATOR_STATE_PASS, &sd, &rng_state, &ls);
 }
+ const bool have_equiangular_sample = need_light_sample &&
+ integrate_volume_sample_light(
+ INTEGRATOR_STATE_PASS, &sd, &rng_state, &ls) &&
+ (ls.t != FLT_MAX);
 /* TODO: expensive to zero closures? */
 VolumeIntegrateResult result = {};
+ VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ?
+ volume_stack_sample_method(INTEGRATOR_STATE_PASS) :
+ VOLUME_SAMPLE_DISTANCE;
/* Step through volume. */
const float step_size = volume_stack_step_size(INTEGRATOR_STATE_PASS, [=](const int i) {
return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
});
 volume_integrate_heterogeneous(
 INTEGRATOR_STATE_PASS, ray, &sd, &rng_state, render_buffer, step_size, result);
+ /* TODO: expensive to zero closures? */
+ VolumeIntegrateResult result = {};
+ volume_integrate_heterogeneous(INTEGRATOR_STATE_PASS,
+ ray,
+ &sd,
+ &rng_state,
+ render_buffer,
+ step_size,
+ direct_sample_method,
+ ls.P,
+ result);
/* Perform path termination. The intersect_closest will have already marked this path
* to be terminated. That will cause shading evaluation to leave out any scattering closures,

2.25.1
From f45ca4e7d15812950c656114aa64ad3ced367c99 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brecht@blender.org>
Date: Wed, 7 Jul 2021 20:25:23 +0200
Subject: [PATCH 6/7] Cycles X: add volume multiple importance between
equiangular and distance

.../integrator/integrator_shade_volume.h  59 +++++++++++++++
1 file changed, 46 insertions(+), 13 deletions(-)
diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h
index 1482b0b..eb6f72c 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_volume.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h
@@ 395,17 +395,29 @@ typedef struct VolumeIntegrateState {
float rscatter;
float rphase;
 /* Sampling. */
+ /* Multiple importance sampling. */
VolumeSampleMethod direct_sample_method;
+ bool use_mis;
+ float distance_pdf;
+ float equiangular_pdf;
} VolumeIntegrateState;
ccl_device_forceinline void volume_integrate_step_scattering(
const ShaderData *sd,
+ const Ray *ray,
+ const float3 equiangular_light_P,
const VolumeShaderCoefficients &ccl_restrict coeff,
const float3 transmittance,
VolumeIntegrateState &ccl_restrict vstate,
VolumeIntegrateResult &ccl_restrict result)
{
+ /* Pick random color channel, we use the Veach one-sample
+ * model with balance heuristic for the channels. */
+ const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
+ float3 channel_pdf;
+ const int channel = volume_sample_channel(
+ albedo, result.indirect_throughput, vstate.rphase, &channel_pdf);
+
/* Equiangular sampling for direct lighting. */
if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR && !result.direct_scatter) {
if (result.direct_t >= vstate.start_t && result.direct_t <= vstate.end_t) {
@@ 413,23 +425,24 @@ ccl_device_forceinline void volume_integrate_step_scattering(
const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
result.direct_scatter = true;
 result.direct_throughput *= coeff.sigma_s * new_transmittance;
+ result.direct_throughput *= coeff.sigma_s * new_transmittance / vstate.equiangular_pdf;
shader_copy_volume_phases(&result.direct_phases, sd);
+
+ /* Multiple importance sampling. */
+ if (vstate.use_mis) {
+ const float distance_pdf = vstate.distance_pdf *
+ dot(channel_pdf, coeff.sigma_t * new_transmittance);
+ result.direct_throughput *= 2.0f * power_heuristic(vstate.equiangular_pdf, distance_pdf);
+ }
}
else {
result.direct_throughput *= transmittance;
+ vstate.distance_pdf *= dot(channel_pdf, transmittance);
}
}
/* Distance sampling for indirect and optional direct lighting. */
if (!result.indirect_scatter) {
 /* Pick random color channel, we use the Veach one-sample
 * model with balance heuristic for the channels. */
 const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
 float3 channel_pdf;
 const int channel = volume_sample_channel(
 albedo, result.indirect_throughput, vstate.rphase, &channel_pdf);

/* decide if we will scatter or continue */
const float sample_transmittance = volume_channel_get(transmittance, channel);
@@ 456,6 +469,12 @@ ccl_device_forceinline void volume_integrate_step_scattering(
result.direct_t = result.indirect_t;
result.direct_throughput = result.indirect_throughput;
shader_copy_volume_phases(&result.direct_phases, sd);
+
+ /* Multiple importance sampling. */
+ if (vstate.use_mis) {
+ const float equiangular_pdf = volume_equiangular_pdf(ray, equiangular_light_P, new_t);
+ result.direct_throughput *= 2.0f * power_heuristic(distance_pdf, equiangular_pdf);
+ }
}
}
else {
@@ 504,7 +523,22 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
vstate.absorption_only = true;
vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE);
vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL);
+
+ /* Multiple importance sampling: pick between equiangular and distance sampling strategy. */
vstate.direct_sample_method = direct_sample_method;
+ vstate.use_mis = (direct_sample_method == VOLUME_SAMPLE_MIS);
+ if (vstate.use_mis) {
+ if (vstate.rscatter < 0.5f) {
+ vstate.rscatter *= 2.0f;
+ vstate.direct_sample_method = VOLUME_SAMPLE_DISTANCE;
+ }
+ else {
+ vstate.rscatter = (vstate.rscatter  0.5f) * 2.0f;
+ vstate.direct_sample_method = VOLUME_SAMPLE_EQUIANGULAR;
+ }
+ }
+ vstate.equiangular_pdf = 0.0f;
+ vstate.distance_pdf = 1.0f;
/* Initialize volume integration result. */
const float3 throughput = INTEGRATOR_STATE(path, throughput);
@@ 513,10 +547,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
/* Equiangular sampling: compute distance and PDF in advance. */
if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR) {
 float equiangular_pdf;
result.direct_t = volume_equiangular_sample(
 ray, equiangular_light_P, vstate.rscatter, &equiangular_pdf);
 result.direct_throughput /= equiangular_pdf;
+ ray, equiangular_light_P, vstate.rscatter, &vstate.equiangular_pdf);
}
for (int i = 0; i < max_steps; i++) {
@@ 552,7 +584,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
if (closure_flag & SD_EXTINCTION) {
if ((closure_flag & SD_SCATTER)  !vstate.absorption_only) {
/* Scattering and absorption. */
 volume_integrate_step_scattering(sd, coeff, transmittance, vstate, result);
+ volume_integrate_step_scattering(
+ sd, ray, equiangular_light_P, coeff, transmittance, vstate, result);
}
else {
/* Absorption only. */

2.25.1
From 1a6565808687a54e7c59780f5b9962da27790155 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brecht@blender.org>
Date: Mon, 19 Jul 2021 18:04:58 +0200
Subject: [PATCH 7/7] Cycles X: don't use ray position when sampling light for
volume segment
Area light solid angle importance sampling can otherwise introduce a lot of noise
if the ray start position happens to be close to the light, but the volume
segment also passes close to other parts of the area light.

.../integrator/integrator_shade_surface.h  2 +
.../integrator/integrator_shade_volume.h  4 +
intern/cycles/kernel/kernel_light.h  41 ++++++++++++++
3 files changed, 34 insertions(+), 13 deletions(-)
diff --git a/intern/cycles/kernel/integrator/integrator_shade_surface.h b/intern/cycles/kernel/integrator/integrator_shade_surface.h
index cf271ec..1cf9b63 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_surface.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_surface.h
@@ 119,7 +119,7 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
float light_u, light_v;
path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
 if (!light_sample(kg, light_u, light_v, sd>time, sd>P, bounce, path_flag, &ls)) {
+ if (!light_sample<false>(kg, light_u, light_v, sd>time, sd>P, bounce, path_flag, &ls)) {
return;
}
}
diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h
index eb6f72c..3a495c2 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_volume.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h
@@ 640,7 +640,7 @@ ccl_device_forceinline bool integrate_volume_sample_light(INTEGRATOR_STATE_ARGS,
float light_u, light_v;
path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
 light_sample(kg, light_u, light_v, sd>time, sd>P, bounce, path_flag, ls);
+ light_sample<true>(kg, light_u, light_v, sd>time, sd>P, bounce, path_flag, ls);
if (ls>shader & SHADER_EXCLUDE_SCATTER) {
return false;
@@ 671,7 +671,7 @@ ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
float light_u, light_v;
path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
 if (!light_sample(kg, light_u, light_v, sd>time, P, bounce, path_flag, ls)) {
+ if (!light_sample<false>(kg, light_u, light_v, sd>time, P, bounce, path_flag, ls)) {
return;
}
}
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index ad83f79..295f879 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ 44,8 +44,13 @@ typedef struct LightSample {
/* Regular Light */
ccl_device_inline bool light_sample_from_position(
 const KernelGlobals *kg, int lamp, float randu, float randv, float3 P, LightSample *ls)
+template<bool in_volume_segment>
+ccl_device_inline bool light_sample_from_position(const KernelGlobals *kg,
+ const int lamp,
+ const float randu,
+ const float randv,
+ const float3 P,
+ LightSample *ls)
{
const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
LightType type = (LightType)klight>type;
@@ 57,6 +62,18 @@ ccl_device_inline bool light_sample_from_position(
ls>u = randu;
ls>v = randv;
+ if (in_volume_segment && (type == LIGHT_DISTANT  type == LIGHT_BACKGROUND)) {
+ /* Distant lights in a volume get a dummy sample, position will not actually
+ * be used in that case. Only when sampling from a specific scatter position
+ * do we actually need to evaluate these. */
+ ls>P = zero_float3();
+ ls>Ng = zero_float3();
+ ls>D = zero_float3();
+ ls>pdf = true;
+ ls>t = FLT_MAX;
+ return true;
+ }
+
if (type == LIGHT_DISTANT) {
/* distant light */
float3 lightD = make_float3(klight>co[0], klight>co[1], klight>co[2]);
@@ 130,13 +147,15 @@ ccl_device_inline bool light_sample_from_position(
float invarea = fabsf(klight>area.invarea);
bool is_round = (klight>area.invarea < 0.0f);
 if (dot(ls>P  P, Ng) > 0.0f) {
 return false;
+ if (!in_volume_segment) {
+ if (dot(ls>P  P, Ng) > 0.0f) {
+ return false;
+ }
}
float3 inplane;
 if (is_round) {
+ if (is_round  in_volume_segment) {
inplane = ellipse_sample(axisu * 0.5f, axisv * 0.5f, randu, randv);
ls>P += inplane;
ls>pdf = invarea;
@@ 536,6 +555,7 @@ ccl_device_forceinline float triangle_light_pdf(const KernelGlobals *kg,
}
}
+template<bool in_volume_segment>
ccl_device_forceinline void triangle_light_sample(const KernelGlobals *kg,
int prim,
int object,
@@ 576,7 +596,7 @@ ccl_device_forceinline void triangle_light_sample(const KernelGlobals *kg,
float distance_to_plane = fabsf(dot(N0, V[0]  P) / dot(N0, N0));
 if (longest_edge_squared > distance_to_plane * distance_to_plane) {
+ if (!in_volume_segment && (longest_edge_squared > distance_to_plane * distance_to_plane)) {
/* see James Arvo, "Stratified Sampling of Spherical Triangles"
* http://www.graphics.cornell.edu/pubs/1995/Arv95c.pdf */
@@ 750,6 +770,7 @@ ccl_device_inline bool light_select_reached_max_bounces(const KernelGlobals *kg,
return (bounce > kernel_tex_fetch(__lights, index).max_bounces);
}
+template<bool in_volume_segment>
ccl_device_noinline bool light_sample(const KernelGlobals *kg,
float randu,
const float randv,
@@ 776,7 +797,7 @@ ccl_device_noinline bool light_sample(const KernelGlobals *kg,
}
const int shader_flag = kdistribution>mesh_light.shader_flag;
 triangle_light_sample(kg, prim, object, randu, randv, time, ls, P);
+ triangle_light_sample<in_volume_segment>(kg, prim, object, randu, randv, time, ls, P);
ls>shader = shader_flag;
return (ls>pdf > 0.0f);
}
@@ 787,7 +808,7 @@ ccl_device_noinline bool light_sample(const KernelGlobals *kg,
return false;
}
 return light_sample_from_position(kg, lamp, randu, randv, P, ls);
+ return light_sample_from_position<in_volume_segment>(kg, lamp, randu, randv, P, ls);
}
ccl_device_inline bool light_sample_new_position(const KernelGlobals *kg,
@@ 799,11 +820,11 @@ ccl_device_inline bool light_sample_new_position(const KernelGlobals *kg,
{
/* Sample a new position on the same light, for volume sampling. */
if (ls>type == LIGHT_TRIANGLE) {
 triangle_light_sample(kg, ls>prim, ls>object, randu, randv, time, ls, P);
+ triangle_light_sample<false>(kg, ls>prim, ls>object, randu, randv, time, ls, P);
return (ls>pdf > 0.0f);
}
else {
 return light_sample_from_position(kg, ls>lamp, randu, randv, P, ls);
+ return light_sample_from_position<false>(kg, ls>lamp, randu, randv, P, ls);
}
}

2.25.1
Event Timeline
Brecht Van Lommel (brecht)
created this paste.
Wed, Jul 7, 9:31 PM
Brecht Van Lommel (brecht)
mentioned this in
D11845: Cycles X: add equiangular and multiple importance sampling for volumes
.
Wed, Jul 7, 9:33 PM
Brecht Van Lommel (brecht)
edited the content of this paste.
(Show Details)
Thu, Jul 22, 5:48 PM
Log In to Comment