Cycles: simplify handling of ray differentials

* Store compact ray differentials in ShaderData and compute full differentials
  on demand. This reduces register pressure on the GPU.
* Remove BSDF differential code that was effectively doing nothing, as the
  differential orientation was discarded when the differential was made compact.

This gives a 1-5% speedup with RTX A6000 + OptiX in our benchmarks, with the
bigger speedups in simpler scenes.

Renders appear to be identical, except when using the "Both" displacement
option, which applies both displacement and bump.

Differential Revision: https://developer.blender.org/D15677
This commit is contained in:
Brecht Van Lommel 2022-08-11 16:53:11 +02:00
parent d8841d0aa3
commit e949d6da5b
Notes: blender-bot 2023-02-14 10:21:10 +01:00
Referenced by issue #100914, Cycles shows transmission in displaced shader as black
35 changed files with 163 additions and 707 deletions

View File

@ -105,7 +105,6 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private differential3 *domega_in,
ccl_private float *pdf)
{
/* For curves use the smooth normal, particularly for ribbons the geometric
@ -115,304 +114,80 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
switch (sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
label = bsdf_diffuse_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_diffuse_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
#ifdef __SVM__
case CLOSURE_BSDF_OREN_NAYAR_ID:
label = bsdf_oren_nayar_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_oren_nayar_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
# ifdef __OSL__
case CLOSURE_BSDF_PHONG_RAMP_ID:
label = bsdf_phong_ramp_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_phong_ramp_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
label = bsdf_diffuse_ramp_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_diffuse_ramp_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
# endif
case CLOSURE_BSDF_TRANSLUCENT_ID:
label = bsdf_translucent_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_translucent_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
case CLOSURE_BSDF_REFLECTION_ID:
label = bsdf_reflection_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_reflection_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
case CLOSURE_BSDF_REFRACTION_ID:
label = bsdf_refraction_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_refraction_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
case CLOSURE_BSDF_TRANSPARENT_ID:
label = bsdf_transparent_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_transparent_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
label = bsdf_microfacet_ggx_sample(kg,
sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_microfacet_ggx_sample(kg, sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID:
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
label = bsdf_microfacet_multi_ggx_sample(kg,
sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf,
&sd->lcg_state);
label = bsdf_microfacet_multi_ggx_sample(
kg, sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, &sd->lcg_state);
break;
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
label = bsdf_microfacet_multi_ggx_glass_sample(kg,
sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf,
&sd->lcg_state);
label = bsdf_microfacet_multi_ggx_glass_sample(
kg, sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, &sd->lcg_state);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
label = bsdf_microfacet_beckmann_sample(kg,
sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_microfacet_beckmann_sample(
kg, sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
label = bsdf_ashikhmin_shirley_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_ashikhmin_shirley_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
label = bsdf_ashikhmin_velvet_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_ashikhmin_velvet_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
label = bsdf_diffuse_toon_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_diffuse_toon_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
case CLOSURE_BSDF_GLOSSY_TOON_ID:
label = bsdf_glossy_toon_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_glossy_toon_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
label = bsdf_hair_reflection_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_hair_reflection_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
label = bsdf_hair_transmission_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_hair_transmission_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
label = bsdf_principled_hair_sample(
kg, sc, sd, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
label = bsdf_principled_hair_sample(kg, sc, sd, randu, randv, eval, omega_in, pdf);
break;
# ifdef __PRINCIPLED__
case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
label = bsdf_principled_diffuse_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_principled_diffuse_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
label = bsdf_principled_sheen_sample(sc,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
label = bsdf_principled_sheen_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
break;
# endif /* __PRINCIPLED__ */
#endif

View File

@ -133,14 +133,10 @@ ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(float n_x,
ccl_device int bsdf_ashikhmin_shirley_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
@ -221,12 +217,6 @@ ccl_device int bsdf_ashikhmin_shirley_sample(ccl_private const ShaderClosure *sc
/* leave the rest to eval_reflect */
*eval = bsdf_ashikhmin_shirley_eval_reflect(sc, I, *omega_in, pdf);
}
#ifdef __RAY_DIFFERENTIALS__
/* just do the reflection thing for now */
*domega_in_dx = (2.0f * dot(N, dIdx)) * N - dIdx;
*domega_in_dy = (2.0f * dot(N, dIdy)) * N - dIdy;
#endif
}
return label;

View File

@ -87,14 +87,10 @@ ccl_device Spectrum bsdf_ashikhmin_velvet_eval_transmit(ccl_private const Shader
ccl_device int bsdf_ashikhmin_velvet_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const VelvetBsdf *bsdf = (ccl_private const VelvetBsdf *)sc;
@ -130,12 +126,6 @@ ccl_device int bsdf_ashikhmin_velvet_sample(ccl_private const ShaderClosure *sc,
float power = 0.25f * (D * G) / cosNO;
*eval = make_spectrum(power);
#ifdef __RAY_DIFFERENTIALS__
// TODO: find a better approximation for the retroreflective bounce
*domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
*domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
#endif
}
else {
*pdf = 0.0f;

View File

@ -51,14 +51,10 @@ ccl_device Spectrum bsdf_diffuse_eval_transmit(ccl_private const ShaderClosure *
ccl_device int bsdf_diffuse_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
@ -69,11 +65,6 @@ ccl_device int bsdf_diffuse_sample(ccl_private const ShaderClosure *sc,
if (dot(Ng, *omega_in) > 0.0f) {
*eval = make_spectrum(*pdf);
#ifdef __RAY_DIFFERENTIALS__
// TODO: find a better approximation for the diffuse bounce
*domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
*domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
#endif
}
else {
*pdf = 0.0f;
@ -115,14 +106,10 @@ ccl_device Spectrum bsdf_translucent_eval_transmit(ccl_private const ShaderClosu
ccl_device int bsdf_translucent_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
@ -133,11 +120,6 @@ ccl_device int bsdf_translucent_sample(ccl_private const ShaderClosure *sc,
sample_cos_hemisphere(-N, randu, randv, omega_in, pdf);
if (dot(Ng, *omega_in) < 0) {
*eval = make_spectrum(*pdf);
#ifdef __RAY_DIFFERENTIALS__
// TODO: find a better approximation for the diffuse bounce
*domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
*domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
#endif
}
else {
*pdf = 0;

View File

@ -71,14 +71,10 @@ ccl_device Spectrum bsdf_diffuse_ramp_eval_transmit(ccl_private const ShaderClos
ccl_device int bsdf_diffuse_ramp_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc;
@ -89,10 +85,6 @@ ccl_device int bsdf_diffuse_ramp_sample(ccl_private const ShaderClosure *sc,
if (dot(Ng, *omega_in) > 0.0f) {
*eval = rgb_to_spectrum(bsdf_diffuse_ramp_get_color(bsdf->colors, *pdf * M_PI_F) * M_1_PI_F);
# ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
*domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
# endif
}
else {
*pdf = 0.0f;

View File

@ -151,14 +151,10 @@ ccl_device Spectrum bsdf_hair_transmission_eval_transmit(ccl_private const Shade
ccl_device int bsdf_hair_reflection_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
@ -194,12 +190,6 @@ ccl_device int bsdf_hair_reflection_sample(ccl_private const ShaderClosure *sc,
fast_sincosf(phi, &sinphi, &cosphi);
*omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
// differentials - TODO: find a better approximation for the reflective bounce
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
*domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;
#endif
*pdf = fabsf(phi_pdf * theta_pdf);
if (M_PI_2_F - fabsf(theta_i) < 0.001f)
*pdf = 0.0f;
@ -212,14 +202,10 @@ ccl_device int bsdf_hair_reflection_sample(ccl_private const ShaderClosure *sc,
ccl_device int bsdf_hair_transmission_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
@ -255,12 +241,6 @@ ccl_device int bsdf_hair_transmission_sample(ccl_private const ShaderClosure *sc
fast_sincosf(phi, &sinphi, &cosphi);
*omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
// differentials - TODO: find a better approximation for the transmission bounce
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
*domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;
#endif
*pdf = fabsf(phi_pdf * theta_pdf);
if (M_PI_2_F - fabsf(theta_i) < 0.001f) {
*pdf = 0.0f;

View File

@ -354,8 +354,6 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)sc;
@ -471,12 +469,6 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,
*omega_in = X * sin_theta_i + Y * cos_theta_i * cosf(phi_i) + Z * cos_theta_i * sinf(phi_i);
#ifdef __RAY_DIFFERENTIALS__
float3 N = safe_normalize(sd->I + *omega_in);
*domega_in_dx = (2 * dot(N, sd->dI.dx)) * N - sd->dI.dx;
*domega_in_dy = (2 * dot(N, sd->dI.dy)) * N - sd->dI.dy;
#endif
return LABEL_GLOSSY | ((p == 0) ? LABEL_REFLECT : LABEL_TRANSMIT);
}

View File

@ -537,14 +537,10 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals kg,
ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
@ -672,11 +668,6 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals kg,
if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
*eval *= 0.25f * bsdf->extra->clearcoat;
}
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx;
*domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy;
#endif
}
else {
*eval = zero_spectrum();
@ -690,34 +681,13 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals kg,
/* CAUTION: the i and o variables are inverted relative to the paper
* eq. 39 - compute actual refractive direction */
float3 R, T;
#ifdef __RAY_DIFFERENTIALS__
float3 dRdx, dRdy, dTdx, dTdy;
#endif
float m_eta = bsdf->ior, fresnel;
bool inside;
fresnel = fresnel_dielectric(m_eta,
m,
I,
&R,
&T,
#ifdef __RAY_DIFFERENTIALS__
dIdx,
dIdy,
&dRdx,
&dRdy,
&dTdx,
&dTdy,
#endif
&inside);
fresnel = fresnel_dielectric(m_eta, m, I, &R, &T, &inside);
if (!inside && fresnel != 1.0f) {
*omega_in = T;
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = dTdx;
*domega_in_dy = dTdy;
#endif
if (alpha_x * alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) {
/* some high number for MIS */
@ -978,14 +948,10 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg,
ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
@ -1076,11 +1042,6 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg,
*eval = make_spectrum(out);
}
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx;
*domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy;
#endif
}
else {
*eval = zero_spectrum();
@ -1094,35 +1055,14 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg,
/* CAUTION: the i and o variables are inverted relative to the paper
* eq. 39 - compute actual refractive direction */
float3 R, T;
#ifdef __RAY_DIFFERENTIALS__
float3 dRdx, dRdy, dTdx, dTdy;
#endif
float m_eta = bsdf->ior, fresnel;
bool inside;
fresnel = fresnel_dielectric(m_eta,
m,
I,
&R,
&T,
#ifdef __RAY_DIFFERENTIALS__
dIdx,
dIdy,
&dRdx,
&dRdy,
&dTdx,
&dTdy,
#endif
&inside);
fresnel = fresnel_dielectric(m_eta, m, I, &R, &T, &inside);
if (!inside && fresnel != 1.0f) {
*omega_in = T;
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = dTdx;
*domega_in_dy = dTdy;
#endif
if (alpha_x * alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) {
/* some high number for MIS */
*pdf = 1e6f;

View File

@ -478,14 +478,10 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg,
ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf,
ccl_private uint *lcg_state)
{
@ -510,10 +506,6 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg,
}
*pdf = 1e6f;
*eval = make_spectrum(1e6f);
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
*domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
#endif
return LABEL_REFLECT | LABEL_SINGULAR;
}
@ -551,10 +543,6 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg,
*pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x);
*eval *= *pdf;
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
*domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
#endif
return LABEL_REFLECT | LABEL_GLOSSY;
}
@ -662,14 +650,10 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg,
ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf,
ccl_private uint *lcg_state)
{
@ -680,41 +664,17 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg,
if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) {
float3 R, T;
#ifdef __RAY_DIFFERENTIALS__
float3 dRdx, dRdy, dTdx, dTdy;
#endif
bool inside;
float fresnel = fresnel_dielectric(bsdf->ior,
Z,
I,
&R,
&T,
#ifdef __RAY_DIFFERENTIALS__
dIdx,
dIdy,
&dRdx,
&dRdy,
&dTdx,
&dTdy,
#endif
&inside);
float fresnel = fresnel_dielectric(bsdf->ior, Z, I, &R, &T, &inside);
*pdf = 1e6f;
*eval = make_spectrum(1e6f);
if (randu < fresnel) {
*omega_in = R;
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = dRdx;
*domega_in_dy = dRdy;
#endif
return LABEL_REFLECT | LABEL_SINGULAR;
}
else {
*omega_in = T;
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = dTdx;
*domega_in_dy = dTdy;
#endif
return LABEL_TRANSMIT | LABEL_SINGULAR;
}
}
@ -740,22 +700,9 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg,
*omega_in = X * localO.x + Y * localO.y + Z * localO.z;
if (localO.z * localI.z > 0.0f) {
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
*domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
#endif
return LABEL_REFLECT | LABEL_GLOSSY;
}
else {
#ifdef __RAY_DIFFERENTIALS__
float cosI = dot(Z, I);
float dnp = max(sqrtf(1.0f - (bsdf->ior * bsdf->ior * (1.0f - cosI * cosI))), 1e-7f);
*domega_in_dx = -(bsdf->ior * dIdx) +
((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdx, Z)) * Z;
*domega_in_dy = -(bsdf->ior * dIdy) +
((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdy, Z)) * Z;
#endif
return LABEL_TRANSMIT | LABEL_GLOSSY;
}
}

View File

@ -75,14 +75,10 @@ ccl_device Spectrum bsdf_oren_nayar_eval_transmit(ccl_private const ShaderClosur
ccl_device int bsdf_oren_nayar_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const OrenNayarBsdf *bsdf = (ccl_private const OrenNayarBsdf *)sc;
@ -90,12 +86,6 @@ ccl_device int bsdf_oren_nayar_sample(ccl_private const ShaderClosure *sc,
if (dot(Ng, *omega_in) > 0.0f) {
*eval = bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, *omega_in);
#ifdef __RAY_DIFFERENTIALS__
// TODO: find a better approximation for the bounce
*domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
*domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
#endif
}
else {
*pdf = 0.0f;

View File

@ -82,14 +82,10 @@ ccl_device float3 bsdf_phong_ramp_eval_transmit(ccl_private const ShaderClosure
ccl_device int bsdf_phong_ramp_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const PhongRampBsdf *bsdf = (ccl_private const PhongRampBsdf *)sc;
@ -99,12 +95,6 @@ ccl_device int bsdf_phong_ramp_sample(ccl_private const ShaderClosure *sc,
if (cosNO > 0) {
// reflect the view vector
float3 R = (2 * cosNO) * bsdf->N - I;
# ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
*domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
# endif
float3 T, B;
make_orthonormals(R, &T, &B);
float phi = M_2PI_F * randu;

View File

@ -142,14 +142,10 @@ ccl_device Spectrum bsdf_principled_diffuse_eval_transmit(ccl_private const Shad
ccl_device int bsdf_principled_diffuse_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *)sc;
@ -160,12 +156,6 @@ ccl_device int bsdf_principled_diffuse_sample(ccl_private const ShaderClosure *s
if (dot(Ng, *omega_in) > 0) {
*eval = bsdf_principled_diffuse_compute_brdf(bsdf, N, I, *omega_in, pdf);
#ifdef __RAY_DIFFERENTIALS__
// TODO: find a better approximation for the diffuse bounce
*domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
*domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
#endif
}
else {
*pdf = 0.0f;

View File

@ -93,14 +93,10 @@ ccl_device Spectrum bsdf_principled_sheen_eval_transmit(ccl_private const Shader
ccl_device int bsdf_principled_sheen_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const PrincipledSheenBsdf *bsdf = (ccl_private const PrincipledSheenBsdf *)sc;
@ -113,12 +109,6 @@ ccl_device int bsdf_principled_sheen_sample(ccl_private const ShaderClosure *sc,
float3 H = normalize(I + *omega_in);
*eval = calculate_principled_sheen_brdf(N, I, *omega_in, H, pdf);
#ifdef __RAY_DIFFERENTIALS__
// TODO: find a better approximation for the diffuse bounce
*domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
*domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
#endif
}
else {
*eval = zero_spectrum();

View File

@ -39,14 +39,10 @@ ccl_device Spectrum bsdf_reflection_eval_transmit(ccl_private const ShaderClosur
ccl_device int bsdf_reflection_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
@ -57,10 +53,6 @@ ccl_device int bsdf_reflection_sample(ccl_private const ShaderClosure *sc,
if (cosNO > 0) {
*omega_in = (2 * cosNO) * N - I;
if (dot(Ng, *omega_in) > 0) {
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = 2 * dot(N, dIdx) * N - dIdx;
*domega_in_dy = 2 * dot(N, dIdy) * N - dIdy;
#endif
/* Some high number for MIS. */
*pdf = 1e6f;
*eval = make_spectrum(1e6f);

View File

@ -39,14 +39,10 @@ ccl_device Spectrum bsdf_refraction_eval_transmit(ccl_private const ShaderClosur
ccl_device int bsdf_refraction_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
@ -54,35 +50,15 @@ ccl_device int bsdf_refraction_sample(ccl_private const ShaderClosure *sc,
float3 N = bsdf->N;
float3 R, T;
#ifdef __RAY_DIFFERENTIALS__
float3 dRdx, dRdy, dTdx, dTdy;
#endif
bool inside;
float fresnel;
fresnel = fresnel_dielectric(m_eta,
N,
I,
&R,
&T,
#ifdef __RAY_DIFFERENTIALS__
dIdx,
dIdy,
&dRdx,
&dRdy,
&dTdx,
&dTdy,
#endif
&inside);
fresnel = fresnel_dielectric(m_eta, N, I, &R, &T, &inside);
if (!inside && fresnel != 1.0f) {
/* Some high number for MIS. */
*pdf = 1e6f;
*eval = make_spectrum(1e6f);
*omega_in = T;
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = dTdx;
*domega_in_dy = dTdy;
#endif
}
else {
*pdf = 0.0f;

View File

@ -83,14 +83,10 @@ ccl_device Spectrum bsdf_diffuse_toon_eval_transmit(ccl_private const ShaderClos
ccl_device int bsdf_diffuse_toon_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
@ -104,12 +100,6 @@ ccl_device int bsdf_diffuse_toon_sample(ccl_private const ShaderClosure *sc,
if (dot(Ng, *omega_in) > 0.0f) {
*eval = make_spectrum(*pdf * bsdf_toon_get_intensity(max_angle, smooth, angle));
#ifdef __RAY_DIFFERENTIALS__
// TODO: find a better approximation for the bounce
*domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
*domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
#endif
}
else {
*eval = zero_spectrum();
@ -175,14 +165,10 @@ ccl_device Spectrum bsdf_glossy_toon_eval_transmit(ccl_private const ShaderClosu
ccl_device int bsdf_glossy_toon_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
@ -205,11 +191,6 @@ ccl_device int bsdf_glossy_toon_sample(ccl_private const ShaderClosure *sc,
/* make sure the direction we chose is still in the right hemisphere */
if (cosNI > 0) {
*eval = make_spectrum(*pdf * bsdf_toon_get_intensity(max_angle, smooth, angle));
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
*domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
#endif
}
else {
*pdf = 0.0f;

View File

@ -80,22 +80,14 @@ ccl_device Spectrum bsdf_transparent_eval_transmit(ccl_private const ShaderClosu
ccl_device int bsdf_transparent_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
// only one direction is possible
*omega_in = -I;
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = -dIdx;
*domega_in_dy = -dIdy;
#endif
*pdf = 1;
*eval = one_spectrum();
return LABEL_TRANSMIT | LABEL_TRANSPARENT;

View File

@ -15,14 +15,6 @@ ccl_device float fresnel_dielectric(float eta,
const float3 I,
ccl_private float3 *R,
ccl_private float3 *T,
#ifdef __RAY_DIFFERENTIALS__
const float3 dIdx,
const float3 dIdy,
ccl_private float3 *dRdx,
ccl_private float3 *dRdy,
ccl_private float3 *dTdx,
ccl_private float3 *dTdy,
#endif
ccl_private bool *is_inside)
{
float cos = dot(N, I), neta;
@ -45,28 +37,16 @@ ccl_device float fresnel_dielectric(float eta,
// compute reflection
*R = (2 * cos) * Nn - I;
#ifdef __RAY_DIFFERENTIALS__
*dRdx = (2 * dot(Nn, dIdx)) * Nn - dIdx;
*dRdy = (2 * dot(Nn, dIdy)) * Nn - dIdy;
#endif
float arg = 1 - (neta * neta * (1 - (cos * cos)));
if (arg < 0) {
*T = make_float3(0.0f, 0.0f, 0.0f);
#ifdef __RAY_DIFFERENTIALS__
*dTdx = make_float3(0.0f, 0.0f, 0.0f);
*dTdy = make_float3(0.0f, 0.0f, 0.0f);
#endif
return 1; // total internal reflection
}
else {
float dnp = max(sqrtf(arg), 1e-7f);
float nK = (neta * cos) - dnp;
*T = -(neta * I) + (nK * Nn);
#ifdef __RAY_DIFFERENTIALS__
*dTdx = -(neta * dIdx) + ((neta - neta * neta * cos / dnp) * dot(dIdx, Nn)) * Nn;
*dTdy = -(neta * dIdy) + ((neta - neta * neta * cos / dnp) * dot(dIdy, Nn)) * Nn;
#endif
// compute Fresnel terms
float cosTheta1 = cos; // N.R
float cosTheta2 = -dot(Nn, *T);

View File

@ -101,14 +101,10 @@ henyey_greenstrein_sample(float3 D, float g, float randu, float randv, ccl_priva
ccl_device int volume_henyey_greenstein_sample(ccl_private const ShaderVolumeClosure *svc,
float3 I,
float3 dIdx,
float3 dIdy,
float randu,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private float3 *domega_in_dx,
ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
float g = svc->g;
@ -117,12 +113,6 @@ ccl_device int volume_henyey_greenstein_sample(ccl_private const ShaderVolumeClo
*omega_in = henyey_greenstrein_sample(-I, g, randu, randv, pdf);
*eval = make_spectrum(*pdf); /* perfect importance sampling */
#ifdef __RAY_DIFFERENTIALS__
/* todo: implement ray differential estimation */
*domega_in_dx = make_float3(0.0f, 0.0f, 0.0f);
*domega_in_dy = make_float3(0.0f, 0.0f, 0.0f);
#endif
return LABEL_VOLUME_SCATTER;
}
@ -142,20 +132,9 @@ ccl_device int volume_phase_sample(ccl_private const ShaderData *sd,
float randv,
ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
ccl_private differential3 *domega_in,
ccl_private float *pdf)
{
return volume_henyey_greenstein_sample(svc,
sd->I,
sd->dI.dx,
sd->dI.dy,
randu,
randv,
eval,
omega_in,
&domega_in->dx,
&domega_in->dy,
pdf);
return volume_henyey_greenstein_sample(svc, sd->I, randu, randv, eval, omega_in, pdf);
}
/* Volume sampling utilities. */

View File

@ -123,9 +123,9 @@ ccl_device_inline void shader_setup_from_ray(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
/* differentials */
differential_transfer_compact(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, sd->ray_length);
differential_incoming_compact(&sd->dI, ray->D, ray->dD);
differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
sd->dP = differential_transfer_compact(ray->dP, ray->D, ray->dD, sd->ray_length);
sd->dI = differential_incoming_compact(ray->dD);
differential_dudv_compact(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
#endif
}
@ -240,8 +240,8 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
/* no ray differentials here yet */
sd->dP = differential3_zero();
sd->dI = differential3_zero();
sd->dP = differential_zero_compact();
sd->dI = differential_zero_compact();
sd->du = differential_zero();
sd->dv = differential_zero();
#endif
@ -348,8 +348,8 @@ ccl_device void shader_setup_from_curve(KernelGlobals kg,
/* No ray differentials currently. */
#ifdef __RAY_DIFFERENTIALS__
sd->dP = differential3_zero();
sd->dI = differential3_zero();
sd->dP = differential_zero_compact();
sd->dI = differential_zero_compact();
sd->du = differential_zero();
sd->dv = differential_zero();
#endif
@ -391,8 +391,8 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
/* differentials */
sd->dP = differential3_zero(); /* TODO: ray->dP */
differential_incoming(&sd->dI, sd->dP);
sd->dP = differential_zero_compact(); /* TODO: ray->dP */
sd->dI = differential_zero_compact();
sd->du = differential_zero();
sd->dv = differential_zero();
#endif
@ -433,8 +433,8 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals kg,
# ifdef __RAY_DIFFERENTIALS__
/* differentials */
sd->dP = differential3_zero(); /* TODO ray->dD */
differential_incoming(&sd->dI, sd->dP);
sd->dP = differential_zero_compact(); /* TODO ray->dD */
sd->dI = differential_zero_compact();
sd->du = differential_zero();
sd->dv = differential_zero();
# endif

View File

@ -362,11 +362,10 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
float bsdf_pdf;
BsdfEval bsdf_eval ccl_optional_struct_init;
float3 bsdf_omega_in ccl_optional_struct_init;
differential3 bsdf_domega_in ccl_optional_struct_init;
int label;
label = shader_bsdf_sample_closure(
kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_pdf);
if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) {
return LABEL_NONE;
@ -385,7 +384,6 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
#ifdef __RAY_DIFFERENTIALS__
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in);
#endif
}

View File

@ -871,17 +871,9 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
float phase_pdf;
BsdfEval phase_eval ccl_optional_struct_init;
float3 phase_omega_in ccl_optional_struct_init;
differential3 phase_domega_in ccl_optional_struct_init;
const int label = shader_volume_phase_sample(kg,
sd,
phases,
phase_u,
phase_v,
&phase_eval,
&phase_omega_in,
&phase_domega_in,
&phase_pdf);
const int label = shader_volume_phase_sample(
kg, sd, phases, phase_u, phase_v, &phase_eval, &phase_omega_in, &phase_pdf);
if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) {
return false;
@ -894,7 +886,6 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
# ifdef __RAY_DIFFERENTIALS__
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in);
# endif
// Save memory by storing last hit prim and object in isect
INTEGRATOR_STATE_WRITE(state, isect, prim) = sd->prim;

View File

@ -339,7 +339,6 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals kg,
float randv,
ccl_private BsdfEval *bsdf_eval,
ccl_private float3 *omega_in,
ccl_private differential3 *domega_in,
ccl_private float *pdf)
{
/* BSSRDF should already have been handled elsewhere. */
@ -349,7 +348,7 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals kg,
Spectrum eval = zero_spectrum();
*pdf = 0.0f;
label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, pdf);
if (*pdf != 0.0f) {
bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight);
@ -708,7 +707,6 @@ ccl_device int shader_volume_phase_sample(KernelGlobals kg,
float randv,
ccl_private BsdfEval *phase_eval,
ccl_private float3 *omega_in,
ccl_private differential3 *domega_in,
ccl_private float *pdf)
{
int sampled = 0;
@ -751,7 +749,7 @@ ccl_device int shader_volume_phase_sample(KernelGlobals kg,
Spectrum eval = zero_spectrum();
*pdf = 0.0f;
label = volume_phase_sample(sd, svc, randu, randv, &eval, omega_in, domega_in, pdf);
label = volume_phase_sample(sd, svc, randu, randv, &eval, omega_in, pdf);
if (*pdf != 0.0f) {
bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
@ -767,14 +765,13 @@ ccl_device int shader_phase_sample_closure(KernelGlobals kg,
float randv,
ccl_private BsdfEval *phase_eval,
ccl_private float3 *omega_in,
ccl_private differential3 *domega_in,
ccl_private float *pdf)
{
int label;
Spectrum eval = zero_spectrum();
*pdf = 0.0f;
label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, pdf);
if (*pdf != 0.0f)
bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);

View File

@ -1102,8 +1102,9 @@ bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg,
ndc[0] = camera_world_to_ndc(kg, sd, sd->P);
if (derivatives) {
ndc[1] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx) - ndc[0];
ndc[2] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy) - ndc[0];
const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
ndc[1] = camera_world_to_ndc(kg, sd, sd->P + dP.dx) - ndc[0];
ndc[2] = camera_world_to_ndc(kg, sd, sd->P + dP.dy) - ndc[0];
}
}
@ -1755,11 +1756,13 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg,
return set_attribute_float3(sd->Ng, type, derivatives, val);
}
else if (name == u_P) {
float3 f[3] = {sd->P, sd->dP.dx, sd->dP.dy};
const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
float3 f[3] = {sd->P, dP.dx, dP.dy};
return set_attribute_float3(f, type, derivatives, val);
}
else if (name == u_I) {
float3 f[3] = {sd->I, sd->dI.dx, sd->dI.dy};
const differential3 dI = differential_from_compact(sd->I, sd->dI);
float3 f[3] = {sd->I, dI.dx, dI.dy};
return set_attribute_float3(f, type, derivatives, val);
}
else if (name == u_u) {

View File

@ -17,6 +17,8 @@
#include "kernel/osl/globals.h"
#include "kernel/osl/services.h"
#include "kernel/osl/shader.h"
#include "kernel/util/differential.h"
// clang-format on
#include "scene/attribute.h"
@ -79,13 +81,16 @@ static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg,
{
OSL::ShaderGlobals *globals = &tdata->globals;
const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
const differential3 dI = differential_from_compact(sd->I, sd->dI);
/* copy from shader data to shader globals */
globals->P = TO_VEC3(sd->P);
globals->dPdx = TO_VEC3(sd->dP.dx);
globals->dPdy = TO_VEC3(sd->dP.dy);
globals->dPdx = TO_VEC3(dP.dx);
globals->dPdy = TO_VEC3(dP.dy);
globals->I = TO_VEC3(sd->I);
globals->dIdx = TO_VEC3(sd->dI.dx);
globals->dIdy = TO_VEC3(sd->dI.dy);
globals->dIdx = TO_VEC3(dI.dx);
globals->dIdy = TO_VEC3(dI.dy);
globals->N = TO_VEC3(sd->N);
globals->Ng = TO_VEC3(sd->Ng);
globals->u = sd->u;
@ -183,9 +188,10 @@ void OSLShader::eval_surface(const KernelGlobalsCPU *kg,
/* automatic bump shader */
if (kg->osl->bump_state[shader]) {
/* save state */
float3 P = sd->P;
float3 dPdx = sd->dP.dx;
float3 dPdy = sd->dP.dy;
const float3 P = sd->P;
const float dP = sd->dP;
const OSL::Vec3 dPdx = globals->dPdx;
const OSL::Vec3 dPdy = globals->dPdy;
/* set state as if undisplaced */
if (sd->flag & SD_HAS_DISPLACEMENT) {
@ -199,17 +205,20 @@ void OSLShader::eval_surface(const KernelGlobalsCPU *kg,
(void)found;
assert(found);
differential3 tmp_dP;
memcpy(&sd->P, data, sizeof(float) * 3);
memcpy(&sd->dP.dx, data + 3, sizeof(float) * 3);
memcpy(&sd->dP.dy, data + 6, sizeof(float) * 3);
memcpy(&tmp_dP.dx, data + 3, sizeof(float) * 3);
memcpy(&tmp_dP.dy, data + 6, sizeof(float) * 3);
object_position_transform(kg, sd, &sd->P);
object_dir_transform(kg, sd, &sd->dP.dx);
object_dir_transform(kg, sd, &sd->dP.dy);
object_dir_transform(kg, sd, &tmp_dP.dx);
object_dir_transform(kg, sd, &tmp_dP.dy);
sd->dP = differential_make_compact(tmp_dP);
globals->P = TO_VEC3(sd->P);
globals->dPdx = TO_VEC3(sd->dP.dx);
globals->dPdy = TO_VEC3(sd->dP.dy);
globals->dPdx = TO_VEC3(tmp_dP.dx);
globals->dPdy = TO_VEC3(tmp_dP.dy);
}
/* execute bump shader */
@ -217,8 +226,7 @@ void OSLShader::eval_surface(const KernelGlobalsCPU *kg,
/* reset state */
sd->P = P;
sd->dP.dx = dPdx;
sd->dP.dy = dPdy;
sd->dP = dP;
globals->P = TO_VEC3(P);
globals->dPdx = TO_VEC3(dPdx);

View File

@ -140,6 +140,16 @@ ccl_device_noinline void svm_node_attr(KernelGlobals kg,
}
}
ccl_device_forceinline float3 svm_node_bump_P_dx(const ccl_private ShaderData *sd)
{
  /* sd->dP is stored in compact form, so rebuild the full differential around
   * Ng first and then offset P by its x component. */
  const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
  return sd->P + dP.dx;
}
ccl_device_forceinline float3 svm_node_bump_P_dy(const ccl_private ShaderData *sd)
{
  /* sd->dP is stored in compact form, so rebuild the full differential around
   * Ng first and then offset P by its y component. */
  const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
  return sd->P + dP.dy;
}
ccl_device_noinline void svm_node_attr_bump_dx(KernelGlobals kg,
ccl_private ShaderData *sd,
ccl_private float *stack,
@ -167,7 +177,7 @@ ccl_device_noinline void svm_node_attr_bump_dx(KernelGlobals kg,
if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) {
/* No generated attribute, fall back to object coordinates. */
float3 f = sd->P + sd->dP.dx;
float3 f = svm_node_bump_P_dx(sd);
if (sd->object != OBJECT_NONE) {
object_inverse_position_transform(kg, sd, &f);
}
@ -265,7 +275,7 @@ ccl_device_noinline void svm_node_attr_bump_dy(KernelGlobals kg,
if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) {
/* No generated attribute, fall back to object coordinates. */
float3 f = sd->P + sd->dP.dy;
float3 f = svm_node_bump_P_dy(sd);
if (sd->object != OBJECT_NONE) {
object_inverse_position_transform(kg, sd, &f);
}

View File

@ -14,23 +14,21 @@ ccl_device_noinline void svm_node_enter_bump_eval(KernelGlobals kg,
{
/* save state */
stack_store_float3(stack, offset + 0, sd->P);
stack_store_float3(stack, offset + 3, sd->dP.dx);
stack_store_float3(stack, offset + 6, sd->dP.dy);
stack_store_float(stack, offset + 3, sd->dP);
/* set state as if undisplaced */
const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED);
if (desc.offset != ATTR_STD_NOT_FOUND) {
float3 P, dPdx, dPdy;
P = primitive_surface_attribute_float3(kg, sd, desc, &dPdx, &dPdy);
differential3 dP;
float3 P = primitive_surface_attribute_float3(kg, sd, desc, &dP.dx, &dP.dy);
object_position_transform(kg, sd, &P);
object_dir_transform(kg, sd, &dPdx);
object_dir_transform(kg, sd, &dPdy);
object_dir_transform(kg, sd, &dP.dx);
object_dir_transform(kg, sd, &dP.dy);
sd->P = P;
sd->dP.dx = dPdx;
sd->dP.dy = dPdy;
sd->dP = differential_make_compact(dP);
}
}
@ -41,8 +39,7 @@ ccl_device_noinline void svm_node_leave_bump_eval(KernelGlobals kg,
{
/* restore state */
sd->P = stack_load_float3(stack, offset + 0);
sd->dP.dx = stack_load_float3(stack, offset + 3);
sd->dP.dy = stack_load_float3(stack, offset + 6);
sd->dP = stack_load_float(stack, offset + 3);
}
CCL_NAMESPACE_END

View File

@ -24,18 +24,17 @@ ccl_device_noinline void svm_node_set_bump(KernelGlobals kg,
float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) :
sd->N;
float3 dPdx = sd->dP.dx;
float3 dPdy = sd->dP.dy;
differential3 dP = differential_from_compact(sd->Ng, sd->dP);
if (use_object_space) {
object_inverse_normal_transform(kg, sd, &normal_in);
object_inverse_dir_transform(kg, sd, &dPdx);
object_inverse_dir_transform(kg, sd, &dPdy);
object_inverse_dir_transform(kg, sd, &dP.dx);
object_inverse_dir_transform(kg, sd, &dP.dy);
}
/* get surface tangents from normal */
float3 Rx = cross(dPdy, normal_in);
float3 Ry = cross(normal_in, dPdx);
float3 Rx = cross(dP.dy, normal_in);
float3 Ry = cross(normal_in, dP.dx);
/* get bump values */
uint c_offset, x_offset, y_offset, strength_offset;
@ -46,7 +45,7 @@ ccl_device_noinline void svm_node_set_bump(KernelGlobals kg,
float h_y = stack_load_float(stack, y_offset);
/* compute surface gradient and determinant */
float det = dot(dPdx, Rx);
float det = dot(dP.dx, Rx);
float3 surfgrad = (h_x - h_c) * Rx + (h_y - h_c) * Ry;
float absdet = fabsf(det);

View File

@ -54,7 +54,7 @@ ccl_device_noinline void svm_node_geometry_bump_dx(KernelGlobals kg,
switch (type) {
case NODE_GEOM_P:
data = sd->P + sd->dP.dx;
data = svm_node_bump_P_dx(sd);
break;
case NODE_GEOM_uv:
data = make_float3(1.0f - sd->u - sd->du.dx - sd->v - sd->dv.dx, sd->u + sd->du.dx, 0.0f);
@ -81,7 +81,7 @@ ccl_device_noinline void svm_node_geometry_bump_dy(KernelGlobals kg,
switch (type) {
case NODE_GEOM_P:
data = sd->P + sd->dP.dy;
data = svm_node_bump_P_dy(sd);
break;
case NODE_GEOM_uv:
data = make_float3(1.0f - sd->u - sd->du.dy - sd->v - sd->dv.dy, sd->u + sd->du.dy, 0.0f);

View File

@ -106,7 +106,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg,
switch (type) {
case NODE_TEXCO_OBJECT: {
data = sd->P + sd->dP.dx;
data = svm_node_bump_P_dx(sd);
if (node.w == 0) {
if (sd->object != OBJECT_NONE) {
object_inverse_position_transform(kg, sd, &data);
@ -130,9 +130,9 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg,
Transform tfm = kernel_data.cam.worldtocamera;
if (sd->object != OBJECT_NONE)
data = transform_point(&tfm, sd->P + sd->dP.dx);
data = transform_point(&tfm, svm_node_bump_P_dx(sd));
else
data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg));
data = transform_point(&tfm, svm_node_bump_P_dx(sd) + camera_position(kg));
break;
}
case NODE_TEXCO_WINDOW: {
@ -140,7 +140,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg,
kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
data = camera_world_to_ndc(kg, sd, sd->ray_P);
else
data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx);
data = camera_world_to_ndc(kg, sd, svm_node_bump_P_dx(sd));
data.z = 0.0f;
break;
}
@ -160,7 +160,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg,
break;
}
case NODE_TEXCO_VOLUME_GENERATED: {
data = sd->P + sd->dP.dx;
data = svm_node_bump_P_dx(sd);
# ifdef __VOLUME__
if (sd->object != OBJECT_NONE)
@ -191,7 +191,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg,
switch (type) {
case NODE_TEXCO_OBJECT: {
data = sd->P + sd->dP.dy;
data = svm_node_bump_P_dy(sd);
if (node.w == 0) {
if (sd->object != OBJECT_NONE) {
object_inverse_position_transform(kg, sd, &data);
@ -215,9 +215,9 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg,
Transform tfm = kernel_data.cam.worldtocamera;
if (sd->object != OBJECT_NONE)
data = transform_point(&tfm, sd->P + sd->dP.dy);
data = transform_point(&tfm, svm_node_bump_P_dy(sd));
else
data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg));
data = transform_point(&tfm, svm_node_bump_P_dy(sd) + camera_position(kg));
break;
}
case NODE_TEXCO_WINDOW: {
@ -225,7 +225,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg,
kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
data = camera_world_to_ndc(kg, sd, sd->ray_P);
else
data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy);
data = camera_world_to_ndc(kg, sd, svm_node_bump_P_dy(sd));
data.z = 0.0f;
break;
}
@ -245,7 +245,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg,
break;
}
case NODE_TEXCO_VOLUME_GENERATED: {
data = sd->P + sd->dP.dy;
data = svm_node_bump_P_dy(sd);
# ifdef __VOLUME__
if (sd->object != OBJECT_NONE)

View File

@ -12,7 +12,7 @@ CCL_NAMESPACE_BEGIN
/* SVM stack offsets with this value indicate that it's not on the stack */
#define SVM_STACK_INVALID 255
#define SVM_BUMP_EVAL_STATE_SIZE 9
#define SVM_BUMP_EVAL_STATE_SIZE 4
/* Nodes */

View File

@ -14,6 +14,7 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline float wireframe(KernelGlobals kg,
ccl_private ShaderData *sd,
const differential3 dP,
float size,
int pixel_size,
ccl_private float3 *P)
@ -46,8 +47,8 @@ ccl_device_inline float wireframe(KernelGlobals kg,
if (pixel_size) {
// Project the derivatives of P to the viewing plane defined
// by I so we have a measure of how big is a pixel at this point
float pixelwidth_x = len(sd->dP.dx - dot(sd->dP.dx, sd->I) * sd->I);
float pixelwidth_y = len(sd->dP.dy - dot(sd->dP.dy, sd->I) * sd->I);
float pixelwidth_x = len(dP.dx - dot(dP.dx, sd->I) * sd->I);
float pixelwidth_y = len(dP.dy - dot(dP.dy, sd->I) * sd->I);
// Take the average of both axis' length
pixelwidth = (pixelwidth_x + pixelwidth_y) * 0.5f;
}
@ -86,16 +87,17 @@ ccl_device_noinline void svm_node_wireframe(KernelGlobals kg,
int pixel_size = (int)use_pixel_size;
/* Calculate wireframe */
float f = wireframe(kg, sd, size, pixel_size, &sd->P);
const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
float f = wireframe(kg, sd, dP, size, pixel_size, &sd->P);
/* TODO(sergey): Think of faster way to calculate derivatives. */
if (bump_offset == NODE_BUMP_OFFSET_DX) {
float3 Px = sd->P - sd->dP.dx;
f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(sd->dP.dx);
float3 Px = sd->P - dP.dx;
f += (f - wireframe(kg, sd, dP, size, pixel_size, &Px)) / len(dP.dx);
}
else if (bump_offset == NODE_BUMP_OFFSET_DY) {
float3 Py = sd->P - sd->dP.dy;
f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(sd->dP.dy);
float3 Py = sd->P - dP.dy;
f += (f - wireframe(kg, sd, dP, size, pixel_size, &Py)) / len(dP.dy);
}
if (stack_valid(out_fac))

View File

@ -873,10 +873,10 @@ typedef struct ccl_align(16) ShaderData
float ray_length;
#ifdef __RAY_DIFFERENTIALS__
/* differential of P. these are orthogonal to Ng, not N */
differential3 dP;
/* differential of I */
differential3 dI;
/* Radius of differential of P. */
float dP;
/* Radius of differential of I. */
float dI;
/* differential of u, v */
differential du;
differential dv;

View File

@ -101,53 +101,59 @@ ccl_device differential3 differential3_zero()
return d;
}
/* Compact ray differentials that are just a scale to reduce memory usage and
* access cost in GPU.
/* Compact ray differentials that are just a radius to reduce memory usage and access cost
* on GPUs, basically cone tracing.
*
* See above for more accurate reference implementations.
*
* TODO: also store the more compact version in ShaderData and recompute where
* needed? */
* See above for more accurate reference implementations of ray differentials. */
ccl_device_forceinline float differential_zero_compact()
{
/* A compact differential is a single float, so its zero value is 0.0f. */
return 0.0f;
}
ccl_device_forceinline float differential_make_compact(const differential3 D)
ccl_device_forceinline float differential_make_compact(const float dD)
{
return 0.5f * (len(D.dx) + len(D.dy));
return dD;
}
ccl_device_forceinline void differential_transfer_compact(ccl_private differential3 *surface_dP,
const float ray_dP,
const float3 /* ray_D */,
const float ray_dD,
const float3 surface_Ng,
const float ray_t)
ccl_device_forceinline float differential_make_compact(const differential3 dD)
{
/* ray differential transfer through homogeneous medium, to
* compute dPdx/dy at a shading point from the incoming ray */
float scale = ray_dP + ray_t * ray_dD;
float3 dx, dy;
make_orthonormals(surface_Ng, &dx, &dy);
surface_dP->dx = dx * scale;
surface_dP->dy = dy * scale;
return 0.5f * (len(dD.dx) + len(dD.dy));
}
ccl_device_forceinline void differential_incoming_compact(ccl_private differential3 *dI,
const float3 D,
const float dD)
ccl_device_forceinline float differential_incoming_compact(const float dD)
{
/* Compact differential of I at a shading point: the ray direction
 * differential dD is returned unchanged, no negation is applied. */
return dD;
}
ccl_device_forceinline float differential_transfer_compact(const float ray_dP,
                                                           const float3 /* ray_D */,
                                                           const float ray_dD,
                                                           const float ray_t)
{
  /* Compact differential of P after travelling ray_t along the ray: the
   * starting value ray_dP plus ray_dD scaled by the distance. The ray
   * direction parameter is unused. */
  const float transferred_dP = ray_dP + ray_t * ray_dD;
  return transferred_dP;
}
ccl_device_forceinline differential3 differential_from_compact(const float3 D, const float dD)
{
float3 dx, dy;
make_orthonormals(D, &dx, &dy);
dI->dx = dD * dx;
dI->dy = dD * dy;
differential3 d;
d.dx = dD * dx;
d.dy = dD * dy;
return d;
}
ccl_device void differential_dudv_compact(ccl_private differential *du,
                                          ccl_private differential *dv,
                                          float3 dPdu,
                                          float3 dPdv,
                                          float dP,
                                          float3 Ng)
{
  /* Expand the compact dP around Ng into a full differential and hand the
   * work to differential_dudv().
   * TODO: can we speed this up? */
  const differential3 dP_full = differential_from_compact(Ng, dP);
  differential_dudv(du, dv, dPdu, dPdv, dP_full, Ng);
}
CCL_NAMESPACE_END

View File

@ -772,10 +772,7 @@ float Camera::world_to_raster_size(float3 P)
#endif
/* TODO: would it help to use more accurate differentials here? */
differential3 dP;
differential_transfer_compact(&dP, ray.dP, ray.D, ray.dD, ray.D, dist);
return max(len(dP.dx), len(dP.dy));
return differential_transfer_compact(ray.dP, ray.D, ray.dD, dist);
}
return res;