Cycles: Replace saturate with saturatef

saturate is deprecated in favour of __saturatef. This replaces saturate with __saturatef on CUDA by creating a saturatef function which replaces all instances of saturate and is hooked up to the correct function on all platforms. Reviewed By: brecht Differential Revision: https://developer.blender.org/D13010
2021-10-27 13:28:13 +02:00 · 2021-10-27 13:28:13 +02:00 · 7b1c5712f8
parent 18b6f0d0f1
commit 7b1c5712f8
19 changed files with 55 additions and 50 deletions
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@ -315,8 +315,8 @@ ccl_device int bsdf_microfacet_ggx_setup(ccl_private MicrofacetBsdf *bsdf)
 {
  bsdf->extra = NULL;

-  bsdf->alpha_x = saturate(bsdf->alpha_x);
-  bsdf->alpha_y = saturate(bsdf->alpha_y);
+  bsdf->alpha_x = saturatef(bsdf->alpha_x);
+  bsdf->alpha_y = saturatef(bsdf->alpha_y);

  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ID;

@ -336,8 +336,8 @@ ccl_device int bsdf_microfacet_ggx_fresnel_setup(ccl_private MicrofacetBsdf *bsd
 {
  bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0);

-  bsdf->alpha_x = saturate(bsdf->alpha_x);
-  bsdf->alpha_y = saturate(bsdf->alpha_y);
+  bsdf->alpha_x = saturatef(bsdf->alpha_x);
+  bsdf->alpha_y = saturatef(bsdf->alpha_y);

  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID;

@ -351,7 +351,7 @@ ccl_device int bsdf_microfacet_ggx_clearcoat_setup(ccl_private MicrofacetBsdf *b
 {
  bsdf->extra->cspec0 = saturate3(bsdf->extra->cspec0);

-  bsdf->alpha_x = saturate(bsdf->alpha_x);
+  bsdf->alpha_x = saturatef(bsdf->alpha_x);
  bsdf->alpha_y = bsdf->alpha_x;

  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID;
@ -365,7 +365,7 @@ ccl_device int bsdf_microfacet_ggx_refraction_setup(ccl_private MicrofacetBsdf *
 {
  bsdf->extra = NULL;

-  bsdf->alpha_x = saturate(bsdf->alpha_x);
+  bsdf->alpha_x = saturatef(bsdf->alpha_x);
  bsdf->alpha_y = bsdf->alpha_x;

  bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
@ -783,8 +783,8 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals kg,

 ccl_device int bsdf_microfacet_beckmann_setup(ccl_private MicrofacetBsdf *bsdf)
 {
-  bsdf->alpha_x = saturate(bsdf->alpha_x);
-  bsdf->alpha_y = saturate(bsdf->alpha_y);
+  bsdf->alpha_x = saturatef(bsdf->alpha_x);
+  bsdf->alpha_y = saturatef(bsdf->alpha_y);

  bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID;
  return SD_BSDF | SD_BSDF_HAS_EVAL;
@ -800,7 +800,7 @@ ccl_device int bsdf_microfacet_beckmann_isotropic_setup(ccl_private MicrofacetBs

 ccl_device int bsdf_microfacet_beckmann_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
 {
-  bsdf->alpha_x = saturate(bsdf->alpha_x);
+  bsdf->alpha_x = saturatef(bsdf->alpha_x);
  bsdf->alpha_y = bsdf->alpha_x;

  bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
@ -220,12 +220,12 @@ ccl_device_forceinline float mf_lambda(const float3 w, const float2 alpha)
 /* Height distribution CDF (based on page 4 of the supplemental implementation). */
 ccl_device_forceinline float mf_invC1(const float h)
 {
-  return 2.0f * saturate(h) - 1.0f;
+  return 2.0f * saturatef(h) - 1.0f;
 }

 ccl_device_forceinline float mf_C1(const float h)
 {
-  return saturate(0.5f * (h + 1.0f));
+  return saturatef(0.5f * (h + 1.0f));
 }

 /* Masking function (based on page 16 of the supplemental implementation). */
@ -284,7 +284,7 @@ ccl_device_forceinline float mf_ggx_albedo(float r)
             0.027803f) *
                r +
            0.00568739f;
-  return saturate(albedo);
+  return saturatef(albedo);
 }

 ccl_device_inline float mf_ggx_transmission_albedo(float a, float ior)
@ -292,7 +292,7 @@ ccl_device_inline float mf_ggx_transmission_albedo(float a, float ior)
  if (ior < 1.0f) {
    ior = 1.0f / ior;
  }
-  a = saturate(a);
+  a = saturatef(a);
  ior = clamp(ior, 1.0f, 3.0f);
  float I_1 = 0.0476898f * expf(-0.978352f * (ior - 0.65657f) * (ior - 0.65657f)) -
              0.033756f * ior + 0.993261f;
@ -302,7 +302,7 @@ ccl_device_inline float mf_ggx_transmission_albedo(float a, float ior)
  float R_2 = ((((5.3725f * a - 24.9307f) * a + 22.7437f) * a - 3.40751f) * a + 0.0986325f) * a +
              0.00493504f;

-  return saturate(1.0f + I_2 * R_2 * 0.0019127f - (1.0f - I_1) * (1.0f - R_1) * 9.3205f);
+  return saturatef(1.0f + I_2 * R_2 * 0.0019127f - (1.0f - I_1) * (1.0f - R_1) * 9.3205f);
 }

 ccl_device_forceinline float mf_ggx_pdf(const float3 wi, const float3 wo, const float alpha)
--- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h
+++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
@ -50,7 +50,7 @@ ccl_device int bsdf_oren_nayar_setup(ccl_private OrenNayarBsdf *bsdf)

  bsdf->type = CLOSURE_BSDF_OREN_NAYAR_ID;

-  sigma = saturate(sigma);
+  sigma = saturatef(sigma);

  float div = 1.0f / (M_PI_F + ((3.0f * M_PI_F - 4.0f) / 6.0f) * sigma);

--- a/intern/cycles/kernel/closure/bsdf_toon.h
+++ b/intern/cycles/kernel/closure/bsdf_toon.h
@ -48,8 +48,8 @@ static_assert(sizeof(ShaderClosure) >= sizeof(ToonBsdf), "ToonBsdf is too large!
 ccl_device int bsdf_diffuse_toon_setup(ccl_private ToonBsdf *bsdf)
 {
  bsdf->type = CLOSURE_BSDF_DIFFUSE_TOON_ID;
-  bsdf->size = saturate(bsdf->size);
-  bsdf->smooth = saturate(bsdf->smooth);
+  bsdf->size = saturatef(bsdf->size);
+  bsdf->smooth = saturatef(bsdf->smooth);

  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
@ -146,8 +146,8 @@ ccl_device int bsdf_diffuse_toon_sample(ccl_private const ShaderClosure *sc,
 ccl_device int bsdf_glossy_toon_setup(ccl_private ToonBsdf *bsdf)
 {
  bsdf->type = CLOSURE_BSDF_GLOSSY_TOON_ID;
-  bsdf->size = saturate(bsdf->size);
-  bsdf->smooth = saturate(bsdf->smooth);
+  bsdf->size = saturatef(bsdf->size);
+  bsdf->smooth = saturatef(bsdf->smooth);

  return SD_BSDF | SD_BSDF_HAS_EVAL;
 }
--- a/intern/cycles/kernel/film/passes.h
+++ b/intern/cycles/kernel/film/passes.h
@ -312,7 +312,7 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals kg,
    const float mist_inv_depth = kernel_data.film.mist_inv_depth;

    const float depth = camera_distance(kg, sd->P);
-    float mist = saturate((depth - mist_start) * mist_inv_depth);
+    float mist = saturatef((depth - mist_start) * mist_inv_depth);

    /* Falloff */
    const float mist_falloff = kernel_data.film.mist_falloff;
--- a/intern/cycles/kernel/film/read.h
+++ b/intern/cycles/kernel/film/read.h
@ -27,7 +27,7 @@ CCL_NAMESPACE_BEGIN
 * roulette. */
 ccl_device_forceinline float film_transparency_to_alpha(float transparency)
 {
-  return saturate(1.0f - transparency);
+  return saturatef(1.0f - transparency);
 }

 ccl_device_inline float film_get_scale(ccl_global const KernelFilmConvert *ccl_restrict
@ -136,7 +136,7 @@ ccl_device_inline void film_get_pass_pixel_mist(ccl_global const KernelFilmConve

  /* Note that we accumulate 1 - mist in the kernel to avoid having to
   * track the mist values in the integrator state. */
-  pixel[0] = saturate(1.0f - f * scale_exposure);
+  pixel[0] = saturatef(1.0f - f * scale_exposure);
 }

 ccl_device_inline void film_get_pass_pixel_sample_count(
@ -458,7 +458,7 @@ ccl_device_inline float4 film_calculate_shadow_catcher_matte_with_shadow(
  const float3 color_matte = make_float3(in_matte[0], in_matte[1], in_matte[2]) * scale_exposure;

  const float transparency = in_matte[3] * scale;
-  const float alpha = saturate(1.0f - transparency);
+  const float alpha = saturatef(1.0f - transparency);

  const float alpha_matte = (1.0f - alpha) * (1.0f - average(shadow_catcher)) + alpha;

--- a/intern/cycles/kernel/svm/bevel.h
+++ b/intern/cycles/kernel/svm/bevel.h
@ -73,7 +73,7 @@ ccl_device_forceinline float svm_bevel_cubic_quintic_root_find(float xi)
    if (fabsf(f) < tolerance || f_ == 0.0f)
      break;

-    x = saturate(x - f / f_);
+    x = saturatef(x - f / f_);
  }

  return x;
--- a/intern/cycles/kernel/svm/brick.h
+++ b/intern/cycles/kernel/svm/brick.h
@ -56,7 +56,7 @@ ccl_device_noinline_cpu float2 svm_brick(float3 p,
  x = (p.x + offset) - brick_width * bricknum;
  y = p.y - row_height * rownum;

-  float tint = saturate((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias));
+  float tint = saturatef((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias));
  float min_dist = min(min(x, y), min(brick_width - x, row_height - y));

  float mortar;
--- a/intern/cycles/kernel/svm/closure.h
+++ b/intern/cycles/kernel/svm/closure.h
@ -173,9 +173,9 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
      float fresnel = fresnel_dielectric_cos(cosNO, ior);

      // calculate weights of the diffuse and specular part
-      float diffuse_weight = (1.0f - saturate(metallic)) * (1.0f - saturate(transmission));
+      float diffuse_weight = (1.0f - saturatef(metallic)) * (1.0f - saturatef(transmission));

-      float final_transmission = saturate(transmission) * (1.0f - saturate(metallic));
+      float final_transmission = saturatef(transmission) * (1.0f - saturatef(metallic));
      float specular_weight = (1.0f - final_transmission);

      // get the base color
@ -746,7 +746,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
      if (bsdf) {
        bsdf->N = N;

-        bsdf->sigma = saturate(param1);
+        bsdf->sigma = saturatef(param1);
        sd->flag |= bsdf_ashikhmin_velvet_setup(bsdf);
      }
      break;
@ -1233,7 +1233,7 @@ ccl_device_noinline void svm_node_mix_closure(ccl_private ShaderData *sd,
      node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset);

  float weight = stack_load_float(stack, weight_offset);
-  weight = saturate(weight);
+  weight = saturatef(weight);

  float in_weight = (stack_valid(in_weight_offset)) ? stack_load_float(stack, in_weight_offset) :
                                                      1.0f;
--- a/intern/cycles/kernel/svm/color_util.h
+++ b/intern/cycles/kernel/svm/color_util.h
@ -262,7 +262,7 @@ ccl_device float3 svm_mix_clamp(float3 col)

 ccl_device_noinline_cpu float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2)
 {
-  float t = saturate(fac);
+  float t = saturatef(fac);

  switch (type) {
    case NODE_MIX_BLEND:
--- a/intern/cycles/kernel/svm/gradient.h
+++ b/intern/cycles/kernel/svm/gradient.h
@ -73,7 +73,7 @@ ccl_device_noinline void svm_node_tex_gradient(ccl_private ShaderData *sd,
  float3 co = stack_load_float3(stack, co_offset);

  float f = svm_gradient(co, (NodeGradientType)type);
-  f = saturate(f);
+  f = saturatef(f);

  if (stack_valid(fac_offset))
    stack_store_float(stack, fac_offset, f);
--- a/intern/cycles/kernel/svm/hsv.h
+++ b/intern/cycles/kernel/svm/hsv.h
@ -40,7 +40,7 @@ ccl_device_noinline void svm_node_hsv(KernelGlobals kg,

  /* Remember: `fmodf` doesn't work for negative numbers here. */
  color.x = fmodf(color.x + hue + 0.5f, 1.0f);
-  color.y = saturate(color.y * sat);
+  color.y = saturatef(color.y * sat);
  color.z *= val;

  color = hsv_to_rgb(color);
--- a/intern/cycles/kernel/svm/image.h
+++ b/intern/cycles/kernel/svm/image.h
@ -167,17 +167,17 @@ ccl_device_noinline void svm_node_tex_image_box(KernelGlobals kg,
    /* in case of blending, test for mixes between two textures */
    if (N.z < (1.0f - limit) * (N.y + N.x)) {
      weight.x = N.x / (N.x + N.y);
-      weight.x = saturate((weight.x - 0.5f * (1.0f - blend)) / blend);
+      weight.x = saturatef((weight.x - 0.5f * (1.0f - blend)) / blend);
      weight.y = 1.0f - weight.x;
    }
    else if (N.x < (1.0f - limit) * (N.y + N.z)) {
      weight.y = N.y / (N.y + N.z);
-      weight.y = saturate((weight.y - 0.5f * (1.0f - blend)) / blend);
+      weight.y = saturatef((weight.y - 0.5f * (1.0f - blend)) / blend);
      weight.z = 1.0f - weight.y;
    }
    else if (N.y < (1.0f - limit) * (N.x + N.z)) {
      weight.x = N.x / (N.x + N.z);
-      weight.x = saturate((weight.x - 0.5f * (1.0f - blend)) / blend);
+      weight.x = saturatef((weight.x - 0.5f * (1.0f - blend)) / blend);
      weight.z = 1.0f - weight.x;
    }
    else {
--- a/intern/cycles/kernel/svm/musgrave.h
+++ b/intern/cycles/kernel/svm/musgrave.h
@ -180,7 +180,7 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_1d(

  for (int i = 1; i < float_to_int(octaves); i++) {
    p *= lacunarity;
-    weight = saturate(signal * gain);
+    weight = saturatef(signal * gain);
    signal = offset - fabsf(snoise_1d(p));
    signal *= signal;
    signal *= weight;
@ -351,7 +351,7 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_2d(

  for (int i = 1; i < float_to_int(octaves); i++) {
    p *= lacunarity;
-    weight = saturate(signal * gain);
+    weight = saturatef(signal * gain);
    signal = offset - fabsf(snoise_2d(p));
    signal *= signal;
    signal *= weight;
@ -522,7 +522,7 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_3d(

  for (int i = 1; i < float_to_int(octaves); i++) {
    p *= lacunarity;
-    weight = saturate(signal * gain);
+    weight = saturatef(signal * gain);
    signal = offset - fabsf(snoise_3d(p));
    signal *= signal;
    signal *= weight;
@ -693,7 +693,7 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_4d(

  for (int i = 1; i < float_to_int(octaves); i++) {
    p *= lacunarity;
-    weight = saturate(signal * gain);
+    weight = saturatef(signal * gain);
    signal = offset - fabsf(snoise_4d(p));
    signal *= signal;
    signal *= weight;
--- a/intern/cycles/kernel/svm/ramp.h
+++ b/intern/cycles/kernel/svm/ramp.h
@ -44,7 +44,7 @@ ccl_device_inline float float_ramp_lookup(
    return t0 + dy * f * (table_size - 1);
  }

-  f = saturate(f) * (table_size - 1);
+  f = saturatef(f) * (table_size - 1);

  /* clamp int as well in case of NaN */
  int i = clamp(float_to_int(f), 0, table_size - 1);
@ -76,7 +76,7 @@ ccl_device_inline float4 rgb_ramp_lookup(
    return t0 + dy * f * (table_size - 1);
  }

-  f = saturate(f) * (table_size - 1);
+  f = saturatef(f) * (table_size - 1);

  /* clamp int as well in case of NaN */
  int i = clamp(float_to_int(f), 0, table_size - 1);
--- a/intern/cycles/kernel/util/lookup_table.h
+++ b/intern/cycles/kernel/util/lookup_table.h
@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN

 ccl_device float lookup_table_read(KernelGlobals kg, float x, int offset, int size)
 {
-  x = saturate(x) * (size - 1);
+  x = saturatef(x) * (size - 1);

  int index = min(float_to_int(x), size - 1);
  int nindex = min(index + 1, size - 1);
@ -39,7 +39,7 @@ ccl_device float lookup_table_read(KernelGlobals kg, float x, int offset, int si
 ccl_device float lookup_table_read_2D(
    KernelGlobals kg, float x, float y, int offset, int xsize, int ysize)
 {
-  y = saturate(y) * (ysize - 1);
+  y = saturatef(y) * (ysize - 1);

  int index = min(float_to_int(y), ysize - 1);
  int nindex = min(index + 1, ysize - 1);
--- a/intern/cycles/scene/constant_fold.cpp
+++ b/intern/cycles/scene/constant_fold.cpp
@ -68,15 +68,15 @@ void ConstantFolder::make_constant(float3 value) const

 void ConstantFolder::make_constant_clamp(float value, bool clamp) const
 {
-  make_constant(clamp ? saturate(value) : value);
+  make_constant(clamp ? saturatef(value) : value);
 }

 void ConstantFolder::make_constant_clamp(float3 value, bool clamp) const
 {
  if (clamp) {
-    value.x = saturate(value.x);
-    value.y = saturate(value.y);
-    value.z = saturate(value.z);
+    value.x = saturatef(value.x);
+    value.y = saturatef(value.y);
+    value.z = saturatef(value.z);
  }

  make_constant(value);
@ -215,7 +215,7 @@ void ConstantFolder::fold_mix(NodeMix type, bool clamp) const
  ShaderInput *color1_in = node->input("Color1");
  ShaderInput *color2_in = node->input("Color2");

-  float fac = saturate(node->get_float(fac_in->socket_type));
+  float fac = saturatef(node->get_float(fac_in->socket_type));
  bool fac_is_zero = !fac_in->link && fac == 0.0f;
  bool fac_is_one = !fac_in->link && fac == 1.0f;

--- a/intern/cycles/util/math.h
+++ b/intern/cycles/util/math.h
@ -347,10 +347,15 @@ ccl_device_inline float smoothstep(float edge0, float edge1, float x)
 }

 #ifndef __KERNEL_CUDA__
-ccl_device_inline float saturate(float a)
+ccl_device_inline float saturatef(float a)
 {
  return clamp(a, 0.0f, 1.0f);
 }
+#else
+ccl_device_inline float saturatef(float a)
+{
+  return __saturatef(a);
+}
 #endif /* __KERNEL_CUDA__ */

 ccl_device_inline int float_to_int(float f)
--- a/intern/cycles/util/math_float3.h
+++ b/intern/cycles/util/math_float3.h
@ -408,7 +408,7 @@ ccl_device_inline float3 project(const float3 v, const float3 v_proj)

 ccl_device_inline float3 saturate3(float3 a)
 {
-  return make_float3(saturate(a.x), saturate(a.y), saturate(a.z));
+  return make_float3(saturatef(a.x), saturatef(a.y), saturatef(a.z));
 }

 ccl_device_inline float3 normalize_len(const float3 a, ccl_private float *t)