EEVEE: Ambient Occlusion: Refactor

- Fix noise/banding artifact on distant geometry. - Fix overshadowing on un-occluded surfaces at grazing angle producing "fresnel" like shadowing. Some of it still appears but this is caused by the low number of horizons per pixel. - Improve performance by using a fixed number of samples and fixing the sampling area size. A better sampling pattern is planned to recover the lost precision on large AO radius. - Improve normal reconstruction for the AO pass. - Improve Bent Normal reconstruction resulting in a less faceted look on smoothed geometry. - Add Thickness heuristic to avoid overshadowing of thin objects. Factor is currently hardcoded. - Add bent normal support to Glossy reflections. - Change Glossy occlusion to give less light leaks from lightprobes. It can overshadow on smooth surfaces but this should be mitigated by using SSR. - Use Bent Normal for rough Glossy surfaces. - Occlusion is now correctly evaluated for each BSDF. However this does make everything slower. This is mitigated by the fact that the search is a lot faster than before.
2021-02-16 17:01:15 +01:00 · 2021-02-16 17:01:15 +01:00 · 64d96f68d6
parent 6c2e1f3398
commit 64d96f68d6
9 changed files with 386 additions and 260 deletions
--- a/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl
@ -1,5 +1,6 @@

 #pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
 #pragma BLENDER_REQUIRE(raytrace_lib.glsl)

 /* Based on Practical Realtime Strategies for Accurate Indirect Occlusion
@ -23,10 +24,6 @@
 #  endif
 #endif

-#define MAX_PHI_STEP 32
-#define MAX_SEARCH_ITER 32
-#define MAX_LOD 6.0
-
 uniform sampler2D horizonBuffer;

 /* aoSettings flags */
@ -34,191 +31,224 @@ uniform sampler2D horizonBuffer;
 #define USE_BENT_NORMAL 2
 #define USE_DENOISE 4

-vec4 pack_horizons(vec4 v)
+#define MAX_LOD 6.0
+#define NO_OCCLUSION_DATA OcclusionData(vec4(M_PI, -M_PI, M_PI, -M_PI), 1.0)
+
+struct OcclusionData {
+  /* 4 horizons angles, one in each direction around the view vector to form a cross pattern.
+   * Stored as angles with alternating sign: the "no occlusion" default (NO_OCCLUSION_DATA)
+   * is (M_PI, -M_PI, M_PI, -M_PI). */
+  vec4 horizons;
+  /* Custom large scale occlusion.
+   * Used as an upper bound (min) on the visibility computed by occlusion_eval(). */
+  float custom_occlusion;
+};
+
+vec4 pack_occlusion_data(OcclusionData data)
 {
-  return v * 0.5 + 0.5;
-}
-vec4 unpack_horizons(vec4 v)
-{
-  return v * 2.0 - 1.0;
+  return vec4(1.0 - data.horizons * vec4(1, -1, 1, -1) * M_1_PI);
 }

-/* Returns maximum screen distance an AO ray can travel for a given view depth */
-vec2 get_max_dir(float view_depth)
+/* Inverse of pack_occlusion_data(): rebuild the 4 signed horizon angles from a packed vec4.
+ * custom_occlusion is not part of the packed value and is set to 0.0 here. */
+OcclusionData unpack_occlusion_data(vec4 v)
+{
+  vec4 signs = vec4(1, -1, 1, -1);
+  vec4 horizons = (1.0 - v) * signs * M_PI;
+  return OcclusionData(horizons, 0.0);
+}
+
+/* Returns maximum screen distance an AO ray can travel for a given view depth, in NDC space. */
+vec2 get_ao_area(float view_depth, float radius)
 {
  float homcco = ProjectionMatrix[2][3] * view_depth + ProjectionMatrix[3][3];
-  float max_dist = aoDistance / homcco;
+  float max_dist = radius / homcco;
  return vec2(ProjectionMatrix[0][0], ProjectionMatrix[1][1]) * max_dist;
 }

+/* Fetch the per-pixel noise used by the AO search; only the first two channels are used. */
+vec2 get_ao_noise(void)
+{
+  vec4 noise = texelfetch_noise_tex(gl_FragCoord.xy);
+  return noise.xy;
+}
+
 vec2 get_ao_dir(float jitter)
 {
-  /* Only half a turn because we integrate in slices. */
-  jitter *= M_PI;
+  /* Only a quarter of a turn because we integrate using 2 slices.
+   * We use this instead of using utiltex circle noise to improve cache hits
+   * since all tracing directions will be in the same quadrant. */
+  jitter *= M_PI_2;
  return vec2(cos(jitter), sin(jitter));
 }

-void get_max_horizon_grouped(vec4 co1, vec4 co2, vec3 x, float lod, inout float h)
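+/* Return the horizon angle (not its cosine): the cosine tracked during the search
+ * is converted with fast_acos() before returning. */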
+float search_horizon(vec3 vI,
+                     vec3 vP,
+                     float noise,
+                     vec2 uv_start,
+                     vec2 uv_dir,
+                     sampler2D depth_tx,
+                     float radius,
+                     const float sample_count)
 {
-  int mip = int(lod) + hizMipOffset;
-  co1 *= mipRatio[mip].xyxy;
-  co2 *= mipRatio[mip].xyxy;
+  float sample_count_inv = 1.0 / sample_count;
+  /* Init at cos(M_PI). */
+  float h = -1.0;

-  float depth1 = textureLod(maxzBuffer, co1.xy, floor(lod)).r;
-  float depth2 = textureLod(maxzBuffer, co1.zw, floor(lod)).r;
-  float depth3 = textureLod(maxzBuffer, co2.xy, floor(lod)).r;
-  float depth4 = textureLod(maxzBuffer, co2.zw, floor(lod)).r;
+  /* TODO(fclem) samples steps should be using the same approach as raytrace. (DDA line algo.) */
+  for (float i = 0.0; i < sample_count; i++) {
+    float t = ((i + noise) * sample_count_inv);
+    vec2 uv = uv_start + uv_dir * t;
+    float lod = min(MAX_LOD, max(i - noise, 0.0) * aoQuality);

-  vec4 len, s_h;
+    int mip = int(lod) + hizMipOffset;
+    float depth = textureLod(depth_tx, uv * mipRatio[mip].xy, floor(lod)).r;

-  vec3 s1 = get_view_space_from_depth(co1.xy, depth1); /* s View coordinate */
-  vec3 omega_s1 = s1 - x;
-  len.x = length(omega_s1);
-  s_h.x = omega_s1.z / len.x;
+    /* Bias depth a bit to avoid self shadowing issues. */
+    depth += 2.0 * 2.4e-7;

-  vec3 s2 = get_view_space_from_depth(co1.zw, depth2); /* s View coordinate */
-  vec3 omega_s2 = s2 - x;
-  len.y = length(omega_s2);
-  s_h.y = omega_s2.z / len.y;
+    vec3 s = get_view_space_from_depth(uv, depth);
+    vec3 omega_s = s - vP;
+    float len = length(omega_s);
+    /* Sample's horizon angle cosine. */
+    float s_h = dot(vI, omega_s / len);
+    /* Blend weight to fade artifacts. */
+    float dist_ratio = abs(len) / radius;
+    /* TODO(fclem) parameter. */
+    float dist_fac = sqr(saturate(dist_ratio * 2.0 - 1.0));

-  vec3 s3 = get_view_space_from_depth(co2.xy, depth3); /* s View coordinate */
-  vec3 omega_s3 = s3 - x;
-  len.z = length(omega_s3);
-  s_h.z = omega_s3.z / len.z;
-
-  vec3 s4 = get_view_space_from_depth(co2.zw, depth4); /* s View coordinate */
-  vec3 omega_s4 = s4 - x;
-  len.w = length(omega_s4);
-  s_h.w = omega_s4.z / len.w;
-
-  /* Blend weight after half the aoDistance to fade artifacts */
-  vec4 blend = saturate((1.0 - len / aoDistance) * 2.0);
-
-  h = mix(h, max(h, s_h.x), blend.x);
-  h = mix(h, max(h, s_h.y), blend.y);
-  h = mix(h, max(h, s_h.z), blend.z);
-  h = mix(h, max(h, s_h.w), blend.w);
+    /* TODO This needs to take the stride distance into account. Now it works because stride is
+     * constant. */
+    /* Thickness heuristic (Eq. 9). */
+    if (s_h < h) {
+      /* TODO(fclem) parameter. */
+      const float thickness_fac = 0.2;
+      s_h = mix(h, s_h, thickness_fac);
+    }
+    else {
+      s_h = max(h, s_h);
+    }
+    h = mix(s_h, h, dist_fac);
+  }
+  return fast_acos(h);
 }

-vec2 search_horizon_sweep(vec2 t_phi, vec3 pos, vec2 uvs, float jitter, vec2 max_dir)
+OcclusionData occlusion_search(vec3 vP,
+                               sampler2D depth_tx,
+                               float radius,
+                               const float dir_sample_count)
 {
-  max_dir *= max_v2(abs(t_phi));
-
-  /* Convert to pixel space. */
-  t_phi /= vec2(textureSize(maxzBuffer, 0));
-
-  /* Avoid division by 0 */
-  t_phi += vec2(1e-5);
-
-  jitter *= 0.25;
-
-  /* Compute end points */
-  vec2 corner1 = min(vec2(1.0) - uvs, max_dir);  /* Top right */
-  vec2 corner2 = max(vec2(0.0) - uvs, -max_dir); /* Bottom left */
-  vec2 iter1 = corner1 / t_phi;
-  vec2 iter2 = corner2 / t_phi;
-
-  vec2 min_iter = max(-iter1, -iter2);
-  vec2 max_iter = max(iter1, iter2);
-
-  vec2 times = vec2(-min_v2(min_iter), min_v2(max_iter));
-
-  vec2 h = vec2(-1.0); /* init at cos(pi) */
-
-  /* This is freaking sexy optimized. */
-  for (float i = 0.0, ofs = 4.0, time = -1.0; i < MAX_SEARCH_ITER && time > times.x;
-       i++, time -= ofs, ofs = min(exp2(MAX_LOD) * 4.0, ofs + ofs * aoQuality)) {
-    vec4 t = max(times.xxxx, vec4(time) - (vec4(0.25, 0.5, 0.75, 1.0) - jitter) * ofs);
-    vec4 cos1 = uvs.xyxy + t_phi.xyxy * t.xxyy;
-    vec4 cos2 = uvs.xyxy + t_phi.xyxy * t.zzww;
-    float lod = min(MAX_LOD, max(i - jitter * 4.0, 0.0) * aoQuality);
-    get_max_horizon_grouped(cos1, cos2, pos, lod, h.y);
+  if ((int(aoSettings) & USE_AO) == 0) {
+    return NO_OCCLUSION_DATA;
  }

-  for (float i = 0.0, ofs = 4.0, time = 1.0; i < MAX_SEARCH_ITER && time < times.y;
-       i++, time += ofs, ofs = min(exp2(MAX_LOD) * 4.0, ofs + ofs * aoQuality)) {
-    vec4 t = min(times.yyyy, vec4(time) + (vec4(0.25, 0.5, 0.75, 1.0) - jitter) * ofs);
-    vec4 cos1 = uvs.xyxy + t_phi.xyxy * t.xxyy;
-    vec4 cos2 = uvs.xyxy + t_phi.xyxy * t.zzww;
-    float lod = min(MAX_LOD, max(i - jitter * 4.0, 0.0) * aoQuality);
-    get_max_horizon_grouped(cos1, cos2, pos, lod, h.x);
+  vec2 noise = get_ao_noise();
+  vec2 area = get_ao_area(vP.z, radius);
+  vec2 dir = get_ao_dir(noise.x);
+  vec2 uv = get_uvs_from_view(vP);
+  vec3 vI = ((ProjectionMatrix[3][3] == 0.0) ? normalize(-vP) : vec3(0.0, 0.0, 1.0));
+  vec3 avg_dir = vec3(0.0);
+  float avg_apperture = 0.0;
+
+  OcclusionData data = NO_OCCLUSION_DATA;
+
+  for (int i = 0; i < 2; i++) {
+    /* View > NDC > Uv space. */
+    vec2 uv_dir = dir * area * 0.5;
+    /* Offset the start one pixel to avoid self shadowing. */
+    /* TODO(fclem) Using DDA line algo should fix this. */
+    vec2 px_dir = uv_dir * textureSize(depth_tx, 0);
+    float max_px_dir = max_v2(abs(px_dir));
+    vec2 uv_ofs = (px_dir / max_px_dir) / textureSize(depth_tx, 0);
+    /* No need to trace more. */
+    uv_dir -= uv_ofs;
+
+    if (max_px_dir > 0.0) {
+      data.horizons[0 + i * 2] = search_horizon(
+          vI, vP, noise.y, uv + uv_ofs, uv_dir, depth_tx, radius, dir_sample_count);
+      data.horizons[1 + i * 2] = -search_horizon(
+          vI, vP, noise.y, uv - uv_ofs, -uv_dir, depth_tx, radius, dir_sample_count);
+    }
+    /* Rotate 90 degrees. */
+    dir = vec2(-dir.y, dir.x);
  }

-  return h;
+  return data;
 }

-void integrate_slice(
-    vec3 normal, vec2 t_phi, vec2 horizons, inout float visibility, inout vec3 bent_normal)
+void occlusion_eval(
+    OcclusionData data, vec3 V, vec3 N, vec3 Ng, out float visibility, out vec3 bent_normal)
 {
-  /* Projecting Normal to Plane P defined by t_phi and omega_o */
-  vec3 np = vec3(t_phi.y, -t_phi.x, 0.0); /* Normal vector to Integration plane */
-  vec3 t = vec3(-t_phi, 0.0);
-  vec3 n_proj = normal - np * dot(np, normal);
-  float n_proj_len = max(1e-16, length(n_proj));
+  if ((int(aoSettings) & USE_AO) == 0) {
+    visibility = data.custom_occlusion;
+    bent_normal = N;
+    return;
+  }

-  float cos_n = clamp(n_proj.z / n_proj_len, -1.0, 1.0);
-  float n = sign(dot(n_proj, t)) * fast_acos(cos_n); /* Angle between view vec and normal */
+  if (min_v4(abs(data.horizons)) == M_PI) {
+    visibility = dot(N, Ng) * 0.5 + 0.5;
+    visibility = min(visibility, data.custom_occlusion);

-  /* (Slide 54) */
-  vec2 h = fast_acos(horizons);
-  h.x = -h.x;
+    if ((int(aoSettings) & USE_BENT_NORMAL) == 0) {
+      bent_normal = N;
+    }
+    else {
+      bent_normal = normalize(N + Ng);
+    }
+    return;
+  }

-  /* Clamping thetas (slide 58) */
-  h.x = n + max(h.x - n, -M_PI_2);
-  h.y = n + min(h.y - n, M_PI_2);
-
-  /* Solving inner integral */
-  vec2 h_2 = 2.0 * h;
-  vec2 vd = -cos(h_2 - n) + cos_n + h_2 * sin(n);
-  float vis = saturate((vd.x + vd.y) * 0.25 * n_proj_len);
-
-  visibility += vis;
-
-  /* O. Klehm, T. Ritschel, E. Eisemann, H.-P. Seidel
-   * Bent Normals and Cones in Screen-space
-   * Sec. 3.1 : Bent normals */
-  float b_angle = (h.x + h.y) * 0.5;
-  bent_normal += vec3(sin(b_angle) * -t_phi, cos(b_angle)) * vis;
-}
-
-void gtao_deferred(
-    vec3 normal, vec4 noise, float frag_depth, out float visibility, out vec3 bent_normal)
-{
-  /* Fetch early, hide latency! */
-  vec4 horizons = texelFetch(horizonBuffer, ivec2(gl_FragCoord.xy), 0);
-
-  vec4 dirs;
-  dirs.xy = get_ao_dir(noise.x * 0.5);
-  dirs.zw = get_ao_dir(noise.x * 0.5 + 0.5);
-
-  bent_normal = vec3(0.0);
-  visibility = 0.0;
-
-  horizons = unpack_horizons(horizons);
-
-  integrate_slice(normal, dirs.xy, horizons.xy, visibility, bent_normal);
-  integrate_slice(normal, dirs.zw, horizons.zw, visibility, bent_normal);
-
-  bent_normal = safe_normalize(bent_normal);
-
-  visibility *= 0.5; /* We integrated 2 slices. */
-}
-
-void gtao(vec3 normal, vec3 position, vec4 noise, out float visibility, out vec3 bent_normal)
-{
-  vec2 uvs = get_uvs_from_view(position);
-  vec2 max_dir = get_max_dir(position.z);
+  vec2 noise = get_ao_noise();
  vec2 dir = get_ao_dir(noise.x);

-  bent_normal = normal * 1e-8;
-  visibility = 1e-8;
+  visibility = 0.0;
+  bent_normal = N * 0.001;

-  /* Only trace in 2 directions. May lead to a darker result but since it's mostly for
-   * alpha blended objects that will have overdraw, we limit the performance impact. */
-  vec2 horizons = search_horizon_sweep(dir, position, uvs, noise.y, max_dir);
-  integrate_slice(normal, dir, horizons, visibility, bent_normal);
+  for (int i = 0; i < 2; i++) {
+    vec3 T = transform_direction(ViewMatrixInverse, vec3(dir, 0.0));
+    /* Setup integration domain around V. */
+    vec3 B = normalize(cross(V, T));
+    T = normalize(cross(B, V));

-  bent_normal = normalize(bent_normal / visibility);
+    float proj_N_len;
+    vec3 proj_N = normalize_len(N - B * dot(N, B), proj_N_len);
+    vec3 proj_Ng = normalize(Ng - B * dot(Ng, B));
+
+    vec2 h = (i == 0) ? data.horizons.xy : data.horizons.zw;
+
+    float N_sin = dot(proj_N, T);
+    float Ng_sin = dot(proj_Ng, T);
+    float N_cos = saturate(dot(proj_N, V));
+    float Ng_cos = saturate(dot(proj_Ng, V));
+    /* Gamma, angle between normalized projected normal and view vector. */
+    float angle_Ng = sign(Ng_sin) * fast_acos(Ng_cos);
+    float angle_N = sign(N_sin) * fast_acos(N_cos);
+    /* Add a little bias to fight self shadowing. */
+    const float max_angle = M_PI_2 - 0.05;
+    /* Clamp horizons to hemisphere around shading normal. */
+    h = clamp(h, angle_N - max_angle, angle_N + max_angle);
+
+    float bent_angle = (h.x + h.y) * 0.5;
+    /* NOTE: here we multiply z by 0.5 as it shows less difference with the geometric normal.
+     * Also modulate by projected normal length to reduce issues with slanted surfaces.
+     * All of this is ad-hoc and not really grounded. */
+    bent_normal += proj_N_len * (T * sin(bent_angle) + V * 0.5 * cos(bent_angle));
+
+    /* Clamp to geometric normal only for integral to keep smooth bent normal. */
+    /* This is done to match Cycles ground truth but adds some computation. */
+    h = clamp(h, angle_Ng - max_angle, angle_Ng + max_angle);
+
+    /* Inner integral (Eq. 7). */
+    float a = dot(-cos(2.0 * h - angle_N) + cos(angle_N) + 2.0 * h * sin(angle_N), vec2(0.25));
+    /* Correct normal not on plane (Eq. 8). */
+    visibility += proj_N_len * a;
+
+    /* Rotate 90 degrees. */
+    dir = vec2(-dir.y, dir.x);
+  }
+  /* We integrated 2 directions. */
+  visibility *= 0.5;
+
+  visibility = min(visibility, data.custom_occlusion);
+
+  if ((int(aoSettings) & USE_BENT_NORMAL) == 0) {
+    bent_normal = N;
+  }
+  else {
+    bent_normal = normalize(mix(bent_normal, N, sqr(sqr(sqr(visibility)))));
+  }
 }

 /* Multibounce approximation based on surface albedo.
@ -240,53 +270,103 @@ float gtao_multibounce(float visibility, vec3 albedo)
  return max(x, ((x * a + b) * x + c) * x);
 }

-float diffuse_occlusion(vec3 N, vec3 vis_cone_dir, float vis_cone_aperture_cos, vec3 albedo)
+float diffuse_occlusion(OcclusionData data, vec3 V, vec3 N, vec3 Ng)
 {
-  if ((int(aoSettings) & USE_AO) == 0) {
-    return 1.0;
+  vec3 unused;
+  float visibility;
+  occlusion_eval(data, V, N, Ng, visibility, unused);
+  /* Scale by user factor */
+  visibility = pow(saturate(visibility), aoFactor);
+  return visibility;
+}
+
+/* Diffuse occlusion with multi-bounce approximation.
+ * Evaluates the horizons for shading normal N / geometric normal Ng, outputs the bent normal
+ * and returns the visibility after gtao_multibounce() and the user factor (aoFactor). */
+float diffuse_occlusion(
+    OcclusionData data, vec3 V, vec3 N, vec3 Ng, vec3 albedo, out vec3 bent_normal)
+{
+  float raw_visibility;
+  occlusion_eval(data, V, N, Ng, raw_visibility, bent_normal);
+
+  float bounced_visibility = gtao_multibounce(raw_visibility, albedo);
+  /* Clamp, then apply the user exponent. */
+  return pow(saturate(bounced_visibility), aoFactor);
+}
+
+/**
+ * Approximate the area of intersection of two spherical caps.
+ * radius1 : First cap's radius (arc length in radians)
+ * radius2 : Second cap's radius (in radians)
+ * dist : Distance between caps (radians between centers of caps)
+ * Note: The constant scale of the cap area is left out to save one multiply
+ * (a fully contained cap returns `1 - cos(min_radius)`), so results are only
+ * meaningful relative to each other (e.g. as a ratio of two calls).
+ **/
+float spherical_cap_intersection(float radius1, float radius2, float dist)
+{
+  /* From "Ambient Aperture Lighting" by Chris Oat
+   * Slide 15. */
+  float max_radius = max(radius1, radius2);
+  float min_radius = min(radius1, radius2);
+  float sum_radius = radius1 + radius2;
+  float area;
+  if (dist <= max_radius - min_radius) {
+    /* One cap is completely inside the other: the intersection is the smaller cap. */
+    area = 1.0 - cos(min_radius);
  }
-  /* If the shading normal is orthogonal to the geometric normal, it should be half lit. */
-  float horizon_fac = saturate(dot(N, vis_cone_dir) * 0.5 + 0.5);
-  float ao = vis_cone_aperture_cos * horizon_fac;
-  return gtao_multibounce(ao, albedo);
+  else if (dist >= sum_radius) {
+    /* No intersection exists. Use a float literal: no implicit int to float conversion. */
+    area = 0.0;
+  }
+  else {
+    /* Partial overlap: smoothly blend between the two cases above. */
+    float diff = max_radius - min_radius;
+    area = smoothstep(0.0, 1.0, 1.0 - saturate((dist - diff) / (sum_radius - diff)));
+    area *= 1.0 - cos(min_radius);
+  }
+  return area;
 }

-float specular_occlusion(float NV, float AO, float roughness)
+/* Specular (glossy) occlusion computed from the horizon data.
+ * - data: horizons and custom occlusion to evaluate.
+ * - V, N: view vector and shading normal forwarded to occlusion_eval()
+ *   (N is also passed as the geometric normal).
+ * - roughness: drives the specular cone angle and how much specular_dir gets bent.
+ * - specular_dir: in/out, blended toward the least occluded direction.
+ * Returns the specular visibility after applying the user factor (aoFactor). */
+float specular_occlusion(
+    OcclusionData data, vec3 V, vec3 N, float roughness, inout vec3 specular_dir)
 {
-  return saturate(pow(NV + AO, roughness) - 1.0 + AO);
+  vec3 visibility_dir;
+  float visibility;
+  occlusion_eval(data, V, N, N, visibility, visibility_dir);
+  /* Keep visibility in [0..1]: sqrt() and pow() below are undefined for negative arguments
+   * and the integrated visibility is not guaranteed to stay inside that range. */
+  visibility = saturate(visibility);
+
+  specular_dir = normalize(mix(specular_dir, visibility_dir, roughness * (1.0 - visibility)));
+
+  /* Visibility to cone angle (eq. 18). */
+  float vis_angle = fast_acos(sqrt(1.0 - visibility));
+  /* Roughness to cone angle (eq. 26). */
+  float spec_angle = max(0.001, fast_acos(cone_cosine(roughness)));
+  /* Angle between cone axes. */
+  float cone_cone_dist = fast_acos(saturate(dot(visibility_dir, specular_dir)));
+  float cone_nor_dist = fast_acos(saturate(dot(N, specular_dir)));
+
+  float isect_solid_angle = spherical_cap_intersection(vis_angle, spec_angle, cone_cone_dist);
+  float specular_solid_angle = spherical_cap_intersection(M_PI_2, spec_angle, cone_nor_dist);
+  /* Fraction of the specular cone that is left unoccluded.
+   * (Named so that it does not shadow this function.) */
+  float occlusion_ratio = isect_solid_angle / specular_solid_angle;
+  /* Mix because it is unstable in unoccluded areas. */
+  visibility = mix(occlusion_ratio, 1.0, pow(visibility, 8.0));
+
+  /* Scale by user factor */
+  visibility = pow(saturate(visibility), aoFactor);
+  return visibility;
 }

-/* Use the right occlusion  */
-float occlusion_compute(vec3 N, vec3 vpos, vec4 rand, out vec3 bent_normal)
+/* Use the right occlusion. */
+OcclusionData occlusion_load(vec3 vP, float custom_occlusion)
 {
-#ifndef USE_REFRACTION
+  /* Default to fully opened cone. */
+  OcclusionData data = NO_OCCLUSION_DATA;
+
+#ifdef ENABLE_DEFERED_AO
  if ((int(aoSettings) & USE_AO) != 0) {
-    float visibility;
-    vec3 vnor = mat3(ViewMatrix) * N;
-
-#  ifdef ENABLE_DEFERED_AO
-    gtao_deferred(vnor, rand, gl_FragCoord.z, visibility, bent_normal);
-#  else
-    gtao(vnor, vpos, rand, visibility, bent_normal);
-#  endif
-
-    /* Prevent some problems down the road. */
-    visibility = max(1e-3, visibility);
-
-    if ((int(aoSettings) & USE_BENT_NORMAL) != 0) {
-      bent_normal = transform_direction(ViewMatrixInverse, bent_normal);
-    }
-    else {
-      bent_normal = N;
-    }
-
-    /* Scale by user factor */
-    visibility = pow(visibility, aoFactor);
-
-    return visibility;
+    data = unpack_occlusion_data(texelFetch(horizonBuffer, ivec2(gl_FragCoord.xy), 0));
  }
+#else
+  /* For blended surfaces: no deferred horizon buffer is read, so search the horizons here. */
+  data = occlusion_search(vP, maxzBuffer, aoDistance, 8.0);
 #endif

-  bent_normal = N;
-  return 1.0;
+  data.custom_occlusion = custom_occlusion;
+
+  return data;
 }
--- a/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl
@ -1,13 +1,9 @@

 #pragma BLENDER_REQUIRE(common_math_lib.glsl)

-vec3 diffuse_dominant_dir(vec3 N, vec3 vis_cone_dir, float vis_cone_aperture_cos)
+vec3 diffuse_dominant_dir(vec3 bent_normal)
 {
-  /* TODO(fclem) revisit this. bent too much towards vis_cone_dir. */
-  vis_cone_aperture_cos *= sqr(vis_cone_aperture_cos);
-
-  N = mix(vis_cone_dir, N, vis_cone_aperture_cos);
-  return normalize(N);
+  return bent_normal;
 }

 vec3 specular_dominant_dir(vec3 N, vec3 V, float roughness)
--- a/source/blender/draw/engines/eevee/shaders/closure_eval_diffuse_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/closure_eval_diffuse_lib.glsl
@ -27,10 +27,12 @@ ClosureEvalDiffuse closure_Diffuse_eval_init(inout ClosureInputDiffuse cl_in,
  cl_out.radiance = vec3(0.0);

  ClosureEvalDiffuse cl_eval;
-  cl_eval.ambient_occlusion = diffuse_occlusion(
-      cl_in.N, cl_common.bent_normal, cl_common.occlusion, cl_in.albedo);
-  cl_eval.probe_sampling_dir = diffuse_dominant_dir(
-      cl_in.N, cl_common.bent_normal, cl_common.occlusion);
+  cl_eval.ambient_occlusion = diffuse_occlusion(cl_common.occlusion_data,
+                                                cl_common.V,
+                                                cl_in.N,
+                                                cl_common.Ng,
+                                                cl_in.albedo,
+                                                cl_eval.probe_sampling_dir);
  return cl_eval;
 }

--- a/source/blender/draw/engines/eevee/shaders/closure_eval_glossy_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/closure_eval_glossy_lib.glsl
@ -45,7 +45,11 @@ ClosureEvalGlossy closure_Glossy_eval_init(inout ClosureInputGlossy cl_in,
  ClosureEvalGlossy cl_eval;
  cl_eval.ltc_mat = texture(utilTex, vec3(lut_uv, LTC_MAT_LAYER));
  cl_eval.probe_sampling_dir = specular_dominant_dir(cl_in.N, cl_common.V, sqr(cl_in.roughness));
-  cl_eval.spec_occlusion = specular_occlusion(NV, cl_common.occlusion, cl_in.roughness);
+  cl_eval.spec_occlusion = specular_occlusion(cl_common.occlusion_data,
+                                              cl_common.V,
+                                              cl_common.N,
+                                              cl_in.roughness,
+                                              cl_eval.probe_sampling_dir);
  cl_eval.raytrace_radiance = vec3(0.0);

 #ifdef STEP_RESOLVE /* SSR */
--- a/source/blender/draw/engines/eevee/shaders/closure_eval_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/closure_eval_lib.glsl
@ -167,6 +167,8 @@ struct ClosureInputCommon {
 #define CLOSURE_INPUT_COMMON_DEFAULT ClosureInputCommon(1.0)

 struct ClosureEvalCommon {
+  /** Result of SSAO. */
+  OcclusionData occlusion_data;
  /** View vector. */
  vec3 V;
  /** Surface position. */
@ -177,15 +179,12 @@ struct ClosureEvalCommon {
  vec3 vN;
  /** Surface position. (viewspace) */
  vec3 vP;
+  /** Geometric normal, always facing camera. */
+  vec3 Ng;
  /** Geometric normal, always facing camera. (viewspace) */
  vec3 vNg;
  /** Random numbers. 3 random sequences. zw is a random point on a circle. */
  vec4 rand;
-  /** Final occlusion factor. Mix of the user occlusion and SSAO. */
-  float occlusion;
-  /** Least occluded direction in the hemisphere. */
-  vec3 bent_normal;
-
  /** Specular probe accumulator. Shared between planar and cubemap probe. */
  float specular_accum;
  /** Diffuse probe accumulator. */
@ -208,7 +207,8 @@ ClosureEvalCommon closure_Common_eval_init(ClosureInputCommon cl_in)
  cl_eval.N = safe_normalize(gl_FrontFacing ? worldNormal : -worldNormal);
  cl_eval.vN = safe_normalize(gl_FrontFacing ? viewNormal : -viewNormal);
  cl_eval.vP = viewPosition;
-  cl_eval.vNg = safe_normalize(cross(dFdx(viewPosition), dFdy(viewPosition)));
+  cl_eval.Ng = safe_normalize(cross(dFdx(cl_eval.P), dFdy(cl_eval.P)));
+  cl_eval.vNg = transform_direction(ViewMatrix, cl_eval.Ng);
  /* TODO(fclem) See if we can avoid this complicated setup. */
  cl_eval.tracing_depth = gl_FragCoord.z;
  /* Constant bias (due to depth buffer precision) */
@ -218,10 +218,7 @@ ClosureEvalCommon closure_Common_eval_init(ClosureInputCommon cl_in)
  /* Convert to view Z. */
  cl_eval.tracing_depth = get_view_z_from_depth(cl_eval.tracing_depth);

-  /* TODO(fclem) Do occlusion evaluation per Closure using shading normal. */
-  cl_eval.occlusion = min(
-      cl_in.occlusion,
-      occlusion_compute(cl_eval.N, cl_eval.vP, cl_eval.rand, cl_eval.bent_normal));
+  cl_eval.occlusion_data = occlusion_load(cl_eval.vP, cl_in.occlusion);

  cl_eval.specular_accum = 1.0;
  cl_eval.diffuse_accum = 1.0;
--- a/source/blender/draw/engines/eevee/shaders/effect_gtao_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/effect_gtao_frag.glsl
@ -26,60 +26,101 @@ uniform sampler2D depthBuffer;

 #endif

-uniform float rotationOffset;
+/* Similar to https://atyuwen.github.io/posts/normal-reconstruction/.
+ * This samples the depth buffer 4 times for each direction to get the most correct
+ * implicit normal reconstruction out of the depth buffer.
+ * - uvs: UV of the center pixel.
+ * - ofs: UV offset of one step along the derivative direction.
+ * - vP: view-space position of the center pixel.
+ * - depth_center: depth buffer value of the center pixel.
+ * Returns the view-space position delta for one `ofs` step. */
+vec3 view_position_derivative_from_depth(vec2 uvs, vec2 ofs, vec3 vP, float depth_center)
+{
+  vec2 uv1 = uvs - ofs * 2.0;
+  vec2 uv2 = uvs - ofs;
+  vec2 uv3 = uvs + ofs;
+  vec2 uv4 = uvs + ofs * 2.0;
+  vec4 H;
+  H.x = gtao_textureLod(gtao_depthBuffer, uv1, 0.0).r;
+  H.y = gtao_textureLod(gtao_depthBuffer, uv2, 0.0).r;
+  H.z = gtao_textureLod(gtao_depthBuffer, uv3, 0.0).r;
+  H.w = gtao_textureLod(gtao_depthBuffer, uv4, 0.0).r;
+  /* Fix issue with depth precision. Take even larger diff. */
+  vec4 diff = abs(vec4(depth_center, H.yzw) - H.x);
+  if (max_v4(diff) < 2.4e-7 && all(lessThan(diff.xyz, diff.www))) {
+    /* H.w was sampled at uv4, so reconstruct its position at uv4. The uv1 > uv4 span is
+     * 4 steps, hence the 0.25 factor to get back to a single step. */
+    return 0.25 * (get_view_space_from_depth(uv4, H.w) - get_view_space_from_depth(uv1, H.x));
+  }
+  /* Simplified (H.xw + 2.0 * (H.yz - H.xw)) - depth_center */
+  vec2 deltas = abs((2.0 * H.yz - H.xw) - depth_center);
+  /* Use the side whose extrapolated depth is the closest to the center depth. */
+  if (deltas.x < deltas.y) {
+    return vP - get_view_space_from_depth(uv2, H.y);
+  }
+  else {
+    return get_view_space_from_depth(uv3, H.z) - vP;
+  }
+}
+
+/* TODO(fclem) port to a common place for other effects to use. */
+/* Reconstruct the view-space position and geometric normal of a pixel from the depth buffer.
+ * - texel: pixel coordinate to reconstruct (the visible caller passes gl_FragCoord.xy).
+ * - vP: output view-space position.
+ * - vNg: output view-space normal, built from the position derivatives in X and Y.
+ * Returns false for background pixels (depth == 1.0); vP and vNg are not written then. */
+bool reconstruct_view_position_and_normal_from_depth(vec2 texel, out vec3 vP, out vec3 vNg)
+{
+  vec2 texel_size = 1.0 / vec2(textureSize(gtao_depthBuffer, 0).xy);
+  /* Use the requested texel instead of silently ignoring the parameter. */
+  vec2 uvs = texel * texel_size;
+  float depth_center = gtao_textureLod(gtao_depthBuffer, uvs, 0.0).r;
+
+  /* Background case. */
+  if (depth_center == 1.0) {
+    return false;
+  }
+
+  vP = get_view_space_from_depth(uvs, depth_center);
+
+  vec3 dPdx = view_position_derivative_from_depth(uvs, texel_size * vec2(1, 0), vP, depth_center);
+  vec3 dPdy = view_position_derivative_from_depth(uvs, texel_size * vec2(0, 1), vP, depth_center);
+
+  vNg = safe_normalize(cross(dPdx, dPdy));
+
+  return true;
+}

 #ifdef DEBUG_AO

+in vec4 uvcoordsvar;
+
 void main()
 {
-  vec2 texel_size = 1.0 / vec2(textureSize(depthBuffer, 0)).xy;
-  vec2 uvs = saturate(gl_FragCoord.xy * texel_size);
+  vec3 vP, vNg;

-  float depth = textureLod(depthBuffer, uvs, 0.0).r;
+  if (!reconstruct_view_position_and_normal_from_depth(gl_FragCoord.xy, vP, vNg)) {
+    /* Handle Background case. Prevent artifact due to uncleared Horizon Render Target. */
+    FragColor = vec4(0.0);
+  }
+  else {
+    vec3 P = transform_point(ViewMatrixInverse, vP);
+    vec3 worldPosition = P; /* For cameraVec macro. TODO(fclem) make cameraVec(P). */
+    vec3 viewPosition = vP; /* For viewCameraVec macro. TODO(fclem) make viewCameraVec(vP). */
+    vec3 V = cameraVec;
+    vec3 vV = viewCameraVec;
+    vec3 vN = normal_decode(texture(normalBuffer, uvcoordsvar.xy).rg, vV);
+    vec3 N = transform_direction(ViewMatrixInverse, vN);
+    vec3 Ng = transform_direction(ViewMatrixInverse, vNg);

-  vec3 viewPosition = get_view_space_from_depth(uvs, depth);
-  vec3 V = viewCameraVec;
-  vec3 normal = normal_decode(texture(normalBuffer, uvs).rg, V);
+    OcclusionData data = occlusion_load(vP, 1.0);

-  vec3 bent_normal;
-  float visibility;
+    float visibility = diffuse_occlusion(data, V, N, Ng);

-  vec4 noise = texelfetch_noise_tex(gl_FragCoord.xy);
-
-  gtao_deferred(normal, noise, depth, visibility, bent_normal);
-
-  /* Handle Background case. Prevent artifact due to uncleared Horizon Render Target. */
-  FragColor = vec4((depth == 1.0) ? 0.0 : visibility);
+    FragColor = vec4(visibility);
+  }
 }

 #else

 void main()
 {
-  vec2 uvs = saturate(gl_FragCoord.xy / vec2(textureSize(gtao_depthBuffer, 0).xy));
+  vec2 uvs = gl_FragCoord.xy / vec2(textureSize(gtao_depthBuffer, 0).xy);
  float depth = gtao_textureLod(gtao_depthBuffer, uvs, 0.0).r;
+  vec3 vP = get_view_space_from_depth(uvs, depth);

-  if (depth == 1.0) {
-    /* Do not trace for background */
-    FragColor = vec4(0.0);
-    return;
+  OcclusionData data = NO_OCCLUSION_DATA;
+  /* Do not trace for background */
+  if (depth != 1.0) {
+    data = occlusion_search(vP, maxzBuffer, aoDistance, 8.0);
  }

-  /* Avoid self shadowing. */
-  depth = saturate(depth - 3e-6); /* Tweaked for 24bit depth buffer. */
-
-  vec3 viewPosition = get_view_space_from_depth(uvs, depth);
-  vec4 noise = texelfetch_noise_tex(gl_FragCoord.xy);
-  vec2 max_dir = get_max_dir(viewPosition.z);
-  vec4 dirs;
-  dirs.xy = get_ao_dir(noise.x * 0.5);
-  dirs.zw = get_ao_dir(noise.x * 0.5 + 0.5);
-
-  /* Search in 4 directions. */
-  FragColor.xy = search_horizon_sweep(dirs.xy, viewPosition, uvs, noise.y, max_dir);
-  FragColor.zw = search_horizon_sweep(dirs.zw, viewPosition, uvs, noise.y, max_dir);
-
-  /* Resize output for integer texture. */
-  FragColor = pack_horizons(FragColor);
+  FragColor = pack_occlusion_data(data);
 }
 #endif
--- a/source/blender/draw/engines/eevee/shaders/lightprobe_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/lightprobe_lib.glsl
@ -215,8 +215,8 @@ vec3 probe_evaluate_planar(int id, PlanarData pd, vec3 W, vec3 N, vec3 V, float

 void fallback_cubemap(vec3 N,
                      vec3 V,
-                      vec3 W,
-                      vec3 viewPosition,
+                      vec3 P,
+                      vec3 vP,
                      float roughness,
                      float roughnessSquared,
                      inout vec4 spec_accum)
@ -224,21 +224,15 @@ void fallback_cubemap(vec3 N,
  /* Specular probes */
  vec3 spec_dir = specular_dominant_dir(N, V, roughnessSquared);

-#ifdef SSR_AO
-  vec4 rand = texelfetch_noise_tex(gl_FragCoord.xy);
-  vec3 bent_normal;
-  float final_ao = occlusion_compute(N, viewPosition, rand, bent_normal);
-  final_ao = specular_occlusion(dot(N, V), final_ao, roughness);
-#else
-  const float final_ao = 1.0;
-#endif
+  OcclusionData occlusion_data = occlusion_load(vP, 1.0);
+  float final_ao = specular_occlusion(occlusion_data, V, N, roughness, spec_dir);

  /* Starts at 1 because 0 is world probe */
  for (int i = 1; i < MAX_PROBE && i < prbNumRenderCube && spec_accum.a < 0.999; i++) {
-    float fade = probe_attenuation_cube(i, W);
+    float fade = probe_attenuation_cube(i, P);

    if (fade > 0.0) {
-      vec3 spec = final_ao * probe_evaluate_cube(i, W, spec_dir, roughness);
+      vec3 spec = final_ao * probe_evaluate_cube(i, P, spec_dir, roughness);
      accumulate_light(spec, fade, spec_accum);
    }
  }
--- a/source/blender/draw/intern/shaders/common_math_lib.glsl
+++ b/source/blender/draw/intern/shaders/common_math_lib.glsl
@ -122,6 +122,12 @@ vec3 safe_normalize(vec3 v)
  return v / len;
 }

+/* Normalize `v` and also output its length before normalization in `len`.
+ * No guard against a zero-length input: the division is done as-is. */
+vec3 normalize_len(vec3 v, out float len)
+{
+  float v_len = length(v);
+  len = v_len;
+  return v / v_len;
+}
+
 /** \} */

 /* ---------------------------------------------------------------------- */
--- a/source/blender/gpu/shaders/material/gpu_shader_material_ambient_occlusion.glsl
+++ b/source/blender/gpu/shaders/material/gpu_shader_material_ambient_occlusion.glsl
@ -4,7 +4,13 @@ void node_ambient_occlusion(
 {
  vec3 bent_normal;
  vec4 rand = texelfetch_noise_tex(gl_FragCoord.xy);
-  result_ao = occlusion_compute(normalize(normal), viewPosition, rand, bent_normal);
+  OcclusionData data = occlusion_load(viewPosition, 1.0);
+
+  vec3 V = cameraVec;
+  vec3 N = normalize(normal);
+  vec3 Ng = safe_normalize(cross(dFdx(worldPosition), dFdy(worldPosition)));
+
+  result_ao = diffuse_occlusion(data, V, N, Ng);
  result_color = result_ao * color;
 }
 #else