Cleanup: refactor OptiX shadow intersection for upcoming changes

This commit is contained in:
Brecht Van Lommel 2021-10-13 18:19:51 +02:00
parent 509b637d59
commit 5d565062ed
8 changed files with 79 additions and 126 deletions

View File

@ -81,7 +81,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
kernel_embree_convert_hit(kg, ray, hit, &current_isect);
/* If no transparent shadows, all light is blocked. */
const int flags = intersection_get_shader_flags(kg, &current_isect);
const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type);
if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->max_hits == 0) {
ctx->opaque_hit = true;
return;

View File

@ -197,7 +197,7 @@ ccl_device_inline
/* todo: optimize so primitive visibility flag indicates if
* the primitive has a transparent shadow shader? */
const int flags = intersection_get_shader_flags(kg, isect);
const int flags = intersection_get_shader_flags(kg, isect->prim, isect->type);
if (!(flags & SD_HAS_TRANSPARENT_SHADOW) || max_hits == 0) {
/* If no transparent shadows, all light is blocked and we can

View File

@ -140,14 +140,12 @@ ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *
/* Utility to quickly get flags from an intersection. */
ccl_device_forceinline int intersection_get_shader_flags(
ccl_global const KernelGlobals *ccl_restrict kg,
ccl_private const Intersection *ccl_restrict isect)
ccl_global const KernelGlobals *ccl_restrict kg, const int prim, const int type)
{
const int prim = isect->prim;
int shader = 0;
#ifdef __HAIR__
if (isect->type & PRIMITIVE_ALL_TRIANGLE)
if (type & PRIMITIVE_ALL_TRIANGLE)
#endif
{
shader = kernel_tex_fetch(__tri_shader, prim);
@ -195,4 +193,33 @@ ccl_device_forceinline int intersection_get_object_flags(
return kernel_tex_fetch(__object_flag, isect->object);
}
/* TODO: find a better (faster) solution for this. Maybe store offset per object for
* attributes needed in intersection? */
ccl_device_inline int intersection_find_attribute(ccl_global const KernelGlobals *kg,
const int object,
const uint id)
{
uint attr_offset = kernel_tex_fetch(__objects, object).attribute_map_offset;
uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
while (attr_map.x != id) {
if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) {
if (UNLIKELY(attr_map.y == 0)) {
return (int)ATTR_STD_NOT_FOUND;
}
else {
/* Chain jump to a different part of the table. */
attr_offset = attr_map.z;
}
}
else {
attr_offset += ATTR_PRIM_TYPES;
}
attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
}
/* return result */
return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
}
CCL_NAMESPACE_END

View File

@ -172,14 +172,12 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
{
#ifdef __SHADOW_RECORD_ALL__
bool ignore_intersection = false;
int prim = optixGetPrimitiveIndex();
const uint object = get_object_id();
# ifdef __VISIBILITY_FLAG__
const uint visibility = optixGetPayload_4();
if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
ignore_intersection = true;
return optixIgnoreIntersection();
}
# endif
@ -202,29 +200,39 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
/* Filter out curve endcaps. */
if (u == 0.0f || u == 1.0f) {
ignore_intersection = true;
return optixIgnoreIntersection();
}
}
# endif
int num_hits = optixGetPayload_2();
int record_index = num_hits;
# ifndef __TRANSPARENT_SHADOWS__
/* No transparent shadows support compiled in, make opaque. */
optixSetPayload_5(true);
return optixTerminateRay();
# else
const int max_hits = optixGetPayload_3();
if (!ignore_intersection) {
optixSetPayload_2(num_hits + 1);
/* If no transparent shadows, all light is blocked and we can stop immediately. */
if (max_hits == 0 ||
!(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
optixSetPayload_5(true);
return optixTerminateRay();
}
/* Record transparent intersection. */
const int num_hits = optixGetPayload_2();
int record_index = num_hits;
optixSetPayload_2(num_hits + 1);
Intersection *const isect_array = get_payload_ptr_0<Intersection>();
# ifdef __TRANSPARENT_SHADOWS__
if (num_hits >= max_hits) {
if (record_index >= max_hits) {
/* If maximum number of hits reached, find a hit to replace. */
const int num_recorded_hits = min(max_hits, num_hits);
float max_recorded_t = isect_array[0].t;
int max_recorded_hit = 0;
for (int i = 1; i < num_recorded_hits; i++) {
for (int i = 1; i < max_hits; i++) {
if (isect_array[i].t > max_recorded_t) {
max_recorded_t = isect_array[i].t;
max_recorded_hit = i;
@ -232,39 +240,25 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
}
if (optixGetRayTmax() >= max_recorded_t) {
/* Accept hit, so that OptiX won't consider any more hits beyond the distance of the current
* hit anymore. */
/* Accept hit, so that OptiX won't consider any more hits beyond the distance of the
* current hit anymore. */
return;
}
record_index = max_recorded_hit;
}
# endif
if (!ignore_intersection) {
Intersection *const isect = isect_array + record_index;
isect->u = u;
isect->v = v;
isect->t = optixGetRayTmax();
isect->prim = prim;
isect->object = object;
isect->type = type;
Intersection *const isect = isect_array + record_index;
isect->u = u;
isect->v = v;
isect->t = optixGetRayTmax();
isect->prim = prim;
isect->object = object;
isect->type = type;
# ifdef __TRANSPARENT_SHADOWS__
/* Detect if this surface has a shader with transparent shadows. */
if (!shader_transparent_shadow(NULL, isect) || max_hits == 0) {
# endif
/* If no transparent shadows, all light is blocked and we can stop immediately. */
optixSetPayload_5(true);
return optixTerminateRay();
# ifdef __TRANSPARENT_SHADOWS__
}
# endif
}
/* Continue tracing. */
optixIgnoreIntersection();
#endif
# endif /* __TRANSPARENT_SHADOWS__ */
#endif /* __SHADOW_RECORD_ALL__ */
}
extern "C" __global__ void __anyhit__kernel_optix_volume_test()

View File

@ -27,31 +27,6 @@ CCL_NAMESPACE_BEGIN
#ifdef __HAIR__
ccl_device_inline int find_attribute_curve_motion(ccl_global const KernelGlobals *kg,
int object,
uint id,
ccl_private AttributeElement *elem)
{
/* todo: find a better (faster) solution for this, maybe store offset per object.
*
* NOTE: currently it's not a bottleneck because in test scenes the loop below runs
* zero iterations and rendering is really slow with motion curves. For until other
* areas are speed up it's probably not so crucial to optimize this out.
*/
uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_GEOMETRY;
uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
while (attr_map.x != id) {
attr_offset += ATTR_PRIM_TYPES;
attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
}
*elem = (AttributeElement)attr_map.y;
/* return result */
return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
}
ccl_device_inline void motion_curve_keys_for_step_linear(ccl_global const KernelGlobals *kg,
int offset,
int numkeys,
@ -92,13 +67,12 @@ ccl_device_inline void motion_curve_keys_linear(ccl_global const KernelGlobals *
object_motion_info(kg, object, &numsteps, NULL, &numkeys);
/* figure out which steps we need to fetch and their interpolation factor */
int maxstep = numsteps * 2;
int step = min((int)(time * maxstep), maxstep - 1);
float t = time * maxstep - step;
const int maxstep = numsteps * 2;
const int step = min((int)(time * maxstep), maxstep - 1);
const float t = time * maxstep - step;
/* find attribute */
AttributeElement elem;
int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
const int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* fetch key coordinates */
@ -160,13 +134,12 @@ ccl_device_inline void motion_curve_keys(ccl_global const KernelGlobals *kg,
object_motion_info(kg, object, &numsteps, NULL, &numkeys);
/* figure out which steps we need to fetch and their interpolation factor */
int maxstep = numsteps * 2;
int step = min((int)(time * maxstep), maxstep - 1);
float t = time * maxstep - step;
const int maxstep = numsteps * 2;
const int step = min((int)(time * maxstep), maxstep - 1);
const float t = time * maxstep - step;
/* find attribute */
AttributeElement elem;
int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
const int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* fetch key coordinates */

View File

@ -27,41 +27,12 @@
#pragma once
#include "kernel/bvh/bvh_util.h"
CCL_NAMESPACE_BEGIN
/* Time interpolation of vertex positions and normals */
ccl_device_inline int find_attribute_motion(ccl_global const KernelGlobals *kg,
int object,
uint id,
ccl_private AttributeElement *elem)
{
/* todo: find a better (faster) solution for this, maybe store offset per object */
uint attr_offset = object_attribute_map_offset(kg, object);
uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
while (attr_map.x != id) {
if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) {
if (UNLIKELY(attr_map.y == 0)) {
return (int)ATTR_STD_NOT_FOUND;
}
else {
/* Chain jump to a different part of the table. */
attr_offset = attr_map.z;
}
}
else {
attr_offset += ATTR_PRIM_TYPES;
}
attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
}
*elem = (AttributeElement)attr_map.y;
/* return result */
return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
}
ccl_device_inline void motion_triangle_verts_for_step(ccl_global const KernelGlobals *kg,
uint4 tri_vindex,
int offset,
@ -129,8 +100,7 @@ ccl_device_inline void motion_triangle_vertices(
float t = time * maxstep - step;
/* find attribute */
AttributeElement elem;
int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* fetch vertex coordinates */
@ -164,8 +134,7 @@ ccl_device_inline float3 motion_triangle_smooth_normal(ccl_global const KernelGl
float t = time * maxstep - step;
/* find attribute */
AttributeElement elem;
int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* fetch normals */

View File

@ -56,8 +56,7 @@ ccl_device_noinline void motion_triangle_shader_setup(ccl_global const KernelGlo
int step = min((int)(sd->time * maxstep), maxstep - 1);
float t = sd->time * maxstep - step;
/* Find attribute. */
AttributeElement elem;
int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
int offset = intersection_find_attribute(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* Fetch vertex coordinates. */
float3 verts[3], next_verts[3];
@ -96,8 +95,7 @@ ccl_device_noinline void motion_triangle_shader_setup(ccl_global const KernelGlo
/* Compute smooth normal. */
if (sd->shader & SHADER_SMOOTH_NORMAL) {
/* Find attribute. */
AttributeElement elem;
int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
int offset = intersection_find_attribute(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL);
kernel_assert(offset != ATTR_STD_NOT_FOUND);
/* Fetch vertex coordinates. */
float3 normals[3], next_normals[3];

View File

@ -862,15 +862,7 @@ ccl_device void shader_eval_displacement(INTEGRATOR_STATE_CONST_ARGS, ccl_privat
#endif
}
/* Transparent Shadows */
#ifdef __TRANSPARENT_SHADOWS__
ccl_device bool shader_transparent_shadow(ccl_global const KernelGlobals *kg,
ccl_private Intersection *isect)
{
return (intersection_get_shader_flags(kg, isect) & SD_HAS_TRANSPARENT_SHADOW) != 0;
}
#endif /* __TRANSPARENT_SHADOWS__ */
/* Cryptomatte */
ccl_device float shader_cryptomatte_id(ccl_global const KernelGlobals *kg, int shader)
{