Cycles: Rework OptiX visibility flags handling

Before the visibility test against the visibility flags was performed in an any-hit program in OptiX
(called `__anyhit__kernel_optix_visibility_test`), which was using the `__prim_visibility` array.
This is not entirely correct however, since `__prim_visibility` is filled with the merged visibility
flags of all objects that reference that primitive, so if one object uses different visibility flags
than another object, but they both are instances of the same geometry, they would appear the same
way. The reason that the any-hit program was used rather than the OptiX instance visibility mask is
that the latter is currently limited to 8 bits only, which is not sufficient to contain all Cycles
visibility flags (12 bits).

To mostly fix the problem with multiple instances and different visibility flags, I changed things to
use the OptiX instance visibility mask for a subset of the Cycles visibility flags (`PATH_RAY_CAMERA`
to `PATH_RAY_VOLUME_SCATTER`, which fit into 8 bits) and only fall back to the visibility test any-hit
program if that isn't enough (e.g. the ray visibility mask exceeds 8 bits or when using the built-in
curves from OptiX, since the any-hit program is then also used to skip the curve endcaps).

This may also improve performance in some cases, since by default OptiX can now perform the normal
scene intersection trace calls entirely on RT cores without having to jump back to the SM on every
hit to execute the any-hit program.

Fixes T89801

Differential Revision: https://developer.blender.org/D12604
This commit is contained in:
Patrick Mours 2021-09-22 16:23:08 +02:00
parent 7270ba011c
commit 2189dfd6e2
Notes: blender-bot 2023-02-14 06:17:14 +01:00
Referenced by issue #89801, Collection instances (Hair particle system, geometry nodes) visible when set to "Indirect Only" on view layer on OptiX Cycles
4 changed files with 127 additions and 60 deletions

View File

@ -315,6 +315,11 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
group_descs[PG_HITS].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
group_descs[PG_HITS].hitgroup.moduleAH = optix_module;
group_descs[PG_HITS].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_shadow_all_hit";
group_descs[PG_HITV].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
group_descs[PG_HITV].hitgroup.moduleCH = optix_module;
group_descs[PG_HITV].hitgroup.entryFunctionNameCH = "__closesthit__kernel_optix_hit";
group_descs[PG_HITV].hitgroup.moduleAH = optix_module;
group_descs[PG_HITV].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_volume_test";
if (kernel_features & KERNEL_FEATURE_HAIR) {
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
@ -397,6 +402,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
trace_css = std::max(trace_css, stack_size[PG_HITV].cssIS + stack_size[PG_HITV].cssAH);
trace_css = std::max(trace_css,
stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
trace_css = std::max(trace_css,
@ -421,6 +427,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.push_back(groups[PG_HITD]);
pipeline_groups.push_back(groups[PG_HITS]);
pipeline_groups.push_back(groups[PG_HITL]);
pipeline_groups.push_back(groups[PG_HITV]);
if (motion_blur) {
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
@ -459,6 +466,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.push_back(groups[PG_HITD]);
pipeline_groups.push_back(groups[PG_HITS]);
pipeline_groups.push_back(groups[PG_HITL]);
pipeline_groups.push_back(groups[PG_HITV]);
if (motion_blur) {
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
@ -1390,25 +1398,33 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
/* Set user instance ID to object index (but leave low bit blank). */
instance.instanceId = ob->get_device_index() << 1;
/* Have to have at least one bit in the mask, or else instance would always be culled. */
instance.visibilityMask = 1;
/* Add some of the object visibility bits to the mask.
* __prim_visibility contains the combined visibility bits of all instances, so is not
* reliable if they differ between instances. But the OptiX visibility mask can only contain
* 8 bits, so have to trade-off here and select just a few important ones.
*/
instance.visibilityMask = ob->visibility_for_tracing() & 0xFF;
if (ob->get_geometry()->has_volume) {
/* Volumes have a special bit set in the visibility mask so a trace can mask only volumes.
*/
instance.visibilityMask |= 2;
/* Have to have at least one bit in the mask, or else instance would always be culled. */
if (0 == instance.visibilityMask) {
instance.visibilityMask = 0xFF;
}
if (ob->get_geometry()->geometry_type == Geometry::HAIR) {
/* Same applies to curves (so they can be skipped in local trace calls). */
instance.visibilityMask |= 4;
if (motion_blur && ob->get_geometry()->has_motion_blur() &&
static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) {
if (ob->get_geometry()->geometry_type == Geometry::HAIR &&
static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) {
if (motion_blur && ob->get_geometry()->has_motion_blur()) {
/* Select between motion blur and non-motion blur built-in intersection module. */
instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
}
}
else {
/* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
* since it needs to filter out endcaps there).
* It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit
* programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT.
*/
instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT;
}
/* Insert motion traversable if object has motion. */
if (motion_blur && ob->use_motion()) {
@ -1474,7 +1490,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
delete[] reinterpret_cast<uint8_t *>(&motion_transform);
/* Disable instance transform if object uses motion transform already. */
instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
/* Get traversable handle to motion transform. */
optixConvertPointerToTraversableHandle(context,
@ -1491,7 +1507,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
else {
/* Disable instance transform if geometry already has it applied to vertex data. */
instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
/* Non-instanced objects read ID from 'prim_object', so distinguish
* them from instanced objects with the low bit set. */
instance.instanceId |= 1;

View File

@ -40,6 +40,7 @@ enum {
PG_HITD, /* Default hit group. */
PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */
PG_HITL, /* __BVH_LOCAL__ hit group (only used for triangles). */
PG_HITV, /* __VOLUME__ hit group. */
PG_HITD_MOTION,
PG_HITS_MOTION,
PG_CALL_SVM_AO,
@ -51,7 +52,7 @@ enum {
static const int MISS_PROGRAM_GROUP_OFFSET = PG_MISS;
static const int NUM_MIS_PROGRAM_GROUPS = 1;
static const int HIT_PROGAM_GROUP_OFFSET = PG_HITD;
static const int NUM_HIT_PROGRAM_GROUPS = 5;
static const int NUM_HIT_PROGRAM_GROUPS = 6;
static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
static const int NUM_CALLABLE_PROGRAM_GROUPS = 3;

View File

@ -167,15 +167,25 @@ ccl_device_intersect bool scene_intersect(const KernelGlobals *kg,
uint p4 = visibility;
uint p5 = PRIMITIVE_NONE;
uint ray_mask = visibility & 0xFF;
uint ray_flags = OPTIX_RAY_FLAG_NONE;
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
ray_mask = 0xFF;
ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT;
}
else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
ray_flags = OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
}
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
0.0f,
ray->t,
ray->time,
0xF,
OPTIX_RAY_FLAG_NONE,
0, // SBT offset for PG_HITD
ray_mask,
ray_flags,
0, /* SBT offset for PG_HITD */
0,
0,
p0,
@ -251,11 +261,11 @@ ccl_device_intersect bool scene_intersect_local(const KernelGlobals *kg,
uint p2 = ((uint64_t)local_isect) & 0xFFFFFFFF;
uint p3 = (((uint64_t)local_isect) >> 32) & 0xFFFFFFFF;
uint p4 = local_object;
// Is set to zero on miss or if ray is aborted, so can be used as return value
/* Is set to zero on miss or if ray is aborted, so can be used as return value. */
uint p5 = max_hits;
if (local_isect) {
local_isect->num_hits = 0; // Initialize hit count to zero
local_isect->num_hits = 0; /* Initialize hit count to zero. */
}
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
@ -263,11 +273,10 @@ ccl_device_intersect bool scene_intersect_local(const KernelGlobals *kg,
0.0f,
ray->t,
ray->time,
// Skip curves
0x3,
// Need to always call into __anyhit__kernel_optix_local_hit
0xFF,
/* Need to always call into __anyhit__kernel_optix_local_hit. */
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
2, // SBT offset for PG_HITL
2, /* SBT offset for PG_HITL */
0,
0,
p0,
@ -365,17 +374,22 @@ ccl_device_intersect bool scene_intersect_shadow_all(const KernelGlobals *kg,
uint p4 = visibility;
uint p5 = false;
*num_hits = 0; // Initialize hit count to zero
uint ray_mask = visibility & 0xFF;
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
ray_mask = 0xFF;
}
*num_hits = 0; /* Initialize hit count to zero. */
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
0.0f,
ray->t,
ray->time,
0xF,
// Need to always call into __anyhit__kernel_optix_shadow_all_hit
ray_mask,
/* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
1, // SBT offset for PG_HITS
1, /* SBT offset for PG_HITS */
0,
0,
p0,
@ -444,16 +458,21 @@ ccl_device_intersect bool scene_intersect_volume(const KernelGlobals *kg,
uint p4 = visibility;
uint p5 = PRIMITIVE_NONE;
uint ray_mask = visibility & 0xFF;
if (0 == ray_mask && (visibility & ~0xFF) != 0) {
ray_mask = 0xFF;
}
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
0.0f,
ray->t,
ray->time,
// Skip everything but volumes
0x2,
OPTIX_RAY_FLAG_NONE,
0, // SBT offset for PG_HITD
ray_mask,
/* Need to always call into __anyhit__kernel_optix_volume_test. */
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
3, /* SBT offset for PG_HITV */
0,
0,
p0,

View File

@ -19,7 +19,7 @@
#include "kernel/device/optix/compat.h"
#include "kernel/device/optix/globals.h"
#include "kernel/device/gpu/image.h" // Texture lookup uses normal CUDA intrinsics
#include "kernel/device/gpu/image.h" /* Texture lookup uses normal CUDA intrinsics. */
#include "kernel/integrator/integrator_state.h"
#include "kernel/integrator/integrator_state_flow.h"
@ -44,18 +44,18 @@ template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
template<bool always = false> ccl_device_forceinline uint get_object_id()
{
#ifdef __OBJECT_MOTION__
// Always get the the instance ID from the TLAS
// There might be a motion transform node between TLAS and BLAS which does not have one
/* Always get the the instance ID from the TLAS.
* There might be a motion transform node between TLAS and BLAS which does not have one. */
uint object = optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
#else
uint object = optixGetInstanceId();
#endif
// Choose between always returning object ID or only for instances
/* Choose between always returning object ID or only for instances. */
if (always || (object & 1) == 0)
// Can just remove the low bit since instance always contains object ID
/* Can just remove the low bit since instance always contains object ID. */
return object >> 1;
else
// Set to OBJECT_NONE if this is not an instanced object
/* Set to OBJECT_NONE if this is not an instanced object. */
return OBJECT_NONE;
}
@ -93,23 +93,30 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_volume_st
extern "C" __global__ void __miss__kernel_optix_miss()
{
// 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss
/* 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss. */
optixSetPayload_0(__float_as_uint(optixGetRayTmax()));
optixSetPayload_5(PRIMITIVE_NONE);
}
extern "C" __global__ void __anyhit__kernel_optix_local_hit()
{
#ifdef __HAIR__
if (!optixIsTriangleHit()) {
/* Ignore curves. */
return optixIgnoreIntersection();
}
#endif
#ifdef __BVH_LOCAL__
const uint object = get_object_id<true>();
if (object != optixGetPayload_4() /* local_object */) {
// Only intersect with matching object
/* Only intersect with matching object. */
return optixIgnoreIntersection();
}
const uint max_hits = optixGetPayload_5();
if (max_hits == 0) {
// Special case for when no hit information is requested, just report that something was hit
/* Special case for when no hit information is requested, just report that something was hit */
optixSetPayload_5(true);
return optixTerminateRay();
}
@ -136,8 +143,9 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
}
else {
if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
// Record closest intersection only
// Do not terminate ray here, since there is no guarantee about distance ordering in any-hit
/* Record closest intersection only.
* Do not terminate ray here, since there is no guarantee about distance ordering in any-hit.
*/
return optixIgnoreIntersection();
}
@ -154,14 +162,14 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
isect->u = 1.0f - barycentrics.y - barycentrics.x;
isect->v = barycentrics.x;
// Record geometric normal
/* Record geometric normal. */
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0));
const float3 tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1));
const float3 tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2));
local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
// Continue tracing (without this the trace call would return after the first hit)
/* Continue tracing (without this the trace call would return after the first hit). */
optixIgnoreIntersection();
#endif
}
@ -190,7 +198,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
u = __uint_as_float(optixGetAttribute_0());
v = __uint_as_float(optixGetAttribute_1());
// Filter out curve endcaps
/* Filter out curve endcaps. */
if (u == 0.0f || u == 1.0f) {
ignore_intersection = true;
}
@ -241,10 +249,10 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
isect->type = kernel_tex_fetch(__prim_type, prim);
# ifdef __TRANSPARENT_SHADOWS__
// Detect if this surface has a shader with transparent shadows
/* Detect if this surface has a shader with transparent shadows. */
if (!shader_transparent_shadow(NULL, isect) || max_hits == 0) {
# endif
// If no transparent shadows, all light is blocked and we can stop immediately
/* If no transparent shadows, all light is blocked and we can stop immediately. */
optixSetPayload_5(true);
return optixTerminateRay();
# ifdef __TRANSPARENT_SHADOWS__
@ -252,24 +260,39 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
# endif
}
// Continue tracing
/* Continue tracing. */
optixIgnoreIntersection();
#endif
}
extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
extern "C" __global__ void __anyhit__kernel_optix_volume_test()
{
uint visibility = optixGetPayload_4();
#ifdef __HAIR__
if (!optixIsTriangleHit()) {
/* Ignore curves. */
return optixIgnoreIntersection();
}
#endif
#ifdef __VISIBILITY_FLAG__
const uint prim = optixGetPrimitiveIndex();
const uint visibility = optixGetPayload_4();
if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
return optixIgnoreIntersection();
}
#endif
const uint object = get_object_id<true>();
if ((kernel_tex_fetch(__object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) {
return optixIgnoreIntersection();
}
}
extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
{
#ifdef __HAIR__
if (!optixIsTriangleHit()) {
// Filter out curve endcaps
/* Filter out curve endcaps. */
const float u = __uint_as_float(optixGetAttribute_0());
if (u == 0.0f || u == 1.0f) {
return optixIgnoreIntersection();
@ -277,18 +300,26 @@ extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
}
#endif
// Shadow ray early termination
#ifdef __VISIBILITY_FLAG__
const uint prim = optixGetPrimitiveIndex();
const uint visibility = optixGetPayload_4();
if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
return optixIgnoreIntersection();
}
/* Shadow ray early termination. */
if (visibility & PATH_RAY_SHADOW_OPAQUE) {
return optixTerminateRay();
}
#endif
}
extern "C" __global__ void __closesthit__kernel_optix_hit()
{
optixSetPayload_0(__float_as_uint(optixGetRayTmax())); // Intersection distance
optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */
optixSetPayload_3(optixGetPrimitiveIndex());
optixSetPayload_4(get_object_id());
// Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index
/* Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index. */
optixSetPayload_5(kernel_tex_fetch(__prim_type, optixGetPrimitiveIndex()));
if (optixIsTriangleHit()) {
@ -297,7 +328,7 @@ extern "C" __global__ void __closesthit__kernel_optix_hit()
optixSetPayload_2(__float_as_uint(barycentrics.x));
}
else {
optixSetPayload_1(optixGetAttribute_0()); // Same as 'optixGetCurveParameter()'
optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */
optixSetPayload_2(optixGetAttribute_1());
}
}
@ -311,7 +342,7 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type
float3 P = optixGetObjectRayOrigin();
float3 dir = optixGetObjectRayDirection();
// The direction is not normalized by default, but the curve intersection routine expects that
/* The direction is not normalized by default, but the curve intersection routine expects that */
float len;
dir = normalize_len(dir, &len);
@ -323,15 +354,15 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type
Intersection isect;
isect.t = optixGetRayTmax();
// Transform maximum distance into object space
/* Transform maximum distance into object space. */
if (isect.t != FLT_MAX)
isect.t *= len;
if (curve_intersect(NULL, &isect, P, dir, isect.t, visibility, object, prim, time, type)) {
optixReportIntersection(isect.t / len,
type & PRIMITIVE_ALL,
__float_as_int(isect.u), // Attribute_0
__float_as_int(isect.v)); // Attribute_1
__float_as_int(isect.u), /* Attribute_0 */
__float_as_int(isect.v)); /* Attribute_1 */
}
}