Cycles: Add support for native OptiX curve primitive

This patch adds support for the curve primitive from OptiX to Cycles. It's currently hidden
behind a debug option, since there can be some slight rendering differences still (because no
backface culling is performed and something seems off with endcaps). The curve primitive
was added with the OptiX 7.1 SDK and requires a r450 driver or newer, so this also updates
the codebase to be able to build with the new SDK.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D8223
This commit is contained in:
Patrick Mours 2020-07-06 12:25:54 +02:00
parent 95f0f31279
commit 737bd549b6
9 changed files with 303 additions and 84 deletions

View File

@ -830,6 +830,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
debug_use_cuda_split_kernel: BoolProperty(name="Split Kernel", default=False)
debug_optix_cuda_streams: IntProperty(name="CUDA Streams", default=1, min=1)
debug_optix_curves_api: BoolProperty(name="Native OptiX Curve Primitive", default=False)
debug_opencl_kernel_type: EnumProperty(
name="OpenCL Kernel Type",

View File

@ -2031,6 +2031,7 @@ class CYCLES_RENDER_PT_debug(CyclesButtonsPanel, Panel):
col = layout.column()
col.label(text="OptiX Flags:")
col.prop(cscene, "debug_optix_cuda_streams")
col.prop(cscene, "debug_optix_curves_api")
col.separator()

View File

@ -92,6 +92,7 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
flags.cuda.split_kernel = get_boolean(cscene, "debug_use_cuda_split_kernel");
/* Synchronize OptiX flags. */
flags.optix.cuda_streams = get_int(cscene, "debug_optix_cuda_streams");
flags.optix.curves_api = get_boolean(cscene, "debug_optix_curves_api");
/* Synchronize OpenCL device type. */
switch (get_enum(cscene, "debug_opencl_device_type")) {
case 0:

View File

@ -131,8 +131,12 @@ class OptiXDevice : public CUDADevice {
PG_RGEN,
PG_MISS,
PG_HITD, // Default hit group
PG_HITL, // __BVH_LOCAL__ hit group
PG_HITS, // __SHADOW_RECORD_ALL__ hit group
PG_HITL, // __BVH_LOCAL__ hit group (only used for triangles)
# if OPTIX_ABI_VERSION >= 36
PG_HITD_MOTION,
PG_HITS_MOTION,
# endif
# ifdef WITH_CYCLES_DEBUG
PG_EXCP,
# endif
@ -177,6 +181,7 @@ class OptiXDevice : public CUDADevice {
OptixDeviceContext context = NULL;
OptixModule optix_module = NULL; // All necessary OptiX kernels are in one module
OptixModule builtin_modules[2] = {};
OptixPipeline pipelines[NUM_PIPELINES] = {};
bool motion_blur = false;
@ -264,6 +269,9 @@ class OptiXDevice : public CUDADevice {
// Unload modules
if (optix_module != NULL)
optixModuleDestroy(optix_module);
for (unsigned int i = 0; i < 2; ++i)
if (builtin_modules[i] != NULL)
optixModuleDestroy(builtin_modules[i]);
for (unsigned int i = 0; i < NUM_PIPELINES; ++i)
if (pipelines[i] != NULL)
optixPipelineDestroy(pipelines[i]);
@ -338,6 +346,12 @@ class OptiXDevice : public CUDADevice {
optixModuleDestroy(optix_module);
optix_module = NULL;
}
for (unsigned int i = 0; i < 2; ++i) {
if (builtin_modules[i] != NULL) {
optixModuleDestroy(builtin_modules[i]);
builtin_modules[i] = NULL;
}
}
for (unsigned int i = 0; i < NUM_PIPELINES; ++i) {
if (pipelines[i] != NULL) {
optixPipelineDestroy(pipelines[i]);
@ -369,6 +383,18 @@ class OptiXDevice : public CUDADevice {
# endif
pipeline_options.pipelineLaunchParamsVariableName = "__params"; // See kernel_globals.h
# if OPTIX_ABI_VERSION >= 36
pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
if (requested_features.use_hair) {
if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) {
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
}
else {
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
}
}
# endif
// Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds
// This is necessary since objects may be reported to have motion if the Vector pass is
// active, but may still need to be rendered without motion blur if that isn't active as well
@ -442,6 +468,34 @@ class OptiXDevice : public CUDADevice {
group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon";
group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon";
}
# if OPTIX_ABI_VERSION >= 36
if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) {
OptixBuiltinISOptions builtin_options;
builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
builtin_options.usesMotionBlur = false;
check_result_optix_ret(optixBuiltinISModuleGet(
context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[0]));
group_descs[PG_HITD].hitgroup.moduleIS = builtin_modules[0];
group_descs[PG_HITD].hitgroup.entryFunctionNameIS = nullptr;
group_descs[PG_HITS].hitgroup.moduleIS = builtin_modules[0];
group_descs[PG_HITS].hitgroup.entryFunctionNameIS = nullptr;
if (motion_blur) {
builtin_options.usesMotionBlur = true;
check_result_optix_ret(optixBuiltinISModuleGet(
context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[1]));
group_descs[PG_HITD_MOTION] = group_descs[PG_HITD];
group_descs[PG_HITD_MOTION].hitgroup.moduleIS = builtin_modules[1];
group_descs[PG_HITS_MOTION] = group_descs[PG_HITS];
group_descs[PG_HITS_MOTION].hitgroup.moduleIS = builtin_modules[1];
}
}
# endif
}
if (requested_features.use_subsurface || requested_features.use_shader_raytrace) {
@ -493,8 +547,14 @@ class OptiXDevice : public CUDADevice {
unsigned int trace_css = stack_size[PG_HITD].cssCH;
// This is based on the maximum of closest-hit and any-hit/intersection programs
trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
# if OPTIX_ABI_VERSION >= 36
trace_css = std::max(trace_css,
stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
trace_css = std::max(trace_css,
stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
# endif
OptixPipelineLinkOptions link_options;
link_options.maxTraceDepth = 1;
@ -503,17 +563,23 @@ class OptiXDevice : public CUDADevice {
# else
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
# endif
link_options.overrideUsesMotionBlur = pipeline_options.usesMotionBlur;
# if OPTIX_ABI_VERSION < 24
link_options.overrideUsesMotionBlur = motion_blur;
# endif
{ // Create path tracing pipeline
OptixProgramGroup pipeline_groups[] = {
groups[PG_RGEN],
groups[PG_MISS],
groups[PG_HITD],
groups[PG_HITS],
groups[PG_HITL],
groups[PG_RGEN],
groups[PG_MISS],
groups[PG_HITD],
groups[PG_HITS],
groups[PG_HITL],
# if OPTIX_ABI_VERSION >= 36
groups[PG_HITD_MOTION],
groups[PG_HITS_MOTION],
# endif
# ifdef WITH_CYCLES_DEBUG
groups[PG_EXCP],
groups[PG_EXCP],
# endif
};
check_result_optix_ret(
@ -530,8 +596,8 @@ class OptiXDevice : public CUDADevice {
const unsigned int css = stack_size[PG_RGEN].cssRG + link_options.maxTraceDepth * trace_css;
// Set stack size depending on pipeline options
check_result_optix_ret(optixPipelineSetStackSize(
pipelines[PIP_PATH_TRACE], 0, 0, css, (pipeline_options.usesMotionBlur ? 3 : 2)));
check_result_optix_ret(
optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE], 0, 0, css, (motion_blur ? 3 : 2)));
}
// Only need to create shader evaluation pipeline if one of these features is used:
@ -541,15 +607,19 @@ class OptiXDevice : public CUDADevice {
if (use_shader_eval_pipeline) { // Create shader evaluation pipeline
OptixProgramGroup pipeline_groups[] = {
groups[PG_BAKE],
groups[PG_DISP],
groups[PG_BACK],
groups[PG_MISS],
groups[PG_HITD],
groups[PG_HITS],
groups[PG_HITL],
groups[PG_BAKE],
groups[PG_DISP],
groups[PG_BACK],
groups[PG_MISS],
groups[PG_HITD],
groups[PG_HITS],
groups[PG_HITL],
# if OPTIX_ABI_VERSION >= 36
groups[PG_HITD_MOTION],
groups[PG_HITS_MOTION],
# endif
# ifdef WITH_CYCLES_DEBUG
groups[PG_EXCP],
groups[PG_EXCP],
# endif
};
check_result_optix_ret(
@ -672,7 +742,11 @@ class OptiXDevice : public CUDADevice {
sbt_params.missRecordCount = 1;
sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord);
sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITL, PG_HITS
# if OPTIX_ABI_VERSION >= 36
sbt_params.hitgroupRecordCount = 5; // PG_HITD(_MOTION), PG_HITS(_MOTION), PG_HITL
# else
sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL
# endif
// Launch the ray generation program
check_result_optix(optixLaunch(pipelines[PIP_PATH_TRACE],
@ -836,7 +910,9 @@ class OptiXDevice : public CUDADevice {
assert(task.denoising.optix_input_passes >= 1 && task.denoising.optix_input_passes <= 3);
denoiser_options.inputKind = static_cast<OptixDenoiserInputKind>(
OPTIX_DENOISER_INPUT_RGB + (task.denoising.optix_input_passes - 1));
# if OPTIX_ABI_VERSION < 28
denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3;
# endif
check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser));
check_result_optix_ret(
optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0));
@ -849,7 +925,11 @@ class OptiXDevice : public CUDADevice {
check_result_optix_ret(
optixDenoiserComputeMemoryResources(denoiser, rect_size.x, rect_size.y, &sizes));
# if OPTIX_ABI_VERSION < 28
const size_t scratch_size = sizes.recommendedScratchSizeInBytes;
# else
const size_t scratch_size = sizes.withOverlapScratchSizeInBytes;
# endif
const size_t scratch_offset = sizes.stateSizeInBytes;
// Allocate denoiser state if tile size has changed since last setup
@ -993,7 +1073,11 @@ class OptiXDevice : public CUDADevice {
sbt_params.missRecordCount = 1;
sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord);
sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITL, PG_HITS
# if OPTIX_ABI_VERSION >= 36
sbt_params.hitgroupRecordCount = 5; // PG_HITD(_MOTION), PG_HITS(_MOTION), PG_HITL
# else
sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL
# endif
check_result_optix(optixLaunch(pipelines[PIP_SHADER_EVAL],
cuda_stream[thread_index],
@ -1070,7 +1154,7 @@ class OptiXDevice : public CUDADevice {
&build_input,
1,
temp_mem.device_pointer,
temp_mem.device_size,
sizes.tempSizeInBytes,
out_data,
sizes.outputSizeInBytes,
&out_handle,
@ -1142,7 +1226,6 @@ class OptiXDevice : public CUDADevice {
continue;
}
const size_t num_curves = hair->num_curves();
const size_t num_segments = hair->num_segments();
size_t num_motion_steps = 1;
@ -1152,7 +1235,18 @@ class OptiXDevice : public CUDADevice {
}
device_vector<OptixAabb> aabb_data(this, "temp_aabb_data", MEM_READ_ONLY);
aabb_data.alloc(num_segments * num_motion_steps);
# if OPTIX_ABI_VERSION >= 36
device_vector<int> index_data(this, "temp_index_data", MEM_READ_ONLY);
device_vector<float4> vertex_data(this, "temp_vertex_data", MEM_READ_ONLY);
// Four control points for each curve segment
const size_t num_vertices = num_segments * 4;
if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) {
index_data.alloc(num_segments);
vertex_data.alloc(num_vertices * num_motion_steps);
}
else
# endif
aabb_data.alloc(num_segments * num_motion_steps);
// Get AABBs for each motion step
for (size_t step = 0; step < num_motion_steps; ++step) {
@ -1165,44 +1259,127 @@ class OptiXDevice : public CUDADevice {
keys = motion_keys->data_float3() + attr_offset * hair->curve_keys.size();
}
size_t i = step * num_segments;
for (size_t j = 0; j < num_curves; ++j) {
const Hair::Curve c = hair->get_curve(j);
for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) {
const Hair::Curve curve = hair->get_curve(j);
for (size_t k = 0; k < c.num_segments(); ++i, ++k) {
BoundBox bounds = BoundBox::empty;
c.bounds_grow(k, keys, hair->curve_radius.data(), bounds);
for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) {
# if OPTIX_ABI_VERSION >= 36
if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) {
int k0 = curve.first_key + segment;
int k1 = k0 + 1;
int ka = max(k0 - 1, curve.first_key);
int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1);
aabb_data[i].minX = bounds.min.x;
aabb_data[i].minY = bounds.min.y;
aabb_data[i].minZ = bounds.min.z;
aabb_data[i].maxX = bounds.max.x;
aabb_data[i].maxY = bounds.max.y;
aabb_data[i].maxZ = bounds.max.z;
const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x);
const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y);
const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z);
const float4 pw = make_float4(hair->curve_radius[ka],
hair->curve_radius[k0],
hair->curve_radius[k1],
hair->curve_radius[kb]);
// Convert Catmull-Rom data to Bezier spline
static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f;
static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;
static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f;
index_data[i] = i * 4;
float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
v[0] = make_float4(
dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw));
v[1] = make_float4(
dot(cr2bsp1, px), dot(cr2bsp1, py), dot(cr2bsp1, pz), dot(cr2bsp1, pw));
v[2] = make_float4(
dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw));
v[3] = make_float4(
dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw));
}
else
# endif
{
BoundBox bounds = BoundBox::empty;
curve.bounds_grow(segment, keys, hair->curve_radius.data(), bounds);
const size_t index = step * num_segments + i;
aabb_data[index].minX = bounds.min.x;
aabb_data[index].minY = bounds.min.y;
aabb_data[index].minZ = bounds.min.z;
aabb_data[index].maxX = bounds.max.x;
aabb_data[index].maxY = bounds.max.y;
aabb_data[index].maxZ = bounds.max.z;
}
}
}
}
// Upload AABB data to GPU
aabb_data.copy_to_device();
# if OPTIX_ABI_VERSION >= 36
index_data.copy_to_device();
vertex_data.copy_to_device();
# endif
vector<device_ptr> aabb_ptrs;
aabb_ptrs.reserve(num_motion_steps);
# if OPTIX_ABI_VERSION >= 36
vector<device_ptr> width_ptrs;
vector<device_ptr> vertex_ptrs;
width_ptrs.reserve(num_motion_steps);
vertex_ptrs.reserve(num_motion_steps);
# endif
for (size_t step = 0; step < num_motion_steps; ++step) {
aabb_ptrs.push_back(aabb_data.device_pointer + step * num_segments * sizeof(OptixAabb));
# if OPTIX_ABI_VERSION >= 36
const device_ptr base_ptr = vertex_data.device_pointer +
step * num_vertices * sizeof(float4);
width_ptrs.push_back(base_ptr + 3 * sizeof(float)); // Offset by vertex size
vertex_ptrs.push_back(base_ptr);
# endif
}
// Disable visibility test anyhit program, since it is already checked during intersection
// Those trace calls that require anyhit can force it with OPTIX_RAY_FLAG_ENFORCE_ANYHIT
unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
// Force a single any-hit call, so shadow record-all behavior works correctly
unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
OptixBuildInput build_input = {};
build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
build_input.aabbArray.numPrimitives = num_segments;
build_input.aabbArray.strideInBytes = sizeof(OptixAabb);
build_input.aabbArray.flags = &build_flags;
build_input.aabbArray.numSbtRecords = 1;
build_input.aabbArray.primitiveIndexOffset = hair->optix_prim_offset;
# if OPTIX_ABI_VERSION >= 36
if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) {
build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
build_input.curveArray.numPrimitives = num_segments;
build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
build_input.curveArray.numVertices = num_vertices;
build_input.curveArray.vertexStrideInBytes = sizeof(float4);
build_input.curveArray.widthBuffers = (CUdeviceptr *)width_ptrs.data();
build_input.curveArray.widthStrideInBytes = sizeof(float4);
build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer;
build_input.curveArray.indexStrideInBytes = sizeof(int);
build_input.curveArray.flag = build_flags;
build_input.curveArray.primitiveIndexOffset = hair->optix_prim_offset;
}
else
# endif
{
// Disable visibility test any-hit program, since it is already checked during
// intersection. Those trace calls that require anyhit can force it with a ray flag.
build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
# if OPTIX_ABI_VERSION < 23
build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
build_input.aabbArray.numPrimitives = num_segments;
build_input.aabbArray.strideInBytes = sizeof(OptixAabb);
build_input.aabbArray.flags = &build_flags;
build_input.aabbArray.numSbtRecords = 1;
build_input.aabbArray.primitiveIndexOffset = hair->optix_prim_offset;
# else
build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
build_input.customPrimitiveArray.numPrimitives = num_segments;
build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
build_input.customPrimitiveArray.flags = &build_flags;
build_input.customPrimitiveArray.numSbtRecords = 1;
build_input.customPrimitiveArray.primitiveIndexOffset = hair->optix_prim_offset;
# endif
}
// Allocate memory for new BLAS and build it
OptixTraversableHandle handle;
@ -1257,8 +1434,8 @@ class OptiXDevice : public CUDADevice {
vertex_ptrs.push_back(vertex_data.device_pointer + num_verts * step * sizeof(float3));
}
// No special build flags for triangle primitives
unsigned int build_flags = OPTIX_GEOMETRY_FLAG_NONE;
// Force a single any-hit call, so shadow record-all behavior works correctly
unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
OptixBuildInput build_input = {};
build_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
build_input.triangleArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
@ -1324,9 +1501,26 @@ class OptiXDevice : public CUDADevice {
// Set user instance ID to object index
instance.instanceId = ob->get_device_index();
// Volumes have a special bit set in the visibility mask so a trace can mask only volumes
// See 'scene_intersect_volume' in bvh.h
instance.visibilityMask = (ob->geometry->has_volume ? 3 : 1);
// Have to have at least one bit in the mask, or else instance would always be culled
instance.visibilityMask = 1;
if (ob->geometry->has_volume) {
// Volumes have a special bit set in the visibility mask so a trace can mask only volumes
instance.visibilityMask |= 2;
}
if (ob->geometry->type == Geometry::HAIR) {
// Same applies to curves (so they can be skipped in local trace calls)
instance.visibilityMask |= 4;
# if OPTIX_ABI_VERSION >= 36
if (motion_blur && ob->geometry->has_motion_blur() && DebugFlags().optix.curves_api &&
static_cast<const Hair *>(ob->geometry)->curve_shape == CURVE_THICK) {
// Select between motion blur and non-motion blur built-in intersection module
instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
}
# endif
}
// Insert motion traversable if object has motion
if (motion_blur && ob->use_motion()) {

View File

@ -172,11 +172,11 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
0.0f,
ray->t,
ray->time,
0xFF,
0xF,
OPTIX_RAY_FLAG_NONE,
0,
0,
0, // SBT offset for PG_HITD
0,
0,
p0,
p1,
p2,
@ -264,12 +264,13 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
0.0f,
ray->t,
ray->time,
// Skip curves
0x3,
// Need to always call into __anyhit__kernel_optix_local_hit
0xFF,
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
1,
2, // SBT offset for PG_HITL
0,
0,
0, // SBT offset for PG_HITL
p0,
p1,
p2,
@ -374,12 +375,12 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
0.0f,
ray->t,
ray->time,
0xF,
// Need to always call into __anyhit__kernel_optix_shadow_all_hit
0xFF,
OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
2,
1, // SBT offset for PG_HITS
0,
0,
0, // SBT offset for PG_HITS
p0,
p1,
*num_hits,
@ -458,12 +459,12 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
0.0f,
ray->t,
ray->time,
// Visibility mask set to only intersect objects with volumes
0x02,
// Skip everything but volumes
0x2,
OPTIX_RAY_FLAG_NONE,
0,
0,
0, // SBT offset for PG_HITD
0,
0,
p0,
p1,
p2,

View File

@ -734,7 +734,6 @@ ccl_device_inline void curve_shader_setup(KernelGlobals *kg,
}
sd->u = isect->u;
sd->v = isect->v;
P = P + D * t;
@ -750,6 +749,7 @@ ccl_device_inline void curve_shader_setup(KernelGlobals *kg,
sd->N = normalize(sine * bitangent - cosine * normalize(cross(tangent, bitangent)));
sd->Ng = -D;
sd->v = isect->v;
# if 0
/* This approximates the position and geometric normal of a thick curve too,
@ -764,8 +764,11 @@ ccl_device_inline void curve_shader_setup(KernelGlobals *kg,
* This could be optimized by recording the normal in the intersection,
* however for Optix this would go beyond the size of the payload. */
const float3 P_inside = float4_to_float3(catmull_rom_basis_eval(P_curve, isect->u));
sd->Ng = normalize(P - P_inside);
sd->N = sd->Ng;
const float3 Ng = normalize(P - P_inside);
sd->N = Ng;
sd->Ng = Ng;
sd->v = 0.0f;
}
# ifdef __DPDU__

View File

@ -15,6 +15,7 @@
* limitations under the License.
*/
// clang-format off
#include "kernel/kernel_compat_optix.h"
#include "util/util_atomic.h"
#include "kernel/kernel_types.h"
@ -23,6 +24,7 @@
#include "kernel/kernel_path.h"
#include "kernel/kernel_bake.h"
// clang-format on
template<typename T> ccl_device_forceinline T *get_payload_ptr_0()
{
@ -139,8 +141,8 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
}
else {
if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
// Record closest intersection only (do not terminate ray here, since there is no guarantee
// about distance ordering in anyhit)
// Record closest intersection only
// Do not terminate ray here, since there is no guarantee about distance ordering in any-hit
return optixIgnoreIntersection();
}
@ -153,15 +155,9 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
isect->object = get_object_id();
isect->type = kernel_tex_fetch(__prim_type, isect->prim);
if (optixIsTriangleHit()) {
const float2 barycentrics = optixGetTriangleBarycentrics();
isect->u = 1.0f - barycentrics.y - barycentrics.x;
isect->v = barycentrics.x;
}
else {
isect->u = __uint_as_float(optixGetAttribute_0());
isect->v = __uint_as_float(optixGetAttribute_1());
}
const float2 barycentrics = optixGetTriangleBarycentrics();
isect->u = 1.0f - barycentrics.y - barycentrics.x;
isect->v = barycentrics.x;
// Record geometric normal
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
@ -198,10 +194,18 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
isect->u = 1.0f - barycentrics.y - barycentrics.x;
isect->v = barycentrics.x;
}
# ifdef __HAIR__
else {
isect->u = __uint_as_float(optixGetAttribute_0());
const float u = __uint_as_float(optixGetAttribute_0());
isect->u = u;
isect->v = __uint_as_float(optixGetAttribute_1());
// Filter out curve endcaps
if (u == 0.0f || u == 1.0f) {
return optixIgnoreIntersection();
}
}
# endif
# ifdef __TRANSPARENT_SHADOWS__
// Detect if this surface has a shader with transparent shadows
@ -213,7 +217,6 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
# ifdef __TRANSPARENT_SHADOWS__
}
// TODO(pmours): Do we need REQUIRE_UNIQUE_ANYHIT for this to work?
optixSetPayload_2(optixGetPayload_2() + 1); // num_hits++
// Continue tracing
@ -227,13 +230,25 @@ extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
uint visibility = optixGetPayload_4();
#ifdef __VISIBILITY_FLAG__
const uint prim = optixGetPrimitiveIndex();
if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0)
if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
return optixIgnoreIntersection();
}
#endif
#ifdef __HAIR__
if (!optixIsTriangleHit()) {
// Filter out curve endcaps
const float u = __uint_as_float(optixGetAttribute_0());
if (u == 0.0f || u == 1.0f) {
return optixIgnoreIntersection();
}
}
#endif
// Shadow ray early termination
if (visibility & PATH_RAY_SHADOW_OPAQUE)
if (visibility & PATH_RAY_SHADOW_OPAQUE) {
return optixTerminateRay();
}
}
extern "C" __global__ void __closesthit__kernel_optix_hit()
@ -250,7 +265,7 @@ extern "C" __global__ void __closesthit__kernel_optix_hit()
optixSetPayload_2(__float_as_uint(barycentrics.x));
}
else {
optixSetPayload_1(optixGetAttribute_0());
optixSetPayload_1(optixGetAttribute_0()); // Same as 'optixGetCurveParameter()'
optixSetPayload_2(optixGetAttribute_1());
}
}
@ -286,7 +301,6 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type
__float_as_int(isect.u), // Attribute_0
__float_as_int(isect.v)); // Attribute_1
}
}
extern "C" __global__ void __intersection__curve_ribbon()

View File

@ -83,6 +83,7 @@ DebugFlags::OptiX::OptiX()
void DebugFlags::OptiX::reset()
{
cuda_streams = 1;
curves_api = false;
}
DebugFlags::OpenCL::OpenCL() : device_type(DebugFlags::OpenCL::DEVICE_ALL), debug(false)

View File

@ -108,6 +108,9 @@ class DebugFlags {
/* Number of CUDA streams to launch kernels concurrently from. */
int cuda_streams;
/* Use OptiX curves API for hair instead of custom implementation. */
bool curves_api;
};
/* Descriptor of OpenCL feature-set to be used. */