Cycles: port curve-ray intersection from Embree for use in Cycles GPU

This keeps render results compatible for combined CPU + GPU rendering.
Performance and quality of the primitives are quite different than before.
There are now two options:

* Rounded Ribbon: render hair as flat ribbon with (fake) rounded normals, for
  fast rendering. Hair curves are subdivided with a fixed number of user
  specified subdivisions.

  This gives relatively good results, especially when used with the Principled
  Hair BSDF and hair viewed from a typical distance. There are artifacts when
  viewed close up, though this was also the case with all previous primitives
  (but different ones).

* 3D Curve: render hair as 3D curve, for accurate results when viewing hair
  close up. This automatically subdivides the curve until it is smooth.

  This gives higher quality than any of the previous primitives, but does come
  at a performance cost and is somewhat slower than our previous Thick curves.

The main problem here is performance. For CPU and OpenCL rendering, performance
usually seems quite close or better for similar quality results.

However, for CUDA and OptiX, performance of 3D curve intersection is problematic,
with e.g. 1.45x longer render time in Koro (though there is no equivalent quality
and rounded ribbons seem fine for that scene). Any help or ideas to optimize this
are welcome.

Ref T73778

Depends on D8012

Maniphest Tasks: T73778

Differential Revision: https://developer.blender.org/D8013
This commit is contained in:
Brecht Van Lommel 2020-02-18 20:54:41 +01:00 committed by Brecht Van Lommel
parent d1ef5146d7
commit 207338bb58
Notes: blender-bot 2023-02-14 08:29:54 +01:00
Referenced by issue #82966, Banding artifacts in particle hair at render time (Rounded Ribbons)
Referenced by issue #73778, Cycles: Embree improvements
13 changed files with 766 additions and 418 deletions

View File

@ -73,8 +73,8 @@ enum_panorama_types = (
)
enum_curve_shape = (
('RIBBONS', "Ribbons", "Ignore thickness of each hair"),
('THICK', "Thick", "Use thickness of hair when rendering"),
('RIBBONS', "Rounded Ribbons", "Render hair as flat ribbon with rounded normals, for fast rendering"),
('THICK', "3D Curves", "Render hair as 3D curve, for accurate results when viewing hair close up"),
)
enum_tile_order = (
@ -1223,7 +1223,7 @@ class CyclesCurveRenderSettings(bpy.types.PropertyGroup):
name="Shape",
description="Form of hair",
items=enum_curve_shape,
default='THICK',
default='RIBBONS',
)
use_curves: BoolProperty(
name="Use Cycles Hair Rendering",
@ -1234,7 +1234,7 @@ class CyclesCurveRenderSettings(bpy.types.PropertyGroup):
name="Subdivisions",
description="Number of subdivisions used in Cardinal curve intersection (power of 2)",
min=0, max=24,
default=4,
default=2,
)
@classmethod

View File

@ -407,7 +407,6 @@ class CYCLES_RENDER_PT_hair(CyclesButtonsPanel, Panel):
col = layout.column()
col.prop(ccscene, "shape", text="Shape")
if ccscene.shape == 'RIBBONS':
# TODO: use for embree
col.prop(ccscene, "subdivisions", text="Curve subdivisions")

View File

@ -132,6 +132,7 @@ class DeviceRequestedFeatures {
/* BVH/sampling kernel features. */
bool use_hair;
bool use_hair_thick;
bool use_object_motion;
bool use_camera_motion;
@ -178,6 +179,7 @@ class DeviceRequestedFeatures {
max_nodes_group = 0;
nodes_features = 0;
use_hair = false;
use_hair_thick = false;
use_object_motion = false;
use_camera_motion = false;
use_baking = false;
@ -200,6 +202,7 @@ class DeviceRequestedFeatures {
max_nodes_group == requested_features.max_nodes_group &&
nodes_features == requested_features.nodes_features &&
use_hair == requested_features.use_hair &&
use_hair_thick == requested_features.use_hair_thick &&
use_object_motion == requested_features.use_object_motion &&
use_camera_motion == requested_features.use_camera_motion &&
use_baking == requested_features.use_baking &&

View File

@ -428,11 +428,20 @@ class OptiXDevice : public CUDADevice {
group_descs[PG_HITS].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_shadow_all_hit";
if (requested_features.use_hair) {
// Add curve intersection programs
group_descs[PG_HITD].hitgroup.moduleIS = optix_module;
group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve";
group_descs[PG_HITS].hitgroup.moduleIS = optix_module;
group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve";
// Add curve intersection programs
if (requested_features.use_hair_thick) {
// Slower programs for thick hair since that also slows down ribbons.
// Ideally this should not be needed.
group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_all";
group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_all";
}
else {
group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon";
group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon";
}
}
if (requested_features.use_subsurface || requested_features.use_shader_raytrace) {

View File

@ -119,13 +119,16 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
differential3 *domega_in,
float *pdf)
{
/* For curves use the smooth normal, particularly for ribbons the geometric
* normal gives too much darkening otherwise. */
int label;
const float3 Ng = (sd->type & PRIMITIVE_ALL_CURVE) ? sc->N : sd->Ng;
switch (sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
case CLOSURE_BSDF_BSSRDF_ID:
label = bsdf_diffuse_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -140,7 +143,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
#ifdef __SVM__
case CLOSURE_BSDF_OREN_NAYAR_ID:
label = bsdf_oren_nayar_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -155,7 +158,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
# ifdef __OSL__
case CLOSURE_BSDF_PHONG_RAMP_ID:
label = bsdf_phong_ramp_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -169,7 +172,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
label = bsdf_diffuse_ramp_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -184,7 +187,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
# endif
case CLOSURE_BSDF_TRANSLUCENT_ID:
label = bsdf_translucent_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -198,7 +201,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_REFLECTION_ID:
label = bsdf_reflection_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -212,7 +215,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_REFRACTION_ID:
label = bsdf_refraction_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -226,7 +229,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_TRANSPARENT_ID:
label = bsdf_transparent_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -244,7 +247,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
label = bsdf_microfacet_ggx_sample(kg,
sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -260,7 +263,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID:
label = bsdf_microfacet_multi_ggx_sample(kg,
sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -277,7 +280,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
label = bsdf_microfacet_multi_ggx_glass_sample(kg,
sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -294,7 +297,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
label = bsdf_microfacet_beckmann_sample(kg,
sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -308,7 +311,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
label = bsdf_ashikhmin_shirley_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -322,7 +325,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
label = bsdf_ashikhmin_velvet_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -336,7 +339,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
label = bsdf_diffuse_toon_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -350,7 +353,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_GLOSSY_TOON_ID:
label = bsdf_glossy_toon_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -364,7 +367,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
label = bsdf_hair_reflection_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -378,7 +381,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
label = bsdf_hair_transmission_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -398,7 +401,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
label = bsdf_principled_diffuse_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -412,7 +415,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
break;
case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
label = bsdf_principled_sheen_sample(sc,
sd->Ng,
Ng,
sd->I,
sd->dI.dx,
sd->dI.dy,
@ -485,9 +488,12 @@ ccl_device_inline
const float3 omega_in,
float *pdf)
{
/* For curves use the smooth normal, particularly for ribbons the geometric
* normal gives too much darkening otherwise. */
const float3 Ng = (sd->type & PRIMITIVE_ALL_CURVE) ? sd->N : sd->Ng;
float3 eval;
if (dot(sd->Ng, omega_in) >= 0.0f) {
if (dot(Ng, omega_in) >= 0.0f) {
switch (sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
case CLOSURE_BSDF_BSSRDF_ID:

View File

@ -206,9 +206,6 @@ ccl_device int bsdf_principled_hair_setup(ShaderData *sd, PrincipledHairBSDF *bs
float3 X = safe_normalize(sd->dPdu);
float3 Y = safe_normalize(cross(X, sd->I));
float3 Z = safe_normalize(cross(X, Y));
/* TODO: the solution below works where sd->Ng is the normal
* pointing from the center of the curve to the shading point.
* It doesn't work for triangles, see https://developer.blender.org/T43625 */
/* h -1..0..1 means the rays goes from grazing the hair, to hitting it at
* the center, to grazing the other edge. This is the sine of the angle

View File

@ -23,33 +23,6 @@ CCL_NAMESPACE_BEGIN
#ifdef __HAIR__
/* Interpolation of curve geometry */
/* Evaluate the (unnormalized) tangent of a cubic curve segment at parameter t.
 *
 * The result is a weighted sum of the four control points p0..p3. The weights
 * are the derivatives with respect to t of the cubic weights used by
 * curvepoint(), so this is the derivative of the position that curvepoint()
 * returns for the same control points. */
ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3)
{
/* Shape constant shared with curvepoint(); it scales the contribution of the
 * outer control points p0 and p3. */
float fc = 0.71f;
float data[4];
float t2 = t * t;
/* Quadratic weight polynomials in t, one per control point. */
data[0] = -3.0f * fc * t2 + 4.0f * fc * t - fc;
data[1] = 3.0f * (2.0f - fc) * t2 + 2.0f * (fc - 3.0f) * t;
data[2] = 3.0f * (fc - 2.0f) * t2 + 2.0f * (3.0f - 2.0f * fc) * t + fc;
data[3] = 3.0f * fc * t2 - 2.0f * fc * t;
return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
}
/* Evaluate the position on a cubic curve segment at parameter t.
 *
 * The result is a weighted sum of the four control points p0..p3. With the
 * weights below the curve passes through p1 at t = 0 and through p2 at t = 1;
 * p0 and p3 only influence the shape in between. */
ccl_device_inline float3 curvepoint(float t, float3 p0, float3 p1, float3 p2, float3 p3)
{
float data[4];
/* Shape constant that scales the contribution of the outer control points. */
float fc = 0.71f;
float t2 = t * t;
float t3 = t2 * t;
/* Cubic weight polynomials in t, one per control point. */
data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t;
data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f;
data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t;
data[3] = fc * t3 - fc * t2;
return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
}
/* Reading attributes on various curve elements */
ccl_device float curve_attribute_float(
@ -243,7 +216,7 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
P_curve[1] = kernel_tex_fetch(__curve_keys, k1);
}
else {
motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
motion_curve_keys_linear(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
}
r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w;

File diff suppressed because it is too large Load Diff

View File

@ -50,14 +50,14 @@ ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg,
return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
}
ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg,
int offset,
int numkeys,
int numsteps,
int step,
int k0,
int k1,
float4 keys[2])
ccl_device_inline void motion_curve_keys_for_step_linear(KernelGlobals *kg,
int offset,
int numkeys,
int numsteps,
int step,
int k0,
int k1,
float4 keys[2])
{
if (step == numsteps) {
/* center step: regular key location */
@ -77,7 +77,7 @@ ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg,
}
/* return 2 curve key locations */
ccl_device_inline void motion_curve_keys(
ccl_device_inline void motion_curve_keys_linear(
KernelGlobals *kg, int object, int prim, float time, int k0, int k1, float4 keys[2])
{
/* get motion info */
@ -97,8 +97,8 @@ ccl_device_inline void motion_curve_keys(
/* fetch key coordinates */
float4 next_keys[2];
motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, keys);
motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step + 1, k0, k1, next_keys);
motion_curve_keys_for_step_linear(kg, offset, numkeys, numsteps, step, k0, k1, keys);
motion_curve_keys_for_step_linear(kg, offset, numkeys, numsteps, step + 1, k0, k1, next_keys);
/* interpolate between steps */
keys[0] = (1.0f - t) * keys[0] + t * next_keys[0];

View File

@ -86,10 +86,7 @@ ccl_device_noinline
#ifdef __HAIR__
if (sd->type & PRIMITIVE_ALL_CURVE) {
/* curve */
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
sd->shader = __float_as_int(curvedata.z);
sd->P = curve_refine(kg, sd, isect, ray);
curve_shader_setup(kg, sd, isect, ray);
}
else
#endif

View File

@ -256,11 +256,9 @@ extern "C" __global__ void __closesthit__kernel_optix_hit()
}
#ifdef __HAIR__
extern "C" __global__ void __intersection__curve()
ccl_device_inline void optix_intersection_curve(const uint prim, const uint type)
{
const uint prim = optixGetPrimitiveIndex();
const uint object = get_object_id<true>();
const uint type = kernel_tex_fetch(__prim_type, prim);
const uint visibility = optixGetPayload_4();
float3 P = optixGetObjectRayOrigin();
@ -288,6 +286,24 @@ extern "C" __global__ void __intersection__curve()
__float_as_int(isect.u), // Attribute_0
__float_as_int(isect.v)); // Attribute_1
}
}
// OptiX intersection entry point that only handles ribbon curve primitives.
// The primitive type is looked up from the __prim_type table, and the shared
// optix_intersection_curve() routine runs only when that type has one of the
// ribbon bits set; any other primitive type is ignored by this program.
extern "C" __global__ void __intersection__curve_ribbon()
{
const uint prim = optixGetPrimitiveIndex();
const uint type = kernel_tex_fetch(__prim_type, prim);
// Accept both the static and the motion-blurred ribbon variants.
if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) {
optix_intersection_curve(prim, type);
}
}
// OptiX intersection entry point that handles every curve primitive type.
// Unlike __intersection__curve_ribbon() there is no filtering on the primitive
// type: the type fetched from __prim_type is forwarded unconditionally to
// optix_intersection_curve().
extern "C" __global__ void __intersection__curve_all()
{
const uint prim = optixGetPrimitiveIndex();
const uint type = kernel_tex_fetch(__prim_type, prim);
optix_intersection_curve(prim, type);
}
#endif

View File

@ -36,13 +36,12 @@ void curvebounds(float *lower, float *upper, float3 *p, int dim)
float *p2 = &p[2].x;
float *p3 = &p[3].x;
float fc = 0.71f;
/* Catmull-Rom weights. */
float curve_coef[4];
curve_coef[0] = p1[dim];
curve_coef[1] = -fc * p0[dim] + fc * p2[dim];
curve_coef[2] = 2.0f * fc * p0[dim] + (fc - 3.0f) * p1[dim] + (3.0f - 2.0f * fc) * p2[dim] -
fc * p3[dim];
curve_coef[3] = -fc * p0[dim] + (2.0f - fc) * p1[dim] + (fc - 2.0f) * p2[dim] + fc * p3[dim];
curve_coef[1] = 0.5f * (-p0[dim] + p2[dim]);
curve_coef[2] = 0.5f * (2 * p0[dim] - 5 * p1[dim] + 4 * p2[dim] - p3[dim]);
curve_coef[3] = 0.5f * (-p0[dim] + 3 * p1[dim] - 3 * p2[dim] + p3[dim]);
float discroot = curve_coef[2] * curve_coef[2] - 3 * curve_coef[3] * curve_coef[1];
float ta = -1.0f;
@ -115,7 +114,8 @@ void CurveSystemManager::device_update(Device *device,
kcurve->curveflags |= CURVE_KN_RIBBONS;
}
kcurve->subdivisions = subdivisions;
/* Matching the tesselation rate limit in Embree. */
kcurve->subdivisions = clamp(1 << subdivisions, 1, 16);
}
if (progress.get_cancel())

View File

@ -21,6 +21,7 @@
#include "render/bake.h"
#include "render/buffers.h"
#include "render/camera.h"
#include "render/curves.h"
#include "render/graph.h"
#include "render/integrator.h"
#include "render/light.h"
@ -773,6 +774,7 @@ DeviceRequestedFeatures Session::get_requested_device_features()
*/
bool use_motion = scene->need_motion() == Scene::MotionType::MOTION_BLUR;
requested_features.use_hair = false;
requested_features.use_hair_thick = (scene->curve_system_manager->curve_shape == CURVE_THICK);
requested_features.use_object_motion = false;
requested_features.use_camera_motion = use_motion && scene->camera->use_motion();
foreach (Object *object, scene->objects) {