T61513: Refactored Cycles Attribute Retrieval

There are generic functions to retrieve float and float3 attributes:
`primitive_attribute_float` and `primitive_attribute_float3`. Inside
these functions a prioritised if-else construction checks where
the attribute is stored and then retrieves it from that location.

Actually, the calling function most of the time already knows where
the data is stored. So we can simplify this by splitting these
functions and removing the check logic.

This patch splits the `primitive_attribute_float?` functions into
`primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`.
This leads to less branching and more optimal kernels.

The original function is still being used by OSL and `svm_node_attr`.

This will reduce the compilation time and render time for kernels.
Especially in production scenes there is a lot of benefit.

Impact in compilation times

    job  |   scene_name    | previous |  new  | percentage
  -------+-----------------+----------+-------+------------
  t61513 | empty           |    10.63 | 10.66 |          0%
  t61513 | bmw             |    17.91 | 17.65 |          1%
  t61513 | fishycat        |    19.57 | 17.68 |         10%
  t61513 | barbershop      |    54.10 | 24.41 |         55%
  t61513 | classroom       |    17.55 | 16.29 |          7%
  t61513 | koro            |    18.92 | 18.05 |          5%
  t61513 | pavillion       |    17.43 | 16.52 |          5%
  t61513 | splash279       |    16.48 | 14.91 |         10%
  t61513 | volume_emission |    36.22 | 21.60 |         40%

Impact in render times

    job  |   scene_name    | previous |  new   | percentage
  -------+-----------------+----------+--------+------------
  t61513 | empty           |    21.06 |  20.35 |          3%
  t61513 | bmw             |   198.44 | 190.05 |          4%
  t61513 | fishycat        |   394.20 | 401.25 |         -2%
  t61513 | barbershop      |  1188.16 | 912.39 |         23%
  t61513 | classroom       |   341.08 | 340.38 |          0%
  t61513 | koro            |   472.43 | 471.80 |          0%
  t61513 | pavillion       |   905.77 | 899.80 |          1%
  t61513 | splash279       |    55.26 |  54.86 |          1%
  t61513 | volume_emission |    62.59 |  61.70 |          1%

There is also a positive impact when using CPU and CUDA, but it is small.

I didn't split the hair logic from the surface logic due to:

* Hair and surface use the same attribute types. It was not clear if they could be
  split when looking at the code only.
* Hair and surface are quick to compile and to read. So the benefit is quite
  small.

Differential Revision: https://developer.blender.org/D4375
This commit is contained in:
Jeroen Bakker 2019-02-19 15:41:22 +01:00
parent 8138eb0dfe
commit e6f5632eb1
8 changed files with 136 additions and 73 deletions

View File

@ -22,7 +22,6 @@
CCL_NAMESPACE_BEGIN
/* Generic primitive attribute reading functions */
ccl_device_inline float primitive_attribute_float(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc,
@ -41,7 +40,9 @@ ccl_device_inline float primitive_attribute_float(KernelGlobals *kg,
#endif
#ifdef __VOLUME__
else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
return volume_attribute_float(kg, sd, desc, dx, dy);
if(dx) *dx = 0.0f;
if(dy) *dy = 0.0f;
return volume_attribute_float(kg, sd, desc);
}
#endif
else {
@ -51,6 +52,43 @@ ccl_device_inline float primitive_attribute_float(KernelGlobals *kg,
}
}
/* Read a float attribute for a surface primitive (triangle or curve).
 *
 * Triangles are routed to triangle_attribute_float() when
 * subd_triangle_patch() returns ~0, and to subd_triangle_attribute_float()
 * otherwise. With __HAIR__ enabled, curve primitives are routed to
 * curve_attribute_float(). Any other primitive type yields 0.0f and zeroes
 * the derivatives.
 *
 * dx, dy: optional derivative outputs; either may be NULL. */
ccl_device_inline float primitive_surface_attribute_float(KernelGlobals *kg,
                                                          const ShaderData *sd,
                                                          const AttributeDescriptor desc,
                                                          float *dx, float *dy)
{
	if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
		if(subd_triangle_patch(kg, sd) != ~0) {
			return subd_triangle_attribute_float(kg, sd, desc, dx, dy);
		}
		return triangle_attribute_float(kg, sd, desc, dx, dy);
	}

#ifdef __HAIR__
	if(sd->type & PRIMITIVE_ALL_CURVE) {
		return curve_attribute_float(kg, sd, desc, dx, dy);
	}
#endif

	/* Neither triangle nor curve: no attribute data, no derivatives. */
	if(dx) {
		*dx = 0.0f;
	}
	if(dy) {
		*dy = 0.0f;
	}
	return 0.0f;
}
#ifdef __VOLUME__
/* Read a float attribute for a volume.
 *
 * The lookup is forwarded to volume_attribute_float() only when the shader
 * data refers to an object (sd->object != OBJECT_NONE) and the attribute is
 * stored per voxel (ATTR_ELEMENT_VOXEL); in every other case 0.0f is
 * returned. No derivatives are produced. */
ccl_device_inline float primitive_volume_attribute_float(KernelGlobals *kg,
                                                         const ShaderData *sd,
                                                         const AttributeDescriptor desc)
{
	if(sd->object == OBJECT_NONE || desc.element != ATTR_ELEMENT_VOXEL) {
		return 0.0f;
	}

	return volume_attribute_float(kg, sd, desc);
}
#endif
ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc,
@ -69,7 +107,9 @@ ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg,
#endif
#ifdef __VOLUME__
else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
return volume_attribute_float3(kg, sd, desc, dx, dy);
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
return volume_attribute_float3(kg, sd, desc);
}
#endif
else {
@ -79,6 +119,43 @@ ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg,
}
}
/* Read a float3 attribute for a surface primitive (triangle or curve).
 *
 * Triangles are routed to triangle_attribute_float3() when
 * subd_triangle_patch() returns ~0, and to subd_triangle_attribute_float3()
 * otherwise. With __HAIR__ enabled, curve primitives are routed to
 * curve_attribute_float3(). Any other primitive type yields a zero vector
 * and zeroes the derivatives.
 *
 * dx, dy: optional derivative outputs; either may be NULL. */
ccl_device_inline float3 primitive_surface_attribute_float3(KernelGlobals *kg,
                                                            const ShaderData *sd,
                                                            const AttributeDescriptor desc,
                                                            float3 *dx, float3 *dy)
{
	if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
		if(subd_triangle_patch(kg, sd) != ~0) {
			return subd_triangle_attribute_float3(kg, sd, desc, dx, dy);
		}
		return triangle_attribute_float3(kg, sd, desc, dx, dy);
	}

#ifdef __HAIR__
	if(sd->type & PRIMITIVE_ALL_CURVE) {
		return curve_attribute_float3(kg, sd, desc, dx, dy);
	}
#endif

	/* Neither triangle nor curve: no attribute data, no derivatives. */
	if(dx) {
		*dx = make_float3(0.0f, 0.0f, 0.0f);
	}
	if(dy) {
		*dy = make_float3(0.0f, 0.0f, 0.0f);
	}
	return make_float3(0.0f, 0.0f, 0.0f);
}
#ifdef __VOLUME__
/* Read a float3 attribute for a volume.
 *
 * The lookup is forwarded to volume_attribute_float3() only when the shader
 * data refers to an object (sd->object != OBJECT_NONE) and the attribute is
 * stored per voxel (ATTR_ELEMENT_VOXEL); in every other case a zero vector
 * is returned. No derivatives are produced. */
ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals *kg,
                                                           const ShaderData *sd,
                                                           const AttributeDescriptor desc)
{
	if(sd->object == OBJECT_NONE || desc.element != ATTR_ELEMENT_VOXEL) {
		return make_float3(0.0f, 0.0f, 0.0f);
	}

	return volume_attribute_float3(kg, sd, desc);
}
#endif
/* Default UV coordinate */
ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd)
@ -88,7 +165,7 @@ ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd)
if(desc.offset == ATTR_STD_NOT_FOUND)
return make_float3(0.0f, 0.0f, 0.0f);
float3 uv = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
float3 uv = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
uv.z = 1.0f;
return uv;
}
@ -104,8 +181,8 @@ ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, in
if(desc_face_id.offset == ATTR_STD_NOT_FOUND || desc_uv.offset == ATTR_STD_NOT_FOUND)
return false;
float3 uv3 = primitive_attribute_float3(kg, sd, desc_uv, NULL, NULL);
float face_id_f = primitive_attribute_float(kg, sd, desc_face_id, NULL, NULL);
float3 uv3 = primitive_surface_attribute_float3(kg, sd, desc_uv, NULL, NULL);
float face_id_f = primitive_surface_attribute_float(kg, sd, desc_face_id, NULL, NULL);
*uv = make_float2(uv3.x, uv3.y);
*face_id = (int)face_id_f;
@ -130,7 +207,7 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd)
const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED);
if(desc.offset != ATTR_STD_NOT_FOUND) {
float3 data = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
float3 data = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f);
object_normal_transform(kg, sd, &data);
return cross(sd->N, normalize(cross(data, sd->N)));
@ -176,10 +253,10 @@ ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *
object_motion_info(kg, sd->object, NULL, &numverts, &numkeys);
/* lookup attributes */
motion_pre = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
motion_pre = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
desc.offset += (sd->type & PRIMITIVE_ALL_TRIANGLE)? numverts: numkeys;
motion_post = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
motion_post = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
#ifdef __HAIR__
if(is_curve_primitive && (sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) {

View File

@ -47,27 +47,20 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
return P;
}
ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc)
{
float3 P = volume_normalized_position(kg, sd, sd->P);
InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE;
float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
if(dx) *dx = 0.0f;
if(dy) *dy = 0.0f;
return average(float4_to_float3(r));
}
ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc)
{
float3 P = volume_normalized_position(kg, sd, sd->P);
InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE;
float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
if(r.w > 1e-6f && r.w != 1.0f) {
/* For RGBA colors, unpremultiply after interpolation. */
return float4_to_float3(r) / r.w;

View File

@ -561,7 +561,7 @@ static bool set_attribute_matrix(const Transform& tfm, TypeDesc type, void *val)
return false;
}
static bool get_mesh_element_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr,
static bool get_primitive_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr,
const TypeDesc& type, bool derivatives, void *val)
{
if(attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector ||
@ -849,7 +849,7 @@ bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring
if(attr.desc.element != ATTR_ELEMENT_OBJECT) {
/* triangle and vertex attributes */
if(get_mesh_element_attribute(kg, sd, attr, type, derivatives, val))
if(get_primitive_attribute(kg, sd, attr, type, derivatives, val))
return true;
else
return get_mesh_attribute(kg, sd, attr, type, derivatives, val);

View File

@ -52,24 +52,22 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
/* fetch and store attribute */
if(type == NODE_ATTR_FLOAT) {
if(desc.type == NODE_ATTR_FLOAT) {
float f = primitive_attribute_float(kg, sd, desc, NULL, NULL);
if (desc.type == NODE_ATTR_FLOAT) {
float f = primitive_attribute_float(kg, sd, desc, NULL, NULL);
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, f);
}
else {
float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
stack_store_float(stack, out_offset, average(f));
stack_store_float3(stack, out_offset, make_float3(f, f, f));
}
}
else {
if(desc.type == NODE_ATTR_FLOAT3) {
float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
stack_store_float3(stack, out_offset, f);
float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, average(f));
}
else {
float f = primitive_attribute_float(kg, sd, desc, NULL, NULL);
stack_store_float3(stack, out_offset, make_float3(f, f, f));
stack_store_float3(stack, out_offset, f);
}
}
}
@ -86,28 +84,24 @@ void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint
AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
/* fetch and store attribute */
if(type == NODE_ATTR_FLOAT) {
if(desc.type == NODE_ATTR_FLOAT) {
float dx;
float f = primitive_attribute_float(kg, sd, desc, &dx, NULL);
if (desc.type == NODE_ATTR_FLOAT) {
float dx;
float f = primitive_surface_attribute_float(kg, sd, desc, &dx, NULL);
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, f+dx);
}
else {
float3 dx;
float3 f = primitive_attribute_float3(kg, sd, desc, &dx, NULL);
stack_store_float(stack, out_offset, average(f+dx));
stack_store_float3(stack, out_offset, make_float3(f+dx, f+dx, f+dx));
}
}
else {
if(desc.type == NODE_ATTR_FLOAT3) {
float3 dx;
float3 f = primitive_attribute_float3(kg, sd, desc, &dx, NULL);
stack_store_float3(stack, out_offset, f+dx);
float3 dx;
float3 f = primitive_surface_attribute_float3(kg, sd, desc, &dx, NULL);
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, average(f+dx));
}
else {
float dx;
float f = primitive_attribute_float(kg, sd, desc, &dx, NULL);
stack_store_float3(stack, out_offset, make_float3(f+dx, f+dx, f+dx));
stack_store_float3(stack, out_offset, f+dx);
}
}
}
@ -127,28 +121,24 @@ void svm_node_attr_bump_dy(KernelGlobals *kg,
AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
/* fetch and store attribute */
if(type == NODE_ATTR_FLOAT) {
if(desc.type == NODE_ATTR_FLOAT) {
float dy;
float f = primitive_attribute_float(kg, sd, desc, NULL, &dy);
if (desc.type == NODE_ATTR_FLOAT) {
float dy;
float f = primitive_surface_attribute_float(kg, sd, desc, NULL, &dy);
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, f+dy);
}
else {
float3 dy;
float3 f = primitive_attribute_float3(kg, sd, desc, NULL, &dy);
stack_store_float(stack, out_offset, average(f+dy));
stack_store_float3(stack, out_offset, make_float3(f+dy, f+dy, f+dy));
}
}
else {
if(desc.type == NODE_ATTR_FLOAT3) {
float3 dy;
float3 f = primitive_attribute_float3(kg, sd, desc, NULL, &dy);
stack_store_float3(stack, out_offset, f+dy);
float3 dy;
float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, &dy);
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, average(f+dy));
}
else {
float dy;
float f = primitive_attribute_float(kg, sd, desc, NULL, &dy);
stack_store_float3(stack, out_offset, make_float3(f+dy, f+dy, f+dy));
stack_store_float3(stack, out_offset, f+dy);
}
}
}

View File

@ -30,7 +30,7 @@ ccl_device void svm_node_enter_bump_eval(KernelGlobals *kg, ShaderData *sd, floa
if(desc.offset != ATTR_STD_NOT_FOUND) {
float3 P, dPdx, dPdy;
P = primitive_attribute_float3(kg, sd, desc, &dPdx, &dPdy);
P = primitive_surface_attribute_float3(kg, sd, desc, &dPdx, &dPdy);
object_position_transform(kg, sd, &P);
object_dir_transform(kg, sd, &dPdx);

View File

@ -744,7 +744,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node4.y);
float random = 0.0f;
if(attr_descr_random.offset != ATTR_STD_NOT_FOUND) {
random = primitive_attribute_float(kg, sd, attr_descr_random, NULL, NULL);
random = primitive_surface_attribute_float(kg, sd, attr_descr_random, NULL, NULL);
}
else {
random = stack_load_float_default(stack, random_ofs, data_node3.y);
@ -974,7 +974,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg, ShaderData *sd, fl
/* Density and color attribute lookup if available. */
const AttributeDescriptor attr_density = find_attribute(kg, sd, attr_node.x);
if(attr_density.offset != ATTR_STD_NOT_FOUND) {
primitive_density = primitive_attribute_float(kg, sd, attr_density, NULL, NULL);
primitive_density = primitive_volume_attribute_float(kg, sd, attr_density);
density = fmaxf(density * primitive_density, 0.0f);
}
}
@ -985,7 +985,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg, ShaderData *sd, fl
const AttributeDescriptor attr_color = find_attribute(kg, sd, attr_node.y);
if(attr_color.offset != ATTR_STD_NOT_FOUND) {
color *= primitive_attribute_float3(kg, sd, attr_color, NULL, NULL);
color *= primitive_volume_attribute_float3(kg, sd, attr_color);
}
/* Add closure for volume scattering. */
@ -1026,7 +1026,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg, ShaderData *sd, fl
/* Add flame temperature from attribute if available. */
const AttributeDescriptor attr_temperature = find_attribute(kg, sd, attr_node.z);
if(attr_temperature.offset != ATTR_STD_NOT_FOUND) {
float temperature = primitive_attribute_float(kg, sd, attr_temperature, NULL, NULL);
float temperature = primitive_volume_attribute_float(kg, sd, attr_temperature);
T *= fmaxf(temperature, 0.0f);
}

View File

@ -137,7 +137,7 @@ ccl_device void svm_node_vector_displacement(KernelGlobals *kg, ShaderData *sd,
const AttributeDescriptor attr = find_attribute(kg, sd, node.z);
float3 tangent;
if(attr.offset != ATTR_STD_NOT_FOUND) {
tangent = primitive_attribute_float3(kg, sd, attr, NULL, NULL);
tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL);
}
else {
tangent = normalize(sd->dPdu);
@ -146,7 +146,7 @@ ccl_device void svm_node_vector_displacement(KernelGlobals *kg, ShaderData *sd,
float3 bitangent = normalize(cross(normal, tangent));
const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w);
if(attr_sign.offset != ATTR_STD_NOT_FOUND) {
float sign = primitive_attribute_float(kg, sd, attr_sign, NULL, NULL);
float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL);
bitangent *= sign;
}

View File

@ -292,12 +292,12 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st
}
/* get _unnormalized_ interpolated normal and tangent */
float3 tangent = primitive_attribute_float3(kg, sd, attr, NULL, NULL);
float sign = primitive_attribute_float(kg, sd, attr_sign, NULL, NULL);
float3 tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL);
float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL);
float3 normal;
if(sd->shader & SHADER_SMOOTH_NORMAL) {
normal = primitive_attribute_float3(kg, sd, attr_normal, NULL, NULL);
normal = primitive_surface_attribute_float3(kg, sd, attr_normal, NULL, NULL);
}
else {
normal = sd->Ng;
@ -360,25 +360,28 @@ ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack
decode_node_uchar4(node.y, &tangent_offset, &direction_type, &axis, NULL);
float3 tangent;
float3 attribute_value;
const AttributeDescriptor desc = find_attribute(kg, sd, node.z);
if (desc.offset != ATTR_STD_NOT_FOUND) {
attribute_value = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
}
if(direction_type == NODE_TANGENT_UVMAP) {
/* UV map */
const AttributeDescriptor desc = find_attribute(kg, sd, node.z);
if(desc.offset == ATTR_STD_NOT_FOUND)
tangent = make_float3(0.0f, 0.0f, 0.0f);
else
tangent = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
tangent = attribute_value;
}
else {
/* radial */
const AttributeDescriptor desc = find_attribute(kg, sd, node.z);
float3 generated;
if(desc.offset == ATTR_STD_NOT_FOUND)
generated = sd->P;
else
generated = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
generated = attribute_value;
if(axis == NODE_TANGENT_AXIS_X)
tangent = make_float3(0.0f, -(generated.z - 0.5f), (generated.y - 0.5f));