Cycles: refactor to split surface and volume attribute lookup more

This avoids OpenCL inlining heavy volume interpolation code once for every
data type, which could cause a performance regression when we add a float4
data type in the next commit.

Ref D2057
This commit is contained in:
Brecht Van Lommel 2020-10-26 18:13:53 +01:00
parent fb88d4eda8
commit ee6b989f8e
5 changed files with 186 additions and 193 deletions

View File

@ -21,38 +21,11 @@
CCL_NAMESPACE_BEGIN
/* Generic primitive attribute reading functions */
/* Read a float attribute, dispatching on the primitive type stored in sd->type.
 *
 * dx and dy are optional (may be NULL). For triangles and curves they are
 * forwarded to the primitive specific reader. For voxel attributes and for
 * the fallback case they are set to zero when non-NULL. */
ccl_device_inline float primitive_attribute_float(
    KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
{
  /* Triangles: a patch value of ~0 selects the plain triangle reader,
   * anything else the subdivision triangle reader. */
  if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
    if (subd_triangle_patch(kg, sd) != ~0) {
      return subd_triangle_attribute_float(kg, sd, desc, dx, dy);
    }
    return triangle_attribute_float(kg, sd, desc, dx, dy);
  }

#ifdef __HAIR__
  /* Curves. */
  if (sd->type & PRIMITIVE_ALL_CURVE) {
    return curve_attribute_float(kg, sd, desc, dx, dy);
  }
#endif

  /* Every remaining case reports zero derivatives. */
  if (dx) {
    *dx = 0.0f;
  }
  if (dy) {
    *dy = 0.0f;
  }

#ifdef __VOLUME__
  /* Voxel attribute on a valid object. */
  if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
    return volume_attribute_float(kg, sd, desc);
  }
#endif

  /* Nothing matched. */
  return 0.0f;
}
/* Surface Attributes
 *
 * Read geometry attributes for surface shading. These are kept separate from
 * the volume attribute functions for performance, mainly on the GPU, so that
 * surface shading does not bring in the heavy volume interpolation code. */
ccl_device_inline float primitive_surface_attribute_float(
KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
@ -77,117 +50,6 @@ ccl_device_inline float primitive_surface_attribute_float(
}
}
#ifdef __VOLUME__
/* Read a float voxel attribute.
 *
 * Returns 0.0f unless sd refers to an object (sd->object != OBJECT_NONE) and
 * the attribute element is ATTR_ELEMENT_VOXEL. */
ccl_device_inline float primitive_volume_attribute_float(KernelGlobals *kg,
                                                         const ShaderData *sd,
                                                         const AttributeDescriptor desc)
{
  const bool is_voxel = (sd->object != OBJECT_NONE) && (desc.element == ATTR_ELEMENT_VOXEL);
  if (!is_voxel) {
    return 0.0f;
  }
  return volume_attribute_float(kg, sd, desc);
}
#endif
/* Read a float2 attribute, dispatching on the primitive type stored in sd->type.
 *
 * dx and dy are optional (may be NULL). For triangles and curves they are
 * forwarded to the primitive specific reader. In all other cases they are set
 * to zero when non-NULL and a zero value is returned. A voxel attribute
 * additionally triggers kernel_assert(0) before the zero result. */
ccl_device_inline float2 primitive_attribute_float2(KernelGlobals *kg,
                                                    const ShaderData *sd,
                                                    const AttributeDescriptor desc,
                                                    float2 *dx,
                                                    float2 *dy)
{
  /* Triangles: a patch value of ~0 selects the plain triangle reader,
   * anything else the subdivision triangle reader. */
  if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
    if (subd_triangle_patch(kg, sd) != ~0) {
      return subd_triangle_attribute_float2(kg, sd, desc, dx, dy);
    }
    return triangle_attribute_float2(kg, sd, desc, dx, dy);
  }

#ifdef __HAIR__
  /* Curves. */
  if (sd->type & PRIMITIVE_ALL_CURVE) {
    return curve_attribute_float2(kg, sd, desc, dx, dy);
  }
#endif

#ifdef __VOLUME__
  /* Voxel attribute requested as float2: assert, then fall through to zero. */
  if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
    kernel_assert(0);
  }
#endif

  /* Zero value and zero derivatives. */
  if (dx) {
    *dx = make_float2(0.0f, 0.0f);
  }
  if (dy) {
    *dy = make_float2(0.0f, 0.0f);
  }
  return make_float2(0.0f, 0.0f);
}
/* Read a float3 attribute, dispatching on the primitive type stored in sd->type.
 *
 * dx and dy are optional (may be NULL). For triangles and curves they are
 * forwarded to the primitive specific reader. For voxel attributes and for
 * the fallback case they are set to zero when non-NULL. */
ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg,
                                                    const ShaderData *sd,
                                                    const AttributeDescriptor desc,
                                                    float3 *dx,
                                                    float3 *dy)
{
  /* Triangles: a patch value of ~0 selects the plain triangle reader,
   * anything else the subdivision triangle reader. */
  if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
    if (subd_triangle_patch(kg, sd) != ~0) {
      return subd_triangle_attribute_float3(kg, sd, desc, dx, dy);
    }
    return triangle_attribute_float3(kg, sd, desc, dx, dy);
  }

#ifdef __HAIR__
  /* Curves. */
  if (sd->type & PRIMITIVE_ALL_CURVE) {
    return curve_attribute_float3(kg, sd, desc, dx, dy);
  }
#endif

  /* Every remaining case reports zero derivatives. */
  if (dx) {
    *dx = make_float3(0.0f, 0.0f, 0.0f);
  }
  if (dy) {
    *dy = make_float3(0.0f, 0.0f, 0.0f);
  }

#ifdef __VOLUME__
  /* Voxel attribute on a valid object. */
  if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
    return volume_attribute_float3(kg, sd, desc);
  }
#endif

  /* Nothing matched. */
  return make_float3(0.0f, 0.0f, 0.0f);
}
/* Read a float4 attribute, dispatching on the primitive type stored in sd->type.
 *
 * dx and dy are optional (may be NULL). For triangles and curves they are
 * forwarded to the primitive specific reader. Otherwise they are set to zero
 * when non-NULL and a zero value is returned. */
ccl_device_inline float4 primitive_attribute_float4(KernelGlobals *kg,
                                                    const ShaderData *sd,
                                                    const AttributeDescriptor desc,
                                                    float4 *dx,
                                                    float4 *dy)
{
  /* Triangles: a patch value of ~0 selects the plain triangle reader,
   * anything else the subdivision triangle reader. */
  if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
    if (subd_triangle_patch(kg, sd) != ~0) {
      return subd_triangle_attribute_float4(kg, sd, desc, dx, dy);
    }
    return triangle_attribute_float4(kg, sd, desc, dx, dy);
  }

#ifdef __HAIR__
  /* Curves. */
  if (sd->type & PRIMITIVE_ALL_CURVE) {
    return curve_attribute_float4(kg, sd, desc, dx, dy);
  }
#endif

  /* Fallback: zero value and zero derivatives. */
  if (dx) {
    *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
  }
  if (dy) {
    *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
  }
  return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
ccl_device_inline float2 primitive_surface_attribute_float2(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc,
@ -240,18 +102,80 @@ ccl_device_inline float3 primitive_surface_attribute_float3(KernelGlobals *kg,
}
}
/* Read a float4 surface attribute, dispatching on the primitive type in sd->type.
 *
 * dx and dy are optional (may be NULL). For triangles and curves they are
 * forwarded to the primitive specific reader. Otherwise they are set to zero
 * when non-NULL and a zero value is returned. */
ccl_device_inline float4 primitive_surface_attribute_float4(KernelGlobals *kg,
                                                            const ShaderData *sd,
                                                            const AttributeDescriptor desc,
                                                            float4 *dx,
                                                            float4 *dy)
{
  /* Triangles: a patch value of ~0 selects the plain triangle reader,
   * anything else the subdivision triangle reader. */
  if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
    if (subd_triangle_patch(kg, sd) != ~0) {
      return subd_triangle_attribute_float4(kg, sd, desc, dx, dy);
    }
    return triangle_attribute_float4(kg, sd, desc, dx, dy);
  }

#ifdef __HAIR__
  /* Curves. */
  if (sd->type & PRIMITIVE_ALL_CURVE) {
    return curve_attribute_float4(kg, sd, desc, dx, dy);
  }
#endif

  /* Fallback: zero value and zero derivatives. */
  if (dx) {
    *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
  }
  if (dy) {
    *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
  }
  return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
#ifdef __VOLUME__
/* Volume Attributes
 *
 * Read geometry attributes for volume shading. These are kept separate from
 * the surface attribute functions for performance, mainly on the GPU, so that
 * the heavy volume interpolation code is only brought in where it is needed. */
/* Test whether an attribute should be read through the volume code path:
 * sd must refer to an object (sd->object != OBJECT_NONE) and the attribute
 * element must be ATTR_ELEMENT_VOXEL. */
ccl_device_inline bool primitive_is_volume_attribute(const ShaderData *sd,
                                                     const AttributeDescriptor desc)
{
  if (sd->object == OBJECT_NONE) {
    return false;
  }
  return desc.element == ATTR_ELEMENT_VOXEL;
}
/* Read a volume attribute as a float.
 *
 * Returns 0.0f when primitive_is_volume_attribute() is false. Otherwise the
 * float4 volume value is fetched and converted with
 * volume_attribute_value_to_float(). */
ccl_device_inline float primitive_volume_attribute_float(KernelGlobals *kg,
                                                         const ShaderData *sd,
                                                         const AttributeDescriptor desc)
{
  if (!primitive_is_volume_attribute(sd, desc)) {
    return 0.0f;
  }
  const float4 value = volume_attribute_float4(kg, sd, desc);
  return volume_attribute_value_to_float(value);
}
ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc)
{
if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
return volume_attribute_float3(kg, sd, desc);
if (primitive_is_volume_attribute(sd, desc)) {
return volume_attribute_value_to_float3(volume_attribute_float4(kg, sd, desc));
}
else {
return make_float3(0.0f, 0.0f, 0.0f);
}
}
/* Read a volume attribute as a float4.
 *
 * Returns a zero float4 when primitive_is_volume_attribute() is false,
 * otherwise the result of volume_attribute_float4() unchanged. */
ccl_device_inline float4 primitive_volume_attribute_float4(KernelGlobals *kg,
                                                           const ShaderData *sd,
                                                           const AttributeDescriptor desc)
{
  if (!primitive_is_volume_attribute(sd, desc)) {
    return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
  }
  return volume_attribute_float4(kg, sd, desc);
}
#endif
/* Default UV coordinate */

View File

@ -47,9 +47,30 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
return P;
}
ccl_device float volume_attribute_float(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc)
/* Convert a float4 volume value to a single float: the average of its
 * float3 part (the w component is not used). */
ccl_device float volume_attribute_value_to_float(const float4 value)
{
  const float3 rgb = float4_to_float3(value);
  return average(rgb);
}
/* Extract the alpha of a float4 volume value, i.e. its w component. */
ccl_device float volume_attribute_value_to_alpha(const float4 value)
{
  const float alpha = value.w;
  return alpha;
}
/* Convert a float4 volume value to a float3.
 *
 * When w is greater than 1e-6 and not exactly 1, the float3 part is divided
 * by w. In every other case the float3 part is returned as is. */
ccl_device float3 volume_attribute_value_to_float3(const float4 value)
{
  const float3 rgb = float4_to_float3(value);
  const float alpha = value.w;

  /* For RGBA colors, unpremultiply after interpolation. */
  const bool needs_unpremultiply = (alpha > 1e-6f) && (alpha != 1.0f);
  if (!needs_unpremultiply) {
    return rgb;
  }
  return rgb / alpha;
}
ccl_device float4 volume_attribute_float4(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc)
{
/* todo: optimize this so we don't have to transform both here and in
* kernel_tex_image_interp_3d when possible. Also could optimize for the
@ -58,27 +79,7 @@ ccl_device float volume_attribute_float(KernelGlobals *kg,
object_inverse_position_transform(kg, sd, &P);
InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC :
INTERPOLATION_NONE;
float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P, interp);
return average(float4_to_float3(r));
}
ccl_device float3 volume_attribute_float3(KernelGlobals *kg,
const ShaderData *sd,
const AttributeDescriptor desc)
{
float3 P = sd->P;
object_inverse_position_transform(kg, sd, &P);
InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC :
INTERPOLATION_NONE;
float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P, interp);
if (r.w > 1e-6f && r.w != 1.0f) {
/* For RGBA colors, unpremultiply after interpolation. */
return float4_to_float3(r) / r.w;
}
else {
return float4_to_float3(r);
}
return kernel_tex_image_interp_3d(kg, desc.offset, P, interp);
}
#endif

View File

@ -675,26 +675,50 @@ static bool get_primitive_attribute(KernelGlobals *kg,
if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector ||
attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) {
float3 fval[3];
fval[0] = primitive_attribute_float3(
kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
if (primitive_is_volume_attribute(sd, attr.desc)) {
fval[0] = primitive_volume_attribute_float3(kg, sd, attr.desc);
}
else {
memset(fval, 0, sizeof(fval));
fval[0] = primitive_surface_attribute_float3(
kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
}
return set_attribute_float3(fval, type, derivatives, val);
}
else if (attr.type == TypeFloat2) {
float2 fval[3];
fval[0] = primitive_attribute_float2(
kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
return set_attribute_float2(fval, type, derivatives, val);
if (primitive_is_volume_attribute(sd, attr.desc)) {
assert(!"Float2 attribute not support for volumes");
return false;
}
else {
float2 fval[3];
fval[0] = primitive_surface_attribute_float2(
kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
return set_attribute_float2(fval, type, derivatives, val);
}
}
else if (attr.type == TypeDesc::TypeFloat) {
float fval[3];
fval[0] = primitive_attribute_float(
kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
if (primitive_is_volume_attribute(sd, attr.desc)) {
memset(fval, 0, sizeof(fval));
fval[0] = primitive_volume_attribute_float(kg, sd, attr.desc);
}
else {
fval[0] = primitive_surface_attribute_float(
kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
}
return set_attribute_float(fval, type, derivatives, val);
}
else if (attr.type == TypeRGBA) {
float4 fval[3];
fval[0] = primitive_attribute_float4(
kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
if (primitive_is_volume_attribute(sd, attr.desc)) {
memset(fval, 0, sizeof(fval));
fval[0] = primitive_volume_attribute_float4(kg, sd, attr.desc);
}
else {
fval[0] = primitive_surface_attribute_float4(
kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
}
return set_attribute_float4(fval, type, derivatives, val);
}
else {

View File

@ -50,9 +50,27 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
uint out_offset = 0;
AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
/* fetch and store attribute */
#ifdef __VOLUME__
/* Volumes
* NOTE: moving this into its own node type might help improve performance. */
if (primitive_is_volume_attribute(sd, desc)) {
const float4 value = volume_attribute_float4(kg, sd, desc);
if (type == NODE_ATTR_FLOAT) {
const float f = volume_attribute_value_to_float(value);
stack_store_float(stack, out_offset, f);
}
else {
const float3 f = volume_attribute_value_to_float3(value);
stack_store_float3(stack, out_offset, f);
}
return;
}
#endif
/* Surface */
if (desc.type == NODE_ATTR_FLOAT) {
float f = primitive_attribute_float(kg, sd, desc, NULL, NULL);
float f = primitive_surface_attribute_float(kg, sd, desc, NULL, NULL);
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, f);
}
@ -61,7 +79,7 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
}
}
else if (desc.type == NODE_ATTR_FLOAT2) {
float2 f = primitive_attribute_float2(kg, sd, desc, NULL, NULL);
float2 f = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL);
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, f.x);
}
@ -70,7 +88,7 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
}
}
else if (desc.type == NODE_ATTR_RGBA) {
float4 f = primitive_attribute_float4(kg, sd, desc, NULL, NULL);
float4 f = primitive_surface_attribute_float4(kg, sd, desc, NULL, NULL);
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, average(float4_to_float3(f)));
}
@ -79,7 +97,7 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
}
}
else {
float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, average(f));
}
@ -95,7 +113,20 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *
uint out_offset = 0;
AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
/* fetch and store attribute */
#ifdef __VOLUME__
/* Volume */
if (primitive_is_volume_attribute(sd, desc)) {
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, 0.0f);
}
else {
stack_store_float3(stack, out_offset, make_float3(0.0f, 0.0f, 0.0f));
}
return;
}
#endif
/* Surface */
if (desc.type == NODE_ATTR_FLOAT) {
float dx;
float f = primitive_surface_attribute_float(kg, sd, desc, &dx, NULL);
@ -108,7 +139,7 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *
}
else if (desc.type == NODE_ATTR_FLOAT2) {
float2 dx;
float2 f = primitive_attribute_float2(kg, sd, desc, &dx, NULL);
float2 f = primitive_surface_attribute_float2(kg, sd, desc, &dx, NULL);
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, f.x + dx.x);
}
@ -118,7 +149,7 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *
}
else if (desc.type == NODE_ATTR_RGBA) {
float4 dx;
float4 f = primitive_attribute_float4(kg, sd, desc, &dx, NULL);
float4 f = primitive_surface_attribute_float4(kg, sd, desc, &dx, NULL);
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, average(float4_to_float3(f + dx)));
}
@ -144,7 +175,20 @@ ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *
uint out_offset = 0;
AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
/* fetch and store attribute */
#ifdef __VOLUME__
/* Volume */
if (primitive_is_volume_attribute(sd, desc)) {
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, 0.0f);
}
else {
stack_store_float3(stack, out_offset, make_float3(0.0f, 0.0f, 0.0f));
}
return;
}
#endif
/* Surface */
if (desc.type == NODE_ATTR_FLOAT) {
float dy;
float f = primitive_surface_attribute_float(kg, sd, desc, NULL, &dy);
@ -157,7 +201,7 @@ ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *
}
else if (desc.type == NODE_ATTR_FLOAT2) {
float2 dy;
float2 f = primitive_attribute_float2(kg, sd, desc, NULL, &dy);
float2 f = primitive_surface_attribute_float2(kg, sd, desc, NULL, &dy);
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, f.x + dy.x);
}
@ -167,7 +211,7 @@ ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *
}
else if (desc.type == NODE_ATTR_RGBA) {
float4 dy;
float4 f = primitive_attribute_float4(kg, sd, desc, NULL, &dy);
float4 f = primitive_surface_attribute_float4(kg, sd, desc, NULL, &dy);
if (type == NODE_ATTR_FLOAT) {
stack_store_float(stack, out_offset, average(float4_to_float3(f + dy)));
}

View File

@ -25,7 +25,7 @@ ccl_device void svm_node_vertex_color(KernelGlobals *kg,
{
AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id);
if (descriptor.offset != ATTR_STD_NOT_FOUND) {
float4 vertex_color = primitive_attribute_float4(kg, sd, descriptor, NULL, NULL);
float4 vertex_color = primitive_surface_attribute_float4(kg, sd, descriptor, NULL, NULL);
stack_store_float3(stack, color_offset, float4_to_float3(vertex_color));
stack_store_float(stack, alpha_offset, vertex_color.w);
}
@ -51,7 +51,7 @@ ccl_device_noinline
AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id);
if (descriptor.offset != ATTR_STD_NOT_FOUND) {
float4 dx;
float4 vertex_color = primitive_attribute_float4(kg, sd, descriptor, &dx, NULL);
float4 vertex_color = primitive_surface_attribute_float4(kg, sd, descriptor, &dx, NULL);
vertex_color += dx;
stack_store_float3(stack, color_offset, float4_to_float3(vertex_color));
stack_store_float(stack, alpha_offset, vertex_color.w);
@ -78,7 +78,7 @@ ccl_device_noinline
AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id);
if (descriptor.offset != ATTR_STD_NOT_FOUND) {
float4 dy;
float4 vertex_color = primitive_attribute_float4(kg, sd, descriptor, NULL, &dy);
float4 vertex_color = primitive_surface_attribute_float4(kg, sd, descriptor, NULL, &dy);
vertex_color += dy;
stack_store_float3(stack, color_offset, float4_to_float3(vertex_color));
stack_store_float(stack, alpha_offset, vertex_color.w);