Cycles: CUDA bicubic and tricubic texture interpolation support.

While cubic interpolation is quite expensive on the CPU compared to linear
interpolation, the difference on the GPU is quite small.
This commit is contained in:
Brecht Van Lommel 2017-10-07 02:15:12 +02:00
parent 23098cda99
commit 2d92988f6b
7 changed files with 158 additions and 55 deletions

View File

@ -1208,6 +1208,8 @@ class CYCLES_WORLD_PT_settings(CyclesButtonsPanel, Panel):
sub = col.column()
sub.active = use_cpu(context)
sub.prop(cworld, "volume_sampling", text="")
sub = col.column()
sub.active = not use_opencl(context)
sub.prop(cworld, "volume_interpolation", text="")
col.prop(cworld, "homogeneous_volume", text="Homogeneous")
@ -1307,6 +1309,8 @@ class CYCLES_MATERIAL_PT_settings(CyclesButtonsPanel, Panel):
sub = col.column()
sub.active = use_cpu(context)
sub.prop(cmat, "volume_sampling", text="")
sub = col.column()
sub.active = not use_opencl(context)
sub.prop(cmat, "volume_interpolation", text="")
col.prop(cmat, "homogeneous_volume", text="Homogeneous")

View File

@ -50,15 +50,8 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
{
float3 P = volume_normalized_position(kg, sd, sd->P);
#ifdef __KERNEL_GPU__
float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
#else
float4 r;
if(sd->flag & SD_VOLUME_CUBIC)
r = kernel_tex_image_interp_3d_ex(kg, desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC);
else
r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
#endif
InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE;
float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
if(dx) *dx = 0.0f;
if(dy) *dy = 0.0f;
@ -69,15 +62,8 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd,
ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
{
float3 P = volume_normalized_position(kg, sd, sd->P);
#ifdef __KERNEL_GPU__
float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
#else
float4 r;
if(sd->flag & SD_VOLUME_CUBIC)
r = kernel_tex_image_interp_3d_ex(kg, desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC);
else
r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
#endif
InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE;
float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp);
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);

View File

@ -430,12 +430,12 @@ template<typename T> struct TextureInterpolator {
static ccl_always_inline float4 interp_3d(const TextureInfo& info,
float x, float y, float z,
int interpolation = INTERPOLATION_LINEAR)
InterpolationType interp)
{
if(UNLIKELY(!info.data))
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
switch(interpolation) {
switch((interp == INTERPOLATION_NONE)? info.interpolation: interp) {
case INTERPOLATION_CLOSEST:
return interp_3d_closest(info, x, y, z);
case INTERPOLATION_LINEAR:
@ -468,29 +468,7 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
}
}
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z)
{
const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
InterpolationType interp = (InterpolationType)info.interpolation;
switch(kernel_tex_type(id)) {
case IMAGE_DATA_TYPE_HALF:
return TextureInterpolator<half>::interp_3d(info, x, y, z, interp);
case IMAGE_DATA_TYPE_BYTE:
return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp);
case IMAGE_DATA_TYPE_FLOAT:
return TextureInterpolator<float>::interp_3d(info, x, y, z, interp);
case IMAGE_DATA_TYPE_HALF4:
return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp);
case IMAGE_DATA_TYPE_BYTE4:
return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp);
case IMAGE_DATA_TYPE_FLOAT4:
default:
return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp);
}
}
ccl_device float4 kernel_tex_image_interp_3d_ex(KernelGlobals *kg, int id, float x, float y, float z, int interp)
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
{
const TextureInfo& info = kernel_tex_fetch(__texture_info, id);

View File

@ -18,7 +18,115 @@
/* Kepler */
ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y)
/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
ccl_device float cubic_w0(float a)
{
	/* First cubic B-spline basis weight for fractional offset a:
	 * (-a^3 + 3a^2 - 3a + 1) / 6, evaluated in Horner form. */
	const float one_sixth = 1.0f/6.0f;
	const float horner = a*(a*(3.0f - a) - 3.0f) + 1.0f;
	return one_sixth*horner;
}
ccl_device float cubic_w1(float a)
{
	/* Second cubic B-spline basis weight: (3a^3 - 6a^2 + 4) / 6. */
	const float a_sq = a*a;
	const float poly = a_sq*(3.0f*a - 6.0f) + 4.0f;
	return (1.0f/6.0f)*poly;
}
ccl_device float cubic_w2(float a)
{
	/* Third cubic B-spline basis weight: (-3a^3 + 3a^2 + 3a + 1) / 6,
	 * evaluated in Horner form. */
	const float innermost = 3.0f - 3.0f*a;
	const float poly = a*(a*innermost + 3.0f) + 1.0f;
	return (1.0f/6.0f)*poly;
}
ccl_device float cubic_w3(float a)
{
	/* Fourth cubic B-spline basis weight: a^3 / 6. */
	const float a_cubed = a*a*a;
	return (1.0f/6.0f)*a_cubed;
}
/* g0 and g1 are the two amplitude functions. */
ccl_device float cubic_g0(float a)
{
	/* Amplitude of the first filtered fetch: sum of the first two
	 * basis weights. */
	const float w0 = cubic_w0(a);
	const float w1 = cubic_w1(a);
	return w0 + w1;
}
ccl_device float cubic_g1(float a)
{
	/* Amplitude of the second filtered fetch: sum of the last two
	 * basis weights. */
	const float w2 = cubic_w2(a);
	const float w3 = cubic_w3(a);
	return w2 + w3;
}
/* h0 and h1 are the two offset functions. */
ccl_device float cubic_h0(float a)
{
	/* Texel offset of the first filtered fetch: -1 plus the share of w1
	 * within (w0 + w1). */
	const float w0 = cubic_w0(a);
	const float w1 = cubic_w1(a);
	const float w1_share = w1 / (w0 + w1);

	/* The trailing +0.5 compensates for the CUDA linear filtering
	 * convention. */
	return (-1.0f + w1_share) + 0.5f;
}
ccl_device float cubic_h1(float a)
{
	/* Texel offset of the second filtered fetch: +1 plus the share of w3
	 * within (w2 + w3), followed by the same +0.5 shift that cubic_h0
	 * applies. */
	const float w2 = cubic_w2(a);
	const float w3 = cubic_w3(a);
	const float w3_share = w3 / (w2 + w3);
	return (1.0f + w3_share) + 0.5f;
}
/* Fast bicubic texture lookup built from 4 filtered tex2D fetches, adapted
 * from the CUDA samples.
 *
 * x and y are scaled by info.width / info.height on entry and the fetch
 * coordinates are divided by the same sizes again, so the inputs live in the
 * same coordinate space that is handed to tex2D (presumably normalized
 * coordinates -- confirm against the texture object setup). */
template<typename T>
ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo& info, CUtexObject tex, float x, float y)
{
	/* Texel-space position, shifted by half a texel. */
	const float tx = (x * info.width) - 0.5f;
	const float ty = (y * info.height) - 0.5f;

	/* Split into integer texel index and fractional remainder. */
	const float ix = floorf(tx);
	const float iy = floorf(ty);
	const float frac_x = tx - ix;
	const float frac_y = ty - iy;

	/* Amplitudes of the two fetches along each axis. */
	const float g0x = cubic_g0(frac_x);
	const float g1x = cubic_g1(frac_x);
	const float g0y = cubic_g0(frac_y);
	const float g1y = cubic_g1(frac_y);

	/* Fetch positions, converted back to the coordinate space of tex2D. */
	const float x0 = (ix + cubic_h0(frac_x)) / info.width;
	const float x1 = (ix + cubic_h1(frac_x)) / info.width;
	const float y0 = (iy + cubic_h0(frac_y)) / info.height;
	const float y1 = (iy + cubic_h1(frac_y)) / info.height;

	/* Blend along x for each of the two rows, then along y. */
	const T row0 = g0x * tex2D<T>(tex, x0, y0) +
	               g1x * tex2D<T>(tex, x1, y0);
	const T row1 = g0x * tex2D<T>(tex, x0, y1) +
	               g1x * tex2D<T>(tex, x1, y1);

	return g0y * row0 + g1y * row1;
}
/* Fast tricubic texture lookup built from 8 filtered tex3D fetches. Despite
 * the "bicubic" in its name, this is the 3D counterpart of the 2D bicubic
 * lookup.
 *
 * NOTE(review): each tex3D fetch is presumably a hardware linearly filtered
 * sample; the filter mode of the texture object is configured outside this
 * function -- confirm. */
template<typename T>
ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo& info, CUtexObject tex, float x, float y, float z)
{
	/* Texel-space position, shifted by half a texel. */
	const float tx = (x * info.width) - 0.5f;
	const float ty = (y * info.height) - 0.5f;
	const float tz = (z * info.depth) - 0.5f;

	/* Split into integer texel index and fractional remainder. */
	const float ix = floorf(tx);
	const float iy = floorf(ty);
	const float iz = floorf(tz);
	const float frac_x = tx - ix;
	const float frac_y = ty - iy;
	const float frac_z = tz - iz;

	/* Amplitudes of the two fetches along each axis. */
	const float g0x = cubic_g0(frac_x);
	const float g1x = cubic_g1(frac_x);
	const float g0y = cubic_g0(frac_y);
	const float g1y = cubic_g1(frac_y);
	const float g0z = cubic_g0(frac_z);
	const float g1z = cubic_g1(frac_z);

	/* Fetch positions, converted back to the coordinate space of tex3D. */
	const float x0 = (ix + cubic_h0(frac_x)) / info.width;
	const float x1 = (ix + cubic_h1(frac_x)) / info.width;
	const float y0 = (iy + cubic_h0(frac_y)) / info.height;
	const float y1 = (iy + cubic_h1(frac_y)) / info.height;
	const float z0 = (iz + cubic_h0(frac_z)) / info.depth;
	const float z1 = (iz + cubic_h1(frac_z)) / info.depth;

	/* Blend along x for each (y, z) pair. */
	const T row_y0_z0 = g0x * tex3D<T>(tex, x0, y0, z0) +
	                    g1x * tex3D<T>(tex, x1, y0, z0);
	const T row_y1_z0 = g0x * tex3D<T>(tex, x0, y1, z0) +
	                    g1x * tex3D<T>(tex, x1, y1, z0);
	const T row_y0_z1 = g0x * tex3D<T>(tex, x0, y0, z1) +
	                    g1x * tex3D<T>(tex, x1, y0, z1);
	const T row_y1_z1 = g0x * tex3D<T>(tex, x0, y1, z1) +
	                    g1x * tex3D<T>(tex, x1, y1, z1);

	/* Blend along y within each z slab, then along z. */
	const T slab_z0 = g0y * row_y0_z0 + g1y * row_y1_z0;
	const T slab_z1 = g0y * row_y0_z1 + g1y * row_y1_z1;

	return g0z * slab_z0 + g1z * slab_z1;
}
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{
const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
CUtexObject tex = (CUtexObject)info.data;
@ -29,29 +137,56 @@ ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y)
texture_type == IMAGE_DATA_TYPE_BYTE4 ||
texture_type == IMAGE_DATA_TYPE_HALF4)
{
return tex2D<float4>(tex, x, y);
if(info.interpolation == INTERPOLATION_CUBIC) {
return kernel_tex_image_interp_bicubic<float4>(info, tex, x, y);
}
else {
return tex2D<float4>(tex, x, y);
}
}
/* float, byte and half */
else {
float f = tex2D<float>(tex, x, y);
float f;
if(info.interpolation == INTERPOLATION_CUBIC) {
f = kernel_tex_image_interp_bicubic<float>(info, tex, x, y);
}
else {
f = tex2D<float>(tex, x, y);
}
return make_float4(f, f, f, 1.0f);
}
}
ccl_device float4 kernel_tex_image_interp_3d(void *kg, int id, float x, float y, float z)
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
{
const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
CUtexObject tex = (CUtexObject)info.data;
uint interpolation = (interp == INTERPOLATION_NONE)? info.interpolation: interp;
const int texture_type = kernel_tex_type(id);
if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
texture_type == IMAGE_DATA_TYPE_BYTE4 ||
texture_type == IMAGE_DATA_TYPE_HALF4)
{
return tex3D<float4>(tex, x, y, z);
if(interpolation == INTERPOLATION_CUBIC) {
return kernel_tex_image_interp_bicubic_3d<float4>(info, tex, x, y, z);
}
else {
return tex3D<float4>(tex, x, y, z);
}
}
else {
float f = tex3D<float>(tex, x, y, z);
float f;
if(interpolation == INTERPOLATION_CUBIC) {
f = kernel_tex_image_interp_bicubic_3d<float>(info, tex, x, y, z);
}
else {
f = tex3D<float>(tex, x, y, z);
}
return make_float4(f, f, f, 1.0f);
}
}
@ -60,7 +195,7 @@ ccl_device float4 kernel_tex_image_interp_3d(void *kg, int id, float x, float y,
/* Fermi */
ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y)
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{
float4 r;
switch(id) {
@ -158,7 +293,7 @@ ccl_device float4 kernel_tex_image_interp(void *kg, int id, float x, float y)
return r;
}
ccl_device float4 kernel_tex_image_interp_3d(void *kg, int id, float x, float y, float z)
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z)
{
float4 r;
switch(id) {

View File

@ -142,7 +142,7 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
}
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z)
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp)
{
const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
@ -150,7 +150,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x,
uint height = info->height;
uint offset = 0;
uint depth = info->depth;
uint interpolation = info->interpolation;
uint interpolation = (interp == INTERPOLATION_NONE)? info->interpolation: interp;
uint extension = info->extension;
/* Actual sampling. */

View File

@ -1043,7 +1043,7 @@ bool OSLRenderServices::texture3d(ustring filename,
bool status;
if(filename.length() && filename[0] == '@') {
int slot = atoi(filename.c_str() + 1);
float4 rgba = kernel_tex_image_interp_3d(kg, slot, P.x, P.y, P.z);
float4 rgba = kernel_tex_image_interp_3d(kg, slot, P.x, P.y, P.z, INTERPOLATION_NONE);
result[0] = rgba[0];
if(nchannels > 1)

View File

@ -43,7 +43,7 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
co = transform_point(&tfm, co);
}
float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z);
float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z, INTERPOLATION_NONE);
#else
float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
#endif