Cycles: Use curve approximation for blackbody instead of lookup table

Color in the range 800..12000 is now calculated using the approximation a/t + bt + c for R and G, and ((at + b)t + c)t + d for B.
The max absolute error per RGB channel for the non-LUT function is less than 0.001, which is enough to get the same 8 bit/channel color as OSL, with a noticeable performance difference.
However, there is a slight visible difference from the previous non-OSL implementation, caused by that implementation's lookup table interpolation and an off-by-one mistake.
The previous implementation gave a black color outside of the soft range (t > 12000); now it gives the same color as for 12000.

Also, a blackbody node without a connected input is now converted to a constant value at shader compile time.

Reviewers: dingto, sergey

Reviewed By: dingto

Subscribers: nutel, brecht, juicyfruit

Differential Revision: https://developer.blender.org/D1280
This commit is contained in:
Sv. Lockal 2015-05-05 06:11:54 +00:00
parent 22bbd1c512
commit 7201f6d14c
13 changed files with 76 additions and 258 deletions

View File

@ -38,12 +38,6 @@ CCL_NAMESPACE_BEGIN
#define BSSRDF_MIN_RADIUS 1e-8f
#define BSSRDF_MAX_HITS 4
#define BB_DRAPER 800.0f
#define BB_MAX_TABLE_RANGE 12000.0f
#define BB_TABLE_XPOWER 1.5f
#define BB_TABLE_YPOWER 5.0f
#define BB_TABLE_SPACING 2.0f
#define BECKMANN_TABLE_SIZE 256
#define TEX_NUM_FLOAT_IMAGES 5
@ -989,9 +983,8 @@ typedef struct KernelCurves {
} KernelCurves;
typedef struct KernelTables {
int blackbody_offset;
int beckmann_offset;
int pad1, pad2;
int pad1, pad2, pad3;
} KernelTables;
typedef struct KernelData {

View File

@ -142,6 +142,8 @@ CCL_NAMESPACE_END
#include "svm_noise.h"
#include "svm_texture.h"
#include "svm_math_util.h"
#include "svm_attribute.h"
#include "svm_gradient.h"
#include "svm_blackbody.h"
@ -164,7 +166,6 @@ CCL_NAMESPACE_END
#include "svm_mapping.h"
#include "svm_normal.h"
#include "svm_wave.h"
#include "svm_math_util.h"
#include "svm_math.h"
#include "svm_mix.h"
#include "svm_ramp.h"

View File

@ -36,46 +36,10 @@ CCL_NAMESPACE_BEGIN
ccl_device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *stack, uint temperature_offset, uint col_offset)
{
/* Output */
float3 color_rgb = make_float3(0.0f, 0.0f, 0.0f);
/* Input */
float temperature = stack_load_float(stack, temperature_offset);
if(temperature < BB_DRAPER) {
/* just return very very dim red */
color_rgb = make_float3(1.0e-6f,0.0f,0.0f);
}
else if(temperature <= BB_MAX_TABLE_RANGE) {
/* This is the overall size of the table */
const int lookuptablesize = 956;
const float lookuptablenormalize = 1.0f/956.0f;
/* reconstruct a proper index for the table lookup, compared to OSL we don't look up two colors
just one (the OSL-lerp is also automatically done for us by "lookup_table_read") */
float t = powf((temperature - BB_DRAPER) * (1.0f / BB_TABLE_SPACING), (1.0f / BB_TABLE_XPOWER));
int blackbody_table_offset = kernel_data.tables.blackbody_offset;
/* Retrieve colors from the lookup table */
float lutval = t*lookuptablenormalize;
float R = lookup_table_read(kg, lutval, blackbody_table_offset, lookuptablesize);
lutval = (t + 319.0f*1.0f)*lookuptablenormalize;
float G = lookup_table_read(kg, lutval, blackbody_table_offset, lookuptablesize);
lutval = (t + 319.0f*2.0f)*lookuptablenormalize;
float B = lookup_table_read(kg, lutval, blackbody_table_offset, lookuptablesize);
R = powf(R, BB_TABLE_YPOWER);
G = powf(G, BB_TABLE_YPOWER);
B = powf(B, BB_TABLE_YPOWER);
color_rgb = make_float3(R, G, B);
}
/* Luminance */
float l = linear_rgb_to_gray(color_rgb);
if(l != 0.0f)
color_rgb /= l;
float3 color_rgb = svm_math_blackbody_color(temperature);
if(stack_valid(col_offset))
stack_store_float3(stack, col_offset, color_rgb);

View File

@ -104,5 +104,67 @@ ccl_device float svm_math(NodeMath type, float Fac1, float Fac2)
return Fac;
}
ccl_device float3 svm_math_blackbody_color(float t)
{
	/* Piecewise curve fit of the blackbody color as a function of the
	 * temperature t, replacing a lookup table.
	 *
	 * Six temperature segments between 965 and 12000 each have their own
	 * coefficients:
	 *   R, G: a/t + b*t + c
	 *   B:    ((a*t + b)*t + c)*t + d
	 *
	 * Max absolute error for RGB is (0.00095, 0.00077, 0.00057),
	 * which is enough to get the same 8 bit/channel color.
	 */
	const float red_fit[6][3] = {
		{  2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f },
		{  3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f },
		{  4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f },
		{  4.66849800e+03f,  2.85655028e-05f, 1.29075375e-01f },
		{  4.60124770e+03f,  2.89727618e-05f, 1.48001316e-01f },
		{  3.78765709e+03f,  9.36026367e-06f, 3.98995841e-01f },
	};

	const float green_fit[6][3] = {
		{ -7.50343014e+02f,  3.15679613e-04f, 4.73464526e-01f },
		{ -1.00402363e+03f,  1.29189794e-04f, 9.08181524e-01f },
		{ -1.22075471e+03f,  2.56245413e-05f, 1.20753416e+00f },
		{ -1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f },
		{ -1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f },
		{ -5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f },
	};

	/* The three coldest segments have no blue at all; the all-zero rows
	 * should be optimized away by the compiler. */
	const float blue_fit[6][4] = {
		{ 0.0f, 0.0f, 0.0f, 0.0f },
		{ 0.0f, 0.0f, 0.0f, 0.0f },
		{ 0.0f, 0.0f, 0.0f, 0.0f },
		{ -2.02524603e-11f,  1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f },
		{ -2.22463426e-13f, -1.55078698e-08f,  3.81675160e-04f, -7.30646033e-01f },
		{  6.72595954e-13f, -2.73059993e-08f,  4.24068546e-04f, -7.52204323e-01f },
	};

	/* Below the first fitted segment the color is constant: for 800 <= t < 965
	 * the color does not change in the OSL implementation, and the same value
	 * is returned for every lower temperature. The negated comparison also
	 * sends NaN here, since NaN fails every ">=" test. */
	if(!(t >= 965.0f)) {
		return make_float3(4.70366907f, 0.0f, 0.0f);
	}

	/* At and above the top of the fitted range the color is held constant. */
	if(t >= 12000.0f) {
		return make_float3(0.826270103f, 0.994478524f, 1.56626022f);
	}

	/* A macro with a literal segment index is used instead of a runtime index
	 * to reduce stack usage for nvcc. */
#define BB_SEGMENT_RGB(seg) make_float3(\
	red_fit[seg][0] / t + red_fit[seg][1] * t + red_fit[seg][2],\
	green_fit[seg][0] / t + green_fit[seg][1] * t + green_fit[seg][2],\
	((blue_fit[seg][0] * t + blue_fit[seg][1]) * t + blue_fit[seg][2]) * t + blue_fit[seg][3])

	/* Pick the segment from the hottest lower bound downwards. */
	if(t >= 6365.0f) {
		return BB_SEGMENT_RGB(5);
	}
	else if(t >= 3315.0f) {
		return BB_SEGMENT_RGB(4);
	}
	else if(t >= 1902.0f) {
		return BB_SEGMENT_RGB(3);
	}
	else if(t >= 1449.0f) {
		return BB_SEGMENT_RGB(2);
	}
	else if(t >= 1167.0f) {
		return BB_SEGMENT_RGB(1);
	}

	/* Only 965 <= t < 1167 is left. */
	return BB_SEGMENT_RGB(0);

#undef BB_SEGMENT_RGB
}
CCL_NAMESPACE_END

View File

@ -18,7 +18,6 @@ set(SRC
attribute.cpp
background.cpp
bake.cpp
blackbody.cpp
buffers.cpp
camera.cpp
film.cpp
@ -47,7 +46,6 @@ set(SRC_HEADERS
attribute.h
bake.h
background.h
blackbody.h
buffers.h
camera.h
film.h

View File

@ -1,140 +0,0 @@
/*
* Adapted from Open Shading Language with this license:
*
* Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
* All Rights Reserved.
*
* Modifications Copyright 2013, Blender Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Sony Pictures Imageworks nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blackbody.h"
#include "util_color.h"
#include "util_math.h"
#include "kernel_types.h"
CCL_NAMESPACE_BEGIN
/* Build the blackbody color lookup table.
 *
 * For each table index i in 0..317 the temperature
 *   T = pow(i, BB_TABLE_XPOWER) * BB_TABLE_SPACING + BB_DRAPER
 * is converted to a color: the blackbody spectrum is sampled at 81 wavelengths
 * (380nm to 780nm in 5nm steps), weighted by the CIE colour matching table
 * below to get XYZ, converted with xyz_to_rgb(), clamped to be non-negative
 * and raised to the power 1/BB_TABLE_YPOWER.
 *
 * Returns a vector of 956 floats with the three channels stored one after
 * another (R at i, G at i + 319, B at i + 638).
 */
vector<float> blackbody_table_build()
{
/* quoted from OSLs opcolor.cpp
In order to speed up the blackbody computation, we have a table
storing the precomputed BB values for a range of temperatures. Less
than BB_DRAPER always returns 0. Greater than BB_MAX_TABLE_RANGE
does the full computation, we think it'll be rare to inquire higher
temperatures.
Since the bb function is so nonlinear, we actually space the table
entries nonlinearly, with the relationship between the table index i
and the temperature T as follows:
i = ((T-Draper)/spacing)^(1/xpower)
T = pow(i, xpower) * spacing + Draper
And furthermore, we store in the table the true value raised ^(1/5).
I tuned this a bit, and with the current values we can have all
blackbody results accurate to within 0.1% with a table size of 317
(about 5 KB of data).
*/
/* Colour matching weights (X, Y, Z), one row per sampled wavelength;
 * row n is used for the wavelength 380nm + 5nm * n in the loop below. */
const float cie_colour_match[81][3] = {
{0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f},
{0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f},
{0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f},
{0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f},
{0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f},
{0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f},
{0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f},
{0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f},
{0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f},
{0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f},
{0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f},
{0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0000f,0.0057f},
{0.5945f,0.9950f,0.0039f}, {0.6784f,0.9786f,0.0027f}, {0.7621f,0.9520f,0.0021f},
{0.8425f,0.9154f,0.0018f}, {0.9163f,0.8700f,0.0017f}, {0.9786f,0.8163f,0.0014f},
{1.0263f,0.7570f,0.0011f}, {1.0567f,0.6949f,0.0010f}, {1.0622f,0.6310f,0.0008f},
{1.0456f,0.5668f,0.0006f}, {1.0026f,0.5030f,0.0003f}, {0.9384f,0.4412f,0.0002f},
{0.8544f,0.3810f,0.0002f}, {0.7514f,0.3210f,0.0001f}, {0.6424f,0.2650f,0.0000f},
{0.5419f,0.2170f,0.0000f}, {0.4479f,0.1750f,0.0000f}, {0.3608f,0.1382f,0.0000f},
{0.2835f,0.1070f,0.0000f}, {0.2187f,0.0816f,0.0000f}, {0.1649f,0.0610f,0.0000f},
{0.1212f,0.0446f,0.0000f}, {0.0874f,0.0320f,0.0000f}, {0.0636f,0.0232f,0.0000f},
{0.0468f,0.0170f,0.0000f}, {0.0329f,0.0119f,0.0000f}, {0.0227f,0.0082f,0.0000f},
{0.0158f,0.0057f,0.0000f}, {0.0114f,0.0041f,0.0000f}, {0.0081f,0.0029f,0.0000f},
{0.0058f,0.0021f,0.0000f}, {0.0041f,0.0015f,0.0000f}, {0.0029f,0.0010f,0.0000f},
{0.0020f,0.0007f,0.0000f}, {0.0014f,0.0005f,0.0000f}, {0.0010f,0.0004f,0.0000f},
{0.0007f,0.0002f,0.0000f}, {0.0005f,0.0002f,0.0000f}, {0.0003f,0.0001f,0.0000f},
{0.0002f,0.0001f,0.0000f}, {0.0002f,0.0001f,0.0000f}, {0.0001f,0.0000f,0.0000f},
{0.0001f,0.0000f,0.0000f}, {0.0001f,0.0000f,0.0000f}, {0.0000f,0.0000f,0.0000f}
};
const double c1 = 3.74183e-16; // 2*pi*h*c^2, W*m^2
const double c2 = 1.4388e-2; // h*c/k, m*K
// h is Planck's const, k is Boltzmann's
const float dlambda = 5.0f * 1e-9f; // in meters
/* Blackbody table from 800 to 12k Kelvin (319 entries (317+2 offset) * 3) */
/* NOTE(review): 3 * 319 would be 957, but the table holds 956 floats. The
 * highest index written below is 317 + 319*2 = 955, so all writes are in
 * range; indices 318 and 637 are never written by this function. */
vector<float> blackbody_table(956);
float X, Y, Z;
/* ToDo: bring this back to what OSL does with the lastTemperature limit ? */
for(int i = 0; i <= 317; ++i) {
/* Non-linear spacing of table entries, see the quoted OSL comment above. */
double Temperature = pow((double)i, (double)BB_TABLE_XPOWER) * (double)BB_TABLE_SPACING + (double)BB_DRAPER;
X = 0;
Y = 0;
Z = 0;
/* from OSL "spectrum_to_XYZ" */
for(int n = 0; n < 81; ++n) {
/* Wavelength in nanometers: 380, 385, ..., 780. */
float lambda = 380.0f + 5.0f * n;
double wlm = lambda * 1e-9f; // Wavelength in meters
// N.B. spec_intens returns result in W/m^2 but it's a differential,
// needs to be scaled by dlambda!
/* c1 * wlm^-5 / (exp(c2 / (wlm * T)) - 1), evaluated in double and
 * then narrowed to float. */
float spec_intens = float((c1 * pow(wlm, -5.0)) / (exp(c2 / (wlm * Temperature)) -1.0));
float Me = spec_intens * dlambda;
/* Accumulate the colour-matching-weighted sums in float. */
X += Me * cie_colour_match[n][0];
Y += Me * cie_colour_match[n][1];
Z += Me * cie_colour_match[n][2];
}
/* Convert from xyz color space */
float3 col = xyz_to_rgb(X, Y, Z);
/* Clamp to zero if values are smaller */
col = max(col, make_float3(0.0f, 0.0f, 0.0f));
/* Store the value raised to 1/BB_TABLE_YPOWER, as described in the quoted
 * OSL comment ("the true value raised ^(1/5)"). */
col.x = powf(col.x, 1.0f / BB_TABLE_YPOWER);
col.y = powf(col.y, 1.0f / BB_TABLE_YPOWER);
col.z = powf(col.z, 1.0f / BB_TABLE_YPOWER);
/* Store in table in RRRGGGBBB format */
blackbody_table[i] = col.x;
blackbody_table[i+319*1] = col.y;
blackbody_table[i+319*2] = col.z;
}
return blackbody_table;
}
CCL_NAMESPACE_END

View File

@ -1,28 +0,0 @@
/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __BLACKBODY_H__
#define __BLACKBODY_H__
#include "util_vector.h"
CCL_NAMESPACE_BEGIN
/* Build the blackbody color lookup table: 956 floats holding the R, G and B
 * values for table indices 0..317, stored channel after channel. */
vector<float> blackbody_table_build();
CCL_NAMESPACE_END
#endif /* __BLACKBODY_H__ */

View File

@ -194,7 +194,6 @@ public:
virtual bool has_surface_emission() { return false; }
virtual bool has_surface_transparent() { return false; }
virtual bool has_surface_bssrdf() { return false; }
virtual bool has_converter_blackbody() { return false; }
virtual bool has_bssrdf_bump() { return false; }
virtual bool has_spatial_varying() { return false; }
virtual bool has_object_dependency() { return false; }

View File

@ -3652,9 +3652,17 @@ void BlackbodyNode::compile(SVMCompiler& compiler)
ShaderInput *temperature_in = input("Temperature");
ShaderOutput *color_out = output("Color");
compiler.stack_assign(temperature_in);
compiler.stack_assign(color_out);
compiler.add_node(NODE_BLACKBODY, temperature_in->stack_offset, color_out->stack_offset);
if(temperature_in->link == NULL) {
float3 color = svm_math_blackbody_color(temperature_in->value.x);
compiler.add_node(NODE_VALUE_V, color_out->stack_offset);
compiler.add_node(NODE_VALUE_V, color);
}
else {
compiler.stack_assign(temperature_in);
compiler.add_node(NODE_BLACKBODY, temperature_in->stack_offset, color_out->stack_offset);
}
}
void BlackbodyNode::compile(OSLCompiler& compiler)

View File

@ -549,8 +549,6 @@ public:
class BlackbodyNode : public ShaderNode {
public:
SHADER_NODE_CLASS(BlackbodyNode)
bool has_converter_blackbody() { return true; }
};
class MathNode : public ShaderNode {

View File

@ -15,7 +15,6 @@
*/
#include "background.h"
#include "blackbody.h"
#include "device.h"
#include "graph.h"
#include "light.h"
@ -32,7 +31,6 @@
CCL_NAMESPACE_BEGIN
vector<float> ShaderManager::blackbody_table;
vector<float> ShaderManager::beckmann_table;
/* Beckmann sampling precomputed table, see bsdf_microfacet.h */
@ -149,7 +147,6 @@ Shader::Shader()
has_surface_transparent = false;
has_surface_emission = false;
has_surface_bssrdf = false;
has_converter_blackbody = false;
has_volume = false;
has_displacement = false;
has_bssrdf_bump = false;
@ -243,7 +240,6 @@ void Shader::tag_used(Scene *scene)
ShaderManager::ShaderManager()
{
need_update = true;
blackbody_table_offset = TABLE_OFFSET_INVALID;
beckmann_table_offset = TABLE_OFFSET_INVALID;
}
@ -340,7 +336,6 @@ void ShaderManager::device_update_common(Device *device,
uint shader_flag_size = scene->shaders.size()*4;
uint *shader_flag = dscene->shader_flag.resize(shader_flag_size);
uint i = 0;
bool has_converter_blackbody = false;
bool has_volumes = false;
foreach(Shader *shader, scene->shaders) {
@ -367,8 +362,6 @@ void ShaderManager::device_update_common(Device *device,
flag |= SD_HETEROGENEOUS_VOLUME;
if(shader->has_bssrdf_bump)
flag |= SD_HAS_BSSRDF_BUMP;
if(shader->has_converter_blackbody)
has_converter_blackbody = true;
if(shader->volume_sampling_method == VOLUME_SAMPLING_EQUIANGULAR)
flag |= SD_VOLUME_EQUIANGULAR;
if(shader->volume_sampling_method == VOLUME_SAMPLING_MULTIPLE_IMPORTANCE)
@ -394,23 +387,6 @@ void ShaderManager::device_update_common(Device *device,
/* lookup tables */
KernelTables *ktables = &dscene->data.tables;
/* blackbody lookup table */
if(has_converter_blackbody && blackbody_table_offset == TABLE_OFFSET_INVALID) {
if(blackbody_table.size() == 0) {
thread_scoped_lock lock(lookup_table_mutex);
if(blackbody_table.size() == 0) {
blackbody_table = blackbody_table_build();
}
}
blackbody_table_offset = scene->lookup_tables->add_table(dscene, blackbody_table);
ktables->blackbody_offset = (int)blackbody_table_offset;
}
else if(!has_converter_blackbody && blackbody_table_offset != TABLE_OFFSET_INVALID) {
scene->lookup_tables->remove_table(blackbody_table_offset);
blackbody_table_offset = TABLE_OFFSET_INVALID;
}
/* beckmann lookup table */
if(beckmann_table_offset == TABLE_OFFSET_INVALID) {
@ -431,11 +407,6 @@ void ShaderManager::device_update_common(Device *device,
void ShaderManager::device_free_common(Device *device, DeviceScene *dscene, Scene *scene)
{
if(blackbody_table_offset != TABLE_OFFSET_INVALID) {
scene->lookup_tables->remove_table(blackbody_table_offset);
blackbody_table_offset = TABLE_OFFSET_INVALID;
}
if(beckmann_table_offset != TABLE_OFFSET_INVALID) {
scene->lookup_tables->remove_table(beckmann_table_offset);
beckmann_table_offset = TABLE_OFFSET_INVALID;

View File

@ -104,7 +104,6 @@ public:
bool has_volume;
bool has_displacement;
bool has_surface_bssrdf;
bool has_converter_blackbody;
bool has_bssrdf_bump;
bool has_heterogeneous_volume;
bool has_object_dependency;
@ -173,10 +172,8 @@ protected:
AttributeIDMap unique_attribute_id;
thread_mutex lookup_table_mutex;
static vector<float> blackbody_table;
static vector<float> beckmann_table;
size_t blackbody_table_offset;
size_t beckmann_table_offset;
};

View File

@ -393,10 +393,6 @@ void SVMCompiler::generate_node(ShaderNode *node, set<ShaderNode*>& done)
current_shader->has_heterogeneous_volume = true;
}
/* detect if we have a blackbody converter, to prepare lookup table */
if(node->has_converter_blackbody())
current_shader->has_converter_blackbody = true;
if(node->has_object_dependency()) {
current_shader->has_object_dependency = true;
}
@ -716,7 +712,6 @@ void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int in
shader->has_surface_transparent = false;
shader->has_surface_bssrdf = false;
shader->has_bssrdf_bump = false;
shader->has_converter_blackbody = false;
shader->has_volume = false;
shader->has_displacement = false;
shader->has_heterogeneous_volume = false;