Cycles: Tweaks to support CUDA 8 toolkit
All the changes are mainly giving explicit tips on inlining functions, so they match how inlining worked with previous toolkit. This make kernel compiled by CUDA 8 render in average with same speed as previous kernels. Some scenes are somewhat faster, some of them are somewhat slower. But slowdown is within 1% so far. On a positive side it allows us to enable newer generation cards on buildbots (so GTX 10x0 will be officially supported soon).
This commit is contained in:
parent
7065022f7a
commit
6353ecb996
|
@ -321,11 +321,11 @@ public:
|
|||
return "";
|
||||
}
|
||||
if(cuda_version < 60) {
|
||||
printf("Unsupported CUDA version %d.%d detected, you need CUDA 7.5.\n", cuda_version/10, cuda_version%10);
|
||||
printf("Unsupported CUDA version %d.%d detected, you need CUDA 7.5 or newer.\n", cuda_version/10, cuda_version%10);
|
||||
return "";
|
||||
}
|
||||
else if(cuda_version != 75)
|
||||
printf("CUDA version %d.%d detected, build may succeed but only CUDA 7.5 is officially supported.\n", cuda_version/10, cuda_version%10);
|
||||
else if(cuda_version != 75 && cuda_version != 80)
|
||||
printf("CUDA version %d.%d detected, build may succeed but only CUDA 7.5 and 8.0 are officially supported.\n", cuda_version/10, cuda_version%10);
|
||||
|
||||
/* Compile. */
|
||||
string kernel = path_join(kernel_path, path_join("kernels", path_join("cuda", "kernel.cu")));
|
||||
|
|
|
@ -37,11 +37,16 @@
|
|||
*
|
||||
*/
|
||||
|
||||
ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
Intersection *isect_array,
|
||||
const uint max_hits,
|
||||
uint *num_hits)
|
||||
#ifndef __KERNEL_GPU__
|
||||
ccl_device
|
||||
#else
|
||||
ccl_device_inline
|
||||
#endif
|
||||
bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
Intersection *isect_array,
|
||||
const uint max_hits,
|
||||
uint *num_hits)
|
||||
{
|
||||
/* todo:
|
||||
* - likely and unlikely for if() statements
|
||||
|
|
|
@ -35,12 +35,17 @@
|
|||
*
|
||||
*/
|
||||
|
||||
ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
SubsurfaceIntersection *ss_isect,
|
||||
int subsurface_object,
|
||||
uint *lcg_state,
|
||||
int max_hits)
|
||||
#ifndef __KERNEL_GPU__
|
||||
ccl_device
|
||||
#else
|
||||
ccl_device_inline
|
||||
#endif
|
||||
void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
SubsurfaceIntersection *ss_isect,
|
||||
int subsurface_object,
|
||||
uint *lcg_state,
|
||||
int max_hits)
|
||||
{
|
||||
/* todo:
|
||||
* - test if pushing distance on the stack helps (for non shadow rays)
|
||||
|
|
|
@ -40,16 +40,21 @@
|
|||
*
|
||||
*/
|
||||
|
||||
ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
Intersection *isect,
|
||||
const uint visibility
|
||||
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
|
||||
, uint *lcg_state,
|
||||
float difl,
|
||||
float extmax
|
||||
#ifndef __KERNEL_GPU__
|
||||
ccl_device
|
||||
#else
|
||||
ccl_device_inline
|
||||
#endif
|
||||
)
|
||||
bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
Intersection *isect,
|
||||
const uint visibility
|
||||
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
|
||||
, uint *lcg_state,
|
||||
float difl,
|
||||
float extmax
|
||||
#endif
|
||||
)
|
||||
{
|
||||
/* todo:
|
||||
* - test if pushing distance on the stack helps (for non shadow rays)
|
||||
|
|
|
@ -36,10 +36,15 @@
|
|||
*
|
||||
*/
|
||||
|
||||
ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
Intersection *isect,
|
||||
const uint visibility)
|
||||
#ifndef __KERNEL_GPU__
|
||||
ccl_device
|
||||
#else
|
||||
ccl_device_inline
|
||||
#endif
|
||||
bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
Intersection *isect,
|
||||
const uint visibility)
|
||||
{
|
||||
/* todo:
|
||||
* - test if pushing distance on the stack helps (for non shadow rays)
|
||||
|
|
|
@ -36,11 +36,16 @@
|
|||
*
|
||||
*/
|
||||
|
||||
ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
Intersection *isect_array,
|
||||
const uint max_hits,
|
||||
const uint visibility)
|
||||
#ifndef __KERNEL_GPU__
|
||||
ccl_device
|
||||
#else
|
||||
ccl_device_inline
|
||||
#endif
|
||||
uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
Intersection *isect_array,
|
||||
const uint max_hits,
|
||||
const uint visibility)
|
||||
{
|
||||
/* todo:
|
||||
* - test if pushing distance on the stack helps (for non shadow rays)
|
||||
|
|
|
@ -36,7 +36,15 @@
|
|||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
ccl_device int bsdf_sample(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, float randu, float randv, float3 *eval, float3 *omega_in, differential3 *domega_in, float *pdf)
|
||||
ccl_device_inline int bsdf_sample(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
const ShaderClosure *sc,
|
||||
float randu,
|
||||
float randv,
|
||||
float3 *eval,
|
||||
float3 *omega_in,
|
||||
differential3 *domega_in,
|
||||
float *pdf)
|
||||
{
|
||||
int label;
|
||||
|
||||
|
|
|
@ -62,7 +62,11 @@ ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float rough
|
|||
return 2.0f / (roughness*roughness) - 2.0f;
|
||||
}
|
||||
|
||||
ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
|
||||
ccl_device_inline float3 bsdf_ashikhmin_shirley_eval_reflect(
|
||||
const ShaderClosure *sc,
|
||||
const float3 I,
|
||||
const float3 omega_in,
|
||||
float *pdf)
|
||||
{
|
||||
const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
|
||||
float3 N = bsdf->N;
|
||||
|
|
|
@ -25,11 +25,18 @@
|
|||
* energy is used. In combination with MIS, that is enough to produce an unbiased result, although
|
||||
* the balance heuristic isn't necessarily optimal anymore.
|
||||
*/
|
||||
ccl_device float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, float3 wo, const bool wo_outside, const float3 color, const float alpha_x, const float alpha_y, ccl_addr_space uint* lcg_state
|
||||
ccl_device_inline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
|
||||
float3 wi,
|
||||
float3 wo,
|
||||
const bool wo_outside,
|
||||
const float3 color,
|
||||
const float alpha_x,
|
||||
const float alpha_y,
|
||||
ccl_addr_space uint *lcg_state
|
||||
#ifdef MF_MULTI_GLASS
|
||||
, const float eta
|
||||
, const float eta
|
||||
#elif defined(MF_MULTI_GLOSSY)
|
||||
, float3 *n, float3 *k
|
||||
, float3 *n, float3 *k
|
||||
#endif
|
||||
)
|
||||
{
|
||||
|
|
|
@ -141,7 +141,7 @@ ccl_device float bssrdf_cubic_pdf(const ShaderClosure *sc, float r)
|
|||
}
|
||||
|
||||
/* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */
|
||||
ccl_device float bssrdf_cubic_quintic_root_find(float xi)
|
||||
ccl_device_inline float bssrdf_cubic_quintic_root_find(float xi)
|
||||
{
|
||||
/* newton-raphson iteration, usually succeeds in 2-4 iterations, except
|
||||
* outside 0.02 ... 0.98 where it can go up to 10, so overall performance
|
||||
|
@ -255,7 +255,7 @@ ccl_device float bssrdf_burley_pdf(const ShaderClosure *sc, float r)
|
|||
* Returns scaled radius, meaning the result is to be scaled up by d.
|
||||
* Since there's no closed form solution we do Newton-Raphson method to find it.
|
||||
*/
|
||||
ccl_device float bssrdf_burley_root_find(float xi)
|
||||
ccl_device_inline float bssrdf_burley_root_find(float xi)
|
||||
{
|
||||
const float tolerance = 1e-6f;
|
||||
const int max_iteration_count = 10;
|
||||
|
@ -389,7 +389,7 @@ ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float
|
|||
bssrdf_burley_sample(sc, xi, r, h);
|
||||
}
|
||||
|
||||
ccl_device float bssrdf_pdf(const ShaderClosure *sc, float r)
|
||||
ccl_device_inline float bssrdf_pdf(const ShaderClosure *sc, float r)
|
||||
{
|
||||
if(sc->type == CLOSURE_BSSRDF_CUBIC_ID)
|
||||
return bssrdf_cubic_pdf(sc, r);
|
||||
|
|
|
@ -23,7 +23,11 @@ CCL_NAMESPACE_BEGIN
|
|||
|
||||
/* Generic primitive attribute reading functions */
|
||||
|
||||
ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy)
|
||||
ccl_device_inline float primitive_attribute_float(KernelGlobals *kg,
|
||||
const ShaderData *sd,
|
||||
AttributeElement elem,
|
||||
int offset,
|
||||
float *dx, float *dy)
|
||||
{
|
||||
if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) {
|
||||
if(subd_triangle_patch(kg, sd) == ~0)
|
||||
|
@ -48,7 +52,12 @@ ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData *
|
|||
}
|
||||
}
|
||||
|
||||
ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy)
|
||||
ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg,
|
||||
const ShaderData *sd,
|
||||
AttributeElement elem,
|
||||
int offset,
|
||||
float3 *dx,
|
||||
float3 *dy)
|
||||
{
|
||||
if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) {
|
||||
if(subd_triangle_patch(kg, sd) == ~0)
|
||||
|
@ -75,7 +84,7 @@ ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData
|
|||
|
||||
/* Default UV coordinate */
|
||||
|
||||
ccl_device float3 primitive_uv(KernelGlobals *kg, ShaderData *sd)
|
||||
ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd)
|
||||
{
|
||||
AttributeElement elem_uv;
|
||||
int offset_uv = find_attribute(kg, sd, ATTR_STD_UV, &elem_uv);
|
||||
|
@ -144,7 +153,7 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd)
|
|||
|
||||
/* Motion vector for motion pass */
|
||||
|
||||
ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
|
||||
ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
|
||||
{
|
||||
/* center position */
|
||||
float3 center;
|
||||
|
|
|
@ -44,7 +44,9 @@ ccl_device float4 volume_image_texture_3d(int id, float x, float y, float z)
|
|||
}
|
||||
#endif /* __KERNEL_GPU__ */
|
||||
|
||||
ccl_device float3 volume_normalized_position(KernelGlobals *kg, const ShaderData *sd, float3 P)
|
||||
ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
|
||||
const ShaderData *sd,
|
||||
float3 P)
|
||||
{
|
||||
/* todo: optimize this so it's just a single matrix multiplication when
|
||||
* possible (not motion blur), or perhaps even just translation + scale */
|
||||
|
|
|
@ -18,8 +18,12 @@ CCL_NAMESPACE_BEGIN
|
|||
|
||||
#ifdef __BAKING__
|
||||
|
||||
ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, RNG rng,
|
||||
int pass_filter, int sample)
|
||||
ccl_device_inline void compute_light_pass(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
PathRadiance *L,
|
||||
RNG rng,
|
||||
int pass_filter,
|
||||
int sample)
|
||||
{
|
||||
/* initialize master radiance accumulator */
|
||||
kernel_assert(kernel_data.film.use_light_pass);
|
||||
|
|
|
@ -211,7 +211,10 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl
|
|||
|
||||
/* Panorama Camera */
|
||||
|
||||
ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray)
|
||||
ccl_device_inline void camera_sample_panorama(KernelGlobals *kg,
|
||||
float raster_x, float raster_y,
|
||||
float lens_u, float lens_v,
|
||||
ccl_addr_space Ray *ray)
|
||||
{
|
||||
Transform rastertocamera = kernel_data.cam.rastertocamera;
|
||||
float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
|
||||
|
@ -303,8 +306,12 @@ ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float
|
|||
|
||||
/* Common */
|
||||
|
||||
ccl_device void camera_sample(KernelGlobals *kg, int x, int y, float filter_u, float filter_v,
|
||||
float lens_u, float lens_v, float time, ccl_addr_space Ray *ray)
|
||||
ccl_device_inline void camera_sample(KernelGlobals *kg,
|
||||
int x, int y,
|
||||
float filter_u, float filter_v,
|
||||
float lens_u, float lens_v,
|
||||
float time,
|
||||
ccl_addr_space Ray *ray)
|
||||
{
|
||||
/* pixel filter */
|
||||
int filter_table_offset = kernel_data.film.filter_table_offset;
|
||||
|
|
|
@ -36,7 +36,7 @@
|
|||
/* Qualifier wrappers for different names on different devices */
|
||||
|
||||
#define ccl_device __device__ __inline__
|
||||
#define ccl_device_inline __device__ __inline__
|
||||
#define ccl_device_inline __device__ __forceinline__
|
||||
#define ccl_device_noinline __device__ __noinline__
|
||||
#define ccl_global
|
||||
#define ccl_constant
|
||||
|
|
|
@ -44,11 +44,11 @@ typedef struct LightSample {
|
|||
*
|
||||
* Note: light_p is modified when sample_coord is true.
|
||||
*/
|
||||
ccl_device float area_light_sample(float3 P,
|
||||
float3 *light_p,
|
||||
float3 axisu, float3 axisv,
|
||||
float randu, float randv,
|
||||
bool sample_coord)
|
||||
ccl_device_inline float area_light_sample(float3 P,
|
||||
float3 *light_p,
|
||||
float3 axisu, float3 axisv,
|
||||
float randu, float randv,
|
||||
bool sample_coord)
|
||||
{
|
||||
/* In our name system we're using P for the center,
|
||||
* which is o in the paper.
|
||||
|
@ -268,11 +268,11 @@ ccl_device_inline bool background_portal_data_fetch_and_check_side(KernelGlobals
|
|||
return false;
|
||||
}
|
||||
|
||||
ccl_device float background_portal_pdf(KernelGlobals *kg,
|
||||
float3 P,
|
||||
float3 direction,
|
||||
int ignore_portal,
|
||||
bool *is_possible)
|
||||
ccl_device_inline float background_portal_pdf(KernelGlobals *kg,
|
||||
float3 P,
|
||||
float3 direction,
|
||||
int ignore_portal,
|
||||
bool *is_possible)
|
||||
{
|
||||
float portal_pdf = 0.0f;
|
||||
|
||||
|
@ -367,7 +367,10 @@ ccl_device float3 background_portal_sample(KernelGlobals *kg,
|
|||
return make_float3(0.0f, 0.0f, 0.0f);
|
||||
}
|
||||
|
||||
ccl_device float3 background_light_sample(KernelGlobals *kg, float3 P, float randu, float randv, float *pdf)
|
||||
ccl_device_inline float3 background_light_sample(KernelGlobals *kg,
|
||||
float3 P,
|
||||
float randu, float randv,
|
||||
float *pdf)
|
||||
{
|
||||
/* Probability of sampling portals instead of the map. */
|
||||
float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
|
||||
|
@ -507,8 +510,11 @@ ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3
|
|||
return t*t/cos_pi;
|
||||
}
|
||||
|
||||
ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp,
|
||||
float randu, float randv, float3 P, LightSample *ls)
|
||||
ccl_device_inline void lamp_light_sample(KernelGlobals *kg,
|
||||
int lamp,
|
||||
float randu, float randv,
|
||||
float3 P,
|
||||
LightSample *ls)
|
||||
{
|
||||
float4 data0 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 0);
|
||||
float4 data1 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 1);
|
||||
|
|
|
@ -436,7 +436,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
|
|||
|
||||
#ifdef __SUBSURFACE__
|
||||
|
||||
ccl_device bool kernel_path_subsurface_scatter(
|
||||
ccl_device_inline bool kernel_path_subsurface_scatter(
|
||||
KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
ShaderData *emission_sd,
|
||||
|
|
|
@ -222,8 +222,13 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_
|
|||
#endif
|
||||
|
||||
/* path tracing: bounce off or through surface to with new direction stored in ray */
|
||||
ccl_device_inline bool kernel_path_surface_bounce(KernelGlobals *kg, ccl_addr_space RNG *rng,
|
||||
ShaderData *sd, ccl_addr_space float3 *throughput, ccl_addr_space PathState *state, PathRadiance *L, ccl_addr_space Ray *ray)
|
||||
ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg,
|
||||
ccl_addr_space RNG *rng,
|
||||
ShaderData *sd,
|
||||
ccl_addr_space float3 *throughput,
|
||||
ccl_addr_space PathState *state,
|
||||
PathRadiance *L,
|
||||
ccl_addr_space Ray *ray)
|
||||
{
|
||||
/* no BSDF? we can stop here */
|
||||
if(ccl_fetch(sd, flag) & SD_BSDF) {
|
||||
|
|
|
@ -18,8 +18,14 @@ CCL_NAMESPACE_BEGIN
|
|||
|
||||
#ifdef __VOLUME_SCATTER__
|
||||
|
||||
ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
|
||||
ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L)
|
||||
ccl_device_inline void kernel_path_volume_connect_light(
|
||||
KernelGlobals *kg,
|
||||
RNG *rng,
|
||||
ShaderData *sd,
|
||||
ShaderData *emission_sd,
|
||||
float3 throughput,
|
||||
PathState *state,
|
||||
PathRadiance *L)
|
||||
{
|
||||
#ifdef __EMISSION__
|
||||
if(!kernel_data.integrator.use_direct_light)
|
||||
|
|
|
@ -130,7 +130,10 @@ ccl_device float2 direction_to_fisheye_equisolid(float3 dir, float lens, float w
|
|||
return make_float2(u, v);
|
||||
}
|
||||
|
||||
ccl_device float3 fisheye_equisolid_to_direction(float u, float v, float lens, float fov, float width, float height)
|
||||
ccl_device_inline float3 fisheye_equisolid_to_direction(float u, float v,
|
||||
float lens,
|
||||
float fov,
|
||||
float width, float height)
|
||||
{
|
||||
u = (u - 0.5f) * width;
|
||||
v = (v - 0.5f) * height;
|
||||
|
@ -189,7 +192,7 @@ ccl_device float2 direction_to_mirrorball(float3 dir)
|
|||
return make_float2(u, v);
|
||||
}
|
||||
|
||||
ccl_device float3 panorama_to_direction(KernelGlobals *kg, float u, float v)
|
||||
ccl_device_inline float3 panorama_to_direction(KernelGlobals *kg, float u, float v)
|
||||
{
|
||||
switch(kernel_data.cam.panorama_type) {
|
||||
case PANORAMA_EQUIRECTANGULAR:
|
||||
|
@ -205,7 +208,7 @@ ccl_device float3 panorama_to_direction(KernelGlobals *kg, float u, float v)
|
|||
}
|
||||
}
|
||||
|
||||
ccl_device float2 direction_to_panorama(KernelGlobals *kg, float3 dir)
|
||||
ccl_device_inline float2 direction_to_panorama(KernelGlobals *kg, float3 dir)
|
||||
{
|
||||
switch(kernel_data.cam.panorama_type) {
|
||||
case PANORAMA_EQUIRECTANGULAR:
|
||||
|
@ -221,9 +224,9 @@ ccl_device float2 direction_to_panorama(KernelGlobals *kg, float3 dir)
|
|||
}
|
||||
}
|
||||
|
||||
ccl_device float3 spherical_stereo_position(KernelGlobals *kg,
|
||||
float3 dir,
|
||||
float3 pos)
|
||||
ccl_device_inline float3 spherical_stereo_position(KernelGlobals *kg,
|
||||
float3 dir,
|
||||
float3 pos)
|
||||
{
|
||||
float interocular_offset = kernel_data.cam.interocular_offset;
|
||||
|
||||
|
|
|
@ -149,8 +149,11 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
|
|||
/* ShaderData setup from BSSRDF scatter */
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData *sd,
|
||||
const Intersection *isect, const Ray *ray)
|
||||
ccl_device void shader_setup_from_subsurface(
|
||||
KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
const Intersection *isect,
|
||||
const Ray *ray)
|
||||
{
|
||||
bool backfacing = sd->flag & SD_BACKFACING;
|
||||
|
||||
|
@ -226,14 +229,14 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat
|
|||
|
||||
/* ShaderData setup from position sampled on mesh */
|
||||
|
||||
ccl_device void shader_setup_from_sample(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
const float3 P,
|
||||
const float3 Ng,
|
||||
const float3 I,
|
||||
int shader, int object, int prim,
|
||||
float u, float v, float t,
|
||||
float time)
|
||||
ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
const float3 P,
|
||||
const float3 Ng,
|
||||
const float3 I,
|
||||
int shader, int object, int prim,
|
||||
float u, float v, float t,
|
||||
float time)
|
||||
{
|
||||
/* vectors */
|
||||
ccl_fetch(sd, P) = P;
|
||||
|
@ -445,7 +448,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s
|
|||
/* Merging */
|
||||
|
||||
#if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
|
||||
ccl_device void shader_merge_closures(ShaderData *sd)
|
||||
ccl_device_inline void shader_merge_closures(ShaderData *sd)
|
||||
{
|
||||
/* merge identical closures, better when we sample a single closure at a time */
|
||||
for(int i = 0; i < sd->num_closure; i++) {
|
||||
|
@ -554,9 +557,13 @@ ccl_device void shader_bsdf_eval(KernelGlobals *kg,
|
|||
}
|
||||
}
|
||||
|
||||
ccl_device int shader_bsdf_sample(KernelGlobals *kg, ShaderData *sd,
|
||||
float randu, float randv, BsdfEval *bsdf_eval,
|
||||
float3 *omega_in, differential3 *domega_in, float *pdf)
|
||||
ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
float randu, float randv,
|
||||
BsdfEval *bsdf_eval,
|
||||
float3 *omega_in,
|
||||
differential3 *domega_in,
|
||||
float *pdf)
|
||||
{
|
||||
int sampled = 0;
|
||||
|
||||
|
@ -991,8 +998,12 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *
|
|||
|
||||
/* Volume Evaluation */
|
||||
|
||||
ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd,
|
||||
PathState *state, VolumeStack *stack, int path_flag, ShaderContext ctx)
|
||||
ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
PathState *state,
|
||||
VolumeStack *stack,
|
||||
int path_flag,
|
||||
ShaderContext ctx)
|
||||
{
|
||||
/* reset closures once at the start, we will be accumulating the closures
|
||||
* for all volumes in the stack into a single array of closures */
|
||||
|
|
|
@ -85,7 +85,11 @@ ccl_device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Sha
|
|||
return NULL;
|
||||
}
|
||||
|
||||
ccl_device float3 subsurface_scatter_eval(ShaderData *sd, ShaderClosure *sc, float disk_r, float r, bool all)
|
||||
ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd,
|
||||
ShaderClosure *sc,
|
||||
float disk_r,
|
||||
float r,
|
||||
bool all)
|
||||
{
|
||||
#ifdef BSSRDF_MULTI_EVAL
|
||||
/* this is the veach one-sample model with balance heuristic, some pdf
|
||||
|
@ -214,7 +218,7 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
|
|||
/* Subsurface scattering step, from a point on the surface to other
|
||||
* nearby points on the same object.
|
||||
*/
|
||||
ccl_device int subsurface_scatter_multi_intersect(
|
||||
ccl_device_inline int subsurface_scatter_multi_intersect(
|
||||
KernelGlobals *kg,
|
||||
SubsurfaceIntersection* ss_isect,
|
||||
ShaderData *sd,
|
||||
|
|
|
@ -36,7 +36,11 @@ typedef struct VolumeShaderCoefficients {
|
|||
} VolumeShaderCoefficients;
|
||||
|
||||
/* evaluate shader to get extinction coefficient at P */
|
||||
ccl_device bool volume_shader_extinction_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, float3 *extinction)
|
||||
ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
PathState *state,
|
||||
float3 P,
|
||||
float3 *extinction)
|
||||
{
|
||||
sd->P = P;
|
||||
shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
|
||||
|
@ -58,7 +62,11 @@ ccl_device bool volume_shader_extinction_sample(KernelGlobals *kg, ShaderData *s
|
|||
}
|
||||
|
||||
/* evaluate shader to get absorption, scattering and emission at P */
|
||||
ccl_device bool volume_shader_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, VolumeShaderCoefficients *coeff)
|
||||
ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
PathState *state,
|
||||
float3 P,
|
||||
VolumeShaderCoefficients *coeff)
|
||||
{
|
||||
sd->P = P;
|
||||
shader_eval_volume(kg, sd, state, state->volume_stack, state->flag, SHADER_CONTEXT_VOLUME);
|
||||
|
|
|
@ -123,7 +123,10 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *
|
|||
}
|
||||
}
|
||||
|
||||
ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
|
||||
ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
float *stack,
|
||||
uint4 node)
|
||||
{
|
||||
NodeAttributeType type, mesh_type;
|
||||
AttributeElement elem;
|
||||
|
|
|
@ -18,7 +18,11 @@ CCL_NAMESPACE_BEGIN
|
|||
|
||||
/* Geometry Node */
|
||||
|
||||
ccl_device void svm_node_geometry(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
|
||||
ccl_device_inline void svm_node_geometry(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
float *stack,
|
||||
uint type,
|
||||
uint out_offset)
|
||||
{
|
||||
float3 data;
|
||||
|
||||
|
@ -94,7 +98,11 @@ ccl_device void svm_node_object_info(KernelGlobals *kg, ShaderData *sd, float *s
|
|||
|
||||
/* Particle Info */
|
||||
|
||||
ccl_device void svm_node_particle_info(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
|
||||
ccl_device void svm_node_particle_info(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
float *stack,
|
||||
uint type,
|
||||
uint out_offset)
|
||||
{
|
||||
switch(type) {
|
||||
case NODE_INFO_PAR_INDEX: {
|
||||
|
@ -146,7 +154,11 @@ ccl_device void svm_node_particle_info(KernelGlobals *kg, ShaderData *sd, float
|
|||
|
||||
/* Hair Info */
|
||||
|
||||
ccl_device void svm_node_hair_info(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
|
||||
ccl_device void svm_node_hair_info(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
float *stack,
|
||||
uint type,
|
||||
uint out_offset)
|
||||
{
|
||||
float data;
|
||||
float3 data3;
|
||||
|
|
|
@ -21,12 +21,12 @@ CCL_NAMESPACE_BEGIN
|
|||
|
||||
/* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */
|
||||
|
||||
ccl_device float4 rgb_ramp_lookup(KernelGlobals *kg,
|
||||
int offset,
|
||||
float f,
|
||||
bool interpolate,
|
||||
bool extrapolate,
|
||||
int table_size)
|
||||
ccl_device_inline float4 rgb_ramp_lookup(KernelGlobals *kg,
|
||||
int offset,
|
||||
float f,
|
||||
bool interpolate,
|
||||
bool extrapolate,
|
||||
int table_size)
|
||||
{
|
||||
if((f < 0.0f || f > 1.0f) && extrapolate) {
|
||||
float4 t0, dy;
|
||||
|
|
|
@ -21,11 +21,11 @@ CCL_NAMESPACE_BEGIN
|
|||
|
||||
/* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */
|
||||
|
||||
ccl_device float3 rgb_ramp_lookup(const float3 *ramp,
|
||||
float f,
|
||||
bool interpolate,
|
||||
bool extrapolate,
|
||||
int table_size)
|
||||
ccl_device_inline float3 rgb_ramp_lookup(const float3 *ramp,
|
||||
float f,
|
||||
bool interpolate,
|
||||
bool extrapolate,
|
||||
int table_size)
|
||||
{
|
||||
if ((f < 0.0f || f > 1.0f) && extrapolate) {
|
||||
float3 t0, dy;
|
||||
|
|
|
@ -99,12 +99,12 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg,
|
|||
stack_store_float3(stack, out_offset, data);
|
||||
}
|
||||
|
||||
ccl_device_inline void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
int path_flag,
|
||||
float *stack,
|
||||
uint4 node,
|
||||
int *offset)
|
||||
ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
int path_flag,
|
||||
float *stack,
|
||||
uint4 node,
|
||||
int *offset)
|
||||
{
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
float3 data;
|
||||
|
@ -184,12 +184,12 @@ ccl_device_inline void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
|
|||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
int path_flag,
|
||||
float *stack,
|
||||
uint4 node,
|
||||
int *offset)
|
||||
ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
|
||||
ShaderData *sd,
|
||||
int path_flag,
|
||||
float *stack,
|
||||
uint4 node,
|
||||
int *offset)
|
||||
{
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
float3 data;
|
||||
|
|
|
@ -1477,10 +1477,10 @@ ccl_device bool ray_triangle_intersect(
|
|||
return true;
|
||||
}
|
||||
|
||||
ccl_device bool ray_triangle_intersect_uv(
|
||||
float3 ray_P, float3 ray_D, float ray_t,
|
||||
float3 v0, float3 v1, float3 v2,
|
||||
float *isect_u, float *isect_v, float *isect_t)
|
||||
ccl_device_inline bool ray_triangle_intersect_uv(
|
||||
float3 ray_P, float3 ray_D, float ray_t,
|
||||
float3 v0, float3 v1, float3 v2,
|
||||
float *isect_u, float *isect_v, float *isect_t)
|
||||
{
|
||||
/* Calculate intersection */
|
||||
float3 e1 = v1 - v0;
|
||||
|
|
Loading…
Reference in New Issue