Cycles: reduce GPU state memory a little
* isect Ng is no longer needed for shadows; for the main path it is needed for SSS only.
* Reduce rng_offset and queued_kernel to 16 bits.

Ref D12889
This commit is contained in:
parent
3065d26097
commit
2430f75279
|
@ -107,7 +107,6 @@ ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
|
|||
Intersection *isect)
|
||||
{
|
||||
isect->t = ray->tfar;
|
||||
isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
|
||||
if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
|
||||
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
|
||||
rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
|
||||
|
@ -142,7 +141,6 @@ ccl_device_inline void kernel_embree_convert_sss_hit(
|
|||
isect->u = 1.0f - hit->v - hit->u;
|
||||
isect->v = hit->u;
|
||||
isect->t = ray->tfar;
|
||||
isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
|
||||
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
|
||||
rtcGetGeometry(kernel_data.bvh.scene, object * 2));
|
||||
isect->prim = hit->primID +
|
||||
|
|
|
@ -180,9 +180,6 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
|
|||
isect.v = v;
|
||||
isect.t = 1.0f;
|
||||
isect.type = PRIMITIVE_TRIANGLE;
|
||||
#ifdef __EMBREE__
|
||||
isect.Ng = Ng;
|
||||
#endif
|
||||
integrator_state_write_isect(kg, state, &isect);
|
||||
|
||||
/* Setup next kernel to execute. */
|
||||
|
|
|
@ -40,13 +40,12 @@ KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounce, KERNEL_FEATURE_PATH_TRACING)
|
|||
KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounds_bounce, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* Current transparent ray bounce depth. */
|
||||
KERNEL_STRUCT_MEMBER(path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* DeviceKernel bit indicating queued kernels.
|
||||
* TODO: reduce size? */
|
||||
KERNEL_STRUCT_MEMBER(path, uint32_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* DeviceKernel bit indicating queued kernels. */
|
||||
KERNEL_STRUCT_MEMBER(path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* Random number generator seed. */
|
||||
KERNEL_STRUCT_MEMBER(path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* Random number dimension offset. */
|
||||
KERNEL_STRUCT_MEMBER(path, uint32_t, rng_offset, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(path, uint16_t, rng_offset, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* enum PathRayFlag */
|
||||
KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* Multiple importance sampling
|
||||
|
@ -89,8 +88,6 @@ KERNEL_STRUCT_MEMBER(isect, float, v, KERNEL_FEATURE_PATH_TRACING)
|
|||
KERNEL_STRUCT_MEMBER(isect, int, prim, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(isect, int, object, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_MEMBER(isect, int, type, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* TODO: exclude for GPU. */
|
||||
KERNEL_STRUCT_MEMBER(isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_END(isect)
|
||||
|
||||
/*************** Subsurface closure state for subsurface kernel ***************/
|
||||
|
@ -99,6 +96,7 @@ KERNEL_STRUCT_BEGIN(subsurface)
|
|||
KERNEL_STRUCT_MEMBER(subsurface, float3, albedo, KERNEL_FEATURE_SUBSURFACE)
|
||||
KERNEL_STRUCT_MEMBER(subsurface, float3, radius, KERNEL_FEATURE_SUBSURFACE)
|
||||
KERNEL_STRUCT_MEMBER(subsurface, float, anisotropy, KERNEL_FEATURE_SUBSURFACE)
|
||||
KERNEL_STRUCT_MEMBER(subsurface, float3, Ng, KERNEL_FEATURE_SUBSURFACE)
|
||||
KERNEL_STRUCT_END(subsurface)
|
||||
|
||||
/********************************** Volume Stack ******************************/
|
||||
|
@ -117,9 +115,8 @@ KERNEL_STRUCT_BEGIN(shadow_path)
|
|||
KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* Current transparent ray bounce depth. */
|
||||
KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* DeviceKernel bit indicating queued kernels.
|
||||
* TODO: reduce size? */
|
||||
KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* DeviceKernel bit indicating queued kernels. */
|
||||
KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* enum PathRayFlag */
|
||||
KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* Throughput. */
|
||||
|
@ -152,8 +149,6 @@ KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, v, KERNEL_FEATURE_PATH_TRACING)
|
|||
KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, prim, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, object, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, type, KERNEL_FEATURE_PATH_TRACING)
|
||||
/* TODO: exclude for GPU. */
|
||||
KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING)
|
||||
KERNEL_STRUCT_END_ARRAY(shadow_isect,
|
||||
INTEGRATOR_SHADOW_ISECT_SIZE_CPU,
|
||||
INTEGRATOR_SHADOW_ISECT_SIZE_GPU)
|
||||
|
|
|
@ -82,9 +82,6 @@ ccl_device_forceinline void integrator_state_write_isect(
|
|||
INTEGRATOR_STATE_WRITE(state, isect, object) = isect->object;
|
||||
INTEGRATOR_STATE_WRITE(state, isect, prim) = isect->prim;
|
||||
INTEGRATOR_STATE_WRITE(state, isect, type) = isect->type;
|
||||
#ifdef __EMBREE__
|
||||
INTEGRATOR_STATE_WRITE(state, isect, Ng) = isect->Ng;
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_forceinline void integrator_state_read_isect(
|
||||
|
@ -96,9 +93,6 @@ ccl_device_forceinline void integrator_state_read_isect(
|
|||
isect->u = INTEGRATOR_STATE(state, isect, u);
|
||||
isect->v = INTEGRATOR_STATE(state, isect, v);
|
||||
isect->t = INTEGRATOR_STATE(state, isect, t);
|
||||
#ifdef __EMBREE__
|
||||
isect->Ng = INTEGRATOR_STATE(state, isect, Ng);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_forceinline VolumeStack integrator_state_read_volume_stack(ConstIntegratorState state,
|
||||
|
@ -136,9 +130,6 @@ ccl_device_forceinline void integrator_state_write_shadow_isect(
|
|||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, object) = isect->object;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, prim) = isect->prim;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, type) = isect->type;
|
||||
#ifdef __EMBREE__
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, Ng) = isect->Ng;
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_forceinline void integrator_state_read_shadow_isect(
|
||||
|
@ -150,9 +141,6 @@ ccl_device_forceinline void integrator_state_read_shadow_isect(
|
|||
isect->u = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, u);
|
||||
isect->v = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, v);
|
||||
isect->t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, t);
|
||||
#ifdef __EMBREE__
|
||||
isect->Ng = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, Ng);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(KernelGlobals kg,
|
||||
|
|
|
@ -56,7 +56,7 @@ ccl_device int subsurface_bounce(KernelGlobals kg,
|
|||
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact();
|
||||
|
||||
/* Pass along object info, reusing isect to save memory. */
|
||||
INTEGRATOR_STATE_WRITE(state, isect, Ng) = sd->Ng;
|
||||
INTEGRATOR_STATE_WRITE(state, subsurface, Ng) = sd->Ng;
|
||||
INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object;
|
||||
|
||||
uint32_t path_flag = (INTEGRATOR_STATE(state, path, flag) & ~PATH_RAY_CAMERA) |
|
||||
|
@ -160,7 +160,7 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
|
|||
|
||||
if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) {
|
||||
float3 P = INTEGRATOR_STATE(state, ray, P);
|
||||
const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
|
||||
const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng);
|
||||
const float3 offset_P = ray_offset(P, -Ng);
|
||||
|
||||
integrator_volume_stack_update_for_subsurface(kg, state, offset_P, ray.P);
|
||||
|
|
|
@ -45,7 +45,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
|
|||
const float3 P = INTEGRATOR_STATE(state, ray, P);
|
||||
const float ray_dP = INTEGRATOR_STATE(state, ray, dP);
|
||||
const float time = INTEGRATOR_STATE(state, ray, time);
|
||||
const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
|
||||
const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng);
|
||||
const int object = INTEGRATOR_STATE(state, isect, object);
|
||||
|
||||
/* Read subsurface scattering parameters. */
|
||||
|
|
|
@ -193,7 +193,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
|
|||
const float3 N = INTEGRATOR_STATE(state, ray, D);
|
||||
const float ray_dP = INTEGRATOR_STATE(state, ray, dP);
|
||||
const float time = INTEGRATOR_STATE(state, ray, time);
|
||||
const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
|
||||
const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng);
|
||||
const int object = INTEGRATOR_STATE(state, isect, object);
|
||||
|
||||
/* Sample diffuse surface scatter into the object. */
|
||||
|
|
|
@ -489,9 +489,6 @@ typedef struct Ray {
|
|||
/* Intersection */
|
||||
|
||||
typedef struct Intersection {
|
||||
#ifdef __EMBREE__
|
||||
float3 Ng;
|
||||
#endif
|
||||
float t, u, v;
|
||||
int prim;
|
||||
int object;
|
||||
|
|
Loading…
Reference in New Issue