Cycles: reduce GPU state memory a little

* isect Ng is no longer needed for shadows, for main path needed for SSS only
* Reduce rng_offset and queued_kernel to 16 bits

Ref D12889
This commit is contained in:
Brecht Van Lommel 2021-10-18 17:53:32 +02:00
parent 3065d26097
commit 2430f75279
8 changed files with 10 additions and 35 deletions

View File

@ -107,7 +107,6 @@ ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
Intersection *isect)
{
isect->t = ray->tfar;
isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
@ -142,7 +141,6 @@ ccl_device_inline void kernel_embree_convert_sss_hit(
isect->u = 1.0f - hit->v - hit->u;
isect->v = hit->u;
isect->t = ray->tfar;
isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.bvh.scene, object * 2));
isect->prim = hit->primID +

View File

@ -180,9 +180,6 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
isect.v = v;
isect.t = 1.0f;
isect.type = PRIMITIVE_TRIANGLE;
#ifdef __EMBREE__
isect.Ng = Ng;
#endif
integrator_state_write_isect(kg, state, &isect);
/* Setup next kernel to execute. */

View File

@ -40,13 +40,12 @@ KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounce, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounds_bounce, KERNEL_FEATURE_PATH_TRACING)
/* Current transparent ray bounce depth. */
KERNEL_STRUCT_MEMBER(path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING)
/* DeviceKernel bit indicating queued kernels.
* TODO: reduce size? */
KERNEL_STRUCT_MEMBER(path, uint32_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
/* DeviceKernel bit indicating queued kernels. */
KERNEL_STRUCT_MEMBER(path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
/* Random number generator seed. */
KERNEL_STRUCT_MEMBER(path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING)
/* Random number dimension offset. */
KERNEL_STRUCT_MEMBER(path, uint32_t, rng_offset, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(path, uint16_t, rng_offset, KERNEL_FEATURE_PATH_TRACING)
/* enum PathRayFlag */
KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
/* Multiple importance sampling
@ -89,8 +88,6 @@ KERNEL_STRUCT_MEMBER(isect, float, v, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(isect, int, prim, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(isect, int, object, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(isect, int, type, KERNEL_FEATURE_PATH_TRACING)
/* TODO: exclude for GPU. */
KERNEL_STRUCT_MEMBER(isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_END(isect)
/*************** Subsurface closure state for subsurface kernel ***************/
@ -99,6 +96,7 @@ KERNEL_STRUCT_BEGIN(subsurface)
KERNEL_STRUCT_MEMBER(subsurface, float3, albedo, KERNEL_FEATURE_SUBSURFACE)
KERNEL_STRUCT_MEMBER(subsurface, float3, radius, KERNEL_FEATURE_SUBSURFACE)
KERNEL_STRUCT_MEMBER(subsurface, float, anisotropy, KERNEL_FEATURE_SUBSURFACE)
KERNEL_STRUCT_MEMBER(subsurface, float3, Ng, KERNEL_FEATURE_SUBSURFACE)
KERNEL_STRUCT_END(subsurface)
/********************************** Volume Stack ******************************/
@ -117,9 +115,8 @@ KERNEL_STRUCT_BEGIN(shadow_path)
KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING)
/* Current transparent ray bounce depth. */
KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING)
/* DeviceKernel bit indicating queued kernels.
* TODO: reduce size? */
KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
/* DeviceKernel bit indicating queued kernels. */
KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
/* enum PathRayFlag */
KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
/* Throughput. */
@ -152,8 +149,6 @@ KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, v, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, prim, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, object, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, type, KERNEL_FEATURE_PATH_TRACING)
/* TODO: exclude for GPU. */
KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_END_ARRAY(shadow_isect,
INTEGRATOR_SHADOW_ISECT_SIZE_CPU,
INTEGRATOR_SHADOW_ISECT_SIZE_GPU)

View File

@ -82,9 +82,6 @@ ccl_device_forceinline void integrator_state_write_isect(
INTEGRATOR_STATE_WRITE(state, isect, object) = isect->object;
INTEGRATOR_STATE_WRITE(state, isect, prim) = isect->prim;
INTEGRATOR_STATE_WRITE(state, isect, type) = isect->type;
#ifdef __EMBREE__
INTEGRATOR_STATE_WRITE(state, isect, Ng) = isect->Ng;
#endif
}
ccl_device_forceinline void integrator_state_read_isect(
@ -96,9 +93,6 @@ ccl_device_forceinline void integrator_state_read_isect(
isect->u = INTEGRATOR_STATE(state, isect, u);
isect->v = INTEGRATOR_STATE(state, isect, v);
isect->t = INTEGRATOR_STATE(state, isect, t);
#ifdef __EMBREE__
isect->Ng = INTEGRATOR_STATE(state, isect, Ng);
#endif
}
ccl_device_forceinline VolumeStack integrator_state_read_volume_stack(ConstIntegratorState state,
@ -136,9 +130,6 @@ ccl_device_forceinline void integrator_state_write_shadow_isect(
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, object) = isect->object;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, prim) = isect->prim;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, type) = isect->type;
#ifdef __EMBREE__
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, Ng) = isect->Ng;
#endif
}
ccl_device_forceinline void integrator_state_read_shadow_isect(
@ -150,9 +141,6 @@ ccl_device_forceinline void integrator_state_read_shadow_isect(
isect->u = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, u);
isect->v = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, v);
isect->t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, t);
#ifdef __EMBREE__
isect->Ng = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, Ng);
#endif
}
ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(KernelGlobals kg,

View File

@ -56,7 +56,7 @@ ccl_device int subsurface_bounce(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact();
/* Pass along object info, reusing isect to save memory. */
INTEGRATOR_STATE_WRITE(state, isect, Ng) = sd->Ng;
INTEGRATOR_STATE_WRITE(state, subsurface, Ng) = sd->Ng;
INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object;
uint32_t path_flag = (INTEGRATOR_STATE(state, path, flag) & ~PATH_RAY_CAMERA) |
@ -160,7 +160,7 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) {
float3 P = INTEGRATOR_STATE(state, ray, P);
const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng);
const float3 offset_P = ray_offset(P, -Ng);
integrator_volume_stack_update_for_subsurface(kg, state, offset_P, ray.P);

View File

@ -45,7 +45,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
const float3 P = INTEGRATOR_STATE(state, ray, P);
const float ray_dP = INTEGRATOR_STATE(state, ray, dP);
const float time = INTEGRATOR_STATE(state, ray, time);
const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng);
const int object = INTEGRATOR_STATE(state, isect, object);
/* Read subsurface scattering parameters. */

View File

@ -193,7 +193,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
const float3 N = INTEGRATOR_STATE(state, ray, D);
const float ray_dP = INTEGRATOR_STATE(state, ray, dP);
const float time = INTEGRATOR_STATE(state, ray, time);
const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng);
const int object = INTEGRATOR_STATE(state, isect, object);
/* Sample diffuse surface scatter into the object. */

View File

@ -489,9 +489,6 @@ typedef struct Ray {
/* Intersection */
typedef struct Intersection {
#ifdef __EMBREE__
float3 Ng;
#endif
float t, u, v;
int prim;
int object;