Fix T92363: OptiX fails with ambient occlusion node, after recent changes

This triggered a compiler bug in which the sub.s16 PTX instruction is not
handled. Instead, refactor the code so we don't need to do uint16_t
subtraction at all.

Also update OptiX device to remove the AO pass direct callable.

Thanks to Patrick Mours for figuring this out.
This commit is contained in:
Brecht Van Lommel 2021-10-21 17:00:37 +02:00
parent aea2287af3
commit be558d2d97
Notes: blender-bot 2023-02-14 08:08:56 +01:00
Referenced by issue #92363, OptiX kernels fail to load when Ambient Occlusion node is used
6 changed files with 8 additions and 17 deletions

View File

@ -377,9 +377,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
group_descs[PG_CALL_SVM_BEVEL].callables.moduleDC = optix_module;
group_descs[PG_CALL_SVM_BEVEL].callables.entryFunctionNameDC =
"__direct_callable__svm_node_bevel";
group_descs[PG_CALL_AO_PASS].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
group_descs[PG_CALL_AO_PASS].callables.moduleDC = optix_module;
group_descs[PG_CALL_AO_PASS].callables.entryFunctionNameDC = "__direct_callable__ao_pass";
}
optix_assert(optixProgramGroupCreate(

View File

@ -45,7 +45,6 @@ enum {
PG_HITS_MOTION,
PG_CALL_SVM_AO,
PG_CALL_SVM_BEVEL,
PG_CALL_AO_PASS,
NUM_PROGRAM_GROUPS
};

View File

@ -136,6 +136,7 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput;
INTEGRATOR_STATE_WRITE(state, shadow_path, transparent_bounce) += 1;
INTEGRATOR_STATE_WRITE(state, shadow_path, rng_offset) += PRNG_BOUNCE_NUM;
}
/* Note we do not need to check max_transparent_bounce here, the number

View File

@ -200,9 +200,8 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE(
state, path, render_pixel_index);
INTEGRATOR_STATE_WRITE(
shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE(state, path, rng_offset) -
PRNG_BOUNCE_NUM * transparent_bounce;
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE(
state, path, rng_offset);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_hash) = INTEGRATOR_STATE(
state, path, rng_hash);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, sample) = INTEGRATOR_STATE(
@ -370,9 +369,8 @@ ccl_device_forceinline void integrate_surface_ao_pass(
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE(
state, path, render_pixel_index);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_offset) =
INTEGRATOR_STATE(state, path, rng_offset) -
PRNG_BOUNCE_NUM * INTEGRATOR_STATE(state, path, transparent_bounce);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE(
state, path, rng_offset);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_hash) = INTEGRATOR_STATE(
state, path, rng_hash);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, sample) = INTEGRATOR_STATE(

View File

@ -799,9 +799,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE(
state, path, render_pixel_index);
INTEGRATOR_STATE_WRITE(
shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE(state, path, rng_offset) -
PRNG_BOUNCE_NUM * transparent_bounce;
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE(
state, path, rng_offset);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_hash) = INTEGRATOR_STATE(
state, path, rng_hash);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, sample) = INTEGRATOR_STATE(

View File

@ -299,11 +299,8 @@ ccl_device_inline void path_state_rng_load(ConstIntegratorState state,
ccl_device_inline void shadow_path_state_rng_load(ConstIntegratorShadowState state,
ccl_private RNGState *rng_state)
{
const uint shadow_bounces = INTEGRATOR_STATE(state, shadow_path, transparent_bounce);
rng_state->rng_hash = INTEGRATOR_STATE(state, shadow_path, rng_hash);
rng_state->rng_offset = INTEGRATOR_STATE(state, shadow_path, rng_offset) +
PRNG_BOUNCE_NUM * shadow_bounces;
rng_state->rng_offset = INTEGRATOR_STATE(state, shadow_path, rng_offset);
rng_state->sample = INTEGRATOR_STATE(state, shadow_path, sample);
}