Merge branch 'blender-v3.0-release'

This commit is contained in:
Brecht Van Lommel 2021-11-10 20:19:09 +01:00
commit 3fa86f4b28
5 changed files with 36 additions and 4 deletions

View File

@ -440,7 +440,11 @@ mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
mark_as_advanced(WITH_CUDA_DYNLOAD)
# AMD HIP
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
if(WIN32)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" ON)
else()
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles AMD HIP support" OFF)
endif()
option(WITH_CYCLES_HIP_BINARIES "Build Cycles AMD HIP binaries" OFF)
set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 CACHE STRING "AMD HIP architectures to build binaries for")
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)

View File

@ -81,4 +81,5 @@ if(NOT APPLE)
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUBIN_COMPILER OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
endif()

View File

@ -1160,6 +1160,8 @@ bool HIPDevice::should_use_graphics_interop()
* possible, but from the empiric measurements it can be considerably slower than using naive
* pixels copy. */
/* Disable graphics interop for now, because of driver bug in 21.40. See T92972 */
# if 0
HIPContextScope scope(this);
int num_all_devices = 0;
@ -1178,6 +1180,7 @@ bool HIPDevice::should_use_graphics_interop()
return true;
}
}
# endif
return false;
}

View File

@ -33,7 +33,10 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoisePa
return make_unique<OptiXDenoiser>(path_trace_device, params);
}
return make_unique<OIDNDenoiser>(path_trace_device, params);
/* Always fallback to OIDN. */
DenoiseParams oidn_params = params;
oidn_params.type = DENOISER_OPENIMAGEDENOISE;
return make_unique<OIDNDenoiser>(path_trace_device, oidn_params);
}
Denoiser::Denoiser(Device *path_trace_device, const DenoiseParams &params)

View File

@ -523,6 +523,26 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
* Film.
*/
ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgba,
const int rgba_offset,
const int rgba_stride,
const int x,
const int y,
const half4 half_pixel)
{
/* Work around HIP issue with half float display, see T92972. */
#ifdef __KERNEL_HIP__
ccl_global half *out = ((ccl_global half *)rgba) + (rgba_offset + y * rgba_stride + x) * 4;
out[0] = half_pixel.x;
out[1] = half_pixel.y;
out[2] = half_pixel.z;
out[3] = half_pixel.w;
#else
ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
*out = half_pixel;
#endif
}
#define KERNEL_FILM_CONVERT_VARIANT(variant, input_channel_count) \
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \
ccl_gpu_kernel_signature(film_convert_##variant, \
@ -588,8 +608,9 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
\
film_apply_pass_pixel_overlays_rgba(&kfilm_convert, buffer, pixel); \
\
ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x; \
*out = float4_to_half4_display(make_float4(pixel[0], pixel[1], pixel[2], pixel[3])); \
const half4 half_pixel = float4_to_half4_display( \
make_float4(pixel[0], pixel[1], pixel[2], pixel[3])); \
kernel_gpu_film_convert_half_write(rgba, rgba_offset, rgba_stride, x, y, half_pixel); \
}
/* 1 channel inputs */