Fix T92972: Cycles HIP wrong render display after a recent refactor

It is unclear why this fails. Perhaps the size of half4 is not the expected
8 bytes, so adjacent pixels get overwritten. Alternatively, there may be a bug
in the HIP compiler when writing a struct into global memory, which we probably
don't do elsewhere in the kernel.

Thanks to Thomas, William and Jeroen for helping investigate this.
This commit is contained in:
Brecht Van Lommel 2021-11-10 19:43:19 +01:00
parent c8e93da0a7
commit 6b0008129e
Notes: blender-bot 2023-02-14 08:42:53 +01:00
Referenced by issue #92972, Cycles HIP graphics interop issue on AMD 5500 XT and Windows
1 changed file with 23 additions and 2 deletions

View File

@@ -486,6 +486,26 @@ ccl_device_inline void kernel_gpu_film_convert_common(const KernelFilmConvert *k
processor(kfilm_convert, buffer, pixel);
}
/* Store one half-float RGBA pixel into the destination buffer.
 *
 * `rgba` is reinterpreted as a buffer of half4 elements and `half_pixel` is written to the
 * element at index `rgba_offset + y * rgba_stride + x`. */
ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgba,
                                                          const int rgba_offset,
                                                          const int rgba_stride,
                                                          const int x,
                                                          const int y,
                                                          const half4 half_pixel)
{
#ifdef __KERNEL_HIP__
  /* HIP workaround for the half float display issue (see T92972): store the four components
   * one at a time as scalar halves instead of assigning the whole half4 struct at once. */
  const int pixel_index = rgba_offset + y * rgba_stride + x;
  ccl_global half *channels = (ccl_global half *)rgba;
  channels += pixel_index * 4;
  channels[0] = half_pixel.x;
  channels[1] = half_pixel.y;
  channels[2] = half_pixel.z;
  channels[3] = half_pixel.w;
#else
  /* All other backends: advance to the target pixel and assign the half4 in one go. */
  ccl_global half4 *dst = (ccl_global half4 *)rgba;
  dst += rgba_offset;
  dst += y * rgba_stride;
  dst += x;
  *dst = half_pixel;
#endif
}
/* Common implementation for half4 destination and 4-channel input pass. */
template<typename Processor>
ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba(
@@ -516,8 +536,9 @@ ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba(
film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel);
ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
*out = float4_to_half4_display(make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
const half4 half_pixel = float4_to_half4_display(
make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
kernel_gpu_film_convert_half_write(rgba, rgba_offset, rgba_stride, x, y, half_pixel);
}
/* Common implementation for half4 destination and 3-channel input pass. */