Fix T92972: Cycles HIP wrong render display after a recent refactor
It's unclear why this fails. Either the size of half4 is not the expected 8 bytes and adjacent pixels are overwritten, or there is a bug in the HIP compiler when writing a struct into global memory, which we probably don't do elsewhere in the kernel. Thanks to Thomas, William and Jeroen for helping investigate this.
This commit is contained in:
parent
c8e93da0a7
commit
6b0008129e
Notes:
blender-bot
2023-02-14 08:42:53 +01:00
Referenced by issue #92972, Cycles HIP graphics interop issue on AMD 5500 XT and Windows
|
@ -486,6 +486,26 @@ ccl_device_inline void kernel_gpu_film_convert_common(const KernelFilmConvert *k
|
|||
processor(kfilm_convert, buffer, pixel);
|
||||
}
|
||||
|
||||
/* Store one half4 pixel into the `rgba` destination buffer.
 *
 * `rgba` is declared as a uchar4 pointer but is reinterpreted here as half-float data.
 * The destination pixel index is `rgba_offset + y * rgba_stride + x`, counted in units of
 * one 4-component half pixel from the start of `rgba`.
 *
 * Both preprocessor branches address the same pixel index; they differ only in whether the
 * pixel is stored as four separate half values (HIP) or as a single half4 assignment. */
ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgba,
|
||||
const int rgba_offset,
|
||||
const int rgba_stride,
|
||||
const int x,
|
||||
const int y,
|
||||
const half4 half_pixel)
|
||||
{
|
||||
/* Work around HIP issue with half float display, see T92972. */
/* NOTE(review): the root cause of the HIP issue was not identified; keep the component-wise
 * stores below as they are unless the problem has been re-tested on HIP. */
|
||||
#ifdef __KERNEL_HIP__
|
||||
/* Address the buffer as scalar halves: the pixel index is multiplied by 4 because each
 * pixel is written as four consecutive half values (x, y, z, w). */
ccl_global half *out = ((ccl_global half *)rgba) + (rgba_offset + y * rgba_stride + x) * 4;
|
||||
out[0] = half_pixel.x;
|
||||
out[1] = half_pixel.y;
|
||||
out[2] = half_pixel.z;
|
||||
out[3] = half_pixel.w;
|
||||
#else
|
||||
/* Other backends: address the buffer as half4 elements and store the pixel in one
 * assignment. */
ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
|
||||
*out = half_pixel;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Common implementation for half4 destination and 4-channel input pass. */
|
||||
template<typename Processor>
|
||||
ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba(
|
||||
|
@ -516,8 +536,9 @@ ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba(
|
|||
|
||||
film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel);
|
||||
|
||||
ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
|
||||
*out = float4_to_half4_display(make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
|
||||
const half4 half_pixel = float4_to_half4_display(
|
||||
make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
|
||||
kernel_gpu_film_convert_half_write(rgba, rgba_offset, rgba_stride, x, y, half_pixel);
|
||||
}
|
||||
|
||||
/* Common implementation for half4 destination and 3-channel input pass. */
|
||||
|
|
Loading…
Reference in New Issue