Metal: Adding alternative support for GPU_PRIM_TRI_FAN/LINE_LOOP for the Metal backend.
- Metal uniform array compatibility in the DRW module.
- Guard OpenGL-specific workarounds and flushes behind the GPU_type_matches_ex API guard.
- Add further render boundaries for render paths called outside of the main loop.

Authored by Apple: Michael Parkin-White
Ref: T96261
Reviewed By: fclem
Differential Revision: https://developer.blender.org/D14438
This commit is contained in:
parent
84fde382e4
commit
922d53a791
Notes:
blender-bot
2023-02-13 15:51:22 +01:00
Referenced by issue #96920, Regression: Hair strands are drawn in wrong place.
|
@ -108,7 +108,7 @@ void EEVEE_effects_init(EEVEE_ViewLayerData *sldata,
|
|||
* MinMax Pyramid
|
||||
*/
|
||||
|
||||
if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_ANY, GPU_DRIVER_ANY)) {
|
||||
if (GPU_type_matches_ex(GPU_DEVICE_INTEL, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
|
||||
/* Intel gpu seems to have problem rendering to only depth hiz_format */
|
||||
DRW_texture_ensure_2d(&txl->maxzbuffer, UNPACK2(effects->hiz_size), GPU_R32F, DRW_TEX_MIPMAP);
|
||||
GPU_framebuffer_ensure_config(&fbl->maxzbuffer_fb,
|
||||
|
@ -230,7 +230,7 @@ void EEVEE_effects_cache_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
|
|||
|
||||
/* Intel gpu seems to have problem rendering to only depth format.
|
||||
* Use color texture instead. */
|
||||
if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_ANY, GPU_DRIVER_ANY)) {
|
||||
if (GPU_type_matches_ex(GPU_DEVICE_INTEL, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
|
||||
downsample_write = DRW_STATE_WRITE_COLOR;
|
||||
}
|
||||
|
||||
|
|
|
@ -200,7 +200,8 @@ void EEVEE_occlusion_compute(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
|
|||
}
|
||||
|
||||
if (GPU_mip_render_workaround() ||
|
||||
GPU_type_matches(GPU_DEVICE_INTEL_UHD, GPU_OS_WIN, GPU_DRIVER_ANY)) {
|
||||
GPU_type_matches_ex(
|
||||
GPU_DEVICE_INTEL_UHD, GPU_OS_WIN, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
|
||||
/* Fix dot corruption on intel HD5XX/HD6XX series. */
|
||||
GPU_flush();
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
|
||||
#include "GPU_batch.h"
|
||||
#include "GPU_batch_utils.h"
|
||||
#include "GPU_capabilities.h"
|
||||
|
||||
#include "MEM_guardedalloc.h"
|
||||
|
||||
|
@ -395,12 +396,12 @@ GPUBatch *DRW_cache_quad_get(void)
|
|||
|
||||
int v = 0;
|
||||
int flag = VCLASS_EMPTY_SCALED;
|
||||
const float p[4][2] = {{-1.0f, -1.0f}, {-1.0f, 1.0f}, {1.0f, 1.0f}, {1.0f, -1.0f}};
|
||||
const float p[4][2] = {{-1.0f, 1.0f}, {1.0f, 1.0f}, {-1.0f, -1.0f}, {1.0f, -1.0f}};
|
||||
for (int a = 0; a < 4; a++) {
|
||||
GPU_vertbuf_vert_set(vbo, v++, &(Vert){{p[a][0], p[a][1], 0.0f}, flag});
|
||||
}
|
||||
|
||||
SHC.drw_quad = GPU_batch_create_ex(GPU_PRIM_TRI_FAN, vbo, NULL, GPU_BATCH_OWNS_VBO);
|
||||
SHC.drw_quad = GPU_batch_create_ex(GPU_PRIM_TRI_STRIP, vbo, NULL, GPU_BATCH_OWNS_VBO);
|
||||
}
|
||||
return SHC.drw_quad;
|
||||
}
|
||||
|
|
|
@ -55,6 +55,7 @@
|
|||
#include "GPU_framebuffer.h"
|
||||
#include "GPU_immediate.h"
|
||||
#include "GPU_matrix.h"
|
||||
#include "GPU_platform.h"
|
||||
#include "GPU_shader_shared.h"
|
||||
#include "GPU_state.h"
|
||||
#include "GPU_uniform_buffer.h"
|
||||
|
@ -1706,7 +1707,9 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph,
|
|||
drw_engines_draw_scene();
|
||||
|
||||
/* Fix 3D view "lagging" on APPLE and WIN32+NVIDIA. (See T56996, T61474) */
|
||||
GPU_flush();
|
||||
if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
|
||||
GPU_flush();
|
||||
}
|
||||
|
||||
DRW_stats_reset();
|
||||
|
||||
|
@ -1938,6 +1941,9 @@ void DRW_render_to_image(RenderEngine *engine, struct Depsgraph *depsgraph)
|
|||
};
|
||||
drw_context_state_init();
|
||||
|
||||
/* Begin GPU workload Boundary */
|
||||
GPU_render_begin();
|
||||
|
||||
const int size[2] = {engine->resolution_x, engine->resolution_y};
|
||||
|
||||
drw_manager_init(&DST, NULL, size);
|
||||
|
@ -1993,6 +1999,9 @@ void DRW_render_to_image(RenderEngine *engine, struct Depsgraph *depsgraph)
|
|||
|
||||
/* Reset state after drawing */
|
||||
DRW_state_reset();
|
||||
|
||||
/* End GPU workload Boundary */
|
||||
GPU_render_end();
|
||||
}
|
||||
|
||||
void DRW_render_object_iter(
|
||||
|
@ -2072,7 +2081,10 @@ void DRW_custom_pipeline(DrawEngineType *draw_engine_type,
|
|||
* resources as the main thread (viewport) may lead to data
|
||||
* races and undefined behavior on certain drivers. Using
|
||||
* GPU_finish to sync seems to fix the issue. (see T62997) */
|
||||
GPU_finish();
|
||||
eGPUBackendType type = GPU_backend_get_type();
|
||||
if (type == GPU_BACKEND_OPENGL) {
|
||||
GPU_finish();
|
||||
}
|
||||
|
||||
drw_manager_exit(&DST);
|
||||
}
|
||||
|
@ -2173,7 +2185,9 @@ void DRW_draw_render_loop_2d_ex(struct Depsgraph *depsgraph,
|
|||
drw_engines_draw_scene();
|
||||
|
||||
/* Fix 3D view being "laggy" on macos and win+nvidia. (See T56996, T61474) */
|
||||
GPU_flush();
|
||||
if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
|
||||
GPU_flush();
|
||||
}
|
||||
|
||||
if (DST.draw_ctx.evil_C) {
|
||||
DefaultFramebufferList *dfbl = DRW_viewport_framebuffer_list_get();
|
||||
|
@ -3094,6 +3108,7 @@ void DRW_opengl_context_enable_ex(bool UNUSED(restore))
|
|||
* This shall remain in effect until immediate mode supports
|
||||
* multiple threads. */
|
||||
BLI_ticket_mutex_lock(DST.gl_context_mutex);
|
||||
GPU_render_begin();
|
||||
WM_opengl_context_activate(DST.gl_context);
|
||||
GPU_context_active_set(DST.gpu_context);
|
||||
}
|
||||
|
@ -3105,7 +3120,9 @@ void DRW_opengl_context_disable_ex(bool restore)
|
|||
#ifdef __APPLE__
|
||||
/* Need to flush before disabling draw context, otherwise it does not
|
||||
* always finish drawing and viewport can be empty or partially drawn */
|
||||
GPU_flush();
|
||||
if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
|
||||
GPU_flush();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (BLI_thread_is_main() && restore) {
|
||||
|
@ -3116,6 +3133,10 @@ void DRW_opengl_context_disable_ex(bool restore)
|
|||
GPU_context_active_set(NULL);
|
||||
}
|
||||
|
||||
/* Render boundaries are opened and closed here as this may be
|
||||
* called outside of an existing render loop. */
|
||||
GPU_render_end();
|
||||
|
||||
BLI_ticket_mutex_unlock(DST.gl_context_mutex);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -498,9 +498,13 @@ void DRW_shgroup_uniform_vec4_array_copy(DRWShadingGroup *shgroup,
|
|||
return;
|
||||
}
|
||||
|
||||
/* Each array element stored as an individual entry in the uniform list.
|
||||
* All entries from the same array share the same base location,
|
||||
* and array-size used to determine the number of elements
|
||||
* copied in draw_update_uniforms. */
|
||||
for (int i = 0; i < arraysize; i++) {
|
||||
drw_shgroup_uniform_create_ex(
|
||||
shgroup, location + i, DRW_UNIFORM_FLOAT_COPY, &value[i], 0, 4, 1);
|
||||
shgroup, location, DRW_UNIFORM_FLOAT_COPY, &value[i], 0, 4, arraysize);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -584,21 +584,85 @@ static void draw_update_uniforms(DRWShadingGroup *shgroup,
|
|||
DRWCommandsState *state,
|
||||
bool *use_tfeedback)
|
||||
{
|
||||
#define MAX_UNIFORM_STACK_SIZE 64
|
||||
|
||||
/* Uniform array elements stored as separate entries. We need to batch these together */
|
||||
int current_uniform_array_loc = -1;
|
||||
unsigned int current_array_index = 0;
|
||||
static union {
|
||||
int istack[MAX_UNIFORM_STACK_SIZE];
|
||||
float fstack[MAX_UNIFORM_STACK_SIZE];
|
||||
} uniform_stack;
|
||||
|
||||
/* Loop through uniforms. */
|
||||
for (DRWUniformChunk *unichunk = shgroup->uniforms; unichunk; unichunk = unichunk->next) {
|
||||
DRWUniform *uni = unichunk->uniforms;
|
||||
|
||||
for (int i = 0; i < unichunk->uniform_used; i++, uni++) {
|
||||
|
||||
/* For uniform array copies, copy per-array-element data into local buffer before upload. */
|
||||
if (uni->arraysize > 1 &&
|
||||
(uni->type == DRW_UNIFORM_INT_COPY || uni->type == DRW_UNIFORM_FLOAT_COPY)) {
|
||||
|
||||
/* Begin copying uniform array. */
|
||||
if (current_array_index == 0) {
|
||||
current_uniform_array_loc = uni->location;
|
||||
}
|
||||
|
||||
/* Debug check same array loc. */
|
||||
BLI_assert(current_uniform_array_loc > -1);
|
||||
BLI_assert(current_uniform_array_loc == uni->location);
|
||||
|
||||
/* Copy array element data to local buffer. */
|
||||
BLI_assert(((current_array_index + 1) * uni->length) <= MAX_UNIFORM_STACK_SIZE);
|
||||
if (uni->type == DRW_UNIFORM_INT_COPY) {
|
||||
memcpy(&uniform_stack.istack[current_array_index * uni->length],
|
||||
uni->ivalue,
|
||||
sizeof(int) * uni->length);
|
||||
}
|
||||
else {
|
||||
memcpy(&uniform_stack.fstack[current_array_index * uni->length],
|
||||
uni->fvalue,
|
||||
sizeof(float) * uni->length);
|
||||
}
|
||||
current_array_index++;
|
||||
BLI_assert(current_array_index <= uni->arraysize);
|
||||
|
||||
/* Flush array data to shader. */
|
||||
if (current_array_index == uni->arraysize) {
|
||||
if (uni->type == DRW_UNIFORM_INT_COPY) {
|
||||
GPU_shader_uniform_vector_int(
|
||||
shgroup->shader, uni->location, uni->length, uni->arraysize, uniform_stack.istack);
|
||||
}
|
||||
else {
|
||||
GPU_shader_uniform_vector(
|
||||
shgroup->shader, uni->location, uni->length, uni->arraysize, uniform_stack.fstack);
|
||||
}
|
||||
current_array_index = 0;
|
||||
current_uniform_array_loc = -1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle standard cases. */
|
||||
switch (uni->type) {
|
||||
case DRW_UNIFORM_INT_COPY:
|
||||
GPU_shader_uniform_vector_int(
|
||||
shgroup->shader, uni->location, uni->length, uni->arraysize, uni->ivalue);
|
||||
BLI_assert(uni->arraysize == 1);
|
||||
if (uni->arraysize == 1) {
|
||||
GPU_shader_uniform_vector_int(
|
||||
shgroup->shader, uni->location, uni->length, uni->arraysize, uni->ivalue);
|
||||
}
|
||||
break;
|
||||
case DRW_UNIFORM_INT:
|
||||
GPU_shader_uniform_vector_int(
|
||||
shgroup->shader, uni->location, uni->length, uni->arraysize, uni->pvalue);
|
||||
break;
|
||||
case DRW_UNIFORM_FLOAT_COPY:
|
||||
GPU_shader_uniform_vector(
|
||||
shgroup->shader, uni->location, uni->length, uni->arraysize, uni->fvalue);
|
||||
BLI_assert(uni->arraysize == 1);
|
||||
if (uni->arraysize == 1) {
|
||||
GPU_shader_uniform_vector(
|
||||
shgroup->shader, uni->location, uni->length, uni->arraysize, uni->fvalue);
|
||||
}
|
||||
break;
|
||||
case DRW_UNIFORM_FLOAT:
|
||||
GPU_shader_uniform_vector(
|
||||
|
@ -673,6 +737,9 @@ static void draw_update_uniforms(DRWShadingGroup *shgroup,
|
|||
}
|
||||
}
|
||||
}
|
||||
/* Ensure uniform arrays copied. */
|
||||
BLI_assert(current_array_index == 0);
|
||||
BLI_assert(current_uniform_array_loc == -1);
|
||||
}
|
||||
|
||||
BLI_INLINE void draw_select_buffer(DRWShadingGroup *shgroup,
|
||||
|
|
|
@ -91,6 +91,7 @@ static void drw_deferred_shader_compilation_exec(
|
|||
short *do_update,
|
||||
float *progress)
|
||||
{
|
||||
GPU_render_begin();
|
||||
DRWShaderCompiler *comp = (DRWShaderCompiler *)custom_data;
|
||||
void *gl_context = comp->gl_context;
|
||||
GPUContext *gpu_context = comp->gpu_context;
|
||||
|
@ -138,7 +139,9 @@ static void drw_deferred_shader_compilation_exec(
|
|||
*progress = (float)comp->shaders_done / (float)total;
|
||||
*do_update = true;
|
||||
|
||||
GPU_flush();
|
||||
if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
|
||||
GPU_flush();
|
||||
}
|
||||
BLI_mutex_unlock(&comp->compilation_lock);
|
||||
|
||||
BLI_spin_lock(&comp->list_lock);
|
||||
|
@ -157,6 +160,7 @@ static void drw_deferred_shader_compilation_exec(
|
|||
if (use_main_context_workaround) {
|
||||
GPU_context_main_unlock();
|
||||
}
|
||||
GPU_render_end();
|
||||
}
|
||||
|
||||
static void drw_deferred_shader_compilation_free(void *custom_data)
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
|
||||
#include "GPU_batch.h"
|
||||
#include "GPU_batch_presets.h"
|
||||
#include "GPU_context.h"
|
||||
#include "GPU_immediate.h"
|
||||
#include "GPU_immediate_util.h"
|
||||
#include "GPU_matrix.h"
|
||||
|
@ -1084,12 +1085,23 @@ static void ui_draw_colorband_handle_tri(
|
|||
static void ui_draw_colorband_handle_box(
|
||||
uint pos, float x1, float y1, float x2, float y2, bool fill)
|
||||
{
|
||||
immBegin(fill ? GPU_PRIM_TRI_FAN : GPU_PRIM_LINE_LOOP, 4);
|
||||
immVertex2f(pos, x1, y1);
|
||||
immVertex2f(pos, x1, y2);
|
||||
immVertex2f(pos, x2, y2);
|
||||
immVertex2f(pos, x2, y1);
|
||||
immEnd();
|
||||
if (fill) {
|
||||
immBegin(GPU_PRIM_TRI_STRIP, 4);
|
||||
immVertex2f(pos, x2, y1);
|
||||
immVertex2f(pos, x1, y1);
|
||||
immVertex2f(pos, x2, y2);
|
||||
immVertex2f(pos, x1, y2);
|
||||
immEnd();
|
||||
}
|
||||
else {
|
||||
immBegin(GPU_PRIM_LINE_STRIP, 5);
|
||||
immVertex2f(pos, x1, y1);
|
||||
immVertex2f(pos, x1, y2);
|
||||
immVertex2f(pos, x2, y2);
|
||||
immVertex2f(pos, x2, y1);
|
||||
immVertex2f(pos, x1, y1);
|
||||
immEnd();
|
||||
}
|
||||
}
|
||||
|
||||
static void ui_draw_colorband_handle(uint shdr_pos,
|
||||
|
|
|
@ -1178,7 +1178,7 @@ static bool draw_widgetbase_batch_skip_draw_cache(void)
|
|||
{
|
||||
/* MacOS is known to have issues on Mac Mini and MacBook Pro with Intel Iris GPU.
|
||||
* For example, T78307. */
|
||||
if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_MAC, GPU_DRIVER_ANY)) {
|
||||
if (GPU_type_matches_ex(GPU_DEVICE_INTEL, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -173,7 +173,7 @@ void ED_screen_draw_edges(wmWindow *win)
|
|||
BLI_rcti_do_minmax_v(&scissor_rect, (int[2]){area->v3->vec.x, area->v3->vec.y});
|
||||
}
|
||||
|
||||
if (GPU_type_matches(GPU_DEVICE_INTEL_UHD, GPU_OS_UNIX, GPU_DRIVER_ANY)) {
|
||||
if (GPU_type_matches_ex(GPU_DEVICE_INTEL_UHD, GPU_OS_UNIX, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
|
||||
/* For some reason, on linux + Intel UHD Graphics 620 the driver
|
||||
* hangs if we don't flush before this. (See T57455) */
|
||||
GPU_flush();
|
||||
|
|
|
@ -20,10 +20,12 @@ typedef enum {
|
|||
GPU_PRIM_LINES,
|
||||
GPU_PRIM_TRIS,
|
||||
GPU_PRIM_LINE_STRIP,
|
||||
GPU_PRIM_LINE_LOOP, /* GL has this, Vulkan does not */
|
||||
GPU_PRIM_LINE_LOOP, /* GL has this, Vulkan and Metal do not */
|
||||
GPU_PRIM_TRI_STRIP,
|
||||
GPU_PRIM_TRI_FAN,
|
||||
GPU_PRIM_TRI_FAN, /* Metal API does not support this. */
|
||||
|
||||
/* Metal API does not support ADJ primitive types but
|
||||
* handled via the geometry-shader-alternative path. */
|
||||
GPU_PRIM_LINES_ADJ,
|
||||
GPU_PRIM_TRIS_ADJ,
|
||||
GPU_PRIM_LINE_STRIP_ADJ,
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "GPU_batch.h"
|
||||
#include "GPU_batch_presets.h" /* own include */
|
||||
#include "GPU_batch_utils.h"
|
||||
#include "GPU_context.h"
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/** \name Local Structures
|
||||
|
@ -320,11 +321,12 @@ GPUBatch *GPU_batch_preset_quad(void)
|
|||
GPUVertBuf *vbo = GPU_vertbuf_create_with_format(preset_2d_format());
|
||||
GPU_vertbuf_data_alloc(vbo, 4);
|
||||
|
||||
float pos_data[4][2] = {{0.0f, 0.0f}, {0.0f, 1.0f}, {1.0f, 1.0f}, {1.0f, 0.0f}};
|
||||
float pos_data[4][2] = {{0.0f, 0.0f}, {0.0f, 1.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}};
|
||||
GPU_vertbuf_attr_fill(vbo, g_presets_2d.attr_id.pos, pos_data);
|
||||
/* Don't fill the color. */
|
||||
|
||||
g_presets_2d.batch.quad = GPU_batch_create_ex(GPU_PRIM_TRI_FAN, vbo, NULL, GPU_BATCH_OWNS_VBO);
|
||||
g_presets_2d.batch.quad = GPU_batch_create_ex(
|
||||
GPU_PRIM_TRI_STRIP, vbo, NULL, GPU_BATCH_OWNS_VBO);
|
||||
|
||||
gpu_batch_presets_register(g_presets_2d.batch.quad);
|
||||
}
|
||||
|
|
|
@ -16,14 +16,6 @@ void main()
|
|||
vec2 uv;
|
||||
vec2 co;
|
||||
|
||||
#ifdef GPU_METAL
|
||||
/* Metal API does not support Triangle fan primitive topology.
|
||||
* When this shader is called using Triangle-Strip, vertex ID's
|
||||
* are in a different order. */
|
||||
# define GPU_PRIM_TRI_STRIP
|
||||
#endif
|
||||
|
||||
#ifdef GPU_PRIM_TRI_STRIP
|
||||
if (gl_VertexID == 0) {
|
||||
co = rect_geom.xw;
|
||||
uv = rect_icon.xw;
|
||||
|
@ -40,24 +32,6 @@ void main()
|
|||
co = rect_geom.zy;
|
||||
uv = rect_icon.zy;
|
||||
}
|
||||
#else
|
||||
if (gl_VertexID == 0) {
|
||||
co = rect_geom.xy;
|
||||
uv = rect_icon.xy;
|
||||
}
|
||||
else if (gl_VertexID == 1) {
|
||||
co = rect_geom.xw;
|
||||
uv = rect_icon.xw;
|
||||
}
|
||||
else if (gl_VertexID == 2) {
|
||||
co = rect_geom.zw;
|
||||
uv = rect_icon.zw;
|
||||
}
|
||||
else {
|
||||
co = rect_geom.zy;
|
||||
uv = rect_icon.zy;
|
||||
}
|
||||
#endif
|
||||
|
||||
gl_Position = ModelViewProjectionMatrix * vec4(co, 0.0f, 1.0f);
|
||||
texCoord_interp = uv;
|
||||
|
|
Loading…
Reference in New Issue