Geometry Nodes: Parallelize mesh grid primitive

On a Ryzen 3700X, the grid primitive ended up 2.5x faster than before.
More benchmarking details are included in the differential revision.

For smaller grids, the only effect should be a slightly larger
code size and a few more if statements.

Differential Revision: https://developer.blender.org/D13617
This commit is contained in:
Hans Goudey 2021-12-20 10:34:31 -06:00
parent 1d25ba175e
commit cb96435047
1 changed file with 82 additions and 55 deletions

View File

@ -14,6 +14,8 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "BLI_task.hh"
#include "DNA_mesh_types.h"
#include "DNA_meshdata_types.h"
@ -38,11 +40,13 @@ static void calculate_uvs(
const float dx = (size_x == 0.0f) ? 0.0f : 1.0f / size_x;
const float dy = (size_y == 0.0f) ? 0.0f : 1.0f / size_y;
for (const int i : loops.index_range()) {
const float3 &co = verts[loops[i].v].co;
uvs[i].x = (co.x + size_x * 0.5f) * dx;
uvs[i].y = (co.y + size_y * 0.5f) * dy;
}
threading::parallel_for(loops.index_range(), 1024, [&](IndexRange range) {
for (const int i : range) {
const float3 &co = verts[loops[i].v].co;
uvs[i].x = (co.x + size_x * 0.5f) * dx;
uvs[i].y = (co.y + size_y * 0.5f) * dy;
}
});
uv_attribute.save();
}
@ -70,72 +74,95 @@ Mesh *create_grid_mesh(const int verts_x,
const float dy = edges_y == 0 ? 0.0f : size_y / edges_y;
const float x_shift = edges_x / 2.0f;
const float y_shift = edges_y / 2.0f;
for (const int x_index : IndexRange(verts_x)) {
for (const int y_index : IndexRange(verts_y)) {
const int vert_index = x_index * verts_y + y_index;
verts[vert_index].co[0] = (x_index - x_shift) * dx;
verts[vert_index].co[1] = (y_index - y_shift) * dy;
verts[vert_index].co[2] = 0.0f;
threading::parallel_for(IndexRange(verts_x), 512, [&](IndexRange x_range) {
for (const int x : x_range) {
const int y_offset = x * verts_y;
threading::parallel_for(IndexRange(verts_y), 512, [&](IndexRange y_range) {
for (const int y : y_range) {
const int vert_index = y_offset + y;
verts[vert_index].co[0] = (x - x_shift) * dx;
verts[vert_index].co[1] = (y - y_shift) * dy;
verts[vert_index].co[2] = 0.0f;
}
});
}
}
});
}
/* Point all vertex normals in the up direction. */
const short up_normal[3] = {0, 0, SHRT_MAX};
for (MVert &vert : verts) {
copy_v3_v3_short(vert.no, up_normal);
{
const short up_normal[3] = {0, 0, SHRT_MAX};
for (MVert &vert : verts) {
copy_v3_v3_short(vert.no, up_normal);
}
}
/* Build the horizontal edges in the X direction. */
const int y_edges_start = 0;
const int x_edges_start = verts_x * edges_y;
const short edge_flag = (edges_x == 0 || edges_y == 0) ? ME_LOOSEEDGE :
ME_EDGEDRAW | ME_EDGERENDER;
int edge_index = 0;
for (const int x : IndexRange(verts_x)) {
for (const int y : IndexRange(edges_y)) {
const int vert_index = x * verts_y + y;
MEdge &edge = edges[edge_index++];
edge.v1 = vert_index;
edge.v2 = vert_index + 1;
edge.flag = edge_flag;
/* Build the horizontal edges in the X direction. */
threading::parallel_for(IndexRange(verts_x), 512, [&](IndexRange x_range) {
for (const int x : x_range) {
const int y_vert_offset = x * verts_y;
const int y_edge_offset = y_edges_start + x * edges_y;
threading::parallel_for(IndexRange(edges_y), 512, [&](IndexRange y_range) {
for (const int y : y_range) {
const int vert_index = y_vert_offset + y;
MEdge &edge = edges[y_edge_offset + y];
edge.v1 = vert_index;
edge.v2 = vert_index + 1;
edge.flag = edge_flag;
}
});
}
}
});
/* Build the vertical edges in the Y direction. */
const int x_edges_start = edge_index;
for (const int y : IndexRange(verts_y)) {
for (const int x : IndexRange(edges_x)) {
const int vert_index = x * verts_y + y;
MEdge &edge = edges[edge_index++];
edge.v1 = vert_index;
edge.v2 = vert_index + verts_y;
edge.flag = edge_flag;
threading::parallel_for(IndexRange(verts_y), 512, [&](IndexRange y_range) {
for (const int y : y_range) {
const int x_edge_offset = x_edges_start + y * edges_x;
threading::parallel_for(IndexRange(edges_x), 512, [&](IndexRange x_range) {
for (const int x : x_range) {
const int vert_index = x * verts_y + y;
MEdge &edge = edges[x_edge_offset + x];
edge.v1 = vert_index;
edge.v2 = vert_index + verts_y;
edge.flag = edge_flag;
}
});
}
}
});
int loop_index = 0;
int poly_index = 0;
for (const int x : IndexRange(edges_x)) {
for (const int y : IndexRange(edges_y)) {
MPoly &poly = polys[poly_index++];
poly.loopstart = loop_index;
poly.totloop = 4;
const int vert_index = x * verts_y + y;
threading::parallel_for(IndexRange(edges_x), 512, [&](IndexRange x_range) {
for (const int x : x_range) {
const int y_offset = x * edges_y;
threading::parallel_for(IndexRange(edges_y), 512, [&](IndexRange y_range) {
for (const int y : y_range) {
const int poly_index = y_offset + y;
const int loop_index = poly_index * 4;
MPoly &poly = polys[poly_index];
poly.loopstart = loop_index;
poly.totloop = 4;
const int vert_index = x * verts_y + y;
MLoop &loop_a = loops[loop_index++];
loop_a.v = vert_index;
loop_a.e = x_edges_start + edges_x * y + x;
MLoop &loop_b = loops[loop_index++];
loop_b.v = vert_index + verts_y;
loop_b.e = y_edges_start + edges_y * (x + 1) + y;
MLoop &loop_c = loops[loop_index++];
loop_c.v = vert_index + verts_y + 1;
loop_c.e = x_edges_start + edges_x * (y + 1) + x;
MLoop &loop_d = loops[loop_index++];
loop_d.v = vert_index + 1;
loop_d.e = y_edges_start + edges_y * x + y;
MLoop &loop_a = loops[loop_index];
loop_a.v = vert_index;
loop_a.e = x_edges_start + edges_x * y + x;
MLoop &loop_b = loops[loop_index + 1];
loop_b.v = vert_index + verts_y;
loop_b.e = y_edges_start + edges_y * (x + 1) + y;
MLoop &loop_c = loops[loop_index + 2];
loop_c.v = vert_index + verts_y + 1;
loop_c.e = x_edges_start + edges_x * (y + 1) + x;
MLoop &loop_d = loops[loop_index + 3];
loop_d.v = vert_index + 1;
loop_d.e = y_edges_start + edges_y * x + y;
}
});
}
}
});
if (mesh->totpoly != 0) {
calculate_uvs(mesh, verts, loops, size_x, size_y);