Mikktspace: Speed up the merging of identical vertices

Previously, Mikktspace just bucketed the vertices based on one spatial coordinate and then ran full pairwise comparisons inside each bucket.
However, since models are three-dimensional, the bucketing has a massive false-positive rate, and since pairwise comparison is O(n^2), the merging process is very slow.

Since we only care about exactly identical vertices, there is a much more efficient approach: hash all values
belonging to each vertex and form buckets based on the hash.
The hash has 32 bits and covers all values, so false positives are very unlikely, and since both the hashing and
the radix sort used for bucketing are O(n), both asymptotic and real-world performance (as well as code complexity)
are significantly improved.
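In outline, the new merging step works like the standalone sketch below. This is only an illustration of the idea,
not code from this patch: Vert, vert_hash and merge_identical are made-up names, qsort stands in for the radix
sort used in the actual change, and memcmp stands in for the per-component position/normal/texcoord comparisons.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef unsigned int uint;
typedef struct { float v[8]; } Vert;   /* toy vertex: a few packed floats */

/* Reinterpret the bits of a float so the exact bit pattern can be hashed. */
static uint float_bits(float f) { uint u; memcpy(&u, &f, sizeof(u)); return u; }

/* Hash all values of one vertex into a single 32-bit key. */
static uint vert_hash(const Vert *vert)
{
	uint h = 0;
	for (int i = 0; i < 8; i++)
		h = (h * 73856093u) ^ float_bits(vert->v[i]);
	return h;
}

typedef struct { uint hash; int index; } Entry;

static int cmp_entry(const void *a, const void *b)
{
	const uint ha = ((const Entry*)a)->hash, hb = ((const Entry*)b)->hash;
	return (ha > hb) - (ha < hb);
}

/* remap[i] ends up holding the index of the first vertex identical to vertex i. */
static void merge_identical(const Vert *verts, int n, int *remap)
{
	for (int i = 0; i < n; i++)
		remap[i] = i;
	Entry *e = (Entry*)malloc(sizeof(Entry) * n);
	if (e == NULL)
		return;
	for (int i = 0; i < n; i++) {
		e[i].hash = vert_hash(&verts[i]);
		e[i].index = i;
	}
	qsort(e, n, sizeof(Entry), cmp_entry);   /* the patch uses an O(n) radix sort here */
	for (int start = 0; start < n; ) {
		int end = start + 1;
		while (end < n && e[end].hash == e[start].hash)
			end++;
		/* Only vertices whose hashes collide can possibly be identical. */
		for (int i = start; i < end; i++)
			for (int j = start; j < i; j++)
				if (memcmp(&verts[e[i].index], &verts[e[j].index], sizeof(Vert)) == 0) {
					remap[e[i].index] = remap[e[j].index];
					break;
				}
		start = end;
	}
	free(e);
}

int main(void)
{
	Vert verts[4] = {
		{{1, 2, 3, 0, 0, 1, 0.5f, 0.5f}},
		{{4, 5, 6, 0, 1, 0, 0.1f, 0.9f}},
		{{1, 2, 3, 0, 0, 1, 0.5f, 0.5f}},   /* exact duplicate of vertex 0 */
		{{7, 8, 9, 1, 0, 0, 0.2f, 0.3f}},
	};
	int remap[4];
	merge_identical(verts, 4, remap);
	for (int i = 0; i < 4; i++)
		printf("vertex %d -> %d\n", i, remap[i]);
	return 0;
}

Running the sketch prints vertex 2 remapped to vertex 0, the only exact duplicate in the toy data.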
Lukas Stockner 2017-06-04 23:04:47 +02:00
parent 40f528a7da
commit 119846a6bb
1 changed file with 108 additions and 281 deletions

@@ -447,305 +447,132 @@ typedef struct {
int index;
} STmpVert;
static const int g_iCells = 2048;
static const float g_iCells_fl = 2048.0f;
#ifdef _MSC_VER
# define NOINLINE __declspec(noinline)
#else
# define NOINLINE __attribute__ ((noinline))
#endif
// it is IMPORTANT that this function is called to evaluate the hash since
// inlining could potentially reorder instructions and generate different
// results for the same effective input value fVal.
static NOINLINE int FindGridCell(const float fMin, const float fMax, const float fVal)
{
const float fIndex = g_iCells_fl * ((fVal-fMin)/(fMax-fMin));
const int iIndex = (int)fIndex;
return iIndex < g_iCells ? (iIndex >= 0 ? iIndex : 0) : (g_iCells - 1);
}
static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in);
static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int pTable[], const int iEntries);
static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn);
typedef unsigned int uint;
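/* Reinterpret the bits of a float as an unsigned int so the exact bit pattern can be hashed. */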
static uint float_as_uint(const float v)
{
return *((uint*)(&v));
}
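/* Combine three 32-bit values: each is multiplied by a large prime and the products are XORed
 * (the constants commonly used for spatial hashing). */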
#define HASH(x, y, z) (((x) * 73856093) ^ ((y) * 19349663) ^ ((z) * 83492791))
#define HASH_F(x, y, z) HASH(float_as_uint(x), float_as_uint(y), float_as_uint(z))
/* Sort comp and data based on comp.
* comp2 and data2 are used as temporary storage. */
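/* Four 8-bit passes over the 32-bit keys; because the buffers are swapped after every pass,
 * the fully sorted result ends up back in the comp/data arrays passed by the caller. */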
static void radixsort_pair(uint *comp, int *data, uint *comp2, int *data2, int n)
{
int shift = 0;
for(int pass = 0; pass < 4; pass++, shift+=8) {
int bins[257] = {0};
/* Count number of elements per bin. */
for(int i = 0; i < n; i++) {
bins[((comp[i] >> shift) & 0xff) + 1]++;
}
/* Compute prefix sum to find position of each bin in the sorted array. */
for(int i = 2; i < 256; i++) {
bins[i] += bins[i-1];
}
/* Insert the elements in their correct location based on their bin. */
for(int i = 0; i < n; i++) {
int pos = bins[(comp[i] >> shift) & 0xff]++;
comp2[pos] = comp[i];
data2[pos] = data[i];
}
/* Swap arrays. */
int *tmpdata = data; data = data2; data2 = tmpdata;
uint *tmpcomp = comp; comp = comp2; comp2 = tmpcomp;
}
}

static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn)
{
// Generate bounding box
int * piHashTable=NULL, * piHashCount=NULL, * piHashOffsets=NULL, * piHashCount2=NULL;
STmpVert * pTmpVert = NULL;
int i=0, iChannel=0, k=0, e=0;
int iMaxCount=0;
SVec3 vMin = GetPosition(pContext, 0), vMax = vMin, vDim;
float fMin, fMax;
for (i=1; i<(iNrTrianglesIn*3); i++)
{
const int index = piTriList_in_and_out[i];
const SVec3 vP = GetPosition(pContext, index);
if (vMin.x > vP.x) vMin.x = vP.x;
else if (vMax.x < vP.x) vMax.x = vP.x;
if (vMin.y > vP.y) vMin.y = vP.y;
else if (vMax.y < vP.y) vMax.y = vP.y;
if (vMin.z > vP.z) vMin.z = vP.z;
else if (vMax.z < vP.z) vMax.z = vP.z;
}
vDim = vsub(vMax,vMin);
iChannel = 0;
fMin = vMin.x; fMax=vMax.x;
if (vDim.y>vDim.x && vDim.y>vDim.z)
{
iChannel=1;
fMin = vMin.y;
fMax = vMax.y;
}
else if (vDim.z>vDim.x)
{
iChannel=2;
fMin = vMin.z;
fMax = vMax.z;
}
// make allocations
piHashTable = (int *) malloc(sizeof(int[3])*iNrTrianglesIn);
piHashCount = (int *) malloc(sizeof(int)*g_iCells);
piHashOffsets = (int *) malloc(sizeof(int)*g_iCells);
piHashCount2 = (int *) malloc(sizeof(int)*g_iCells);
if (piHashTable==NULL || piHashCount==NULL || piHashOffsets==NULL || piHashCount2==NULL)
{
if (piHashTable!=NULL) free(piHashTable);
if (piHashCount!=NULL) free(piHashCount);
if (piHashOffsets!=NULL) free(piHashOffsets);
if (piHashCount2!=NULL) free(piHashCount2);
GenerateSharedVerticesIndexListSlow(piTriList_in_and_out, pContext, iNrTrianglesIn);
return;
}
memset(piHashCount, 0, sizeof(int)*g_iCells);
memset(piHashCount2, 0, sizeof(int)*g_iCells);
// count amount of elements in each cell unit
for (i=0; i<(iNrTrianglesIn*3); i++)
{
const int index = piTriList_in_and_out[i];
const SVec3 vP = GetPosition(pContext, index);
const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z);
const int iCell = FindGridCell(fMin, fMax, fVal);
++piHashCount[iCell];
}
// evaluate start index of each cell.
piHashOffsets[0]=0;
for (k=1; k<g_iCells; k++)
piHashOffsets[k]=piHashOffsets[k-1]+piHashCount[k-1];
// insert vertices
for (i=0; i<(iNrTrianglesIn*3); i++)
{
const int index = piTriList_in_and_out[i];
const SVec3 vP = GetPosition(pContext, index);
const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z);
const int iCell = FindGridCell(fMin, fMax, fVal);
int * pTable = NULL;
assert(piHashCount2[iCell]<piHashCount[iCell]);
pTable = &piHashTable[piHashOffsets[iCell]];
pTable[piHashCount2[iCell]] = i; // vertex i has been inserted.
++piHashCount2[iCell];
}
for (k=0; k<g_iCells; k++)
assert(piHashCount2[k] == piHashCount[k]); // verify the count
free(piHashCount2);
// find maximum amount of entries in any hash entry
iMaxCount = piHashCount[0];
for (k=1; k<g_iCells; k++)
if (iMaxCount<piHashCount[k])
iMaxCount=piHashCount[k];
pTmpVert = (STmpVert *) malloc(sizeof(STmpVert)*iMaxCount);
// complete the merge
for (k=0; k<g_iCells; k++)
{
// extract table of cell k and amount of entries in it
int * pTable = &piHashTable[piHashOffsets[k]];
const int iEntries = piHashCount[k];
if (iEntries < 2) continue;
if (pTmpVert!=NULL)
{
for (e=0; e<iEntries; e++)
{
int i = pTable[e];
const SVec3 vP = GetPosition(pContext, piTriList_in_and_out[i]);
pTmpVert[e].vert[0] = vP.x; pTmpVert[e].vert[1] = vP.y;
pTmpVert[e].vert[2] = vP.z; pTmpVert[e].index = i;
}
MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, 0, iEntries-1);
}
else
MergeVertsSlow(piTriList_in_and_out, pContext, pTable, iEntries);
}
if (pTmpVert!=NULL) { free(pTmpVert); }
free(piHashTable);
free(piHashCount);
free(piHashOffsets);
}
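// MergeVertsFast recursively splits the vertices of one grid cell along the largest bounding-box axis
// and welds exact duplicates by pairwise comparison once a group can no longer be split any further.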
static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in)
{
// make bbox
int c=0, l=0, channel=0;
float fvMin[3], fvMax[3];
float dx=0, dy=0, dz=0, fSep=0;
for (c=0; c<3; c++)
{ fvMin[c]=pTmpVert[iL_in].vert[c]; fvMax[c]=fvMin[c]; }
for (l=(iL_in+1); l<=iR_in; l++) {
for (c=0; c<3; c++) {
if (fvMin[c]>pTmpVert[l].vert[c]) fvMin[c]=pTmpVert[l].vert[c];
if (fvMax[c]<pTmpVert[l].vert[c]) fvMax[c]=pTmpVert[l].vert[c];
}
}
dx = fvMax[0]-fvMin[0];
dy = fvMax[1]-fvMin[1];
dz = fvMax[2]-fvMin[2];
channel = 0;
if (dy>dx && dy>dz) channel=1;
else if (dz>dx) channel=2;
fSep = 0.5f*(fvMax[channel]+fvMin[channel]);
// stop if all vertices are NaNs
if (!isfinite(fSep))
return;
// terminate recursion when the separation/average value
// is no longer strictly between fMin and fMax values.
if (fSep>=fvMax[channel] || fSep<=fvMin[channel])
{
// complete the weld
for (l=iL_in; l<=iR_in; l++)
{
int i = pTmpVert[l].index;
const int index = piTriList_in_and_out[i];
const SVec3 vP = GetPosition(pContext, index);
const SVec3 vN = GetNormal(pContext, index);
const SVec3 vT = GetTexCoord(pContext, index);
tbool bNotFound = TTRUE;
int l2=iL_in, i2rec=-1;
while (l2<l && bNotFound)
{
const int i2 = pTmpVert[l2].index;
const int index2 = piTriList_in_and_out[i2];
const SVec3 vP2 = GetPosition(pContext, index2);
const SVec3 vN2 = GetNormal(pContext, index2);
const SVec3 vT2 = GetTexCoord(pContext, index2);
i2rec=i2;
//if (vP==vP2 && vN==vN2 && vT==vT2)
if (vP.x==vP2.x && vP.y==vP2.y && vP.z==vP2.z &&
vN.x==vN2.x && vN.y==vN2.y && vN.z==vN2.z &&
vT.x==vT2.x && vT.y==vT2.y && vT.z==vT2.z)
bNotFound = TFALSE;
else
++l2;
}
// merge if previously found
if (!bNotFound)
piTriList_in_and_out[i] = piTriList_in_and_out[i2rec];
}
}
else
{
int iL=iL_in, iR=iR_in;
assert((iR_in-iL_in)>0); // at least 2 entries
// separate (by fSep) all points between iL_in and iR_in in pTmpVert[]
while (iL < iR)
{
tbool bReadyLeftSwap = TFALSE, bReadyRightSwap = TFALSE;
while ((!bReadyLeftSwap) && iL<iR)
{
assert(iL>=iL_in && iL<=iR_in);
bReadyLeftSwap = !(pTmpVert[iL].vert[channel]<fSep);
if (!bReadyLeftSwap) ++iL;
}
while ((!bReadyRightSwap) && iL<iR)
{
assert(iR>=iL_in && iR<=iR_in);
bReadyRightSwap = pTmpVert[iR].vert[channel]<fSep;
if (!bReadyRightSwap) --iR;
}
assert( (iL<iR) || !(bReadyLeftSwap && bReadyRightSwap) );
if (bReadyLeftSwap && bReadyRightSwap)
{
const STmpVert sTmp = pTmpVert[iL];
assert(iL<iR);
pTmpVert[iL] = pTmpVert[iR];
pTmpVert[iR] = sTmp;
++iL; --iR;
}
}
assert(iL==(iR+1) || (iL==iR));
if (iL==iR)
{
const tbool bReadyRightSwap = pTmpVert[iR].vert[channel]<fSep;
if (bReadyRightSwap) ++iL;
else --iR;
}
// only need to weld when there is more than 1 instance of the (x,y,z)
if (iL_in < iR)
MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, iL_in, iR); // weld all left of fSep
if (iL < iR_in)
MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, iL, iR_in); // weld all right of (or equal to) fSep
}
}
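// MergeVertsSlow is the brute-force fallback used when the temporary vertex buffer could not be
// allocated: every entry of the cell is compared against all entries before it.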
static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int pTable[], const int iEntries)
{
// this can be optimized further using a tree structure or more hashing.
int e=0;
for (e=0; e<iEntries; e++)
{
int i = pTable[e];
const int index = piTriList_in_and_out[i];
const SVec3 vP = GetPosition(pContext, index);
const SVec3 vN = GetNormal(pContext, index);
const SVec3 vT = GetTexCoord(pContext, index);
tbool bNotFound = TTRUE;
int e2=0, i2rec=-1;
while (e2<e && bNotFound)
{
const int i2 = pTable[e2];
const int index2 = piTriList_in_and_out[i2];
const SVec3 vP2 = GetPosition(pContext, index2);
const SVec3 vN2 = GetNormal(pContext, index2);
const SVec3 vT2 = GetTexCoord(pContext, index2);
i2rec = i2;
if (veq(vP,vP2) && veq(vN,vN2) && veq(vT,vT2))
bNotFound = TFALSE;
else
++e2;
}
// merge if previously found
if (!bNotFound)
piTriList_in_and_out[i] = piTriList_in_and_out[i2rec];
	}
}

/* Merge identical vertices.
 * To find vertices with identical position, normal and texcoord, we calculate a hash of the 9 values.
 * Then, by sorting based on that hash, identical elements (having identical hashes) will be moved next to each other.
 * Since there might be hash collisions, the elements of each block are then compared with each other and duplicates
 * are merged. */
static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn)
{
	int numVertices = iNrTrianglesIn*3;

	uint *hashes = (uint*) malloc(sizeof(uint)*numVertices);
	int *indices = (int*) malloc(sizeof(int)*numVertices);
	uint *temp_hashes = (uint*) malloc(sizeof(uint)*numVertices);
	int *temp_indices = (int*) malloc(sizeof(int)*numVertices);

	if (hashes == NULL || indices == NULL || temp_hashes == NULL || temp_indices == NULL) {
		free(hashes);
		free(indices);
		free(temp_hashes);
		free(temp_indices);

		GenerateSharedVerticesIndexListSlow(piTriList_in_and_out, pContext, iNrTrianglesIn);
		return;
	}

	for (int i = 0; i < numVertices; i++) {
		const int index = piTriList_in_and_out[i];

		const SVec3 vP = GetPosition(pContext, index);
		const uint hashP = HASH_F(vP.x, vP.y, vP.z);

		const SVec3 vN = GetNormal(pContext, index);
		const uint hashN = HASH_F(vN.x, vN.y, vN.z);

		const SVec3 vT = GetTexCoord(pContext, index);
		const uint hashT = HASH_F(vT.x, vT.y, vT.z);

		hashes[i] = HASH(hashP, hashN, hashT);
		indices[i] = i;
}
radixsort_pair(hashes, indices, temp_hashes, temp_indices, numVertices);
free(temp_hashes);
free(temp_indices);
/* Process blocks of vertices with the same hash.
* Vertices in the block might still be separate, but we know for sure that
* vertices in different blocks will never be identical. */
int blockstart = 0;
while (blockstart < numVertices) {
/* Find end of this block (exclusive). */
uint hash = hashes[blockstart];
int blockend = blockstart+1;
for(; blockend < numVertices; blockend++) {
if(hashes[blockend] != hash) break;
}
for(int i = blockstart; i < blockend; i++) {
int index1 = piTriList_in_and_out[indices[i]];
const SVec3 vP = GetPosition(pContext, index1);
const SVec3 vN = GetNormal(pContext, index1);
const SVec3 vT = GetTexCoord(pContext, index1);
for(int i2 = i+1; i2 < blockend; i2++) {
int index2 = piTriList_in_and_out[indices[i2]];
if(index1 == index2) continue;
if(veq(vP, GetPosition(pContext, index2)) &&
veq(vN, GetNormal(pContext, index2)) &&
veq(vT, GetTexCoord(pContext, index2)))
{
piTriList_in_and_out[indices[i2]] = index1;
/* Once i2>i has been identified as a duplicate, we can stop since any
* i3>i2>i that is a duplicate of i (and therefore also i2) will also be
* compared to i2 and therefore be identified there anyways. */
break;
}
}
}
/* Advance to next block. */
blockstart = blockend;
}
free(hashes);
free(indices);
}
static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn)