BLI_memblock: Refactor for faster iteration and allocation

Remove the clear allocation flag as it has little impact since there should
be very few allocation per redraw.

Make BLI_memblock_alloc and BLI_memblock_iterstep much more cache efficient
removing them almost entirely from performance profiles.
This commit is contained in:
Clément Foucault 2019-05-21 00:54:03 +02:00
parent 657165db94
commit 21dc2816d6
4 changed files with 69 additions and 58 deletions

View File

@ -35,16 +35,19 @@ struct BLI_memblock;
typedef struct BLI_memblock BLI_memblock;
typedef void (*MemblockValFreeFP)(void *val);
BLI_memblock *BLI_memblock_create(uint elem_size,
const bool clear_alloc) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
BLI_memblock *BLI_memblock_create(uint elem_size) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
void *BLI_memblock_alloc(BLI_memblock *mblk) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);
void BLI_memblock_clear(BLI_memblock *mblk, MemblockValFreeFP valfreefp) ATTR_NONNULL(1);
void BLI_memblock_destroy(BLI_memblock *mblk, MemblockValFreeFP free_callback) ATTR_NONNULL(1);
typedef struct BLI_memblock_iter {
BLI_memblock *mblk;
int current_index;
int elem_per_chunk;
void **chunk_list;
int cur_index;
int end_index;
int chunk_max_ofs;
int chunk_idx;
int elem_size;
int elem_ofs;
} BLI_memblock_iter;
void BLI_memblock_iternew(BLI_memblock *pool, BLI_memblock_iter *iter) ATTR_NONNULL();

View File

@ -49,18 +49,19 @@ struct BLI_memblock {
int elem_next;
/** Last "touched" element. */
int elem_last;
/** Offset in a chunk of the next elem. */
int elem_next_ofs;
/** Max offset in a chunk. */
int chunk_max_ofs;
/** Id of the chunk used for the next allocation. */
int chunk_next;
/** Chunck size in bytes. */
int chunk_size;
/** Number of allocated chunck. */
int chunk_len;
/** Clear newly allocated chuncks. */
bool clear_alloc;
};
/**
* /clear_alloc will clear the memory the first time a chunck is allocated.
*/
BLI_memblock *BLI_memblock_create(uint elem_size, const bool clear_alloc)
BLI_memblock *BLI_memblock_create(uint elem_size)
{
BLI_assert(elem_size < BLI_MEM_BLOCK_CHUNK_SIZE);
@ -71,22 +72,16 @@ BLI_memblock *BLI_memblock_create(uint elem_size, const bool clear_alloc)
mblk->chunk_size = BLI_MEM_BLOCK_CHUNK_SIZE;
mblk->chunk_len = CHUNK_LIST_SIZE;
mblk->chunk_list = MEM_callocN(sizeof(void *) * (uint)mblk->chunk_len, "chunk list");
mblk->clear_alloc = clear_alloc;
mblk->chunk_list[0] = MEM_callocN((uint)mblk->chunk_size, "BLI_memblock chunk");
mblk->chunk_max_ofs = (mblk->chunk_size / mblk->elem_size) * mblk->elem_size;
mblk->elem_next_ofs = 0;
mblk->chunk_next = 0;
return mblk;
}
void BLI_memblock_destroy(BLI_memblock *mblk, MemblockValFreeFP free_callback)
{
if (free_callback) {
int elem_per_chunk = mblk->chunk_size / mblk->elem_size;
for (int i = mblk->elem_last; i >= 0; i--) {
int chunk_idx = i / elem_per_chunk;
int elem_idx = i - elem_per_chunk * chunk_idx;
void *val = (char *)(mblk->chunk_list[chunk_idx]) + mblk->elem_size * elem_idx;
free_callback(val);
}
}
BLI_memblock_clear(mblk, free_callback);
for (int i = 0; i < mblk->chunk_len; i++) {
MEM_SAFE_FREE(mblk->chunk_list[i]);
@ -100,7 +95,7 @@ void BLI_memblock_destroy(BLI_memblock *mblk, MemblockValFreeFP free_callback)
void BLI_memblock_clear(BLI_memblock *mblk, MemblockValFreeFP free_callback)
{
int elem_per_chunk = mblk->chunk_size / mblk->elem_size;
int last_used_chunk = (mblk->elem_next - 1) / elem_per_chunk;
int last_used_chunk = mblk->elem_next / elem_per_chunk;
if (free_callback) {
for (int i = mblk->elem_last; i >= mblk->elem_next; i--) {
@ -122,53 +117,66 @@ void BLI_memblock_clear(BLI_memblock *mblk, MemblockValFreeFP free_callback)
mblk->elem_last = mblk->elem_next - 1;
mblk->elem_next = 0;
mblk->elem_next_ofs = 0;
mblk->chunk_next = 0;
}
void *BLI_memblock_alloc(BLI_memblock *mblk)
{
int elem_per_chunk = mblk->chunk_size / mblk->elem_size;
int chunk_idx = mblk->elem_next / elem_per_chunk;
int elem_idx = mblk->elem_next - elem_per_chunk * chunk_idx;
/* Bookeeping. */
if (mblk->elem_last < mblk->elem_next) {
mblk->elem_last = mblk->elem_next;
}
mblk->elem_next++;
if (UNLIKELY(chunk_idx >= mblk->chunk_len)) {
mblk->chunk_len += CHUNK_LIST_SIZE;
mblk->chunk_list = MEM_recallocN(mblk->chunk_list, sizeof(void *) * (uint)mblk->chunk_len);
}
void *ptr = (char *)(mblk->chunk_list[mblk->chunk_next]) + mblk->elem_next_ofs;
if (UNLIKELY(mblk->chunk_list[chunk_idx] == NULL)) {
if (mblk->clear_alloc) {
mblk->chunk_list[chunk_idx] = MEM_callocN((uint)mblk->chunk_size, "BLI_memblock chunk");
mblk->elem_next_ofs += mblk->elem_size;
if (mblk->elem_next_ofs == mblk->chunk_max_ofs) {
mblk->elem_next_ofs = 0;
mblk->chunk_next++;
if (UNLIKELY(mblk->chunk_next >= mblk->chunk_len)) {
mblk->chunk_len += CHUNK_LIST_SIZE;
mblk->chunk_list = MEM_recallocN(mblk->chunk_list, sizeof(void *) * (uint)mblk->chunk_len);
}
else {
mblk->chunk_list[chunk_idx] = MEM_mallocN((uint)mblk->chunk_size, "BLI_memblock chunk");
if (UNLIKELY(mblk->chunk_list[mblk->chunk_next] == NULL)) {
mblk->chunk_list[mblk->chunk_next] = MEM_callocN((uint)mblk->chunk_size,
"BLI_memblock chunk");
}
}
return (char *)(mblk->chunk_list[chunk_idx]) + mblk->elem_size * elem_idx;
return ptr;
}
void BLI_memblock_iternew(BLI_memblock *mblk, BLI_memblock_iter *iter)
{
iter->mblk = mblk;
iter->current_index = 0;
iter->elem_per_chunk = mblk->chunk_size / mblk->elem_size;
/* Small copy of the memblock used for better cache coherence. */
iter->chunk_list = mblk->chunk_list;
iter->end_index = mblk->elem_next;
iter->cur_index = 0;
iter->chunk_idx = 0;
iter->elem_ofs = 0;
iter->elem_size = mblk->elem_size;
iter->chunk_max_ofs = mblk->chunk_max_ofs;
}
void *BLI_memblock_iterstep(BLI_memblock_iter *iter)
{
if (iter->current_index >= iter->mblk->elem_next) {
if (iter->cur_index == iter->end_index) {
return NULL;
}
int chunk_idx = iter->current_index / iter->elem_per_chunk;
int elem_idx = iter->current_index - iter->elem_per_chunk * chunk_idx;
iter->current_index++;
iter->cur_index++;
return (char *)(iter->mblk->chunk_list[chunk_idx]) + iter->mblk->elem_size * elem_idx;
void *ptr = (char *)(iter->chunk_list[iter->chunk_idx]) + iter->elem_ofs;
iter->elem_ofs += iter->elem_size;
if (iter->elem_ofs == iter->chunk_max_ofs) {
iter->elem_ofs = 0;
iter->chunk_idx++;
}
return ptr;
}

View File

@ -284,9 +284,9 @@ DRWInstanceDataList *DRW_instance_data_list_create(void)
{
DRWInstanceDataList *idatalist = MEM_callocN(sizeof(DRWInstanceDataList), "DRWInstanceDataList");
idatalist->pool_batching = BLI_memblock_create(sizeof(GPUBatch), true);
idatalist->pool_instancing = BLI_memblock_create(sizeof(GPUBatch), true);
idatalist->pool_buffers = BLI_memblock_create(sizeof(DRWTempBufferHandle), true);
idatalist->pool_batching = BLI_memblock_create(sizeof(GPUBatch));
idatalist->pool_instancing = BLI_memblock_create(sizeof(GPUBatch));
idatalist->pool_buffers = BLI_memblock_create(sizeof(DRWTempBufferHandle));
BLI_addtail(&g_idatalists, idatalist);

View File

@ -608,28 +608,28 @@ static void drw_viewport_var_init(void)
DST.vmempool = GPU_viewport_mempool_get(DST.viewport);
if (DST.vmempool->calls == NULL) {
DST.vmempool->calls = BLI_memblock_create(sizeof(DRWCall), false);
DST.vmempool->calls = BLI_memblock_create(sizeof(DRWCall));
}
if (DST.vmempool->states == NULL) {
DST.vmempool->states = BLI_memblock_create(sizeof(DRWCallState), false);
DST.vmempool->states = BLI_memblock_create(sizeof(DRWCallState));
}
if (DST.vmempool->cullstates == NULL) {
DST.vmempool->cullstates = BLI_memblock_create(sizeof(DRWCullingState), false);
DST.vmempool->cullstates = BLI_memblock_create(sizeof(DRWCullingState));
}
if (DST.vmempool->shgroups == NULL) {
DST.vmempool->shgroups = BLI_memblock_create(sizeof(DRWShadingGroup), false);
DST.vmempool->shgroups = BLI_memblock_create(sizeof(DRWShadingGroup));
}
if (DST.vmempool->uniforms == NULL) {
DST.vmempool->uniforms = BLI_memblock_create(sizeof(DRWUniform), false);
DST.vmempool->uniforms = BLI_memblock_create(sizeof(DRWUniform));
}
if (DST.vmempool->views == NULL) {
DST.vmempool->views = BLI_memblock_create(sizeof(DRWView), false);
DST.vmempool->views = BLI_memblock_create(sizeof(DRWView));
}
if (DST.vmempool->passes == NULL) {
DST.vmempool->passes = BLI_memblock_create(sizeof(DRWPass), false);
DST.vmempool->passes = BLI_memblock_create(sizeof(DRWPass));
}
if (DST.vmempool->images == NULL) {
DST.vmempool->images = BLI_memblock_create(sizeof(GPUTexture *), false);
DST.vmempool->images = BLI_memblock_create(sizeof(GPUTexture *));
}
DST.idatalist = GPU_viewport_instance_data_list_get(DST.viewport);