BLI_memblock: Refactor for faster iteration and allocation
Remove the clear allocation flag as it has little impact since there should be very few allocation per redraw. Make BLI_memblock_alloc and BLI_memblock_iterstep much more cache efficient removing them almost entirely from performance profiles.
This commit is contained in:
parent
657165db94
commit
21dc2816d6
|
@ -35,16 +35,19 @@ struct BLI_memblock;
|
|||
typedef struct BLI_memblock BLI_memblock;
|
||||
typedef void (*MemblockValFreeFP)(void *val);
|
||||
|
||||
BLI_memblock *BLI_memblock_create(uint elem_size,
|
||||
const bool clear_alloc) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
|
||||
BLI_memblock *BLI_memblock_create(uint elem_size) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
|
||||
void *BLI_memblock_alloc(BLI_memblock *mblk) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);
|
||||
void BLI_memblock_clear(BLI_memblock *mblk, MemblockValFreeFP valfreefp) ATTR_NONNULL(1);
|
||||
void BLI_memblock_destroy(BLI_memblock *mblk, MemblockValFreeFP free_callback) ATTR_NONNULL(1);
|
||||
|
||||
typedef struct BLI_memblock_iter {
|
||||
BLI_memblock *mblk;
|
||||
int current_index;
|
||||
int elem_per_chunk;
|
||||
void **chunk_list;
|
||||
int cur_index;
|
||||
int end_index;
|
||||
int chunk_max_ofs;
|
||||
int chunk_idx;
|
||||
int elem_size;
|
||||
int elem_ofs;
|
||||
} BLI_memblock_iter;
|
||||
|
||||
void BLI_memblock_iternew(BLI_memblock *pool, BLI_memblock_iter *iter) ATTR_NONNULL();
|
||||
|
|
|
@ -49,18 +49,19 @@ struct BLI_memblock {
|
|||
int elem_next;
|
||||
/** Last "touched" element. */
|
||||
int elem_last;
|
||||
/** Offset in a chunk of the next elem. */
|
||||
int elem_next_ofs;
|
||||
/** Max offset in a chunk. */
|
||||
int chunk_max_ofs;
|
||||
/** Id of the chunk used for the next allocation. */
|
||||
int chunk_next;
|
||||
/** Chunck size in bytes. */
|
||||
int chunk_size;
|
||||
/** Number of allocated chunck. */
|
||||
int chunk_len;
|
||||
/** Clear newly allocated chuncks. */
|
||||
bool clear_alloc;
|
||||
};
|
||||
|
||||
/**
|
||||
* /clear_alloc will clear the memory the first time a chunck is allocated.
|
||||
*/
|
||||
BLI_memblock *BLI_memblock_create(uint elem_size, const bool clear_alloc)
|
||||
BLI_memblock *BLI_memblock_create(uint elem_size)
|
||||
{
|
||||
BLI_assert(elem_size < BLI_MEM_BLOCK_CHUNK_SIZE);
|
||||
|
||||
|
@ -71,22 +72,16 @@ BLI_memblock *BLI_memblock_create(uint elem_size, const bool clear_alloc)
|
|||
mblk->chunk_size = BLI_MEM_BLOCK_CHUNK_SIZE;
|
||||
mblk->chunk_len = CHUNK_LIST_SIZE;
|
||||
mblk->chunk_list = MEM_callocN(sizeof(void *) * (uint)mblk->chunk_len, "chunk list");
|
||||
mblk->clear_alloc = clear_alloc;
|
||||
mblk->chunk_list[0] = MEM_callocN((uint)mblk->chunk_size, "BLI_memblock chunk");
|
||||
mblk->chunk_max_ofs = (mblk->chunk_size / mblk->elem_size) * mblk->elem_size;
|
||||
mblk->elem_next_ofs = 0;
|
||||
mblk->chunk_next = 0;
|
||||
return mblk;
|
||||
}
|
||||
|
||||
void BLI_memblock_destroy(BLI_memblock *mblk, MemblockValFreeFP free_callback)
|
||||
{
|
||||
if (free_callback) {
|
||||
int elem_per_chunk = mblk->chunk_size / mblk->elem_size;
|
||||
|
||||
for (int i = mblk->elem_last; i >= 0; i--) {
|
||||
int chunk_idx = i / elem_per_chunk;
|
||||
int elem_idx = i - elem_per_chunk * chunk_idx;
|
||||
void *val = (char *)(mblk->chunk_list[chunk_idx]) + mblk->elem_size * elem_idx;
|
||||
free_callback(val);
|
||||
}
|
||||
}
|
||||
BLI_memblock_clear(mblk, free_callback);
|
||||
|
||||
for (int i = 0; i < mblk->chunk_len; i++) {
|
||||
MEM_SAFE_FREE(mblk->chunk_list[i]);
|
||||
|
@ -100,7 +95,7 @@ void BLI_memblock_destroy(BLI_memblock *mblk, MemblockValFreeFP free_callback)
|
|||
void BLI_memblock_clear(BLI_memblock *mblk, MemblockValFreeFP free_callback)
|
||||
{
|
||||
int elem_per_chunk = mblk->chunk_size / mblk->elem_size;
|
||||
int last_used_chunk = (mblk->elem_next - 1) / elem_per_chunk;
|
||||
int last_used_chunk = mblk->elem_next / elem_per_chunk;
|
||||
|
||||
if (free_callback) {
|
||||
for (int i = mblk->elem_last; i >= mblk->elem_next; i--) {
|
||||
|
@ -122,53 +117,66 @@ void BLI_memblock_clear(BLI_memblock *mblk, MemblockValFreeFP free_callback)
|
|||
|
||||
mblk->elem_last = mblk->elem_next - 1;
|
||||
mblk->elem_next = 0;
|
||||
mblk->elem_next_ofs = 0;
|
||||
mblk->chunk_next = 0;
|
||||
}
|
||||
|
||||
void *BLI_memblock_alloc(BLI_memblock *mblk)
|
||||
{
|
||||
int elem_per_chunk = mblk->chunk_size / mblk->elem_size;
|
||||
int chunk_idx = mblk->elem_next / elem_per_chunk;
|
||||
int elem_idx = mblk->elem_next - elem_per_chunk * chunk_idx;
|
||||
|
||||
/* Bookeeping. */
|
||||
if (mblk->elem_last < mblk->elem_next) {
|
||||
mblk->elem_last = mblk->elem_next;
|
||||
}
|
||||
|
||||
mblk->elem_next++;
|
||||
|
||||
if (UNLIKELY(chunk_idx >= mblk->chunk_len)) {
|
||||
mblk->chunk_len += CHUNK_LIST_SIZE;
|
||||
mblk->chunk_list = MEM_recallocN(mblk->chunk_list, sizeof(void *) * (uint)mblk->chunk_len);
|
||||
}
|
||||
void *ptr = (char *)(mblk->chunk_list[mblk->chunk_next]) + mblk->elem_next_ofs;
|
||||
|
||||
if (UNLIKELY(mblk->chunk_list[chunk_idx] == NULL)) {
|
||||
if (mblk->clear_alloc) {
|
||||
mblk->chunk_list[chunk_idx] = MEM_callocN((uint)mblk->chunk_size, "BLI_memblock chunk");
|
||||
mblk->elem_next_ofs += mblk->elem_size;
|
||||
|
||||
if (mblk->elem_next_ofs == mblk->chunk_max_ofs) {
|
||||
mblk->elem_next_ofs = 0;
|
||||
mblk->chunk_next++;
|
||||
|
||||
if (UNLIKELY(mblk->chunk_next >= mblk->chunk_len)) {
|
||||
mblk->chunk_len += CHUNK_LIST_SIZE;
|
||||
mblk->chunk_list = MEM_recallocN(mblk->chunk_list, sizeof(void *) * (uint)mblk->chunk_len);
|
||||
}
|
||||
else {
|
||||
mblk->chunk_list[chunk_idx] = MEM_mallocN((uint)mblk->chunk_size, "BLI_memblock chunk");
|
||||
|
||||
if (UNLIKELY(mblk->chunk_list[mblk->chunk_next] == NULL)) {
|
||||
mblk->chunk_list[mblk->chunk_next] = MEM_callocN((uint)mblk->chunk_size,
|
||||
"BLI_memblock chunk");
|
||||
}
|
||||
}
|
||||
|
||||
return (char *)(mblk->chunk_list[chunk_idx]) + mblk->elem_size * elem_idx;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void BLI_memblock_iternew(BLI_memblock *mblk, BLI_memblock_iter *iter)
|
||||
{
|
||||
iter->mblk = mblk;
|
||||
iter->current_index = 0;
|
||||
iter->elem_per_chunk = mblk->chunk_size / mblk->elem_size;
|
||||
/* Small copy of the memblock used for better cache coherence. */
|
||||
iter->chunk_list = mblk->chunk_list;
|
||||
iter->end_index = mblk->elem_next;
|
||||
iter->cur_index = 0;
|
||||
iter->chunk_idx = 0;
|
||||
iter->elem_ofs = 0;
|
||||
iter->elem_size = mblk->elem_size;
|
||||
iter->chunk_max_ofs = mblk->chunk_max_ofs;
|
||||
}
|
||||
|
||||
void *BLI_memblock_iterstep(BLI_memblock_iter *iter)
|
||||
{
|
||||
if (iter->current_index >= iter->mblk->elem_next) {
|
||||
if (iter->cur_index == iter->end_index) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int chunk_idx = iter->current_index / iter->elem_per_chunk;
|
||||
int elem_idx = iter->current_index - iter->elem_per_chunk * chunk_idx;
|
||||
iter->current_index++;
|
||||
iter->cur_index++;
|
||||
|
||||
return (char *)(iter->mblk->chunk_list[chunk_idx]) + iter->mblk->elem_size * elem_idx;
|
||||
void *ptr = (char *)(iter->chunk_list[iter->chunk_idx]) + iter->elem_ofs;
|
||||
|
||||
iter->elem_ofs += iter->elem_size;
|
||||
|
||||
if (iter->elem_ofs == iter->chunk_max_ofs) {
|
||||
iter->elem_ofs = 0;
|
||||
iter->chunk_idx++;
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
|
|
@ -284,9 +284,9 @@ DRWInstanceDataList *DRW_instance_data_list_create(void)
|
|||
{
|
||||
DRWInstanceDataList *idatalist = MEM_callocN(sizeof(DRWInstanceDataList), "DRWInstanceDataList");
|
||||
|
||||
idatalist->pool_batching = BLI_memblock_create(sizeof(GPUBatch), true);
|
||||
idatalist->pool_instancing = BLI_memblock_create(sizeof(GPUBatch), true);
|
||||
idatalist->pool_buffers = BLI_memblock_create(sizeof(DRWTempBufferHandle), true);
|
||||
idatalist->pool_batching = BLI_memblock_create(sizeof(GPUBatch));
|
||||
idatalist->pool_instancing = BLI_memblock_create(sizeof(GPUBatch));
|
||||
idatalist->pool_buffers = BLI_memblock_create(sizeof(DRWTempBufferHandle));
|
||||
|
||||
BLI_addtail(&g_idatalists, idatalist);
|
||||
|
||||
|
|
|
@ -608,28 +608,28 @@ static void drw_viewport_var_init(void)
|
|||
DST.vmempool = GPU_viewport_mempool_get(DST.viewport);
|
||||
|
||||
if (DST.vmempool->calls == NULL) {
|
||||
DST.vmempool->calls = BLI_memblock_create(sizeof(DRWCall), false);
|
||||
DST.vmempool->calls = BLI_memblock_create(sizeof(DRWCall));
|
||||
}
|
||||
if (DST.vmempool->states == NULL) {
|
||||
DST.vmempool->states = BLI_memblock_create(sizeof(DRWCallState), false);
|
||||
DST.vmempool->states = BLI_memblock_create(sizeof(DRWCallState));
|
||||
}
|
||||
if (DST.vmempool->cullstates == NULL) {
|
||||
DST.vmempool->cullstates = BLI_memblock_create(sizeof(DRWCullingState), false);
|
||||
DST.vmempool->cullstates = BLI_memblock_create(sizeof(DRWCullingState));
|
||||
}
|
||||
if (DST.vmempool->shgroups == NULL) {
|
||||
DST.vmempool->shgroups = BLI_memblock_create(sizeof(DRWShadingGroup), false);
|
||||
DST.vmempool->shgroups = BLI_memblock_create(sizeof(DRWShadingGroup));
|
||||
}
|
||||
if (DST.vmempool->uniforms == NULL) {
|
||||
DST.vmempool->uniforms = BLI_memblock_create(sizeof(DRWUniform), false);
|
||||
DST.vmempool->uniforms = BLI_memblock_create(sizeof(DRWUniform));
|
||||
}
|
||||
if (DST.vmempool->views == NULL) {
|
||||
DST.vmempool->views = BLI_memblock_create(sizeof(DRWView), false);
|
||||
DST.vmempool->views = BLI_memblock_create(sizeof(DRWView));
|
||||
}
|
||||
if (DST.vmempool->passes == NULL) {
|
||||
DST.vmempool->passes = BLI_memblock_create(sizeof(DRWPass), false);
|
||||
DST.vmempool->passes = BLI_memblock_create(sizeof(DRWPass));
|
||||
}
|
||||
if (DST.vmempool->images == NULL) {
|
||||
DST.vmempool->images = BLI_memblock_create(sizeof(GPUTexture *), false);
|
||||
DST.vmempool->images = BLI_memblock_create(sizeof(GPUTexture *));
|
||||
}
|
||||
|
||||
DST.idatalist = GPU_viewport_instance_data_list_get(DST.viewport);
|
||||
|
|
Loading…
Reference in New Issue