ID Management: Improve speed of code used when creating/renaming and ID.

This commit affects `check_for_dupid()` helper:
* Add a special, quicker code path dedicated to sequential addition of a
  large number of IDs using the same base name.

This gives a significant speedup to adding 'randomly'-named IDs:

| Number and type of names of IDs  | old code | new code | speed improvement |
| -------------------------------- | -------- | -------- | ----------------- |
| 40K, mixed (14k rand, 26k const) |      49s |      39s |               26% |
| 40K, fully random                |      51s |      51s |                0% |
| 40K, fully constant              |      71s |      40s |               78% |

Note that 'random' names give no improvement as expected, since this new code
path will never be used in such cases.
This commit is contained in:
Bastien Montagne 2019-12-19 16:27:13 +01:00
parent 2aab727009
commit 4cc8201a65
1 changed files with 147 additions and 35 deletions

View File

@ -1612,6 +1612,53 @@ void id_sort_by_name(ListBase *lb, ID *id)
}
#undef ID_SORT_STEP_SIZE
/* Note: this code assumes and ensures that the suffix number can never go beyond 1 billion. */
#define MAX_NUMBER 1000000000
/* We do not want to get "name.000", so minimal number is 1. */
#define MIN_NUMBER 1
/* The maximum value up to which we search for the actual smallest unused number. Beyond that
* value, we will only use the first biggest unused number, without trying to 'fill the gaps'
* in-between already used numbers... */
#define MAX_NUMBERS_IN_USE 1024
/**
* Helper building final ID name from given root_name and number.
*
* If everything goes well and we do generate a valid final ID anme in given name, we return true.
* In case the final name would overflow the allowed ID name length, or given number is bigger than
* maximum allowed value, we truncate further the root_name (and given name, which is assumed to
* have the same 'root_name' part), and return false.
*/
static bool id_name_final_build(char *name, char *root_name, size_t root_name_len, int number)
{
char number_str[11]; /* Dot + nine digits + NULL terminator. */
size_t number_str_len = BLI_snprintf_rlen(number_str, ARRAY_SIZE(number_str), ".%.3d", number);
/* If the number would lead to an overflow of the maximum ID name length, we need to truncate
* the root name part and do all the number checks again. */
if (root_name_len + number_str_len >= MAX_ID_NAME - 2 || number >= MAX_NUMBER) {
if (root_name_len + number_str_len >= MAX_ID_NAME - 2) {
root_name_len = MAX_ID_NAME - 2 - number_str_len - 1;
}
else {
root_name_len--;
}
root_name[root_name_len] = '\0';
/* Code above may have generated invalid utf-8 string, due to raw truncation.
* Ensure we get a valid one now. */
root_name_len -= (size_t)BLI_utf8_invalid_strip(root_name, root_name_len);
/* Also truncate orig name, and start the whole check again. */
name[root_name_len] = '\0';
return false;
}
/* We have our final number, we can put it in name and exit the function. */
BLI_strncpy(name + root_name_len, number_str, number_str_len + 1);
return true;
}
/**
* Check to see if an ID name is already used, and find a new one if so.
* Return true if a new name was created (returned in name).
@ -1622,28 +1669,100 @@ void id_sort_by_name(ListBase *lb, ID *id)
*/
static bool check_for_dupid(ListBase *lb, ID *id, char *name)
{
/* Note: this code assumes and ensures that the suffix number can never go beyond 1 billion. */
#define MAX_NUMBER 1000000000
/* We do not want to get "name.000", so minimal number is 1. */
#define MIN_NUMBER 1
BLI_assert(strlen(name) < MAX_ID_NAME - 2);
ID *id_test = lb->first;
bool is_name_changed = false;
if (id_test == NULL) {
return is_name_changed;
}
const short id_type = (short)GS(id_test->name);
/* Static storage of previous handled ID/name info, used to perform a quicker test and optimize
* creation of huge number of IDs using the same given root name. */
static char prev_orig_root_name[MAX_ID_NAME - 2] = {0};
static char prev_final_root_name[MAX_ID_NAME - 2] = {0};
static short prev_id_type = ID_LINK_PLACEHOLDER; /* Should never exist in actual ID list. */
static int prev_number = MIN_NUMBER - 1;
/* Initial test to check whether we can 'shortcut' the more complex loop of the main code below.
* Note that we do not do that for low numbers, as that would prevent using actual smallest
* available number in some cases, and benefits of this special case handling mostly show up with
* high numbers anyway. */
if (id_type == prev_id_type && prev_number >= MAX_NUMBERS_IN_USE &&
prev_number < MAX_NUMBER - 1 && name[0] == prev_final_root_name[0]) {
/* Get the name and number parts ("name.number"). */
char root_name[MAX_ID_NAME - 2];
int number = MIN_NUMBER;
size_t root_name_len = BLI_split_name_num(root_name, &number, name, '.');
size_t prev_final_root_name_len = strlen(prev_final_root_name);
size_t prev_orig_root_name_len = strlen(prev_orig_root_name);
if (root_name_len == prev_orig_root_name_len &&
STREQLEN(root_name, prev_orig_root_name, prev_orig_root_name_len)) {
/* Once we have ensured given root_name and original previous one are the same, we can check
* that previously used number is actually used, and that next one is free. */
/* Note that from now on, we only used previous final root name, as it might have been
* truncated from original one due to number suffix length. */
char final_name[MAX_ID_NAME - 2];
char prev_final_name[MAX_ID_NAME - 2];
BLI_strncpy(final_name, prev_final_root_name, prev_final_root_name_len + 1);
BLI_strncpy(prev_final_name, prev_final_root_name, prev_final_root_name_len + 1);
if (id_name_final_build(final_name, root_name, prev_final_root_name_len, prev_number + 1) &&
id_name_final_build(prev_final_name, root_name, prev_final_root_name_len, prev_number)) {
/* We succeffuly built valid final names of previous and current iterations, now we have to
* ensure that previous final name is indeed used in curent ID list, and that current one
* is not. */
bool is_valid = false;
for (id_test = lb->first; id_test; id_test = id_test->next) {
if (id != id_test && !ID_IS_LINKED(id_test)) {
if (id_test->name[2] == final_name[0] && STREQ(final_name, id_test->name + 2)) {
/* We expect final_name to not be already used, so this is a failure. */
is_valid = false;
break;
}
/* Previous final name should only be found once in the list, so if it was found
* already, no need to do a string comparison again. */
if (!is_valid && id_test->name[2] == prev_final_name[0] &&
STREQ(prev_final_name, id_test->name + 2)) {
is_valid = true;
}
}
}
if (is_valid) {
/* Only the number changed, prev_orig_root_name, prev_root_name and prev_id_type remain
* the same. */
prev_number++;
strcpy(name, final_name);
return true;
}
}
}
}
/* To speed up finding smallest unused number within [0 .. MAX_NUMBERS_IN_USE - 1].
* We do not bother beyond that point. */
#define MAX_NUMBERS_IN_USE 1024
BLI_bitmap *numbers_in_use = BLI_BITMAP_NEW_ALLOCA(MAX_NUMBERS_IN_USE);
ID *id_test;
bool is_first_run = true;
while (true) {
/* Get the name and number parts ("name.number"). */
char root_name[MAX_ID_NAME - 2];
int number = MIN_NUMBER;
size_t root_name_len = BLI_split_name_num(root_name, &number, name, '.');
/* Store previous original given root name now, as we might alter it later in code below. */
if (is_first_run) {
strcpy(prev_orig_root_name, root_name);
is_first_run = false;
}
/* In case we get an insane initial number suffix in given name. */
/* Note: BLI_split_name_num() cannot return negative numbers, so we do not have to check for
* that here. */
@ -1680,11 +1799,17 @@ static bool check_for_dupid(ListBase *lb, ID *id, char *name)
* Note however that name might have been changed (truncated) in a previous iteration already.
*/
if (!is_orig_name_used) {
/* Don't bother updating prev_ static variables here, this case is not supposed to happen
* that often, and is not straight-forward here, so just ignore and reset them to default. */
prev_id_type = ID_LINK_PLACEHOLDER;
prev_final_root_name[0] = '\0';
prev_number = MIN_NUMBER - 1;
return is_name_changed;
}
/* Decide which value of nr to use, either the smallest unused one if possible, or default to
* the first largest unused one we got from previous loop. */
/* Decide which value of number to use, either the smallest unused one if possible, or default
* to the first largest unused one we got from previous loop. */
for (int i = MIN_NUMBER; i < MAX_NUMBERS_IN_USE; i++) {
if (!BLI_BITMAP_TEST_BOOL(numbers_in_use, i)) {
number = i;
@ -1697,45 +1822,32 @@ static bool check_for_dupid(ListBase *lb, ID *id, char *name)
* We can't be bothered to look for the lowest unused number beyond
* (MAX_NUMBERS_IN_USE - 1).
*/
/* We know for wure that name will be changed. */
is_name_changed = true;
char number_str[11]; /* Dot + nine digits + NULL char. */
size_t number_str_len = BLI_snprintf_rlen(number_str, ARRAY_SIZE(number_str), ".%.3d", number);
/* If the number would lead to an overflow of the maximum ID name length, we need to truncate
* the root name part and do all the number checks again. */
if (root_name_len + number_str_len >= MAX_ID_NAME - 2 || number >= MAX_NUMBER) {
if (root_name_len + number_str_len >= MAX_ID_NAME - 2) {
root_name_len = MAX_ID_NAME - 2 - number_str_len - 1;
}
else {
root_name_len--;
}
root_name[root_name_len] = '\0';
/* Code above may have generated invalid utf-8 string, due to raw truncation.
* Ensure we get a valid one now. */
root_name_len -= (size_t)BLI_utf8_invalid_strip(root_name, root_name_len);
/* If id_name_final_build helper returns false, it had to truncate further given name, hence we
* have to go over the whole check again. */
if (!id_name_final_build(name, root_name, root_name_len, number)) {
/* We have to clear our list of small used numbers before we do the whole check again. */
BLI_bitmap_set_all(numbers_in_use, false, MAX_NUMBERS_IN_USE);
/* Copy back truncated root name into orig name, and start the whole check again. */
BLI_strncpy(name, root_name, root_name_len + 1);
is_name_changed = true;
continue;
}
/* We have our final number, we can put it in name and exit the function. */
BLI_strncpy(name + root_name_len, number_str, number_str_len + 1);
is_name_changed = true;
/* Update prev_ static variables, in case next call is for the same type of IDs and with the
* same initial root name, we can skip a lot of above process. */
prev_id_type = id_type;
strcpy(prev_final_root_name, root_name);
prev_number = number;
return is_name_changed;
}
#undef MAX_NUMBERS_IN_USE
}
#undef MIN_NUMBER
#undef MAX_NUMBER
}
/**
* Ensures given ID has a unique name in given listbase.