Search: take word order into account in string search

Differential Revision: https://developer.blender.org/D14223
This commit is contained in:
Jacques Lucke 2022-03-02 17:25:39 +01:00
parent ac45540a34
commit 9789a6c6d9
1 changed files with 37 additions and 16 deletions

View File

@ -151,6 +151,8 @@ int get_fuzzy_match_errors(StringRef query, StringRef full)
}
}
static constexpr int unused_word = -1;
/**
* Takes a query and tries to match it with the first characters of some words. For example, "msfv"
* matches "Mark Sharp from Vertices". Multiple letters of the beginning of a word can be matched
@ -163,7 +165,7 @@ int get_fuzzy_match_errors(StringRef query, StringRef full)
*/
static bool match_word_initials(StringRef query,
Span<StringRef> words,
Span<bool> word_is_usable,
Span<int> word_match_map,
MutableSpan<bool> r_word_is_matched,
int start = 0)
{
@ -189,13 +191,13 @@ static bool match_word_initials(StringRef query,
/* Try starting to match at another word. In some cases one can still find matches this
* way. */
return match_word_initials(
query, words, word_is_usable, r_word_is_matched, first_found_word_index + 1);
query, words, word_match_map, r_word_is_matched, first_found_word_index + 1);
}
return false;
}
/* Skip words that the caller does not want us to use. */
if (!word_is_usable[word_index]) {
if (word_match_map[word_index] != unused_word) {
word_index++;
BLI_assert(char_index == 0);
continue;
@ -225,12 +227,12 @@ static bool match_word_initials(StringRef query,
static int get_shortest_word_index_that_startswith(StringRef query,
Span<StringRef> words,
Span<bool> word_is_usable)
Span<int> word_match_map)
{
int best_word_size = INT32_MAX;
int best_word_index = -1;
for (const int i : words.index_range()) {
if (!word_is_usable[i]) {
if (word_match_map[i] != unused_word) {
continue;
}
StringRef word = words[i];
@ -246,11 +248,11 @@ static int get_shortest_word_index_that_startswith(StringRef query,
static int get_word_index_that_fuzzy_matches(StringRef query,
Span<StringRef> words,
Span<bool> word_is_usable,
Span<int> word_match_map,
int *r_error_count)
{
for (const int i : words.index_range()) {
if (!word_is_usable[i]) {
if (word_match_map[i] != unused_word) {
continue;
}
StringRef word = words[i];
@ -269,20 +271,22 @@ static int get_word_index_that_fuzzy_matches(StringRef query,
*/
static int score_query_against_words(Span<StringRef> query_words, Span<StringRef> result_words)
{
/* Remember which words have been matched, so that they are not matched again. */
Array<bool, 64> word_is_usable(result_words.size(), true);
/* A mapping from #result_words to #query_words. It's mainly used to determine if a word has been
* matched already to avoid matching it again. */
Array<int, 64> word_match_map(result_words.size(), unused_word);
/* Start with some high score, because otherwise the final score might become negative. */
int total_match_score = 1000;
for (StringRef query_word : query_words) {
for (const int query_word_index : query_words.index_range()) {
const StringRef query_word = query_words[query_word_index];
{
/* Check if any result word begins with the query word. */
const int word_index = get_shortest_word_index_that_startswith(
query_word, result_words, word_is_usable);
query_word, result_words, word_match_map);
if (word_index >= 0) {
total_match_score += 10;
word_is_usable[word_index] = false;
word_match_map[word_index] = query_word_index;
continue;
}
}
@ -290,12 +294,12 @@ static int score_query_against_words(Span<StringRef> query_words, Span<StringRef
/* Try to match against word initials. */
Array<bool, 64> matched_words(result_words.size());
const bool success = match_word_initials(
query_word, result_words, word_is_usable, matched_words);
query_word, result_words, word_match_map, matched_words);
if (success) {
total_match_score += 3;
for (const int i : result_words.index_range()) {
if (matched_words[i]) {
word_is_usable[i] = false;
word_match_map[i] = query_word_index;
}
}
continue;
@ -305,10 +309,10 @@ static int score_query_against_words(Span<StringRef> query_words, Span<StringRef
/* Fuzzy match against words. */
int error_count = 0;
const int word_index = get_word_index_that_fuzzy_matches(
query_word, result_words, word_is_usable, &error_count);
query_word, result_words, word_match_map, &error_count);
if (word_index >= 0) {
total_match_score += 3 - error_count;
word_is_usable[word_index] = false;
word_match_map[word_index] = query_word_index;
continue;
}
}
@ -317,6 +321,23 @@ static int score_query_against_words(Span<StringRef> query_words, Span<StringRef
return -1;
}
{
/* Add penalty when query words are not in the correct order. */
Vector<int> match_indices;
for (const int index : word_match_map) {
if (index != unused_word) {
match_indices.append(index);
}
}
if (!match_indices.is_empty()) {
for (const int i : IndexRange(match_indices.size() - 1)) {
if (match_indices[i] > match_indices[i + 1]) {
total_match_score -= 1;
}
}
}
}
return total_match_score;
}