Search: take word order into account in string search
Differential Revision: https://developer.blender.org/D14223
This commit is contained in:
parent
ac45540a34
commit
9789a6c6d9
|
@ -151,6 +151,8 @@ int get_fuzzy_match_errors(StringRef query, StringRef full)
|
|||
}
|
||||
}
|
||||
|
||||
static constexpr int unused_word = -1;
|
||||
|
||||
/**
|
||||
* Takes a query and tries to match it with the first characters of some words. For example, "msfv"
|
||||
* matches "Mark Sharp from Vertices". Multiple letters of the beginning of a word can be matched
|
||||
|
@ -163,7 +165,7 @@ int get_fuzzy_match_errors(StringRef query, StringRef full)
|
|||
*/
|
||||
static bool match_word_initials(StringRef query,
|
||||
Span<StringRef> words,
|
||||
Span<bool> word_is_usable,
|
||||
Span<int> word_match_map,
|
||||
MutableSpan<bool> r_word_is_matched,
|
||||
int start = 0)
|
||||
{
|
||||
|
@ -189,13 +191,13 @@ static bool match_word_initials(StringRef query,
|
|||
/* Try starting to match at another word. In some cases one can still find matches this
|
||||
* way. */
|
||||
return match_word_initials(
|
||||
query, words, word_is_usable, r_word_is_matched, first_found_word_index + 1);
|
||||
query, words, word_match_map, r_word_is_matched, first_found_word_index + 1);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Skip words that the caller does not want us to use. */
|
||||
if (!word_is_usable[word_index]) {
|
||||
if (word_match_map[word_index] != unused_word) {
|
||||
word_index++;
|
||||
BLI_assert(char_index == 0);
|
||||
continue;
|
||||
|
@ -225,12 +227,12 @@ static bool match_word_initials(StringRef query,
|
|||
|
||||
static int get_shortest_word_index_that_startswith(StringRef query,
|
||||
Span<StringRef> words,
|
||||
Span<bool> word_is_usable)
|
||||
Span<int> word_match_map)
|
||||
{
|
||||
int best_word_size = INT32_MAX;
|
||||
int best_word_index = -1;
|
||||
for (const int i : words.index_range()) {
|
||||
if (!word_is_usable[i]) {
|
||||
if (word_match_map[i] != unused_word) {
|
||||
continue;
|
||||
}
|
||||
StringRef word = words[i];
|
||||
|
@ -246,11 +248,11 @@ static int get_shortest_word_index_that_startswith(StringRef query,
|
|||
|
||||
static int get_word_index_that_fuzzy_matches(StringRef query,
|
||||
Span<StringRef> words,
|
||||
Span<bool> word_is_usable,
|
||||
Span<int> word_match_map,
|
||||
int *r_error_count)
|
||||
{
|
||||
for (const int i : words.index_range()) {
|
||||
if (!word_is_usable[i]) {
|
||||
if (word_match_map[i] != unused_word) {
|
||||
continue;
|
||||
}
|
||||
StringRef word = words[i];
|
||||
|
@ -269,20 +271,22 @@ static int get_word_index_that_fuzzy_matches(StringRef query,
|
|||
*/
|
||||
static int score_query_against_words(Span<StringRef> query_words, Span<StringRef> result_words)
|
||||
{
|
||||
/* Remember which words have been matched, so that they are not matched again. */
|
||||
Array<bool, 64> word_is_usable(result_words.size(), true);
|
||||
/* A mapping from #result_words to #query_words. It's mainly used to determine if a word has been
|
||||
* matched already to avoid matching it again. */
|
||||
Array<int, 64> word_match_map(result_words.size(), unused_word);
|
||||
|
||||
/* Start with some high score, because otherwise the final score might become negative. */
|
||||
int total_match_score = 1000;
|
||||
|
||||
for (StringRef query_word : query_words) {
|
||||
for (const int query_word_index : query_words.index_range()) {
|
||||
const StringRef query_word = query_words[query_word_index];
|
||||
{
|
||||
/* Check if any result word begins with the query word. */
|
||||
const int word_index = get_shortest_word_index_that_startswith(
|
||||
query_word, result_words, word_is_usable);
|
||||
query_word, result_words, word_match_map);
|
||||
if (word_index >= 0) {
|
||||
total_match_score += 10;
|
||||
word_is_usable[word_index] = false;
|
||||
word_match_map[word_index] = query_word_index;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
@ -290,12 +294,12 @@ static int score_query_against_words(Span<StringRef> query_words, Span<StringRef
|
|||
/* Try to match against word initials. */
|
||||
Array<bool, 64> matched_words(result_words.size());
|
||||
const bool success = match_word_initials(
|
||||
query_word, result_words, word_is_usable, matched_words);
|
||||
query_word, result_words, word_match_map, matched_words);
|
||||
if (success) {
|
||||
total_match_score += 3;
|
||||
for (const int i : result_words.index_range()) {
|
||||
if (matched_words[i]) {
|
||||
word_is_usable[i] = false;
|
||||
word_match_map[i] = query_word_index;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
|
@ -305,10 +309,10 @@ static int score_query_against_words(Span<StringRef> query_words, Span<StringRef
|
|||
/* Fuzzy match against words. */
|
||||
int error_count = 0;
|
||||
const int word_index = get_word_index_that_fuzzy_matches(
|
||||
query_word, result_words, word_is_usable, &error_count);
|
||||
query_word, result_words, word_match_map, &error_count);
|
||||
if (word_index >= 0) {
|
||||
total_match_score += 3 - error_count;
|
||||
word_is_usable[word_index] = false;
|
||||
word_match_map[word_index] = query_word_index;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
@ -317,6 +321,23 @@ static int score_query_against_words(Span<StringRef> query_words, Span<StringRef
|
|||
return -1;
|
||||
}
|
||||
|
||||
{
|
||||
/* Add penalty when query words are not in the correct order. */
|
||||
Vector<int> match_indices;
|
||||
for (const int index : word_match_map) {
|
||||
if (index != unused_word) {
|
||||
match_indices.append(index);
|
||||
}
|
||||
}
|
||||
if (!match_indices.is_empty()) {
|
||||
for (const int i : IndexRange(match_indices.size() - 1)) {
|
||||
if (match_indices[i] > match_indices[i + 1]) {
|
||||
total_match_score -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return total_match_score;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue