BLI: new string search api that supports fuzzy and prefix matching
This adds a generic string search library in `BLI_string_search.h`. The library has a simple to use C api that allows it's users to filter and sort a set of possible search results based on some query string. Reviewers: Severin Differential Revision: https://developer.blender.org/D8825
This commit is contained in:
parent
4ccd5bf5c6
commit
45bd8fdc2b
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct StringSearch StringSearch;
|
||||
|
||||
StringSearch *BLI_string_search_new(void);
|
||||
void BLI_string_search_add(StringSearch *search, const char *str, void *user_data);
|
||||
int BLI_string_search_query(StringSearch *search, const char *query, void ***r_data);
|
||||
void BLI_string_search_free(StringSearch *search);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
# include "BLI_linear_allocator.hh"
|
||||
# include "BLI_span.hh"
|
||||
# include "BLI_string_ref.hh"
|
||||
# include "BLI_vector.hh"
|
||||
|
||||
namespace blender::string_search {
|
||||
|
||||
int damerau_levenshtein_distance(StringRef a, StringRef b);
|
||||
int get_fuzzy_match_errors(StringRef query, StringRef full);
|
||||
void extract_normalized_words(StringRef str,
|
||||
LinearAllocator<> &allocator,
|
||||
Vector<StringRef, 64> &r_words);
|
||||
|
||||
} // namespace blender::string_search
|
||||
|
||||
#endif
|
|
@ -124,6 +124,7 @@ set(SRC
|
|||
intern/storage.c
|
||||
intern/string.c
|
||||
intern/string_cursor_utf8.c
|
||||
intern/string_search.cc
|
||||
intern/string_utf8.c
|
||||
intern/string_utils.c
|
||||
intern/system.c
|
||||
|
@ -267,6 +268,7 @@ set(SRC
|
|||
BLI_strict_flags.h
|
||||
BLI_string.h
|
||||
BLI_string_cursor_utf8.h
|
||||
BLI_string_search.h
|
||||
BLI_string_ref.hh
|
||||
BLI_string_utf8.h
|
||||
BLI_string_utils.h
|
||||
|
@ -411,6 +413,7 @@ if(WITH_GTESTS)
|
|||
tests/BLI_span_test.cc
|
||||
tests/BLI_stack_cxx_test.cc
|
||||
tests/BLI_stack_test.cc
|
||||
tests/BLI_string_search_test.cc
|
||||
tests/BLI_string_ref_test.cc
|
||||
tests/BLI_string_test.cc
|
||||
tests/BLI_string_utf8_test.cc
|
||||
|
|
|
@ -0,0 +1,475 @@
|
|||
/*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "BLI_array.hh"
|
||||
#include "BLI_linear_allocator.hh"
|
||||
#include "BLI_multi_value_map.hh"
|
||||
#include "BLI_span.hh"
|
||||
#include "BLI_string.h"
|
||||
#include "BLI_string_ref.hh"
|
||||
#include "BLI_string_search.h"
|
||||
#include "BLI_string_utf8.h"
|
||||
#include "BLI_timeit.hh"
|
||||
|
||||
namespace blender::string_search {
|
||||
|
||||
static int64_t count_utf8_code_points(StringRef str)
|
||||
{
|
||||
return static_cast<int64_t>(BLI_strnlen_utf8(str.data(), static_cast<size_t>(str.size())));
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the cost of transforming string a into b. The cost/distance is the minimal number of
|
||||
* operations that need to be executed. Valid operations are deletion, insertion, substitution and
|
||||
* transposition.
|
||||
*
|
||||
* This function is utf8 aware in the sense that it works at the level of individual code points
|
||||
* (1-4 bytes long) instead of on individual bytes.
|
||||
*/
|
||||
int damerau_levenshtein_distance(StringRef a, StringRef b)
|
||||
{
|
||||
constexpr int deletion_cost = 1;
|
||||
constexpr int insertion_cost = 1;
|
||||
constexpr int substitution_cost = 1;
|
||||
constexpr int transposition_cost = 1;
|
||||
|
||||
const int size_a = count_utf8_code_points(a);
|
||||
const int size_b = count_utf8_code_points(b);
|
||||
|
||||
/* Instead of keeping the entire table in memory, only keep three rows. The algorithm only
|
||||
* accesses these rows and nothing older.
|
||||
* All three rows are usually allocated on the stack. At most a single heap allocation is done,
|
||||
* if the reserved stack space is too small. */
|
||||
const int row_length = size_b + 1;
|
||||
Array<int, 64> rows(row_length * 3);
|
||||
|
||||
/* Store rows as spans so that it is cheap to swap them. */
|
||||
MutableSpan v0{rows.data() + row_length * 0, row_length};
|
||||
MutableSpan v1{rows.data() + row_length * 1, row_length};
|
||||
MutableSpan v2{rows.data() + row_length * 2, row_length};
|
||||
|
||||
/* Only v1 needs to be initialized. */
|
||||
for (const int i : IndexRange(row_length)) {
|
||||
v1[i] = i * insertion_cost;
|
||||
}
|
||||
|
||||
uint32_t prev_unicode_a;
|
||||
size_t offset_a = 0;
|
||||
for (const int i : IndexRange(size_a)) {
|
||||
v2[0] = (i + 1) * deletion_cost;
|
||||
|
||||
const uint32_t unicode_a = BLI_str_utf8_as_unicode_and_size(a.data() + offset_a, &offset_a);
|
||||
|
||||
uint32_t prev_unicode_b;
|
||||
size_t offset_b = 0;
|
||||
for (const int j : IndexRange(size_b)) {
|
||||
const uint32_t unicode_b = BLI_str_utf8_as_unicode_and_size(b.data() + offset_b, &offset_b);
|
||||
|
||||
/* Check how costly the different operations would be and pick the cheapest - the one with
|
||||
* minimal cost. */
|
||||
int new_cost = std::min({v1[j + 1] + deletion_cost,
|
||||
v2[j] + insertion_cost,
|
||||
v1[j] + (unicode_a != unicode_b) * substitution_cost});
|
||||
if (i > 0 && j > 0) {
|
||||
if (unicode_a == prev_unicode_b && prev_unicode_a == unicode_b) {
|
||||
new_cost = std::min(new_cost, v0[j - 1] + transposition_cost);
|
||||
}
|
||||
}
|
||||
|
||||
v2[j + 1] = new_cost;
|
||||
prev_unicode_b = unicode_b;
|
||||
}
|
||||
|
||||
/* Swap the three rows, so that the next row can be computed. */
|
||||
std::tie(v0, v1, v2) = std::tuple<MutableSpan<int>, MutableSpan<int>, MutableSpan<int>>(
|
||||
v1, v2, v0);
|
||||
prev_unicode_a = unicode_a;
|
||||
}
|
||||
|
||||
return v1.last();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns -1 when this is no reasonably good match.
|
||||
* Otherwise returns the number of errors in the match.
|
||||
*/
|
||||
int get_fuzzy_match_errors(StringRef query, StringRef full)
|
||||
{
|
||||
/* If it is a perfect partial match, return immediatly. */
|
||||
if (full.find(query) != StringRef::not_found) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const int query_size = count_utf8_code_points(query);
|
||||
const int full_size = count_utf8_code_points(full);
|
||||
|
||||
/* If there is only a single character which is not in the full string, this is not a match. */
|
||||
if (query_size == 1) {
|
||||
return -1;
|
||||
}
|
||||
BLI_assert(query.size() >= 2);
|
||||
|
||||
/* Allow more errors when the size grows larger. */
|
||||
const int max_errors = query_size <= 1 ? 0 : query_size / 8 + 1;
|
||||
|
||||
/* If the query is too large, this cannot be a match. */
|
||||
if (query_size - full_size > max_errors) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
const uint32_t query_first_unicode = BLI_str_utf8_as_unicode(query.data());
|
||||
const uint32_t query_second_unicode = BLI_str_utf8_as_unicode(query.data() +
|
||||
BLI_str_utf8_size(query.data()));
|
||||
|
||||
const char *full_begin = full.begin();
|
||||
const char *full_end = full.end();
|
||||
|
||||
const char *window_begin = full_begin;
|
||||
const char *window_end = window_begin;
|
||||
const int window_size = std::min(query_size + max_errors, full_size);
|
||||
const int extra_chars = window_size - query_size;
|
||||
const int max_acceptable_distance = max_errors + extra_chars;
|
||||
|
||||
for (int i = 0; i < window_size; i++) {
|
||||
window_end += BLI_str_utf8_size(window_end);
|
||||
}
|
||||
|
||||
while (true) {
|
||||
StringRef window{window_begin, window_end};
|
||||
const uint32_t window_begin_unicode = BLI_str_utf8_as_unicode(window_begin);
|
||||
int distance = 0;
|
||||
/* Expect that the first or second character of the query is correct. This helps to avoid
|
||||
* computing the more expensive distance function. */
|
||||
if (ELEM(window_begin_unicode, query_first_unicode, query_second_unicode)) {
|
||||
distance = damerau_levenshtein_distance(query, window);
|
||||
if (distance <= max_acceptable_distance) {
|
||||
return distance;
|
||||
}
|
||||
}
|
||||
if (window_end == full_end) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* When the distance is way too large, we can skip a couple of code points, because the
|
||||
* distance can't possibly become as short as required. */
|
||||
const int window_offset = std::max(1, distance / 2);
|
||||
for (int i = 0; i < window_offset && window_end < full_end; i++) {
|
||||
window_begin += BLI_str_utf8_size(window_begin);
|
||||
window_end += BLI_str_utf8_size(window_end);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a query and tries to match it with the first characters of some words. For example, "msfv"
|
||||
* matches "Mark Sharp from Vertices". Multiple letters of the beginning of a word can be matched
|
||||
* as well. For example, "seboulo" matches "select boundary loop". The order of words is important.
|
||||
* So "bose" does not match "select boundary". However, individual words can be skipped. For
|
||||
* example, "rocc" matches "rotate edge ccw".
|
||||
*
|
||||
* Returns true when the match was successfull. If it was successfull, the used words are tagged in
|
||||
* r_word_is_matched.
|
||||
*/
|
||||
static bool match_word_initials(StringRef query,
|
||||
Span<StringRef> words,
|
||||
Span<bool> word_is_usable,
|
||||
MutableSpan<bool> r_word_is_matched,
|
||||
int start = 0)
|
||||
{
|
||||
if (start >= words.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
r_word_is_matched.fill(false);
|
||||
|
||||
size_t query_index = 0;
|
||||
int word_index = start;
|
||||
size_t char_index = 0;
|
||||
|
||||
int first_found_word_index = -1;
|
||||
|
||||
while (query_index < query.size()) {
|
||||
const uint query_unicode = BLI_str_utf8_as_unicode_and_size(query.data() + query_index,
|
||||
&query_index);
|
||||
while (true) {
|
||||
/* We are at the end of words, no complete match has been found yet. */
|
||||
if (word_index >= words.size()) {
|
||||
if (first_found_word_index >= 0) {
|
||||
/* Try starting to match at another word. In some cases one can still find matches this
|
||||
* way. */
|
||||
return match_word_initials(
|
||||
query, words, word_is_usable, r_word_is_matched, first_found_word_index + 1);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Skip words that the caller does not want us to use. */
|
||||
if (!word_is_usable[word_index]) {
|
||||
word_index++;
|
||||
BLI_assert(char_index == 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
StringRef word = words[word_index];
|
||||
/* Try to match the current character with the current word. */
|
||||
if (static_cast<int>(char_index) < word.size()) {
|
||||
const uint32_t char_unicode = BLI_str_utf8_as_unicode_and_size(word.data() + char_index,
|
||||
&char_index);
|
||||
if (query_unicode == char_unicode) {
|
||||
r_word_is_matched[word_index] = true;
|
||||
if (first_found_word_index == -1) {
|
||||
first_found_word_index = word_index;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Could not find a match in the current word, go to the beginning of the next word. */
|
||||
word_index += 1;
|
||||
char_index = 0;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static int get_shortest_word_index_that_startswith(StringRef query,
|
||||
Span<StringRef> words,
|
||||
Span<bool> word_is_usable)
|
||||
{
|
||||
int best_word_size = INT32_MAX;
|
||||
int bset_word_index = -1;
|
||||
for (const int i : words.index_range()) {
|
||||
if (!word_is_usable[i]) {
|
||||
continue;
|
||||
}
|
||||
StringRef word = words[i];
|
||||
if (word.startswith(query)) {
|
||||
if (word.size() < best_word_size) {
|
||||
bset_word_index = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
return bset_word_index;
|
||||
}
|
||||
|
||||
static int get_word_index_that_fuzzy_matches(StringRef query,
|
||||
Span<StringRef> words,
|
||||
Span<bool> word_is_usable,
|
||||
int *r_error_count)
|
||||
{
|
||||
for (const int i : words.index_range()) {
|
||||
if (!word_is_usable[i]) {
|
||||
continue;
|
||||
}
|
||||
StringRef word = words[i];
|
||||
const int error_count = get_fuzzy_match_errors(query, word);
|
||||
if (error_count >= 0) {
|
||||
*r_error_count = error_count;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks how well the query matches a result. If it does not match, -1 is returned. A positive
|
||||
* return value indicates how good the match is. The higher the value, the better the match.
|
||||
*/
|
||||
static int score_query_against_words(Span<StringRef> query_words, Span<StringRef> result_words)
|
||||
{
|
||||
/* Remember which words have been matched, so that they are not matched again. */
|
||||
Array<bool, 64> word_is_usable(result_words.size(), true);
|
||||
|
||||
/* Start with some high score, because otherwise the final score might become negative. */
|
||||
int total_match_score = 1000;
|
||||
|
||||
for (StringRef query_word : query_words) {
|
||||
{
|
||||
/* Check if any result word begins with the query word. */
|
||||
const int word_index = get_shortest_word_index_that_startswith(
|
||||
query_word, result_words, word_is_usable);
|
||||
if (word_index >= 0) {
|
||||
total_match_score += 10;
|
||||
word_is_usable[word_index] = false;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
{
|
||||
/* Try to match against word initials. */
|
||||
Array<bool, 64> matched_words(result_words.size());
|
||||
const bool success = match_word_initials(
|
||||
query_word, result_words, word_is_usable, matched_words);
|
||||
if (success) {
|
||||
total_match_score += 3;
|
||||
for (const int i : result_words.index_range()) {
|
||||
if (matched_words[i]) {
|
||||
word_is_usable[i] = false;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
{
|
||||
/* Fuzzy match against words. */
|
||||
int error_count = 0;
|
||||
const int word_index = get_word_index_that_fuzzy_matches(
|
||||
query_word, result_words, word_is_usable, &error_count);
|
||||
if (word_index >= 0) {
|
||||
total_match_score += 3 - error_count;
|
||||
word_is_usable[word_index] = false;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* Couldn't match query word with anything. */
|
||||
return -1;
|
||||
}
|
||||
|
||||
return total_match_score;
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits a string into words and normalizes them (currently that just means converting to lower
|
||||
* case). The returned strings are allocated in the given allocator.
|
||||
*/
|
||||
void extract_normalized_words(StringRef str,
|
||||
LinearAllocator<> &allocator,
|
||||
Vector<StringRef, 64> &r_words)
|
||||
{
|
||||
const uint32_t unicode_space = BLI_str_utf8_as_unicode(" ");
|
||||
const uint32_t unicode_right_triangle = BLI_str_utf8_as_unicode("▶");
|
||||
|
||||
auto is_separator = [&](uint32_t unicode) {
|
||||
return ELEM(unicode, unicode_space, unicode_right_triangle);
|
||||
};
|
||||
|
||||
/* Make a copy of the string so that we can edit it. */
|
||||
StringRef str_copy = allocator.copy_string(str);
|
||||
char *mutable_copy = const_cast<char *>(str_copy.data());
|
||||
const size_t str_size_in_bytes = static_cast<size_t>(str.size());
|
||||
BLI_str_tolower_ascii(mutable_copy, str_size_in_bytes);
|
||||
|
||||
/* Iterate over all unicode code points to split individual words. */
|
||||
bool is_in_word = false;
|
||||
size_t word_start = 0;
|
||||
size_t offset = 0;
|
||||
while (offset < str_size_in_bytes) {
|
||||
size_t size = 0;
|
||||
uint32_t unicode = BLI_str_utf8_as_unicode_and_size(str.data() + offset, &size);
|
||||
if (is_separator(unicode)) {
|
||||
if (is_in_word) {
|
||||
r_words.append(
|
||||
str_copy.substr(static_cast<int>(word_start), static_cast<int>(offset - word_start)));
|
||||
is_in_word = false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!is_in_word) {
|
||||
word_start = offset;
|
||||
is_in_word = true;
|
||||
}
|
||||
}
|
||||
offset += size;
|
||||
}
|
||||
/* If the last word is not followed by a separator, it has to be handld separately. */
|
||||
if (is_in_word) {
|
||||
r_words.append(str_copy.drop_prefix(static_cast<int>(word_start)));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace blender::string_search
|
||||
|
||||
struct SearchItem {
|
||||
blender::Span<blender::StringRef> normalized_words;
|
||||
void *user_data;
|
||||
};
|
||||
|
||||
struct StringSearch {
|
||||
blender::LinearAllocator<> allocator;
|
||||
blender::Vector<SearchItem> items;
|
||||
};
|
||||
|
||||
StringSearch *BLI_string_search_new()
|
||||
{
|
||||
return new StringSearch();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new possible result to the search.
|
||||
* The caller keeps ownership of all parameters.
|
||||
*/
|
||||
void BLI_string_search_add(StringSearch *search, const char *str, void *user_data)
|
||||
{
|
||||
using namespace blender;
|
||||
Vector<StringRef, 64> words;
|
||||
string_search::extract_normalized_words(str, search->allocator, words);
|
||||
search->items.append({search->allocator.construct_array_copy(words.as_span()), user_data});
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter and sort all previously added search items.
|
||||
* Returns an array containing the filtered user data.
|
||||
* The caller has to free the returned array.
|
||||
*/
|
||||
int BLI_string_search_query(StringSearch *search, const char *query, void ***r_data)
|
||||
{
|
||||
using namespace blender;
|
||||
|
||||
LinearAllocator<> allocator;
|
||||
Vector<StringRef, 64> query_words;
|
||||
string_search::extract_normalized_words(query, allocator, query_words);
|
||||
|
||||
/* Compute score of every result. */
|
||||
MultiValueMap<int, int> result_indices_by_score;
|
||||
for (const int result_index : search->items.index_range()) {
|
||||
const int score = string_search::score_query_against_words(
|
||||
query_words, search->items[result_index].normalized_words);
|
||||
if (score >= 0) {
|
||||
result_indices_by_score.add(score, result_index);
|
||||
}
|
||||
}
|
||||
|
||||
Vector<int> found_scores;
|
||||
for (const int score : result_indices_by_score.keys()) {
|
||||
found_scores.append(score);
|
||||
}
|
||||
std::sort(found_scores.begin(), found_scores.end(), std::greater<int>());
|
||||
|
||||
/* Add results to output vector in correct order. First come the results with the best match
|
||||
* score. Results with the same score are in the order they have been added to the search. */
|
||||
Vector<int> sorted_result_indices;
|
||||
for (const int score : found_scores) {
|
||||
Span<int> indices = result_indices_by_score.lookup(score);
|
||||
sorted_result_indices.extend(indices);
|
||||
}
|
||||
|
||||
void **sorted_data = static_cast<void **>(
|
||||
MEM_malloc_arrayN(static_cast<size_t>(sorted_result_indices.size()), sizeof(void *), AT));
|
||||
for (const int i : sorted_result_indices.index_range()) {
|
||||
const int result_index = sorted_result_indices[i];
|
||||
SearchItem &item = search->items[result_index];
|
||||
sorted_data[i] = item.user_data;
|
||||
}
|
||||
|
||||
*r_data = sorted_data;
|
||||
|
||||
return sorted_result_indices.size();
|
||||
}
|
||||
|
||||
void BLI_string_search_free(StringSearch *string_search)
|
||||
{
|
||||
delete string_search;
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/* Apache License, Version 2.0 */
|
||||
|
||||
#include "testing/testing.h"
|
||||
|
||||
#include "BLI_array.hh"
|
||||
#include "BLI_string_search.h"
|
||||
#include "BLI_vector.hh"
|
||||
|
||||
namespace blender::string_search::tests {
|
||||
|
||||
TEST(string_search, damerau_levenshtein_distance)
|
||||
{
|
||||
EXPECT_EQ(damerau_levenshtein_distance("test", "test"), 0);
|
||||
EXPECT_EQ(damerau_levenshtein_distance("hello", "ell"), 2);
|
||||
EXPECT_EQ(damerau_levenshtein_distance("hello", "hel"), 2);
|
||||
EXPECT_EQ(damerau_levenshtein_distance("ell", "hello"), 2);
|
||||
EXPECT_EQ(damerau_levenshtein_distance("hell", "hello"), 1);
|
||||
EXPECT_EQ(damerau_levenshtein_distance("hello", "hallo"), 1);
|
||||
EXPECT_EQ(damerau_levenshtein_distance("test", ""), 4);
|
||||
EXPECT_EQ(damerau_levenshtein_distance("", "hello"), 5);
|
||||
EXPECT_EQ(damerau_levenshtein_distance("Test", "test"), 1);
|
||||
EXPECT_EQ(damerau_levenshtein_distance("ab", "ba"), 1);
|
||||
EXPECT_EQ(damerau_levenshtein_distance("what", "waht"), 1);
|
||||
EXPECT_EQ(damerau_levenshtein_distance("what", "ahwt"), 2);
|
||||
}
|
||||
|
||||
TEST(string_search, get_fuzzy_match_errors)
|
||||
{
|
||||
EXPECT_EQ(get_fuzzy_match_errors("a", "b"), -1);
|
||||
EXPECT_EQ(get_fuzzy_match_errors("", "abc"), 0);
|
||||
EXPECT_EQ(get_fuzzy_match_errors("hello", "hallo"), 1);
|
||||
EXPECT_EQ(get_fuzzy_match_errors("hap", "hello"), -1);
|
||||
EXPECT_EQ(get_fuzzy_match_errors("armature", "▶restore"), -1);
|
||||
}
|
||||
|
||||
TEST(string_search, extract_normalized_words)
|
||||
{
|
||||
LinearAllocator<> allocator;
|
||||
Vector<StringRef, 64> words;
|
||||
extract_normalized_words("hello world▶test another test▶ 3", allocator, words);
|
||||
EXPECT_EQ(words.size(), 6);
|
||||
EXPECT_EQ(words[0], "hello");
|
||||
EXPECT_EQ(words[1], "world");
|
||||
EXPECT_EQ(words[2], "test");
|
||||
EXPECT_EQ(words[3], "another");
|
||||
EXPECT_EQ(words[4], "test");
|
||||
EXPECT_EQ(words[5], "3");
|
||||
}
|
||||
|
||||
} // namespace blender::string_search::tests
|
Loading…
Reference in New Issue