Add (r)partition funcs to BLI_string, to get left-most/right-most first occurrence of delimiters.

Inspired by Python (r)partition str functions. Also added some Gtest cases for those new funcs.

Reviewed by Campbell Barton, many thanks!
This commit is contained in:
Bastien Montagne 2014-07-04 14:14:06 +02:00
parent 85c4feab02
commit e3c8cf0a9e
Notes: blender-bot 2023-02-14 11:27:37 +01:00
Referenced by issue #37874, Select Similar for Nodes
9 changed files with 387 additions and 0 deletions

View File

@ -37,6 +37,7 @@
#include "blf_internal.h"
#include "BLI_utildefines.h"
#include "BLI_string_utf8.h"
unsigned int blf_next_p2(unsigned int x)

View File

@ -79,6 +79,10 @@ int BLI_str_rstrip_float_zero(char *str, const char pad) ATTR_NONNULL();
int BLI_str_index_in_array_n(const char *__restrict str, const char **__restrict str_array, const int str_array_len) ATTR_NONNULL();
int BLI_str_index_in_array(const char *__restrict str, const char **__restrict str_array) ATTR_NONNULL();
/* Partition helpers: locate a delimiter from `delim` (zero-terminated array) in `str`,
 * report the delimiter position through `sep` and the char following it through `suf`
 * (both set to NULL when no delimiter is found), and return the length of the prefix.
 * The plain variant searches from the left, the 'r' variant from the right,
 * and the '_ex' variant picks the direction through `from_right`. */
size_t BLI_str_partition(const char *str, const unsigned int delim[], char **sep, char **suf) ATTR_NONNULL();
size_t BLI_str_rpartition(const char *str, const unsigned int delim[], char **sep, char **suf) ATTR_NONNULL();
size_t BLI_str_partition_ex(const char *str, const unsigned int delim[], char **sep, char **suf, const bool from_right) ATTR_NONNULL();
#ifdef __cplusplus
}
#endif

View File

@ -66,6 +66,10 @@ int BLI_wcswidth(const wchar_t *pwcs, size_t n) ATTR_NONNULL();
int BLI_str_utf8_char_width(const char *p) ATTR_NONNULL(); /* warning, can return -1 on bad chars */
int BLI_str_utf8_char_width_safe(const char *p) ATTR_NONNULL();
/* UTF-8 aware partition helpers: `str` is decoded char by char and each decoded value is
 * compared against the zero-terminated `delim` array. `sep` receives the start of the matching
 * char and `suf` the start of the char following it (both NULL when nothing matches);
 * the return value is the prefix length in bytes.
 * The plain variant searches from the left, the 'r' variant from the right,
 * and the '_ex' variant picks the direction through `from_right`. */
size_t BLI_str_partition_utf8(const char *str, const unsigned int delim[], char **sep, char **suf) ATTR_NONNULL();
size_t BLI_str_rpartition_utf8(const char *str, const unsigned int delim[], char **sep, char **suf) ATTR_NONNULL();
size_t BLI_str_partition_ex_utf8(const char *str, const unsigned int delim[], char **sep, char **suf, const bool from_right) ATTR_NONNULL();
#define BLI_UTF8_MAX 6 /* mem */
#define BLI_UTF8_WIDTH_MAX 2 /* columns */
#define BLI_UTF8_ERR ((unsigned int)-1)

View File

@ -681,3 +681,63 @@ int BLI_str_index_in_array(const char *str, const char **str_array)
return -1;
}
/**
 * Find the left-most char of \a str matching one of the chars in \a delim.
 *
 * Thin wrapper around #BLI_str_partition_ex with \a from_right set to false.
 *
 * \param str The string to search within.
 * \param delim The set of delimiters to search for, as unicode values (zero-terminated array).
 * \param sep Return value, set to the left-most delimiter found (or NULL if none found).
 * \param suf Return value, set to next char after the delimiter found (or NULL if none found).
 * \return The length of the prefix (i.e. *sep - str), or the length of \a str if no delimiter is found.
 */
size_t BLI_str_partition(const char *str, const unsigned int delim[], char **sep, char **suf)
{
return BLI_str_partition_ex(str, delim, sep, suf, false);
}
/**
 * Find the right-most char of \a str matching one of the chars in \a delim.
 *
 * Thin wrapper around #BLI_str_partition_ex with \a from_right set to true.
 *
 * \param str The string to search within.
 * \param delim The set of delimiters to search for, as unicode values (zero-terminated array).
 * \param sep Return value, set to the right-most delimiter found (or NULL if none found).
 * \param suf Return value, set to next char after the delimiter found (or NULL if none found).
 * \return The length of the prefix (i.e. *sep - str), or the length of \a str if no delimiter is found.
 */
size_t BLI_str_rpartition(const char *str, const unsigned int delim[], char **sep, char **suf)
{
return BLI_str_partition_ex(str, delim, sep, suf, true);
}
/**
 * Find the first char matching one of the chars in \a delim, either from left or right.
 *
 * The search is byte-wise (it relies on strchr/strrchr), so only delimiters that fit in a
 * single byte can be matched here. Larger values are ignored rather than truncated
 * (truncation would e.g. turn 0x262F into '/'); use #BLI_str_partition_ex_utf8 for those.
 *
 * \param str The string to search within.
 * \param delim The set of delimiters to search for, as unicode values (zero-terminated array).
 * \param sep Return value, set to the first delimiter found (or NULL if none found).
 * \param suf Return value, set to next char after the first delimiter found (or NULL if none found).
 * \param from_right If %true, search from the right of \a str, else, search from its left.
 * \return The length of the prefix (i.e. *sep - str), or the length of \a str if no delimiter is found.
 */
size_t BLI_str_partition_ex(const char *str, const unsigned int delim[], char **sep, char **suf, const bool from_right)
{
	const unsigned int *d;
	char *(*func)(const char *str, int c) = from_right ? strrchr : strchr;

	*sep = *suf = NULL;

	for (d = delim; *d != '\0'; ++d) {
		char *tmp;

		/* strchr/strrchr convert their argument to char: a value which does not survive
		 * that conversion would match an unrelated byte, so skip it. */
		if ((unsigned int)(unsigned char)*d != *d) {
			continue;
		}

		tmp = func(str, (int)*d);

		/* Check for a not-yet-set *sep first, relational comparison against NULL is not valid. */
		if (tmp && (!*sep || (from_right ? (*sep < tmp) : (*sep > tmp)))) {
			*sep = tmp;
		}
	}

	if (*sep) {
		*suf = *sep + 1;
		return (size_t)(*sep - str);
	}

	return strlen(str);
}

View File

@ -703,3 +703,49 @@ char *BLI_str_prev_char_utf8(const char *p)
}
}
/* end glib copy */
/**
 * UTF-8 aware partition, searching from the left of \a str.
 *
 * Thin wrapper around #BLI_str_partition_ex_utf8 with \a from_right set to false.
 *
 * \param str The string to search within.
 * \param delim The set of delimiters to search for, as unicode values (zero-terminated array).
 * \param sep Return value, set to the delimiter found (or NULL if none found).
 * \param suf Return value, set to the char following the delimiter found (or NULL if none found).
 * \return The length in bytes of the prefix, or the length of \a str if no delimiter is found.
 */
size_t BLI_str_partition_utf8(const char *str, const unsigned int delim[], char **sep, char **suf)
{
return BLI_str_partition_ex_utf8(str, delim, sep, suf, false);
}
/**
 * UTF-8 aware partition, searching from the right of \a str.
 *
 * Thin wrapper around #BLI_str_partition_ex_utf8 with \a from_right set to true.
 *
 * \param str The string to search within.
 * \param delim The set of delimiters to search for, as unicode values (zero-terminated array).
 * \param sep Return value, set to the delimiter found (or NULL if none found).
 * \param suf Return value, set to the char following the delimiter found (or NULL if none found).
 * \return The length in bytes of the prefix, or the length of \a str if no delimiter is found.
 */
size_t BLI_str_rpartition_utf8(const char *str, const unsigned int delim[], char **sep, char **suf)
{
return BLI_str_partition_ex_utf8(str, delim, sep, suf, true);
}
/**
 * UTF-8 aware partition: walk \a str one char at a time, either from its left or from its right,
 * and stop at the first char whose decoded value is listed in \a delim.
 *
 * \param str The string to search within.
 * \param delim The set of delimiters to search for, as unicode values (zero-terminated array).
 * \param sep Return value, set to the start of the delimiter found (or NULL if none found).
 * \param suf Return value, set to the start of the char following the delimiter found
 * (or NULL if none found).
 * \param from_right If %true, walk from the right of \a str, else, walk from its left.
 * \return The length in bytes of the prefix (i.e. *sep - str), or the length of \a str if no
 * delimiter is found or if decoding reports BLI_UTF8_ERR.
 */
size_t BLI_str_partition_ex_utf8(const char *str, const unsigned int delim[], char **sep, char **suf,
                                 const bool from_right)
{
	const size_t str_len = strlen(str);
	const char *str_end = str + str_len;
	/* Updated by the decoder; used below as the offset of the next char when walking
	 * from the left (NOTE(review): presumably advanced by the decoded char's byte size, confirm). */
	size_t index = 0;

	/* When walking from the right, the suffix of the right-most char is the end of the string. */
	*suf = (char *)str_end;
	*sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(str, str_end) : str);

	while (*sep != NULL && **sep != '\0') {
		const unsigned int *d;
		const unsigned int c = BLI_str_utf8_as_unicode_and_size(*sep, &index);

		if (c == BLI_UTF8_ERR) {
			/* Decoding failed: give up, reported below as 'no delimiter found'. */
			break;
		}

		for (d = delim; *d != '\0'; ++d) {
			if (c != *d) {
				continue;
			}
			/* From the right, *suf already points to the char visited just before this one. */
			if (!from_right) {
				*suf = (char *)(str + index);
			}
			return (size_t)(*sep - str);
		}

		/* Remember current char, it is the suffix candidate for the next step from the right. */
		*suf = *sep;

		if (from_right) {
			*sep = (char *)BLI_str_find_prev_char_utf8(str, *sep);
		}
		else {
			*sep = (char *)(str + index);
		}
	}

	*suf = *sep = NULL;
	return str_len;
}

View File

@ -33,6 +33,7 @@
#include <stdio.h>
#include "BLI_utildefines.h"
#include "BLI_string.h"
#include "BLI_math.h"

View File

@ -33,6 +33,7 @@
#include <stdlib.h>
#include <string.h>
#include "BLI_utildefines.h"
#include "BLI_string.h"
#include "MEM_guardedalloc.h"

View File

@ -0,0 +1,269 @@
/* Apache License, Version 2.0 */
#include "testing/testing.h"
extern "C" {
#include "BLI_utildefines.h"
#include "BLI_string.h"
#include "BLI_string_utf8.h"
}
/* -------------------------------------------------------------------- */
/* stubs */
/* Stand-in definitions of the wcwidth helpers, with C linkage.
 * Both always return 0, whatever their arguments.
 * NOTE(review): presumably here so that this test links without the real wcwidth library - confirm. */
extern "C" {
int mk_wcwidth(wchar_t ucs);
int mk_wcswidth(const wchar_t *pwcs, size_t n);
/* Stub: ignores `ucs`, always returns 0. */
int mk_wcwidth(wchar_t ucs)
{
return 0;
}
/* Stub: ignores `pwcs` and `n`, always returns 0. */
int mk_wcswidth(const wchar_t *pwcs, size_t n)
{
return 0;
}
}
/* -------------------------------------------------------------------- */
/* tests */
/* BLI_str_partition */
/* Checks the left-to-right, byte-wise partition: for each input, the returned prefix length,
 * the separator position and the suffix are compared against the values given in the comment
 * preceding the call. All inputs are plain ASCII, so the two non-ASCII delimiters are not
 * exercised by this test. */
TEST(string, StrPartition)
{
const unsigned int delim[] = {'-', '.', '_', 0x00F1 /* n tilde */, 0x262F /* yin-yang */, '\0'};
char *sep, *suf;
size_t pre_ln;
{
const char *str = "mat.e-r_ial";
/* "mat.e-r_ial" -> "mat", '.', "e-r_ial", 3 */
pre_ln = BLI_str_partition(str, delim, &sep, &suf);
EXPECT_EQ(3, pre_ln);
EXPECT_EQ(&str[3], sep);
EXPECT_STREQ("e-r_ial", suf);
}
/* Corner cases. */
{
/* Delimiter as very first char: empty prefix. */
const char *str = ".mate-rial--";
/* ".mate-rial--" -> "", '.', "mate-rial--", 0 */
pre_ln = BLI_str_partition(str, delim, &sep, &suf);
EXPECT_EQ(0, pre_ln);
EXPECT_EQ(&str[0], sep);
EXPECT_STREQ("mate-rial--", suf);
}
{
/* String made of delimiters only. */
const char *str = ".__.--_";
/* ".__.--_" -> "", '.', "__.--_", 0 */
pre_ln = BLI_str_partition(str, delim, &sep, &suf);
EXPECT_EQ(0, pre_ln);
EXPECT_EQ(&str[0], sep);
EXPECT_STREQ("__.--_", suf);
}
{
/* Empty string: nothing found. */
const char *str = "";
/* "" -> "", NULL, NULL, 0 */
pre_ln = BLI_str_partition(str, delim, &sep, &suf);
EXPECT_EQ(0, pre_ln);
EXPECT_EQ(NULL, sep);
EXPECT_EQ(NULL, suf);
}
{
/* No delimiter at all: the whole string is the prefix. */
const char *str = "material";
/* "material" -> "material", NULL, NULL, 8 */
pre_ln = BLI_str_partition(str, delim, &sep, &suf);
EXPECT_EQ(8, pre_ln);
EXPECT_EQ(NULL, sep);
EXPECT_EQ(NULL, suf);
}
}
/* BLI_str_rpartition */
/* Checks the right-to-left, byte-wise partition: for each input, the returned prefix length,
 * the separator position and the suffix are compared against the values given in the comment
 * preceding the call. All inputs are plain ASCII, so the two non-ASCII delimiters are not
 * exercised by this test. */
TEST(string, StrRPartition)
{
const unsigned int delim[] = {'-', '.', '_', 0x00F1 /* n tilde */, 0x262F /* yin-yang */, '\0'};
char *sep, *suf;
size_t pre_ln;
{
const char *str = "mat.e-r_ial";
/* "mat.e-r_ial" -> "mat.e-r", '_', "ial", 7 */
pre_ln = BLI_str_rpartition(str, delim, &sep, &suf);
EXPECT_EQ(7, pre_ln);
EXPECT_EQ(&str[7], sep);
EXPECT_STREQ("ial", suf);
}
/* Corner cases. */
{
/* Delimiter as very last char: empty suffix. */
const char *str = ".mate-rial--";
/* ".mate-rial--" -> ".mate-rial-", '-', "", 11 */
pre_ln = BLI_str_rpartition(str, delim, &sep, &suf);
EXPECT_EQ(11, pre_ln);
EXPECT_EQ(&str[11], sep);
EXPECT_STREQ("", suf);
}
{
/* String made of delimiters only. */
const char *str = ".__.--_";
/* ".__.--_" -> ".__.--", '_', "", 6 */
pre_ln = BLI_str_rpartition(str, delim, &sep, &suf);
EXPECT_EQ(6, pre_ln);
EXPECT_EQ(&str[6], sep);
EXPECT_STREQ("", suf);
}
{
/* Empty string: nothing found. */
const char *str = "";
/* "" -> "", NULL, NULL, 0 */
pre_ln = BLI_str_rpartition(str, delim, &sep, &suf);
EXPECT_EQ(0, pre_ln);
EXPECT_EQ(NULL, sep);
EXPECT_EQ(NULL, suf);
}
{
/* No delimiter at all: the whole string is the prefix. */
const char *str = "material";
/* "material" -> "material", NULL, NULL, 8 */
pre_ln = BLI_str_rpartition(str, delim, &sep, &suf);
EXPECT_EQ(8, pre_ln);
EXPECT_EQ(NULL, sep);
EXPECT_EQ(NULL, suf);
}
}
/* BLI_str_partition_utf8 */
/* Checks the left-to-right, UTF-8 aware partition, using inputs where the multi-byte
 * delimiters (n tilde: "\xc3\xb1", yin-yang: "\xe2\x98\xaf") are mixed with ASCII ones.
 * Expected prefix lengths and separator positions are byte offsets. */
TEST(string, StrPartitionUtf8)
{
const unsigned int delim[] = {'-', '.', '_', 0x00F1 /* n tilde */, 0x262F /* yin-yang */, '\0'};
char *sep, *suf;
size_t pre_ln;
{
const char *str = "ma\xc3\xb1te-r\xe2\x98\xafial";
/* "ma\xc3\xb1te-r\xe2\x98\xafial" -> "ma", '\xc3\xb1', "te-r\xe2\x98\xafial", 2 */
pre_ln = BLI_str_partition_utf8(str, delim, &sep, &suf);
EXPECT_EQ(2, pre_ln);
EXPECT_EQ(&str[2], sep);
EXPECT_STREQ("te-r\xe2\x98\xafial", suf);
}
/* Corner cases. */
{
/* Multi-byte delimiter as very first char: empty prefix, suffix starts after its 3 bytes. */
const char *str = "\xe2\x98\xafmate-rial-\xc3\xb1";
/* "\xe2\x98\xafmate-rial-\xc3\xb1" -> "", '\xe2\x98\xaf', "mate-rial-\xc3\xb1", 0 */
pre_ln = BLI_str_partition_utf8(str, delim, &sep, &suf);
EXPECT_EQ(0, pre_ln);
EXPECT_EQ(&str[0], sep);
EXPECT_STREQ("mate-rial-\xc3\xb1", suf);
}
{
/* String made of delimiters only. */
const char *str = "\xe2\x98\xaf.\xc3\xb1_.--\xc3\xb1";
/* "\xe2\x98\xaf.\xc3\xb1_.--\xc3\xb1" -> "", '\xe2\x98\xaf', ".\xc3\xb1_.--\xc3\xb1", 0 */
pre_ln = BLI_str_partition_utf8(str, delim, &sep, &suf);
EXPECT_EQ(0, pre_ln);
EXPECT_EQ(&str[0], sep);
EXPECT_STREQ(".\xc3\xb1_.--\xc3\xb1", suf);
}
{
/* Empty string: nothing found. */
const char *str = "";
/* "" -> "", NULL, NULL, 0 */
pre_ln = BLI_str_partition_utf8(str, delim, &sep, &suf);
EXPECT_EQ(0, pre_ln);
EXPECT_EQ(NULL, sep);
EXPECT_EQ(NULL, suf);
}
{
/* No delimiter at all: the whole string is the prefix. */
const char *str = "material";
/* "material" -> "material", NULL, NULL, 8 */
pre_ln = BLI_str_partition_utf8(str, delim, &sep, &suf);
EXPECT_EQ(8, pre_ln);
EXPECT_EQ(NULL, sep);
EXPECT_EQ(NULL, suf);
}
}
/* BLI_str_rpartition_utf8 */
/* Checks the right-to-left, UTF-8 aware partition, using inputs where the multi-byte
 * delimiters (n tilde: "\xc3\xb1", yin-yang: "\xe2\x98\xaf") are mixed with ASCII ones.
 * Expected prefix lengths and separator positions are byte offsets. */
TEST(string, StrRPartitionUtf8)
{
const unsigned int delim[] = {'-', '.', '_', 0x00F1 /* n tilde */, 0x262F /* yin-yang */, '\0'};
char *sep, *suf;
size_t pre_ln;
{
const char *str = "ma\xc3\xb1te-r\xe2\x98\xafial";
/* "ma\xc3\xb1te-r\xe2\x98\xafial" -> "ma\xc3\xb1te-r", '\xe2\x98\xaf', "ial", 8 */
pre_ln = BLI_str_rpartition_utf8(str, delim, &sep, &suf);
EXPECT_EQ(8, pre_ln);
EXPECT_EQ(&str[8], sep);
EXPECT_STREQ("ial", suf);
}
/* Corner cases. */
{
/* Multi-byte delimiter as very last char: empty suffix. */
const char *str = "\xe2\x98\xafmate-rial-\xc3\xb1";
/* "\xe2\x98\xafmate-rial-\xc3\xb1" -> "\xe2\x98\xafmate-rial-", '\xc3\xb1', "", 13 */
pre_ln = BLI_str_rpartition_utf8(str, delim, &sep, &suf);
EXPECT_EQ(13, pre_ln);
EXPECT_EQ(&str[13], sep);
EXPECT_STREQ("", suf);
}
{
/* String made of delimiters only. */
const char *str = "\xe2\x98\xaf.\xc3\xb1_.--\xc3\xb1";
/* "\xe2\x98\xaf.\xc3\xb1_.--\xc3\xb1" -> "\xe2\x98\xaf.\xc3\xb1_.--", '\xc3\xb1', "", 10 */
pre_ln = BLI_str_rpartition_utf8(str, delim, &sep, &suf);
EXPECT_EQ(10, pre_ln);
EXPECT_EQ(&str[10], sep);
EXPECT_STREQ("", suf);
}
{
/* Empty string: nothing found. */
const char *str = "";
/* "" -> "", NULL, NULL, 0 */
pre_ln = BLI_str_rpartition_utf8(str, delim, &sep, &suf);
EXPECT_EQ(0, pre_ln);
EXPECT_EQ(NULL, sep);
EXPECT_EQ(NULL, suf);
}
{
/* No delimiter at all: the whole string is the prefix. */
const char *str = "material";
/* "material" -> "material", NULL, NULL, 8 */
pre_ln = BLI_str_rpartition_utf8(str, delim, &sep, &suf);
EXPECT_EQ(8, pre_ln);
EXPECT_EQ(NULL, sep);
EXPECT_EQ(NULL, suf);
}
}

View File

@ -37,4 +37,5 @@ set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} ${PLATFORM_LIN
# One BLENDER_TEST entry per test, named after its subject.
# NOTE(review): the second argument is presumably the list of libraries to link against -
# confirm against the BLENDER_TEST macro definition.
BLENDER_TEST(BLI_stack "bf_blenlib")
BLENDER_TEST(BLI_math_color "bf_blenlib")
BLENDER_TEST(BLI_math_geom "bf_blenlib")
BLENDER_TEST(BLI_string "bf_blenlib")
BLENDER_TEST(BLI_path_util "bf_blenlib;extern_wcwidth;${ZLIB_LIBRARIES}")