BLI_string_utf8: simplify utf8 stepping logic
There were multiple utf8 functions which treated errors slightly differently. Split BLI_str_utf8_as_unicode_step into two functions. - BLI_str_utf8_as_unicode_step_or_error returns an error value when decoding fails and doesn't step. - BLI_str_utf8_as_unicode_step always steps forward by at least one byte, returning the byte value without decoding when decoding fails (needed to display some latin1 file-paths). Font drawing uses BLI_str_utf8_as_unicode_step and no longer checks for error values.
This commit is contained in:
parent
70f890b510
commit
be906f44c6
|
@ -309,15 +309,13 @@ BLI_INLINE GlyphBLF *blf_utf8_next_fast(
|
|||
}
|
||||
(*i_p)++;
|
||||
}
|
||||
else if ((*r_c = BLI_str_utf8_as_unicode_step(str, str_len, i_p)) != BLI_UTF8_ERR) {
|
||||
else {
|
||||
*r_c = BLI_str_utf8_as_unicode_step(str, str_len, i_p);
|
||||
g = blf_glyph_search(gc, *r_c);
|
||||
if (UNLIKELY(g == NULL)) {
|
||||
g = blf_glyph_add(font, gc, FT_Get_Char_Index(font->face, *r_c), *r_c);
|
||||
}
|
||||
}
|
||||
else {
|
||||
g = NULL;
|
||||
}
|
||||
return g;
|
||||
}
|
||||
|
||||
|
@ -1202,7 +1200,8 @@ int blf_font_count_missing_chars(FontBLF *font,
|
|||
if ((c = str[i]) < GLYPH_ASCII_TABLE_SIZE) {
|
||||
i++;
|
||||
}
|
||||
else if ((c = BLI_str_utf8_as_unicode_step(str, str_len, &i)) != BLI_UTF8_ERR) {
|
||||
else {
|
||||
c = BLI_str_utf8_as_unicode_step(str, str_len, &i);
|
||||
if (FT_Get_Char_Index((font)->face, c) == 0) {
|
||||
missing++;
|
||||
}
|
||||
|
|
|
@ -46,6 +46,9 @@ unsigned int BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p,
|
|||
unsigned int BLI_str_utf8_as_unicode_step(const char *__restrict p,
|
||||
size_t p_len,
|
||||
size_t *__restrict index) ATTR_NONNULL(1, 3);
|
||||
unsigned int BLI_str_utf8_as_unicode_step_or_error(const char *__restrict p,
|
||||
size_t p_len,
|
||||
size_t *__restrict index) ATTR_NONNULL(1, 3);
|
||||
|
||||
size_t BLI_str_utf8_from_unicode(unsigned int c, char *outbuf);
|
||||
size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w,
|
||||
|
|
|
@ -581,73 +581,66 @@ uint BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p, size_t *__r
|
|||
}
|
||||
|
||||
/**
|
||||
* Another variant that steps over the index.
|
||||
* UTF8 decoding that steps over the index (unless an error is encountered).
|
||||
*
|
||||
* \param p: The text to step over.
|
||||
* \param p_len: The length of `p`.
|
||||
* \param index: Index of `p` to step over.
|
||||
*
|
||||
* \note currently this also falls back to latin1 for text drawing.
|
||||
* \return the code-point or #BLI_UTF8_ERR if there is a decoding error.
|
||||
*
|
||||
* \note The behavior for clipped text (where `p_len` limits decoding trailing bytes)
|
||||
* must have the same behavior as encountering a nil byte,
|
||||
* so functions that only use the first part of a string have matching behavior to functions
|
||||
* that null terminate the text.
|
||||
*/
|
||||
uint BLI_str_utf8_as_unicode_step(const char *__restrict p,
|
||||
const size_t p_len,
|
||||
size_t *__restrict index)
|
||||
uint BLI_str_utf8_as_unicode_step_or_error(const char *__restrict p,
|
||||
const size_t p_len,
|
||||
size_t *__restrict index)
|
||||
{
|
||||
int i, len;
|
||||
uint mask = 0;
|
||||
uint result;
|
||||
const char c = p[*index];
|
||||
const unsigned char c = (unsigned char)*(p += *index);
|
||||
|
||||
BLI_assert(*index < p_len);
|
||||
BLI_assert(c != '\0');
|
||||
|
||||
UTF8_COMPUTE(c, mask, len, -1);
|
||||
if (UNLIKELY(len == -1)) {
|
||||
const char *p_next = BLI_str_find_next_char_utf8(p + *index, p + p_len);
|
||||
/* #BLI_str_find_next_char_utf8 ensures the nil byte will terminate.
|
||||
* so there is no chance this sets the index past the nil byte (assert this is the case). */
|
||||
BLI_assert(p_next || (memchr(p + *index, '\0', p_len - *index) == NULL));
|
||||
len = (int)((p_next ? (size_t)(p_next - p) : p_len) - *index);
|
||||
result = BLI_UTF8_ERR;
|
||||
if (UNLIKELY(len == -1) || (*index + (size_t)len > p_len)) {
|
||||
return BLI_UTF8_ERR;
|
||||
}
|
||||
else if (UNLIKELY(*index + (size_t)len > p_len)) {
|
||||
/* A multi-byte character reads past the buffer bounds,
|
||||
* match the behavior of encountering a byte with invalid encoding below. */
|
||||
len = 1;
|
||||
result = (uint)c;
|
||||
UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
|
||||
if (UNLIKELY(result == BLI_UTF8_ERR)) {
|
||||
return BLI_UTF8_ERR;
|
||||
}
|
||||
else {
|
||||
/* This is tricky since there are a few ways we can bail out of bad unicode
|
||||
* values, 3 possible solutions. */
|
||||
p += *index;
|
||||
#if 0
|
||||
UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
|
||||
#elif 1
|
||||
/* WARNING: this is NOT part of glib, or supported by similar functions.
|
||||
* this is added for text drawing because some filepaths can have latin1
|
||||
* characters */
|
||||
UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
|
||||
if (result == BLI_UTF8_ERR) {
|
||||
len = 1;
|
||||
result = (uint)c;
|
||||
}
|
||||
/* end warning! */
|
||||
#else
|
||||
/* Without a fallback like '?', text drawing will stop on this value. */
|
||||
UTF8_GET(result, p, i, mask, len, '?');
|
||||
#endif
|
||||
}
|
||||
|
||||
*index += (size_t)len;
|
||||
BLI_assert(*index <= p_len);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* UTF8 decoding that always steps over the index (by one byte when a decoding error is encountered).
|
||||
*
|
||||
* \param p: The text to step over.
|
||||
* \param p_len: The length of `p`.
|
||||
* \param index: Index of `p` to step over.
|
||||
* \return the code-point, or the byte value at `p[*index]` if there is a decoding error.
|
||||
*
|
||||
* \note Falls back to `LATIN1` for text drawing.
|
||||
*/
|
||||
uint BLI_str_utf8_as_unicode_step(const char *__restrict p,
|
||||
const size_t p_len,
|
||||
size_t *__restrict index)
|
||||
{
|
||||
uint result = BLI_str_utf8_as_unicode_step_or_error(p, p_len, index);
|
||||
if (UNLIKELY(result == BLI_UTF8_ERR)) {
|
||||
result = (uint)p[*index];
|
||||
*index += 1;
|
||||
}
|
||||
BLI_assert(*index <= p_len);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* was g_unichar_to_utf8 */
|
||||
/**
|
||||
* BLI_str_utf8_from_unicode:
|
||||
|
|
Loading…
Reference in New Issue