Fix T99536: new 3.2 OBJ importer fails with trailing space after wrapped lines

Address the issue by reworking line continuation handling: stop
trying to recognize "backslash, newline" sequences during parsing
(that was the bug: "backslash, possible whitespace, newline" has to
be handled as well). Instead, fix up line continuations right after
each chunk of input file data is read, by turning the backslash and
the following newline into spaces. The rest of the parsing code then
does not have to be aware of line continuations at all.

This makes the file attached to T99536 load correctly. One of the
test files in the Subversion tests repository will also be extended
to contain backslashes followed by newlines.
This commit is contained in:
Aras Pranckevicius 2022-07-10 18:27:07 +03:00
parent 443690604f
commit 4114ace616
Notes: blender-bot 2023-11-20 12:14:32 +01:00
Referenced by issue #99536, OBJ: new 3.2 importer fails with trailing space after wrapped lines
Referenced by issue #98661, 3.2: Potential candidates for corrective releases
4 changed files with 54 additions and 18 deletions

View File

@ -427,6 +427,11 @@ void OBJParser::parse(Vector<std::unique_ptr<Geometry>> &r_all_geometries,
break; /* No more data to read. */
}
/* Take care of line continuations now (turn them into spaces);
* the rest of the parsing code does not need to worry about them anymore. */
fixup_line_continuations(buffer.data() + buffer_offset,
buffer.data() + buffer_offset + bytes_read);
/* Ensure buffer ends in a newline. */
if (bytes_read < read_buffer_size_) {
if (bytes_read == 0 || buffer[buffer_offset + bytes_read - 1] != '\n') {
@ -445,9 +450,7 @@ void OBJParser::parse(Vector<std::unique_ptr<Geometry>> &r_all_geometries,
while (last_nl > 0) {
--last_nl;
if (buffer[last_nl] == '\n') {
if (last_nl < 1 || buffer[last_nl - 1] != '\\') {
break;
}
break;
}
}
if (buffer[last_nl] != '\n') {

View File

@ -18,14 +18,12 @@ StringRef read_next_line(StringRef &buffer)
const char *start = buffer.begin();
const char *end = buffer.end();
size_t len = 0;
char prev = 0;
const char *ptr = start;
while (ptr < end) {
char c = *ptr++;
if (c == '\n' && prev != '\\') {
if (c == '\n') {
break;
}
prev = c;
++len;
}
@ -35,7 +33,27 @@ StringRef read_next_line(StringRef &buffer)
static bool is_whitespace(char c)
{
return c <= ' ' || c == '\\';
return c <= ' ';
}
void fixup_line_continuations(char *p, char *end)
{
  /* Visit the range one backslash at a time; `p` always marks the spot
   * from which the search for the next backslash resumes. */
  for (char *slash = std::find(p, end, '\\'); slash != end; slash = std::find(p, end, '\\')) {
    /* Step past any blanks trailing the backslash, but never past a newline. */
    for (p = slash + 1; p < end && *p != '\n' && is_whitespace(*p); ++p) {
    }
    /* Only a backslash that is followed (after the optional blanks) by a
     * newline is a line continuation: blank out both characters so that the
     * two physical lines become one logical line. Any other backslash is
     * left untouched. */
    const bool is_continuation = (p < end) && (*p == '\n');
    if (is_continuation) {
      *slash = ' ';
      *p = ' ';
    }
  }
}
const char *drop_whitespace(const char *p, const char *end)

View File

@ -6,9 +6,6 @@
/*
* Various text parsing utilities used by OBJ importer.
* The utilities are not directly usable by other formats, since
* they treat backslash (\) as a whitespace character (OBJ format
* allows backslashes to function as a line-continuation character).
*
* Many of these functions take two pointers (p, end) indicating
* which part of a string to operate on, and return a possibly
@ -27,21 +24,22 @@ namespace blender::io::obj {
* The returned line will not have '\n' characters at the end;
* the `buffer` is modified to contain remaining text without
* the input line.
*
* Note that backslash (\) character is treated as a line
* continuation.
*/
StringRef read_next_line(StringRef &buffer);
/**
 * Fix up OBJ line continuations by replacing backslash (\) and the
 * following newline with spaces.
 *
 * The text in the range [p, end) is modified in place. White-space
 * characters between the backslash and the newline are allowed and are
 * left as they are. A backslash that is not followed (after such optional
 * white-space) by a newline within the range is left intact.
 */
void fixup_line_continuations(char *p, char *end);
/**
* Drop leading white-space from a string part.
* Note that backslash character is considered white-space.
*/
const char *drop_whitespace(const char *p, const char *end);
/**
* Drop leading non-white-space from a string part.
* Note that backslash character is considered white-space.
*/
const char *drop_non_whitespace(const char *p, const char *end);

View File

@ -10,17 +10,34 @@ namespace blender::io::obj {
TEST(obj_import_string_utils, read_next_line)
{
std::string str = "abc\n \n\nline with \\\ncontinuation\nCRLF ending:\r\na";
std::string str = "abc\n \n\nline with \t spaces\nCRLF ending:\r\na";
StringRef s = str;
EXPECT_STRREF_EQ("abc", read_next_line(s));
EXPECT_STRREF_EQ(" ", read_next_line(s));
EXPECT_STRREF_EQ("", read_next_line(s));
EXPECT_STRREF_EQ("line with \\\ncontinuation", read_next_line(s));
EXPECT_STRREF_EQ("line with \t spaces", read_next_line(s));
EXPECT_STRREF_EQ("CRLF ending:\r", read_next_line(s));
EXPECT_STRREF_EQ("a", read_next_line(s));
EXPECT_TRUE(s.is_empty());
}
TEST(obj_import_string_utils, fixup_line_continuations)
{
  /* Input covers: a backslash directly before the newline, a backslash
   * separated from the newline by a space, backslashes that are not
   * followed by a newline, and a lone backslash at the very end. */
  const char *str =
      "backslash \\\n eol\n"
      "backslash spaces \\ \n eol\n"
      "without eol \\ is \\\\ \\ left intact\n"
      "\\";
  /* The fixup replaces characters one-for-one (backslash -> space,
   * newline -> space) and never removes anything, so the expected text
   * has exactly the same length as the input: the spaces that were
   * already there stay, plus two more per continuation. */
  const char *exp =
      "backslash    eol\n"
      "backslash spaces     eol\n"
      "without eol \\ is \\\\ \\ left intact\n"
      "\\";
  std::string buf(str);
  fixup_line_continuations(buf.data(), buf.data() + buf.size());
  EXPECT_STRREF_EQ(exp, buf);
}
static StringRef drop_whitespace(StringRef s)
{
return StringRef(drop_whitespace(s.begin(), s.end()), s.end());
@ -54,7 +71,7 @@ TEST(obj_import_string_utils, drop_whitespace)
/* No leading whitespace */
EXPECT_STRREF_EQ("c", drop_whitespace("c"));
/* Case with backslash, should be treated as whitespace */
EXPECT_STRREF_EQ("d", drop_whitespace(" \\ d"));
EXPECT_STRREF_EQ("d", drop_whitespace(" \t d"));
}
TEST(obj_import_string_utils, parse_int_valid)