From bb9b9dad5f5a00adaa867aece30de5e0695c2942 Mon Sep 17 00:00:00 2001 From: Alex Ames Date: Tue, 22 Nov 2022 12:11:14 -0800 Subject: [PATCH] Fixed the BytesConsumed function, which was pointing slightly ahead. (#7657) The BytesConsumed function uses the `cursor_` to determine how many bytes have been consumed by the parser, in case the user of the Parser object wants to step over the parsed flatbuffer that is embedded in some larger string. However, the `cursor_` is always one token ahead, so that it can determine how to consume it. It points at the token that is about to be consumed, which is ahead of the last byte consumed. For example, if you had a string containing these two json objects and parsed them... "{\"key\":\"value\"},{\"key\":\"value\"}" ...then the `cursor_` would be pointing at the comma between the two tables. If you were to hold a pointer to the beginning of the string and add `BytesConsumed()` to it like so: const char* json = // ... parser.ParseJson(json); json += parser.BytesConsumed(); then the pointer would skip over the comma, which is not the expected behavior. It should only consume the table itself. The solution is simple: Just hold onto a previous cursor location and use that for the `BytesConsumed()` call. The previous cursor location just needs to be set to the cursor_ location each time the cursor_ is about to be updated. This will result in `BytesConsumed()` returning the correct number of bytes without the off-by-one-token error. Co-authored-by: Derek Bailey --- include/flatbuffers/idl.h | 7 +++++-- src/idl_parser.cpp | 3 ++- tests/test.cpp | 7 +++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/flatbuffers/idl.h b/include/flatbuffers/idl.h index 404482812..19260e77b 100644 --- a/include/flatbuffers/idl.h +++ b/include/flatbuffers/idl.h @@ -760,7 +760,8 @@ struct IDLOptions { // This encapsulates where the parser is in the current source file. struct ParserState { ParserState() - : cursor_(nullptr), + : prev_cursor_(nullptr), + cursor_(nullptr), line_start_(nullptr), line_(0), token_(-1), @@ -768,6 +769,7 @@ struct ParserState { protected: void ResetState(const char *source) { + prev_cursor_ = source; cursor_ = source; line_ = 0; MarkNewLine(); @@ -782,7 +784,8 @@ struct ParserState { FLATBUFFERS_ASSERT(cursor_ && line_start_ && cursor_ >= line_start_); return static_cast(cursor_ - line_start_); } - + + const char *prev_cursor_; const char *cursor_; const char *line_start_; int line_; // the current line being parsed diff --git a/src/idl_parser.cpp b/src/idl_parser.cpp index bd9780f37..b16c3bec8 100644 --- a/src/idl_parser.cpp +++ b/src/idl_parser.cpp @@ -483,6 +483,7 @@ CheckedError Parser::SkipByteOrderMark() { CheckedError Parser::Next() { doc_comment_.clear(); + prev_cursor_ = cursor_; bool seen_newline = cursor_ == source_; attribute_.clear(); attr_is_trivial_ascii_string_ = true; @@ -3312,7 +3313,7 @@ bool Parser::ParseJson(const char *json, const char *json_filename) { } std::ptrdiff_t Parser::BytesConsumed() const { - return std::distance(source_, cursor_); + return std::distance(source_, prev_cursor_); } CheckedError Parser::StartParseFile(const char *source, diff --git a/tests/test.cpp b/tests/test.cpp index db2916276..15d0d4fc8 100644 --- a/tests/test.cpp +++ b/tests/test.cpp @@ -1434,14 +1434,14 @@ void DoNotRequireEofTest(const std::string &tests_data_path) { ok = parser.Parse(schemafile.c_str(), include_directories); TEST_EQ(ok, true); - const char *str = R"(This string contains two monsters, the first one is { + const char *str = R"(Some text at the beginning. { "name": "Blob", "hp": 5 - } - and the second one is { + }{ "name": "Imp", "hp": 10 } + Some extra text at the end too. )"; const char *tableStart = std::strchr(str, '{'); ok = parser.ParseJson(tableStart); @@ -1453,7 +1453,6 @@ void DoNotRequireEofTest(const std::string &tests_data_path) { tableStart += parser.BytesConsumed(); - tableStart = std::strchr(tableStart + 1, '{'); ok = parser.ParseJson(tableStart); TEST_EQ(ok, true);