Fixed the BytesConsumed function, which was pointing slightly ahead. (#7657)

The BytesConsumed function uses the `cursor_` to determine how many bytes have been consumed by the parser, in case the user of the Parser object wants to step over the parsed flatbuffer that is embedded in some larger string. However, the `cursor_` is always one token ahead, so that it can determine how to consume it. It points at the token that is about to be consumed, which is ahead of the last byte consumed. For example, if you had a string containing these two json objects and parsed them... "{\"key\":\"value\"},{\"key\":\"value\"}" ...then the `cursor_` would be pointing at the comma between the two tables. If you were to hold a pointer to the beginning of the string and add `BytesConsumed()` to it like so: const char* json = // ... parser.ParseJson(json); json += parser.BytesConsumed(); then the pointer would skip over the comma, which is not the expected behavior. It should only consume the table itself. The solution is simple: Just hold onto a previous cursor location and use that for the `BytesConsumed()` call. The previous cursor location just needs to be set to the cursor_ location each time the cursor_ is about to be updated. This will result in `BytesConsumed()` returning the correct number of bytes without the off-by-one-token error. Co-authored-by: Derek Bailey <derekbailey@google.com>
2026-06-01 19:58:15 +00:00 · 2022-11-22 12:11:14 -08:00
parent ade9e19be0
commit bb9b9dad5f
3 changed files with 10 additions and 7 deletions
--- a/include/flatbuffers/idl.h
+++ b/include/flatbuffers/idl.h
@@ -760,7 +760,8 @@ struct IDLOptions {
 // This encapsulates where the parser is in the current source file.
 struct ParserState {
  ParserState()
-      : cursor_(nullptr),
+      : prev_cursor_(nullptr),
+        cursor_(nullptr),
        line_start_(nullptr),
        line_(0),
        token_(-1),
@@ -768,6 +769,7 @@ struct ParserState {

 protected:
  void ResetState(const char *source) {
+    prev_cursor_ = source;
    cursor_ = source;
    line_ = 0;
    MarkNewLine();
@@ -782,7 +784,8 @@ struct ParserState {
    FLATBUFFERS_ASSERT(cursor_ && line_start_ && cursor_ >= line_start_);
    return static_cast<int64_t>(cursor_ - line_start_);
  }
-
+  
+  const char *prev_cursor_;
  const char *cursor_;
  const char *line_start_;
  int line_;  // the current line being parsed
--- a/src/idl_parser.cpp
+++ b/src/idl_parser.cpp
@@ -483,6 +483,7 @@ CheckedError Parser::SkipByteOrderMark() {

 CheckedError Parser::Next() {
  doc_comment_.clear();
+  prev_cursor_ = cursor_;
  bool seen_newline = cursor_ == source_;
  attribute_.clear();
  attr_is_trivial_ascii_string_ = true;
@@ -3312,7 +3313,7 @@ bool Parser::ParseJson(const char *json, const char *json_filename) {
 }

 std::ptrdiff_t Parser::BytesConsumed() const {
-  return std::distance(source_, cursor_);
+  return std::distance(source_, prev_cursor_);
 }

 CheckedError Parser::StartParseFile(const char *source,
--- a/tests/test.cpp
+++ b/tests/test.cpp
@@ -1434,14 +1434,14 @@ void DoNotRequireEofTest(const std::string &tests_data_path) {
  ok = parser.Parse(schemafile.c_str(), include_directories);
  TEST_EQ(ok, true);

-  const char *str = R"(This string contains two monsters, the first one is {
+  const char *str = R"(Some text at the beginning. {
      "name": "Blob",
      "hp": 5
-    }
-    and the second one is {
+    }{
      "name": "Imp",
      "hp": 10
    }
+    Some extra text at the end too.
  )";
  const char *tableStart = std::strchr(str, '{');
  ok = parser.ParseJson(tableStart);
@@ -1453,7 +1453,6 @@ void DoNotRequireEofTest(const std::string &tests_data_path) {

  tableStart += parser.BytesConsumed();

-  tableStart = std::strchr(tableStart + 1, '{');
  ok = parser.ParseJson(tableStart);
  TEST_EQ(ok, true);