From bb9b9dad5f5a00adaa867aece30de5e0695c2942 Mon Sep 17 00:00:00 2001
From: Alex Ames <alexander.ames@gmail.com>
Date: Tue, 22 Nov 2022 12:11:14 -0800
Subject: [PATCH] Fixed the BytesConsumed function, which was pointing slightly
 ahead. (#7657)

The BytesConsumed function uses the `cursor_` to determine how many
bytes have been consumed by the parser, in case the user of the Parser
object wants to step over the parsed flatbuffer that is embedded in some
larger string. However, the `cursor_` is always one token ahead, so that
it can determine how to consume it. It points at the token that is about
to be consumed, which is ahead of the last byte consumed.

For example, if you had a string containing these two json objects and
parsed them...

    "{\"key\":\"value\"},{\"key\":\"value\"}"

...then the `cursor_` would be pointing at the comma between the two
tables. If you were to hold a pointer to the beginning of the string and
add `BytesConsumed()` to it like so:

    const char* json = // ...
    parser.ParseJson(json);
    json += parser.BytesConsumed();

then the pointer would skip over the comma, which is not the expected
behavior. It should only consume the table itself.

The solution is simple: Just hold onto a previous cursor location and
use that for the `BytesConsumed()` call. The previous cursor location
just needs to be set to the cursor_ location each time the cursor_ is
about to be updated. This will result in `BytesConsumed()` returning
the correct number of bytes without the off-by-one-token error.

Co-authored-by: Derek Bailey <derekbailey@google.com>
---
 include/flatbuffers/idl.h | 7 +++++--
 src/idl_parser.cpp        | 3 ++-
 tests/test.cpp            | 7 +++----
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/include/flatbuffers/idl.h b/include/flatbuffers/idl.h
index 404482812..19260e77b 100644
--- a/include/flatbuffers/idl.h
+++ b/include/flatbuffers/idl.h
@@ -760,7 +760,8 @@ struct IDLOptions {
 // This encapsulates where the parser is in the current source file.
 struct ParserState {
   ParserState()
-      : cursor_(nullptr),
+      : prev_cursor_(nullptr),
+        cursor_(nullptr),
         line_start_(nullptr),
         line_(0),
         token_(-1),
@@ -768,6 +769,7 @@ struct ParserState {
 
  protected:
   void ResetState(const char *source) {
+    prev_cursor_ = source;
     cursor_ = source;
     line_ = 0;
     MarkNewLine();
@@ -782,7 +784,8 @@ struct ParserState {
     FLATBUFFERS_ASSERT(cursor_ && line_start_ && cursor_ >= line_start_);
     return static_cast<int64_t>(cursor_ - line_start_);
   }
-
+  
+  const char *prev_cursor_;
   const char *cursor_;
   const char *line_start_;
   int line_;  // the current line being parsed
diff --git a/src/idl_parser.cpp b/src/idl_parser.cpp
index bd9780f37..b16c3bec8 100644
--- a/src/idl_parser.cpp
+++ b/src/idl_parser.cpp
@@ -483,6 +483,7 @@ CheckedError Parser::SkipByteOrderMark() {
 
 CheckedError Parser::Next() {
   doc_comment_.clear();
+  prev_cursor_ = cursor_;
   bool seen_newline = cursor_ == source_;
   attribute_.clear();
   attr_is_trivial_ascii_string_ = true;
@@ -3312,7 +3313,7 @@ bool Parser::ParseJson(const char *json, const char *json_filename) {
 }
 
 std::ptrdiff_t Parser::BytesConsumed() const {
-  return std::distance(source_, cursor_);
+  return std::distance(source_, prev_cursor_);
 }
 
 CheckedError Parser::StartParseFile(const char *source,
diff --git a/tests/test.cpp b/tests/test.cpp
index db2916276..15d0d4fc8 100644
--- a/tests/test.cpp
+++ b/tests/test.cpp
@@ -1434,14 +1434,14 @@ void DoNotRequireEofTest(const std::string &tests_data_path) {
   ok = parser.Parse(schemafile.c_str(), include_directories);
   TEST_EQ(ok, true);
 
-  const char *str = R"(This string contains two monsters, the first one is {
+  const char *str = R"(Some text at the beginning. {
       "name": "Blob",
       "hp": 5
-    }
-    and the second one is {
+    }{
       "name": "Imp",
       "hp": 10
     }
+    Some extra text at the end too.
   )";
   const char *tableStart = std::strchr(str, '{');
   ok = parser.ParseJson(tableStart);
@@ -1453,7 +1453,6 @@ void DoNotRequireEofTest(const std::string &tests_data_path) {
 
   tableStart += parser.BytesConsumed();
 
-  tableStart = std::strchr(tableStart + 1, '{');
   ok = parser.ParseJson(tableStart);
   TEST_EQ(ok, true);