From be894f09df2383844d6c19b1d173fec105451e0f Mon Sep 17 00:00:00 2001
From: Wouter van Oortmerssen <wvo@google.com>
Date: Tue, 19 Aug 2014 14:20:05 -0700
Subject: [PATCH] Schemas now support include files.

Bug: 15521443
Change-Id: I2e1ef97e7225a1a0ecf2ca65e31d49d443003747
Tested: on Linux.
---
 docs/html/md__grammar.html |  3 ++-
 docs/html/md__schemas.html |  4 +++
 docs/source/Grammar.md     |  5 +++-
 docs/source/Schemas.md     | 14 ++++++++++
 include/flatbuffers/idl.h  |  8 +++++-
 include/flatbuffers/util.h | 33 +++++++++++++++++++-----
 samples/sample_text.cpp    |  4 +--
 src/flatc.cpp              | 35 +++++--------------------
 src/idl_parser.cpp         | 53 ++++++++++++++++++++++++++++++++++++--
 tests/include_test1.fbs    |  5 ++++
 tests/include_test2.fbs    |  4 +++
 tests/monster_test.fbs     |  2 ++
 tests/test.cpp             | 18 ++++++-------
 13 files changed, 136 insertions(+), 52 deletions(-)
 create mode 100644 tests/include_test1.fbs
 create mode 100644 tests/include_test2.fbs
diff --git a/docs/html/md__grammar.html b/docs/html/md__grammar.html
index e5278c09b..8d4e525a1 100644
--- a/docs/html/md__grammar.html
+++ b/docs/html/md__grammar.html
@@ -53,7 +53,8 @@ $(document).ready(function(){initNavTree('md__grammar.html','');});
 <div class="title">Formal Grammar of the schema language </div>  </div>
 </div><!--header-->
 <div class="contents">
-<div class="textblock"><p>schema = namespace_decl | type_decl | enum_decl | root_decl | object</p>
+<div class="textblock"><p>schema = include* ( namespace_decl | type_decl | enum_decl | root_decl | object )*</p>
+<p>include = <code>include</code> string_constant <code>;</code></p>
 <p>namespace_decl = <code>namespace</code> ident ( <code>.</code> ident )* <code>;</code></p>
 <p>type_decl = ( <code>table</code> | <code>struct</code> ) ident metadata <code>{</code> field_decl+ <code>}</code></p>
 <p>enum_decl = ( <code>enum</code> | <code>union</code> ) ident [ <code>:</code> type ] metadata <code>{</code> commasep( enumval_decl ) <code>}</code></p>
diff --git a/docs/html/md__schemas.html b/docs/html/md__schemas.html
index 9ac82eb49..243851b17 100644
--- a/docs/html/md__schemas.html
+++ b/docs/html/md__schemas.html
@@ -111,6 +111,10 @@ root_type Monster;
 <p>Unions share a lot of properties with enums, but instead of new names for constants, you use names of tables. You can then declare a union field which can hold a reference to any of those types, and additionally a hidden field with the suffix <code>_type</code> is generated that holds the corresponding enum value, allowing you to know which type to cast to at runtime.</p>
 <h3>Namespaces</h3>
 <p>These will generate the corresponding namespace in C++ for all helper code, and packages in Java. You can use <code>.</code> to specify nested namespaces / packages.</p>
+<h3>Includes</h3>
+<p>You can include other schema files in your current one, e.g.: </p><pre class="fragment">include "mydefinitions.fbs";
+</pre><p>This makes it easier to refer to types defined elsewhere. <code>include</code> automatically ensures each file is parsed just once, even when referred to more than once.</p>
+<p>When using the <code>flatc</code> compiler to generate code for schema definitions, only definitions in the current file will be generated, not those from the included files (those you still generate separately).</p>
 <h3>Root type</h3>
 <p>This declares what you consider to be the root table (or struct) of the serialized data. This is particular important for parsing JSON data, which doesn't include object type information.</p>
 <h3>File identification and extension</h3>
diff --git a/docs/source/Grammar.md b/docs/source/Grammar.md
index 6242bfa6b..33bd17099 100755
--- a/docs/source/Grammar.md
+++ b/docs/source/Grammar.md
@@ -1,6 +1,9 @@
 # Formal Grammar of the schema language
 
-schema = namespace\_decl | type\_decl | enum\_decl | root\_decl | object
+schema = include*
+         ( namespace\_decl | type\_decl | enum\_decl | root\_decl | object )*
+
+include = `include` string\_constant `;`
 
 namespace\_decl = `namespace` ident ( `.` ident )* `;`
 
diff --git a/docs/source/Schemas.md b/docs/source/Schemas.md
index a9fa62135..3aab1f534 100755
--- a/docs/source/Schemas.md
+++ b/docs/source/Schemas.md
@@ -141,6 +141,20 @@ These will generate the corresponding namespace in C++ for all helper
 code, and packages in Java. You can use `.` to specify nested namespaces /
 packages.
 
+### Includes
+
+You can include other schema files in your current one, e.g.:
+
+    include "mydefinitions.fbs";
+    
+This makes it easier to refer to types defined elsewhere. `include`
+automatically ensures each file is parsed just once, even when referred to
+more than once.
+
+When using the `flatc` compiler to generate code for schema definitions,
+only definitions in the current file will be generated, not those from the
+included files (those you still generate separately).
+
 ### Root type
 
 This declares what you consider to be the root table (or struct) of the
diff --git a/include/flatbuffers/idl.h b/include/flatbuffers/idl.h
index 9ac114882..680ed15bc 100644
--- a/include/flatbuffers/idl.h
+++ b/include/flatbuffers/idl.h
@@ -249,11 +249,16 @@ class Parser {
 
   // Parse the string containing either schema or JSON data, which will
   // populate the SymbolTable's or the FlatBufferBuilder above.
-  bool Parse(const char *_source);
+  // filepath is the path of the file that _source was loaded from; its
+  // directory part is used to resolve the paths of any include statements.
+  bool Parse(const char *_source, const char *filepath);
 
   // Set the root type. May override the one set in the schema.
   bool SetRootType(const char *name);
 
+  // Mark all definitions as already having code generated.
+  void MarkGenerated();
+
  private:
   void Next();
   bool IsNext(int t);
@@ -295,6 +300,7 @@ class Parser {
 
   std::vector<std::pair<Value, FieldDef *>> field_stack_;
   std::vector<uint8_t> struct_stack_;
+  std::map<std::string, bool> included_files_;
 };
 
 // Utility functions for generators:
diff --git a/include/flatbuffers/util.h b/include/flatbuffers/util.h
index a356f68fb..34cf3b80b 100644
--- a/include/flatbuffers/util.h
+++ b/include/flatbuffers/util.h
@@ -25,13 +25,6 @@
 
 namespace flatbuffers {
 
-static const char kPosixPathSeparator = '/';
-#ifdef _WIN32
-static const char kPathSeparator = '\\';
-#else
-static const char kPathSeparator = kPosixPathSeparator;
-#endif // _WIN32
-
 // Convert an integer or floating point value to a string.
 // In contrast to std::stringstream, "char" values are
 // converted to a string of digits.
@@ -107,6 +100,32 @@ inline bool SaveFile(const char *name, const std::string &buf, bool binary) {
   return SaveFile(name, buf.c_str(), buf.size(), binary);
 }
 
+// Functionality for minimalistic portable path handling:
+
+static const char kPosixPathSeparator = '/';
+#ifdef _WIN32
+static const char kPathSeparator = '\\';
+static const char *PathSeparatorSet = "\\:/";
+#else
+static const char kPathSeparator = kPosixPathSeparator;
+static const char *PathSeparatorSet = "/";
+#endif // _WIN32
+
+inline std::string StripExtension(const std::string &filepath) {
+  size_t i = filepath.find_last_of(".");
+  return i != std::string::npos ? filepath.substr(0, i) : filepath;
+}
+
+inline std::string StripPath(const std::string &filepath) {
+  size_t i = filepath.find_last_of(PathSeparatorSet);
+  return i != std::string::npos ? filepath.substr(i + 1) : filepath;
+}
+
+inline std::string StripFileName(const std::string &filepath) {
+  size_t i = filepath.find_last_of(PathSeparatorSet);
+  return i != std::string::npos ? filepath.substr(0, i + 1) : "";
+}
+
 }  // namespace flatbuffers
 
 #endif  // FLATBUFFERS_UTIL_H_
diff --git a/samples/sample_text.cpp b/samples/sample_text.cpp
index b1b7527ca..33cb58e15 100755
--- a/samples/sample_text.cpp
+++ b/samples/sample_text.cpp
@@ -37,8 +37,8 @@ int main(int /*argc*/, const char * /*argv*/[]) {
 
   // parse schema first, so we can use it to parse the data after
   flatbuffers::Parser parser;
-  ok = parser.Parse(schemafile.c_str()) &&
-       parser.Parse(jsonfile.c_str());
+  ok = parser.Parse(schemafile.c_str(), "samples/") &&
+       parser.Parse(jsonfile.c_str(), "samples/");
   assert(ok);
 
   // here, parser.builder_ contains a binary buffer that is the parsed data.
diff --git a/src/flatc.cpp b/src/flatc.cpp
index 97cd69976..0bc964d21 100755
--- a/src/flatc.cpp
+++ b/src/flatc.cpp
@@ -100,22 +100,6 @@ static void Error(const char *err, const char *obj, bool usage) {
   exit(1);
 }
 
-std::string StripExtension(const std::string &filename) {
-  size_t i = filename.find_last_of(".");
-  return i != std::string::npos ? filename.substr(0, i) : filename;
-}
-
-std::string StripPath(const std::string &filename) {
-  size_t i = filename.find_last_of(
-    #ifdef _WIN32
-      "\\:"
-    #else
-      "/"
-    #endif
-    );
-  return i != std::string::npos ? filename.substr(i + 1) : filename;
-}
-
 int main(int argc, const char *argv[]) {
   program_name = argv[0];
   flatbuffers::Parser parser;
@@ -187,11 +171,12 @@ int main(int argc, const char *argv[]) {
           reinterpret_cast<const uint8_t *>(contents.c_str()),
           contents.length());
       } else {
-        if (!parser.Parse(contents.c_str()))
+        if (!parser.Parse(contents.c_str(), file_it->c_str()))
           Error(parser.error_.c_str());
       }
 
-      std::string filebase = StripPath(StripExtension(*file_it));
+      std::string filebase = flatbuffers::StripPath(
+                               flatbuffers::StripExtension(*file_it));
 
       for (size_t i = 0; i < num_generators; ++i) {
         if (generator_enabled[i]) {
@@ -204,17 +189,9 @@ int main(int argc, const char *argv[]) {
         }
       }
 
-      // Since the Parser object retains definitions across files, we must
-      // ensure we only output code for these once, in the file they are first
-      // declared:
-      for (auto it = parser.enums_.vec.begin();
-               it != parser.enums_.vec.end(); ++it) {
-        (*it)->generated = true;
-      }
-      for (auto it = parser.structs_.vec.begin();
-               it != parser.structs_.vec.end(); ++it) {
-        (*it)->generated = true;
-      }
+      // We do not want to generate code for the definitions in this file
+      // in any files coming up next.
+      parser.MarkGenerated();
   }
 
   return 0;
diff --git a/src/idl_parser.cpp b/src/idl_parser.cpp
index e8896113d..a5b3f4ed0 100644
--- a/src/idl_parser.cpp
+++ b/src/idl_parser.cpp
@@ -83,7 +83,8 @@ template<> inline Offset<void> atot<Offset<void>>(const char *s) {
   TD(NameSpace, 265, "namespace") \
   TD(RootType, 266, "root_type") \
   TD(FileIdentifier, 267, "file_identifier") \
-  TD(FileExtension, 268, "file_extension")
+  TD(FileExtension, 268, "file_extension") \
+  TD(Include, 269, "include")
 #ifdef __GNUC__
 __extension__  // Stop GCC complaining about trailing comma with -Wpendantic.
 #endif
@@ -196,6 +197,7 @@ void Parser::Next() {
           if (attribute_ == "union")     { token_ = kTokenUnion;     return; }
           if (attribute_ == "namespace") { token_ = kTokenNameSpace; return; }
           if (attribute_ == "root_type") { token_ = kTokenRootType;  return; }
+          if (attribute_ == "include")   { token_ = kTokenInclude;  return; }
           if (attribute_ == "file_identifier") {
             token_ = kTokenFileIdentifier;
             return;
@@ -781,13 +783,58 @@ bool Parser::SetRootType(const char *name) {
   return root_struct_def != nullptr;
 }
 
-bool Parser::Parse(const char *source) {
+void Parser::MarkGenerated() {
+  // Since the Parser object retains definitions across files, we must
+  // ensure we only output code for definitions once, in the file they are first
+  // declared. This function marks all existing definitions as having already
+  // been generated.
+  for (auto it = enums_.vec.begin();
+           it != enums_.vec.end(); ++it) {
+    (*it)->generated = true;
+  }
+  for (auto it = structs_.vec.begin();
+           it != structs_.vec.end(); ++it) {
+    (*it)->generated = true;
+  }
+}
+
+bool Parser::Parse(const char *source, const char *filepath) {
+  included_files_[filepath] = true;
+  // This is the starting point to reset to if we interrupted our parsing
+  // to deal with an include:
+  restart_parse_after_include:
   source_ = cursor_ = source;
   line_ = 1;
   error_.clear();
   builder_.Clear();
   try {
     Next();
+    // Includes must come first:
+    while (IsNext(kTokenInclude)) {
+      auto name = attribute_;
+      Expect(kTokenStringConstant);
+      name = StripFileName(filepath) + name;
+      if (included_files_.find(name) == included_files_.end()) {
+        // We found an include file that we have not parsed yet.
+        // Load it and parse it.
+        std::string contents;
+        if (!LoadFile(name.c_str(), true, &contents))
+          Error("unable to load include file: " + name);
+        Parse(contents.c_str(), name.c_str());
+        // Any errors, we're done.
+        if (error_.length()) return false;
+        // We do not want to output code for any included files:
+        MarkGenerated();
+        // This is the easiest way to continue this file after an include:
+        // instead of saving and restoring all the state, we simply start the
+        // file anew. This will cause it to encounter the same include statement
+        // again, but this time it will skip it, because it was entered into
+        // included_files_.
+        goto restart_parse_after_include;
+      }
+      Expect(';');
+    }
+    // Now parse all other kinds of declarations:
     while (token_ != kTokenEof) {
       if (token_ == kTokenNameSpace) {
         Next();
@@ -832,6 +879,8 @@ bool Parser::Parse(const char *source) {
         file_extension_ = attribute_;
         Expect(kTokenStringConstant);
         Expect(';');
+      } else if(token_ == kTokenInclude) {
+        Error("includes must come before declarations");
       } else {
         ParseDecl();
       }
diff --git a/tests/include_test1.fbs b/tests/include_test1.fbs
new file mode 100644
index 000000000..11aebe81f
--- /dev/null
+++ b/tests/include_test1.fbs
@@ -0,0 +1,5 @@
+include "include_test2.fbs";
+include "include_test2.fbs";  // should be skipped
+include "include_test1.fbs";  // should be skipped
+
+
diff --git a/tests/include_test2.fbs b/tests/include_test2.fbs
new file mode 100644
index 000000000..3b9c86ea9
--- /dev/null
+++ b/tests/include_test2.fbs
@@ -0,0 +1,4 @@
+include "include_test2.fbs";    // should be skipped
+
+enum FromInclude:long { IncludeVal }
+
diff --git a/tests/monster_test.fbs b/tests/monster_test.fbs
index 800618f4e..7494144fb 100755
--- a/tests/monster_test.fbs
+++ b/tests/monster_test.fbs
@@ -1,5 +1,7 @@
 // example IDL file
 
+include "include_test1.fbs";
+
 namespace MyGame.Example;
 
 enum Color:byte (bit_flags) { Red = 0, Green, Blue = 3 }
diff --git a/tests/test.cpp b/tests/test.cpp
index e10367c4c..3129dfdbb 100644
--- a/tests/test.cpp
+++ b/tests/test.cpp
@@ -192,8 +192,8 @@ void ParseAndGenerateTextTest() {
 
   // parse schema first, so we can use it to parse the data after
   flatbuffers::Parser parser;
-  TEST_EQ(parser.Parse(schemafile.c_str()), true);
-  TEST_EQ(parser.Parse(jsonfile.c_str()), true);
+  TEST_EQ(parser.Parse(schemafile.c_str(), "tests/"), true);
+  TEST_EQ(parser.Parse(jsonfile.c_str(), "tests/"), true);
 
   // here, parser.builder_ contains a binary buffer that is the parsed data.
 
@@ -406,12 +406,12 @@ void FuzzTest2() {
 
   // Parse the schema, parse the generated data, then generate text back
   // from the binary and compare against the original.
-  TEST_EQ(parser.Parse(schema.c_str()), true);
+  TEST_EQ(parser.Parse(schema.c_str(), ""), true);
 
   const std::string &json =
     definitions[num_definitions - 1].instances[0] + "\n";
 
-  TEST_EQ(parser.Parse(json.c_str()), true);
+  TEST_EQ(parser.Parse(json.c_str(), ""), true);
 
   std::string jsongen;
   flatbuffers::GeneratorOptions opts;
@@ -443,7 +443,7 @@ void FuzzTest2() {
 // Test that parser errors are actually generated.
 void TestError(const char *src, const char *error_substr) {
   flatbuffers::Parser parser;
-  TEST_EQ(parser.Parse(src), false);  // Must signal error
+  TEST_EQ(parser.Parse(src, ""), false);  // Must signal error
   // Must be the error we're expecting
   TEST_NOTNULL(strstr(parser.error_.c_str(), error_substr));
 }
@@ -495,10 +495,10 @@ void ScientificTest() {
   flatbuffers::Parser parser;
 
   // Simple schema.
-  TEST_EQ(parser.Parse("table X { Y:float; } root_type X;"), true);
+  TEST_EQ(parser.Parse("table X { Y:float; } root_type X;", ""), true);
 
   // Test scientific notation numbers.
-  TEST_EQ(parser.Parse("{ Y:0.0314159e+2 }"), true);
+  TEST_EQ(parser.Parse("{ Y:0.0314159e+2 }", ""), true);
   auto root = flatbuffers::GetRoot<float>(parser.builder_.GetBufferPointer());
   // root will point to the table, which is a 32bit vtable offset followed
   // by a float:
@@ -509,11 +509,11 @@ void EnumStringsTest() {
   flatbuffers::Parser parser1;
   TEST_EQ(parser1.Parse("enum E:byte { A, B, C } table T { F:[E]; }"
                         "root_type T;"
-                        "{ F:[ A, B, \"C\", \"A B C\" ] }"), true);
+                        "{ F:[ A, B, \"C\", \"A B C\" ] }", ""), true);
   flatbuffers::Parser parser2;
   TEST_EQ(parser2.Parse("enum E:byte { A, B, C } table T { F:[int]; }"
                         "root_type T;"
-                        "{ F:[ \"E.C\", \"E.A E.B E.C\" ] }"), true);
+                        "{ F:[ \"E.C\", \"E.A E.B E.C\" ] }", ""), true);
 }