From be894f09df2383844d6c19b1d173fec105451e0f Mon Sep 17 00:00:00 2001 From: Wouter van Oortmerssen Date: Tue, 19 Aug 2014 14:20:05 -0700 Subject: [PATCH] Schemas now support include files. Bug: 15521443 Change-Id: I2e1ef97e7225a1a0ecf2ca65e31d49d443003747 Tested: on Linux. --- docs/html/md__grammar.html | 3 ++- docs/html/md__schemas.html | 4 +++ docs/source/Grammar.md | 5 +++- docs/source/Schemas.md | 14 ++++++++++ include/flatbuffers/idl.h | 8 +++++- include/flatbuffers/util.h | 33 +++++++++++++++++++----- samples/sample_text.cpp | 4 +-- src/flatc.cpp | 35 +++++-------------------- src/idl_parser.cpp | 53 ++++++++++++++++++++++++++++++++++++-- tests/include_test1.fbs | 5 ++++ tests/include_test2.fbs | 4 +++ tests/monster_test.fbs | 2 ++ tests/test.cpp | 18 ++++++------- 13 files changed, 136 insertions(+), 52 deletions(-) create mode 100644 tests/include_test1.fbs create mode 100644 tests/include_test2.fbs diff --git a/docs/html/md__grammar.html b/docs/html/md__grammar.html index e5278c09b..8d4e525a1 100644 --- a/docs/html/md__grammar.html +++ b/docs/html/md__grammar.html @@ -53,7 +53,8 @@ $(document).ready(function(){initNavTree('md__grammar.html','');});
Formal Grammar of the schema language
-

schema = namespace_decl | type_decl | enum_decl | root_decl | object

+

schema = include* ( namespace_decl | type_decl | enum_decl | root_decl | object )*

+

include = include string_constant ;

namespace_decl = namespace ident ( . ident )* ;

type_decl = ( table | struct ) ident metadata { field_decl+ }

enum_decl = ( enum | union ) ident [ : type ] metadata { commasep( enumval_decl ) }

diff --git a/docs/html/md__schemas.html b/docs/html/md__schemas.html index 9ac82eb49..243851b17 100644 --- a/docs/html/md__schemas.html +++ b/docs/html/md__schemas.html @@ -111,6 +111,10 @@ root_type Monster;

Unions share a lot of properties with enums, but instead of new names for constants, you use names of tables. You can then declare a union field which can hold a reference to any of those types, and additionally a hidden field with the suffix _type is generated that holds the corresponding enum value, allowing you to know which type to cast to at runtime.

Namespaces

These will generate the corresponding namespace in C++ for all helper code, and packages in Java. You can use . to specify nested namespaces / packages.

+

Includes

+

You can include other schema files in your current one, e.g.:

include "mydefinitions.fbs";
+

This makes it easier to refer to types defined elsewhere. include automatically ensures each file is parsed just once, even when referred to more than once.

+

When using the flatc compiler to generate code for schema definitions, only definitions in the current file will be generated, not those from the included files (those you still generate separately).

Root type

This declares what you consider to be the root table (or struct) of the serialized data. This is particularly important for parsing JSON data, which doesn't include object type information.

File identification and extension

diff --git a/docs/source/Grammar.md b/docs/source/Grammar.md index 6242bfa6b..33bd17099 100755 --- a/docs/source/Grammar.md +++ b/docs/source/Grammar.md @@ -1,6 +1,9 @@ # Formal Grammar of the schema language -schema = namespace\_decl | type\_decl | enum\_decl | root\_decl | object +schema = include* + ( namespace\_decl | type\_decl | enum\_decl | root\_decl | object )* + +include = `include` string\_constant `;` namespace\_decl = `namespace` ident ( `.` ident )* `;` diff --git a/docs/source/Schemas.md b/docs/source/Schemas.md index a9fa62135..3aab1f534 100755 --- a/docs/source/Schemas.md +++ b/docs/source/Schemas.md @@ -141,6 +141,20 @@ These will generate the corresponding namespace in C++ for all helper code, and packages in Java. You can use `.` to specify nested namespaces / packages. +### Includes + +You can include other schemas files in your current one, e.g.: + + include "mydefinitions.fbs" + +This makes it easier to refer to types defined elsewhere. `include` +automatically ensures each file is parsed just once, even when referred to +more than once. + +When using the `flatc` compiler to generate code for schema definitions, +only definitions in the current file will be generated, not those from the +included files (those you still generate separately). + ### Root type This declares what you consider to be the root table (or struct) of the diff --git a/include/flatbuffers/idl.h b/include/flatbuffers/idl.h index 9ac114882..680ed15bc 100644 --- a/include/flatbuffers/idl.h +++ b/include/flatbuffers/idl.h @@ -249,11 +249,16 @@ class Parser { // Parse the string containing either schema or JSON data, which will // populate the SymbolTable's or the FlatBufferBuilder above. - bool Parse(const char *_source); + // filepath indicates the file that _source was loaded from, it is + // used to resolve any include statements. + bool Parse(const char *_source, const char *filepath); // Set the root type. May override the one set in the schema. 
bool SetRootType(const char *name); + // Mark all definitions as already having code generated. + void MarkGenerated(); + private: void Next(); bool IsNext(int t); @@ -295,6 +300,7 @@ class Parser { std::vector> field_stack_; std::vector struct_stack_; + std::map included_files_; }; // Utility functions for generators: diff --git a/include/flatbuffers/util.h b/include/flatbuffers/util.h index a356f68fb..34cf3b80b 100644 --- a/include/flatbuffers/util.h +++ b/include/flatbuffers/util.h @@ -25,13 +25,6 @@ namespace flatbuffers { -static const char kPosixPathSeparator = '/'; -#ifdef _WIN32 -static const char kPathSeparator = '\\'; -#else -static const char kPathSeparator = kPosixPathSeparator; -#endif // _WIN32 - // Convert an integer or floating point value to a string. // In contrast to std::stringstream, "char" values are // converted to a string of digits. @@ -107,6 +100,32 @@ inline bool SaveFile(const char *name, const std::string &buf, bool binary) { return SaveFile(name, buf.c_str(), buf.size(), binary); } +// Functionality for minimalistic portable path handling: + +static const char kPosixPathSeparator = '/'; +#ifdef _WIN32 +static const char kPathSeparator = '\\'; +static const char *PathSeparatorSet = "\\:/"; +#else +static const char kPathSeparator = kPosixPathSeparator; +static const char *PathSeparatorSet = "/"; +#endif // _WIN32 + +inline std::string StripExtension(const std::string &filepath) { + size_t i = filepath.find_last_of("."); + return i != std::string::npos ? filepath.substr(0, i) : filepath; +} + +inline std::string StripPath(const std::string &filepath) { + size_t i = filepath.find_last_of(PathSeparatorSet); + return i != std::string::npos ? filepath.substr(i + 1) : filepath; +} + +inline std::string StripFileName(const std::string &filepath) { + size_t i = filepath.find_last_of(PathSeparatorSet); + return i != std::string::npos ? 
filepath.substr(0, i + 1) : ""; +} + } // namespace flatbuffers #endif // FLATBUFFERS_UTIL_H_ diff --git a/samples/sample_text.cpp b/samples/sample_text.cpp index b1b7527ca..33cb58e15 100755 --- a/samples/sample_text.cpp +++ b/samples/sample_text.cpp @@ -37,8 +37,8 @@ int main(int /*argc*/, const char * /*argv*/[]) { // parse schema first, so we can use it to parse the data after flatbuffers::Parser parser; - ok = parser.Parse(schemafile.c_str()) && - parser.Parse(jsonfile.c_str()); + ok = parser.Parse(schemafile.c_str(), "samples/") && + parser.Parse(jsonfile.c_str(), "samples/"); assert(ok); // here, parser.builder_ contains a binary buffer that is the parsed data. diff --git a/src/flatc.cpp b/src/flatc.cpp index 97cd69976..0bc964d21 100755 --- a/src/flatc.cpp +++ b/src/flatc.cpp @@ -100,22 +100,6 @@ static void Error(const char *err, const char *obj, bool usage) { exit(1); } -std::string StripExtension(const std::string &filename) { - size_t i = filename.find_last_of("."); - return i != std::string::npos ? filename.substr(0, i) : filename; -} - -std::string StripPath(const std::string &filename) { - size_t i = filename.find_last_of( - #ifdef _WIN32 - "\\:" - #else - "/" - #endif - ); - return i != std::string::npos ? 
filename.substr(i + 1) : filename; -} - int main(int argc, const char *argv[]) { program_name = argv[0]; flatbuffers::Parser parser; @@ -187,11 +171,12 @@ int main(int argc, const char *argv[]) { reinterpret_cast(contents.c_str()), contents.length()); } else { - if (!parser.Parse(contents.c_str())) + if (!parser.Parse(contents.c_str(), file_it->c_str())) Error(parser.error_.c_str()); } - std::string filebase = StripPath(StripExtension(*file_it)); + std::string filebase = flatbuffers::StripPath( + flatbuffers::StripExtension(*file_it)); for (size_t i = 0; i < num_generators; ++i) { if (generator_enabled[i]) { @@ -204,17 +189,9 @@ int main(int argc, const char *argv[]) { } } - // Since the Parser object retains definitions across files, we must - // ensure we only output code for these once, in the file they are first - // declared: - for (auto it = parser.enums_.vec.begin(); - it != parser.enums_.vec.end(); ++it) { - (*it)->generated = true; - } - for (auto it = parser.structs_.vec.begin(); - it != parser.structs_.vec.end(); ++it) { - (*it)->generated = true; - } + // We do not want to generate code for the definitions in this file + // in any files coming up next. + parser.MarkGenerated(); } return 0; diff --git a/src/idl_parser.cpp b/src/idl_parser.cpp index e8896113d..a5b3f4ed0 100644 --- a/src/idl_parser.cpp +++ b/src/idl_parser.cpp @@ -83,7 +83,8 @@ template<> inline Offset atot>(const char *s) { TD(NameSpace, 265, "namespace") \ TD(RootType, 266, "root_type") \ TD(FileIdentifier, 267, "file_identifier") \ - TD(FileExtension, 268, "file_extension") + TD(FileExtension, 268, "file_extension") \ + TD(Include, 269, "include") #ifdef __GNUC__ __extension__ // Stop GCC complaining about trailing comma with -Wpendantic. 
#endif @@ -196,6 +197,7 @@ void Parser::Next() { if (attribute_ == "union") { token_ = kTokenUnion; return; } if (attribute_ == "namespace") { token_ = kTokenNameSpace; return; } if (attribute_ == "root_type") { token_ = kTokenRootType; return; } + if (attribute_ == "include") { token_ = kTokenInclude; return; } if (attribute_ == "file_identifier") { token_ = kTokenFileIdentifier; return; @@ -781,13 +783,58 @@ bool Parser::SetRootType(const char *name) { return root_struct_def != nullptr; } -bool Parser::Parse(const char *source) { +void Parser::MarkGenerated() { + // Since the Parser object retains definitions across files, we must + // ensure we only output code for definitions once, in the file they are first + // declared. This function marks all existing definitions as having already + // been generated. + for (auto it = enums_.vec.begin(); + it != enums_.vec.end(); ++it) { + (*it)->generated = true; + } + for (auto it = structs_.vec.begin(); + it != structs_.vec.end(); ++it) { + (*it)->generated = true; + } +} + +bool Parser::Parse(const char *source, const char *filepath) { + included_files_[filepath] = true; + // This is the starting point to reset to if we interrupted our parsing + // to deal with an include: + restart_parse_after_include: source_ = cursor_ = source; line_ = 1; error_.clear(); builder_.Clear(); try { Next(); + // Includes must come first: + while (IsNext(kTokenInclude)) { + auto name = attribute_; + Expect(kTokenStringConstant); + name = StripFileName(filepath) + name; + if (included_files_.find(name) == included_files_.end()) { + // We found an include file that we have not parsed yet. + // Load it and parse it. + std::string contents; + if (!LoadFile(name.c_str(), true, &contents)) + Error("unable to load include file: " + name); + Parse(contents.c_str(), name.c_str()); + // Any errors, we're done. 
+ if (error_.length()) return false; + // We do not want to output code for any included files: + MarkGenerated(); + // This is the easiest way to continue this file after an include: + // instead of saving and restoring all the state, we simply start the + // file anew. This will cause it to encounter the same include statement + // again, but this time it will skip it, because it was entered into + // included_files_. + goto restart_parse_after_include; + } + Expect(';'); + } + // Now parse all other kinds of declarations: while (token_ != kTokenEof) { if (token_ == kTokenNameSpace) { Next(); @@ -832,6 +879,8 @@ bool Parser::Parse(const char *source) { file_extension_ = attribute_; Expect(kTokenStringConstant); Expect(';'); + } else if(token_ == kTokenInclude) { + Error("includes must come before declarations"); } else { ParseDecl(); } diff --git a/tests/include_test1.fbs b/tests/include_test1.fbs new file mode 100644 index 000000000..11aebe81f --- /dev/null +++ b/tests/include_test1.fbs @@ -0,0 +1,5 @@ +include "include_test2.fbs"; +include "include_test2.fbs"; // should be skipped +include "include_test1.fbs"; // should be skipped + + diff --git a/tests/include_test2.fbs b/tests/include_test2.fbs new file mode 100644 index 000000000..3b9c86ea9 --- /dev/null +++ b/tests/include_test2.fbs @@ -0,0 +1,4 @@ +include "include_test2.fbs"; // should be skipped + +enum FromInclude:long { IncludeVal } + diff --git a/tests/monster_test.fbs b/tests/monster_test.fbs index 800618f4e..7494144fb 100755 --- a/tests/monster_test.fbs +++ b/tests/monster_test.fbs @@ -1,5 +1,7 @@ // example IDL file +include "include_test1.fbs"; + namespace MyGame.Example; enum Color:byte (bit_flags) { Red = 0, Green, Blue = 3 } diff --git a/tests/test.cpp b/tests/test.cpp index e10367c4c..3129dfdbb 100644 --- a/tests/test.cpp +++ b/tests/test.cpp @@ -192,8 +192,8 @@ void ParseAndGenerateTextTest() { // parse schema first, so we can use it to parse the data after flatbuffers::Parser parser; - 
TEST_EQ(parser.Parse(schemafile.c_str()), true); - TEST_EQ(parser.Parse(jsonfile.c_str()), true); + TEST_EQ(parser.Parse(schemafile.c_str(), "tests/"), true); + TEST_EQ(parser.Parse(jsonfile.c_str(), "tests/"), true); // here, parser.builder_ contains a binary buffer that is the parsed data. @@ -406,12 +406,12 @@ void FuzzTest2() { // Parse the schema, parse the generated data, then generate text back // from the binary and compare against the original. - TEST_EQ(parser.Parse(schema.c_str()), true); + TEST_EQ(parser.Parse(schema.c_str(), ""), true); const std::string &json = definitions[num_definitions - 1].instances[0] + "\n"; - TEST_EQ(parser.Parse(json.c_str()), true); + TEST_EQ(parser.Parse(json.c_str(), ""), true); std::string jsongen; flatbuffers::GeneratorOptions opts; @@ -443,7 +443,7 @@ void FuzzTest2() { // Test that parser errors are actually generated. void TestError(const char *src, const char *error_substr) { flatbuffers::Parser parser; - TEST_EQ(parser.Parse(src), false); // Must signal error + TEST_EQ(parser.Parse(src, ""), false); // Must signal error // Must be the error we're expecting TEST_NOTNULL(strstr(parser.error_.c_str(), error_substr)); } @@ -495,10 +495,10 @@ void ScientificTest() { flatbuffers::Parser parser; // Simple schema. - TEST_EQ(parser.Parse("table X { Y:float; } root_type X;"), true); + TEST_EQ(parser.Parse("table X { Y:float; } root_type X;", ""), true); // Test scientific notation numbers. 
- TEST_EQ(parser.Parse("{ Y:0.0314159e+2 }"), true); + TEST_EQ(parser.Parse("{ Y:0.0314159e+2 }", ""), true); auto root = flatbuffers::GetRoot(parser.builder_.GetBufferPointer()); // root will point to the table, which is a 32bit vtable offset followed // by a float: @@ -509,11 +509,11 @@ void EnumStringsTest() { flatbuffers::Parser parser1; TEST_EQ(parser1.Parse("enum E:byte { A, B, C } table T { F:[E]; }" "root_type T;" - "{ F:[ A, B, \"C\", \"A B C\" ] }"), true); + "{ F:[ A, B, \"C\", \"A B C\" ] }", ""), true); flatbuffers::Parser parser2; TEST_EQ(parser2.Parse("enum E:byte { A, B, C } table T { F:[int]; }" "root_type T;" - "{ F:[ \"E.C\", \"E.A E.B E.C\" ] }"), true); + "{ F:[ \"E.C\", \"E.A E.B E.C\" ] }", ""), true); }