From 451272b61840a3c2b12edeb23b18c9bd0b2aa508 Mon Sep 17 00:00:00 2001 From: Wouter van Oortmerssen Date: Tue, 29 Dec 2015 16:33:00 -0800 Subject: [PATCH 1/2] Made error checking macros less prone to clashes. Change-Id: Ieb252ce01446551699d935507bc95ee286fe1ddd Tested: on Linux. --- include/flatbuffers/idl.h | 78 +++++++++++++++++++++------------------ src/idl_parser.cpp | 6 +++ 2 files changed, 48 insertions(+), 36 deletions(-) diff --git a/include/flatbuffers/idl.h b/include/flatbuffers/idl.h index 118a74dd8..d4fcccb1a 100644 --- a/include/flatbuffers/idl.h +++ b/include/flatbuffers/idl.h @@ -375,9 +375,10 @@ class CheckedError { // Additionally, in GCC we can get these errors statically, for additional // assurance: #ifdef __GNUC__ -#define CHECKED_ERROR CheckedError __attribute__((warn_unused_result)) +#define FLATBUFFERS_CHECKED_ERROR CheckedError \ + __attribute__((warn_unused_result)) #else -#define CHECKED_ERROR CheckedError +#define FLATBUFFERS_CHECKED_ERROR CheckedError #endif class Parser { @@ -435,51 +436,56 @@ class Parser { // See reflection/reflection.fbs void Serialize(); - CHECKED_ERROR CheckBitsFit(int64_t val, size_t bits); + FLATBUFFERS_CHECKED_ERROR CheckBitsFit(int64_t val, size_t bits); private: - CHECKED_ERROR Error(const std::string &msg); - CHECKED_ERROR ParseHexNum(int nibbles, int64_t *val); - CHECKED_ERROR Next(); + FLATBUFFERS_CHECKED_ERROR Error(const std::string &msg); + FLATBUFFERS_CHECKED_ERROR ParseHexNum(int nibbles, int64_t *val); + FLATBUFFERS_CHECKED_ERROR Next(); bool Is(int t); - CHECKED_ERROR Expect(int t); + FLATBUFFERS_CHECKED_ERROR Expect(int t); std::string TokenToStringId(int t); EnumDef *LookupEnum(const std::string &id); - CHECKED_ERROR ParseNamespacing(std::string *id, std::string *last); - CHECKED_ERROR ParseTypeIdent(Type &type); - CHECKED_ERROR ParseType(Type &type); - CHECKED_ERROR AddField(StructDef &struct_def, const std::string &name, - const Type &type, FieldDef **dest); - CHECKED_ERROR ParseField(StructDef 
&struct_def); - CHECKED_ERROR ParseAnyValue(Value &val, FieldDef *field, size_t parent_fieldn); - CHECKED_ERROR ParseTable(const StructDef &struct_def, std::string *value, - uoffset_t *ovalue); + FLATBUFFERS_CHECKED_ERROR ParseNamespacing(std::string *id, + std::string *last); + FLATBUFFERS_CHECKED_ERROR ParseTypeIdent(Type &type); + FLATBUFFERS_CHECKED_ERROR ParseType(Type &type); + FLATBUFFERS_CHECKED_ERROR AddField(StructDef &struct_def, + const std::string &name, const Type &type, + FieldDef **dest); + FLATBUFFERS_CHECKED_ERROR ParseField(StructDef &struct_def); + FLATBUFFERS_CHECKED_ERROR ParseAnyValue(Value &val, FieldDef *field, + size_t parent_fieldn); + FLATBUFFERS_CHECKED_ERROR ParseTable(const StructDef &struct_def, + std::string *value, uoffset_t *ovalue); void SerializeStruct(const StructDef &struct_def, const Value &val); void AddVector(bool sortbysize, int count); - CHECKED_ERROR ParseVector(const Type &type, uoffset_t *ovalue); - CHECKED_ERROR ParseMetaData(Definition &def); - CHECKED_ERROR TryTypedValue(int dtoken, bool check, Value &e, BaseType req, - bool *destmatch); - CHECKED_ERROR ParseHash(Value &e, FieldDef* field); - CHECKED_ERROR ParseSingleValue(Value &e); - CHECKED_ERROR ParseIntegerFromString(Type &type, int64_t *result); + FLATBUFFERS_CHECKED_ERROR ParseVector(const Type &type, uoffset_t *ovalue); + FLATBUFFERS_CHECKED_ERROR ParseMetaData(Definition &def); + FLATBUFFERS_CHECKED_ERROR TryTypedValue(int dtoken, bool check, Value &e, + BaseType req, bool *destmatch); + FLATBUFFERS_CHECKED_ERROR ParseHash(Value &e, FieldDef* field); + FLATBUFFERS_CHECKED_ERROR ParseSingleValue(Value &e); + FLATBUFFERS_CHECKED_ERROR ParseIntegerFromString(Type &type, int64_t *result); StructDef *LookupCreateStruct(const std::string &name, bool create_if_new = true, bool definition = false); - CHECKED_ERROR ParseEnum(bool is_union, EnumDef **dest); - CHECKED_ERROR ParseNamespace(); - CHECKED_ERROR StartStruct(const std::string &name, StructDef **dest); - 
CHECKED_ERROR ParseDecl(); - CHECKED_ERROR ParseProtoFields(StructDef *struct_def, bool isextend, - bool inside_oneof); - CHECKED_ERROR ParseProtoOption(); - CHECKED_ERROR ParseProtoKey(); - CHECKED_ERROR ParseProtoDecl(); - CHECKED_ERROR ParseProtoCurliesOrIdent(); - CHECKED_ERROR ParseTypeFromProtoType(Type *type); + FLATBUFFERS_CHECKED_ERROR ParseEnum(bool is_union, EnumDef **dest); + FLATBUFFERS_CHECKED_ERROR ParseNamespace(); + FLATBUFFERS_CHECKED_ERROR StartStruct(const std::string &name, + StructDef **dest); + FLATBUFFERS_CHECKED_ERROR ParseDecl(); + FLATBUFFERS_CHECKED_ERROR ParseProtoFields(StructDef *struct_def, + bool isextend, bool inside_oneof); + FLATBUFFERS_CHECKED_ERROR ParseProtoOption(); + FLATBUFFERS_CHECKED_ERROR ParseProtoKey(); + FLATBUFFERS_CHECKED_ERROR ParseProtoDecl(); + FLATBUFFERS_CHECKED_ERROR ParseProtoCurliesOrIdent(); + FLATBUFFERS_CHECKED_ERROR ParseTypeFromProtoType(Type *type); - CHECKED_ERROR DoParse(const char *_source, const char **include_paths, - const char *source_filename); + FLATBUFFERS_CHECKED_ERROR DoParse(const char *_source, + const char **include_paths, + const char *source_filename); public: SymbolTable structs_; diff --git a/src/idl_parser.cpp b/src/idl_parser.cpp index 6a013c90f..5d675ac05 100644 --- a/src/idl_parser.cpp +++ b/src/idl_parser.cpp @@ -43,7 +43,13 @@ static_assert(BASE_TYPE_UNION == static_cast(reflection::Union), "enums don't match"); +// Any parsing calls have to be wrapped in this macro, which automates +// handling of recursive error checking a bit. It will check the received +// CheckedError object, and return straight away on error. 
#define ECHECK(call) { auto ce = (call); if (ce.Check()) return ce; } + +// These two functions are called hundreds of times below, so define a short +// form: #define NEXT() ECHECK(Next()) #define EXPECT(tok) ECHECK(Expect(tok)) From 4e4a5142fb2d50c08856b3c3292bcf9c649ed2e7 Mon Sep 17 00:00:00 2001 From: Wouter van Oortmerssen Date: Tue, 29 Dec 2015 17:05:18 -0800 Subject: [PATCH 2/2] Clarified binary encoding with an example. Change-Id: I60c900a2f7cbd88fe264f7b0dc78ef86b1580655 Tested: in Chrome. --- docs/html/md__internals.html | 31 +++++++++++++++++++++++---- docs/source/Internals.md | 41 +++++++++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/docs/html/md__internals.html b/docs/html/md__internals.html index c06672475..34168da03 100644 --- a/docs/html/md__internals.html +++ b/docs/html/md__internals.html @@ -75,12 +75,13 @@ $(document).ready(function(){initNavTree('md__internals.html','');});

Structs

These are the simplest, and as mentioned, intended for simple data that benefits from being extra efficient and doesn't need versioning / extensibility. They are always stored inline in their parent (a struct, table, or vector) for maximum compactness. Structs define a consistent memory layout where all components are aligned to their size, and structs are aligned to their largest scalar member. This is done independently of the alignment rules of the underlying compiler to guarantee a cross-platform compatible layout. This layout is then enforced in the generated code.

Tables

-

These start with an soffset_t to a vtable. This is a signed version of uoffset_t, since vtables may be stored anywhere relative to the object. This offset is substracted (not added) from the object start to arrive at the vtable start. This offset is followed by all the fields as aligned scalars (or offsets). Unlike structs, not all fields need to be present. There is no set order and layout.

+

Unlike structs, these are not stored inline in their parent, but are referred to by offset.

+

They start with an soffset_t to a vtable. This is a signed version of uoffset_t, since vtables may be stored anywhere relative to the object. This offset is subtracted (not added) from the object start to arrive at the vtable start. This offset is followed by all the fields as aligned scalars (or offsets). Unlike structs, not all fields need to be present. There is no set order and layout.

To be able to access fields regardless of these uncertainties, we go through a vtable of offsets. Vtables are shared between any objects that happen to have the same vtable values.

-

The elements of a vtable are all of type voffset_t, which is a uint16_t. The first element is the size of the vtable in bytes, including the size element. The second one is the size of the object, in bytes (including the vtable offset). This size could be used for streaming, to know how many bytes to read to be able to access all fields of the object. The remaining elements are the N offsets, where N is the amount of fields declared in the schema when the code that constructed this buffer was compiled (thus, the size of the table is N + 2).

+

The elements of a vtable are all of type voffset_t, which is a uint16_t. The first element is the size of the vtable in bytes, including the size element. The second one is the size of the object, in bytes (including the vtable offset). This size could be used for streaming, to know how many bytes to read to be able to access all inline fields of the object. The remaining elements are the N offsets, where N is the number of fields declared in the schema when the code that constructed this buffer was compiled (thus, the vtable has N + 2 elements).

All accessor functions in the generated code for tables contain the offset into this table as a constant. This offset is checked against the first element (the size of the vtable), to protect against newer code reading older data. If this offset is out of range, or the vtable entry is 0, that means the field is not present in this object, and the default value is returned. Otherwise, the entry is used as offset to the field to be read.

Strings and Vectors

-

Strings are simply a vector of bytes, and are always null-terminated. Vectors are stored as contiguous aligned scalar elements prefixed by a 32bit element count (not including any null termination).

+

Strings are simply a vector of bytes, and are always null-terminated. Vectors are stored as contiguous aligned scalar elements prefixed by a 32bit element count (not including any null termination). Neither is stored inline in its parent; both are referred to by offset.

Construction

The current implementation constructs these buffers backwards (starting at the highest memory address of the buffer), since that significantly reduces the amount of bookkeeping and simplifies the construction API.

Code example

@@ -162,7 +163,29 @@ STRUCT_END(Vec3, 12); }

CreateMonster is a convenience function that calls all functions in MonsterBuilder above for you. Note that if you pass values which are defaults as arguments, it will not actually construct that field, so you can probably use this function instead of the builder class in almost all cases.

inline const Monster *GetMonster(const void *buf) { return flatbuffers::GetRoot<Monster>(buf); }
 

This function is only generated for the root table type, to be able to start traversing a FlatBuffer from a raw buffer pointer.

}; // namespace MyGame
-}; // namespace Sample
+}; // namespace Sample +

Encoding example.

+

Below is a sample encoding for the following JSON corresponding to the above schema:

{ pos: { x: 1, y: 2, z: 3 }, name: "fred", hp: 50 }
+

Resulting in this binary buffer:

// Start of the buffer:
+uint32_t 20  // Offset to the root table.
+
+// Start of the vtable. Not shared in this example, but could be:
+uint16_t 16 // Size of vtable, starting from here.
+uint16_t 22 // Size of object inline data.
+uint16_t 4, 0, 20, 16, 0, 0  // Offsets to fields from start of (root) table, 0 for not present.
+
+// Start of the root table:
+int32_t 16     // Offset to vtable used (default negative direction)
+float 1, 2, 3  // the Vec3 struct, inline.
+uint32_t 8     // Offset to the name string.
+int16_t 50     // hp field.
+int16_t 0      // Padding for alignment.
+
+// Start of name string:
+uint32_t 4  // Length of string.
+int8_t 'f', 'r', 'e', 'd', 0, 0, 0, 0  // Text + 0 termination + padding.
+

Note that this is not the only possible encoding, since the writer has some flexibility in which of the children of the root object to write first (though in this case there's only one string), and what order to write the fields in. Different orders may also cause different alignments to happen.

+