mirror of
https://github.com/google/flatbuffers.git
synced 2026-06-03 04:21:13 +00:00
FlatBuffers 64 for C++ (#7935)
* First working hack of adding 64-bit. Don't judge :) * Made vector_downward work on 64 bit types * vector_downward uses size_t, added offset64 to reflection * cleaned up adding offset64 in parser * Add C++ testing skeleton for 64-bit * working test for CreateVector64 * working >2 GiB buffers * support for large strings * simplified CreateString<> to just provide the offset type * generalize CreateVector template * update test_64.afb due to upstream format change * Added Vector64 type, which is just an alias for vector ATM * Switch to Offset64 for Vector64 * Update for reflection bfbs output change * Starting to add support for vector64 type in C++ * made a generic CreateVector that can handle different offsets and vector types * Support for 32-vector with 64-addressing * Vector64 basic builder + tests working * basic support for json vector64 support * renamed fields in test_64bit.fbs to better reflect their use * working C++ vector64 builder * Apply --annotate-sparse-vector to 64-bit tests * Enable Vector64 for --annotate-sparse-vectors * Merged from upstream * Add `near_string` field for testing 32-bit offsets alongside * keep track of where the 32-bit and 64-bit regions are for flatbufferbuilder * move template<> outside class body for GCC * update run.sh to build and run tests * basic assertion for adding 64-bit offset at the wrong time * started to separate `FlatBufferBuilder` into two classes, 1 64-bit aware, the other not * add test for nested flatbuffer vector64, fix bug in alignment of big vectors * fixed CreateDirect method by iterating by Offset64 first * internal refactoring of flatbufferbuilder * block not supported languages in the parser from using 64-bit * evolution tests for adding a vector64 field * conformity tests for adding/removing offset64 attributes * ensure test is for a big buffer * add parser error tests for `offset64` and `vector64` attributes * add missing static that GCC only complains about * remove stdint-uintn.h header that gets automatically added * move 64-bit CalculateOffset internal * fixed return size of EndVector * various fixes on windows * add SizeT to vector_downward * minimze range of size changes in vector and builder * reworked how tracking if 64-offsets are added * Add ReturnT to EndVector * small cleanups * remove need for second Array definition * combine IndirectHelpers into one definition * started support for vector of struct * Support for 32/64-vectors of structs + Offset64 * small cleanups * add verification for vector64 * add sized prefix for 64-bit buffers * add fuzzer for 64-bit * add example of adding many vectors using a wrapper table * run the new -bfbs-gen-embed logic on the 64-bit tests * remove run.sh and fix cmakelist issue * fixed bazel rules * fixed some PR comments * add 64-bit tests to cmakelist
This commit is contained in:
@@ -16,12 +16,15 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "flatbuffers/base.h"
|
||||
#include "flatbuffers/buffer.h"
|
||||
#include "flatbuffers/idl.h"
|
||||
#include "flatbuffers/reflection_generated.h"
|
||||
#include "flatbuffers/util.h"
|
||||
|
||||
namespace flatbuffers {
|
||||
@@ -42,7 +45,8 @@ static const double kPi = 3.14159265358979323846;
|
||||
|
||||
// The enums in the reflection schema should match the ones we use internally.
|
||||
// Compare the last element to check if these go out of sync.
|
||||
static_assert(BASE_TYPE_UNION == static_cast<BaseType>(reflection::Union),
|
||||
static_assert(BASE_TYPE_VECTOR64 ==
|
||||
static_cast<BaseType>(reflection::MaxBaseType - 1),
|
||||
"enums don't match");
|
||||
|
||||
// Any parsing calls have to be wrapped in this macro, which automates
|
||||
@@ -124,6 +128,14 @@ CheckedError atot<Offset<void>>(const char *s, Parser &parser,
|
||||
return NoError();
|
||||
}
|
||||
|
||||
template<>
|
||||
CheckedError atot<Offset64<void>>(const char *s, Parser &parser,
|
||||
Offset64<void> *val) {
|
||||
(void)parser;
|
||||
*val = Offset64<void>(atoi(s));
|
||||
return NoError();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static T *LookupTableByName(const SymbolTable<T> &table,
|
||||
const std::string &name,
|
||||
@@ -957,11 +969,11 @@ CheckedError Parser::ParseField(StructDef &struct_def) {
|
||||
ECHECK(AddField(struct_def, name, type, &field));
|
||||
|
||||
if (typefield) {
|
||||
// We preserve the relation between the typefield
|
||||
// and field, so we can easily map it in the code
|
||||
// generators.
|
||||
typefield->sibling_union_field = field;
|
||||
field->sibling_union_field = typefield;
|
||||
// We preserve the relation between the typefield
|
||||
// and field, so we can easily map it in the code
|
||||
// generators.
|
||||
typefield->sibling_union_field = field;
|
||||
field->sibling_union_field = typefield;
|
||||
}
|
||||
|
||||
if (token_ == '=') {
|
||||
@@ -1036,6 +1048,65 @@ CheckedError Parser::ParseField(StructDef &struct_def) {
|
||||
}
|
||||
}
|
||||
|
||||
if (field->attributes.Lookup("vector64") != nullptr) {
|
||||
if (!IsVector(type)) {
|
||||
return Error("`vector64` attribute can only be applied on vectors.");
|
||||
}
|
||||
|
||||
// Upgrade the type to be a BASE_TYPE_VECTOR64, since the attributes are
|
||||
// parsed after the type.
|
||||
const BaseType element_base_type = type.element;
|
||||
type = Type(BASE_TYPE_VECTOR64, type.struct_def, type.enum_def);
|
||||
type.element = element_base_type;
|
||||
|
||||
// Since the field was already added to the parent object, update the type
|
||||
// in place.
|
||||
field->value.type = type;
|
||||
|
||||
// 64-bit vectors imply the offset64 attribute.
|
||||
field->offset64 = true;
|
||||
}
|
||||
|
||||
// Record that this field uses 64-bit offsets.
|
||||
if (field->attributes.Lookup("offset64") != nullptr) {
|
||||
// TODO(derekbailey): would be nice to have this be a recommendation or hint
|
||||
// instead of a warning.
|
||||
if (type.base_type == BASE_TYPE_VECTOR64) {
|
||||
Warning("attribute `vector64` implies `offset64` and isn't required.");
|
||||
}
|
||||
|
||||
field->offset64 = true;
|
||||
}
|
||||
|
||||
// Check for common conditions with Offset64 fields.
|
||||
if (field->offset64) {
|
||||
// TODO(derekbailey): this is where we can disable string support for
|
||||
// offset64, as that is not a hard requirement to have.
|
||||
if (!IsString(type) && !IsVector(type)) {
|
||||
return Error(
|
||||
"only string and vectors can have `offset64` attribute applied");
|
||||
}
|
||||
|
||||
// If this is a Vector, only scalar and scalar-like (structs) items are
|
||||
// allowed.
|
||||
// TODO(derekbailey): allow vector of strings, just require that the strings
|
||||
// are Offset64<string>.
|
||||
if (IsVector(type) &&
|
||||
!((IsScalar(type.element) && !IsEnum(type.VectorType())) ||
|
||||
IsStruct(type.VectorType()))) {
|
||||
return Error("only vectors of scalars are allowed to be 64-bit.");
|
||||
}
|
||||
|
||||
// Lastly, check if it is supported by the specified generated languages. Do
|
||||
// this last so the above checks can inform the user of schema errors to fix
|
||||
// first.
|
||||
if (!Supports64BitOffsets()) {
|
||||
return Error(
|
||||
"fields using 64-bit offsets are not yet supported in at least one "
|
||||
"of the specified programming languages.");
|
||||
}
|
||||
}
|
||||
|
||||
// For historical convenience reasons, string keys are assumed required.
|
||||
// Scalars are kDefault unless otherwise specified.
|
||||
// Nonscalars are kOptional unless required;
|
||||
@@ -1058,7 +1129,8 @@ CheckedError Parser::ParseField(StructDef &struct_def) {
|
||||
if (field->key) {
|
||||
if (struct_def.has_key) return Error("only one field may be set as 'key'");
|
||||
struct_def.has_key = true;
|
||||
auto is_valid = IsScalar(type.base_type) || IsString(type) || IsStruct(type);
|
||||
auto is_valid =
|
||||
IsScalar(type.base_type) || IsString(type) || IsStruct(type);
|
||||
if (IsArray(type)) {
|
||||
is_valid |=
|
||||
IsScalar(type.VectorType().base_type) || IsStruct(type.VectorType());
|
||||
@@ -1161,7 +1233,7 @@ CheckedError Parser::ParseField(StructDef &struct_def) {
|
||||
if (nested->type.base_type != BASE_TYPE_STRING)
|
||||
return Error(
|
||||
"nested_flatbuffer attribute must be a string (the root type)");
|
||||
if (type.base_type != BASE_TYPE_VECTOR || type.element != BASE_TYPE_UCHAR)
|
||||
if (!IsVector(type.base_type) || type.element != BASE_TYPE_UCHAR)
|
||||
return Error(
|
||||
"nested_flatbuffer attribute may only apply to a vector of ubyte");
|
||||
// This will cause an error if the root type of the nested flatbuffer
|
||||
@@ -1230,7 +1302,7 @@ CheckedError Parser::ParseComma() {
|
||||
CheckedError Parser::ParseAnyValue(Value &val, FieldDef *field,
|
||||
size_t parent_fieldn,
|
||||
const StructDef *parent_struct_def,
|
||||
uoffset_t count, bool inside_vector) {
|
||||
size_t count, bool inside_vector) {
|
||||
switch (val.type.base_type) {
|
||||
case BASE_TYPE_UNION: {
|
||||
FLATBUFFERS_ASSERT(field);
|
||||
@@ -1300,7 +1372,7 @@ CheckedError Parser::ParseAnyValue(Value &val, FieldDef *field,
|
||||
return Error(
|
||||
"union types vector smaller than union values vector for: " +
|
||||
field->name);
|
||||
enum_idx = vector_of_union_types->Get(count);
|
||||
enum_idx = vector_of_union_types->Get(static_cast<uoffset_t>(count));
|
||||
} else {
|
||||
ECHECK(atot(constant.c_str(), *this, &enum_idx));
|
||||
}
|
||||
@@ -1329,9 +1401,10 @@ CheckedError Parser::ParseAnyValue(Value &val, FieldDef *field,
|
||||
ECHECK(ParseString(val, field->shared));
|
||||
break;
|
||||
}
|
||||
case BASE_TYPE_VECTOR64:
|
||||
case BASE_TYPE_VECTOR: {
|
||||
uoffset_t off;
|
||||
ECHECK(ParseVector(val.type.VectorType(), &off, field, parent_fieldn));
|
||||
ECHECK(ParseVector(val.type, &off, field, parent_fieldn));
|
||||
val.constant = NumToString(off);
|
||||
break;
|
||||
}
|
||||
@@ -1503,6 +1576,9 @@ CheckedError Parser::ParseTable(const StructDef &struct_def, std::string *value,
|
||||
for (size_t size = struct_def.sortbysize ? sizeof(largest_scalar_t) : 1; size;
|
||||
size /= 2) {
|
||||
// Go through elements in reverse, since we're building the data backwards.
|
||||
// TODO(derekbailey): this doesn't work when there are Offset64 fields, as
|
||||
// those have to be built first. So this needs to be changed to iterate over
|
||||
// Offset64 then Offset32 fields.
|
||||
for (auto it = field_stack_.rbegin();
|
||||
it != field_stack_.rbegin() + fieldn_outer; ++it) {
|
||||
auto &field_value = it->first;
|
||||
@@ -1510,7 +1586,7 @@ CheckedError Parser::ParseTable(const StructDef &struct_def, std::string *value,
|
||||
if (!struct_def.sortbysize ||
|
||||
size == SizeOf(field_value.type.base_type)) {
|
||||
switch (field_value.type.base_type) {
|
||||
// clang-format off
|
||||
// clang-format off
|
||||
#define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...) \
|
||||
case BASE_TYPE_ ## ENUM: \
|
||||
builder_.Pad(field->padding); \
|
||||
@@ -1541,9 +1617,16 @@ CheckedError Parser::ParseTable(const StructDef &struct_def, std::string *value,
|
||||
if (IsStruct(field->value.type)) { \
|
||||
SerializeStruct(*field->value.type.struct_def, field_value); \
|
||||
} else { \
|
||||
CTYPE val; \
|
||||
ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
|
||||
builder_.AddOffset(field_value.offset, val); \
|
||||
/* Special case for fields that use 64-bit addressing */ \
|
||||
if(field->offset64) { \
|
||||
Offset64<void> offset; \
|
||||
ECHECK(atot(field_value.constant.c_str(), *this, &offset)); \
|
||||
builder_.AddOffset(field_value.offset, offset); \
|
||||
} else { \
|
||||
CTYPE val; \
|
||||
ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
|
||||
builder_.AddOffset(field_value.offset, val); \
|
||||
} \
|
||||
} \
|
||||
break;
|
||||
FLATBUFFERS_GEN_TYPES_POINTER(FLATBUFFERS_TD)
|
||||
@@ -1581,7 +1664,7 @@ CheckedError Parser::ParseTable(const StructDef &struct_def, std::string *value,
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
CheckedError Parser::ParseVectorDelimiters(uoffset_t &count, F body) {
|
||||
CheckedError Parser::ParseVectorDelimiters(size_t &count, F body) {
|
||||
EXPECT('[');
|
||||
for (;;) {
|
||||
if ((!opts.strict_json || !count) && Is(']')) break;
|
||||
@@ -1611,10 +1694,11 @@ CheckedError Parser::ParseAlignAttribute(const std::string &align_constant,
|
||||
NumToString(FLATBUFFERS_MAX_ALIGNMENT));
|
||||
}
|
||||
|
||||
CheckedError Parser::ParseVector(const Type &type, uoffset_t *ovalue,
|
||||
CheckedError Parser::ParseVector(const Type &vector_type, uoffset_t *ovalue,
|
||||
FieldDef *field, size_t fieldn) {
|
||||
uoffset_t count = 0;
|
||||
auto err = ParseVectorDelimiters(count, [&](uoffset_t &) -> CheckedError {
|
||||
Type type = vector_type.VectorType();
|
||||
size_t count = 0;
|
||||
auto err = ParseVectorDelimiters(count, [&](size_t &) -> CheckedError {
|
||||
Value val;
|
||||
val.type = type;
|
||||
ECHECK(ParseAnyValue(val, field, fieldn, nullptr, count, true));
|
||||
@@ -1634,12 +1718,18 @@ CheckedError Parser::ParseVector(const Type &type, uoffset_t *ovalue,
|
||||
}
|
||||
|
||||
// TODO Fix using element alignment as size (`elemsize`)!
|
||||
builder_.StartVector(len, elemsize, alignment);
|
||||
for (uoffset_t i = 0; i < count; i++) {
|
||||
if (vector_type.base_type == BASE_TYPE_VECTOR64) {
|
||||
// TODO(derekbailey): this requires a 64-bit builder.
|
||||
// builder_.StartVector<Offset64, uoffset64_t>(len, elemsize, alignment);
|
||||
builder_.StartVector(len, elemsize, alignment);
|
||||
} else {
|
||||
builder_.StartVector(len, elemsize, alignment);
|
||||
}
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
// start at the back, since we're building the data backwards.
|
||||
auto &val = field_stack_.back().first;
|
||||
switch (val.type.base_type) {
|
||||
// clang-format off
|
||||
// clang-format off
|
||||
#define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE,...) \
|
||||
case BASE_TYPE_ ## ENUM: \
|
||||
if (IsStruct(val.type)) SerializeStruct(*val.type.struct_def, val); \
|
||||
@@ -1657,7 +1747,11 @@ CheckedError Parser::ParseVector(const Type &type, uoffset_t *ovalue,
|
||||
}
|
||||
|
||||
builder_.ClearOffsets();
|
||||
*ovalue = builder_.EndVector(count);
|
||||
if (vector_type.base_type == BASE_TYPE_VECTOR64) {
|
||||
*ovalue = builder_.EndVector<uoffset64_t>(count);
|
||||
} else {
|
||||
*ovalue = builder_.EndVector(count);
|
||||
}
|
||||
|
||||
if (type.base_type == BASE_TYPE_STRUCT && type.struct_def->has_key) {
|
||||
// We should sort this vector. Find the key first.
|
||||
@@ -1725,8 +1819,8 @@ CheckedError Parser::ParseArray(Value &array) {
|
||||
FlatBufferBuilder builder;
|
||||
const auto &type = array.type.VectorType();
|
||||
auto length = array.type.fixed_length;
|
||||
uoffset_t count = 0;
|
||||
auto err = ParseVectorDelimiters(count, [&](uoffset_t &) -> CheckedError {
|
||||
size_t count = 0;
|
||||
auto err = ParseVectorDelimiters(count, [&](size_t &) -> CheckedError {
|
||||
stack.emplace_back(Value());
|
||||
auto &val = stack.back();
|
||||
val.type = type;
|
||||
@@ -1977,8 +2071,7 @@ CheckedError Parser::TryTypedValue(const std::string *name, int dtoken,
|
||||
e.type.base_type = req;
|
||||
} else {
|
||||
return Error(std::string("type mismatch: expecting: ") +
|
||||
TypeName(e.type.base_type) +
|
||||
", found: " + TypeName(req) +
|
||||
TypeName(e.type.base_type) + ", found: " + TypeName(req) +
|
||||
", name: " + (name ? *name : "") + ", value: " + e.constant);
|
||||
}
|
||||
}
|
||||
@@ -2595,6 +2688,11 @@ bool Parser::SupportsAdvancedArrayFeatures() const {
|
||||
IDLOptions::kBinary | IDLOptions::kRust | IDLOptions::kTs)) == 0;
|
||||
}
|
||||
|
||||
bool Parser::Supports64BitOffsets() const {
|
||||
return (opts.lang_to_generate &
|
||||
~(IDLOptions::kCpp | IDLOptions::kJson | IDLOptions::kBinary)) == 0;
|
||||
}
|
||||
|
||||
Namespace *Parser::UniqueNamespace(Namespace *ns) {
|
||||
for (auto it = namespaces_.begin(); it != namespaces_.end(); ++it) {
|
||||
if (ns->components == (*it)->components) {
|
||||
@@ -3217,10 +3315,9 @@ CheckedError Parser::SkipAnyJsonValue() {
|
||||
});
|
||||
}
|
||||
case '[': {
|
||||
uoffset_t count = 0;
|
||||
return ParseVectorDelimiters(count, [&](uoffset_t &) -> CheckedError {
|
||||
return SkipAnyJsonValue();
|
||||
});
|
||||
size_t count = 0;
|
||||
return ParseVectorDelimiters(
|
||||
count, [&](size_t &) -> CheckedError { return SkipAnyJsonValue(); });
|
||||
}
|
||||
case kTokenStringConstant:
|
||||
case kTokenIntegerConstant:
|
||||
@@ -3269,8 +3366,8 @@ CheckedError Parser::ParseFlexBufferValue(flexbuffers::Builder *builder) {
|
||||
}
|
||||
case '[': {
|
||||
auto start = builder->StartVector();
|
||||
uoffset_t count = 0;
|
||||
ECHECK(ParseVectorDelimiters(count, [&](uoffset_t &) -> CheckedError {
|
||||
size_t count = 0;
|
||||
ECHECK(ParseVectorDelimiters(count, [&](size_t &) -> CheckedError {
|
||||
return ParseFlexBufferValue(builder);
|
||||
}));
|
||||
builder->EndVector(start, false, false);
|
||||
@@ -3922,7 +4019,7 @@ Offset<reflection::Field> FieldDef::Serialize(FlatBufferBuilder *builder,
|
||||
IsInteger(value.type.base_type) ? StringToInt(value.constant.c_str()) : 0,
|
||||
// result may be platform-dependent if underlying is float (not double)
|
||||
IsFloat(value.type.base_type) ? d : 0.0, deprecated, IsRequired(), key,
|
||||
attr__, docs__, IsOptional(), static_cast<uint16_t>(padding));
|
||||
attr__, docs__, IsOptional(), static_cast<uint16_t>(padding), offset64);
|
||||
// TODO: value.constant is almost always "0", we could save quite a bit of
|
||||
// space by sharing it. Same for common values of value.type.
|
||||
}
|
||||
@@ -3940,6 +4037,7 @@ bool FieldDef::Deserialize(Parser &parser, const reflection::Field *field) {
|
||||
presence = FieldDef::MakeFieldPresence(field->optional(), field->required());
|
||||
padding = field->padding();
|
||||
key = field->key();
|
||||
offset64 = field->offset64();
|
||||
if (!DeserializeAttributes(parser, field->attributes())) return false;
|
||||
// TODO: this should probably be handled by a separate attribute
|
||||
if (attributes.Lookup("flexbuffer")) {
|
||||
@@ -4264,12 +4362,18 @@ std::string Parser::ConformTo(const Parser &base) {
|
||||
auto field_base = struct_def_base->fields.Lookup(field.name);
|
||||
const auto qualified_field_name = qualified_name + "." + field.name;
|
||||
if (field_base) {
|
||||
if (field.value.offset != field_base->value.offset)
|
||||
if (field.value.offset != field_base->value.offset) {
|
||||
return "offsets differ for field: " + qualified_field_name;
|
||||
if (field.value.constant != field_base->value.constant)
|
||||
}
|
||||
if (field.value.constant != field_base->value.constant) {
|
||||
return "defaults differ for field: " + qualified_field_name;
|
||||
if (!EqualByName(field.value.type, field_base->value.type))
|
||||
}
|
||||
if (!EqualByName(field.value.type, field_base->value.type)) {
|
||||
return "types differ for field: " + qualified_field_name;
|
||||
}
|
||||
if (field.offset64 != field_base->offset64) {
|
||||
return "offset types differ for field: " + qualified_field_name;
|
||||
}
|
||||
} else {
|
||||
// Doesn't have to exist, deleting fields is fine.
|
||||
// But we should check if there is a field that has the same offset
|
||||
|
||||
Reference in New Issue
Block a user