forked from BigfootDev/flatbuffers
FlatBuffers 64 for C++ (#7935)
* First working hack of adding 64-bit. Don't judge :) * Made vector_downward work on 64 bit types * vector_downward uses size_t, added offset64 to reflection * cleaned up adding offset64 in parser * Add C++ testing skeleton for 64-bit * working test for CreateVector64 * working >2 GiB buffers * support for large strings * simplified CreateString<> to just provide the offset type * generalize CreateVector template * update test_64.afb due to upstream format change * Added Vector64 type, which is just an alias for vector ATM * Switch to Offset64 for Vector64 * Update for reflection bfbs output change * Starting to add support for vector64 type in C++ * made a generic CreateVector that can handle different offsets and vector types * Support for 32-vector with 64-addressing * Vector64 basic builder + tests working * basic support for json vector64 support * renamed fields in test_64bit.fbs to better reflect their use * working C++ vector64 builder * Apply --annotate-sparse-vector to 64-bit tests * Enable Vector64 for --annotate-sparse-vectors * Merged from upstream * Add `near_string` field for testing 32-bit offsets alongside * keep track of where the 32-bit and 64-bit regions are for flatbufferbuilder * move template<> outside class body for GCC * update run.sh to build and run tests * basic assertion for adding 64-bit offset at the wrong time * started to separate `FlatBufferBuilder` into two classes, 1 64-bit aware, the other not * add test for nested flatbuffer vector64, fix bug in alignment of big vectors * fixed CreateDirect method by iterating by Offset64 first * internal refactoring of flatbufferbuilder * block not supported languages in the parser from using 64-bit * evolution tests for adding a vector64 field * conformity tests for adding/removing offset64 attributes * ensure test is for a big buffer * add parser error tests for `offset64` and `vector64` attributes * add missing static that GCC only complains about * remove stdint-uintn.h header that gets automatically added * move 64-bit CalculateOffset internal * fixed return size of EndVector * various fixes on windows * add SizeT to vector_downward * minimze range of size changes in vector and builder * reworked how tracking if 64-offsets are added * Add ReturnT to EndVector * small cleanups * remove need for second Array definition * combine IndirectHelpers into one definition * started support for vector of struct * Support for 32/64-vectors of structs + Offset64 * small cleanups * add verification for vector64 * add sized prefix for 64-bit buffers * add fuzzer for 64-bit * add example of adding many vectors using a wrapper table * run the new -bfbs-gen-embed logic on the 64-bit tests * remove run.sh and fix cmakelist issue * fixed bazel rules * fixed some PR comments * add 64-bit tests to cmakelist
This commit is contained in:
@@ -1,10 +1,13 @@
|
||||
#include "binary_annotator.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "flatbuffers/base.h"
|
||||
#include "flatbuffers/reflection.h"
|
||||
#include "flatbuffers/util.h"
|
||||
#include "flatbuffers/verifier.h"
|
||||
@@ -37,9 +40,9 @@ static BinaryRegion MakeBinaryRegion(
|
||||
return region;
|
||||
}
|
||||
|
||||
static BinarySection MakeBinarySection(
|
||||
const std::string &name, const BinarySectionType type,
|
||||
std::vector<BinaryRegion> regions) {
|
||||
static BinarySection MakeBinarySection(const std::string &name,
|
||||
const BinarySectionType type,
|
||||
std::vector<BinaryRegion> regions) {
|
||||
BinarySection section;
|
||||
section.name = name;
|
||||
section.type = type;
|
||||
@@ -118,12 +121,15 @@ static BinarySection GenerateMissingSection(const uint64_t offset,
|
||||
|
||||
std::map<uint64_t, BinarySection> BinaryAnnotator::Annotate() {
|
||||
flatbuffers::Verifier verifier(bfbs_, static_cast<size_t>(bfbs_length_));
|
||||
if (!reflection::VerifySchemaBuffer(verifier)) { return {}; }
|
||||
|
||||
if ((is_size_prefixed_ &&
|
||||
!reflection::VerifySizePrefixedSchemaBuffer(verifier)) ||
|
||||
!reflection::VerifySchemaBuffer(verifier)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
// The binary is too short to read as a flatbuffers.
|
||||
// TODO(dbaileychess): We could spit out the annotated buffer sections, but
|
||||
// I'm not sure if it is worth it.
|
||||
if (binary_length_ < 4) { return {}; }
|
||||
if (binary_length_ < FLATBUFFERS_MIN_BUFFER_SIZE) { return {}; }
|
||||
|
||||
// Make sure we start with a clean slate.
|
||||
vtables_.clear();
|
||||
@@ -151,7 +157,41 @@ std::map<uint64_t, BinarySection> BinaryAnnotator::Annotate() {
|
||||
}
|
||||
|
||||
uint64_t BinaryAnnotator::BuildHeader(const uint64_t header_offset) {
|
||||
const auto root_table_offset = ReadScalar<uint32_t>(header_offset);
|
||||
uint64_t offset = header_offset;
|
||||
std::vector<BinaryRegion> regions;
|
||||
|
||||
// If this binary is a size prefixed one, attempt to parse the size.
|
||||
if (is_size_prefixed_) {
|
||||
BinaryRegionComment prefix_length_comment;
|
||||
prefix_length_comment.type = BinaryRegionCommentType::SizePrefix;
|
||||
|
||||
bool has_prefix_value = false;
|
||||
const auto prefix_length = ReadScalar<uoffset64_t>(offset);
|
||||
if (*prefix_length <= binary_length_) {
|
||||
regions.push_back(MakeBinaryRegion(offset, sizeof(uoffset64_t),
|
||||
BinaryRegionType::Uint64, 0, 0,
|
||||
prefix_length_comment));
|
||||
offset += sizeof(uoffset64_t);
|
||||
has_prefix_value = true;
|
||||
}
|
||||
|
||||
if (!has_prefix_value) {
|
||||
const auto prefix_length = ReadScalar<uoffset_t>(offset);
|
||||
if (*prefix_length <= binary_length_) {
|
||||
regions.push_back(MakeBinaryRegion(offset, sizeof(uoffset_t),
|
||||
BinaryRegionType::Uint32, 0, 0,
|
||||
prefix_length_comment));
|
||||
offset += sizeof(uoffset_t);
|
||||
has_prefix_value = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_prefix_value) {
|
||||
SetError(prefix_length_comment, BinaryRegionStatus::ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
const auto root_table_offset = ReadScalar<uint32_t>(offset);
|
||||
|
||||
if (!root_table_offset.has_value()) {
|
||||
// This shouldn't occur, since we validate the min size of the buffer
|
||||
@@ -159,22 +199,20 @@ uint64_t BinaryAnnotator::BuildHeader(const uint64_t header_offset) {
|
||||
return std::numeric_limits<uint64_t>::max();
|
||||
}
|
||||
|
||||
std::vector<BinaryRegion> regions;
|
||||
uint64_t offset = header_offset;
|
||||
// TODO(dbaileychess): sized prefixed value
|
||||
const auto root_table_loc = offset + *root_table_offset;
|
||||
|
||||
BinaryRegionComment root_offset_comment;
|
||||
root_offset_comment.type = BinaryRegionCommentType::RootTableOffset;
|
||||
root_offset_comment.name = schema_->root_table()->name()->str();
|
||||
|
||||
if (!IsValidOffset(root_table_offset.value())) {
|
||||
if (!IsValidOffset(root_table_loc)) {
|
||||
SetError(root_offset_comment,
|
||||
BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
|
||||
}
|
||||
|
||||
regions.push_back(
|
||||
MakeBinaryRegion(offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
|
||||
root_table_offset.value(), root_offset_comment));
|
||||
regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
|
||||
BinaryRegionType::UOffset, 0,
|
||||
root_table_loc, root_offset_comment));
|
||||
offset += sizeof(uint32_t);
|
||||
|
||||
if (IsValidRead(offset, flatbuffers::kFileIdentifierLength) &&
|
||||
@@ -193,7 +231,7 @@ uint64_t BinaryAnnotator::BuildHeader(const uint64_t header_offset) {
|
||||
AddSection(header_offset, MakeBinarySection("", BinarySectionType::Header,
|
||||
std::move(regions)));
|
||||
|
||||
return root_table_offset.value();
|
||||
return root_table_loc;
|
||||
}
|
||||
|
||||
BinaryAnnotator::VTable *BinaryAnnotator::GetOrBuildVTable(
|
||||
@@ -656,7 +694,18 @@ void BinaryAnnotator::BuildTable(const uint64_t table_offset,
|
||||
}
|
||||
|
||||
// Read the offset
|
||||
const auto offset_from_field = ReadScalar<uint32_t>(field_offset);
|
||||
uint64_t offset = 0;
|
||||
uint64_t length = sizeof(uint32_t);
|
||||
BinaryRegionType region_type = BinaryRegionType::UOffset;
|
||||
|
||||
if (field->offset64()) {
|
||||
length = sizeof(uint64_t);
|
||||
region_type = BinaryRegionType::UOffset64;
|
||||
offset = ReadScalar<uint64_t>(field_offset).value_or(0);
|
||||
} else {
|
||||
offset = ReadScalar<uint32_t>(field_offset).value_or(0);
|
||||
}
|
||||
// const auto offset_from_field = ReadScalar<uint32_t>(field_offset);
|
||||
uint64_t offset_of_next_item = 0;
|
||||
BinaryRegionComment offset_field_comment;
|
||||
offset_field_comment.type = BinaryRegionCommentType::TableOffsetField;
|
||||
@@ -666,7 +715,7 @@ void BinaryAnnotator::BuildTable(const uint64_t table_offset,
|
||||
|
||||
// Validate any field that isn't inline (i.e., non-structs).
|
||||
if (!IsInlineField(field)) {
|
||||
if (!offset_from_field.has_value()) {
|
||||
if (offset == 0) {
|
||||
const uint64_t remaining = RemainingBytes(field_offset);
|
||||
|
||||
SetError(offset_field_comment,
|
||||
@@ -678,14 +727,14 @@ void BinaryAnnotator::BuildTable(const uint64_t table_offset,
|
||||
continue;
|
||||
}
|
||||
|
||||
offset_of_next_item = field_offset + offset_from_field.value();
|
||||
offset_of_next_item = field_offset + offset;
|
||||
|
||||
if (!IsValidOffset(offset_of_next_item)) {
|
||||
SetError(offset_field_comment,
|
||||
BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
|
||||
regions.push_back(MakeBinaryRegion(
|
||||
field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
|
||||
offset_of_next_item, offset_field_comment));
|
||||
regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
|
||||
offset_of_next_item,
|
||||
offset_field_comment));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -702,9 +751,9 @@ void BinaryAnnotator::BuildTable(const uint64_t table_offset,
|
||||
} else {
|
||||
offset_field_comment.default_value = "(table)";
|
||||
|
||||
regions.push_back(MakeBinaryRegion(
|
||||
field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
|
||||
offset_of_next_item, offset_field_comment));
|
||||
regions.push_back(MakeBinaryRegion(field_offset, length, region_type,
|
||||
0, offset_of_next_item,
|
||||
offset_field_comment));
|
||||
|
||||
BuildTable(offset_of_next_item, BinarySectionType::Table,
|
||||
next_object);
|
||||
@@ -713,17 +762,25 @@ void BinaryAnnotator::BuildTable(const uint64_t table_offset,
|
||||
|
||||
case reflection::BaseType::String: {
|
||||
offset_field_comment.default_value = "(string)";
|
||||
regions.push_back(MakeBinaryRegion(
|
||||
field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
|
||||
offset_of_next_item, offset_field_comment));
|
||||
regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
|
||||
offset_of_next_item,
|
||||
offset_field_comment));
|
||||
BuildString(offset_of_next_item, table, field);
|
||||
} break;
|
||||
|
||||
case reflection::BaseType::Vector: {
|
||||
offset_field_comment.default_value = "(vector)";
|
||||
regions.push_back(MakeBinaryRegion(
|
||||
field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
|
||||
offset_of_next_item, offset_field_comment));
|
||||
regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
|
||||
offset_of_next_item,
|
||||
offset_field_comment));
|
||||
BuildVector(offset_of_next_item, table, field, table_offset,
|
||||
vtable->fields);
|
||||
} break;
|
||||
case reflection::BaseType::Vector64: {
|
||||
offset_field_comment.default_value = "(vector64)";
|
||||
regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
|
||||
offset_of_next_item,
|
||||
offset_field_comment));
|
||||
BuildVector(offset_of_next_item, table, field, table_offset,
|
||||
vtable->fields);
|
||||
} break;
|
||||
@@ -768,8 +825,7 @@ void BinaryAnnotator::BuildTable(const uint64_t table_offset,
|
||||
offset_field_comment.default_value =
|
||||
"(union of type `" + enum_type + "`)";
|
||||
|
||||
regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint32_t),
|
||||
BinaryRegionType::UOffset, 0,
|
||||
regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
|
||||
union_offset, offset_field_comment));
|
||||
|
||||
} break;
|
||||
@@ -986,7 +1042,28 @@ void BinaryAnnotator::BuildVector(
|
||||
BinaryRegionComment vector_length_comment;
|
||||
vector_length_comment.type = BinaryRegionCommentType::VectorLength;
|
||||
|
||||
const auto vector_length = ReadScalar<uint32_t>(vector_offset);
|
||||
const bool is_64_bit_vector =
|
||||
field->type()->base_type() == reflection::BaseType::Vector64;
|
||||
|
||||
flatbuffers::Optional<uint64_t> vector_length;
|
||||
uint32_t vector_length_size_type = 0;
|
||||
BinaryRegionType region_type = BinaryRegionType::Uint32;
|
||||
BinarySectionType section_type = BinarySectionType::Vector;
|
||||
|
||||
if (is_64_bit_vector) {
|
||||
auto v = ReadScalar<uint64_t>(vector_offset);
|
||||
if (v.has_value()) { vector_length = v.value(); }
|
||||
vector_length_size_type = sizeof(uint64_t);
|
||||
region_type = BinaryRegionType::Uint64;
|
||||
section_type = BinarySectionType::Vector64;
|
||||
} else {
|
||||
auto v = ReadScalar<uint32_t>(vector_offset);
|
||||
if (v.has_value()) { vector_length = v.value(); }
|
||||
vector_length_size_type = sizeof(uint32_t);
|
||||
region_type = BinaryRegionType::Uint32;
|
||||
section_type = BinarySectionType::Vector;
|
||||
}
|
||||
|
||||
if (!vector_length.has_value()) {
|
||||
const uint64_t remaining = RemainingBytes(vector_offset);
|
||||
SetError(vector_length_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
|
||||
@@ -1006,7 +1083,7 @@ void BinaryAnnotator::BuildVector(
|
||||
// Validate there are enough bytes left in the binary to process all the
|
||||
// items.
|
||||
const uint64_t last_item_offset =
|
||||
vector_offset + sizeof(uint32_t) +
|
||||
vector_offset + vector_length_size_type +
|
||||
vector_length.value() * GetElementSize(field);
|
||||
|
||||
if (!IsValidOffset(last_item_offset - 1)) {
|
||||
@@ -1016,20 +1093,18 @@ void BinaryAnnotator::BuildVector(
|
||||
MakeSingleRegionBinarySection(
|
||||
std::string(table->name()->c_str()) + "." + field->name()->c_str(),
|
||||
BinarySectionType::Vector,
|
||||
MakeBinaryRegion(vector_offset, sizeof(uint32_t),
|
||||
BinaryRegionType::Uint32, 0, 0,
|
||||
vector_length_comment)));
|
||||
MakeBinaryRegion(vector_offset, vector_length_size_type,
|
||||
region_type, 0, 0, vector_length_comment)));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<BinaryRegion> regions;
|
||||
|
||||
regions.push_back(MakeBinaryRegion(vector_offset, sizeof(uint32_t),
|
||||
BinaryRegionType::Uint32, 0, 0,
|
||||
vector_length_comment));
|
||||
regions.push_back(MakeBinaryRegion(vector_offset, vector_length_size_type,
|
||||
region_type, 0, 0, vector_length_comment));
|
||||
// Consume the vector length offset.
|
||||
uint64_t offset = vector_offset + sizeof(uint32_t);
|
||||
uint64_t offset = vector_offset + vector_length_size_type;
|
||||
|
||||
switch (field->type()->element()) {
|
||||
case reflection::BaseType::Obj: {
|
||||
@@ -1302,7 +1377,7 @@ void BinaryAnnotator::BuildVector(
|
||||
AddSection(vector_offset,
|
||||
MakeBinarySection(std::string(table->name()->c_str()) + "." +
|
||||
field->name()->c_str(),
|
||||
BinarySectionType::Vector, std::move(regions)));
|
||||
section_type, std::move(regions)));
|
||||
}
|
||||
|
||||
std::string BinaryAnnotator::BuildUnion(const uint64_t union_offset,
|
||||
|
||||
Reference in New Issue
Block a user