Annotated Flatbuffer Binary (#7174)

* Annotated Flatbuffer Binary

* Various fixes

* Handles old schema

* handle multiple missing fields

* minor edits

* bazel fix, spelling fix, ascii fix
This commit is contained in:
Derek Bailey
2022-03-18 14:08:05 -07:00
committed by GitHub
parent 0bceba24db
commit d3aeee32bb
18 changed files with 2520 additions and 7 deletions

View File

@@ -34,10 +34,14 @@ cc_library(
cc_library(
name = "flatc_library",
srcs = [
"annotated_binary_text_gen.cpp",
"annotated_binary_text_gen.h",
"bfbs_gen.h",
"bfbs_gen_lua.cpp",
"bfbs_gen_lua.h",
"bfbs_namer.h",
"binary_annotator.cpp",
"binary_annotator.h",
"flatc.cpp",
"namer.h",
],

View File

@@ -0,0 +1,320 @@
#include "annotated_binary_text_gen.h"
#include "flatbuffers/util.h"
namespace flatbuffers {
namespace {
// Layout settings shared by the text-generation helpers in this file.
struct OutputConfig {
  // Column width used when left-justifying a region's type string.
  size_t largest_type_string{ 10 };

  // Column width used when left-justifying a region's value string.
  size_t largest_value_string{ 20 };

  // Number of raw bytes printed per output line; also the chunk size used
  // when an array value is split across several lines.
  size_t max_bytes_per_line{ 8 };

  // Width argument handed to ToHex when printing offsets.
  size_t offset_max_char{ 4 };

  // Character placed between the output columns.
  char delimiter{ '|' };
};
// Returns the lower-case label printed for a section type. Any value that is
// not listed below is reported as "todo".
static std::string ToString(const BinarySectionType type) {
  const char *label = "todo";
  switch (type) {
    case BinarySectionType::Header: label = "header"; break;
    case BinarySectionType::Table: label = "table"; break;
    case BinarySectionType::RootTable: label = "root_table"; break;
    case BinarySectionType::VTable: label = "vtable"; break;
    case BinarySectionType::Struct: label = "struct"; break;
    case BinarySectionType::String: label = "string"; break;
    case BinarySectionType::Vector: label = "vector"; break;
    case BinarySectionType::Unknown: label = "unknown"; break;
    case BinarySectionType::Union: label = "union"; break;
    case BinarySectionType::Padding: label = "padding"; break;
    default: break;
  }
  return label;
}
// True for the two region types (UOffset, SOffset) whose value gets an extra
// "Loc:" annotation when printed. VOffset is deliberately not included.
static bool IsOffset(const BinaryRegionType type) {
  switch (type) {
    case BinaryRegionType::UOffset:
    case BinaryRegionType::SOffset: return true;
    default: return false;
  }
}
// Formats a scalar region as "0x<hex bytes> (<decimal value>)".
//
// The hex part is built one byte at a time, starting at the highest address
// of the region and walking down to the lowest. The decimal part is the value
// read through GetScalar<T>, rendered with std::to_string.
template<typename T>
std::string ToValueString(const BinaryRegion &region, const uint8_t *binary) {
  const T value = GetScalar<T>(binary + region.offset);

  std::string text = "0x";
  for (uint64_t remaining = region.length; remaining > 0; --remaining) {
    text += ToHex(binary[region.offset + remaining - 1]);
  }

  text += " (";
  text += std::to_string(value);
  text += ")";
  return text;
}
// String specialization: copies `region.array_length` raw characters starting
// at the region's offset. No terminator is looked for or required.
template<>
std::string ToValueString<std::string>(const BinaryRegion &region,
                                       const uint8_t *binary) {
  const char *first = reinterpret_cast<const char *>(binary + region.offset);
  const size_t count = static_cast<size_t>(region.array_length);
  return std::string(first, count);
}
static std::string ToValueString(const BinaryRegion &region,
const uint8_t *binary,
const OutputConfig &output_config) {
std::string s;
if (region.array_length) {
if (region.type == BinaryRegionType::Uint8 ||
region.type == BinaryRegionType::Unknown) {
// Interpet each value as a ASCII to aid debugging
for (uint64_t i = 0; i < region.array_length; ++i) {
const uint8_t c = *(binary + region.offset + i);
s += isprint(c) ? toascii(c) : '.';
}
return s;
} else if (region.type == BinaryRegionType::Char) {
// string value
return ToValueString<std::string>(region, binary);
}
}
switch (region.type) {
case BinaryRegionType::Uint32:
return ToValueString<uint32_t>(region, binary);
case BinaryRegionType::Int32: return ToValueString<int32_t>(region, binary);
case BinaryRegionType::Uint16:
return ToValueString<uint16_t>(region, binary);
case BinaryRegionType::Int16: return ToValueString<int16_t>(region, binary);
case BinaryRegionType::Bool: return ToValueString<bool>(region, binary);
case BinaryRegionType::Uint8: return ToValueString<uint8_t>(region, binary);
case BinaryRegionType::Char: return ToValueString<char>(region, binary);
case BinaryRegionType::Byte:
case BinaryRegionType::Int8: return ToValueString<int8_t>(region, binary);
case BinaryRegionType::Int64: return ToValueString<int64_t>(region, binary);
case BinaryRegionType::Uint64:
return ToValueString<uint64_t>(region, binary);
case BinaryRegionType::Double: return ToValueString<double>(region, binary);
case BinaryRegionType::Float: return ToValueString<float>(region, binary);
// Handle Offsets separately, incase they add additional details.
case BinaryRegionType::UOffset:
s += ToValueString<uint32_t>(region, binary);
break;
case BinaryRegionType::SOffset:
s += ToValueString<int32_t>(region, binary);
break;
case BinaryRegionType::VOffset:
s += ToValueString<uint16_t>(region, binary);
break;
default: break;
}
// If this is an offset type, include the calculated offset location in the
// value.
// TODO(dbaileychess): It might be nicer to put this in the comment field.
if (IsOffset(region.type)) {
s += " Loc: +0x";
s += ToHex(region.points_to_offset, output_config.offset_max_char);
}
return s;
}
// Carries the unprinted remainder of a multi-line value between successive
// documentation lines of the same region.
struct DocContinuation {
  // Column at which the value text started on the first line. A non-zero
  // value marks the continuation as active.
  size_t value_start_column{ 0 };

  // The part of the value that still has to be printed.
  std::string value{};
};
// Returns the region's type name, followed by "[N]" when the region is an
// array of N elements (array_length != 0).
static std::string GenerateTypeString(const BinaryRegion &region) {
  std::string text = ToString(region.type);
  if (region.array_length) {
    text += "[" + std::to_string(region.array_length) + "]";
  }
  return text;
}
// Builds the documentation columns (type | value | comment) that follow the
// raw bytes of one output line.
//
// If `continuation` is active (its start column is non-zero), only the next
// chunk of the pending value is emitted, aligned under the value column, and
// `continuation.value` is shortened accordingly. Otherwise the full
// documentation is generated. For array regions the value is cut to
// `max_bytes_per_line` characters and the remainder is stored in
// `continuation` for the following lines.
static std::string GenerateDocumentation(const BinaryRegion &region,
                                         const BinarySection &,
                                         const uint8_t *binary,
                                         DocContinuation &continuation,
                                         const OutputConfig &output_config) {
  const size_t chunk = output_config.max_bytes_per_line;

  // A pending continuation takes priority over generating fresh documentation.
  if (continuation.value_start_column != 0) {
    std::string line(continuation.value_start_column - 2, ' ');
    line += output_config.delimiter;
    line += " ";
    line += continuation.value.substr(0, chunk);
    continuation.value =
        continuation.value.substr(std::min(chunk, continuation.value.size()));
    return line;
  }

  // Left-justifies `text` in a field of `width` characters.
  const auto left_justify = [](const std::string &text, const size_t width) {
    std::stringstream ss;
    ss << std::setw(width) << std::left;
    ss << text;
    return ss.str();
  };

  std::string line = left_justify(GenerateTypeString(region),
                                  output_config.largest_type_string);
  line += " ";
  line += output_config.delimiter;
  line += " ";

  const std::string value = ToValueString(region, binary, output_config);
  if (region.array_length) {
    // Remember where the value starts so continuation lines can align to it.
    continuation.value_start_column = line.size();
    // Emit the first chunk now and keep the rest for later lines.
    line += left_justify(value.substr(0, chunk),
                         output_config.largest_value_string);
    continuation.value = value.substr(std::min(chunk, value.size()));
  } else {
    line += left_justify(value, output_config.largest_value_string);
  }

  line += " ";
  if (!region.comment.empty()) {
    line += output_config.delimiter;
    line += " ";
    line += region.comment;
  }
  return line;
}
// Renders one region as one or more output lines of the form
//   +0x<offset> | <bytes...> | <documentation>
//
// At most `output_config.max_bytes_per_line` raw bytes are printed per line.
// The documentation is attached to the first line. When the documentation
// value spans several lines (array regions), each following line carries the
// next chunk until the continuation is exhausted.
//
// `output_config.max_bytes_per_line` must be non-zero, since it is used as a
// modulo divisor.
static std::string GenerateRegion(const BinaryRegion &region,
                                  const BinarySection &section,
                                  const uint8_t *binary,
                                  const OutputConfig &output_config) {
  // One rendered byte occupies a separating space plus its hex digits. A
  // missing byte on a short line must be padded to the same width, otherwise
  // the closing delimiter and the documentation column drift out of
  // alignment. NOTE(review): assumes ToHex renders a single byte as two hex
  // characters - confirm against ToHex.
  const std::string empty_byte_cell(3, ' ');

  std::string s;
  bool doc_generated = false;
  DocContinuation doc_continuation;

  for (uint64_t i = 0; i < region.length; ++i) {
    if ((i % output_config.max_bytes_per_line) == 0) {
      // Start a new line of output
      s += '\n';
      s += " ";
      s += "+0x";
      s += ToHex(region.offset + i, output_config.offset_max_char);
      s += " ";
      s += output_config.delimiter;
    }

    // Add each byte
    s += " ";
    s += ToHex(binary[region.offset + i]);

    const bool end_of_line =
        ((i + 1) % output_config.max_bytes_per_line) == 0;
    const bool end_of_region = (i + 1 == region.length);
    if (!end_of_line && !end_of_region) { continue; }

    if (end_of_region) {
      // We ran out of bytes before filling the line, so blank out the
      // remaining byte cells to keep every line aligned globally.
      for (uint64_t j = i + 1; (j % output_config.max_bytes_per_line) != 0;
           ++j) {
        s += empty_byte_cell;
      }
    }

    s += " ";
    s += output_config.delimiter;

    // This is the end of a line or the last byte of the region, so generate
    // the end-of-line documentation unless it has been completed already.
    if (!doc_generated) {
      s += " ";
      s += GenerateDocumentation(region, section, binary, doc_continuation,
                                 output_config);
      // A non-empty continuation means the documentation is being printed
      // across multiple lines and is not finished yet.
      doc_generated = doc_continuation.value.empty();
    }
  }

  return s;
}
// Renders a whole section: a blank line, a "<type> (<name>):" heading (the
// name part is omitted when the section is unnamed), then every region of the
// section in order.
static std::string GenerateSection(const BinarySection &section,
                                   const uint8_t *binary,
                                   const OutputConfig &output_config) {
  std::string out = "\n";
  out += ToString(section.type);
  if (!section.name.empty()) {
    out += " (";
    out += section.name;
    out += ")";
  }
  out += ":";

  for (const BinaryRegion &region : section.regions) {
    out += GenerateRegion(region, section, binary, output_config);
  }
  return out;
}
} // namespace
// Writes the annotated text rendering of the binary to disk.
//
// `filename` is the path of the binary being annotated. It is echoed in the
// output header and used to derive the output path: the extension is
// stripped, `options_.output_postfix` is appended, and then either
// `options_.output_extension` or (if that is empty) the original extension is
// added back. `schema_filename` is only echoed in the output header.
//
// Returns the result of SaveFile for the derived output path.
bool AnnotatedBinaryTextGenerator::Generate(
    const std::string &filename, const std::string &schema_filename) {
  OutputConfig output_config;

  // The per-line byte count is used as a modulo divisor when laying out
  // regions, so never let a zero from the options through.
  output_config.max_bytes_per_line =
      options_.max_bytes_per_line > 0 ? options_.max_bytes_per_line : 1;

  // Given the length of the binary, we can calculate the maximum number of
  // characters to display in the offset hex: (i.e. 2 would lead to 0XFF being
  // the max output).
  output_config.offset_max_char =
      binary_length_ > 0xFFFFFF
          ? 8
          : (binary_length_ > 0xFFFF ? 6 : (binary_length_ > 0xFF ? 4 : 2));

  // Find the largest type string of all the regions in this file, so we can
  // align the output nicely.
  output_config.largest_type_string = 0;
  for (const auto &section : annotations_) {
    for (const auto &region : section.second.regions) {
      std::string s = GenerateTypeString(region);
      if (s.size() > output_config.largest_type_string) {
        output_config.largest_type_string = s.size();
      }

      // Don't consider array regions, as they will be split to multiple lines.
      if (!region.array_length) {
        s = ToValueString(region, binary_, output_config);
        if (s.size() > output_config.largest_value_string) {
          output_config.largest_value_string = s.size();
        }
      }
    }
  }

  // Generate each of the binary sections
  std::string s;
  s += "// Annotated Flatbuffer Binary\n";
  s += "//\n";
  s += "// Schema file: " + schema_filename + "\n";
  s += "// Binary file: " + filename + "\n";

  for (const auto &section : annotations_) {
    s += GenerateSection(section.second, binary_, output_config);
    s += "\n";
  }

  // Modify the output filename.
  std::string output_filename = StripExtension(filename);
  output_filename += options_.output_postfix;
  output_filename +=
      "." + (options_.output_extension.empty() ? GetExtension(filename)
                                               : options_.output_extension);

  return SaveFile(output_filename.c_str(), s, false);
}
} // namespace flatbuffers

View File

@@ -0,0 +1,71 @@
/*
* Copyright 2021 Google Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FLATBUFFERS_ANNOTATED_BINARY_TEXT_GEN_H_
#define FLATBUFFERS_ANNOTATED_BINARY_TEXT_GEN_H_
#include <map>
#include <memory>
#include <string>
#include "binary_annotator.h"
namespace flatbuffers {
// Turns a set of annotated binary sections plus the raw binary they describe
// into a human-readable text file (see Generate()).
class AnnotatedBinaryTextGenerator {
public:
// Settings controlling the layout of the generated text and the name of the
// output file.
struct Options {
// The maximum number of raw bytes to print per line in the output. 8 is a
// good default due to the largest type (double) being 8 bytes long.
size_t max_bytes_per_line = 8;
// The output file postfix, appended between the filename and the extension.
// Example binary1.bin -> binary1_annotated.bin
std::string output_postfix = "";
// The output file extension, replacing any extension given. If empty, don't
// change the provided extension. AFB = Annotated Flatbuffer Binary
//
// Example: binary1.bin -> binary1.afb
std::string output_extension = "afb";
};
// `options` and `annotations` are copied/moved into the generator.
// `annotations` maps a uint64_t key to the section stored under it.
// `binary` is stored as a raw pointer and `binary_length` as given; the class
// declares no destructor, so the buffer is not released here and must remain
// valid for as long as Generate() may be called.
explicit AnnotatedBinaryTextGenerator(
const Options &options, std::map<uint64_t, BinarySection> annotations,
const uint8_t *const binary, const int64_t binary_length)
: annotations_(std::move(annotations)),
binary_(binary),
binary_length_(binary_length),
options_(options) {}
// Generate the annotated binary for the given `filename`. Returns true if the
// annotated binary was successfully saved.
bool Generate(const std::string &filename, const std::string &schema_filename);
private:
// The annotated sections to render.
const std::map<uint64_t, BinarySection> annotations_;
// The binary data itself.
const uint8_t *binary_;
// Length of the binary data, as passed to the constructor.
const int64_t binary_length_;
// Output configuration
const Options options_;
};
} // namespace flatbuffers
#endif // FLATBUFFERS_ANNOTATED_BINARY_TEXT_GEN_H_

769
src/binary_annotator.cpp Normal file
View File

@@ -0,0 +1,769 @@
#include "binary_annotator.h"
#include <iostream>
#include <vector>
#include "flatbuffers/reflection.h"
#include "flatbuffers/verifier.h"
namespace flatbuffers {
namespace {
// Convenience factory for BinaryRegion: creates a default-constructed region
// and assigns each field from the matching argument. All arguments are
// optional and default to 0 / Unknown / empty.
static BinaryRegion MakeBinaryRegion(
    const uint64_t offset = 0, const uint64_t length = 0,
    const BinaryRegionType type = BinaryRegionType::Unknown,
    const uint64_t array_length = 0, const uint64_t points_to_offset = 0,
    const std::string &comment = "") {
  BinaryRegion result;
  // Where the region lives and how large it is.
  result.offset = offset;
  result.length = length;
  // How the bytes are to be interpreted.
  result.type = type;
  result.array_length = array_length;
  // Extra annotation data.
  result.points_to_offset = points_to_offset;
  result.comment = comment;
  return result;
}
// Convenience factory for BinarySection: creates a default-constructed
// section and fills in its name, type and regions.
//
// `regions` is taken by value and moved into the section. Callers that pass
// `std::move(their_regions)` therefore hand over the vector without a copy;
// callers that pass an lvalue get exactly one copy, as before.
static BinarySection MakeBinarySection(const std::string &name,
                                       const BinarySectionType type,
                                       std::vector<BinaryRegion> regions) {
  BinarySection section;
  section.name = name;
  section.type = type;
  section.regions = std::move(regions);
  return section;
}
// Appends a region describing a scalar table field stored at `offset` and
// returns the offset just past it. The region's size and type are derived
// from the field's base type; the comment names the field and its base type.
static uint64_t BuildField(const uint64_t offset,
                           const reflection::Field *field,
                           std::vector<BinaryRegion> &regions) {
  const auto base_type = field->type()->base_type();
  const uint64_t size = GetTypeSize(base_type);
  const BinaryRegionType region_type = GetRegionType(base_type);

  std::string comment = "table field `";
  comment += field->name()->c_str();
  comment += "` (";
  comment += reflection::EnumNameBaseType(base_type);
  comment += ")";

  regions.emplace_back(
      MakeBinaryRegion(offset, size, region_type, 0, 0, comment));
  return offset + size;
}
// Appends a region describing a scalar struct field stored at `offset` and
// returns the offset just past it. The comment has the form
// "struct field `<Object>.<field>` (<base type>)".
static uint64_t BuildStructureField(const uint64_t offset,
                                    const reflection::Object *object,
                                    const reflection::Field *field,
                                    std::vector<BinaryRegion> &regions) {
  const auto base_type = field->type()->base_type();
  const uint64_t size = GetTypeSize(base_type);

  std::string comment = "struct field `";
  comment += object->name()->c_str();
  comment += ".";
  comment += field->name()->c_str();
  comment += "` (";
  comment += reflection::EnumNameBaseType(base_type);
  comment += ")";

  regions.emplace_back(
      MakeBinaryRegion(offset, size, GetRegionType(base_type), 0, 0, comment));
  return offset + size;
}
// Appends one region per element of a fixed-length scalar array that starts
// at `offset`, and returns the offset just past the last element.
//
// Each element is emitted as its own region (rather than one grouped array
// region) so that the individual values are shown in the output. Grouping the
// whole array into a single region displays nicely as an array but hides the
// per-element values.
static uint64_t BuildArrayField(uint64_t offset,
                                const reflection::Object *object,
                                const reflection::Field *field,
                                const uint16_t array_length,
                                std::vector<BinaryRegion> &regions) {
  const auto element_type = field->type()->element();
  const uint64_t element_size = GetTypeSize(element_type);

  // Everything in the comment before the element index is loop-invariant.
  const std::string prefix = std::string("array field `") +
                             object->name()->c_str() + "." +
                             field->name()->c_str() + "[";

  for (uint16_t i = 0; i < array_length; ++i) {
    std::string comment = prefix;
    comment += std::to_string(i);
    comment += "]` (";
    comment += reflection::EnumNameBaseType(element_type);
    comment += ")";

    regions.emplace_back(MakeBinaryRegion(
        offset, element_size, GetRegionType(element_type), 0, 0, comment));
    offset += element_size;
  }
  return offset;
}
// Returns true if any byte in binary[offset, offset + length) is non-zero.
// An empty range (length == 0) yields false. No bounds checking is done; the
// caller must ensure the range lies within `binary`.
static bool IsNonZeroRegion(uint64_t offset, uint64_t length,
                            const uint8_t *binary) {
  const uint64_t end = offset + length;
  uint64_t pos = offset;
  while (pos < end) {
    if (binary[pos] != 0) { return true; }
    ++pos;
  }
  return false;
}
} // namespace
std::map<uint64_t, BinarySection> BinaryAnnotator::Annotate() {
flatbuffers::Verifier verifier(bfbs_, static_cast<size_t>(bfbs_length_));
if (!reflection::VerifySchemaBuffer(verifier)) { return {}; }
// Make sure we start with a clean slate.
vtables_.clear();
strings_.clear();
sections_.clear();
// First parse the header region which always start at offset 0.
// The returned offset will point to the root_table location.
const uint64_t root_table_offset = BuildHeader(0);
// Build the root table, and all else will be referenced from it.
BuildTable(root_table_offset, BinarySectionType::RootTable,
schema_->root_table());
// Now that all the sections are built, scan the regions between them and
// insert padding bytes that are implied.
FixMissingSections();
return sections_;
}
// Annotates the header section that starts at `offset` and returns the value
// of the root-table offset stored there.
//
// The header always contains the 4-byte offset to the root table. The 4 bytes
// that follow are annotated as the file identifier only if at least one of
// them is non-zero. This is a heuristic: all-zero bytes are left for the
// padding pass to fill in later.
uint64_t BinaryAnnotator::BuildHeader(const uint64_t offset) {
  std::vector<BinaryRegion> regions;

  // TODO(dbaileychess): sized prefixed value
  const uint32_t root_table_offset = GetScalar<uint32_t>(offset);
  regions.emplace_back(MakeBinaryRegion(
      offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0, root_table_offset,
      std::string("offset to root table `") +
          schema_->root_table()->name()->str() + "`"));

  // The file identifier, when present, sits directly after the root-table
  // offset. Inspect those bytes, not the offset bytes themselves (which are
  // practically never all zero).
  const uint64_t file_identifier_offset = offset + sizeof(uint32_t);
  const uint64_t file_identifier_length = 4 * sizeof(uint8_t);
  if (IsNonZeroRegion(file_identifier_offset, file_identifier_length,
                      binary_)) {
    regions.emplace_back(MakeBinaryRegion(
        file_identifier_offset, file_identifier_length, BinaryRegionType::Char,
        4, 0, std::string("File Identifier")));
  }

  sections_.insert(std::make_pair(
      offset, MakeBinarySection("", BinarySectionType::Header, regions)));

  return root_table_offset;
}
// Annotates the vtable located at `offset` for the given `table` definition
// and records it in `vtables_` and `sections_`. A vtable that was already
// built (same offset) is not built again.
//
// The vtable consists of two uint16 sizes (the vtable's own size and the size
// of the referring table) followed by one uint16 slot per field id. Slots
// that the schema knows about are annotated with the field name; slots beyond
// those are annotated as unknown fields (the binary was presumably written
// with a newer schema).
void BinaryAnnotator::BuildVTable(uint64_t offset,
                                  const reflection::Object *table) {
  const uint64_t vtable_offset = offset;

  // First see if we have used this vtable before, if so skip building it again.
  auto it = vtables_.find(vtable_offset);
  if (it != vtables_.end()) { return; }

  std::vector<BinaryRegion> regions;

  // Vtables start with the size of the vtable
  const uint16_t vtable_size = GetScalar<uint16_t>(offset);
  regions.emplace_back(MakeBinaryRegion(offset, sizeof(uint16_t),
                                        BinaryRegionType::Uint16, 0, 0,
                                        std::string("size of this vtable")));
  offset += sizeof(uint16_t);

  // Then they have the size of the table they reference.
  const uint16_t table_size = GetScalar<uint16_t>(offset);
  regions.emplace_back(
      MakeBinaryRegion(offset, sizeof(uint16_t), BinaryRegionType::Uint16, 0, 0,
                       std::string("size of referring table")));
  offset += sizeof(uint16_t);

  const uint64_t offset_start = offset;

  // A mapping between field (and its id) to the relative offset (uint16_t)
  // from the start of the table.
  std::map<uint16_t, VTable::Entry> fields;

  // Number of vtable slots that were matched to a schema field. It is used
  // below as the first id that the schema does not know about, so every slot
  // that is annotated here must be counted, including error cases.
  // NOTE(review): this relies on ForAllFields visiting fields in increasing
  // id order with no gaps - confirm against ForAllFields.
  uint16_t fields_processed = 0;

  // Loop over all the fields.
  ForAllFields(table, /*reverse=*/false, [&](const reflection::Field *field) {
    const uint64_t field_offset = offset_start + field->id() * sizeof(uint16_t);

    if (field_offset >= vtable_offset + vtable_size) {
      // This field_offset is too large for this vtable, so it must come from a
      // newer schema than the binary was created with, or the binary writer
      // did not write it. In either case, it is safe to ignore.

      // TODO(dbaileychess): We could show which fields are not set and their
      // default values if we want. We just need a way to make it obvious that
      // it isn't part of the buffer.
      return;
    }

    const uint16_t offset_from_table = GetScalar<uint16_t>(field_offset);

    VTable::Entry entry;
    entry.field = field;
    entry.offset_from_table = offset_from_table;
    fields.insert(std::make_pair(field->id(), entry));

    std::string default_label;
    if (offset_from_table == 0) {
      // Not present, so could be default or be optional.
      if (field->required()) {
        // If this is a required field, make it known this is an error.
        regions.push_back(MakeBinaryRegion(
            field_offset, sizeof(uint16_t), BinaryRegionType::VOffset, 0, 0,
            std::string("ERROR: required field `") + field->name()->c_str() +
                "` (id: " + std::to_string(field->id()) + ") is not present!"));
        // The slot has been annotated, so it counts as processed. Without
        // this, the unknown-field pass below would start one id too early and
        // annotate an already-known slot a second time.
        fields_processed++;
        return;
      } else {
        // It's an optional field, so get the default value and provide an
        // annotation for it.
        if (IsScalar(field->type()->base_type())) {
          default_label += " <defaults to ";
          default_label += IsFloat(field->type()->base_type())
                               ? std::to_string(field->default_real())
                               : std::to_string(field->default_integer());
          default_label += "> (";
        } else {
          default_label += " <null> (";
        }
        default_label +=
            reflection::EnumNameBaseType(field->type()->base_type());
        default_label += ")";
      }
    }

    regions.push_back(MakeBinaryRegion(
        field_offset, sizeof(uint16_t), BinaryRegionType::VOffset, 0, 0,
        std::string("offset to field `") + field->name()->c_str() +
            "` (id: " + std::to_string(field->id()) + ")" + default_label));

    fields_processed++;
  });

  // Check if we covered all the expectant fields. If not, we need to add them
  // as unknown fields. A vtable_size smaller than the two size entries is
  // treated as having no field slots instead of underflowing.
  const size_t vtable_header_size = sizeof(uint16_t) + sizeof(uint16_t);
  const uint16_t expectant_vtable_fields = static_cast<uint16_t>(
      vtable_size >= vtable_header_size
          ? (vtable_size - vtable_header_size) / sizeof(uint16_t)
          : 0);

  for (uint16_t id = fields_processed; id < expectant_vtable_fields; ++id) {
    const uint64_t field_offset = offset_start + id * sizeof(uint16_t);
    const uint16_t offset_from_table = GetScalar<uint16_t>(field_offset);

    VTable::Entry entry;
    entry.field = nullptr;  // No field to reference.
    entry.offset_from_table = offset_from_table;
    fields.insert(std::make_pair(id, entry));

    regions.push_back(MakeBinaryRegion(
        field_offset, sizeof(uint16_t), BinaryRegionType::VOffset, 0, 0,
        std::string("offset to unknown field (id: " + std::to_string(id) +
                    ")")));
  }

  sections_[vtable_offset] = MakeBinarySection(
      table->name()->str(), BinarySectionType::VTable, std::move(regions));

  VTable vtable;
  vtable.fields = std::move(fields);
  vtable.table_size = table_size;
  vtable.vtable_size = vtable_size;

  vtables_[vtable_offset] = vtable;
}
// Annotates the table located at `offset` as a section of the given `type`,
// using `table` as its schema definition.
//
// The table's vtable is built first (or looked up if already built). Then each
// present field is annotated in order of its position within the table:
// scalars and structs inline, and tables/strings/vectors/unions as an offset
// region plus a recursively built section at the target. Vtable slots without
// a schema field are annotated as unknown fields. Finally, any bytes between
// the end of the vtable offset and the end of the table that no field region
// covers are annotated as padding.
void BinaryAnnotator::BuildTable(uint64_t offset, const BinarySectionType type,
                                 const reflection::Object *table) {
  std::vector<BinaryRegion> regions;

  const uint64_t table_offset = offset;

  // Tables start with the vtable
  const uint64_t vtable_offset = table_offset - GetScalar<int32_t>(offset);
  regions.emplace_back(
      MakeBinaryRegion(offset, sizeof(int32_t), BinaryRegionType::SOffset, 0,
                       vtable_offset, std::string("offset to vtable")));
  offset += sizeof(int32_t);

  // Parse the vtable first so we know what the rest of the fields in the table
  // are.
  BuildVTable(vtable_offset, table);

  const VTable &vtable = vtables_.at(vtable_offset);

  // This is the size and length of this table.
  const uint16_t table_size = vtable.table_size;
  const uint64_t table_end_offset = table_offset + table_size;
  const uint64_t field_offset_start = offset;

  // We need to iterate over the vtable fields by their offset in the binary,
  // not by their IDs. So copy them over to another vector that we can sort on
  // the offset_from_table property.
  std::vector<VTable::Entry> fields;
  for (const auto &vtable_field : vtable.fields) {
    fields.push_back(vtable_field.second);
  }

  std::stable_sort(fields.begin(), fields.end(),
                   [](const VTable::Entry &a, const VTable::Entry &b) {
                     return a.offset_from_table < b.offset_from_table;
                   });

  // Iterate over all the fields by order of their offset.
  for (size_t i = 0; i < fields.size(); ++i) {
    const reflection::Field *field = fields[i].field;
    const uint16_t offset_from_table = fields[i].offset_from_table;

    if (offset_from_table == 0) {
      // Skip non-present fields.
      continue;
    }

    // The field offsets are relative to the start of the table.
    const uint64_t field_offset = table_offset + offset_from_table;

    // We have a vtable entry for a non-existent field, that means it's a
    // binary generated by a newer schema than we are currently processing.
    if (field == nullptr) {
      // Calculate the length of this unknown field.
      const uint64_t unknown_field_length =
          // If another field follows this one, it ends where that one starts.
          ((i + 1 < fields.size())
               ? table_offset + fields[i + 1].offset_from_table
               // Otherwise use the known end of the table.
               : table_end_offset) -
          field_offset;

      std::string hint;

      if (unknown_field_length == 4) {
        // The field is 4 in length, so it could be an offset? Provide a hint.
        hint += " <possibly an offset? Check Loc: +0x";
        hint += ToHex(field_offset + GetScalar<uint32_t>(field_offset));
        hint += ">";
      }

      regions.emplace_back(
          MakeBinaryRegion(field_offset, unknown_field_length * sizeof(uint8_t),
                           BinaryRegionType::Unknown, unknown_field_length, 0,
                           std::string("Unknown field") + hint));
      continue;
    }

    if (IsScalar(field->type()->base_type())) {
      // These are the raw values stored in the table.
      offset = BuildField(field_offset, field, regions);
      continue;
    }

    switch (field->type()->base_type()) {
      case reflection::BaseType::Obj: {
        const reflection::Object *next_object =
            schema_->objects()->Get(field->type()->index());

        if (next_object->is_struct()) {
          // Structs are stored inline.
          offset = BuildStruct(field_offset, regions, next_object);
        } else {
          const uint64_t next_object_offset =
              field_offset + GetScalar<uint32_t>(field_offset);
          regions.emplace_back(MakeBinaryRegion(
              field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
              next_object_offset,
              std::string("offset to field `") + field->name()->c_str() + "`"));
          offset += sizeof(uint32_t);

          BuildTable(next_object_offset, BinarySectionType::Table, next_object);
        }
      } break;

      case reflection::BaseType::String: {
        const uint64_t string_offset =
            field_offset + GetScalar<uint32_t>(field_offset);
        regions.emplace_back(MakeBinaryRegion(
            field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
            string_offset,
            std::string("offset to field `") + field->name()->c_str() + "`"));
        BuildString(string_offset, table, field);
      } break;

      case reflection::BaseType::Vector: {
        const uint64_t vector_offset =
            field_offset + GetScalar<uint32_t>(field_offset);
        regions.emplace_back(MakeBinaryRegion(
            field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
            vector_offset,
            std::string("offset to field `") + field->name()->c_str() + "`"));
        BuildVector(vector_offset, table, field, table_offset, vtable);
      } break;

      case reflection::BaseType::Union: {
        const uint64_t union_offset =
            field_offset + GetScalar<uint32_t>(field_offset);

        // The union type field is always one less than the union itself.
        const uint16_t union_type_id = field->id() - 1;

        auto vtable_entry = vtable.fields.find(union_type_id);
        if (vtable_entry == vtable.fields.end()) {
          // TODO(dbaileychess): need to capture this error condition.
          break;
        }

        const uint64_t type_offset =
            table_offset + vtable_entry->second.offset_from_table;

        const uint8_t realized_type = GetScalar<uint8_t>(type_offset);

        const std::string enum_type =
            BuildUnion(union_offset, realized_type, field);

        regions.emplace_back(MakeBinaryRegion(
            field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
            union_offset,
            std::string("offset to field `") + field->name()->c_str() +
                "` (union of type `" + enum_type + "`)"));
      } break;

      default: break;
    }
  }

  // Fill in any bytes that weren't covered above, as those are padding.
  //
  // `cursor` is the first byte of the table that is not yet accounted for. The
  // field regions were appended in increasing offset order (regions[0] is the
  // vtable offset), so a single forward sweep finds every gap: whenever the
  // next region starts beyond the cursor, the bytes in between are padding.
  std::vector<BinaryRegion> padding_regions;
  uint64_t cursor = field_offset_start;
  for (size_t region_index = 1;
       region_index < regions.size() && cursor < table_end_offset;
       ++region_index) {
    const uint64_t region_start = regions[region_index].offset;
    const uint64_t region_end = region_start + regions[region_index].length;

    if (cursor < region_start) {
      // There is a gap in front of this region, so pad up to its offset.
      const uint64_t pad_bytes = region_start - cursor;
      padding_regions.emplace_back(
          MakeBinaryRegion(cursor, pad_bytes * sizeof(uint8_t),
                           BinaryRegionType::Uint8, pad_bytes, 0, "padding"));
    }

    // Everything up to the end of this region is now accounted for.
    if (cursor < region_end) { cursor = region_end; }
  }

  // Handle the case where there is padding after the last known binary
  // region: pad from where we left off to the expected end of the table.
  if (cursor < table_end_offset) {
    const uint64_t pad_bytes = table_end_offset - cursor;
    padding_regions.emplace_back(
        MakeBinaryRegion(cursor, pad_bytes * sizeof(uint8_t),
                         BinaryRegionType::Uint8, pad_bytes, 0, "padding"));
  }

  regions.insert(regions.end(), padding_regions.begin(), padding_regions.end());

  std::stable_sort(regions.begin(), regions.end(),
                   [&](const BinaryRegion &a, const BinaryRegion &b) {
                     return a.offset < b.offset;
                   });

  sections_.insert(std::make_pair(
      table_offset,
      MakeBinarySection(table->name()->str(), type, std::move(regions))));
}
// Appends regions for every field of the struct `object` laid out inline
// starting at `offset`, and returns the offset just past the last field
// handled. If `object` is not a struct, nothing is appended and `offset` is
// returned unchanged.
//
// Scalar fields become one region each, nested structs are expanded in place,
// and fixed-length arrays are expanded element by element. Fields of any other
// base type are skipped.
uint64_t BinaryAnnotator::BuildStruct(uint64_t offset,
                                      std::vector<BinaryRegion> &regions,
                                      const reflection::Object *object) {
  if (!object->is_struct()) { return offset; }

  // Loop over all the fields in increasing order
  ForAllFields(object, /*reverse=*/false, [&](const reflection::Field *field) {
    const auto base_type = field->type()->base_type();

    if (IsScalar(base_type)) {
      offset = BuildStructureField(offset, object, field, regions);
      return;
    }

    if (base_type == reflection::BaseType::Obj) {
      // Structs are stored inline, even when nested.
      offset = BuildStruct(offset, regions,
                           schema_->objects()->Get(field->type()->index()));
      return;
    }

    if (base_type != reflection::BaseType::Array) { return; }

    // Arrays are just repeated structures.
    if (IsScalar(field->type()->element())) {
      offset = BuildArrayField(offset, object, field,
                               field->type()->fixed_length(), regions);
      return;
    }

    const auto element_count = field->type()->fixed_length();
    for (uint16_t i = 0; i < element_count; ++i) {
      // TODO(dbaileychess): This works, but the comments on the fields lose
      // some context. Need to figure a way how to plumb the nested arrays
      // comments together that isn't too confusing.
      offset = BuildStruct(offset, regions,
                           schema_->objects()->Get(field->type()->index()));
    }
  });

  return offset;
}
// Annotates the string stored at `offset` as its own section, named
// "<table>.<field>". A string section consists of three regions: the uint32
// length, the characters themselves, and a single terminator character.
//
// Offsets that were already annotated (shared strings) are skipped.
void BinaryAnnotator::BuildString(uint64_t offset,
                                  const reflection::Object *table,
                                  const reflection::Field *field) {
  // A string that was seen before is a shared instance; nothing more to do.
  if (strings_.find(offset) != strings_.end()) { return; }

  // Work out where each of the three parts lives.
  const uint64_t length_offset = offset;
  const uint32_t string_length = GetScalar<uint32_t>(length_offset);
  const uint64_t chars_offset = length_offset + sizeof(uint32_t);
  const uint64_t terminator_offset =
      chars_offset + string_length * sizeof(char);

  std::vector<BinaryRegion> regions;
  regions.emplace_back(MakeBinaryRegion(length_offset, sizeof(uint32_t),
                                        BinaryRegionType::Uint32, 0, 0,
                                        std::string("length of string")));
  regions.emplace_back(MakeBinaryRegion(chars_offset,
                                        string_length * sizeof(char),
                                        BinaryRegionType::Char, string_length,
                                        0, ""));
  regions.emplace_back(MakeBinaryRegion(terminator_offset, sizeof(char),
                                        BinaryRegionType::Char, 0, 0,
                                        std::string("string terminator")));

  std::string section_name = table->name()->c_str();
  section_name += ".";
  section_name += field->name()->c_str();

  sections_.insert(std::make_pair(
      length_offset, MakeBinarySection(section_name, BinarySectionType::String,
                                       std::move(regions))));

  // Remember the offset so later references to the same string are detected.
  strings_.insert(length_offset);
}
// Annotates the vector that starts at `offset` and records it as a `Vector`
// section named "<table>.<field>", keyed by the vector's own offset.
//
// offset:              absolute offset of the vector (its uint32 length).
// table:               the object that owns `field`; used for the section name.
// field:               schema definition of the vector field; its element type
//                      selects how the items are annotated.
// parent_table_offset: absolute offset of the table holding `field`; only used
//                      to locate the sibling type vector of a vector of unions.
// vtable:              vtable of that table, used for the same lookup.
//
// Items that are offsets (tables, strings, unions) also trigger annotation of
// whatever they point to. Element types that are neither handled explicitly
// nor scalar produce no item regions, so only the length is annotated.
void BinaryAnnotator::BuildVector(uint64_t offset,
                                  const reflection::Object *table,
                                  const reflection::Field *field,
                                  const uint64_t parent_table_offset,
                                  const VTable &vtable) {
  std::vector<BinaryRegion> regions;

  const uint32_t vector_length = GetScalar<uint32_t>(offset);
  // Remember where the vector starts; `offset` advances as items are consumed.
  const uint64_t vector_offset = offset;

  regions.emplace_back(
      MakeBinaryRegion(offset, sizeof(uint32_t), BinaryRegionType::Uint32, 0, 0,
                       std::string("length of vector (# items)")));
  offset += sizeof(uint32_t);

  switch (field->type()->element()) {
    case reflection::BaseType::Obj: {
      const reflection::Object *object =
          schema_->objects()->Get(field->type()->index());

      if (object->is_struct()) {
        // Vector of structs
        for (size_t i = 0; i < vector_length; ++i) {
          // Structs are inline to the vector.
          offset = BuildStruct(offset, regions, object);
        }
      } else {
        // Vector of objects
        for (size_t i = 0; i < vector_length; ++i) {
          // The table offset is relative from the offset location itself.
          const uint64_t table_offset = offset + GetScalar<uint32_t>(offset);

          regions.emplace_back(MakeBinaryRegion(
              offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
              table_offset,
              std::string("offset to table[") + std::to_string(i) + "]"));

          BuildTable(table_offset, BinarySectionType::Table, object);

          offset += sizeof(uint32_t);
        }
      }
    } break;

    case reflection::BaseType::String: {
      // Vector of strings
      for (size_t i = 0; i < vector_length; ++i) {
        // The string offset is relative from the offset location itself.
        const uint64_t string_offset = offset + GetScalar<uint32_t>(offset);

        regions.emplace_back(MakeBinaryRegion(
            offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
            string_offset,
            std::string("offset to string[") + std::to_string(i) + "]"));

        BuildString(string_offset, table, field);

        offset += sizeof(uint32_t);
      }
    } break;

    case reflection::BaseType::Union: {
      // Vector of unions
      // The realized type of each union item (uint8_t for now) is stored
      // separately, in a sibling vector whose field id is `field->id() - 1`.
      const uint16_t union_type_vector_id = field->id() - 1;
      auto vtable_entry = vtable.fields.find(union_type_vector_id);
      if (vtable_entry == vtable.fields.end()) {
        // TODO(dbaileychess): need to capture this error condition.
        break;
      }

      const uint64_t union_type_vector_field_offset =
          parent_table_offset + vtable_entry->second.offset_from_table;

      // The table slot holds a 32-bit offset to the type vector, relative to
      // the slot itself, so it is read as uint32_t like every other offset in
      // this function (a 16-bit read would truncate offsets above 65535).
      // The trailing `+ sizeof(uint32_t)` skips the type vector's length,
      // which we already know, to land on the first type.
      const uint64_t union_type_vector_data_offset =
          union_type_vector_field_offset +
          GetScalar<uint32_t>(union_type_vector_field_offset) +
          sizeof(uint32_t);

      for (size_t i = 0; i < vector_length; ++i) {
        // The union offset is relative from the offset location itself.
        const uint64_t union_offset = offset + GetScalar<uint32_t>(offset);

        const uint8_t realized_type = GetScalar<uint8_t>(
            union_type_vector_data_offset + i * sizeof(uint8_t));

        const std::string enum_type =
            BuildUnion(union_offset, realized_type, field);

        regions.emplace_back(MakeBinaryRegion(
            offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
            union_offset,
            std::string("offset to union[") + std::to_string(i) + "] (`" +
                enum_type + "`)"));

        offset += sizeof(uint32_t);
      }
    } break;

    default: {
      if (IsScalar(field->type()->element())) {
        const BinaryRegionType binary_region_type =
            GetRegionType(field->type()->element());

        const uint64_t type_size = GetTypeSize(field->type()->element());

        // TODO(dbaileychess): It might be nicer to use the
        // BinaryRegion.array_length field to indicate this.
        for (size_t i = 0; i < vector_length; ++i) {
          regions.emplace_back(MakeBinaryRegion(
              offset, type_size, binary_region_type, 0, 0,
              std::string("value[") + std::to_string(i) + "]"));
          offset += type_size;
        }
      }
    } break;
  }

  sections_.insert(std::make_pair(
      vector_offset,
      MakeBinarySection(
          std::string(table->name()->c_str()) + "." + field->name()->c_str(),
          BinarySectionType::Vector, std::move(regions))));
}
// Annotates the value a union field points to and returns the name of the
// union member that `realized_type` selects.
//
// offset:        absolute offset of the union's value.
// realized_type: used as an index into the values of the union's enum.
// field:         schema definition of the union field.
//
// A struct member gets its own `Union` section named "<struct>.<field>"; a
// table member is annotated through BuildTable. Members that are not objects
// are not annotated yet, but their name is still returned.
std::string BinaryAnnotator::BuildUnion(uint64_t offset,
                                        const uint8_t realized_type,
                                        const reflection::Field *field) {
  const reflection::Enum *next_enum =
      schema_->enums()->Get(field->type()->index());

  const reflection::EnumVal *enum_val = next_enum->values()->Get(realized_type);

  const reflection::Type *union_type = enum_val->union_type();

  if (union_type->base_type() == reflection::BaseType::Obj) {
    const reflection::Object *object =
        schema_->objects()->Get(union_type->index());

    if (object->is_struct()) {
      // A union of a struct points to a new binary section.
      std::vector<BinaryRegion> regions;

      BuildStruct(offset, regions, object);

      // Read the key and build the name before `regions` is handed over:
      // argument evaluation order is unspecified, so reading
      // `regions[0].offset` in the same call that moves `regions` could
      // observe a moved-from vector.
      const uint64_t section_offset = regions[0].offset;
      const std::string section_name =
          std::string(object->name()->c_str()) + "." + field->name()->c_str();

      sections_.insert(std::make_pair(
          section_offset,
          MakeBinarySection(section_name, BinarySectionType::Union,
                            std::move(regions))));
    } else {
      BuildTable(offset, BinarySectionType::Table, object);
    }
  }
  // TODO(dbaileychess): handle the other union types.

  return enum_val->name()->c_str();
}
void BinaryAnnotator::FixMissingSections() {
uint64_t offset = 0;
std::vector<BinarySection> sections_to_insert;
for (auto &current_section : sections_) {
BinarySection &section = current_section.second;
const uint64_t section_start_offset = current_section.first;
const uint64_t section_end_offset =
section.regions.back().offset + section.regions.back().length;
if (offset < section_start_offset) {
// We are at an offset that is less then the current section.
const uint64_t pad_bytes = section_start_offset - offset + 1;
const uint64_t start_offset = offset - 1;
std::vector<BinaryRegion> regions;
// Check if the region is all zeros or not, as that can tell us if it is
// padding or not.
if (IsNonZeroRegion(offset - 1, pad_bytes, binary_)) {
// Some of the padding bytes are non-zero, so this might be an unknown
// section of the binary.
regions.emplace_back(MakeBinaryRegion(
start_offset, pad_bytes * sizeof(uint8_t),
BinaryRegionType::Unknown, pad_bytes, 0,
pad_bytes < 8 ? "could be a corrupted padding region (non zero) "
"due to the length < 8 bytes."
: "WARN: nothing refers to this. Check if any "
"`Unkown Field`s point to this."));
sections_to_insert.emplace_back(
MakeBinarySection("no known references", BinarySectionType::Unknown,
std::move(regions)));
} else {
// This region is most likely padding.
regions.emplace_back(MakeBinaryRegion(
start_offset, pad_bytes * sizeof(uint8_t), BinaryRegionType::Uint8,
pad_bytes, 0,
// Output a different annotation if the pad bytes exceed what we
// expect to be the maximum padding.
pad_bytes > 7 ? "likely padding but might be an unknown section "
"due to being larger than 7 bytes"
: "padding"));
sections_to_insert.emplace_back(MakeBinarySection(
"", BinarySectionType::Padding, std::move(regions)));
}
}
offset = section_end_offset + 1;
}
for (const BinarySection &section_to_insert : sections_to_insert) {
sections_.insert(
std::make_pair(section_to_insert.regions[0].offset, section_to_insert));
}
}
} // namespace flatbuffers

227
src/binary_annotator.h Normal file
View File

@@ -0,0 +1,227 @@
/*
* Copyright 2021 Google Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FLATBUFFERS_BINARY_ANNOTATOR_H_
#define FLATBUFFERS_BINARY_ANNOTATOR_H_
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#include "flatbuffers/reflection.h"
namespace flatbuffers {
// The data type of a single BinaryRegion. ToString(BinaryRegionType) in this
// header gives the label used for each value.
enum class BinaryRegionType {
// Type could not be determined; labelled "?uint8_t".
Unknown = 0,
// Offset kinds, labelled "UOffset32", "SOffset32" and "VOffset16".
UOffset = 1,
SOffset = 2,
VOffset = 3,
Bool = 4,
// Labelled "int8_t", the same label as Int8.
Byte = 5,
Char = 6,
// Fixed-width integers.
Uint8 = 7,
Int8 = 8,
Uint16 = 9,
Int16 = 10,
Uint32 = 11,
Int32 = 12,
Uint64 = 13,
Int64 = 14,
// Floating point.
Float = 15,
Double = 16
};
// Reads a value of type `T` from the `sizeof(T)` bytes starting at `binary`.
//
// The bytes are copied rather than accessed through a casted pointer, so the
// read is well defined even when `binary` is not suitably aligned for `T`.
// The value is interpreted in the host's byte order; no swapping is done.
template<typename T> static inline T GetScalar(const uint8_t *binary) {
  T value;
  std::memcpy(&value, binary, sizeof(T));
  return value;
}
// Formats `i` as uppercase hexadecimal, left-padded with '0' up to `width`
// characters. `width` is a minimum: longer values are not truncated.
template<typename T>
static inline std::string ToHex(T i, size_t width = sizeof(T)) {
  std::stringstream out;
  out.setf(std::ios_base::hex, std::ios_base::basefield);
  out.setf(std::ios_base::uppercase);
  out.fill('0');
  out.width(static_cast<int>(width));
  out << i;
  return out.str();
}

// Overload for uint8_t, which a stream would otherwise print as a character
// instead of a number. Always yields at least two hex digits.
static inline std::string ToHex(uint8_t i) {
  const int as_number = static_cast<int>(i);
  return ToHex(as_number, 2);
}
// A contiguous run of bytes in the binary, together with its data type and a
// human-readable comment describing it.
struct BinaryRegion {
// Offset into the binary where this region begins.
uint64_t offset = 0;
// The length of this region in bytes.
uint64_t length = 0;
// The underlying data type of this region.
BinaryRegionType type = BinaryRegionType::Unknown;
// If `type` is an array/vector, this is the number of those types this region
// encompasses.
uint64_t array_length = 0;
// If this region is an offset to some other region, this is what it points
// to. The offset is relative to the overall binary, not to this region.
uint64_t points_to_offset = 0;
// The comment on the region.
// TODO(dbaileychess): Consider moving this to a more structured comment field
// so that other generators can parse it more easily.
std::string comment;
};
// The kind of a BinarySection, i.e. what the group of regions it holds
// represents.
enum class BinarySectionType {
// Default for a section whose kind has not been set.
Unknown = 0,
Header = 1,
Table = 2,
RootTable = 3,
VTable = 4,
Struct = 5,
String = 6,
Vector = 7,
Union = 8,
Padding = 9,
};
// A section of the binary that is grouped together in some logical manner, and
// is often pointed to by an offset BinaryRegion elsewhere. Sections include
// `tables`, `vtables`, `strings`, `vectors`, etc.
struct BinarySection {
// User-specified name of the section, if applicable.
std::string name;
// The type of this section.
BinarySectionType type = BinarySectionType::Unknown;
// The binary regions that make up this section, in order of their offsets.
std::vector<BinaryRegion> regions;
};
// Maps a scalar schema type onto the region type used to annotate its bytes.
//
// Signed schema types map to signed region types and unsigned ones to
// unsigned region types. Anything that is not listed (non-scalar types)
// yields BinaryRegionType::Unknown.
inline static BinaryRegionType GetRegionType(reflection::BaseType base_type) {
  switch (base_type) {
    // Union type tags and bools are annotated as plain unsigned bytes.
    case reflection::UType: return BinaryRegionType::Uint8;
    case reflection::Bool: return BinaryRegionType::Uint8;
    case reflection::Byte: return BinaryRegionType::Int8;
    case reflection::UByte: return BinaryRegionType::Uint8;
    case reflection::Short: return BinaryRegionType::Int16;
    case reflection::UShort: return BinaryRegionType::Uint16;
    case reflection::Int: return BinaryRegionType::Int32;
    case reflection::UInt: return BinaryRegionType::Uint32;
    case reflection::Long: return BinaryRegionType::Int64;
    case reflection::ULong: return BinaryRegionType::Uint64;
    case reflection::Float: return BinaryRegionType::Float;
    case reflection::Double: return BinaryRegionType::Double;
    default: return BinaryRegionType::Unknown;
  }
}
// Returns the type label shown for a region of the given type. Values that
// are not part of the enum yield "todo".
inline static std::string ToString(const BinaryRegionType type) {
  const char *label = "todo";
  switch (type) {
    case BinaryRegionType::Unknown: label = "?uint8_t"; break;

    // Offsets.
    case BinaryRegionType::UOffset: label = "UOffset32"; break;
    case BinaryRegionType::SOffset: label = "SOffset32"; break;
    case BinaryRegionType::VOffset: label = "VOffset16"; break;

    // Single-byte types.
    case BinaryRegionType::Bool: label = "bool"; break;
    case BinaryRegionType::Byte: label = "int8_t"; break;
    case BinaryRegionType::Char: label = "char"; break;

    // Fixed-width integers.
    case BinaryRegionType::Uint8: label = "uint8_t"; break;
    case BinaryRegionType::Int8: label = "int8_t"; break;
    case BinaryRegionType::Uint16: label = "uint16_t"; break;
    case BinaryRegionType::Int16: label = "int16_t"; break;
    case BinaryRegionType::Uint32: label = "uint32_t"; break;
    case BinaryRegionType::Int32: label = "int32_t"; break;
    case BinaryRegionType::Uint64: label = "uint64_t"; break;
    case BinaryRegionType::Int64: label = "int64_t"; break;

    // Floating point.
    case BinaryRegionType::Float: label = "float"; break;
    case BinaryRegionType::Double: label = "double"; break;

    default: break;
  }
  return label;
}
// Annotates a FlatBuffers binary using the binary schema (bfbs) that
// describes it. The result of Annotate() is a map from offset to
// BinarySection.
//
// The annotator only stores the pointers it is given; it does not copy the
// schema or the binary, so both buffers must stay valid while it is in use.
class BinaryAnnotator {
 public:
  // bfbs:        the binary schema; it is read as a reflection::Schema.
  // bfbs_length: size of `bfbs` in bytes.
  // binary:      the binary to annotate.
  explicit BinaryAnnotator(const uint8_t *const bfbs, const int64_t bfbs_length,
                           const uint8_t *const binary)
      : bfbs_(bfbs),
        bfbs_length_(bfbs_length),
        schema_(reflection::GetSchema(bfbs)),
        binary_(binary) {}

  // Returns the annotated sections keyed by offset.
  std::map<uint64_t, BinarySection> Annotate();

 private:
  struct VTable {
    struct Entry {
      const reflection::Field *field = nullptr;
      uint16_t offset_from_table = 0;
    };

    // Field ID -> {field def, offset from table}
    std::map<uint16_t, Entry> fields;

    uint16_t vtable_size = 0;
    uint16_t table_size = 0;
  };

  uint64_t BuildHeader(uint64_t offset);

  void BuildVTable(uint64_t offset, const reflection::Object *table);

  void BuildTable(uint64_t offset, const BinarySectionType type,
                  const reflection::Object *table);

  uint64_t BuildStruct(uint64_t offset, std::vector<BinaryRegion> &regions,
                       const reflection::Object *structure);

  void BuildString(uint64_t offset, const reflection::Object *table,
                   const reflection::Field *field);

  void BuildVector(uint64_t offset, const reflection::Object *table,
                   const reflection::Field *field, uint64_t parent_table_offset,
                   const VTable &vtable);

  std::string BuildUnion(uint64_t offset, uint8_t realized_type,
                         const reflection::Field *field);

  void FixMissingSections();

  // Reads a `T` located `offset` bytes into the binary. The bytes are copied
  // instead of dereferencing a casted pointer, so the read is well defined
  // even when the location is not suitably aligned for `T`. No bounds check
  // is performed.
  template<typename T> inline T GetScalar(uint64_t offset) {
    T value;
    std::memcpy(&value, binary_ + offset, sizeof(T));
    return value;
  }

  // The schema for the binary file
  const uint8_t *bfbs_;
  const int64_t bfbs_length_;
  const reflection::Schema *schema_;

  // The binary data itself.
  const uint8_t *binary_;

  // Map of binary offset to vtables, to dedupe vtables.
  std::map<uint64_t, VTable> vtables_;

  // A set of binary offset to string sections, to dedupe shared strings.
  std::set<uint64_t> strings_;

  // The annotated binary sections, indexed by their absolute offset.
  std::map<uint64_t, BinarySection> sections_;
};
} // namespace flatbuffers
#endif // FLATBUFFERS_BINARY_ANNOTATOR_H_

View File

@@ -19,6 +19,8 @@
#include <list>
#include <sstream>
#include "annotated_binary_text_gen.h"
#include "binary_annotator.h"
#include "flatbuffers/util.h"
namespace flatbuffers {
@@ -215,6 +217,8 @@ const static FlatCOption options[] = {
"in JSON, which is unsafe unless checked by a verifier afterwards." },
{ "", "ts-flat-files", "",
"Only generated one typescript file per .fbs file." },
{ "", "annotate", "SCHEMA",
"Annotate the provided BINARY_FILE with the specified SCHEMA file." },
};
static void AppendTextWrappedString(std::stringstream &ss, std::string &text,
@@ -297,7 +301,7 @@ std::string FlatCompiler::GetShortUsageString(const char *program_name) const {
ss << ", ";
}
ss.seekp(-2, ss.cur);
ss << "]... FILE... [-- FILE...]";
ss << "]... FILE... [-- BINARY_FILE...]";
std::string help = ss.str();
std::stringstream ss_textwrap;
AppendTextWrappedString(ss_textwrap, help, 80, 0);
@@ -306,7 +310,8 @@ std::string FlatCompiler::GetShortUsageString(const char *program_name) const {
std::string FlatCompiler::GetUsageString(const char *program_name) const {
std::stringstream ss;
ss << "Usage: " << program_name << " [OPTION]... FILE... [-- FILE...]\n";
ss << "Usage: " << program_name
<< " [OPTION]... FILE... [-- BINARY_FILE...]\n";
for (size_t i = 0; i < params_.num_generators; ++i) {
const Generator &g = params_.generators[i];
AppendOption(ss, g.option, 80, 25);
@@ -320,16 +325,48 @@ std::string FlatCompiler::GetUsageString(const char *program_name) const {
std::string files_description =
"FILEs may be schemas (must end in .fbs), binary schemas (must end in "
".bfbs) or JSON files (conforming to preceding schema). FILEs after the "
"-- must be binary flatbuffer format files. Output files are named using "
"the base file name of the input, and written to the current directory "
"or the path given by -o. example: " +
".bfbs) or JSON files (conforming to preceding schema). BINARY_FILEs "
"after the -- must be binary flatbuffer format files. Output files are "
"named using the base file name of the input, and written to the current "
"directory or the path given by -o. example: " +
std::string(program_name) + " -c -b schema1.fbs schema2.fbs data.json";
AppendTextWrappedString(ss, files_description, 80, 0);
ss << "\n";
return ss.str();
}
void FlatCompiler::AnnotateBinaries(
const uint8_t *binary_schema, const uint64_t binary_schema_size,
const std::string &schema_filename,
const std::vector<std::string> &binary_files) {
for (const std::string &filename : binary_files) {
std::string binary_contents;
if (!flatbuffers::LoadFile(filename.c_str(), true, &binary_contents)) {
Warn("unable to load binary file: " + filename);
continue;
}
const uint8_t *binary =
reinterpret_cast<const uint8_t *>(binary_contents.c_str());
const size_t binary_size = binary_contents.size();
flatbuffers::BinaryAnnotator binary_annotator(binary_schema,
binary_schema_size, binary);
auto annotations = binary_annotator.Annotate();
// TODO(dbaileychess): Right now we just support a single text-based
// output of the annotated binary schema, which we generate here. We
// could output the raw annotations instead and have third-party tools
// use them to generate their own output.
flatbuffers::AnnotatedBinaryTextGenerator text_generator(
flatbuffers::AnnotatedBinaryTextGenerator::Options{}, annotations,
binary, binary_size);
text_generator.Generate(filename, schema_filename);
}
}
int FlatCompiler::Compile(int argc, const char **argv) {
if (params_.generators == nullptr || params_.num_generators == 0) {
return 0;
@@ -353,6 +390,7 @@ int FlatCompiler::Compile(int argc, const char **argv) {
std::vector<bool> generator_enabled(params_.num_generators, false);
size_t binary_files_from = std::numeric_limits<size_t>::max();
std::string conform_to_schema;
std::string annotate_schema;
const char *program_name = argv[0];
@@ -554,6 +592,9 @@ int FlatCompiler::Compile(int argc, const char **argv) {
opts.json_nested_legacy_flatbuffers = true;
} else if (arg == "--ts-flat-files") {
opts.ts_flat_file = true;
} else if (arg == "--annotate") {
if (++argi >= argc) Error("missing path following: " + arg, true);
annotate_schema = flatbuffers::PosixPath(argv[argi]);
} else {
for (size_t i = 0; i < params_.num_generators; ++i) {
if (arg == "--" + params_.generators[i].option.long_opt ||
@@ -582,7 +623,8 @@ int FlatCompiler::Compile(int argc, const char **argv) {
if (opts.proto_mode) {
if (any_generator)
Error("cannot generate code directly from .proto files", true);
} else if (!any_generator && conform_to_schema.empty()) {
} else if (!any_generator && conform_to_schema.empty() &&
annotate_schema.empty()) {
Error("no options: specify at least one generator.", true);
}
@@ -611,6 +653,53 @@ int FlatCompiler::Compile(int argc, const char **argv) {
}
}
if (!annotate_schema.empty()) {
const std::string ext = flatbuffers::GetExtension(annotate_schema);
if (!(ext == reflection::SchemaExtension() || ext == "fbs")) {
Error("Expected a `.bfbs` or `.fbs` schema, got: " + annotate_schema);
}
const bool is_binary_schema = ext == reflection::SchemaExtension();
std::string schema_contents;
if (!flatbuffers::LoadFile(annotate_schema.c_str(),
/*binary=*/is_binary_schema, &schema_contents)) {
Error("unable to load schema: " + annotate_schema);
}
const uint8_t *binary_schema = nullptr;
uint64_t binary_schema_size = 0;
IDLOptions binary_opts;
binary_opts.lang_to_generate |= flatbuffers::IDLOptions::kBinary;
flatbuffers::Parser parser(binary_opts);
if (is_binary_schema) {
binary_schema =
reinterpret_cast<const uint8_t *>(schema_contents.c_str());
binary_schema_size = schema_contents.size();
} else {
// If we need to generate the .bfbs file from the provided schema file
// (.fbs)
ParseFile(parser, annotate_schema, schema_contents, include_directories);
parser.Serialize();
binary_schema = parser.builder_.GetBufferPointer();
binary_schema_size = parser.builder_.GetSize();
}
if (binary_schema == nullptr || !binary_schema_size) {
Error("could not parse a value binary schema from: " + annotate_schema);
}
// Annotate the provided files with the binary_schema.
AnnotateBinaries(binary_schema, binary_schema_size, annotate_schema,
filenames);
// We don't support doing anything else after annotating a binary.
return 0;
}
std::unique_ptr<flatbuffers::Parser> parser(new flatbuffers::Parser(opts));
for (auto file_it = filenames.begin(); file_it != filenames.end();

View File

@@ -120,6 +120,23 @@ std::string GetAnyValueS(reflection::BaseType type, const uint8_t *data,
}
}
// Calls `func` once for every field of `object`, ordered by field id.
//
// object:  the object whose fields are visited.
// reverse: when false, fields are visited by ascending id; when true, by
//          descending id.
// func:    callback invoked with each field.
//
// Field ids are used as indices into a table with one slot per field, so the
// ids are expected to be exactly 0..N-1.
void ForAllFields(const reflection::Object *object, bool reverse,
                  std::function<void(const reflection::Field *)> func) {
  // Slot `id` holds the index into `object->fields()` of the field with that
  // id.
  std::vector<uint32_t> field_to_id_map;
  field_to_id_map.resize(object->fields()->size());

  // Create the mapping of field ID to the index into the vector.
  for (uint32_t i = 0; i < object->fields()->size(); ++i) {
    auto field = object->fields()->Get(i);
    field_to_id_map[field->id()] = i;
  }

  const size_t field_count = field_to_id_map.size();
  for (size_t i = 0; i < field_count; ++i) {
    // In reverse, step i visits id (count - 1 - i). Written as
    // `count - (i + 1)` so the index stays in [0, count) and never underflows.
    const size_t id = reverse ? field_count - (i + 1) : i;
    func(object->fields()->Get(field_to_id_map[id]));
  }
}
void SetAnyValueI(reflection::BaseType type, uint8_t *data, int64_t val) {
// clang-format off
#define FLATBUFFERS_SET(T) WriteScalar(data, static_cast<T>(val))