mirror of
https://github.com/google/flatbuffers.git
synced 2026-06-06 21:37:36 +00:00
Annotated Flatbuffer Binary (#7174)
* Annotated Flatbuffer Binary * Various fixes * Handles old schema * handle multiple missing fields * minor edits * bazel fix, spelling fix, ascii fix
This commit is contained in:
@@ -34,10 +34,14 @@ cc_library(
|
||||
cc_library(
|
||||
name = "flatc_library",
|
||||
srcs = [
|
||||
"annotated_binary_text_gen.cpp",
|
||||
"annotated_binary_text_gen.h",
|
||||
"bfbs_gen.h",
|
||||
"bfbs_gen_lua.cpp",
|
||||
"bfbs_gen_lua.h",
|
||||
"bfbs_namer.h",
|
||||
"binary_annotator.cpp",
|
||||
"binary_annotator.h",
|
||||
"flatc.cpp",
|
||||
"namer.h",
|
||||
],
|
||||
|
||||
320
src/annotated_binary_text_gen.cpp
Normal file
320
src/annotated_binary_text_gen.cpp
Normal file
@@ -0,0 +1,320 @@
|
||||
#include "annotated_binary_text_gen.h"
|
||||
|
||||
#include "flatbuffers/util.h"
|
||||
|
||||
namespace flatbuffers {
|
||||
namespace {
|
||||
|
||||
// Layout settings shared by the text-formatting helpers in this file.
struct OutputConfig {
  // Field width used when printing a region's type string.
  size_t largest_type_string{ 10 };

  // Field width used when printing a region's value string.
  size_t largest_value_string{ 20 };

  // Number of raw bytes emitted per output line; also used as the chunk size
  // when a long value is split across several lines.
  size_t max_bytes_per_line{ 8 };

  // Width passed to ToHex when printing offsets.
  size_t offset_max_char{ 4 };

  // Character placed between the output columns.
  char delimiter{ '|' };
};
|
||||
|
||||
static std::string ToString(const BinarySectionType type) {
|
||||
switch (type) {
|
||||
case BinarySectionType::Header: return "header";
|
||||
case BinarySectionType::Table: return "table";
|
||||
case BinarySectionType::RootTable: return "root_table";
|
||||
case BinarySectionType::VTable: return "vtable";
|
||||
case BinarySectionType::Struct: return "struct";
|
||||
case BinarySectionType::String: return "string";
|
||||
case BinarySectionType::Vector: return "vector";
|
||||
case BinarySectionType::Unknown: return "unknown";
|
||||
case BinarySectionType::Union: return "union";
|
||||
case BinarySectionType::Padding: return "padding";
|
||||
default: return "todo";
|
||||
}
|
||||
}
|
||||
|
||||
static bool IsOffset(const BinaryRegionType type) {
|
||||
return type == BinaryRegionType::UOffset || type == BinaryRegionType::SOffset;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
std::string ToValueString(const BinaryRegion ®ion, const uint8_t *binary) {
|
||||
std::string s;
|
||||
s += "0x";
|
||||
const T val = GetScalar<T>(binary + region.offset);
|
||||
const uint64_t start_index = region.offset + region.length - 1;
|
||||
for (uint64_t i = 0; i < region.length; ++i) {
|
||||
s += ToHex(binary[start_index - i]);
|
||||
}
|
||||
s += " (";
|
||||
s += std::to_string(val);
|
||||
s += ")";
|
||||
return s;
|
||||
}
|
||||
|
||||
template<>
|
||||
std::string ToValueString<std::string>(const BinaryRegion ®ion,
|
||||
const uint8_t *binary) {
|
||||
return std::string(reinterpret_cast<const char *>(binary + region.offset),
|
||||
static_cast<size_t>(region.array_length));
|
||||
}
|
||||
|
||||
static std::string ToValueString(const BinaryRegion ®ion,
|
||||
const uint8_t *binary,
|
||||
const OutputConfig &output_config) {
|
||||
std::string s;
|
||||
|
||||
if (region.array_length) {
|
||||
if (region.type == BinaryRegionType::Uint8 ||
|
||||
region.type == BinaryRegionType::Unknown) {
|
||||
// Interpet each value as a ASCII to aid debugging
|
||||
for (uint64_t i = 0; i < region.array_length; ++i) {
|
||||
const uint8_t c = *(binary + region.offset + i);
|
||||
s += isprint(c) ? toascii(c) : '.';
|
||||
}
|
||||
return s;
|
||||
} else if (region.type == BinaryRegionType::Char) {
|
||||
// string value
|
||||
return ToValueString<std::string>(region, binary);
|
||||
}
|
||||
}
|
||||
|
||||
switch (region.type) {
|
||||
case BinaryRegionType::Uint32:
|
||||
return ToValueString<uint32_t>(region, binary);
|
||||
case BinaryRegionType::Int32: return ToValueString<int32_t>(region, binary);
|
||||
case BinaryRegionType::Uint16:
|
||||
return ToValueString<uint16_t>(region, binary);
|
||||
case BinaryRegionType::Int16: return ToValueString<int16_t>(region, binary);
|
||||
case BinaryRegionType::Bool: return ToValueString<bool>(region, binary);
|
||||
case BinaryRegionType::Uint8: return ToValueString<uint8_t>(region, binary);
|
||||
case BinaryRegionType::Char: return ToValueString<char>(region, binary);
|
||||
case BinaryRegionType::Byte:
|
||||
case BinaryRegionType::Int8: return ToValueString<int8_t>(region, binary);
|
||||
case BinaryRegionType::Int64: return ToValueString<int64_t>(region, binary);
|
||||
case BinaryRegionType::Uint64:
|
||||
return ToValueString<uint64_t>(region, binary);
|
||||
case BinaryRegionType::Double: return ToValueString<double>(region, binary);
|
||||
case BinaryRegionType::Float: return ToValueString<float>(region, binary);
|
||||
|
||||
// Handle Offsets separately, incase they add additional details.
|
||||
case BinaryRegionType::UOffset:
|
||||
s += ToValueString<uint32_t>(region, binary);
|
||||
break;
|
||||
case BinaryRegionType::SOffset:
|
||||
s += ToValueString<int32_t>(region, binary);
|
||||
break;
|
||||
case BinaryRegionType::VOffset:
|
||||
s += ToValueString<uint16_t>(region, binary);
|
||||
break;
|
||||
|
||||
default: break;
|
||||
}
|
||||
// If this is an offset type, include the calculated offset location in the
|
||||
// value.
|
||||
// TODO(dbaileychess): It might be nicer to put this in the comment field.
|
||||
if (IsOffset(region.type)) {
|
||||
s += " Loc: +0x";
|
||||
s += ToHex(region.points_to_offset, output_config.offset_max_char);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
// Carries the unprinted remainder of a value that is being split across
// several output lines.
struct DocContinuation {
  // Column at which the value text began on its first line. Zero means no
  // continuation is in progress.
  size_t value_start_column{ 0 };

  // The part of the value that still has to be printed.
  std::string value;
};
|
||||
|
||||
static std::string GenerateTypeString(const BinaryRegion ®ion) {
|
||||
return ToString(region.type) +
|
||||
((region.array_length)
|
||||
? "[" + std::to_string(region.array_length) + "]"
|
||||
: "");
|
||||
}
|
||||
|
||||
static std::string GenerateDocumentation(const BinaryRegion ®ion,
|
||||
const BinarySection &,
|
||||
const uint8_t *binary,
|
||||
DocContinuation &continuation,
|
||||
const OutputConfig &output_config) {
|
||||
std::string s;
|
||||
|
||||
// Check if there is a doc continuation that should be prioritized.
|
||||
if (continuation.value_start_column) {
|
||||
s += std::string(continuation.value_start_column - 2, ' ');
|
||||
s += output_config.delimiter;
|
||||
s += " ";
|
||||
|
||||
s += continuation.value.substr(0, output_config.max_bytes_per_line);
|
||||
continuation.value = continuation.value.substr(
|
||||
std::min(output_config.max_bytes_per_line, continuation.value.size()));
|
||||
return s;
|
||||
}
|
||||
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << std::setw(output_config.largest_type_string) << std::left;
|
||||
ss << GenerateTypeString(region);
|
||||
s += ss.str();
|
||||
}
|
||||
s += " ";
|
||||
s += output_config.delimiter;
|
||||
s += " ";
|
||||
if (region.array_length) {
|
||||
// Record where the value is first being outputted.
|
||||
continuation.value_start_column = s.size();
|
||||
|
||||
// Get the full-length value, which we will chunk below.
|
||||
const std::string value = ToValueString(region, binary, output_config);
|
||||
|
||||
std::stringstream ss;
|
||||
ss << std::setw(output_config.largest_value_string) << std::left;
|
||||
ss << value.substr(0, output_config.max_bytes_per_line);
|
||||
s += ss.str();
|
||||
|
||||
continuation.value =
|
||||
value.substr(std::min(output_config.max_bytes_per_line, value.size()));
|
||||
} else {
|
||||
std::stringstream ss;
|
||||
ss << std::setw(output_config.largest_value_string) << std::left;
|
||||
ss << ToValueString(region, binary, output_config);
|
||||
s += ss.str();
|
||||
}
|
||||
|
||||
s += " ";
|
||||
if (!region.comment.empty()) {
|
||||
s += output_config.delimiter;
|
||||
s += " ";
|
||||
s += region.comment;
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static std::string GenerateRegion(const BinaryRegion ®ion,
|
||||
const BinarySection §ion,
|
||||
const uint8_t *binary,
|
||||
const OutputConfig &output_config) {
|
||||
std::string s;
|
||||
bool doc_generated = false;
|
||||
DocContinuation doc_continuation;
|
||||
for (uint64_t i = 0; i < region.length; ++i) {
|
||||
if ((i % output_config.max_bytes_per_line) == 0) {
|
||||
// Start a new line of output
|
||||
s += '\n';
|
||||
s += " ";
|
||||
s += "+0x";
|
||||
s += ToHex(region.offset + i, output_config.offset_max_char);
|
||||
s += " ";
|
||||
s += output_config.delimiter;
|
||||
}
|
||||
|
||||
// Add each byte
|
||||
s += " ";
|
||||
s += ToHex(binary[region.offset + i]);
|
||||
|
||||
// Check for end of line or end of region conditions.
|
||||
if (((i + 1) % output_config.max_bytes_per_line == 0) ||
|
||||
i + 1 == region.length) {
|
||||
if (i + 1 == region.length) {
|
||||
// We are out of bytes but haven't the kMaxBytesPerLine, so we need to
|
||||
// zero those out to align everything globally.
|
||||
for (uint64_t j = i + 1; (j % output_config.max_bytes_per_line) != 0;
|
||||
++j) {
|
||||
s += " ";
|
||||
}
|
||||
}
|
||||
s += " ";
|
||||
s += output_config.delimiter;
|
||||
// This is the end of the first line or its the last byte of the region,
|
||||
// generate the end-of-line documentation.
|
||||
if (!doc_generated) {
|
||||
s += " ";
|
||||
s += GenerateDocumentation(region, section, binary, doc_continuation,
|
||||
output_config);
|
||||
|
||||
// If we have a value in the doc continuation, that means the doc is
|
||||
// being printed on multiple lines.
|
||||
doc_generated = doc_continuation.value.empty();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static std::string GenerateSection(const BinarySection §ion,
|
||||
const uint8_t *binary,
|
||||
const OutputConfig &output_config) {
|
||||
std::string s;
|
||||
s += "\n";
|
||||
s += ToString(section.type);
|
||||
if (!section.name.empty()) { s += " (" + section.name + ")"; }
|
||||
s += ":";
|
||||
for (const BinaryRegion ®ion : section.regions) {
|
||||
s += GenerateRegion(region, section, binary, output_config);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
bool AnnotatedBinaryTextGenerator::Generate(
|
||||
const std::string &filename, const std::string &schema_filename) {
|
||||
OutputConfig output_config;
|
||||
output_config.max_bytes_per_line = options_.max_bytes_per_line;
|
||||
|
||||
// Given the length of the binary, we can calculate the maximum number of
|
||||
// characters to display in the offset hex: (i.e. 2 would lead to 0XFF being
|
||||
// the max output).
|
||||
output_config.offset_max_char =
|
||||
binary_length_ > 0xFFFFFF
|
||||
? 8
|
||||
: (binary_length_ > 0xFFFF ? 6 : (binary_length_ > 0xFF ? 4 : 2));
|
||||
|
||||
// Find the largest type string of all the regions in this file, so we can
|
||||
// align the output nicely.
|
||||
output_config.largest_type_string = 0;
|
||||
for (const auto §ion : annotations_) {
|
||||
for (const auto ®ion : section.second.regions) {
|
||||
std::string s = GenerateTypeString(region);
|
||||
if (s.size() > output_config.largest_type_string) {
|
||||
output_config.largest_type_string = s.size();
|
||||
}
|
||||
|
||||
// Don't consider array regions, as they will be split to multiple lines.
|
||||
if (!region.array_length) {
|
||||
s = ToValueString(region, binary_, output_config);
|
||||
if (s.size() > output_config.largest_value_string) {
|
||||
output_config.largest_value_string = s.size();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Generate each of the binary sections
|
||||
std::string s;
|
||||
|
||||
s += "// Annotated Flatbuffer Binary\n";
|
||||
s += "//\n";
|
||||
s += "// Schema file: " + schema_filename + "\n";
|
||||
s += "// Binary file: " + filename + "\n";
|
||||
|
||||
for (const auto §ion : annotations_) {
|
||||
s += GenerateSection(section.second, binary_, output_config);
|
||||
s += "\n";
|
||||
}
|
||||
|
||||
// Modify the output filename.
|
||||
std::string output_filename = StripExtension(filename);
|
||||
output_filename += options_.output_postfix;
|
||||
output_filename +=
|
||||
"." + (options_.output_extension.empty() ? GetExtension(filename)
|
||||
: options_.output_extension);
|
||||
|
||||
return SaveFile(output_filename.c_str(), s, false);
|
||||
}
|
||||
|
||||
} // namespace flatbuffers
|
||||
71
src/annotated_binary_text_gen.h
Normal file
71
src/annotated_binary_text_gen.h
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright 2021 Google Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FLATBUFFERS_ANNOTATED_BINARY_TEXT_GEN_H_
|
||||
#define FLATBUFFERS_ANNOTATED_BINARY_TEXT_GEN_H_
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "binary_annotator.h"
|
||||
|
||||
namespace flatbuffers {
|
||||
|
||||
class AnnotatedBinaryTextGenerator {
|
||||
public:
|
||||
struct Options {
|
||||
// The maximum number of raw bytes to print per line in the output. 8 is a
|
||||
// good default due to the largest type (double) being 8 bytes long.
|
||||
size_t max_bytes_per_line = 8;
|
||||
|
||||
// The output file postfix, appended between the filename and the extension.
|
||||
// Example binary1.bin -> binary1_annotated.bin
|
||||
std::string output_postfix = "";
|
||||
|
||||
// The output file extension, replacing any extension given. If empty, don't
|
||||
// change the provided extension. AFB = Annotated Flatbuffer Binary
|
||||
//
|
||||
// Example: binary1.bin -> binary1.afb
|
||||
std::string output_extension = "afb";
|
||||
};
|
||||
|
||||
explicit AnnotatedBinaryTextGenerator(
|
||||
const Options &options, std::map<uint64_t, BinarySection> annotations,
|
||||
const uint8_t *const binary, const int64_t binary_length)
|
||||
: annotations_(std::move(annotations)),
|
||||
binary_(binary),
|
||||
binary_length_(binary_length),
|
||||
options_(options) {}
|
||||
|
||||
// Generate the annotated binary for the given `filename`. Returns true if the
|
||||
// annotated binary was succesfully saved.
|
||||
bool Generate(const std::string &filename, const std::string &schema_filename);
|
||||
|
||||
private:
|
||||
const std::map<uint64_t, BinarySection> annotations_;
|
||||
|
||||
// The binary data itself.
|
||||
const uint8_t *binary_;
|
||||
const int64_t binary_length_;
|
||||
|
||||
// Output configuration
|
||||
const Options options_;
|
||||
};
|
||||
|
||||
} // namespace flatbuffers
|
||||
|
||||
#endif // FLATBUFFERS_ANNOTATED_BINARY_TEXT_GEN_H_
|
||||
769
src/binary_annotator.cpp
Normal file
769
src/binary_annotator.cpp
Normal file
@@ -0,0 +1,769 @@
|
||||
#include "binary_annotator.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "flatbuffers/reflection.h"
|
||||
#include "flatbuffers/verifier.h"
|
||||
|
||||
namespace flatbuffers {
|
||||
namespace {
|
||||
|
||||
// Convenience factory: returns a BinaryRegion whose offset, length, type,
// array_length, points_to_offset and comment members are set from the
// arguments. Every argument has a default.
static BinaryRegion MakeBinaryRegion(
    const uint64_t offset = 0, const uint64_t length = 0,
    const BinaryRegionType type = BinaryRegionType::Unknown,
    const uint64_t array_length = 0, const uint64_t points_to_offset = 0,
    const std::string &comment = "") {
  BinaryRegion result;
  // Where the region lives and how big it is.
  result.offset = offset;
  result.length = length;
  // How its bytes should be interpreted.
  result.type = type;
  result.array_length = array_length;
  result.points_to_offset = points_to_offset;
  // Free-form annotation.
  result.comment = comment;
  return result;
}
|
||||
|
||||
// Convenience factory: returns a BinarySection with the given name, type and
// regions.
//
// `regions` is taken by value and moved into the section, so callers that
// pass `std::move(regions)` hand over their vector without a copy, while
// callers that pass an lvalue still get a copy as before.
static BinarySection MakeBinarySection(const std::string &name,
                                       const BinarySectionType type,
                                       std::vector<BinaryRegion> regions) {
  BinarySection section;
  section.name = name;
  section.type = type;
  section.regions = std::move(regions);
  return section;
}
|
||||
|
||||
static uint64_t BuildField(const uint64_t offset,
|
||||
const reflection::Field *field,
|
||||
std::vector<BinaryRegion> ®ions) {
|
||||
const uint64_t type_size = GetTypeSize(field->type()->base_type());
|
||||
const BinaryRegionType type = GetRegionType(field->type()->base_type());
|
||||
regions.emplace_back(MakeBinaryRegion(
|
||||
offset, type_size, type, 0, 0,
|
||||
std::string("table field `") + field->name()->c_str() + "` (" +
|
||||
reflection::EnumNameBaseType(field->type()->base_type()) + ")"));
|
||||
return offset + type_size;
|
||||
}
|
||||
|
||||
static uint64_t BuildStructureField(const uint64_t offset,
|
||||
const reflection::Object *object,
|
||||
const reflection::Field *field,
|
||||
std::vector<BinaryRegion> ®ions) {
|
||||
const uint64_t type_size = GetTypeSize(field->type()->base_type());
|
||||
regions.emplace_back(MakeBinaryRegion(
|
||||
offset, type_size, GetRegionType(field->type()->base_type()), 0, 0,
|
||||
std::string("struct field `") + object->name()->c_str() + "." +
|
||||
field->name()->c_str() + "` (" +
|
||||
reflection::EnumNameBaseType(field->type()->base_type()) + ")"));
|
||||
return offset + type_size;
|
||||
}
|
||||
|
||||
static uint64_t BuildArrayField(uint64_t offset,
|
||||
const reflection::Object *object,
|
||||
const reflection::Field *field,
|
||||
const uint16_t array_length,
|
||||
std::vector<BinaryRegion> ®ions) {
|
||||
const uint64_t type_size = GetTypeSize(field->type()->element());
|
||||
for (uint16_t i = 0; i < array_length; ++i) {
|
||||
regions.emplace_back(MakeBinaryRegion(
|
||||
offset, type_size, GetRegionType(field->type()->element()), 0, 0,
|
||||
std::string("array field `") + object->name()->c_str() + "." +
|
||||
field->name()->c_str() + "[" + std::to_string(i) + "]` (" +
|
||||
reflection::EnumNameBaseType(field->type()->element()) + ")"));
|
||||
offset += type_size;
|
||||
}
|
||||
|
||||
// The following groups the complete array together which shows up nicely as
|
||||
// an array, but then we don't show the individual values. So the above method
|
||||
// treats each field of the array as a separate region.
|
||||
// regions.emplace_back(
|
||||
// BinaryRegion{ offset, array_length * type_size,
|
||||
// GetRegionType(field->type()->element()), array_length, 0,
|
||||
// std::string("array field '") + object->name()->c_str() +
|
||||
// "." + field->name()->c_str() + "' value" });
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
// Returns true if any of the `length` bytes of `binary` starting at index
// `offset` is non-zero; returns false for an all-zero or empty range.
static bool IsNonZeroRegion(uint64_t offset, uint64_t length,
                            const uint8_t *binary) {
  const uint64_t end = offset + length;
  uint64_t cursor = offset;
  while (cursor < end) {
    if (binary[cursor] != 0) { return true; }
    ++cursor;
  }
  return false;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::map<uint64_t, BinarySection> BinaryAnnotator::Annotate() {
|
||||
flatbuffers::Verifier verifier(bfbs_, static_cast<size_t>(bfbs_length_));
|
||||
if (!reflection::VerifySchemaBuffer(verifier)) { return {}; }
|
||||
|
||||
// Make sure we start with a clean slate.
|
||||
vtables_.clear();
|
||||
strings_.clear();
|
||||
sections_.clear();
|
||||
|
||||
// First parse the header region which always start at offset 0.
|
||||
// The returned offset will point to the root_table location.
|
||||
const uint64_t root_table_offset = BuildHeader(0);
|
||||
|
||||
// Build the root table, and all else will be referenced from it.
|
||||
BuildTable(root_table_offset, BinarySectionType::RootTable,
|
||||
schema_->root_table());
|
||||
|
||||
// Now that all the sections are built, scan the regions between them and
|
||||
// insert padding bytes that are implied.
|
||||
FixMissingSections();
|
||||
|
||||
return sections_;
|
||||
}
|
||||
|
||||
uint64_t BinaryAnnotator::BuildHeader(const uint64_t offset) {
|
||||
std::vector<BinaryRegion> regions;
|
||||
|
||||
// TODO(dbaileychess): sized prefixed value
|
||||
const uint32_t root_table_offset = GetScalar<uint32_t>(offset);
|
||||
regions.emplace_back(MakeBinaryRegion(
|
||||
offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0, root_table_offset,
|
||||
std::string("offset to root table `") +
|
||||
schema_->root_table()->name()->str() + "`"));
|
||||
|
||||
if (IsNonZeroRegion(offset, 4, binary_)) {
|
||||
// Check if the file identifier region has non-zero data, and assume its the
|
||||
// file identifier. Otherwise, it will get filled in with padding later.
|
||||
regions.emplace_back(MakeBinaryRegion(
|
||||
offset + sizeof(uint32_t), 4 * sizeof(uint8_t), BinaryRegionType::Char,
|
||||
4, 0, std::string("File Identifier")));
|
||||
}
|
||||
|
||||
sections_.insert(std::make_pair(
|
||||
offset, MakeBinarySection("", BinarySectionType::Header, regions)));
|
||||
return root_table_offset;
|
||||
}
|
||||
|
||||
void BinaryAnnotator::BuildVTable(uint64_t offset,
|
||||
const reflection::Object *table) {
|
||||
const uint64_t vtable_offset = offset;
|
||||
|
||||
// First see if we have used this vtable before, if so skip building it again.
|
||||
auto it = vtables_.find(vtable_offset);
|
||||
if (it != vtables_.end()) { return; }
|
||||
|
||||
std::vector<BinaryRegion> regions;
|
||||
|
||||
// Vtables start with the size of the vtable
|
||||
const uint16_t vtable_size = GetScalar<uint16_t>(offset);
|
||||
regions.emplace_back(MakeBinaryRegion(offset, sizeof(uint16_t),
|
||||
BinaryRegionType::Uint16, 0, 0,
|
||||
std::string("size of this vtable")));
|
||||
offset += sizeof(uint16_t);
|
||||
|
||||
// Then they have the size of the table they reference.
|
||||
const uint16_t table_size = GetScalar<uint16_t>(offset);
|
||||
regions.emplace_back(
|
||||
MakeBinaryRegion(offset, sizeof(uint16_t), BinaryRegionType::Uint16, 0, 0,
|
||||
std::string("size of referring table")));
|
||||
offset += sizeof(uint16_t);
|
||||
|
||||
const uint64_t offset_start = offset;
|
||||
|
||||
// A mapping between field (and its id) to the relative offset (uin16_t) from
|
||||
// the start of the table.
|
||||
std::map<uint16_t, VTable::Entry> fields;
|
||||
|
||||
// Counter for determining if the binary has more vtable entries than the
|
||||
// schema provided. This can occur if the binary was created at a newer schema
|
||||
// version and is being processed with an older one.
|
||||
uint16_t fields_processed = 0;
|
||||
|
||||
// Loop over all the fields.
|
||||
ForAllFields(table, /*reverse=*/false, [&](const reflection::Field *field) {
|
||||
const uint64_t field_offset = offset_start + field->id() * sizeof(uint16_t);
|
||||
|
||||
if (field_offset >= vtable_offset + vtable_size) {
|
||||
// This field_offset is too large for this vtable, so it must come from a
|
||||
// newer schema than the binary was create with or the binary writer did
|
||||
// not write it. For either case, it is safe to ignore.
|
||||
|
||||
// TODO(dbaileychess): We could show which fields are not set an their
|
||||
// default values if we want. We just need a way to make it obvious that
|
||||
// it isn't part of the buffer.
|
||||
return;
|
||||
}
|
||||
|
||||
const uint16_t offset_from_table = GetScalar<uint16_t>(field_offset);
|
||||
|
||||
VTable::Entry entry;
|
||||
entry.field = field;
|
||||
entry.offset_from_table = offset_from_table;
|
||||
fields.insert(std::make_pair(field->id(), entry));
|
||||
|
||||
std::string default_label;
|
||||
if (offset_from_table == 0) {
|
||||
// Not present, so could be default or be optional.
|
||||
if (field->required()) {
|
||||
// If this is a required field, make it known this is an error.
|
||||
regions.push_back(MakeBinaryRegion(
|
||||
field_offset, sizeof(uint16_t), BinaryRegionType::VOffset, 0, 0,
|
||||
std::string("ERROR: required field `") + field->name()->c_str() +
|
||||
"` (id: " + std::to_string(field->id()) + ") is not present!"));
|
||||
return;
|
||||
} else {
|
||||
// Its an optional field, so get the default value and interpret and
|
||||
// provided an annotation for it.
|
||||
if (IsScalar(field->type()->base_type())) {
|
||||
default_label += " <defaults to ";
|
||||
default_label += IsFloat(field->type()->base_type())
|
||||
? std::to_string(field->default_real())
|
||||
: std::to_string(field->default_integer());
|
||||
default_label += "> (";
|
||||
} else {
|
||||
default_label += " <null> (";
|
||||
}
|
||||
default_label +=
|
||||
reflection::EnumNameBaseType(field->type()->base_type());
|
||||
default_label += ")";
|
||||
}
|
||||
}
|
||||
|
||||
regions.push_back(MakeBinaryRegion(
|
||||
field_offset, sizeof(uint16_t), BinaryRegionType::VOffset, 0, 0,
|
||||
std::string("offset to field `") + field->name()->c_str() +
|
||||
"` (id: " + std::to_string(field->id()) + ")" + default_label));
|
||||
|
||||
fields_processed++;
|
||||
});
|
||||
|
||||
// Check if we covered all the expectant fields. If not, we need to add them
|
||||
// as unknown fields.
|
||||
const uint16_t expectant_vtable_fields =
|
||||
(vtable_size - sizeof(uint16_t) - sizeof(uint16_t)) / sizeof(uint16_t);
|
||||
|
||||
for (uint16_t id = fields_processed; id < expectant_vtable_fields; ++id) {
|
||||
const uint64_t field_offset = offset_start + id * sizeof(uint16_t);
|
||||
const uint16_t offset_from_table = GetScalar<uint16_t>(field_offset);
|
||||
|
||||
VTable::Entry entry;
|
||||
entry.field = nullptr; // No field to reference.
|
||||
entry.offset_from_table = offset_from_table;
|
||||
fields.insert(std::make_pair(id, entry));
|
||||
|
||||
regions.push_back(MakeBinaryRegion(
|
||||
field_offset, sizeof(uint16_t), BinaryRegionType::VOffset, 0, 0,
|
||||
std::string("offset to unknown field (id: " + std::to_string(id) +
|
||||
")")));
|
||||
}
|
||||
|
||||
sections_[vtable_offset] = MakeBinarySection(
|
||||
table->name()->str(), BinarySectionType::VTable, std::move(regions));
|
||||
|
||||
VTable vtable;
|
||||
vtable.fields = std::move(fields);
|
||||
vtable.table_size = table_size;
|
||||
vtable.vtable_size = vtable_size;
|
||||
|
||||
vtables_[vtable_offset] = vtable;
|
||||
}
|
||||
|
||||
void BinaryAnnotator::BuildTable(uint64_t offset, const BinarySectionType type,
|
||||
const reflection::Object *table) {
|
||||
std::vector<BinaryRegion> regions;
|
||||
const uint64_t table_offset = offset;
|
||||
|
||||
// Tables start with the vtable
|
||||
const uint64_t vtable_offset = table_offset - GetScalar<int32_t>(offset);
|
||||
regions.emplace_back(
|
||||
MakeBinaryRegion(offset, sizeof(int32_t), BinaryRegionType::SOffset, 0,
|
||||
vtable_offset, std::string("offset to vtable")));
|
||||
offset += sizeof(int32_t);
|
||||
|
||||
// Parse the vtable first so we know what the rest of the fields in the table
|
||||
// are.
|
||||
BuildVTable(vtable_offset, table);
|
||||
|
||||
const VTable &vtable = vtables_.at(vtable_offset);
|
||||
|
||||
// This is the size and length of this table.
|
||||
const uint16_t table_size = vtable.table_size;
|
||||
const uint64_t table_end_offset = table_offset + table_size;
|
||||
|
||||
const uint64_t field_offset_start = offset;
|
||||
|
||||
// We need to iterate over the vtable fields by their offset in the binary,
|
||||
// not by their IDs. So copy them over to another vector that we can sort on
|
||||
// the offset_from_table property.
|
||||
std::vector<VTable::Entry> fields;
|
||||
for (const auto &vtable_field : vtable.fields) {
|
||||
fields.push_back(vtable_field.second);
|
||||
}
|
||||
|
||||
std::stable_sort(fields.begin(), fields.end(),
|
||||
[](const VTable::Entry &a, const VTable::Entry &b) {
|
||||
return a.offset_from_table < b.offset_from_table;
|
||||
});
|
||||
|
||||
// Iterate over all the fields by order of their offset.
|
||||
for (size_t i = 0; i < fields.size(); ++i) {
|
||||
const reflection::Field *field = fields[i].field;
|
||||
const uint16_t offset_from_table = fields[i].offset_from_table;
|
||||
|
||||
if (offset_from_table == 0) {
|
||||
// Skip non-present fields.
|
||||
continue;
|
||||
}
|
||||
|
||||
// The field offsets are relative to the start of the table.
|
||||
const uint64_t field_offset = table_offset + offset_from_table;
|
||||
|
||||
// We have a vtable entry for a non-existant field, that means its a binary
|
||||
// generated by a newer schema than we are currently processing.
|
||||
if (field == nullptr) {
|
||||
// Calculate the length of this unknown field.
|
||||
const uint64_t unknown_field_length =
|
||||
// Check if there is another unknown field after this one.
|
||||
((i + 1 < fields.size())
|
||||
? table_offset + fields[i + 1].offset_from_table
|
||||
// Otherwise use the known end of the table.
|
||||
: table_end_offset) -
|
||||
field_offset;
|
||||
|
||||
std::string hint;
|
||||
if (unknown_field_length == 4) {
|
||||
// The field is 4 in length, so it could be an offset? Provide a hint.
|
||||
hint += " <possibly an offset? Check Loc: +0x";
|
||||
hint += ToHex(field_offset + GetScalar<uint32_t>(field_offset));
|
||||
hint += ">";
|
||||
}
|
||||
|
||||
regions.emplace_back(
|
||||
MakeBinaryRegion(field_offset, unknown_field_length * sizeof(uint8_t),
|
||||
BinaryRegionType::Unknown, unknown_field_length, 0,
|
||||
std::string("Unknown field") + hint));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (IsScalar(field->type()->base_type())) {
|
||||
// These are the raw values store in the table.
|
||||
offset = BuildField(field_offset, field, regions);
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (field->type()->base_type()) {
|
||||
case reflection::BaseType::Obj: {
|
||||
const reflection::Object *next_object =
|
||||
schema_->objects()->Get(field->type()->index());
|
||||
|
||||
if (next_object->is_struct()) {
|
||||
// Structs are stored inline.
|
||||
offset = BuildStruct(field_offset, regions, next_object);
|
||||
} else {
|
||||
const uint64_t next_object_offset =
|
||||
field_offset + GetScalar<uint32_t>(field_offset);
|
||||
regions.emplace_back(MakeBinaryRegion(
|
||||
field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
|
||||
next_object_offset,
|
||||
std::string("offset to field `") + field->name()->c_str() + "`"));
|
||||
offset += sizeof(uint32_t);
|
||||
|
||||
BuildTable(next_object_offset, BinarySectionType::Table, next_object);
|
||||
}
|
||||
} break;
|
||||
|
||||
case reflection::BaseType::String: {
|
||||
const uint64_t string_offset =
|
||||
field_offset + GetScalar<uint32_t>(field_offset);
|
||||
|
||||
regions.emplace_back(MakeBinaryRegion(
|
||||
field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
|
||||
string_offset,
|
||||
std::string("offset to field `") + field->name()->c_str() + "`"));
|
||||
|
||||
BuildString(string_offset, table, field);
|
||||
} break;
|
||||
|
||||
case reflection::BaseType::Vector: {
|
||||
const uint64_t vector_offset =
|
||||
field_offset + GetScalar<uint32_t>(field_offset);
|
||||
|
||||
regions.emplace_back(MakeBinaryRegion(
|
||||
field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
|
||||
vector_offset,
|
||||
std::string("offset to field `") + field->name()->c_str() + "`"));
|
||||
|
||||
BuildVector(vector_offset, table, field, table_offset, vtable);
|
||||
} break;
|
||||
|
||||
case reflection::BaseType::Union: {
|
||||
const uint64_t union_offset =
|
||||
field_offset + GetScalar<uint32_t>(field_offset);
|
||||
|
||||
// The union type field is always one less than the union itself.
|
||||
const uint16_t union_type_id = field->id() - 1;
|
||||
|
||||
auto vtable_entry = vtable.fields.find(union_type_id);
|
||||
if (vtable_entry == vtable.fields.end()) {
|
||||
// TODO(dbaileychess): need to capture this error condition.
|
||||
break;
|
||||
}
|
||||
|
||||
const uint64_t type_offset =
|
||||
table_offset + vtable_entry->second.offset_from_table;
|
||||
|
||||
const uint8_t realized_type = GetScalar<uint8_t>(type_offset);
|
||||
|
||||
const std::string enum_type =
|
||||
BuildUnion(union_offset, realized_type, field);
|
||||
|
||||
regions.emplace_back(MakeBinaryRegion(
|
||||
field_offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
|
||||
union_offset,
|
||||
std::string("offset to field `") + field->name()->c_str() +
|
||||
"` (union of type `" + enum_type + "`)"));
|
||||
|
||||
} break;
|
||||
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
// Fill in any regions that weren't covered above, as those are padding
|
||||
// regions.
|
||||
size_t region_index = 1;
|
||||
std::vector<BinaryRegion> padding_regions;
|
||||
uint64_t i = field_offset_start;
|
||||
while (region_index < regions.size() && i < table_end_offset) {
|
||||
const uint64_t region_start = regions[region_index].offset;
|
||||
const uint64_t region_end = region_start + regions[region_index].length;
|
||||
|
||||
if (i < region_start) {
|
||||
const uint64_t pad_bytes = region_start - i;
|
||||
// We are at an index that is lower than any region, so pad upto its
|
||||
// offset.
|
||||
padding_regions.emplace_back(
|
||||
MakeBinaryRegion(i, pad_bytes * sizeof(uint8_t),
|
||||
BinaryRegionType::Uint8, pad_bytes, 0, "padding"));
|
||||
i = region_end;
|
||||
region_index++;
|
||||
} else if (i < region_end) {
|
||||
i = region_end + 1;
|
||||
} else {
|
||||
region_index++;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle the case where there is padding after the last known binary
|
||||
// region. Calculate where we left off towards the expected end of the
|
||||
// table.
|
||||
if (i < table_end_offset) {
|
||||
const uint64_t pad_bytes = table_end_offset - i + 1;
|
||||
padding_regions.emplace_back(
|
||||
MakeBinaryRegion(i - 1, pad_bytes * sizeof(uint8_t),
|
||||
BinaryRegionType::Uint8, pad_bytes, 0, "padding"));
|
||||
}
|
||||
|
||||
regions.insert(regions.end(), padding_regions.begin(), padding_regions.end());
|
||||
|
||||
std::stable_sort(regions.begin(), regions.end(),
|
||||
[&](const BinaryRegion &a, const BinaryRegion &b) {
|
||||
return a.offset < b.offset;
|
||||
});
|
||||
|
||||
sections_.insert(std::make_pair(
|
||||
table_offset,
|
||||
MakeBinarySection(table->name()->str(), type, std::move(regions))));
|
||||
}
|
||||
|
||||
uint64_t BinaryAnnotator::BuildStruct(uint64_t offset,
|
||||
std::vector<BinaryRegion> ®ions,
|
||||
const reflection::Object *object) {
|
||||
if (!object->is_struct()) { return offset; }
|
||||
|
||||
// Loop over all the fields in increasing order
|
||||
ForAllFields(object, /*reverse=*/false, [&](const reflection::Field *field) {
|
||||
if (IsScalar(field->type()->base_type())) {
|
||||
offset = BuildStructureField(offset, object, field, regions);
|
||||
|
||||
} else if (field->type()->base_type() == reflection::BaseType::Obj) {
|
||||
// Structs are stored inline, even when nested.
|
||||
offset = BuildStruct(offset, regions,
|
||||
schema_->objects()->Get(field->type()->index()));
|
||||
} else if (field->type()->base_type() == reflection::BaseType::Array) {
|
||||
// Arrays are just repeated structures.
|
||||
if (IsScalar(field->type()->element())) {
|
||||
offset = BuildArrayField(offset, object, field,
|
||||
field->type()->fixed_length(), regions);
|
||||
} else {
|
||||
for (uint16_t i = 0; i < field->type()->fixed_length(); ++i) {
|
||||
// TODO(dbaileychess): This works, but the comments on the fields lose
|
||||
// some context. Need to figure a way how to plumb the nested arrays
|
||||
// comments together that isn't too confusing.
|
||||
offset = BuildStruct(offset, regions,
|
||||
schema_->objects()->Get(field->type()->index()));
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
void BinaryAnnotator::BuildString(uint64_t offset,
|
||||
const reflection::Object *table,
|
||||
const reflection::Field *field) {
|
||||
// Check if we have already generated this string section, and this is a
|
||||
// shared string instance.
|
||||
if (strings_.find(offset) != strings_.end()) { return; }
|
||||
|
||||
std::vector<BinaryRegion> regions;
|
||||
const uint32_t string_length = GetScalar<uint32_t>(offset);
|
||||
|
||||
const uint64_t string_soffset = offset;
|
||||
|
||||
regions.emplace_back(MakeBinaryRegion(offset, sizeof(uint32_t),
|
||||
BinaryRegionType::Uint32, 0, 0,
|
||||
std::string("length of string")));
|
||||
offset += sizeof(uint32_t);
|
||||
|
||||
regions.emplace_back(MakeBinaryRegion(offset, string_length * sizeof(char),
|
||||
BinaryRegionType::Char, string_length,
|
||||
0, ""));
|
||||
offset += string_length * sizeof(char);
|
||||
|
||||
regions.emplace_back(MakeBinaryRegion(offset, sizeof(char),
|
||||
BinaryRegionType::Char, 0, 0,
|
||||
std::string("string terminator")));
|
||||
offset += sizeof(char);
|
||||
|
||||
sections_.insert(std::make_pair(
|
||||
string_soffset,
|
||||
MakeBinarySection(
|
||||
std::string(table->name()->c_str()) + "." + field->name()->c_str(),
|
||||
BinarySectionType::String, std::move(regions))));
|
||||
|
||||
// Insert into the strings set to find possible instances of shared strings.
|
||||
strings_.insert(string_soffset);
|
||||
}
|
||||
|
||||
void BinaryAnnotator::BuildVector(uint64_t offset,
|
||||
const reflection::Object *table,
|
||||
const reflection::Field *field,
|
||||
const uint64_t parent_table_offset,
|
||||
const VTable &vtable) {
|
||||
std::vector<BinaryRegion> regions;
|
||||
const uint32_t vector_length = GetScalar<uint32_t>(offset);
|
||||
|
||||
const uint64_t vector_offset = offset;
|
||||
|
||||
regions.emplace_back(
|
||||
MakeBinaryRegion(offset, sizeof(uint32_t), BinaryRegionType::Uint32, 0, 0,
|
||||
std::string("length of vector (# items)")));
|
||||
offset += sizeof(uint32_t);
|
||||
|
||||
switch (field->type()->element()) {
|
||||
case reflection::BaseType::Obj: {
|
||||
const reflection::Object *object =
|
||||
schema_->objects()->Get(field->type()->index());
|
||||
|
||||
if (object->is_struct()) {
|
||||
// Vector of structs
|
||||
for (size_t i = 0; i < vector_length; ++i) {
|
||||
// Structs are inline to the vector.
|
||||
offset = BuildStruct(offset, regions, object);
|
||||
}
|
||||
} else {
|
||||
// Vector of objects
|
||||
for (size_t i = 0; i < vector_length; ++i) {
|
||||
// The table offset is relative from the offset location itself.
|
||||
const uint64_t table_offset = offset + GetScalar<uint32_t>(offset);
|
||||
|
||||
regions.emplace_back(MakeBinaryRegion(
|
||||
offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
|
||||
table_offset,
|
||||
std::string("offset to table[") + std::to_string(i) + "]"));
|
||||
|
||||
BuildTable(table_offset, BinarySectionType::Table, object);
|
||||
|
||||
offset += sizeof(uint32_t);
|
||||
}
|
||||
}
|
||||
} break;
|
||||
case reflection::BaseType::String: {
|
||||
// Vector of strings
|
||||
for (size_t i = 0; i < vector_length; ++i) {
|
||||
// The string offset is relative from the offset location itself.
|
||||
const uint64_t string_offset = offset + GetScalar<uint32_t>(offset);
|
||||
|
||||
regions.emplace_back(MakeBinaryRegion(
|
||||
offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
|
||||
string_offset,
|
||||
std::string("offset to string[") + std::to_string(i) + "]"));
|
||||
|
||||
BuildString(string_offset, table, field);
|
||||
|
||||
offset += sizeof(uint32_t);
|
||||
}
|
||||
} break;
|
||||
case reflection::BaseType::Union: {
|
||||
// Vector of unions
|
||||
// Unions have both their realized type (uint8_t for now) that are
|
||||
// stored sperately. These are stored in the field->index() - 1
|
||||
// location.
|
||||
const uint16_t union_type_vector_id = field->id() - 1;
|
||||
|
||||
auto vtable_entry = vtable.fields.find(union_type_vector_id);
|
||||
if (vtable_entry == vtable.fields.end()) {
|
||||
// TODO(dbaileychess): need to capture this error condition.
|
||||
break;
|
||||
}
|
||||
|
||||
const uint64_t union_type_vector_field_offset =
|
||||
parent_table_offset + vtable_entry->second.offset_from_table;
|
||||
|
||||
// Get the offset to the first type (the + sizeof(uint32_t) is to skip
|
||||
// over the vector length which we already know)
|
||||
const uint64_t union_type_vector_data_offset =
|
||||
union_type_vector_field_offset +
|
||||
GetScalar<uint16_t>(union_type_vector_field_offset) +
|
||||
sizeof(uint32_t);
|
||||
|
||||
for (size_t i = 0; i < vector_length; ++i) {
|
||||
// The union offset is relative from the offset location itself.
|
||||
const uint64_t union_offset = offset + GetScalar<uint32_t>(offset);
|
||||
|
||||
const uint8_t realized_type = GetScalar<uint8_t>(
|
||||
union_type_vector_data_offset + i * sizeof(uint8_t));
|
||||
|
||||
const std::string enum_type =
|
||||
BuildUnion(union_offset, realized_type, field);
|
||||
|
||||
regions.emplace_back(MakeBinaryRegion(
|
||||
offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
|
||||
union_offset,
|
||||
std::string("offset to union[") + std::to_string(i) + "] (`" +
|
||||
enum_type + "`)"));
|
||||
|
||||
offset += sizeof(uint32_t);
|
||||
}
|
||||
} break;
|
||||
default: {
|
||||
if (IsScalar(field->type()->element())) {
|
||||
const BinaryRegionType binary_region_type =
|
||||
GetRegionType(field->type()->element());
|
||||
|
||||
const uint64_t type_size = GetTypeSize(field->type()->element());
|
||||
|
||||
// TODO(dbaileychess): It might be nicer to user the
|
||||
// BinaryRegion.array_length field to indicate this.
|
||||
for (size_t i = 0; i < vector_length; ++i) {
|
||||
regions.emplace_back(MakeBinaryRegion(
|
||||
offset, type_size, binary_region_type, 0, 0,
|
||||
std::string("value[") + std::to_string(i) + "]"));
|
||||
offset += type_size;
|
||||
}
|
||||
}
|
||||
} break;
|
||||
}
|
||||
|
||||
sections_.insert(std::make_pair(
|
||||
vector_offset,
|
||||
MakeBinarySection(
|
||||
std::string(table->name()->c_str()) + "." + field->name()->c_str(),
|
||||
BinarySectionType::Vector, std::move(regions))));
|
||||
}
|
||||
|
||||
std::string BinaryAnnotator::BuildUnion(uint64_t offset,
|
||||
const uint8_t realized_type,
|
||||
const reflection::Field *field) {
|
||||
const reflection::Enum *next_enum =
|
||||
schema_->enums()->Get(field->type()->index());
|
||||
|
||||
const reflection::EnumVal *enum_val = next_enum->values()->Get(realized_type);
|
||||
|
||||
const reflection::Type *union_type = enum_val->union_type();
|
||||
|
||||
if (union_type->base_type() == reflection::BaseType::Obj) {
|
||||
const reflection::Object *object =
|
||||
schema_->objects()->Get(union_type->index());
|
||||
|
||||
if (object->is_struct()) {
|
||||
// Union of vectors point to a new Binary section
|
||||
std::vector<BinaryRegion> regions;
|
||||
|
||||
offset = BuildStruct(offset, regions, object);
|
||||
|
||||
sections_.insert(std::make_pair(
|
||||
regions[0].offset,
|
||||
MakeBinarySection(std::string(object->name()->c_str()) + "." +
|
||||
field->name()->c_str(),
|
||||
BinarySectionType::Union, std::move(regions))));
|
||||
} else {
|
||||
BuildTable(offset, BinarySectionType::Table, object);
|
||||
}
|
||||
}
|
||||
// TODO(dbaileychess): handle the other union types.
|
||||
|
||||
return enum_val->name()->c_str();
|
||||
}
|
||||
|
||||
void BinaryAnnotator::FixMissingSections() {
|
||||
uint64_t offset = 0;
|
||||
|
||||
std::vector<BinarySection> sections_to_insert;
|
||||
|
||||
for (auto ¤t_section : sections_) {
|
||||
BinarySection §ion = current_section.second;
|
||||
const uint64_t section_start_offset = current_section.first;
|
||||
const uint64_t section_end_offset =
|
||||
section.regions.back().offset + section.regions.back().length;
|
||||
|
||||
if (offset < section_start_offset) {
|
||||
// We are at an offset that is less then the current section.
|
||||
const uint64_t pad_bytes = section_start_offset - offset + 1;
|
||||
|
||||
const uint64_t start_offset = offset - 1;
|
||||
|
||||
std::vector<BinaryRegion> regions;
|
||||
|
||||
// Check if the region is all zeros or not, as that can tell us if it is
|
||||
// padding or not.
|
||||
if (IsNonZeroRegion(offset - 1, pad_bytes, binary_)) {
|
||||
// Some of the padding bytes are non-zero, so this might be an unknown
|
||||
// section of the binary.
|
||||
regions.emplace_back(MakeBinaryRegion(
|
||||
start_offset, pad_bytes * sizeof(uint8_t),
|
||||
BinaryRegionType::Unknown, pad_bytes, 0,
|
||||
pad_bytes < 8 ? "could be a corrupted padding region (non zero) "
|
||||
"due to the length < 8 bytes."
|
||||
: "WARN: nothing refers to this. Check if any "
|
||||
"`Unkown Field`s point to this."));
|
||||
|
||||
sections_to_insert.emplace_back(
|
||||
MakeBinarySection("no known references", BinarySectionType::Unknown,
|
||||
std::move(regions)));
|
||||
} else {
|
||||
// This region is most likely padding.
|
||||
regions.emplace_back(MakeBinaryRegion(
|
||||
start_offset, pad_bytes * sizeof(uint8_t), BinaryRegionType::Uint8,
|
||||
pad_bytes, 0,
|
||||
// Output a different annotation if the pad bytes exceed what we
|
||||
// expect to be the maximum padding.
|
||||
pad_bytes > 7 ? "likely padding but might be an unknown section "
|
||||
"due to being larger than 7 bytes"
|
||||
: "padding"));
|
||||
|
||||
sections_to_insert.emplace_back(MakeBinarySection(
|
||||
"", BinarySectionType::Padding, std::move(regions)));
|
||||
}
|
||||
}
|
||||
offset = section_end_offset + 1;
|
||||
}
|
||||
|
||||
for (const BinarySection §ion_to_insert : sections_to_insert) {
|
||||
sections_.insert(
|
||||
std::make_pair(section_to_insert.regions[0].offset, section_to_insert));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace flatbuffers
|
||||
227
src/binary_annotator.h
Normal file
227
src/binary_annotator.h
Normal file
@@ -0,0 +1,227 @@
|
||||
/*
|
||||
* Copyright 2021 Google Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FLATBUFFERS_BINARY_ANNOTATOR_H_
|
||||
#define FLATBUFFERS_BINARY_ANNOTATOR_H_
|
||||
|
||||
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>

#include "flatbuffers/reflection.h"
|
||||
|
||||
namespace flatbuffers {
|
||||
|
||||
// The datatype of a single BinaryRegion. The numeric values are spelled out
// explicitly so they stay stable.
enum class BinaryRegionType {
  // Type is not known.
  Unknown = 0,

  // Offset types.
  UOffset = 1,
  SOffset = 2,
  VOffset = 3,

  // One-byte types.
  Bool = 4,
  Byte = 5,
  Char = 6,
  Uint8 = 7,
  Int8 = 8,

  // Wider integer types.
  Uint16 = 9,
  Int16 = 10,
  Uint32 = 11,
  Int32 = 12,
  Uint64 = 13,
  Int64 = 14,

  // Floating point types.
  Float = 15,
  Double = 16
};
|
||||
|
||||
// Reads a value of type `T` from the first `sizeof(T)` bytes of `binary`.
//
// The bytes are copied instead of being read through a cast pointer, so the
// read is well defined even when `binary` is not suitably aligned for `T`.
// No byte swapping is performed: the bytes are interpreted in the byte order
// of the host.
template<typename T> static inline T GetScalar(const uint8_t *binary) {
  T value;
  std::memcpy(&value, binary, sizeof(T));
  return value;
}
|
||||
|
||||
// Formats `i` as upper-case hexadecimal, left-padded with '0' to at least
// `width` characters. `width` defaults to `sizeof(T)`.
template<typename T>
static inline std::string ToHex(T i, size_t width = sizeof(T)) {
  std::stringstream hex_stream;
  hex_stream << std::hex << std::uppercase;
  hex_stream << std::setfill('0') << std::setw(width);
  hex_stream << i;
  return hex_stream.str();
}

// Overload for uint8_t, which streams would otherwise print as a character:
// the value is widened to int and formatted as two hex digits.
static inline std::string ToHex(uint8_t i) {
  const int widened = static_cast<int>(i);
  return ToHex(widened, 2);
}
|
||||
|
||||
struct BinaryRegion {
|
||||
// Offset into the binary where this region begins.
|
||||
uint64_t offset = 0;
|
||||
|
||||
// The length of this region in bytes.
|
||||
uint64_t length = 0;
|
||||
|
||||
// The underlying datatype of this region
|
||||
BinaryRegionType type = BinaryRegionType::Unknown;
|
||||
|
||||
// If `type` is an array/vector, this is the number of those types this region
|
||||
// encompasses.
|
||||
uint64_t array_length = 0;
|
||||
|
||||
// If the is an offset to some other region, this is what it points to. The
|
||||
// offset is relative to overall binary, not to this region.
|
||||
uint64_t points_to_offset = 0;
|
||||
|
||||
// The comment on the region.
|
||||
// TODO(dbaileychess): Consider moving this to a more structure comment field
|
||||
// so that other generators can parse it easier.
|
||||
std::string comment;
|
||||
};
|
||||
|
||||
// The kind of logical grouping a BinarySection represents. The numeric values
// are spelled out explicitly so they stay stable.
enum class BinarySectionType {
  // Bytes that could not be attributed to anything.
  Unknown = 0,

  Header = 1,

  // Tables and their vtables.
  Table = 2,
  RootTable = 3,
  VTable = 4,

  // Other kinds of data.
  Struct = 5,
  String = 6,
  Vector = 7,
  Union = 8,

  // Filler bytes between sections.
  Padding = 9,
};
|
||||
|
||||
// A section of the binary that is grouped together in some logical manner, and
|
||||
// often is pointed too by some other offset BinaryRegion. Sections include
|
||||
// `tables`, `vtables`, `strings`, `vectors`, etc..
|
||||
struct BinarySection {
|
||||
// User-specified name of the section, if applicable.
|
||||
std::string name;
|
||||
|
||||
// The type of this section.
|
||||
BinarySectionType type = BinarySectionType::Unknown;
|
||||
|
||||
// The binary regions that make up this section, in order of their offsets.
|
||||
std::vector<BinaryRegion> regions;
|
||||
};
|
||||
|
||||
// Maps a scalar reflection base type to the BinaryRegionType used to annotate
// it. Base types without a mapping yield BinaryRegionType::Unknown.
inline static BinaryRegionType GetRegionType(reflection::BaseType base_type) {
  switch (base_type) {
    // NOTE(review): every one-byte type is annotated as Uint8, including the
    // signed `Byte` and `Bool`. This looks deliberate but is not confirmed;
    // left unchanged.
    case reflection::UType: return BinaryRegionType::Uint8;
    case reflection::Bool: return BinaryRegionType::Uint8;
    case reflection::Byte: return BinaryRegionType::Uint8;
    case reflection::UByte: return BinaryRegionType::Uint8;
    case reflection::Short: return BinaryRegionType::Int16;
    case reflection::UShort: return BinaryRegionType::Uint16;
    // `Int` is signed, like `Short` and `Long`, so it maps to the signed
    // 32-bit region type.
    case reflection::Int: return BinaryRegionType::Int32;
    case reflection::UInt: return BinaryRegionType::Uint32;
    case reflection::Long: return BinaryRegionType::Int64;
    case reflection::ULong: return BinaryRegionType::Uint64;
    case reflection::Float: return BinaryRegionType::Float;
    case reflection::Double: return BinaryRegionType::Double;
    default: return BinaryRegionType::Unknown;
  }
}
|
||||
|
||||
inline static std::string ToString(const BinaryRegionType type) {
|
||||
switch (type) {
|
||||
case BinaryRegionType::UOffset: return "UOffset32";
|
||||
case BinaryRegionType::SOffset: return "SOffset32";
|
||||
case BinaryRegionType::VOffset: return "VOffset16";
|
||||
case BinaryRegionType::Bool: return "bool";
|
||||
case BinaryRegionType::Char: return "char";
|
||||
case BinaryRegionType::Byte: return "int8_t";
|
||||
case BinaryRegionType::Uint8: return "uint8_t";
|
||||
case BinaryRegionType::Uint16: return "uint16_t";
|
||||
case BinaryRegionType::Uint32: return "uint32_t";
|
||||
case BinaryRegionType::Uint64: return "uint64_t"; ;
|
||||
case BinaryRegionType::Int8: return "int8_t";
|
||||
case BinaryRegionType::Int16: return "int16_t";
|
||||
case BinaryRegionType::Int32: return "int32_t";
|
||||
case BinaryRegionType::Int64: return "int64_t";
|
||||
case BinaryRegionType::Double: return "double";
|
||||
case BinaryRegionType::Float: return "float";
|
||||
case BinaryRegionType::Unknown: return "?uint8_t";
|
||||
default: return "todo";
|
||||
}
|
||||
}
|
||||
|
||||
class BinaryAnnotator {
|
||||
public:
|
||||
explicit BinaryAnnotator(const uint8_t *const bfbs, const int64_t bfbs_length,
|
||||
const uint8_t *const binary)
|
||||
: bfbs_(bfbs),
|
||||
bfbs_length_(bfbs_length),
|
||||
schema_(reflection::GetSchema(bfbs)),
|
||||
binary_(binary) {}
|
||||
|
||||
std::map<uint64_t, BinarySection> Annotate();
|
||||
|
||||
private:
|
||||
struct VTable {
|
||||
struct Entry {
|
||||
const reflection::Field *field = nullptr;
|
||||
uint16_t offset_from_table = 0;
|
||||
};
|
||||
|
||||
// Field ID -> {field def, offset from table}
|
||||
std::map<uint16_t, Entry> fields;
|
||||
|
||||
uint16_t vtable_size = 0;
|
||||
uint16_t table_size = 0;
|
||||
};
|
||||
|
||||
uint64_t BuildHeader(uint64_t offset);
|
||||
|
||||
void BuildVTable(uint64_t offset, const reflection::Object *table);
|
||||
|
||||
void BuildTable(uint64_t offset, const BinarySectionType type,
|
||||
const reflection::Object *table);
|
||||
|
||||
uint64_t BuildStruct(uint64_t offset, std::vector<BinaryRegion> ®ions,
|
||||
const reflection::Object *structure);
|
||||
|
||||
void BuildString(uint64_t offset, const reflection::Object *table,
|
||||
const reflection::Field *field);
|
||||
|
||||
void BuildVector(uint64_t offset, const reflection::Object *table,
|
||||
const reflection::Field *field, uint64_t parent_table_offset,
|
||||
const VTable &vtable);
|
||||
|
||||
std::string BuildUnion(uint64_t offset, uint8_t realized_type,
|
||||
const reflection::Field *field);
|
||||
|
||||
void FixMissingSections();
|
||||
|
||||
template<typename T> inline T GetScalar(uint64_t offset) {
|
||||
return *reinterpret_cast<const T *>(binary_ + offset);
|
||||
}
|
||||
|
||||
// The schema for the binary file
|
||||
const uint8_t *bfbs_;
|
||||
const int64_t bfbs_length_;
|
||||
const reflection::Schema *schema_;
|
||||
|
||||
// The binary data itself.
|
||||
const uint8_t *binary_;
|
||||
|
||||
// Map of binary offset to vtables, to dedupe vtables.
|
||||
std::map<uint64_t, VTable> vtables_;
|
||||
|
||||
// A set of binary offset to string sections, to dedupe shared strings.
|
||||
std::set<uint64_t> strings_;
|
||||
|
||||
// The annotated binary sections, index by their absolute offset.
|
||||
std::map<uint64_t, BinarySection> sections_;
|
||||
};
|
||||
|
||||
} // namespace flatbuffers
|
||||
|
||||
#endif // FLATBUFFERS_BINARY_ANNOTATOR_H_
|
||||
103
src/flatc.cpp
103
src/flatc.cpp
@@ -19,6 +19,8 @@
|
||||
#include <list>
|
||||
#include <sstream>
|
||||
|
||||
#include "annotated_binary_text_gen.h"
|
||||
#include "binary_annotator.h"
|
||||
#include "flatbuffers/util.h"
|
||||
|
||||
namespace flatbuffers {
|
||||
@@ -215,6 +217,8 @@ const static FlatCOption options[] = {
|
||||
"in JSON, which is unsafe unless checked by a verifier afterwards." },
|
||||
{ "", "ts-flat-files", "",
|
||||
"Only generated one typescript file per .fbs file." },
|
||||
{ "", "annotate", "SCHEMA",
|
||||
"Annotate the provided BINARY_FILE with the specified SCHEMA file." },
|
||||
};
|
||||
|
||||
static void AppendTextWrappedString(std::stringstream &ss, std::string &text,
|
||||
@@ -297,7 +301,7 @@ std::string FlatCompiler::GetShortUsageString(const char *program_name) const {
|
||||
ss << ", ";
|
||||
}
|
||||
ss.seekp(-2, ss.cur);
|
||||
ss << "]... FILE... [-- FILE...]";
|
||||
ss << "]... FILE... [-- BINARY_FILE...]";
|
||||
std::string help = ss.str();
|
||||
std::stringstream ss_textwrap;
|
||||
AppendTextWrappedString(ss_textwrap, help, 80, 0);
|
||||
@@ -306,7 +310,8 @@ std::string FlatCompiler::GetShortUsageString(const char *program_name) const {
|
||||
|
||||
std::string FlatCompiler::GetUsageString(const char *program_name) const {
|
||||
std::stringstream ss;
|
||||
ss << "Usage: " << program_name << " [OPTION]... FILE... [-- FILE...]\n";
|
||||
ss << "Usage: " << program_name
|
||||
<< " [OPTION]... FILE... [-- BINARY_FILE...]\n";
|
||||
for (size_t i = 0; i < params_.num_generators; ++i) {
|
||||
const Generator &g = params_.generators[i];
|
||||
AppendOption(ss, g.option, 80, 25);
|
||||
@@ -320,16 +325,48 @@ std::string FlatCompiler::GetUsageString(const char *program_name) const {
|
||||
|
||||
std::string files_description =
|
||||
"FILEs may be schemas (must end in .fbs), binary schemas (must end in "
|
||||
".bfbs) or JSON files (conforming to preceding schema). FILEs after the "
|
||||
"-- must be binary flatbuffer format files. Output files are named using "
|
||||
"the base file name of the input, and written to the current directory "
|
||||
"or the path given by -o. example: " +
|
||||
".bfbs) or JSON files (conforming to preceding schema). BINARY_FILEs "
|
||||
"after the -- must be binary flatbuffer format files. Output files are "
|
||||
"named using the base file name of the input, and written to the current "
|
||||
"directory or the path given by -o. example: " +
|
||||
std::string(program_name) + " -c -b schema1.fbs schema2.fbs data.json";
|
||||
AppendTextWrappedString(ss, files_description, 80, 0);
|
||||
ss << "\n";
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
void FlatCompiler::AnnotateBinaries(
|
||||
const uint8_t *binary_schema, const uint64_t binary_schema_size,
|
||||
const std::string &schema_filename,
|
||||
const std::vector<std::string> &binary_files) {
|
||||
for (const std::string &filename : binary_files) {
|
||||
std::string binary_contents;
|
||||
if (!flatbuffers::LoadFile(filename.c_str(), true, &binary_contents)) {
|
||||
Warn("unable to load binary file: " + filename);
|
||||
continue;
|
||||
}
|
||||
|
||||
const uint8_t *binary =
|
||||
reinterpret_cast<const uint8_t *>(binary_contents.c_str());
|
||||
const size_t binary_size = binary_contents.size();
|
||||
|
||||
flatbuffers::BinaryAnnotator binary_annotator(binary_schema,
|
||||
binary_schema_size, binary);
|
||||
|
||||
auto annotations = binary_annotator.Annotate();
|
||||
|
||||
// TODO(dbaileychess): Right now we just support a single text-based
|
||||
// output of the annotated binary schema, which we generate here. We
|
||||
// could output the raw annotations instead and have third-party tools
|
||||
// use them to generate their own output.
|
||||
flatbuffers::AnnotatedBinaryTextGenerator text_generator(
|
||||
flatbuffers::AnnotatedBinaryTextGenerator::Options{}, annotations,
|
||||
binary, binary_size);
|
||||
|
||||
text_generator.Generate(filename, schema_filename);
|
||||
}
|
||||
}
|
||||
|
||||
int FlatCompiler::Compile(int argc, const char **argv) {
|
||||
if (params_.generators == nullptr || params_.num_generators == 0) {
|
||||
return 0;
|
||||
@@ -353,6 +390,7 @@ int FlatCompiler::Compile(int argc, const char **argv) {
|
||||
std::vector<bool> generator_enabled(params_.num_generators, false);
|
||||
size_t binary_files_from = std::numeric_limits<size_t>::max();
|
||||
std::string conform_to_schema;
|
||||
std::string annotate_schema;
|
||||
|
||||
const char *program_name = argv[0];
|
||||
|
||||
@@ -554,6 +592,9 @@ int FlatCompiler::Compile(int argc, const char **argv) {
|
||||
opts.json_nested_legacy_flatbuffers = true;
|
||||
} else if (arg == "--ts-flat-files") {
|
||||
opts.ts_flat_file = true;
|
||||
} else if (arg == "--annotate") {
|
||||
if (++argi >= argc) Error("missing path following: " + arg, true);
|
||||
annotate_schema = flatbuffers::PosixPath(argv[argi]);
|
||||
} else {
|
||||
for (size_t i = 0; i < params_.num_generators; ++i) {
|
||||
if (arg == "--" + params_.generators[i].option.long_opt ||
|
||||
@@ -582,7 +623,8 @@ int FlatCompiler::Compile(int argc, const char **argv) {
|
||||
if (opts.proto_mode) {
|
||||
if (any_generator)
|
||||
Error("cannot generate code directly from .proto files", true);
|
||||
} else if (!any_generator && conform_to_schema.empty()) {
|
||||
} else if (!any_generator && conform_to_schema.empty() &&
|
||||
annotate_schema.empty()) {
|
||||
Error("no options: specify at least one generator.", true);
|
||||
}
|
||||
|
||||
@@ -611,6 +653,53 @@ int FlatCompiler::Compile(int argc, const char **argv) {
|
||||
}
|
||||
}
|
||||
|
||||
if (!annotate_schema.empty()) {
|
||||
const std::string ext = flatbuffers::GetExtension(annotate_schema);
|
||||
if (!(ext == reflection::SchemaExtension() || ext == "fbs")) {
|
||||
Error("Expected a `.bfbs` or `.fbs` schema, got: " + annotate_schema);
|
||||
}
|
||||
|
||||
const bool is_binary_schema = ext == reflection::SchemaExtension();
|
||||
|
||||
std::string schema_contents;
|
||||
if (!flatbuffers::LoadFile(annotate_schema.c_str(),
|
||||
/*binary=*/is_binary_schema, &schema_contents)) {
|
||||
Error("unable to load schema: " + annotate_schema);
|
||||
}
|
||||
|
||||
const uint8_t *binary_schema = nullptr;
|
||||
uint64_t binary_schema_size = 0;
|
||||
|
||||
IDLOptions binary_opts;
|
||||
binary_opts.lang_to_generate |= flatbuffers::IDLOptions::kBinary;
|
||||
flatbuffers::Parser parser(binary_opts);
|
||||
|
||||
if (is_binary_schema) {
|
||||
binary_schema =
|
||||
reinterpret_cast<const uint8_t *>(schema_contents.c_str());
|
||||
binary_schema_size = schema_contents.size();
|
||||
} else {
|
||||
// If we need to generate the .bfbs file from the provided schema file
|
||||
// (.fbs)
|
||||
ParseFile(parser, annotate_schema, schema_contents, include_directories);
|
||||
parser.Serialize();
|
||||
|
||||
binary_schema = parser.builder_.GetBufferPointer();
|
||||
binary_schema_size = parser.builder_.GetSize();
|
||||
}
|
||||
|
||||
if (binary_schema == nullptr || !binary_schema_size) {
|
||||
Error("could not parse a value binary schema from: " + annotate_schema);
|
||||
}
|
||||
|
||||
// Annotate the provided files with the binary_schema.
|
||||
AnnotateBinaries(binary_schema, binary_schema_size, annotate_schema,
|
||||
filenames);
|
||||
|
||||
// We don't support doing anything else after annotating a binary.
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::unique_ptr<flatbuffers::Parser> parser(new flatbuffers::Parser(opts));
|
||||
|
||||
for (auto file_it = filenames.begin(); file_it != filenames.end();
|
||||
|
||||
@@ -120,6 +120,23 @@ std::string GetAnyValueS(reflection::BaseType type, const uint8_t *data,
|
||||
}
|
||||
}
|
||||
|
||||
// Calls `func` once for every field of `object`, ordered by field id:
// increasing ids when `reverse` is false, decreasing ids when it is true.
void ForAllFields(const reflection::Object *object, bool reverse,
                  std::function<void(const reflection::Field *)> func) {
  // Maps a field id to the position of that field in `object->fields()`.
  // NOTE(review): assumes the ids are exactly 0..N-1 for N fields; an id
  // outside that range would index past the end of the map. Confirm that
  // schemas are validated before they reach this function.
  std::vector<uint32_t> id_to_index;
  id_to_index.resize(object->fields()->size());

  // Create the mapping of field ID to the index into the vector.
  for (uint32_t i = 0; i < object->fields()->size(); ++i) {
    auto field = object->fields()->Get(i);
    id_to_index[field->id()] = i;
  }

  const size_t field_count = id_to_index.size();
  for (size_t i = 0; i < field_count; ++i) {
    // In reverse order the i-th visited id is (count - 1 - i), which walks
    // from the highest id down to 0 without leaving the valid range.
    const size_t id = reverse ? field_count - (i + 1) : i;
    func(object->fields()->Get(id_to_index[id]));
  }
}
|
||||
|
||||
void SetAnyValueI(reflection::BaseType type, uint8_t *data, int64_t val) {
|
||||
// clang-format off
|
||||
#define FLATBUFFERS_SET(T) WriteScalar(data, static_cast<T>(val))
|
||||
|
||||
Reference in New Issue
Block a user