[Annotated Buffers] Improve efficiency (#7820)

* AnnotatedBinaryTextGen switch to ofstream instead of building giant string

* Add --annotate-sparse-vectors to reduce AFB size
This commit is contained in:
Derek Bailey
2023-02-06 23:42:44 -06:00
committed by GitHub
parent 85aee1f5c3
commit 535ead8d8c
4 changed files with 108 additions and 86 deletions

View File

@@ -49,6 +49,7 @@ struct FlatCOptions {
size_t binary_files_from = std::numeric_limits<size_t>::max(); size_t binary_files_from = std::numeric_limits<size_t>::max();
std::string conform_to_schema; std::string conform_to_schema;
std::string annotate_schema; std::string annotate_schema;
bool annotate_include_vector_contents = true;
bool any_generator = false; bool any_generator = false;
bool print_make_rules = false; bool print_make_rules = false;
bool raw_binary = false; bool raw_binary = false;
@@ -110,8 +111,7 @@ class FlatCompiler {
void AnnotateBinaries(const uint8_t *binary_schema, void AnnotateBinaries(const uint8_t *binary_schema,
uint64_t binary_schema_size, uint64_t binary_schema_size,
const std::string &schema_filename, const FlatCOptions &options);
const std::vector<std::string> &binary_files);
void ValidateOptions(const FlatCOptions &options); void ValidateOptions(const FlatCOptions &options);

View File

@@ -1,6 +1,8 @@
#include "annotated_binary_text_gen.h" #include "annotated_binary_text_gen.h"
#include <algorithm> #include <algorithm>
#include <fstream>
#include <ostream>
#include <sstream> #include <sstream>
#include <string> #include <string>
@@ -21,6 +23,8 @@ struct OutputConfig {
size_t offset_max_char = 4; size_t offset_max_char = 4;
char delimiter = '|'; char delimiter = '|';
bool include_vector_contents = true;
}; };
static std::string ToString(const BinarySectionType type) { static std::string ToString(const BinarySectionType type) {
@@ -83,7 +87,7 @@ static std::string ToValueString(const BinaryRegion &region,
if (region.array_length) { if (region.array_length) {
if (region.type == BinaryRegionType::Uint8 || if (region.type == BinaryRegionType::Uint8 ||
region.type == BinaryRegionType::Unknown) { region.type == BinaryRegionType::Unknown) {
// Interpet each value as a ASCII to aid debugging // Interpret each value as a ASCII to aid debugging
for (uint64_t i = 0; i < region.array_length; ++i) { for (uint64_t i = 0; i < region.array_length; ++i) {
const uint8_t c = *(binary + region.offset + i); const uint8_t c = *(binary + region.offset + i);
s += isprint(c) ? static_cast<char>(c & 0x7F) : '.'; s += isprint(c) ? static_cast<char>(c & 0x7F) : '.';
@@ -257,84 +261,74 @@ static std::string GenerateComment(const BinaryRegionComment &comment,
return s; return s;
} }
static std::string GenerateDocumentation(const BinaryRegion &region, static void GenerateDocumentation(std::ostream &os, const BinaryRegion &region,
const BinarySection &section, const BinarySection &section,
const uint8_t *binary, const uint8_t *binary,
DocContinuation &continuation, DocContinuation &continuation,
const OutputConfig &output_config) { const OutputConfig &output_config) {
std::string s;
// Check if there is a doc continuation that should be prioritized. // Check if there is a doc continuation that should be prioritized.
if (continuation.value_start_column) { if (continuation.value_start_column) {
s += std::string(continuation.value_start_column - 2, ' '); os << std::string(continuation.value_start_column - 2, ' ');
s += output_config.delimiter; os << output_config.delimiter << " ";
s += " ";
s += continuation.value.substr(0, output_config.max_bytes_per_line); os << continuation.value.substr(0, output_config.max_bytes_per_line);
continuation.value = continuation.value.substr( continuation.value = continuation.value.substr(
std::min(output_config.max_bytes_per_line, continuation.value.size())); std::min(output_config.max_bytes_per_line, continuation.value.size()));
return s; return;
} }
size_t size_of = 0;
{ {
std::stringstream ss; std::stringstream ss;
ss << std::setw(static_cast<int>(output_config.largest_type_string)) << std::left; ss << std::setw(static_cast<int>(output_config.largest_type_string))
<< std::left;
ss << GenerateTypeString(region); ss << GenerateTypeString(region);
s += ss.str(); os << ss.str();
size_of = ss.str().size();
} }
s += " "; os << " " << output_config.delimiter << " ";
s += output_config.delimiter;
s += " ";
if (region.array_length) { if (region.array_length) {
// Record where the value is first being outputted. // Record where the value is first being outputted.
continuation.value_start_column = s.size(); continuation.value_start_column = 3 + size_of;
// Get the full-length value, which we will chunk below. // Get the full-length value, which we will chunk below.
const std::string value = ToValueString(region, binary, output_config); const std::string value = ToValueString(region, binary, output_config);
std::stringstream ss; std::stringstream ss;
ss << std::setw(static_cast<int>(output_config.largest_value_string)) << std::left; ss << std::setw(static_cast<int>(output_config.largest_value_string))
<< std::left;
ss << value.substr(0, output_config.max_bytes_per_line); ss << value.substr(0, output_config.max_bytes_per_line);
s += ss.str(); os << ss.str();
continuation.value = continuation.value =
value.substr(std::min(output_config.max_bytes_per_line, value.size())); value.substr(std::min(output_config.max_bytes_per_line, value.size()));
} else { } else {
std::stringstream ss; std::stringstream ss;
ss << std::setw(static_cast<int>(output_config.largest_value_string)) << std::left; ss << std::setw(static_cast<int>(output_config.largest_value_string))
<< std::left;
ss << ToValueString(region, binary, output_config); ss << ToValueString(region, binary, output_config);
s += ss.str(); os << ss.str();
} }
s += " "; os << " " << output_config.delimiter << " ";
s += output_config.delimiter; os << GenerateComment(region.comment, section);
s += " ";
s += GenerateComment(region.comment, section);
return s;
} }
static std::string GenerateRegion(const BinaryRegion &region, static void GenerateRegion(std::ostream &os, const BinaryRegion &region,
const BinarySection &section, const BinarySection &section, const uint8_t *binary,
const uint8_t *binary, const OutputConfig &output_config) {
const OutputConfig &output_config) {
std::string s;
bool doc_generated = false; bool doc_generated = false;
DocContinuation doc_continuation; DocContinuation doc_continuation;
for (uint64_t i = 0; i < region.length; ++i) { for (uint64_t i = 0; i < region.length; ++i) {
if ((i % output_config.max_bytes_per_line) == 0) { if ((i % output_config.max_bytes_per_line) == 0) {
// Start a new line of output // Start a new line of output
s += '\n'; os << std::endl;
s += " "; os << " +0x" << ToHex(region.offset + i, output_config.offset_max_char);
s += "+0x"; os << " " << output_config.delimiter;
s += ToHex(region.offset + i, output_config.offset_max_char);
s += " ";
s += output_config.delimiter;
} }
// Add each byte // Add each byte
s += " "; os << " " << ToHex(binary[region.offset + i]);
s += ToHex(binary[region.offset + i]);
// Check for end of line or end of region conditions. // Check for end of line or end of region conditions.
if (((i + 1) % output_config.max_bytes_per_line == 0) || if (((i + 1) % output_config.max_bytes_per_line == 0) ||
@@ -344,17 +338,16 @@ static std::string GenerateRegion(const BinaryRegion &region,
// zero those out to align everything globally. // zero those out to align everything globally.
for (uint64_t j = i + 1; (j % output_config.max_bytes_per_line) != 0; for (uint64_t j = i + 1; (j % output_config.max_bytes_per_line) != 0;
++j) { ++j) {
s += " "; os << " ";
} }
} }
s += " "; os << " " << output_config.delimiter;
s += output_config.delimiter;
// This is the end of the first line or its the last byte of the region, // This is the end of the first line or its the last byte of the region,
// generate the end-of-line documentation. // generate the end-of-line documentation.
if (!doc_generated) { if (!doc_generated) {
s += " "; os << " ";
s += GenerateDocumentation(region, section, binary, doc_continuation, GenerateDocumentation(os, region, section, binary, doc_continuation,
output_config); output_config);
// If we have a value in the doc continuation, that means the doc is // If we have a value in the doc continuation, that means the doc is
// being printed on multiple lines. // being printed on multiple lines.
@@ -362,22 +355,41 @@ static std::string GenerateRegion(const BinaryRegion &region,
} }
} }
} }
return s;
} }
static std::string GenerateSection(const BinarySection &section, static void GenerateSection(std::ostream &os, const BinarySection &section,
const uint8_t *binary, const uint8_t *binary,
const OutputConfig &output_config) { const OutputConfig &output_config) {
std::string s; os << std::endl;
s += "\n"; os << ToString(section.type);
s += ToString(section.type); if (!section.name.empty()) { os << " (" + section.name + ")"; }
if (!section.name.empty()) { s += " (" + section.name + ")"; } os << ":";
s += ":";
for (const BinaryRegion &region : section.regions) { // As a space saving measure, skip generating every vector element, just put
s += GenerateRegion(region, section, binary, output_config); // the first and last elements in the output. Skip the whole thing if there
// are only two or fewer elements, as it doesn't save space.
if (section.type == BinarySectionType::Vector &&
!output_config.include_vector_contents && section.regions.size() > 3) {
// Generate the length region which should be first.
GenerateRegion(os, section.regions[0], section, binary, output_config);
// Generate the first element.
GenerateRegion(os, section.regions[1], section, binary, output_config);
// Indicate that we omitted lines.
os << std::endl
<< " <" << section.regions.size() - 2 << " regions omitted>";
// Generate the last element.
GenerateRegion(os, section.regions.back(), section, binary, output_config);
os << std::endl;
return;
} }
return s;
for (const BinaryRegion &region : section.regions) {
GenerateRegion(os, region, section, binary, output_config);
}
os << std::endl;
} }
} // namespace } // namespace
@@ -385,6 +397,7 @@ bool AnnotatedBinaryTextGenerator::Generate(
const std::string &filename, const std::string &schema_filename) { const std::string &filename, const std::string &schema_filename) {
OutputConfig output_config; OutputConfig output_config;
output_config.max_bytes_per_line = options_.max_bytes_per_line; output_config.max_bytes_per_line = options_.max_bytes_per_line;
output_config.include_vector_contents = options_.include_vector_contents;
// Given the length of the binary, we can calculate the maximum number of // Given the length of the binary, we can calculate the maximum number of
// characters to display in the offset hex: (i.e. 2 would lead to 0XFF being // characters to display in the offset hex: (i.e. 2 would lead to 0XFF being
@@ -414,19 +427,6 @@ bool AnnotatedBinaryTextGenerator::Generate(
} }
} }
// Generate each of the binary sections
std::string s;
s += "// Annotated Flatbuffer Binary\n";
s += "//\n";
s += "// Schema file: " + schema_filename + "\n";
s += "// Binary file: " + filename + "\n";
for (const auto &section : annotations_) {
s += GenerateSection(section.second, binary_, output_config);
s += "\n";
}
// Modify the output filename. // Modify the output filename.
std::string output_filename = StripExtension(filename); std::string output_filename = StripExtension(filename);
output_filename += options_.output_postfix; output_filename += options_.output_postfix;
@@ -434,7 +434,20 @@ bool AnnotatedBinaryTextGenerator::Generate(
"." + (options_.output_extension.empty() ? GetExtension(filename) "." + (options_.output_extension.empty() ? GetExtension(filename)
: options_.output_extension); : options_.output_extension);
return SaveFile(output_filename.c_str(), s, false); std::ofstream ofs(output_filename.c_str());
ofs << "// Annotated Flatbuffer Binary" << std::endl;
ofs << "//" << std::endl;
ofs << "// Schema file: " << schema_filename << std::endl;
ofs << "// Binary file: " << filename << std::endl;
// Generate each of the binary sections
for (const auto &section : annotations_) {
GenerateSection(ofs, section.second, binary_, output_config);
}
ofs.close();
return true;
} }
} // namespace flatbuffers } // namespace flatbuffers

View File

@@ -41,6 +41,9 @@ class AnnotatedBinaryTextGenerator {
// //
// Example: binary1.bin -> binary1.afb // Example: binary1.bin -> binary1.afb
std::string output_extension = "afb"; std::string output_extension = "afb";
// Controls.
bool include_vector_contents = true;
}; };
explicit AnnotatedBinaryTextGenerator( explicit AnnotatedBinaryTextGenerator(

View File

@@ -243,6 +243,7 @@ const static FlatCOption flatc_options[] = {
"ts_entry_points." }, "ts_entry_points." },
{ "", "ts-entry-points", "", { "", "ts-entry-points", "",
"Generate entry point typescript per namespace. Implies gen-all." }, "Generate entry point typescript per namespace. Implies gen-all." },
{ "", "annotate-sparse-vectors", "", "Don't annotate every vector element."},
{ "", "annotate", "SCHEMA", { "", "annotate", "SCHEMA",
"Annotate the provided BINARY_FILE with the specified SCHEMA file." }, "Annotate the provided BINARY_FILE with the specified SCHEMA file." },
{ "", "no-leak-private-annotation", "", { "", "no-leak-private-annotation", "",
@@ -371,11 +372,12 @@ std::string FlatCompiler::GetUsageString(
return ss.str(); return ss.str();
} }
void FlatCompiler::AnnotateBinaries( void FlatCompiler::AnnotateBinaries(const uint8_t *binary_schema,
const uint8_t *binary_schema, const uint64_t binary_schema_size, const uint64_t binary_schema_size,
const std::string &schema_filename, const FlatCOptions &options) {
const std::vector<std::string> &binary_files) { const std::string &schema_filename = options.annotate_schema;
for (const std::string &filename : binary_files) {
for (const std::string &filename : options.filenames) {
std::string binary_contents; std::string binary_contents;
if (!flatbuffers::LoadFile(filename.c_str(), true, &binary_contents)) { if (!flatbuffers::LoadFile(filename.c_str(), true, &binary_contents)) {
Warn("unable to load binary file: " + filename); Warn("unable to load binary file: " + filename);
@@ -391,13 +393,16 @@ void FlatCompiler::AnnotateBinaries(
auto annotations = binary_annotator.Annotate(); auto annotations = binary_annotator.Annotate();
flatbuffers::AnnotatedBinaryTextGenerator::Options text_gen_opts;
text_gen_opts.include_vector_contents =
options.annotate_include_vector_contents;
// TODO(dbaileychess): Right now we just support a single text-based // TODO(dbaileychess): Right now we just support a single text-based
// output of the annotated binary schema, which we generate here. We // output of the annotated binary schema, which we generate here. We
// could output the raw annotations instead and have third-party tools // could output the raw annotations instead and have third-party tools
// use them to generate their own output. // use them to generate their own output.
flatbuffers::AnnotatedBinaryTextGenerator text_generator( flatbuffers::AnnotatedBinaryTextGenerator text_generator(
flatbuffers::AnnotatedBinaryTextGenerator::Options{}, annotations, text_gen_opts, annotations, binary, binary_size);
binary, binary_size);
text_generator.Generate(filename, schema_filename); text_generator.Generate(filename, schema_filename);
} }
@@ -641,6 +646,8 @@ FlatCOptions FlatCompiler::ParseFromCommandLineArguments(int argc,
opts.ts_no_import_ext = true; opts.ts_no_import_ext = true;
} else if (arg == "--no-leak-private-annotation") { } else if (arg == "--no-leak-private-annotation") {
opts.no_leak_private_annotations = true; opts.no_leak_private_annotations = true;
} else if (arg == "--annotate-sparse-vectors") {
options.annotate_include_vector_contents = false;
} else if (arg == "--annotate") { } else if (arg == "--annotate") {
if (++argi >= argc) Error("missing path following: " + arg, true); if (++argi >= argc) Error("missing path following: " + arg, true);
options.annotate_schema = flatbuffers::PosixPath(argv[argi]); options.annotate_schema = flatbuffers::PosixPath(argv[argi]);
@@ -939,8 +946,7 @@ int FlatCompiler::Compile(const FlatCOptions &options) {
} }
// Annotate the provided files with the binary_schema. // Annotate the provided files with the binary_schema.
AnnotateBinaries(binary_schema, binary_schema_size, options.annotate_schema, AnnotateBinaries(binary_schema, binary_schema_size, options);
options.filenames);
// We don't support doing anything else after annotating a binary. // We don't support doing anything else after annotating a binary.
return 0; return 0;