Parsing from proto should keep field ID. (fixes #7645) (#7655)

* Parsing from proto should keep field ID. (fixes #7645)

* Fix failed tests

* Fix windows warning

* Improve attribute generation in proto to fbs

* Check if id is used twice. fix Some clang-format problems

* Test if fake id can solve the test problem

* Validate proto file in proto -> fbs generation.

* Fix error messages

* Ignore id in union

* Add keep proto id for legacy and check gap flag have been added. Reserved id will be checked.

* Add needed flags

* unit tests

* fix fromat problem. fix comments and error messages.

* clear

* More unit tests

* Fix windows build

* Fix include problems

* Fake commit to invoke rebuild

* Fix buzel build

* Fix some issues

* Fix comments, fix return value and sort for android NDK

* Fix return type

* Break down big function

* Place todo

---------

Co-authored-by: Derek Bailey <derekbailey@google.com>
This commit is contained in:
Saman
2023-02-02 03:17:35 +08:00
committed by GitHub
parent 08ebd202e2
commit f838017860
25 changed files with 1154 additions and 177 deletions

View File

@@ -170,6 +170,15 @@ const static FlatCOption flatc_options[] = {
{ "", "proto-namespace-suffix", "SUFFIX",
"Add this namespace to any flatbuffers generated from protobufs." },
{ "", "oneof-union", "", "Translate .proto oneofs to flatbuffer unions." },
{ "", "keep-proto-id", "", "Keep protobuf field ids in generated fbs file." },
{ "", "proto-id-gap", "",
"Action that should be taken when a gap between protobuf ids found. "
"Supported values: * "
"'nop' - do not care about gap * 'warn' - A warning message will be shown "
"about the gap in protobuf ids"
"(default) "
"* 'error' - An error message will be shown and the fbs generation will be "
"interrupted." },
{ "", "grpc", "", "Generate GRPC interfaces for the specified languages." },
{ "", "schema", "", "Serialize schemas instead of JSON (use with -b)." },
{ "", "bfbs-filenames", "PATH",
@@ -541,6 +550,18 @@ FlatCOptions FlatCompiler::ParseFromCommandLineArguments(int argc,
opts.proto_namespace_suffix = argv[argi];
} else if (arg == "--oneof-union") {
opts.proto_oneof_union = true;
} else if (arg == "--keep-proto-id") {
opts.keep_proto_id = true;
} else if (arg == "--proto-id-gap") {
if (++argi >= argc) Error("missing case style following: " + arg, true);
if (!strcmp(argv[argi], "nop"))
opts.proto_id_gap_action = IDLOptions::ProtoIdGapAction::NO_OP;
else if (!strcmp(argv[argi], "warn"))
opts.proto_id_gap_action = IDLOptions::ProtoIdGapAction::WARNING;
else if (!strcmp(argv[argi], "error"))
opts.proto_id_gap_action = IDLOptions::ProtoIdGapAction::ERROR;
else
Error("unknown case style: " + std::string(argv[argi]), true);
} else if (arg == "--schema") {
options.schema_binary = true;
} else if (arg == "-M") {

View File

@@ -15,6 +15,9 @@
*/
// independent from idl_parser, since this code is not needed for most clients
#include <unordered_map>
#include <utility>
#include <vector>
#include "flatbuffers/code_generators.h"
#include "flatbuffers/flatbuffers.h"
@@ -39,6 +42,184 @@ static std::string GenType(const Type &type, bool underlying = false) {
}
}
static bool HasFieldWithId(const std::vector<FieldDef *> &fields) {
static const std::string ID = "id";
for (const auto *field : fields) {
const auto *id_attribute = field->attributes.Lookup(ID);
if (id_attribute != nullptr && !id_attribute->constant.empty()) {
return true;
}
}
return false;
}
static bool HasNonPositiveFieldId(const std::vector<FieldDef *> &fields) {
static const std::string ID = "id";
for (const auto *field : fields) {
const auto *id_attribute = field->attributes.Lookup(ID);
if (id_attribute != nullptr && !id_attribute->constant.empty()) {
voffset_t proto_id = 0;
bool done = StringToNumber(id_attribute->constant.c_str(), &proto_id);
if (!done) { return true; }
}
}
return false;
}
static bool HasFieldIdFromReservedIds(
const std::vector<FieldDef *> &fields,
const std::vector<voffset_t> &reserved_ids) {
static const std::string ID = "id";
for (const auto *field : fields) {
const auto *id_attribute = field->attributes.Lookup(ID);
if (id_attribute != nullptr && !id_attribute->constant.empty()) {
voffset_t proto_id = 0;
bool done = StringToNumber(id_attribute->constant.c_str(), &proto_id);
if (!done) { return true; }
auto id_it =
std::find(std::begin(reserved_ids), std::end(reserved_ids), proto_id);
if (id_it != reserved_ids.end()) { return true; }
}
}
return false;
}
static std::vector<voffset_t> ExtractProtobufIds(
const std::vector<FieldDef *> &fields) {
static const std::string ID = "id";
std::vector<voffset_t> used_proto_ids;
for (const auto *field : fields) {
const auto *id_attribute = field->attributes.Lookup(ID);
if (id_attribute != nullptr && !id_attribute->constant.empty()) {
voffset_t proto_id = 0;
bool done = StringToNumber(id_attribute->constant.c_str(), &proto_id);
if (done) { used_proto_ids.push_back(proto_id); }
}
}
return used_proto_ids;
}
static bool HasTwiceUsedId(const std::vector<FieldDef *> &fields) {
std::vector<voffset_t> used_proto_ids = ExtractProtobufIds(fields);
std::sort(std::begin(used_proto_ids), std::end(used_proto_ids));
for (auto it = std::next(std::begin(used_proto_ids));
it != std::end(used_proto_ids); it++) {
if (*it == *std::prev(it)) { return true; }
}
return false;
}
static bool HasGapInProtoId(const std::vector<FieldDef *> &fields) {
std::vector<voffset_t> used_proto_ids = ExtractProtobufIds(fields);
std::sort(std::begin(used_proto_ids), std::end(used_proto_ids));
for (auto it = std::next(std::begin(used_proto_ids));
it != std::end(used_proto_ids); it++) {
if (*it != *std::prev(it) + 1) { return true; }
}
return false;
}
static bool ProtobufIdSanityCheck(const StructDef &struct_def,
IDLOptions::ProtoIdGapAction gap_action) {
const auto &fields = struct_def.fields.vec;
if (HasNonPositiveFieldId(fields)) {
// TODO: Use LogCompilerWarn
fprintf(stderr,
"Field id in struct %s has a non positive number value\n",
struct_def.name.c_str());
return false;
}
if (HasTwiceUsedId(fields)) {
// TODO: Use LogCompilerWarn
fprintf(stderr, "Fields in struct %s have used an id twice\n", struct_def.name.c_str());
return false;
}
if (HasFieldIdFromReservedIds(fields, struct_def.reserved_ids)) {
// TODO: Use LogCompilerWarn
fprintf(stderr,
"Fields in struct %s use id from reserved ids\n", struct_def.name.c_str());
return false;
}
if (gap_action != IDLOptions::ProtoIdGapAction::NO_OP) {
if (HasGapInProtoId(fields)) {
// TODO: Use LogCompilerWarn
fprintf(stderr, "Fields in struct %s have gap between ids\n", struct_def.name.c_str());
if (gap_action == IDLOptions::ProtoIdGapAction::ERROR) { return false; }
}
}
return true;
}
struct ProtobufToFbsIdMap {
using FieldName = std::string;
using FieldID = voffset_t;
using FieldNameToIdMap = std::unordered_map<FieldName, FieldID>;
FieldNameToIdMap field_to_id;
bool successful = false;
};
static ProtobufToFbsIdMap MapProtoIdsToFieldsId(
const StructDef &struct_def, IDLOptions::ProtoIdGapAction gap_action) {
const auto &fields = struct_def.fields.vec;
if (!HasFieldWithId(fields)) {
ProtobufToFbsIdMap result;
result.successful = true;
return result;
}
if (!ProtobufIdSanityCheck(struct_def, gap_action)) { return {}; }
static constexpr int UNION_ID = -1;
using ProtoIdFieldNamePair = std::pair<int, std::string>;
std::vector<ProtoIdFieldNamePair> proto_ids;
for (const auto *field : fields) {
const auto *id_attribute = field->attributes.Lookup("id");
if (id_attribute != nullptr) {
// When we have union but do not use union flag to keep them
if (id_attribute->constant.empty() &&
field->value.type.base_type == BASE_TYPE_UNION) {
proto_ids.emplace_back(UNION_ID, field->name);
} else {
voffset_t proto_id = 0;
StringToNumber(id_attribute->constant.c_str(), &proto_id);
proto_ids.emplace_back(proto_id, field->name);
}
} else {
// TODO: Use LogCompilerWarn
fprintf(stderr, "Fields id in struct %s is missing\n", struct_def.name.c_str());
return {};
}
}
std::sort(
std::begin(proto_ids), std::end(proto_ids),
[](const ProtoIdFieldNamePair &rhs, const ProtoIdFieldNamePair &lhs) {
return rhs.first < lhs.first;
});
struct ProtobufToFbsIdMap proto_to_fbs;
voffset_t id = 0;
for (const auto &element : proto_ids) {
if (element.first == UNION_ID) { id++; }
proto_to_fbs.field_to_id.emplace(element.second, id++);
}
proto_to_fbs.successful = true;
return proto_to_fbs;
}
static void GenNameSpace(const Namespace &name_space, std::string *_schema,
const Namespace **last_namespace) {
if (*last_namespace == &name_space) return;
@@ -79,8 +260,9 @@ std::string GenerateFBS(const Parser &parser, const std::string &file_name) {
int num_includes = 0;
for (auto it = parser.included_files_.begin();
it != parser.included_files_.end(); ++it) {
if (it->second.empty())
if (it->second.empty()) {
continue;
}
std::string basename;
if(parser.opts.keep_prefix) {
basename = flatbuffers::StripExtension(it->second);
@@ -94,6 +276,7 @@ std::string GenerateFBS(const Parser &parser, const std::string &file_name) {
if (num_includes) schema += "\n";
// clang-format on
}
// Generate code for all the enum declarations.
const Namespace *last_namespace = nullptr;
for (auto enum_def_it = parser.enums_.vec.begin();
@@ -104,18 +287,22 @@ std::string GenerateFBS(const Parser &parser, const std::string &file_name) {
}
GenNameSpace(*enum_def.defined_namespace, &schema, &last_namespace);
GenComment(enum_def.doc_comment, &schema, nullptr);
if (enum_def.is_union)
if (enum_def.is_union) {
schema += "union " + enum_def.name;
else
} else {
schema += "enum " + enum_def.name + " : ";
}
schema += GenType(enum_def.underlying_type, true) + " {\n";
for (auto it = enum_def.Vals().begin(); it != enum_def.Vals().end(); ++it) {
auto &ev = **it;
GenComment(ev.doc_comment, &schema, nullptr, " ");
if (enum_def.is_union)
if (enum_def.is_union) {
schema += " " + GenType(ev.union_type) + ",\n";
else
} else {
schema += " " + ev.name + " = " + enum_def.ToString(ev) + ",\n";
}
}
schema += "}\n\n";
}
@@ -123,9 +310,14 @@ std::string GenerateFBS(const Parser &parser, const std::string &file_name) {
for (auto it = parser.structs_.vec.begin(); it != parser.structs_.vec.end();
++it) {
StructDef &struct_def = **it;
const auto proto_fbs_ids =
MapProtoIdsToFieldsId(struct_def, parser.opts.proto_id_gap_action);
if (!proto_fbs_ids.successful) { return {}; }
if (parser.opts.include_dependence_headers && struct_def.generated) {
continue;
}
GenNameSpace(*struct_def.defined_namespace, &schema, &last_namespace);
GenComment(struct_def.doc_comment, &schema, nullptr);
schema += "table " + struct_def.name + " {\n";
@@ -136,8 +328,26 @@ std::string GenerateFBS(const Parser &parser, const std::string &file_name) {
GenComment(field.doc_comment, &schema, nullptr, " ");
schema += " " + field.name + ":" + GenType(field.value.type);
if (field.value.constant != "0") schema += " = " + field.value.constant;
if (field.IsRequired()) schema += " (required)";
if (field.key) schema += " (key)";
std::vector<std::string> attributes;
if (field.IsRequired()) attributes.push_back("required");
if (field.key) attributes.push_back("key");
if (parser.opts.keep_proto_id) {
auto it = proto_fbs_ids.field_to_id.find(field.name);
if (it != proto_fbs_ids.field_to_id.end()) {
attributes.push_back("id: " + NumToString(it->second));
} // If not found it means we do not have any ids
}
if (!attributes.empty()) {
schema += " (";
for (const auto &attribute : attributes) {
schema += attribute + ",";
}
schema.pop_back();
schema += ")";
}
schema += ";\n";
}
}
@@ -148,8 +358,13 @@ std::string GenerateFBS(const Parser &parser, const std::string &file_name) {
bool GenerateFBS(const Parser &parser, const std::string &path,
const std::string &file_name) {
return SaveFile((path + file_name + ".fbs").c_str(),
GenerateFBS(parser, file_name), false);
const std::string fbs = GenerateFBS(parser, file_name);
if (fbs.empty()) { return false; }
// TODO: Use LogCompilerWarn
fprintf(stderr,
"When you use --proto, that you should check for conformity "
"yourself, using the existing --conform");
return SaveFile((path + file_name + ".fbs").c_str(), fbs, false);
}
} // namespace flatbuffers

View File

@@ -920,8 +920,11 @@ CheckedError Parser::ParseField(StructDef &struct_def) {
if (struct_def.fixed) {
if (IsIncompleteStruct(type) ||
(IsArray(type) && IsIncompleteStruct(type.VectorType()))) {
std::string type_name = IsArray(type) ? type.VectorType().struct_def->name : type.struct_def->name;
return Error(std::string("Incomplete type in struct is not allowed, type name: ") + type_name);
std::string type_name = IsArray(type) ? type.VectorType().struct_def->name
: type.struct_def->name;
return Error(
std::string("Incomplete type in struct is not allowed, type name: ") +
type_name);
}
auto valid = IsScalar(type.base_type) || IsStruct(type);
@@ -2289,12 +2292,8 @@ template<typename T> void EnumDef::ChangeEnumValue(EnumVal *ev, T new_value) {
}
namespace EnumHelper {
template<BaseType E> struct EnumValType {
typedef int64_t type;
};
template<> struct EnumValType<BASE_TYPE_ULONG> {
typedef uint64_t type;
};
template<BaseType E> struct EnumValType { typedef int64_t type; };
template<> struct EnumValType<BASE_TYPE_ULONG> { typedef uint64_t type; };
} // namespace EnumHelper
struct EnumValBuilder {
@@ -2698,6 +2697,7 @@ CheckedError Parser::ParseDecl(const char *filename) {
for (voffset_t i = 0; i < static_cast<voffset_t>(fields.size()); i++) {
auto &field = *fields[i];
const auto &id_str = field.attributes.Lookup("id")->constant;
// Metadata values have a dynamic type, they can be `float`, 'int', or
// 'string`.
// The FieldIndexToOffset(i) expects the voffset_t so `id` is limited by
@@ -2921,8 +2921,40 @@ CheckedError Parser::ParseProtoFields(StructDef *struct_def, bool isextend,
ECHECK(ParseProtoOption());
EXPECT(';');
} else if (IsIdent("reserved")) { // Skip these.
/**
* Reserved proto ids can be comma seperated (e.g. 1,2,4,5;)
* or range based (e.g. 9 to 11;)
* or combination of them (e.g. 1,2,9 to 11,4,5;)
* It will be ended by a semicolon.
*/
NEXT();
while (!Is(';')) { NEXT(); } // A variety of formats, just skip.
bool range = false;
voffset_t from = 0;
while (!Is(';')) {
if (token_ == kTokenIntegerConstant) {
voffset_t attribute = 0;
bool done = StringToNumber(attribute_.c_str(), &attribute);
if (!done)
return Error("Protobuf has non positive number in reserved ids");
if (range) {
for (voffset_t id = from + 1; id <= attribute; id++)
struct_def->reserved_ids.push_back(id);
range = false;
} else {
struct_def->reserved_ids.push_back(attribute);
}
from = attribute;
}
if (attribute_ == "to") range = true;
NEXT();
} // A variety of formats, just skip.
NEXT();
} else if (IsIdent("map")) {
ECHECK(ParseProtoMapField(struct_def));
@@ -2980,11 +3012,13 @@ CheckedError Parser::ParseProtoFields(StructDef *struct_def, bool isextend,
}
std::string name = attribute_;
EXPECT(kTokenIdentifier);
std::string proto_field_id;
if (!oneof) {
// Parse the field id. Since we're just translating schemas, not
// any kind of binary compatibility, we can safely ignore these, and
// assign our own.
EXPECT('=');
proto_field_id = attribute_;
EXPECT(kTokenIntegerConstant);
}
FieldDef *field = nullptr;
@@ -2995,6 +3029,11 @@ CheckedError Parser::ParseProtoFields(StructDef *struct_def, bool isextend,
}
if (!field) ECHECK(AddField(*struct_def, name, type, &field));
field->doc_comment = field_comment;
if (!proto_field_id.empty() || oneof) {
auto val = new Value();
val->constant = proto_field_id;
field->attributes.Add("id", val);
}
if (!IsScalar(type.base_type) && required) {
field->presence = FieldDef::kRequired;
}
@@ -3072,6 +3111,7 @@ CheckedError Parser::ParseProtoMapField(StructDef *struct_def) {
auto field_name = attribute_;
NEXT();
EXPECT('=');
std::string proto_field_id = attribute_;
EXPECT(kTokenIntegerConstant);
EXPECT(';');
@@ -3091,6 +3131,11 @@ CheckedError Parser::ParseProtoMapField(StructDef *struct_def) {
field_type.struct_def = entry_table;
FieldDef *field;
ECHECK(AddField(*struct_def, field_name, field_type, &field));
if (!proto_field_id.empty()) {
auto val = new Value();
val->constant = proto_field_id;
field->attributes.Add("id", val);
}
return NoError();
}