This commit is contained in:
Wouter van Oortmerssen
2016-01-19 14:10:49 -08:00
17 changed files with 278 additions and 91 deletions

View File

@@ -201,6 +201,14 @@ CheckedError Parser::ParseHexNum(int nibbles, int64_t *val) {
return NoError();
}
CheckedError Parser::SkipByteOrderMark() {
if (static_cast<unsigned char>(*cursor_) != 0xef) return NoError();
cursor_++;
if (static_cast<unsigned char>(*cursor_++) != 0xbb) return Error("invalid utf-8 byte order mark");
if (static_cast<unsigned char>(*cursor_++) != 0xbf) return Error("invalid utf-8 byte order mark");
return NoError();
}
CheckedError Parser::Next() {
doc_comment_.clear();
bool seen_newline = false;
@@ -1118,6 +1126,33 @@ CheckedError Parser::StartStruct(const std::string &name, StructDef **dest) {
return NoError();
}
CheckedError Parser::CheckClash(std::vector<FieldDef*> &fields,
StructDef *struct_def,
const char *suffix,
BaseType basetype) {
auto len = strlen(suffix);
for (auto it = fields.begin(); it != fields.end(); ++it) {
auto &fname = (*it)->name;
if (fname.length() > len &&
fname.compare(fname.length() - len, len, suffix) == 0 &&
(*it)->value.type.base_type != BASE_TYPE_UTYPE) {
auto field = struct_def->fields.Lookup(
fname.substr(0, fname.length() - len));
if (field && field->value.type.base_type == basetype)
return Error("Field " + fname +
" would clash with generated functions for field " +
field->name);
}
}
return NoError();
}
static bool compareFieldDefs(const FieldDef *a, const FieldDef *b) {
auto a_id = atoi(a->attributes.Lookup("id")->constant.c_str());
auto b_id = atoi(b->attributes.Lookup("id")->constant.c_str());
return a_id < b_id;
}
CheckedError Parser::ParseDecl() {
std::vector<std::string> dc = doc_comment_;
bool fixed = Is(kTokenStruct);
@@ -1160,12 +1195,7 @@ CheckedError Parser::ParseDecl() {
"either all fields or no fields must have an 'id' attribute");
// Simply sort by id, then the fields are the same as if no ids had
// been specified.
std::sort(fields.begin(), fields.end(),
[](const FieldDef *a, const FieldDef *b) -> bool {
auto a_id = atoi(a->attributes.Lookup("id")->constant.c_str());
auto b_id = atoi(b->attributes.Lookup("id")->constant.c_str());
return a_id < b_id;
});
std::sort(fields.begin(), fields.end(), compareFieldDefs);
// Verify we have a contiguous set, and reassign vtable offsets.
for (int i = 0; i < static_cast<int>(fields.size()); i++) {
if (i != atoi(fields[i]->attributes.Lookup("id")->constant.c_str()))
@@ -1175,34 +1205,13 @@ CheckedError Parser::ParseDecl() {
}
}
}
// Check that no identifiers clash with auto generated fields.
// This is not an ideal situation, but should occur very infrequently,
// and allows us to keep using very readable names for type & length fields
// without inducing compile errors.
auto CheckClash = [&fields, &struct_def, this](const char *suffix,
BaseType basetype) -> CheckedError {
auto len = strlen(suffix);
for (auto it = fields.begin(); it != fields.end(); ++it) {
auto &fname = (*it)->name;
if (fname.length() > len &&
fname.compare(fname.length() - len, len, suffix) == 0 &&
(*it)->value.type.base_type != BASE_TYPE_UTYPE) {
auto field = struct_def->fields.Lookup(
fname.substr(0, fname.length() - len));
if (field && field->value.type.base_type == basetype)
return Error("Field " + fname +
" would clash with generated functions for field " +
field->name);
}
}
return NoError();
};
ECHECK(CheckClash("_type", BASE_TYPE_UNION));
ECHECK(CheckClash("Type", BASE_TYPE_UNION));
ECHECK(CheckClash("_length", BASE_TYPE_VECTOR));
ECHECK(CheckClash("Length", BASE_TYPE_VECTOR));
ECHECK(CheckClash("_byte_vector", BASE_TYPE_STRING));
ECHECK(CheckClash("ByteVector", BASE_TYPE_STRING));
ECHECK(CheckClash(fields, struct_def, "_type", BASE_TYPE_UNION));
ECHECK(CheckClash(fields, struct_def, "Type", BASE_TYPE_UNION));
ECHECK(CheckClash(fields, struct_def, "_length", BASE_TYPE_VECTOR));
ECHECK(CheckClash(fields, struct_def, "Length", BASE_TYPE_VECTOR));
ECHECK(CheckClash(fields, struct_def, "_byte_vector", BASE_TYPE_STRING));
ECHECK(CheckClash(fields, struct_def, "ByteVector", BASE_TYPE_STRING));
EXPECT('}');
return NoError();
}
@@ -1244,6 +1253,10 @@ CheckedError Parser::ParseNamespace() {
return NoError();
}
static bool compareEnumVals(const EnumVal *a, const EnumVal* b) {
return a->value < b->value;
}
// Best effort parsing of .proto declarations, with the aim to turn them
// in the closest corresponding FlatBuffer equivalent.
// We parse everything as identifiers instead of keywords, since we don't
@@ -1294,9 +1307,8 @@ CheckedError Parser::ParseProtoDecl() {
if (Is(';')) NEXT();
// Protobuf allows them to be specified in any order, so sort afterwards.
auto &v = enum_def->vals.vec;
std::sort(v.begin(), v.end(), [](const EnumVal *a, const EnumVal *b) {
return a->value < b->value;
});
std::sort(v.begin(), v.end(), compareEnumVals);
// Temp: remove any duplicates, as .fbs files can't handle them.
for (auto it = v.begin(); it != v.end(); ) {
if (it != v.begin() && it[0]->value == it[-1]->value) it = v.erase(it);
@@ -1593,6 +1605,7 @@ CheckedError Parser::DoParse(const char *source, const char **include_paths,
builder_.Clear();
// Start with a blank namespace just in case this file doesn't have one.
namespaces_.push_back(new Namespace());
ECHECK(SkipByteOrderMark());
NEXT();
// Includes must come before type declarations:
for (;;) {
@@ -1744,11 +1757,14 @@ std::set<std::string> Parser::GetIncludedFilesRecursive(
// Schema serialization functionality:
template<typename T> bool compareName(const T* a, const T* b) {
return a->name < b->name;
}
template<typename T> void AssignIndices(const std::vector<T *> &defvec) {
// Pre-sort these vectors, such that we can set the correct indices for them.
auto vec = defvec;
std::sort(vec.begin(), vec.end(),
[](const T *a, const T *b) { return a->name < b->name; });
std::sort(vec.begin(), vec.end(), compareName<T>);
for (int i = 0; i < static_cast<int>(vec.size()); i++) vec[i]->index = i;
}