mirror of
https://github.com/google/flatbuffers.git
synced 2026-06-06 13:37:25 +00:00
Refactoring of idl_parser (#4948)
* Refactoring of numbers parser More accurate parse of float and double. Hexadecimal floats. Check "out-of-range" of uint64 fields. Check correctness of default values and metadata. * Remove locale-independent code strtod/strtof from PR #4948. * small optimization * Add is_(ascii) functions * is_ascii cleanup * Fix format conversion * Refine number parser * Make code compatible with Android build * Remove unnecessary suppression of warning C4127
This commit is contained in:
committed by
Wouter van Oortmerssen
parent
53ce80ce91
commit
4ed6fafdfa
@@ -180,6 +180,17 @@
|
||||
#endif // __has_include
|
||||
#endif // !FLATBUFFERS_HAS_STRING_VIEW
|
||||
|
||||
#ifndef FLATBUFFERS_HAS_NEW_STRTOD
|
||||
// Modern (C++11) strtod and strtof functions are available for use.
|
||||
// 1) nan/inf strings as argument of strtod;
|
||||
// 2) hex-float as argument of strtod/strtof.
|
||||
#if (defined(_MSC_VER) && _MSC_VER >= 1900) || \
|
||||
(defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 409)) || \
|
||||
(defined(__clang__))
|
||||
#define FLATBUFFERS_HAS_NEW_STRTOD 1
|
||||
#endif
|
||||
#endif // !FLATBUFFERS_HAS_NEW_STRTOD
|
||||
|
||||
/// @endcond
|
||||
|
||||
/// @file
|
||||
|
||||
@@ -484,7 +484,11 @@ struct IDLOptions {
|
||||
// This encapsulates where the parser is in the current source file.
|
||||
struct ParserState {
|
||||
ParserState()
|
||||
: cursor_(nullptr), line_start_(nullptr), line_(0), token_(-1) {}
|
||||
: cursor_(nullptr),
|
||||
line_start_(nullptr),
|
||||
line_(0),
|
||||
token_(-1),
|
||||
attr_is_trivial_ascii_string_(true) {}
|
||||
|
||||
protected:
|
||||
void ResetState(const char *source) {
|
||||
@@ -508,6 +512,10 @@ struct ParserState {
|
||||
int line_; // the current line being parsed
|
||||
int token_;
|
||||
|
||||
// Flag: text in attribute_ is true ASCII string without escape
|
||||
// sequences. Only printable ASCII (without [\t\r\n]).
|
||||
// Used for number-in-string (and base64 string in future).
|
||||
bool attr_is_trivial_ascii_string_;
|
||||
std::string attribute_;
|
||||
std::vector<std::string> doc_comment_;
|
||||
};
|
||||
@@ -644,7 +652,8 @@ class Parser : public ParserState {
|
||||
bool ParseFlexBuffer(const char *source, const char *source_filename,
|
||||
flexbuffers::Builder *builder);
|
||||
|
||||
FLATBUFFERS_CHECKED_ERROR CheckInRange(int64_t val, int64_t min, int64_t max);
|
||||
FLATBUFFERS_CHECKED_ERROR InvalidNumber(const char *number,
|
||||
const std::string &msg);
|
||||
|
||||
StructDef *LookupStruct(const std::string &id) const;
|
||||
|
||||
@@ -711,7 +720,7 @@ class Parser : public ParserState {
|
||||
BaseType req, bool *destmatch);
|
||||
FLATBUFFERS_CHECKED_ERROR ParseHash(Value &e, FieldDef* field);
|
||||
FLATBUFFERS_CHECKED_ERROR TokenError();
|
||||
FLATBUFFERS_CHECKED_ERROR ParseSingleValue(const std::string *name, Value &e);
|
||||
FLATBUFFERS_CHECKED_ERROR ParseSingleValue(const std::string *name, Value &e, bool check_now);
|
||||
FLATBUFFERS_CHECKED_ERROR ParseEnumFromString(Type &type, int64_t *result);
|
||||
StructDef *LookupCreateStruct(const std::string &name,
|
||||
bool create_if_new = true,
|
||||
|
||||
@@ -37,9 +37,9 @@
|
||||
// Not possible if Microsoft Compiler before 2012
|
||||
// Possible if the language feature __cpp_alias_templates is well defined
|
||||
// Or possible if the C++ std is C++11 or newer
|
||||
#if !(defined(_MSC_VER) && _MSC_VER <= 1700 /* MSVC2012 */) \
|
||||
&& ((defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \
|
||||
|| (defined(__cplusplus) && __cplusplus >= 201103L))
|
||||
#if (defined(_MSC_VER) && _MSC_VER > 1700 /* MSVC2012 */) \
|
||||
|| (defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \
|
||||
|| (defined(__cplusplus) && __cplusplus >= 201103L)
|
||||
#define FLATBUFFERS_TEMPLATES_ALIASES
|
||||
#endif
|
||||
|
||||
@@ -88,12 +88,33 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
|
||||
#endif // defined(FLATBUFFERS_TEMPLATES_ALIASES)
|
||||
#else
|
||||
template <typename T> class numeric_limits :
|
||||
public std::numeric_limits<T> {};
|
||||
public std::numeric_limits<T> {
|
||||
public:
|
||||
// Android NDK fix.
|
||||
static T lowest() {
|
||||
return std::numeric_limits<T>::min();
|
||||
}
|
||||
};
|
||||
|
||||
template <> class numeric_limits<float> :
|
||||
public std::numeric_limits<float> {
|
||||
public:
|
||||
static float lowest() { return -FLT_MAX; }
|
||||
};
|
||||
|
||||
template <> class numeric_limits<double> :
|
||||
public std::numeric_limits<double> {
|
||||
public:
|
||||
static double lowest() { return -DBL_MAX; }
|
||||
};
|
||||
|
||||
// Stand-alone specialization for unsigned long long; it does not derive from
// std::numeric_limits and provides only min(), max() and lowest().
template <> class numeric_limits<unsigned long long> {
 public:
  // Smallest representable value: zero.
  static unsigned long long min() {
    return static_cast<unsigned long long>(0);
  }
  // Largest representable value: all bits set.
  static unsigned long long max() {
    return ~static_cast<unsigned long long>(0);
  }
  // For an unsigned type lowest() is the same as min().
  static unsigned long long lowest() { return min(); }
};
|
||||
|
||||
template <> class numeric_limits<long long> {
|
||||
@@ -105,6 +126,9 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
|
||||
return static_cast<long long>(
|
||||
(1ULL << ((sizeof(long long) << 3) - 1)) - 1);
|
||||
}
|
||||
static long long lowest() {
|
||||
return numeric_limits<long long>::min();
|
||||
}
|
||||
};
|
||||
#endif // FLATBUFFERS_CPP98_STL
|
||||
|
||||
@@ -114,6 +138,7 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
|
||||
template <typename T, typename U> using is_same = std::is_same<T,U>;
|
||||
template <typename T> using is_floating_point = std::is_floating_point<T>;
|
||||
template <typename T> using is_unsigned = std::is_unsigned<T>;
|
||||
template <typename T> using make_unsigned = std::make_unsigned<T>;
|
||||
#else
|
||||
// Map C++ TR1 templates defined by stlport.
|
||||
template <typename T> using is_scalar = std::tr1::is_scalar<T>;
|
||||
@@ -121,6 +146,13 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
|
||||
template <typename T> using is_floating_point =
|
||||
std::tr1::is_floating_point<T>;
|
||||
template <typename T> using is_unsigned = std::tr1::is_unsigned<T>;
|
||||
// Android NDK doesn't have std::make_unsigned or std::tr1::make_unsigned.
|
||||
template<typename T> struct make_unsigned {
|
||||
static_assert(is_unsigned<T>::value, "Specialization not impelented!");
|
||||
using type = T;
|
||||
};
|
||||
// char -> unsigned char.
template<> struct make_unsigned<char> {
  typedef unsigned char type;
};
|
||||
// int -> unsigned int.
template<> struct make_unsigned<int> {
  typedef unsigned int type;
};
|
||||
#endif // !FLATBUFFERS_CPP98_STL
|
||||
#else
|
||||
// MSVC 2010 doesn't support C++11 aliases.
|
||||
@@ -129,6 +161,7 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
|
||||
template <typename T> struct is_floating_point :
|
||||
public std::is_floating_point<T> {};
|
||||
template <typename T> struct is_unsigned : public std::is_unsigned<T> {};
|
||||
template <typename T> struct make_unsigned : public std::make_unsigned<T> {};
|
||||
#endif // defined(FLATBUFFERS_TEMPLATES_ALIASES)
|
||||
|
||||
#ifndef FLATBUFFERS_CPP98_STL
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
#ifndef FLATBUFFERS_UTIL_H_
|
||||
#define FLATBUFFERS_UTIL_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <fstream>
|
||||
@@ -50,6 +50,52 @@
|
||||
|
||||
namespace flatbuffers {
|
||||
|
||||
// Avoid `#pragma warning(disable: 4127) // C4127: expression is constant`.
|
||||
template<typename T> FLATBUFFERS_CONSTEXPR inline bool IsConstTrue(const T &t) {
|
||||
return !!t;
|
||||
}
|
||||
|
||||
// @locale-independent functions for ASCII characters set.
|
||||
|
||||
// Check that integer scalar is in closed range: (a <= x <= b)
|
||||
// using one compare (conditional branch) operator.
|
||||
template<typename T> inline bool check_in_range(T x, T a, T b) {
|
||||
// (Hacker's Delight): `a <= x <= b` <=> `(x-a) <={u} (b-a)`.
|
||||
FLATBUFFERS_ASSERT(a <= b); // static_assert only if 'a' & 'b' templated
|
||||
typedef typename flatbuffers::make_unsigned<T>::type U;
|
||||
return (static_cast<U>(x - a) <= static_cast<U>(b - a));
|
||||
}
|
||||
|
||||
// Case-insensitive isalpha
|
||||
static inline bool is_alpha(char c) {
|
||||
// ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF).
|
||||
return check_in_range(c & 0xDF, 'a' & 0xDF, 'z' & 0xDF);
|
||||
}
|
||||
|
||||
// Check (case-insensitive) that `c` is equal to alpha.
|
||||
static inline bool is_alpha_char(char c, char alpha) {
|
||||
FLATBUFFERS_ASSERT(is_alpha(alpha));
|
||||
// ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF).
|
||||
return ((c & 0xDF) == (alpha & 0xDF));
|
||||
}
|
||||
|
||||
// https://en.cppreference.com/w/cpp/string/byte/isxdigit
|
||||
// isdigit and isxdigit are the only standard narrow character classification
|
||||
// functions that are not affected by the currently installed C locale. although
|
||||
// some implementations (e.g. Microsoft in 1252 codepage) may classify
|
||||
// additional single-byte characters as digits.
|
||||
static inline bool is_digit(char c) { return check_in_range(c, '0', '9'); }
|
||||
|
||||
static inline bool is_xdigit(char c) {
|
||||
// Replace by look-up table.
|
||||
return is_digit(c) | check_in_range(c & 0xDF, 'a' & 0xDF, 'f' & 0xDF);
|
||||
}
|
||||
|
||||
// Case-insensitive isalnum
|
||||
static inline bool is_alnum(char c) { return is_alpha(c) || is_digit(c); }
|
||||
|
||||
// @end-locale-independent functions for ASCII character set
|
||||
|
||||
#ifdef FLATBUFFERS_PREFER_PRINTF
|
||||
template<typename T> size_t IntToDigitCount(T t) {
|
||||
size_t digit_count = 0;
|
||||
@@ -158,6 +204,7 @@ template<> inline std::string NumToString<float>(float t) {
|
||||
// The returned string length is always xdigits long, prefixed by 0 digits.
|
||||
// For example, IntToStringHex(0x23, 8) returns the string "00000023".
|
||||
inline std::string IntToStringHex(int i, int xdigits) {
|
||||
FLATBUFFERS_ASSERT(i >= 0);
|
||||
// clang-format off
|
||||
#ifndef FLATBUFFERS_PREFER_PRINTF
|
||||
std::stringstream ss;
|
||||
@@ -170,28 +217,178 @@ inline std::string IntToStringHex(int i, int xdigits) {
|
||||
// clang-format on
|
||||
}
|
||||
|
||||
// Portable implementation of strtoll().
|
||||
inline int64_t StringToInt(const char *str, char **endptr = nullptr,
|
||||
int base = 10) {
|
||||
// Thin wrapper over strtod(): parses a double from `str` and stores the
// position of the first unparsed character in `*str_end`.
static inline double strtod_impl(const char *str, char **str_end) {
  // The result of strtod (printf, etc.) depends on the current C locale.
  const double parsed = strtod(str, str_end);
  return parsed;
}
|
||||
|
||||
static inline float strtof_impl(const char *str, char **str_end) {
|
||||
// Use "strtof" for float and strtod for double to avoid double=>float
|
||||
// rounding problems (see
|
||||
// https://en.cppreference.com/w/cpp/numeric/fenv/feround) or problems with
|
||||
// std::numeric_limits<float>::is_iec559==false. Example:
|
||||
// for (int mode : { FE_DOWNWARD, FE_TONEAREST, FE_TOWARDZERO, FE_UPWARD }){
|
||||
// const char *s = "-4e38";
|
||||
// std::fesetround(mode);
|
||||
// std::cout << strtof(s, nullptr) << "; " << strtod(s, nullptr) << "; "
|
||||
// << static_cast<float>(strtod(s, nullptr)) << "\n";
|
||||
// }
|
||||
// Gives:
|
||||
// -inf; -4e+38; -inf
|
||||
// -inf; -4e+38; -inf
|
||||
// -inf; -4e+38; -3.40282e+38
|
||||
// -inf; -4e+38; -3.40282e+38
|
||||
|
||||
// clang-format off
|
||||
#ifdef _MSC_VER
|
||||
return _strtoi64(str, endptr, base);
|
||||
#ifdef FLATBUFFERS_HAS_NEW_STRTOD
|
||||
return strtof(str, str_end);
|
||||
#else
|
||||
return strtoll(str, endptr, base);
|
||||
#endif
|
||||
return static_cast<float>(strtod_impl(str, str_end));
|
||||
#endif // !FLATBUFFERS_HAS_NEW_STRTOD
|
||||
// clang-format on
|
||||
}
|
||||
|
||||
// Portable implementation of strtoull().
|
||||
inline uint64_t StringToUInt(const char *str, char **endptr = nullptr,
|
||||
// Adaptor for strtoull()/strtoll().
|
||||
// Flatbuffers accepts numbers with any count of leading zeros (-009 is -9),
|
||||
// while strtoll with base=0 interprets first leading zero as octal prefix.
|
||||
// In future, it is possible to add prefixed 0b0101.
|
||||
// 1) Checks errno code for overflow condition (out of range).
|
||||
// 2) If base <= 0, function try to detect base of number by prefix.
|
||||
//
|
||||
// Return value (like strtoull and strtoll, but reject partial result):
|
||||
// - If successful, an integer value corresponding to the str is returned.
|
||||
// - If full string conversion can't be performed, 0 is returned.
|
||||
// - If the converted value falls out of range of corresponding return type, a
|
||||
// range error occurs. In this case value MAX(T)/MIN(T) is returned.
|
||||
template<typename T>
|
||||
inline T StringToInteger64Impl(const char *const str, const char **endptr,
|
||||
const int base, const bool check_errno = true) {
|
||||
static_assert(flatbuffers::is_same<T, int64_t>::value ||
|
||||
flatbuffers::is_same<T, uint64_t>::value,
|
||||
"Type T must be either int64_t or uint64_t");
|
||||
FLATBUFFERS_ASSERT(str && endptr); // endptr must be not null
|
||||
if (base <= 0) {
|
||||
auto s = str;
|
||||
while (*s && !is_digit(*s)) s++;
|
||||
if (s[0] == '0' && is_alpha_char(s[1], 'X'))
|
||||
return StringToInteger64Impl<T>(str, endptr, 16, check_errno);
|
||||
// if a prefix not match, try base=10
|
||||
return StringToInteger64Impl<T>(str, endptr, 10, check_errno);
|
||||
} else {
|
||||
if (check_errno) errno = 0; // clear thread-local errno
|
||||
// calculate result
|
||||
T result;
|
||||
if (IsConstTrue(flatbuffers::is_same<T, int64_t>::value)) {
|
||||
// clang-format off
|
||||
#ifdef _MSC_VER
|
||||
result = _strtoi64(str, const_cast<char**>(endptr), base);
|
||||
#else
|
||||
result = strtoll(str, const_cast<char**>(endptr), base);
|
||||
#endif
|
||||
// clang-format on
|
||||
} else { // T is uint64_t
|
||||
// clang-format off
|
||||
#ifdef _MSC_VER
|
||||
result = _strtoui64(str, const_cast<char**>(endptr), base);
|
||||
#else
|
||||
result = strtoull(str, const_cast<char**>(endptr), base);
|
||||
#endif
|
||||
// clang-format on
|
||||
|
||||
// The strtoull accepts negative numbers:
|
||||
// If the minus sign was part of the input sequence, the numeric value
|
||||
// calculated from the sequence of digits is negated as if by unary minus
|
||||
// in the result type, which applies unsigned integer wraparound rules.
|
||||
// Fix this behaviour (except -0).
|
||||
if ((**endptr == '\0') && (0 != result)) {
|
||||
auto s = str;
|
||||
while (*s && !is_digit(*s)) s++;
|
||||
s = (s > str) ? (s - 1) : s; // step back to one symbol
|
||||
if (*s == '-') {
|
||||
// For unsigned types return max to distinguish from
|
||||
// "no conversion can be performed".
|
||||
result = flatbuffers::numeric_limits<T>::max();
|
||||
// point to the start of string, like errno
|
||||
*endptr = str;
|
||||
}
|
||||
}
|
||||
}
|
||||
// check for overflow
|
||||
if (check_errno && errno) *endptr = str; // point it to start of input
|
||||
// erase partial result, but save an overflow
|
||||
if ((*endptr != str) && (**endptr != '\0')) result = 0;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// Convert a string to an instance of T.
|
||||
// Return value (matched with StringToInteger64Impl and strtod):
|
||||
// - If successful, a numeric value corresponding to the str is returned.
|
||||
// - If full string conversion can't be performed, 0 is returned.
|
||||
// - If the converted value falls out of range of corresponding return type, a
|
||||
// range error occurs. In this case value MAX(T)/MIN(T) is returned.
|
||||
template<typename T> inline bool StringToNumber(const char *s, T *val) {
|
||||
FLATBUFFERS_ASSERT(s && val);
|
||||
const char *end = nullptr;
|
||||
// The errno check isn't needed. strtoll will return MAX/MIN on overlow.
|
||||
const int64_t i = StringToInteger64Impl<int64_t>(s, &end, -1, false);
|
||||
*val = static_cast<T>(i);
|
||||
const auto done = (s != end) && (*end == '\0');
|
||||
if (done) {
|
||||
const int64_t max = flatbuffers::numeric_limits<T>::max();
|
||||
const int64_t min = flatbuffers::numeric_limits<T>::lowest();
|
||||
if (i > max) {
|
||||
*val = static_cast<T>(max);
|
||||
return false;
|
||||
}
|
||||
if (i < min) {
|
||||
// For unsigned types return max to distinguish from
|
||||
// "no conversion can be performed" when 0 is returned.
|
||||
*val = static_cast<T>(flatbuffers::is_unsigned<T>::value ? max : min);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return done;
|
||||
}
|
||||
// int64_t specialization: parses directly with StringToInteger64Impl, using
// its default errno checking. Returns true only when at least one character
// was consumed and the whole string was converted.
template<> inline bool StringToNumber<int64_t>(const char *s, int64_t *val) {
  const char *tail = s;
  *val = StringToInteger64Impl<int64_t>(s, &tail, -1);  // errno is checked
  if (s == tail) return false;
  return *tail == '\0';
}
|
||||
// uint64_t specialization: parses directly with StringToInteger64Impl, using
// its default errno checking. Returns true only when at least one character
// was consumed and the whole string was converted.
template<> inline bool StringToNumber<uint64_t>(const char *s, uint64_t *val) {
  const char *tail = s;
  *val = StringToInteger64Impl<uint64_t>(s, &tail, -1);  // errno is checked
  if (s == tail) return false;
  return *tail == '\0';
}
|
||||
|
||||
// double specialization: parses with strtod_impl. Returns true only when the
// whole string was converted; otherwise *val is reset to 0.
template<> inline bool StringToNumber<double>(const char *s, double *val) {
  FLATBUFFERS_ASSERT(s && val);
  char *tail = nullptr;
  *val = strtod_impl(s, &tail);
  const bool complete = (s != tail) && (*tail == '\0');
  if (complete) return true;
  *val = 0;  // erase partial result
  return false;
}
|
||||
|
||||
// float specialization: parses with strtof_impl. Returns true only when the
// whole string was converted; otherwise *val is reset to 0.
template<> inline bool StringToNumber<float>(const char *s, float *val) {
  FLATBUFFERS_ASSERT(s && val);
  char *tail = nullptr;
  *val = strtof_impl(s, &tail);
  const bool complete = (s != tail) && (*tail == '\0');
  if (complete) return true;
  *val = 0;  // erase partial result
  return false;
}
|
||||
|
||||
inline int64_t StringToInt(const char *str, const char **endptr = nullptr,
|
||||
int base = 10) {
|
||||
const char *ep = nullptr;
|
||||
return StringToInteger64Impl<int64_t>(str, endptr ? endptr : &ep, base);
|
||||
}
|
||||
|
||||
inline uint64_t StringToUInt(const char *str, const char **endptr = nullptr,
|
||||
int base = 10) {
|
||||
// clang-format off
|
||||
#ifdef _MSC_VER
|
||||
return _strtoui64(str, endptr, base);
|
||||
#else
|
||||
return strtoull(str, endptr, base);
|
||||
#endif
|
||||
// clang-format on
|
||||
const char *ep = nullptr;
|
||||
return StringToInteger64Impl<uint64_t>(str, endptr ? endptr : &ep, base);
|
||||
}
|
||||
|
||||
typedef bool (*LoadFileFunction)(const char *filename, bool binary,
|
||||
|
||||
Reference in New Issue
Block a user