Refactoring of idl_parser (#4948)

* Refactoring of numbers parser. More accurate parsing of float and double. Hexadecimal floats. Check "out-of-range" of uint64 fields. Check correctness of default values and metadata. * Remove locale-independent code strtod/strtof from PR #4948. * small optimization * Add is_(ascii) functions * is_ascii cleanup * Fix format conversion * Refine number parser * Make code compatible with Android build * Remove unnecessary suppression of warning C4127
2026-06-06 13:37:25 +00:00 · 2018-10-12 00:37:47 +07:00
parent 53ce80ce91
commit 4ed6fafdfa
11 changed files with 880 additions and 206 deletions
--- a/include/flatbuffers/base.h
+++ b/include/flatbuffers/base.h
@@ -180,6 +180,17 @@
  #endif // __has_include
 #endif // !FLATBUFFERS_HAS_STRING_VIEW

+#ifndef FLATBUFFERS_HAS_NEW_STRTOD
+  // Modern (C++11) strtod and strtof functions are available for use:
+  // 1) strtod accepts nan/inf strings as its argument;
+  // 2) strtod/strtof accept hex-float strings as their argument.
+  #if (defined(_MSC_VER) && _MSC_VER >= 1900) || \
+      (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 409)) || \
+      (defined(__clang__))
+    #define FLATBUFFERS_HAS_NEW_STRTOD 1
+  #endif
+#endif // !FLATBUFFERS_HAS_NEW_STRTOD
+
 /// @endcond

 /// @file
--- a/include/flatbuffers/idl.h
+++ b/include/flatbuffers/idl.h
@@ -484,7 +484,11 @@ struct IDLOptions {
 // This encapsulates where the parser is in the current source file.
 struct ParserState {
  ParserState()
-      : cursor_(nullptr), line_start_(nullptr), line_(0), token_(-1) {}
+      : cursor_(nullptr),
+        line_start_(nullptr),
+        line_(0),
+        token_(-1),
+        attr_is_trivial_ascii_string_(true) {}

 protected:
  void ResetState(const char *source) {
@@ -508,6 +512,10 @@ struct ParserState {
  int line_;  // the current line being parsed
  int token_;

+  // Flag: the text in attribute_ is a plain ASCII string without escape
+  // sequences, i.e. only printable ASCII (no [\t\r\n]).
+  // Used for number-in-string (and base64 string in future).
+  bool attr_is_trivial_ascii_string_;
  std::string attribute_;
  std::vector<std::string> doc_comment_;
 };
@@ -644,7 +652,8 @@ class Parser : public ParserState {
  bool ParseFlexBuffer(const char *source, const char *source_filename,
                       flexbuffers::Builder *builder);

-  FLATBUFFERS_CHECKED_ERROR CheckInRange(int64_t val, int64_t min, int64_t max);
+  FLATBUFFERS_CHECKED_ERROR InvalidNumber(const char *number,
+                                          const std::string &msg);

  StructDef *LookupStruct(const std::string &id) const;

@@ -711,7 +720,7 @@ class Parser : public ParserState {
                                          BaseType req, bool *destmatch);
  FLATBUFFERS_CHECKED_ERROR ParseHash(Value &e, FieldDef* field);
  FLATBUFFERS_CHECKED_ERROR TokenError();
-  FLATBUFFERS_CHECKED_ERROR ParseSingleValue(const std::string *name, Value &e);
+  FLATBUFFERS_CHECKED_ERROR ParseSingleValue(const std::string *name, Value &e, bool check_now);
  FLATBUFFERS_CHECKED_ERROR ParseEnumFromString(Type &type, int64_t *result);
  StructDef *LookupCreateStruct(const std::string &name,
                                bool create_if_new = true,
--- a/include/flatbuffers/stl_emulation.h
+++ b/include/flatbuffers/stl_emulation.h
@@ -37,9 +37,9 @@
 // Not possible if Microsoft Compiler before 2012
 // Possible is the language feature __cpp_alias_templates is defined well
 // Or possible if the C++ std is C+11 or newer
-#if !(defined(_MSC_VER) && _MSC_VER <= 1700 /* MSVC2012 */) \
-  && ((defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \
-    || (defined(__cplusplus) && __cplusplus >= 201103L))
+#if (defined(_MSC_VER) && _MSC_VER > 1700 /* MSVC2012 */) \
+    || (defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \
+    || (defined(__cplusplus) && __cplusplus >= 201103L)
  #define FLATBUFFERS_TEMPLATES_ALIASES
 #endif

@@ -88,12 +88,33 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
  #endif  // defined(FLATBUFFERS_TEMPLATES_ALIASES)
 #else
  template <typename T> class numeric_limits :
-      public std::numeric_limits<T> {};
+      public std::numeric_limits<T> {
+    public:
+      // Android NDK fix.
+      static T lowest() {
+        return std::numeric_limits<T>::min();
+      }
+  };
+
+  template <> class numeric_limits<float> : 
+      public std::numeric_limits<float> {
+    public:
+      static float lowest() { return -FLT_MAX; }
+  };
+
+  template <> class numeric_limits<double> : 
+      public std::numeric_limits<double> {
+    public:
+      static double lowest() { return -DBL_MAX; }
+  };

  template <> class numeric_limits<unsigned long long> {
   public:
    static unsigned long long min() { return 0ULL; }
    static unsigned long long max() { return ~0ULL; }
+    static unsigned long long lowest() {
+      return numeric_limits<unsigned long long>::min();
+    }
  };

  template <> class numeric_limits<long long> {
@@ -105,6 +126,9 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
      return static_cast<long long>(
          (1ULL << ((sizeof(long long) << 3) - 1)) - 1);
    }
+    static long long lowest() {
+      return numeric_limits<long long>::min();
+    }
  };
 #endif  // FLATBUFFERS_CPP98_STL

@@ -114,6 +138,7 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
    template <typename T, typename U> using is_same = std::is_same<T,U>;
    template <typename T> using is_floating_point = std::is_floating_point<T>;
    template <typename T> using is_unsigned = std::is_unsigned<T>;
+    template <typename T> using make_unsigned = std::make_unsigned<T>;
  #else
    // Map C++ TR1 templates defined by stlport.
    template <typename T> using is_scalar = std::tr1::is_scalar<T>;
@@ -121,6 +146,13 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
    template <typename T> using is_floating_point =
        std::tr1::is_floating_point<T>;
    template <typename T> using is_unsigned = std::tr1::is_unsigned<T>;
+    // Android NDK doesn't have std::make_unsigned or std::tr1::make_unsigned.
+    template<typename T> struct make_unsigned {
+      static_assert(is_unsigned<T>::value, "Specialization not impelented!");
+      using type = T;
+    };
+    template<> struct make_unsigned<char> { using type = unsigned char; };
+    template<> struct make_unsigned<int>  { using type = unsigned int;  };
  #endif  // !FLATBUFFERS_CPP98_STL
 #else
  // MSVC 2010 doesn't support C++11 aliases.
@@ -129,6 +161,7 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
  template <typename T> struct is_floating_point :
        public std::is_floating_point<T> {};
  template <typename T> struct is_unsigned : public std::is_unsigned<T> {};
+  template <typename T> struct make_unsigned : public std::make_unsigned<T> {};
 #endif  // defined(FLATBUFFERS_TEMPLATES_ALIASES)

 #ifndef FLATBUFFERS_CPP98_STL
--- a/include/flatbuffers/util.h
+++ b/include/flatbuffers/util.h
@@ -17,7 +17,7 @@
 #ifndef FLATBUFFERS_UTIL_H_
 #define FLATBUFFERS_UTIL_H_

-#include <assert.h>
+#include <errno.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <fstream>
@@ -50,6 +50,52 @@

 namespace flatbuffers {

+// Avoid `#pragma warning(disable: 4127) // C4127: expression is constant`.
+template<typename T> FLATBUFFERS_CONSTEXPR inline bool IsConstTrue(const T &t) {
+  return !!t;
+}
+
+// @locale-independent functions for ASCII characters set.
+
+// Check that integer scalar is in closed range: (a <= x <= b)
+// using one compare (conditional branch) operator.
+template<typename T> inline bool check_in_range(T x, T a, T b) {
+  // (Hacker's Delight): `a <= x <= b` <=> `(x-a) <={u} (b-a)`.
+  FLATBUFFERS_ASSERT(a <= b);  // static_assert only if 'a' & 'b' templated
+  typedef typename flatbuffers::make_unsigned<T>::type U;
+  return (static_cast<U>(x - a) <= static_cast<U>(b - a));
+}
+
+// Case-insensitive isalpha
+static inline bool is_alpha(char c) {
+  // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF).
+  return check_in_range(c & 0xDF, 'a' & 0xDF, 'z' & 0xDF);
+}
+
+// Check (case-insensitive) that `c` is equal to alpha.
+static inline bool is_alpha_char(char c, char alpha) {
+  FLATBUFFERS_ASSERT(is_alpha(alpha));
+  // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF).
+  return ((c & 0xDF) == (alpha & 0xDF));
+}
+
+// https://en.cppreference.com/w/cpp/string/byte/isxdigit
+// isdigit and isxdigit are the only standard narrow character classification
+// functions that are not affected by the currently installed C locale, although
+// some implementations (e.g. Microsoft in 1252 codepage) may classify
+// additional single-byte characters as digits.
+static inline bool is_digit(char c) { return check_in_range(c, '0', '9'); }
+
+static inline bool is_xdigit(char c) {
+  // TODO: replace with a look-up table.
+  return is_digit(c) | check_in_range(c & 0xDF, 'a' & 0xDF, 'f' & 0xDF);
+}
+
+// Case-insensitive isalnum
+static inline bool is_alnum(char c) { return is_alpha(c) || is_digit(c); }
+
+// @end-locale-independent functions for ASCII character set
+
 #ifdef FLATBUFFERS_PREFER_PRINTF
 template<typename T> size_t IntToDigitCount(T t) {
  size_t digit_count = 0;
@@ -158,6 +204,7 @@ template<> inline std::string NumToString<float>(float t) {
 // The returned string length is always xdigits long, prefixed by 0 digits.
 // For example, IntToStringHex(0x23, 8) returns the string "00000023".
 inline std::string IntToStringHex(int i, int xdigits) {
+  FLATBUFFERS_ASSERT(i >= 0);
  // clang-format off
  #ifndef FLATBUFFERS_PREFER_PRINTF
    std::stringstream ss;
@@ -170,28 +217,178 @@ inline std::string IntToStringHex(int i, int xdigits) {
  // clang-format on
 }

-// Portable implementation of strtoll().
-inline int64_t StringToInt(const char *str, char **endptr = nullptr,
-                           int base = 10) {
+static inline double strtod_impl(const char *str, char **str_end) {
+  // The result of strtod (like printf, etc.) depends on the current C locale.
+  return strtod(str, str_end);
+}
+
+static inline float strtof_impl(const char *str, char **str_end) {
+  // Use "strtof" for float and strtod for double to avoid double=>float
+  // rounding problems (see
+  // https://en.cppreference.com/w/cpp/numeric/fenv/feround) or problems with
+  // std::numeric_limits<float>::is_iec559==false. Example:
+  //  for (int mode : { FE_DOWNWARD, FE_TONEAREST, FE_TOWARDZERO, FE_UPWARD }){
+  //    const char *s = "-4e38";
+  //    std::fesetround(mode);
+  //    std::cout << strtof(s, nullptr) << "; " << strtod(s, nullptr) << "; "
+  //              << static_cast<float>(strtod(s, nullptr)) << "\n";
+  //  }
+  // Gives:
+  //  -inf; -4e+38; -inf
+  //  -inf; -4e+38; -inf
+  //  -inf; -4e+38; -3.40282e+38
+  //  -inf; -4e+38; -3.40282e+38
+
  // clang-format off
-  #ifdef _MSC_VER
-    return _strtoi64(str, endptr, base);
+  #ifdef FLATBUFFERS_HAS_NEW_STRTOD
+    return strtof(str, str_end);
  #else
-    return strtoll(str, endptr, base);
-  #endif
+    return static_cast<float>(strtod_impl(str, str_end));
+  #endif // !FLATBUFFERS_HAS_NEW_STRTOD
  // clang-format on
 }

-// Portable implementation of strtoull().
-inline uint64_t StringToUInt(const char *str, char **endptr = nullptr,
+// Adaptor for strtoull()/strtoll().
+// Flatbuffers accepts numbers with any count of leading zeros (-009 is -9),
+// while strtoll with base=0 interprets first leading zero as octal prefix.
+// In the future, support for a binary prefix (e.g. 0b0101) could be added.
+// 1) Checks errno code for overflow condition (out of range).
+// 2) If base <= 0, the function tries to detect the base from the prefix.
+//
+// Return value (like strtoull and strtoll, but reject partial result):
+// - If successful, an integer value corresponding to the str is returned.
+// - If full string conversion can't be performed, 0 is returned.
+// - If the converted value falls out of range of corresponding return type, a
+// range error occurs. In this case value MAX(T)/MIN(T) is returned.
+template<typename T>
+inline T StringToInteger64Impl(const char *const str, const char **endptr,
+                               const int base, const bool check_errno = true) {
+  static_assert(flatbuffers::is_same<T, int64_t>::value ||
+                flatbuffers::is_same<T, uint64_t>::value,
+                "Type T must be either int64_t or uint64_t");
+  FLATBUFFERS_ASSERT(str && endptr);  // endptr must be not null
+  if (base <= 0) {
+    auto s = str;
+    while (*s && !is_digit(*s)) s++;
+    if (s[0] == '0' && is_alpha_char(s[1], 'X'))
+      return StringToInteger64Impl<T>(str, endptr, 16, check_errno);
+    // if no prefix matches, try base=10
+    return StringToInteger64Impl<T>(str, endptr, 10, check_errno);
+  } else {
+    if (check_errno) errno = 0;  // clear thread-local errno
+    // calculate result
+    T result;
+    if (IsConstTrue(flatbuffers::is_same<T, int64_t>::value)) {
+      // clang-format off
+      #ifdef _MSC_VER
+        result = _strtoi64(str, const_cast<char**>(endptr), base);
+      #else
+        result = strtoll(str, const_cast<char**>(endptr), base);
+      #endif
+      // clang-format on
+    } else {  // T is uint64_t
+      // clang-format off
+      #ifdef _MSC_VER
+        result = _strtoui64(str, const_cast<char**>(endptr), base);
+      #else
+        result = strtoull(str, const_cast<char**>(endptr), base);
+      #endif
+      // clang-format on
+
+      // The strtoull accepts negative numbers:
+      // If the minus sign was part of the input sequence, the numeric value
+      // calculated from the sequence of digits is negated as if by unary minus
+      // in the result type, which applies unsigned integer wraparound rules.
+      // Fix this behaviour (except -0).
+      if ((**endptr == '\0') && (0 != result)) {
+        auto s = str;
+        while (*s && !is_digit(*s)) s++;
+        s = (s > str) ? (s - 1) : s;  // step back to one symbol
+        if (*s == '-') {
+          // For unsigned types return max to distinguish from
+          // "no conversion can be performed".
+          result = flatbuffers::numeric_limits<T>::max();
+          // point to the start of string, like errno
+          *endptr = str;
+        }
+      }
+    }
+    // check for overflow
+    if (check_errno && errno) *endptr = str; // point it to start of input
+    // erase partial result, but save an overflow
+    if ((*endptr != str) && (**endptr != '\0')) result = 0;
+    return result;
+  }
+}
+
+// Convert a string to T; returns true only if the whole string was converted.
+// Value stored in *val (matched with StringToInteger64Impl and strtod):
+// - If successful, a numeric value corresponding to the str is stored.
+// - If full string conversion can't be performed, 0 is stored.
+// - If the converted value falls out of range of corresponding return type, a
+// range error occurs. In this case value MAX(T)/MIN(T) is stored.
+template<typename T> inline bool StringToNumber(const char *s, T *val) {
+  FLATBUFFERS_ASSERT(s && val);
+  const char *end = nullptr;
+  // The errno check isn't needed. strtoll will return MAX/MIN on overflow.
+  const int64_t i = StringToInteger64Impl<int64_t>(s, &end, -1, false);
+  *val = static_cast<T>(i);
+  const auto done = (s != end) && (*end == '\0');
+  if (done) {
+    const int64_t max = flatbuffers::numeric_limits<T>::max();
+    const int64_t min = flatbuffers::numeric_limits<T>::lowest();
+    if (i > max) {
+      *val = static_cast<T>(max);
+      return false;
+    }
+    if (i < min) {
+      // For unsigned types return max to distinguish from
+      // "no conversion can be performed" when 0 is returned.
+      *val = static_cast<T>(flatbuffers::is_unsigned<T>::value ? max : min);
+      return false;
+    }
+  }
+  return done;
+}
+template<> inline bool StringToNumber<int64_t>(const char *s, int64_t *val) {
+  const char *end = s;  // request errno checking
+  *val = StringToInteger64Impl<int64_t>(s, &end, -1);
+  return (s != end) && (*end == '\0');
+}
+template<> inline bool StringToNumber<uint64_t>(const char *s, uint64_t *val) {
+  const char *end = s;  // request errno checking
+  *val = StringToInteger64Impl<uint64_t>(s, &end, -1);
+  return (s != end) && (*end == '\0');
+}
+
+template<> inline bool StringToNumber<double>(const char *s, double *val) {
+  FLATBUFFERS_ASSERT(s && val);
+  char *end = nullptr;
+  *val = strtod_impl(s, &end);
+  auto done = (s != end) && (*end == '\0');
+  if (!done) *val = 0;  // erase partial result
+  return done;
+}
+
+template<> inline bool StringToNumber<float>(const char *s, float *val) {
+  FLATBUFFERS_ASSERT(s && val);
+  char *end = nullptr;
+  *val = strtof_impl(s, &end);
+  auto done = (s != end) && (*end == '\0');
+  if (!done) *val = 0;  // erase partial result
+  return done;
+}
+
+inline int64_t StringToInt(const char *str, const char **endptr = nullptr,
+                           int base = 10) {
+  const char *ep = nullptr;
+  return StringToInteger64Impl<int64_t>(str, endptr ? endptr : &ep, base);
+}
+
+inline uint64_t StringToUInt(const char *str, const char **endptr = nullptr,
                             int base = 10) {
-  // clang-format off
-  #ifdef _MSC_VER
-    return _strtoui64(str, endptr, base);
-  #else
-    return strtoull(str, endptr, base);
-  #endif
-  // clang-format on
+  const char *ep = nullptr;
+  return StringToInteger64Impl<uint64_t>(str, endptr ? endptr : &ep, base);
 }

 typedef bool (*LoadFileFunction)(const char *filename, bool binary,