Make the Parser independent from the global C-locale (#5028)

* Make the Parser independent from the global C-locale

* Set a specific test locale using the environment variable FLATBUFFERS_TEST_LOCALE

* Remove redundant static qualifiers
This commit is contained in:
Vladimir Glavnyy
2018-11-17 00:24:06 +07:00
committed by Wouter van Oortmerssen
parent d6b1ce09cf
commit 5f32f94810
15 changed files with 480 additions and 277 deletions

View File

@@ -195,15 +195,35 @@
#endif
#endif // !FLATBUFFERS_HAS_NEW_STRTOD
// Suppress sanitizer directives.
#ifndef FLATBUFFERS_LOCALE_INDEPENDENT
// Enable locale-independent functions {strtof_l, strtod_l, strtoll_l, strtoull_l}.
// They are part of POSIX-2008 but not part of the C/C++ standard.
// GCC/Clang define _XOPEN_SOURCE >= 700 when POSIX-2008 is available.
#if ((defined(_MSC_VER) && _MSC_VER >= 1800) || \
(defined(_XOPEN_SOURCE) && (_XOPEN_SOURCE>=700)))
#define FLATBUFFERS_LOCALE_INDEPENDENT 1
#else
#define FLATBUFFERS_LOCALE_INDEPENDENT 0
#endif
#endif // !FLATBUFFERS_LOCALE_INDEPENDENT
// Suppress Undefined Behavior Sanitizer (recoverable only). Usage:
// - __supress_ubsan__("undefined")
// - __supress_ubsan__("signed-integer-overflow")
#if defined(__clang__)
#define __no_sanitize_undefined__(reason) __attribute__((no_sanitize("undefined")))
#define __supress_ubsan__(type) __attribute__((no_sanitize(type)))
#elif defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 408)
#define __no_sanitize_undefined__(reason) __attribute__((no_sanitize_undefined))
#define __supress_ubsan__(type) __attribute__((no_sanitize_undefined))
#else
#define __no_sanitize_undefined__(reason)
#define __supress_ubsan__(type)
#endif
// Returns the boolean value of `t`, computed by double negation (`!!t`).
// Intended for wrapping compile-time constants that are used as conditions,
// which avoids the need for
// `#pragma warning(disable: 4127) // C4127: expression is constant`.
template<typename T> FLATBUFFERS_CONSTEXPR inline bool IsConstTrue(T t) {
return !!t;
}
/// @endcond
/// @file
@@ -287,13 +307,15 @@ template<typename T> T EndianScalar(T t) {
}
template<typename T>
__no_sanitize_undefined__("C++ aliasing type rules, see std::bit_cast<>")
// UBSAN: C++ aliasing type rules, see std::bit_cast<> for details.
__supress_ubsan__("alignment")
T ReadScalar(const void *p) {
return EndianScalar(*reinterpret_cast<const T *>(p));
}
template<typename T>
__no_sanitize_undefined__("C++ aliasing type rules, see std::bit_cast<>")
// UBSAN: C++ aliasing type rules, see std::bit_cast<> for details.
__supress_ubsan__("alignment")
void WriteScalar(void *p, T t) {
*reinterpret_cast<T *>(p) = EndianScalar(t);
}

View File

@@ -17,18 +17,15 @@
#ifndef FLATBUFFERS_UTIL_H_
#define FLATBUFFERS_UTIL_H_
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <fstream>
#include <iomanip>
// clang-format off
#ifndef FLATBUFFERS_PREFER_PRINTF
# include <sstream>
#else // FLATBUFFERS_PREFER_PRINTF
# include <float.h>
# include <stdio.h>
#endif // FLATBUFFERS_PREFER_PRINTF
#include <string>
#ifdef _WIN32
# ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
@@ -43,18 +40,21 @@
#else
# include <limits.h>
#endif
// clang-format on
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <iomanip>
#include <fstream>
#include "flatbuffers/base.h"
namespace flatbuffers {
// Returns the boolean value of `t`, computed by double negation (`!!t`).
// Wrapping a constant condition in this call is a way to
// avoid `#pragma warning(disable: 4127) // C4127: expression is constant`.
template<typename T> FLATBUFFERS_CONSTEXPR inline bool IsConstTrue(const T &t) {
return !!t;
}
// @locale-independent functions for ASCII characters set.
// Check that integer scalar is in closed range: (a <= x <= b)
@@ -67,13 +67,13 @@ template<typename T> inline bool check_in_range(T x, T a, T b) {
}
// Case-insensitive isalpha
static inline bool is_alpha(char c) {
inline bool is_alpha(char c) {
// ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF).
return check_in_range(c & 0xDF, 'a' & 0xDF, 'z' & 0xDF);
}
// Check (case-insensitive) that `c` is equal to alpha.
static inline bool is_alpha_char(char c, char alpha) {
inline bool is_alpha_char(char c, char alpha) {
FLATBUFFERS_ASSERT(is_alpha(alpha));
// ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF).
return ((c & 0xDF) == (alpha & 0xDF));
@@ -84,15 +84,15 @@ static inline bool is_alpha_char(char c, char alpha) {
// functions that are not affected by the currently installed C locale, although
// some implementations (e.g. Microsoft in 1252 codepage) may classify
// additional single-byte characters as digits.
static inline bool is_digit(char c) { return check_in_range(c, '0', '9'); }
inline bool is_digit(char c) { return check_in_range(c, '0', '9'); }
static inline bool is_xdigit(char c) {
inline bool is_xdigit(char c) {
// Replace by look-up table.
return is_digit(c) | check_in_range(c & 0xDF, 'a' & 0xDF, 'f' & 0xDF);
}
// Case-insensitive isalnum
static inline bool is_alnum(char c) { return is_alpha(c) || is_digit(c); }
inline bool is_alnum(char c) { return is_alpha(c) || is_digit(c); }
// @end-locale-independent functions for ASCII character set
@@ -119,21 +119,22 @@ template<typename T> size_t NumToStringWidth(T t, int precision = 0) {
return string_width;
}
template<typename T> std::string NumToStringImplWrapper(T t, const char* fmt,
int precision = 0) {
template<typename T>
std::string NumToStringImplWrapper(T t, const char *fmt, int precision = 0) {
size_t string_width = NumToStringWidth(t, precision);
std::string s(string_width, 0x00);
// Allow snprintf to use std::string trailing null to detect buffer overflow
snprintf(const_cast<char*>(s.data()), (s.size()+1), fmt, precision, t);
snprintf(const_cast<char *>(s.data()), (s.size() + 1), fmt, precision, t);
return s;
}
#endif // FLATBUFFERS_PREFER_PRINTF
#endif // FLATBUFFERS_PREFER_PRINTF
// Convert an integer or floating point value to a string.
// In contrast to std::stringstream, "char" values are
// converted to a string of digits, and we don't use scientific notation.
template<typename T> std::string NumToString(T t) {
// clang-format off
#ifndef FLATBUFFERS_PREFER_PRINTF
std::stringstream ss;
ss << t;
@@ -169,6 +170,7 @@ inline std::string NumToString<unsigned long long>(unsigned long long t) {
// Special versions for floats/doubles.
template<typename T> std::string FloatToString(T t, int precision) {
// clang-format off
#ifndef FLATBUFFERS_PREFER_PRINTF
// to_string() prints different numbers of digits for floats depending on
// platform and isn't available on Android, so we use stringstream
@@ -206,6 +208,7 @@ template<> inline std::string NumToString<float>(float t) {
inline std::string IntToStringHex(int i, int xdigits) {
FLATBUFFERS_ASSERT(i >= 0);
// clang-format off
#ifndef FLATBUFFERS_PREFER_PRINTF
std::stringstream ss;
ss << std::setw(xdigits) << std::setfill('0') << std::hex << std::uppercase
@@ -217,37 +220,71 @@ inline std::string IntToStringHex(int i, int xdigits) {
// clang-format on
}
// Thin wrapper over the C library strtod(): converts the text at `str` to a
// double and stores in `*str_end` the position where the conversion stopped.
// NOTE: the result of strtod (like printf, etc.) depends on the current
// global C locale.
static inline double strtod_impl(const char *str, char **str_end) {
  const double parsed = strtod(str, str_end);
  return parsed;
}
// clang-format off
// Use locale independent functions {strtod_l, strtof_l, strtoll_l, strtoull_l}.
#if defined(FLATBUFFERS_LOCALE_INDEPENDENT) && (FLATBUFFERS_LOCALE_INDEPENDENT > 0)
// Holder of the locale handle that is passed to the `*_l` conversion
// functions through ClassicLocale::Get().
// Everything before `public:` is private (class default access), so the only
// object of this type is the static member `instance_`.
class ClassicLocale {
#ifdef _MSC_VER
typedef _locale_t locale_type;
#else
typedef locale_t locale_type; // POSIX.1-2008 locale_t type
#endif
// Constructor and destructor are only declared here; their definitions are
// not visible in this header section.
// NOTE(review): presumably they create and release the classic "C" locale
// stored in `locale_` - confirm against the implementation file.
ClassicLocale();
~ClassicLocale();
// Locale handle returned by Get().
locale_type locale_;
// The single shared instance whose handle Get() exposes.
static ClassicLocale instance_;
public:
// Returns the locale handle held by the shared instance.
static locale_type Get() { return instance_.locale_; }
};
static inline float strtof_impl(const char *str, char **str_end) {
// Use "strtof" for float and strtod for double to avoid double=>float
// rounding problems (see
// https://en.cppreference.com/w/cpp/numeric/fenv/feround) or problems with
// std::numeric_limits<float>::is_iec559==false. Example:
// for (int mode : { FE_DOWNWARD, FE_TONEAREST, FE_TOWARDZERO, FE_UPWARD }){
// const char *s = "-4e38";
// std::fesetround(mode);
// std::cout << strtof(s, nullptr) << "; " << strtod(s, nullptr) << "; "
// << static_cast<float>(strtod(s, nullptr)) << "\n";
// }
// Gives:
// -inf; -4e+38; -inf
// -inf; -4e+38; -inf
// -inf; -4e+38; -3.40282e+38
// -inf; -4e+38; -3.40282e+38
// clang-format off
#ifdef FLATBUFFERS_HAS_NEW_STRTOD
return strtof(str, str_end);
#ifdef _MSC_VER
#define __strtoull_impl(s, pe, b) _strtoui64_l(s, pe, b, ClassicLocale::Get())
#define __strtoll_impl(s, pe, b) _strtoi64_l(s, pe, b, ClassicLocale::Get())
#define __strtod_impl(s, pe) _strtod_l(s, pe, ClassicLocale::Get())
#define __strtof_impl(s, pe) _strtof_l(s, pe, ClassicLocale::Get())
#else
return static_cast<float>(strtod_impl(str, str_end));
#endif // !FLATBUFFERS_HAS_NEW_STRTOD
// clang-format on
#define __strtoull_impl(s, pe, b) strtoull_l(s, pe, b, ClassicLocale::Get())
#define __strtoll_impl(s, pe, b) strtoll_l(s, pe, b, ClassicLocale::Get())
#define __strtod_impl(s, pe) strtod_l(s, pe, ClassicLocale::Get())
#define __strtof_impl(s, pe) strtof_l(s, pe, ClassicLocale::Get())
#endif
#else
#define __strtod_impl(s, pe) strtod(s, pe)
#define __strtof_impl(s, pe) static_cast<float>(strtod(s, pe))
#ifdef _MSC_VER
#define __strtoull_impl(s, pe, b) _strtoui64(s, pe, b)
#define __strtoll_impl(s, pe, b) _strtoi64(s, pe, b)
#else
#define __strtoull_impl(s, pe, b) strtoull(s, pe, b)
#define __strtoll_impl(s, pe, b) strtoll(s, pe, b)
#endif
#endif
// Signed 64-bit overload: converts the text at `str` in the given `base` with
// the strtoll-family function selected by the __strtoll_impl macro and stores
// the result in `*val`. `endptr` is forwarded unchanged to that function.
inline void strtoval_impl(int64_t *val, const char *str, char **endptr,
                          int base) {
  const int64_t parsed = __strtoll_impl(str, endptr, base);
  *val = parsed;
}
// Unsigned 64-bit overload: converts the text at `str` in the given `base`
// with the strtoull-family function selected by the __strtoull_impl macro and
// stores the result in `*val`. `endptr` is forwarded unchanged.
inline void strtoval_impl(uint64_t *val, const char *str, char **endptr,
                          int base) {
  const uint64_t parsed = __strtoull_impl(str, endptr, base);
  *val = parsed;
}
// Double overload: converts the text at `str` with the strtod-family function
// selected by the __strtod_impl macro and stores the result in `*val`.
// `endptr` is forwarded unchanged to that function.
inline void strtoval_impl(double *val, const char *str, char **endptr) {
  const double parsed = __strtod_impl(str, endptr);
  *val = parsed;
}
// Float overload: converts the text at `str` with the function selected by the
// __strtof_impl macro and stores the result in `*val`. `endptr` is forwarded
// unchanged to that function.
// UBSAN: double to float is safe if numeric_limits<float>::is_iec559 is true.
__supress_ubsan__("float-cast-overflow")
inline void strtoval_impl(float *val, const char *str, char **endptr) {
  const float parsed = __strtof_impl(str, endptr);
  *val = parsed;
}
#undef __strtoull_impl
#undef __strtoll_impl
#undef __strtod_impl
#undef __strtof_impl
// clang-format on
// Adaptor for strtoull()/strtoll().
// Flatbuffers accepts numbers with any count of leading zeros (-009 is -9),
// while strtoll with base=0 interprets first leading zero as octal prefix.
@@ -261,66 +298,43 @@ static inline float strtof_impl(const char *str, char **str_end) {
// - If the converted value falls out of range of corresponding return type, a
// range error occurs. In this case value MAX(T)/MIN(T) is returned.
template<typename T>
inline T StringToInteger64Impl(const char *const str, const char **endptr,
const int base, const bool check_errno = true) {
static_assert(flatbuffers::is_same<T, int64_t>::value ||
flatbuffers::is_same<T, uint64_t>::value,
"Type T must be either int64_t or uint64_t");
FLATBUFFERS_ASSERT(str && endptr); // endptr must be not null
inline bool StringToIntegerImpl(T *val, const char *const str,
const int base = 0,
const bool check_errno = true) {
// T is int64_t or uint64_t
FLATBUFFERS_ASSERT(str);
if (base <= 0) {
auto s = str;
while (*s && !is_digit(*s)) s++;
if (s[0] == '0' && is_alpha_char(s[1], 'X'))
return StringToInteger64Impl<T>(str, endptr, 16, check_errno);
return StringToIntegerImpl(val, str, 16, check_errno);
// if a prefix not match, try base=10
return StringToInteger64Impl<T>(str, endptr, 10, check_errno);
return StringToIntegerImpl(val, str, 10, check_errno);
} else {
if (check_errno) errno = 0; // clear thread-local errno
// calculate result
T result;
if (IsConstTrue(flatbuffers::is_same<T, int64_t>::value)) {
// clang-format off
#ifdef _MSC_VER
result = _strtoi64(str, const_cast<char**>(endptr), base);
#else
result = strtoll(str, const_cast<char**>(endptr), base);
#endif
// clang-format on
} else { // T is uint64_t
// clang-format off
#ifdef _MSC_VER
result = _strtoui64(str, const_cast<char**>(endptr), base);
#else
result = strtoull(str, const_cast<char**>(endptr), base);
#endif
// clang-format on
// The strtoull accepts negative numbers:
// If the minus sign was part of the input sequence, the numeric value
// calculated from the sequence of digits is negated as if by unary minus
// in the result type, which applies unsigned integer wraparound rules.
// Fix this behaviour (except -0).
if ((**endptr == '\0') && (0 != result)) {
auto s = str;
while (*s && !is_digit(*s)) s++;
s = (s > str) ? (s - 1) : s; // step back to one symbol
if (*s == '-') {
// For unsigned types return max to distinguish from
// "no conversion can be performed".
result = flatbuffers::numeric_limits<T>::max();
// point to the start of string, like errno
*endptr = str;
}
}
auto endptr = str;
strtoval_impl(val, str, const_cast<char **>(&endptr), base);
if ((*endptr != '\0') || (endptr == str)) {
*val = 0; // erase partial result
return false; // invalid string
}
// check for overflow
if (check_errno && errno) *endptr = str; // point it to start of input
// erase partial result, but save an overflow
if ((*endptr != str) && (**endptr != '\0')) result = 0;
return result;
// errno is out-of-range, return MAX/MIN
if (check_errno && errno) return false;
return true;
}
}
// Converts the whole string `str` to a floating-point value of type T
// (T must be either float or double) and stores it in `*val`.
// Returns true only when at least one character was consumed and the
// conversion stopped at the terminating '\0'; otherwise `*val` is reset to 0
// so that no partial result leaks out, and false is returned.
template<typename T>
inline bool StringToFloatImpl(T *val, const char *const str) {
  FLATBUFFERS_ASSERT(str && val);
  const char *stop = str;
  strtoval_impl(val, str, const_cast<char **>(&stop));
  if ((stop == str) || (*stop != '\0')) {
    *val = 0;  // erase partial result
    return false;
  }
  return true;
}
// Convert a string to an instance of T.
// Return value (matched with StringToInteger64Impl and strtod):
// - If successful, a numeric value corresponding to the str is returned.
@@ -329,66 +343,70 @@ inline T StringToInteger64Impl(const char *const str, const char **endptr,
// range error occurs. In this case value MAX(T)/MIN(T) is returned.
template<typename T> inline bool StringToNumber(const char *s, T *val) {
FLATBUFFERS_ASSERT(s && val);
const char *end = nullptr;
// The errno check isn't needed. strtoll will return MAX/MIN on overflow.
const int64_t i = StringToInteger64Impl<int64_t>(s, &end, -1, false);
*val = static_cast<T>(i);
const auto done = (s != end) && (*end == '\0');
if (done) {
int64_t i64;
// The errno check isn't needed, will return MAX/MIN on overflow.
if (StringToIntegerImpl(&i64, s, 0, false)) {
const int64_t max = flatbuffers::numeric_limits<T>::max();
const int64_t min = flatbuffers::numeric_limits<T>::lowest();
if (i > max) {
if (i64 > max) {
*val = static_cast<T>(max);
return false;
}
if (i < min) {
if (i64 < min) {
// For unsigned types return max to distinguish from
// "no conversion can be performed" when 0 is returned.
*val = static_cast<T>(flatbuffers::is_unsigned<T>::value ? max : min);
return false;
}
*val = static_cast<T>(i64);
return true;
}
return done;
}
template<> inline bool StringToNumber<int64_t>(const char *s, int64_t *val) {
const char *end = s; // request errno checking
*val = StringToInteger64Impl<int64_t>(s, &end, -1);
return (s != end) && (*end == '\0');
}
template<> inline bool StringToNumber<uint64_t>(const char *s, uint64_t *val) {
const char *end = s; // request errno checking
*val = StringToInteger64Impl<uint64_t>(s, &end, -1);
return (s != end) && (*end == '\0');
*val = 0;
return false;
}
template<> inline bool StringToNumber<double>(const char *s, double *val) {
FLATBUFFERS_ASSERT(s && val);
char *end = nullptr;
*val = strtod_impl(s, &end);
auto done = (s != end) && (*end == '\0');
if (!done) *val = 0; // erase partial result
return done;
// Specialization for int64_t: forwards directly to StringToIntegerImpl using
// its default `base` and `check_errno` arguments, and returns its result.
template<> inline bool StringToNumber<int64_t>(const char *str, int64_t *val) {
return StringToIntegerImpl(val, str);
}
template<> inline bool StringToNumber<float>(const char *s, float *val) {
FLATBUFFERS_ASSERT(s && val);
char *end = nullptr;
*val = strtof_impl(s, &end);
auto done = (s != end) && (*end == '\0');
if (!done) *val = 0; // erase partial result
return done;
// Specialization for uint64_t.
// Returns false if StringToIntegerImpl rejects the string. Otherwise a
// non-zero result is additionally checked for a leading minus sign:
// strtoull accepts negative numbers (if the minus sign was part of the input
// sequence, the value is negated as if by unary minus in the result type,
// which applies unsigned integer wraparound rules). Such input is reported as
// a failure here, with `*val` set to the maximum value of uint64_t to
// distinguish it from "no conversion can be performed". A zero result
// (including "-0") is always accepted.
template<>
inline bool StringToNumber<uint64_t>(const char *str, uint64_t *val) {
  const bool parsed = StringToIntegerImpl(val, str);
  if (!parsed) return false;
  if (0 == *val) return true;
  // Locate the first digit, then look at the symbol right before it (or at the
  // first symbol itself when the string starts with a digit).
  const char *cursor = str;
  while (*cursor && !is_digit(*cursor)) ++cursor;
  const char before_digits = (cursor > str) ? *(cursor - 1) : *cursor;
  if (before_digits != '-') return true;
  *val = flatbuffers::numeric_limits<uint64_t>::max();
  return false;
}
inline int64_t StringToInt(const char *str, const char **endptr = nullptr,
int base = 10) {
const char *ep = nullptr;
return StringToInteger64Impl<int64_t>(str, endptr ? endptr : &ep, base);
// Specialization for float: delegates to StringToFloatImpl and returns its
// result.
template<> inline bool StringToNumber(const char *s, float *val) {
return StringToFloatImpl(val, s);
}
inline uint64_t StringToUInt(const char *str, const char **endptr = nullptr,
int base = 10) {
const char *ep = nullptr;
return StringToInteger64Impl<uint64_t>(str, endptr ? endptr : &ep, base);
// Specialization for double: delegates to StringToFloatImpl and returns its
// result.
template<> inline bool StringToNumber(const char *s, double *val) {
return StringToFloatImpl(val, s);
}
// Converts the string `s` to a signed 64-bit integer in the given `base`
// (10 by default). Returns the converted value when StringToIntegerImpl
// reports success and 0 in every other case.
inline int64_t StringToInt(const char *s, int base = 10) {
  int64_t parsed;
  if (StringToIntegerImpl(&parsed, s, base)) return parsed;
  return 0;
}
// Converts the string `s` to an unsigned 64-bit integer in the given `base`
// (10 by default). Returns the converted value when StringToIntegerImpl
// reports success and 0 in every other case.
inline uint64_t StringToUInt(const char *s, int base = 10) {
  uint64_t parsed;
  if (StringToIntegerImpl(&parsed, s, base)) return parsed;
  return 0;
}
typedef bool (*LoadFileFunction)(const char *filename, bool binary,
@@ -506,6 +524,7 @@ inline void EnsureDirExists(const std::string &filepath) {
auto parent = StripFileName(filepath);
if (parent.length()) EnsureDirExists(parent);
// clang-format off
#ifdef _WIN32
(void)_mkdir(filepath.c_str());
#else
@@ -518,6 +537,7 @@ inline void EnsureDirExists(const std::string &filepath) {
// Returns the input path if the absolute path couldn't be resolved.
inline std::string AbsolutePath(const std::string &filepath) {
// clang-format off
#ifdef FLATBUFFERS_NO_ABSOLUTE_PATH_RESOLUTION
return filepath;
#else
@@ -576,7 +596,8 @@ inline int FromUTF8(const char **in) {
break;
}
}
if ((static_cast<unsigned char>(**in) << len) & 0x80) return -1; // Bit after leading 1's must be 0.
if ((static_cast<unsigned char>(**in) << len) & 0x80)
return -1; // Bit after leading 1's must be 0.
if (!len) return *(*in)++;
// UTF-8 encoded values with a length are between 2 and 4 bytes.
if (len < 2 || len > 4) { return -1; }
@@ -635,7 +656,7 @@ inline std::string WordWrap(const std::string in, size_t max_length,
return wrapped;
}
#endif // !FLATBUFFERS_PREFER_PRINTF
#endif // !FLATBUFFERS_PREFER_PRINTF
inline bool EscapeString(const char *s, size_t length, std::string *_text,
bool allow_non_utf8, bool natural_utf8) {
@@ -707,6 +728,19 @@ inline bool EscapeString(const char *s, size_t length, std::string *_text,
return true;
}
// Remove paired quotes in a string: "text"|'text' -> text.
std::string RemoveStringQuotes(const std::string &s);
// Change the global C-locale to the locale with name <locale_name>.
// Returns the actual locale name in <_value>, useful if locale_name is "" or
// null.
bool SetGlobalTestLocale(const char *locale_name,
std::string *_value = nullptr);
// Read (or test) a value of environment variable.
bool ReadEnvironmentVariable(const char *var_name,
std::string *_value = nullptr);
} // namespace flatbuffers
#endif // FLATBUFFERS_UTIL_H_