mirror of
https://github.com/google/flatbuffers.git
synced 2026-06-09 14:46:26 +00:00
Support all JSON escape codes (including \u) for parsing & text gen.
Bug: 16624362 Change-Id: Ia09ea404c0c11dd1dc6993a8cbd155bf8152b65f Tested: on Windows & Linux.
This commit is contained in:
@@ -276,6 +276,7 @@ class Parser {
|
||||
void MarkGenerated();
|
||||
|
||||
private:
|
||||
int64_t ParseHexNum(int nibbles);
|
||||
void Next();
|
||||
bool IsNext(int t);
|
||||
void Expect(int t);
|
||||
|
||||
@@ -44,13 +44,11 @@ template<> inline std::string NumToString<unsigned char>(unsigned char t) {
|
||||
}
|
||||
|
||||
// Convert an integer value to a hexadecimal string.
|
||||
// The returned string length is the number of nibbles in
|
||||
// the supplied value prefixed by 0 digits. For example,
|
||||
// IntToStringHex(static_cast<int>(0x23)) returns the
|
||||
// string "00000023".
|
||||
template<typename T> std::string IntToStringHex(T i) {
|
||||
// The returned string length is always xdigits long, prefixed by 0 digits.
|
||||
// For example, IntToStringHex(0x23, 8) returns the string "00000023".
|
||||
inline std::string IntToStringHex(int i, int xdigits) {
|
||||
std::stringstream ss;
|
||||
ss << std::setw(sizeof(T) * 2)
|
||||
ss << std::setw(xdigits)
|
||||
<< std::setfill('0')
|
||||
<< std::hex
|
||||
<< std::uppercase
|
||||
@@ -59,11 +57,11 @@ template<typename T> std::string IntToStringHex(T i) {
|
||||
}
|
||||
|
||||
// Portable implementation of strtoull().
|
||||
inline int64_t StringToInt(const char *str) {
|
||||
inline int64_t StringToInt(const char *str, int base = 10) {
|
||||
#ifdef _MSC_VER
|
||||
return _strtoui64(str, nullptr, 10);
|
||||
return _strtoui64(str, nullptr, base);
|
||||
#else
|
||||
return strtoull(str, nullptr, 10);
|
||||
return strtoull(str, nullptr, base);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -126,6 +124,60 @@ inline std::string StripFileName(const std::string &filepath) {
|
||||
return i != std::string::npos ? filepath.substr(0, i + 1) : "";
|
||||
}
|
||||
|
||||
// To and from UTF-8 unicode conversion functions
|
||||
|
||||
// Convert a unicode code point into a UTF-8 representation by appending it
|
||||
// to a string. Returns the number of bytes generated.
|
||||
inline int ToUTF8(uint32_t ucc, std::string *out) {
|
||||
assert(!(ucc & 0x80000000)); // Top bit can't be set.
|
||||
// 6 possible encodings: http://en.wikipedia.org/wiki/UTF-8
|
||||
for (int i = 0; i < 6; i++) {
|
||||
// Max bits this encoding can represent.
|
||||
uint32_t max_bits = 6 + i * 5 + static_cast<int>(!i);
|
||||
if (ucc < (1 << max_bits)) { // does it fit?
|
||||
// Remaining bits not encoded in the first byte, store 6 bits each
|
||||
uint32_t remain_bits = i * 6;
|
||||
// Store first byte:
|
||||
(*out) += static_cast<char>((0xFE << (max_bits - remain_bits)) |
|
||||
(ucc >> remain_bits));
|
||||
// Store remaining bytes:
|
||||
for (int j = i - 1; j >= 0; j--) {
|
||||
(*out) += static_cast<char>(((ucc >> (j * 6)) & 0x3F) | 0x80);
|
||||
}
|
||||
return i + 1; // Return the number of bytes added.
|
||||
}
|
||||
}
|
||||
assert(0); // Impossible to arrive here.
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Converts whatever prefix of the incoming string corresponds to a valid
|
||||
// UTF-8 sequence into a unicode code. The incoming pointer will have been
|
||||
// advanced past all bytes parsed.
|
||||
// returns -1 upon corrupt UTF-8 encoding (ignore the incoming pointer in
|
||||
// this case).
|
||||
inline int FromUTF8(const char **in) {
|
||||
int len = 0;
|
||||
// Count leading 1 bits.
|
||||
for (int mask = 0x80; mask >= 0x04; mask >>= 1) {
|
||||
if (**in & mask) {
|
||||
len++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ((**in << len) & 0x80) return -1; // Bit after leading 1's must be 0.
|
||||
if (!len) return *(*in)++;
|
||||
// Grab initial bits of the code.
|
||||
int ucc = *(*in)++ & ((1 << (7 - len)) - 1);
|
||||
for (int i = 0; i < len - 1; i++) {
|
||||
if ((**in & 0xC0) != 0x80) return -1; // Upper bits must 1 0.
|
||||
ucc <<= 6;
|
||||
ucc |= *(*in)++ & 0x3F; // Grab 6 more bits of the code.
|
||||
}
|
||||
return ucc;
|
||||
}
|
||||
|
||||
} // namespace flatbuffers
|
||||
|
||||
#endif // FLATBUFFERS_UTIL_H_
|
||||
|
||||
Reference in New Issue
Block a user