Support all JSON escape codes (including \u) for parsing & text gen.

Bug: 16624362
Change-Id: Ia09ea404c0c11dd1dc6993a8cbd155bf8152b65f
Tested: on Windows & Linux.
This commit is contained in:
Wouter van Oortmerssen
2014-08-20 13:22:16 -07:00
parent f7b0d130b6
commit ebac1e1940
8 changed files with 172 additions and 27 deletions

View File

@@ -276,6 +276,7 @@ class Parser {
void MarkGenerated();
private:
int64_t ParseHexNum(int nibbles);
void Next();
bool IsNext(int t);
void Expect(int t);

View File

@@ -44,13 +44,11 @@ template<> inline std::string NumToString<unsigned char>(unsigned char t) {
}
// Convert an integer value to a hexadecimal string.
// The returned string length is the number of nibbles in
// the supplied value prefixed by 0 digits. For example,
// IntToStringHex(static_cast<int>(0x23)) returns the
// string "00000023".
template<typename T> std::string IntToStringHex(T i) {
// The returned string length is always xdigits long, prefixed by 0 digits.
// For example, IntToStringHex(0x23, 8) returns the string "00000023".
inline std::string IntToStringHex(int i, int xdigits) {
std::stringstream ss;
ss << std::setw(sizeof(T) * 2)
ss << std::setw(xdigits)
<< std::setfill('0')
<< std::hex
<< std::uppercase
@@ -59,11 +57,11 @@ template<typename T> std::string IntToStringHex(T i) {
}
// Portable implementation of strtoull().
inline int64_t StringToInt(const char *str) {
inline int64_t StringToInt(const char *str, int base = 10) {
#ifdef _MSC_VER
return _strtoui64(str, nullptr, 10);
return _strtoui64(str, nullptr, base);
#else
return strtoull(str, nullptr, 10);
return strtoull(str, nullptr, base);
#endif
}
@@ -126,6 +124,60 @@ inline std::string StripFileName(const std::string &filepath) {
return i != std::string::npos ? filepath.substr(0, i + 1) : "";
}
// To and from UTF-8 unicode conversion functions
// Convert a unicode code point into a UTF-8 representation by appending it
// to a string. Returns the number of bytes generated.
inline int ToUTF8(uint32_t ucc, std::string *out) {
assert(!(ucc & 0x80000000)); // Top bit can't be set.
// 6 possible encodings: http://en.wikipedia.org/wiki/UTF-8
for (int i = 0; i < 6; i++) {
// Max bits this encoding can represent.
uint32_t max_bits = 6 + i * 5 + static_cast<int>(!i);
if (ucc < (1 << max_bits)) { // does it fit?
// Remaining bits not encoded in the first byte, store 6 bits each
uint32_t remain_bits = i * 6;
// Store first byte:
(*out) += static_cast<char>((0xFE << (max_bits - remain_bits)) |
(ucc >> remain_bits));
// Store remaining bytes:
for (int j = i - 1; j >= 0; j--) {
(*out) += static_cast<char>(((ucc >> (j * 6)) & 0x3F) | 0x80);
}
return i + 1; // Return the number of bytes added.
}
}
assert(0); // Impossible to arrive here.
return -1;
}
// Converts whatever prefix of the incoming string corresponds to a valid
// UTF-8 sequence into a unicode code. The incoming pointer will have been
// advanced past all bytes parsed.
// returns -1 upon corrupt UTF-8 encoding (ignore the incoming pointer in
// this case).
inline int FromUTF8(const char **in) {
int len = 0;
// Count leading 1 bits.
for (int mask = 0x80; mask >= 0x04; mask >>= 1) {
if (**in & mask) {
len++;
} else {
break;
}
}
if ((**in << len) & 0x80) return -1; // Bit after leading 1's must be 0.
if (!len) return *(*in)++;
// Grab initial bits of the code.
int ucc = *(*in)++ & ((1 << (7 - len)) - 1);
for (int i = 0; i < len - 1; i++) {
if ((**in & 0xC0) != 0x80) return -1; // Upper bits must 1 0.
ucc <<= 6;
ucc |= *(*in)++ & 0x3F; // Grab 6 more bits of the code.
}
return ucc;
}
} // namespace flatbuffers
#endif // FLATBUFFERS_UTIL_H_