Output JSON strings as natural UTF-8 text without escapes (#4710)

* Added support for printing UTF-8 strings without escaping.

* EscapeString: the first invalid symbol resets the print_natural_utf8 flag to false.

* Move the test to ParseAndGenerateTextTest. Fixes.

* Removed the dependency between the `natural_utf8` and `allow_non_utf8` flags.
This commit is contained in:
Vladimir Glavnyy
2018-05-04 02:10:45 +07:00
committed by Wouter van Oortmerssen
parent 85faa46fb3
commit 12c4c2238c
9 changed files with 55 additions and 16 deletions

View File

@@ -495,14 +495,14 @@ class Reference {
if (type_ == TYPE_STRING) {
String str(Indirect(), byte_width_);
if (strings_quoted) {
flatbuffers::EscapeString(str.c_str(), str.length(), &s, true);
flatbuffers::EscapeString(str.c_str(), str.length(), &s, true, false);
} else {
s.append(str.c_str(), str.length());
}
} else if (IsKey()) {
auto str = AsKey();
if (keys_quoted) {
flatbuffers::EscapeString(str, strlen(str), &s, true);
flatbuffers::EscapeString(str, strlen(str), &s, true, false);
} else {
s += str;
}

View File

@@ -379,6 +379,7 @@ struct IDLOptions {
std::string object_suffix;
bool union_value_namespacing;
bool allow_non_utf8;
bool natural_utf8;
std::string include_prefix;
bool keep_include_path;
bool binary_schema_comments;
@@ -439,6 +440,7 @@ struct IDLOptions {
object_suffix("T"),
union_value_namespacing(true),
allow_non_utf8(false),
natural_utf8(false),
keep_include_path(false),
binary_schema_comments(false),
binary_schema_builtins(false),

View File

@@ -314,7 +314,7 @@ struct ToStringVisitor : public IterationVisitor {
void Float(float x) { s += NumToString(x); }
void Double(double x) { s += NumToString(x); }
void String(const struct String *str) {
EscapeString(str->c_str(), str->size(), &s, true);
EscapeString(str->c_str(), str->size(), &s, true, false);
}
void Unknown(const uint8_t *) { s += "(?)"; }
void StartVector() { s += "[ "; }

View File

@@ -381,7 +381,7 @@ inline std::string WordWrap(const std::string in, size_t max_length,
}
inline bool EscapeString(const char *s, size_t length, std::string *_text,
bool allow_non_utf8) {
bool allow_non_utf8, bool natural_utf8) {
std::string &text = *_text;
text += "\"";
for (uoffset_t i = 0; i < length; i++) {
@@ -421,7 +421,10 @@ inline bool EscapeString(const char *s, size_t length, std::string *_text,
return false;
}
} else {
if (ucc <= 0xFFFF) {
if (natural_utf8) {
// utf8 points to past all utf-8 bytes parsed
text.append(s + i, static_cast<size_t>(utf8 - s - i));
} else if (ucc <= 0xFFFF) {
// Parses as Unicode within JSON's \uXXXX range, so use that.
text += "\\u";
text += IntToStringHex(ucc, 4);