mirror of
https://github.com/google/flatbuffers.git
synced 2026-07-02 23:48:17 +00:00
Output JSON strings as natural UTF-8 text without escapes (#4710)
* Added support for printing UTF-8 strings without escapes.
* EscapeString: the first invalid symbol resets the print_natural_utf8 flag to false.
* Moved the test to ParseAndGenerateTextTest. Fixes.
* Removed the dependence between the `natural_utf8` and `allow_non_utf8` flags.
This commit is contained in:
committed by
Wouter van Oortmerssen
parent
85faa46fb3
commit
12c4c2238c
@@ -495,14 +495,14 @@ class Reference {
|
|||||||
if (type_ == TYPE_STRING) {
|
if (type_ == TYPE_STRING) {
|
||||||
String str(Indirect(), byte_width_);
|
String str(Indirect(), byte_width_);
|
||||||
if (strings_quoted) {
|
if (strings_quoted) {
|
||||||
flatbuffers::EscapeString(str.c_str(), str.length(), &s, true);
|
flatbuffers::EscapeString(str.c_str(), str.length(), &s, true, false);
|
||||||
} else {
|
} else {
|
||||||
s.append(str.c_str(), str.length());
|
s.append(str.c_str(), str.length());
|
||||||
}
|
}
|
||||||
} else if (IsKey()) {
|
} else if (IsKey()) {
|
||||||
auto str = AsKey();
|
auto str = AsKey();
|
||||||
if (keys_quoted) {
|
if (keys_quoted) {
|
||||||
flatbuffers::EscapeString(str, strlen(str), &s, true);
|
flatbuffers::EscapeString(str, strlen(str), &s, true, false);
|
||||||
} else {
|
} else {
|
||||||
s += str;
|
s += str;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -379,6 +379,7 @@ struct IDLOptions {
|
|||||||
std::string object_suffix;
|
std::string object_suffix;
|
||||||
bool union_value_namespacing;
|
bool union_value_namespacing;
|
||||||
bool allow_non_utf8;
|
bool allow_non_utf8;
|
||||||
|
bool natural_utf8;
|
||||||
std::string include_prefix;
|
std::string include_prefix;
|
||||||
bool keep_include_path;
|
bool keep_include_path;
|
||||||
bool binary_schema_comments;
|
bool binary_schema_comments;
|
||||||
@@ -439,6 +440,7 @@ struct IDLOptions {
|
|||||||
object_suffix("T"),
|
object_suffix("T"),
|
||||||
union_value_namespacing(true),
|
union_value_namespacing(true),
|
||||||
allow_non_utf8(false),
|
allow_non_utf8(false),
|
||||||
|
natural_utf8(false),
|
||||||
keep_include_path(false),
|
keep_include_path(false),
|
||||||
binary_schema_comments(false),
|
binary_schema_comments(false),
|
||||||
binary_schema_builtins(false),
|
binary_schema_builtins(false),
|
||||||
|
|||||||
@@ -314,7 +314,7 @@ struct ToStringVisitor : public IterationVisitor {
|
|||||||
void Float(float x) { s += NumToString(x); }
|
void Float(float x) { s += NumToString(x); }
|
||||||
void Double(double x) { s += NumToString(x); }
|
void Double(double x) { s += NumToString(x); }
|
||||||
void String(const struct String *str) {
|
void String(const struct String *str) {
|
||||||
EscapeString(str->c_str(), str->size(), &s, true);
|
EscapeString(str->c_str(), str->size(), &s, true, false);
|
||||||
}
|
}
|
||||||
void Unknown(const uint8_t *) { s += "(?)"; }
|
void Unknown(const uint8_t *) { s += "(?)"; }
|
||||||
void StartVector() { s += "[ "; }
|
void StartVector() { s += "[ "; }
|
||||||
|
|||||||
@@ -381,7 +381,7 @@ inline std::string WordWrap(const std::string in, size_t max_length,
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline bool EscapeString(const char *s, size_t length, std::string *_text,
|
inline bool EscapeString(const char *s, size_t length, std::string *_text,
|
||||||
bool allow_non_utf8) {
|
bool allow_non_utf8, bool natural_utf8) {
|
||||||
std::string &text = *_text;
|
std::string &text = *_text;
|
||||||
text += "\"";
|
text += "\"";
|
||||||
for (uoffset_t i = 0; i < length; i++) {
|
for (uoffset_t i = 0; i < length; i++) {
|
||||||
@@ -421,7 +421,10 @@ inline bool EscapeString(const char *s, size_t length, std::string *_text,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (ucc <= 0xFFFF) {
|
if (natural_utf8) {
|
||||||
|
// utf8 points to past all utf-8 bytes parsed
|
||||||
|
text.append(s + i, static_cast<size_t>(utf8 - s - i));
|
||||||
|
} else if (ucc <= 0xFFFF) {
|
||||||
// Parses as Unicode within JSON's \uXXXX range, so use that.
|
// Parses as Unicode within JSON's \uXXXX range, so use that.
|
||||||
text += "\\u";
|
text += "\\u";
|
||||||
text += IntToStringHex(ucc, 4);
|
text += IntToStringHex(ucc, 4);
|
||||||
|
|||||||
@@ -69,6 +69,8 @@ std::string FlatCompiler::GetUsageString(const char *program_name) const {
|
|||||||
" --allow-non-utf8 Pass non-UTF-8 input through parser and emit nonstandard\n"
|
" --allow-non-utf8 Pass non-UTF-8 input through parser and emit nonstandard\n"
|
||||||
" \\x escapes in JSON. (Default is to raise parse error on\n"
|
" \\x escapes in JSON. (Default is to raise parse error on\n"
|
||||||
" non-UTF-8 input.)\n"
|
" non-UTF-8 input.)\n"
|
||||||
|
" --natural-utf8 Output strings with UTF-8 as human-readable strings.\n"
|
||||||
|
" By default, UTF-8 characters are printed as \\uXXXX escapes.\n"
|
||||||
" --defaults-json Output fields whose value is the default when\n"
|
" --defaults-json Output fields whose value is the default when\n"
|
||||||
" writing JSON\n"
|
" writing JSON\n"
|
||||||
" --unknown-json Allow fields in JSON that are not defined in the\n"
|
" --unknown-json Allow fields in JSON that are not defined in the\n"
|
||||||
@@ -182,6 +184,8 @@ int FlatCompiler::Compile(int argc, const char **argv) {
|
|||||||
opts.strict_json = true;
|
opts.strict_json = true;
|
||||||
} else if (arg == "--allow-non-utf8") {
|
} else if (arg == "--allow-non-utf8") {
|
||||||
opts.allow_non_utf8 = true;
|
opts.allow_non_utf8 = true;
|
||||||
|
} else if (arg == "--natural-utf8") {
|
||||||
|
opts.natural_utf8 = true;
|
||||||
} else if (arg == "--no-js-exports") {
|
} else if (arg == "--no-js-exports") {
|
||||||
opts.skip_js_exports = true;
|
opts.skip_js_exports = true;
|
||||||
} else if (arg == "--goog-js-export") {
|
} else if (arg == "--goog-js-export") {
|
||||||
|
|||||||
@@ -119,7 +119,8 @@ bool Print<const void *>(const void *val, Type type, int indent,
|
|||||||
break;
|
break;
|
||||||
case BASE_TYPE_STRING: {
|
case BASE_TYPE_STRING: {
|
||||||
auto s = reinterpret_cast<const String *>(val);
|
auto s = reinterpret_cast<const String *>(val);
|
||||||
if (!EscapeString(s->c_str(), s->Length(), _text, opts.allow_non_utf8)) {
|
if (!EscapeString(s->c_str(), s->Length(), _text, opts.allow_non_utf8,
|
||||||
|
opts.natural_utf8)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -91,7 +91,8 @@ std::string GetAnyValueS(reflection::BaseType type, const uint8_t *data,
|
|||||||
auto val = GetAnyFieldS(*table_field, fielddef, schema);
|
auto val = GetAnyFieldS(*table_field, fielddef, schema);
|
||||||
if (fielddef.type()->base_type() == reflection::String) {
|
if (fielddef.type()->base_type() == reflection::String) {
|
||||||
std::string esc;
|
std::string esc;
|
||||||
flatbuffers::EscapeString(val.c_str(), val.length(), &esc, true);
|
flatbuffers::EscapeString(val.c_str(), val.length(), &esc, true,
|
||||||
|
false);
|
||||||
val = esc;
|
val = esc;
|
||||||
}
|
}
|
||||||
s += fielddef.name()->str();
|
s += fielddef.name()->str();
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright 2014 Google Inc. All rights reserved.
|
* Copyright 2014 Google Inc. All rights reserved.
|
||||||
*
|
*
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
@@ -645,6 +645,22 @@ void ParseAndGenerateTextTest() {
|
|||||||
// If this fails, check registry.lasterror_.
|
// If this fails, check registry.lasterror_.
|
||||||
TEST_EQ(ok, true);
|
TEST_EQ(ok, true);
|
||||||
TEST_EQ_STR(text.c_str(), jsonfile.c_str());
|
TEST_EQ_STR(text.c_str(), jsonfile.c_str());
|
||||||
|
|
||||||
|
// Generate text for UTF-8 strings without escapes.
|
||||||
|
std::string jsonfile_utf8;
|
||||||
|
TEST_EQ(flatbuffers::LoadFile((test_data_path + "unicode_test.json").c_str(),
|
||||||
|
false, &jsonfile_utf8),
|
||||||
|
true);
|
||||||
|
TEST_EQ(parser.Parse(jsonfile_utf8.c_str(), include_directories), true);
|
||||||
|
// To ensure it is correct, generate utf-8 text back from the binary.
|
||||||
|
std::string jsongen_utf8;
|
||||||
|
// request natural printing for utf-8 strings
|
||||||
|
parser.opts.natural_utf8 = true;
|
||||||
|
parser.opts.strict_json = true;
|
||||||
|
TEST_EQ(
|
||||||
|
GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen_utf8),
|
||||||
|
true);
|
||||||
|
TEST_EQ_STR(jsongen_utf8.c_str(), jsonfile_utf8.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
void ReflectionTest(uint8_t *flatbuf, size_t length) {
|
void ReflectionTest(uint8_t *flatbuf, size_t length) {
|
||||||
|
|||||||
@@ -1,13 +1,5 @@
|
|||||||
{
|
{
|
||||||
"name": "unicode_test",
|
"name": "unicode_test",
|
||||||
"testarrayoftables": [
|
|
||||||
{ "name": "Цлїςσδε" },
|
|
||||||
{ "name": "フムアムカモケモ" },
|
|
||||||
{ "name": "フムヤムカモケモ" },
|
|
||||||
{ "name": "㊀㊁㊂㊃㊄" },
|
|
||||||
{ "name": "☳☶☲" },
|
|
||||||
{ "name": "𡇙𝌆" }
|
|
||||||
],
|
|
||||||
"testarrayofstring": [
|
"testarrayofstring": [
|
||||||
"Цлїςσδε",
|
"Цлїςσδε",
|
||||||
"フムアムカモケモ",
|
"フムアムカモケモ",
|
||||||
@@ -15,5 +7,25 @@
|
|||||||
"㊀㊁㊂㊃㊄",
|
"㊀㊁㊂㊃㊄",
|
||||||
"☳☶☲",
|
"☳☶☲",
|
||||||
"𡇙𝌆"
|
"𡇙𝌆"
|
||||||
|
],
|
||||||
|
"testarrayoftables": [
|
||||||
|
{
|
||||||
|
"name": "Цлїςσδε"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "フムアムカモケモ"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "フムヤムカモケモ"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "㊀㊁㊂㊃㊄"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "☳☶☲"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "𡇙𝌆"
|
||||||
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user