From af236833119ce6ec5ab82930be6f5b2badd9e89e Mon Sep 17 00:00:00 2001 From: Faizan Rashid Date: Thu, 31 Dec 2015 09:41:00 +0500 Subject: [PATCH 1/2] [BUG FIX] [MINOR] Fix encoding with unicode characters. When passing a unicode string to builder.CreateString, the default encoding assumed all characters can be encoded using ascii. Added a fix so a user can specify the encoding and how to handle errors when creating strings. --- python/flatbuffers/builder.py | 4 ++-- tests/py_test.py | 26 ++++++++++++++++++++------ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/python/flatbuffers/builder.py b/python/flatbuffers/builder.py index 6e3465913..8ca0e9321 100644 --- a/python/flatbuffers/builder.py +++ b/python/flatbuffers/builder.py @@ -361,14 +361,14 @@ class Builder(object): self.PlaceUOffsetT(vectorNumElems) return self.Offset() - def CreateString(self, s): + def CreateString(self, s, encoding='utf-8', errors='strict'): """CreateString writes a null-terminated byte string as a vector.""" self.assertNotNested() self.nested = True if isinstance(s, compat.string_types): - x = s.encode() + x = s.encode(encoding, errors) elif isinstance(s, compat.binary_type): x = s else: diff --git a/tests/py_test.py b/tests/py_test.py index cce317989..0ad011736 100644 --- a/tests/py_test.py +++ b/tests/py_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2014 Google Inc. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -389,23 +390,36 @@ class TestByteLayout(unittest.TestCase): def test_create_ascii_string(self): b = flatbuffers.Builder(0) - b.CreateString(u"foo".encode('ascii')) + b.CreateString(u"foo", encoding='ascii') + # 0-terminated, no pad: self.assertBuilderEquals(b, [3, 0, 0, 0, 'f', 'o', 'o', 0]) - b.CreateString(u"moop".encode('ascii')) + b.CreateString(u"moop", encoding='ascii') # 0-terminated, 3-byte pad: self.assertBuilderEquals(b, [4, 0, 0, 0, 'm', 'o', 'o', 'p', 0, 0, 0, 0, 3, 0, 0, 0, 'f', 'o', 'o', 0]) + def test_create_utf8_string(self): + b = flatbuffers.Builder(0) + b.CreateString(u"Цлїςσδε") + self.assertBuilderEquals(b, "\x0e\x00\x00\x00\xd0\xa6\xd0\xbb\xd1\x97" \ + "\xcf\x82\xcf\x83\xce\xb4\xce\xb5\x00\x00") + + b.CreateString(u"ﾌﾑｱﾑｶﾓｹﾓ") + self.assertBuilderEquals(b, "\x18\x00\x00\x00\xef\xbe\x8c\xef\xbe\x91" \ + "\xef\xbd\xb1\xef\xbe\x91\xef\xbd\xb6\xef\xbe\x93\xef\xbd\xb9\xef" \ + "\xbe\x93\x00\x00\x00\x00\x0e\x00\x00\x00\xd0\xa6\xd0\xbb\xd1\x97" \ + "\xcf\x82\xcf\x83\xce\xb4\xce\xb5\x00\x00") + def test_create_arbitrary_string(self): b = flatbuffers.Builder(0) - s = "\x01\x02\x03".encode('utf-8') - b.CreateString(s) + s = "\x01\x02\x03" + b.CreateString(s) # Default encoding is utf-8. # 0-terminated, no pad: self.assertBuilderEquals(b, [3, 0, 0, 0, 1, 2, 3, 0]) - s2 = "\x04\x05\x06\x07".encode('utf-8') - b.CreateString(s2) + s2 = "\x04\x05\x06\x07" + b.CreateString(s2) # Default encoding is utf-8. 
# 0-terminated, 3-byte pad: self.assertBuilderEquals(b, [4, 0, 0, 0, 4, 5, 6, 7, 0, 0, 0, 0, 3, 0, 0, 0, 1, 2, 3, 0]) From f1ab30a49010d5e0d9d29c511186babc5f510234 Mon Sep 17 00:00:00 2001 From: Oli Wilkinson Date: Mon, 18 Jan 2016 20:54:22 +0000 Subject: [PATCH 2/2] Added Visual Studio transient files to .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index ca18d2049..6f3894d06 100755 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,6 @@ java/*.iml java/target **/*.pyc .idea +build/VS2010/FlatBuffers.sdf +build/VS2010/FlatBuffers.opensdf +build/VS2010/ipch/**/*.ipch