mirror of
https://github.com/google/flatbuffers.git
synced 2026-06-01 19:58:15 +00:00
[BUG FIX] [MINOR] Fix encoding with unicode characters.
When a unicode string was passed to builder.CreateString, the default encoding assumed that all characters could be encoded as ASCII. This fix lets the caller specify the encoding, and how encoding errors are handled, when creating strings.
This commit is contained in:
@@ -361,14 +361,14 @@ class Builder(object):
|
||||
self.PlaceUOffsetT(vectorNumElems)
|
||||
return self.Offset()
|
||||
|
||||
def CreateString(self, s):
|
||||
def CreateString(self, s, encoding='utf-8', errors='strict'):
|
||||
"""CreateString writes a null-terminated byte string as a vector."""
|
||||
|
||||
self.assertNotNested()
|
||||
self.nested = True
|
||||
|
||||
if isinstance(s, compat.string_types):
|
||||
x = s.encode()
|
||||
x = s.encode(encoding, errors)
|
||||
elif isinstance(s, compat.binary_type):
|
||||
x = s
|
||||
else:
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2014 Google Inc. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@@ -389,23 +390,36 @@ class TestByteLayout(unittest.TestCase):
|
||||
|
||||
def test_create_ascii_string(self):
|
||||
b = flatbuffers.Builder(0)
|
||||
b.CreateString(u"foo".encode('ascii'))
|
||||
b.CreateString(u"foo", encoding='ascii')
|
||||
|
||||
# 0-terminated, no pad:
|
||||
self.assertBuilderEquals(b, [3, 0, 0, 0, 'f', 'o', 'o', 0])
|
||||
b.CreateString(u"moop".encode('ascii'))
|
||||
b.CreateString(u"moop", encoding='ascii')
|
||||
# 0-terminated, 3-byte pad:
|
||||
self.assertBuilderEquals(b, [4, 0, 0, 0, 'm', 'o', 'o', 'p',
|
||||
0, 0, 0, 0,
|
||||
3, 0, 0, 0, 'f', 'o', 'o', 0])
|
||||
|
||||
def test_create_utf8_string(self):
|
||||
b = flatbuffers.Builder(0)
|
||||
b.CreateString(u"Цлїςσδε")
|
||||
self.assertBuilderEquals(b, "\x0e\x00\x00\x00\xd0\xa6\xd0\xbb\xd1\x97" \
|
||||
"\xcf\x82\xcf\x83\xce\xb4\xce\xb5\x00\x00")
|
||||
|
||||
b.CreateString(u"フムアムカモケモ")
|
||||
self.assertBuilderEquals(b, "\x18\x00\x00\x00\xef\xbe\x8c\xef\xbe\x91" \
|
||||
"\xef\xbd\xb1\xef\xbe\x91\xef\xbd\xb6\xef\xbe\x93\xef\xbd\xb9\xef" \
|
||||
"\xbe\x93\x00\x00\x00\x00\x0e\x00\x00\x00\xd0\xa6\xd0\xbb\xd1\x97" \
|
||||
"\xcf\x82\xcf\x83\xce\xb4\xce\xb5\x00\x00")
|
||||
|
||||
def test_create_arbitrary_string(self):
|
||||
b = flatbuffers.Builder(0)
|
||||
s = "\x01\x02\x03".encode('utf-8')
|
||||
b.CreateString(s)
|
||||
s = "\x01\x02\x03"
|
||||
b.CreateString(s) # Default encoding is utf-8.
|
||||
# 0-terminated, no pad:
|
||||
self.assertBuilderEquals(b, [3, 0, 0, 0, 1, 2, 3, 0])
|
||||
s2 = "\x04\x05\x06\x07".encode('utf-8')
|
||||
b.CreateString(s2)
|
||||
s2 = "\x04\x05\x06\x07"
|
||||
b.CreateString(s2) # Default encoding is utf-8.
|
||||
# 0-terminated, 3-byte pad:
|
||||
self.assertBuilderEquals(b, [4, 0, 0, 0, 4, 5, 6, 7, 0, 0, 0, 0,
|
||||
3, 0, 0, 0, 1, 2, 3, 0])
|
||||
|
||||
Reference in New Issue
Block a user