Dart - optimize writeString for ASCII (#6736)

* Dart - optimize writeString for ASCII

* Dart - writeString() ASCII optimization optional
This commit is contained in:
Ivan Dlugos
2021-07-22 19:36:37 +02:00
committed by GitHub
parent 2bfc8e9f01
commit c36672d803
2 changed files with 55 additions and 14 deletions

View File

@@ -668,29 +668,68 @@ class Builder {
return result;
}
/// Write the given string [value] and return its offset.
int writeString(String value) {
/// Write the given string [value] and return its offset.
///
/// Dart strings are UTF-16 but must be stored as UTF-8 in FlatBuffers.
/// If the given string consists only of ASCII characters, you can enable
/// [asciiOptimization]. In this mode, [writeString] first tries to copy the
/// ASCII string directly to the output buffer and, if that fails (because
/// there are non-ASCII characters in the string), it falls back to the
/// default UTF-16 -> UTF-8 conversion (with a slight performance penalty).
int? writeString(String value, {bool asciiOptimization = false}) {
_ensureNoVTable();
if (_strings != null) {
// putIfAbsent returns the offset already stored for [value]; the string is
// only written to the buffer when no entry exists yet.
return _strings!.putIfAbsent(value, () => _writeString(value));
return _strings!
.putIfAbsent(value, () => _writeString(value, asciiOptimization));
} else {
return _writeString(value);
return _writeString(value, asciiOptimization);
}
}
int _writeString(String value) {
// TODO(scheglov) optimize for ASCII strings
List<int> bytes = utf8.encode(value);
int length = bytes.length;
// Writes [value] to the buffer and returns the tail position afterwards.
//
// With [asciiOptimization] set, a direct ASCII copy is attempted first,
// because [utf8.encode()] is slow (up to at least Dart SDK 2.13). If the
// string turns out to contain a non-ASCII character, the tail is rewound and
// the regular UTF-8 path is taken instead.
int _writeString(String value, bool asciiOptimization) {
  if (!asciiOptimization) {
    _writeUTFString(value);
    return _tail;
  }

  // Remember where we started so a failed ASCII attempt can be undone.
  final tailBeforeAttempt = _tail;
  if (!_tryWriteASCIIString(value)) {
    _tail = tailBeforeAttempt;
    _writeUTFString(value);
  }
  return _tail;
}
// Attempts to write [value] as a plain ASCII string, one byte per UTF-16 code
// unit. Returns true on success and false as soon as a non-ASCII code unit is
// found; in that case the length prefix and trailing zero are not written.
@pragma('vm:prefer-inline')
bool _tryWriteASCIIString(String value) {
  final charCount = value.length;
  // One 4-byte length prefix, then one byte per character plus the
  // trailing zero.
  _prepare(4, 1, additionalBytes: charCount + 1);
  // Character bytes start right after the 4-byte length prefix.
  final start = _buf.lengthInBytes - _tail + 4;
  for (var i = 0; i < charCount; i++) {
    // Code units are UTF-16, e.g. '†' is 0x2020 (8224 decimal); ASCII covers
    // only 0x00 to 0x7F (0 to 127 decimal).
    final codeUnit = value.codeUnitAt(i);
    if (codeUnit > 0x7F) return false;
    _buf.setUint8(start + i, codeUnit);
  }
  _buf.setUint8(start + charCount, 0); // trailing zero
  _setUint32AtTail(_buf, _tail, charCount);
  return true;
}
// Encodes [value] as UTF-8 and writes it at the tail of the buffer: the
// encoded byte length via [_setUint32AtTail], then the encoded bytes, then a
// trailing zero byte.
@pragma('vm:prefer-inline')
void _writeUTFString(String value) {
final bytes = utf8.encode(value) as Uint8List;
final length = bytes.length;
// Request room for one 4-byte element (the length prefix) plus the encoded
// bytes and the trailing zero.
_prepare(4, 1, additionalBytes: length + 1);
final int result = _tail;
_setUint32AtTail(_buf, _tail, length);
// The string bytes start 4 bytes past the position the length was written to.
int offset = _buf.lengthInBytes - _tail + 4;
var offset = _buf.lengthInBytes - _tail + 4;
for (int i = 0; i < length; i++) {
_buf.setUint8(offset++, bytes[i]);
}
_buf.setUint8(offset, 0); // trailing zero
return result;
}
/// Throw an exception if there is not currently a vtable.

View File

@@ -160,7 +160,7 @@ class BuilderTest {
final str = fbBuilder.writeString('MyMonster');
fbBuilder.writeString('test1');
fbBuilder.writeString('test2');
fbBuilder.writeString('test2', asciiOptimization: true);
final testArrayOfString = fbBuilder.endStructVector(2);
final fred = fbBuilder.writeString('Fred');
@@ -360,8 +360,10 @@ class BuilderTest {
List<int> byteList;
{
Builder builder = new Builder(initialSize: 0);
int? latinStringOffset = builder.writeString(latinString);
int? unicodeStringOffset = builder.writeString(unicodeString);
int? latinStringOffset =
builder.writeString(latinString, asciiOptimization: true);
int? unicodeStringOffset =
builder.writeString(unicodeString, asciiOptimization: true);
builder.startTable(2);
builder.addOffset(0, latinStringOffset);
builder.addOffset(1, unicodeStringOffset);