From 6908826f9506fc0149d7bb60e0cc45aafa532f4a Mon Sep 17 00:00:00 2001 From: pjulien Date: Sat, 16 Apr 2016 20:49:36 -0400 Subject: [PATCH] Fix for #3849 This avoids the following allocations over repeated use: - A ``CharsetEncoder`` - A byte array to contain the contents of the string - A wrapper, via ``ByteBuffer#wrap``, for the previously mentioned byte array This also removes a copy of the temporary byte array. That extra copy was needed because ``String#getBytes`` returns a byte array whose length must match the exact size of the contents. This implementation requires that the builder retain a ``ByteBuffer`` and a ``CharsetEncoder``. This is expected to be slower only for users who allocate a single string in their buffers or none at all. The improvement is especially potent if the builder is constantly reused by its caller. This also eliminates the penalty for users that do not use strings at all, since the cost of allocating these fields is now amortized. The only allocation left in this code is a ``CharBuffer`` wrapper. It's possible to eliminate this allocation using an additional copy, but this needs further profiling to see which is the bigger problem.
--- .../google/flatbuffers/FlatBufferBuilder.java | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/java/com/google/flatbuffers/FlatBufferBuilder.java b/java/com/google/flatbuffers/FlatBufferBuilder.java index e86471397..092a239f8 100644 --- a/java/com/google/flatbuffers/FlatBufferBuilder.java +++ b/java/com/google/flatbuffers/FlatBufferBuilder.java @@ -17,6 +17,11 @@ package com.google.flatbuffers; import static com.google.flatbuffers.Constants.*; + +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; import java.util.Arrays; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -46,6 +51,8 @@ public class FlatBufferBuilder { int num_vtables = 0; // Number of entries in `vtables` in use. int vector_num_elems = 0; // For the current vector being built. boolean force_defaults = false; // False omits default values from the serialized data. + CharsetEncoder encoder = utf8charset.newEncoder(); + ByteBuffer dst; /// @endcond /** @@ -368,12 +375,26 @@ public class FlatBufferBuilder { * @return The offset in the buffer where the encoded string starts. */ public int createString(String s) { - byte[] utf8 = s.getBytes(utf8charset); - addByte((byte)0); - startVector(1, utf8.length, 1); - bb.position(space -= utf8.length); - bb.put(utf8, 0, utf8.length); - return endVector(); + int length = s.length(); + int estimatedDstCapacity = (int) (length * encoder.maxBytesPerChar()); + if (dst == null || dst.capacity() < estimatedDstCapacity) { + dst = ByteBuffer.allocate(Math.max(128, estimatedDstCapacity)); + } + + dst.clear(); + + CharBuffer src = CharBuffer.wrap(s); + CoderResult result = encoder.encode(src, dst, true); + if (result.isError()) { + try { + result.throwException(); + } catch (CharacterCodingException x) { + throw new Error(x); + } + } + + dst.flip(); + return createString(dst); } /**