mirror of
https://github.com/google/flatbuffers.git
synced 2026-06-02 04:04:19 +00:00
Store vtables sorted in Rust builder (#6765)
* benchmark many vtables * Rust: Store written_table rev-positions sorted. The previous implementation was slow if there were too many tables. Asymototically when inserting the n^th vtable: The old implementation took O(n) lookup steps and O(1) insertion. The new implementation is O(log n) lookup and O(n) insertion. This might be improved further by using a balanced btree. Benchmarking, create_many_tables is 7.5x faster (on my laptop): // Simple vector cache test create_many_tables ... bench: 728,875 ns/iter (+/- 12,279) = 44 MB/s // Sorted vector cache test create_many_tables ... bench: 97,843 ns/iter (+/- 4,430) = 334 MB/s * Fix lints Co-authored-by: Casper Neo <cneo@google.com>
This commit is contained in:
@@ -554,42 +554,38 @@ impl<'fbb> FlatBufferBuilder<'fbb> {
|
||||
// serialize every FieldLoc to the vtable:
|
||||
for &fl in self.field_locs.iter() {
|
||||
let pos: VOffsetT = (object_revloc_to_vtable.value() - fl.off) as VOffsetT;
|
||||
debug_assert_eq!(
|
||||
vtfw.get_field_offset(fl.id),
|
||||
0,
|
||||
"tried to write a vtable field multiple times"
|
||||
);
|
||||
vtfw.write_field_offset(fl.id, pos);
|
||||
}
|
||||
}
|
||||
let dup_vt_use = {
|
||||
let this_vt = VTable::init(&self.owned_buf[..], self.head);
|
||||
self.find_duplicate_stored_vtable_revloc(this_vt)
|
||||
};
|
||||
|
||||
let vt_use = match dup_vt_use {
|
||||
Some(n) => {
|
||||
let new_vt_bytes = &self.owned_buf[vt_start_pos..vt_end_pos];
|
||||
let found = self.written_vtable_revpos.binary_search_by(|old_vtable_revpos: &UOffsetT| {
|
||||
let old_vtable_pos = self.owned_buf.len() - *old_vtable_revpos as usize;
|
||||
let old_vtable = VTable::init(&self.owned_buf, old_vtable_pos);
|
||||
new_vt_bytes.cmp(old_vtable.as_bytes())
|
||||
});
|
||||
let final_vtable_revpos = match found {
|
||||
Ok(i) => {
|
||||
// The new vtable is a duplicate so clear it.
|
||||
VTableWriter::init(&mut self.owned_buf[vt_start_pos..vt_end_pos]).clear();
|
||||
self.head += vtable_byte_len;
|
||||
n
|
||||
self.written_vtable_revpos[i]
|
||||
}
|
||||
None => {
|
||||
let new_vt_use = self.used_space() as UOffsetT;
|
||||
self.written_vtable_revpos.push(new_vt_use);
|
||||
new_vt_use
|
||||
Err(i) => {
|
||||
// This is a new vtable. Add it to the cache.
|
||||
let new_vt_revpos = self.used_space() as UOffsetT;
|
||||
self.written_vtable_revpos.insert(i, new_vt_revpos);
|
||||
new_vt_revpos
|
||||
}
|
||||
};
|
||||
|
||||
{
|
||||
let n = self.head + self.used_space() - object_revloc_to_vtable.value() as usize;
|
||||
let saw = unsafe { read_scalar_at::<UOffsetT>(&self.owned_buf, n) };
|
||||
debug_assert_eq!(saw, 0xF0F0_F0F0);
|
||||
unsafe {
|
||||
emplace_scalar::<SOffsetT>(
|
||||
&mut self.owned_buf[n..n + SIZE_SOFFSET],
|
||||
vt_use as SOffsetT - object_revloc_to_vtable.value() as SOffsetT,
|
||||
);
|
||||
}
|
||||
// Write signed offset from table to its vtable.
|
||||
let table_pos = self.owned_buf.len() - object_revloc_to_vtable.value() as usize;
|
||||
let tmp_soffset_to_vt = unsafe { read_scalar_at::<UOffsetT>(&self.owned_buf, table_pos) };
|
||||
debug_assert_eq!(tmp_soffset_to_vt, 0xF0F0_F0F0);
|
||||
unsafe {
|
||||
emplace_scalar::<SOffsetT>(
|
||||
&mut self.owned_buf[table_pos..table_pos + SIZE_SOFFSET],
|
||||
final_vtable_revpos as SOffsetT - object_revloc_to_vtable.value() as SOffsetT
|
||||
);
|
||||
}
|
||||
|
||||
self.field_locs.clear();
|
||||
@@ -597,20 +593,6 @@ impl<'fbb> FlatBufferBuilder<'fbb> {
|
||||
object_revloc_to_vtable
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn find_duplicate_stored_vtable_revloc(&self, needle: VTable) -> Option<UOffsetT> {
|
||||
for &revloc in self.written_vtable_revpos.iter().rev() {
|
||||
let o = VTable::init(
|
||||
&self.owned_buf[..],
|
||||
self.head + self.used_space() - revloc as usize,
|
||||
);
|
||||
if needle == o {
|
||||
return Some(revloc);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
// Only call this when you know it is safe to double the size of the buffer.
|
||||
#[inline]
|
||||
fn grow_owned_buf(&mut self) {
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
use std::ptr::write_bytes;
|
||||
|
||||
use crate::endian_scalar::{emplace_scalar, read_scalar_at};
|
||||
use crate::endian_scalar::emplace_scalar;
|
||||
use crate::primitives::*;
|
||||
|
||||
/// VTableWriter compartmentalizes actions needed to create a vtable.
|
||||
@@ -54,16 +54,6 @@ impl<'a> VTableWriter<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets an object field offset from the vtable. Only used for debugging.
|
||||
///
|
||||
/// Note that this expects field offsets (which are like pointers), not
|
||||
/// field ids (which are like array indices).
|
||||
#[inline(always)]
|
||||
pub fn get_field_offset(&self, vtable_offset: VOffsetT) -> VOffsetT {
|
||||
let idx = vtable_offset as usize;
|
||||
unsafe { read_scalar_at::<VOffsetT>(&self.buf, idx) }
|
||||
}
|
||||
|
||||
/// Writes an object field offset into the vtable.
|
||||
///
|
||||
/// Note that this expects field offsets (which are like pointers), not
|
||||
|
||||
@@ -142,7 +142,7 @@ fn blackbox<T>(t: T) -> T {
|
||||
|
||||
#[inline(always)]
|
||||
fn traverse_serialized_example_with_generated_code(bytes: &[u8]) {
|
||||
let m = my_game::example::get_root_as_monster(bytes);
|
||||
let m = unsafe { my_game::example::root_as_monster_unchecked(bytes) };
|
||||
blackbox(m.hp());
|
||||
blackbox(m.mana());
|
||||
blackbox(m.name());
|
||||
@@ -172,7 +172,7 @@ fn traverse_serialized_example_with_generated_code(bytes: &[u8]) {
|
||||
}
|
||||
|
||||
fn create_string_10(bench: &mut Bencher) {
|
||||
let builder = &mut flatbuffers::FlatBufferBuilder::new_with_capacity(1 << 20);
|
||||
let builder = &mut flatbuffers::FlatBufferBuilder::with_capacity(1 << 20);
|
||||
let mut i = 0;
|
||||
bench.iter(|| {
|
||||
builder.create_string("foobarbaz"); // zero-terminated -> 10 bytes
|
||||
@@ -187,7 +187,7 @@ fn create_string_10(bench: &mut Bencher) {
|
||||
}
|
||||
|
||||
fn create_string_100(bench: &mut Bencher) {
|
||||
let builder = &mut flatbuffers::FlatBufferBuilder::new_with_capacity(1 << 20);
|
||||
let builder = &mut flatbuffers::FlatBufferBuilder::with_capacity(1 << 20);
|
||||
let s_owned = (0..99).map(|_| "x").collect::<String>();
|
||||
let s: &str = &s_owned;
|
||||
|
||||
@@ -205,7 +205,7 @@ fn create_string_100(bench: &mut Bencher) {
|
||||
}
|
||||
|
||||
fn create_byte_vector_100_naive(bench: &mut Bencher) {
|
||||
let builder = &mut flatbuffers::FlatBufferBuilder::new_with_capacity(1 << 20);
|
||||
let builder = &mut flatbuffers::FlatBufferBuilder::with_capacity(1 << 20);
|
||||
let v_owned = (0u8..100).map(|i| i).collect::<Vec<u8>>();
|
||||
let v: &[u8] = &v_owned;
|
||||
|
||||
@@ -223,7 +223,7 @@ fn create_byte_vector_100_naive(bench: &mut Bencher) {
|
||||
}
|
||||
|
||||
fn create_byte_vector_100_optimal(bench: &mut Bencher) {
|
||||
let builder = &mut flatbuffers::FlatBufferBuilder::new_with_capacity(1 << 20);
|
||||
let builder = &mut flatbuffers::FlatBufferBuilder::with_capacity(1 << 20);
|
||||
let v_owned = (0u8..100).map(|i| i).collect::<Vec<u8>>();
|
||||
let v: &[u8] = &v_owned;
|
||||
|
||||
@@ -240,6 +240,24 @@ fn create_byte_vector_100_optimal(bench: &mut Bencher) {
|
||||
bench.bytes = v.len() as u64;
|
||||
}
|
||||
|
||||
fn create_many_tables(bench: &mut Bencher) {
|
||||
let builder = &mut flatbuffers::FlatBufferBuilder::with_capacity(1 << 20);
|
||||
// We test vtable overhead by making many unique tables of up to 16 fields of u8s.
|
||||
bench.iter(|| {
|
||||
for i in 0..(1u16 << 10) {
|
||||
let t = builder.start_table();
|
||||
for j in 0..15 {
|
||||
if i & (1 << j) == 1 {
|
||||
builder.push_slot_always(i * 2, 42u8);
|
||||
}
|
||||
}
|
||||
builder.end_table(t);
|
||||
}
|
||||
builder.reset();
|
||||
});
|
||||
bench.bytes = 1 << 15;
|
||||
}
|
||||
|
||||
benchmark_group!(
|
||||
benches,
|
||||
create_byte_vector_100_naive,
|
||||
@@ -247,5 +265,6 @@ benchmark_group!(
|
||||
traverse_canonical_buffer,
|
||||
create_canonical_buffer_then_reset,
|
||||
create_string_10,
|
||||
create_string_100
|
||||
create_string_100,
|
||||
create_many_tables,
|
||||
);
|
||||
|
||||
@@ -354,7 +354,7 @@ fn test_object_api_reads_correctly() -> Result<(), &'static str>{
|
||||
// Disabled due to Windows CI limitations.
|
||||
// #[test]
|
||||
// fn builder_initializes_with_maximum_buffer_size() {
|
||||
// flatbuffers::FlatBufferBuilder::new_with_capacity(flatbuffers::FLATBUFFERS_MAX_BUFFER_SIZE);
|
||||
// flatbuffers::FlatBufferBuilder::with_capacity(flatbuffers::FLATBUFFERS_MAX_BUFFER_SIZE);
|
||||
// }
|
||||
|
||||
#[should_panic]
|
||||
|
||||
Reference in New Issue
Block a user