mirror of
https://github.com/google/flatbuffers.git
synced 2026-06-24 16:06:10 +00:00
Store vtables sorted in Rust builder (#6765)
* benchmark many vtables * Rust: Store written_table rev-positions sorted. The previous implementation was slow if there were too many tables. Asymototically when inserting the n^th vtable: The old implementation took O(n) lookup steps and O(1) insertion. The new implementation is O(log n) lookup and O(n) insertion. This might be improved further by using a balanced btree. Benchmarking, create_many_tables is 7.5x faster (on my laptop): // Simple vector cache test create_many_tables ... bench: 728,875 ns/iter (+/- 12,279) = 44 MB/s // Sorted vector cache test create_many_tables ... bench: 97,843 ns/iter (+/- 4,430) = 334 MB/s * Fix lints Co-authored-by: Casper Neo <cneo@google.com>
This commit is contained in:
@@ -554,42 +554,38 @@ impl<'fbb> FlatBufferBuilder<'fbb> {
|
|||||||
// serialize every FieldLoc to the vtable:
|
// serialize every FieldLoc to the vtable:
|
||||||
for &fl in self.field_locs.iter() {
|
for &fl in self.field_locs.iter() {
|
||||||
let pos: VOffsetT = (object_revloc_to_vtable.value() - fl.off) as VOffsetT;
|
let pos: VOffsetT = (object_revloc_to_vtable.value() - fl.off) as VOffsetT;
|
||||||
debug_assert_eq!(
|
|
||||||
vtfw.get_field_offset(fl.id),
|
|
||||||
0,
|
|
||||||
"tried to write a vtable field multiple times"
|
|
||||||
);
|
|
||||||
vtfw.write_field_offset(fl.id, pos);
|
vtfw.write_field_offset(fl.id, pos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let dup_vt_use = {
|
let new_vt_bytes = &self.owned_buf[vt_start_pos..vt_end_pos];
|
||||||
let this_vt = VTable::init(&self.owned_buf[..], self.head);
|
let found = self.written_vtable_revpos.binary_search_by(|old_vtable_revpos: &UOffsetT| {
|
||||||
self.find_duplicate_stored_vtable_revloc(this_vt)
|
let old_vtable_pos = self.owned_buf.len() - *old_vtable_revpos as usize;
|
||||||
};
|
let old_vtable = VTable::init(&self.owned_buf, old_vtable_pos);
|
||||||
|
new_vt_bytes.cmp(old_vtable.as_bytes())
|
||||||
let vt_use = match dup_vt_use {
|
});
|
||||||
Some(n) => {
|
let final_vtable_revpos = match found {
|
||||||
|
Ok(i) => {
|
||||||
|
// The new vtable is a duplicate so clear it.
|
||||||
VTableWriter::init(&mut self.owned_buf[vt_start_pos..vt_end_pos]).clear();
|
VTableWriter::init(&mut self.owned_buf[vt_start_pos..vt_end_pos]).clear();
|
||||||
self.head += vtable_byte_len;
|
self.head += vtable_byte_len;
|
||||||
n
|
self.written_vtable_revpos[i]
|
||||||
}
|
}
|
||||||
None => {
|
Err(i) => {
|
||||||
let new_vt_use = self.used_space() as UOffsetT;
|
// This is a new vtable. Add it to the cache.
|
||||||
self.written_vtable_revpos.push(new_vt_use);
|
let new_vt_revpos = self.used_space() as UOffsetT;
|
||||||
new_vt_use
|
self.written_vtable_revpos.insert(i, new_vt_revpos);
|
||||||
|
new_vt_revpos
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
// Write signed offset from table to its vtable.
|
||||||
{
|
let table_pos = self.owned_buf.len() - object_revloc_to_vtable.value() as usize;
|
||||||
let n = self.head + self.used_space() - object_revloc_to_vtable.value() as usize;
|
let tmp_soffset_to_vt = unsafe { read_scalar_at::<UOffsetT>(&self.owned_buf, table_pos) };
|
||||||
let saw = unsafe { read_scalar_at::<UOffsetT>(&self.owned_buf, n) };
|
debug_assert_eq!(tmp_soffset_to_vt, 0xF0F0_F0F0);
|
||||||
debug_assert_eq!(saw, 0xF0F0_F0F0);
|
unsafe {
|
||||||
unsafe {
|
emplace_scalar::<SOffsetT>(
|
||||||
emplace_scalar::<SOffsetT>(
|
&mut self.owned_buf[table_pos..table_pos + SIZE_SOFFSET],
|
||||||
&mut self.owned_buf[n..n + SIZE_SOFFSET],
|
final_vtable_revpos as SOffsetT - object_revloc_to_vtable.value() as SOffsetT
|
||||||
vt_use as SOffsetT - object_revloc_to_vtable.value() as SOffsetT,
|
);
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
self.field_locs.clear();
|
self.field_locs.clear();
|
||||||
@@ -597,20 +593,6 @@ impl<'fbb> FlatBufferBuilder<'fbb> {
|
|||||||
object_revloc_to_vtable
|
object_revloc_to_vtable
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn find_duplicate_stored_vtable_revloc(&self, needle: VTable) -> Option<UOffsetT> {
|
|
||||||
for &revloc in self.written_vtable_revpos.iter().rev() {
|
|
||||||
let o = VTable::init(
|
|
||||||
&self.owned_buf[..],
|
|
||||||
self.head + self.used_space() - revloc as usize,
|
|
||||||
);
|
|
||||||
if needle == o {
|
|
||||||
return Some(revloc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only call this when you know it is safe to double the size of the buffer.
|
// Only call this when you know it is safe to double the size of the buffer.
|
||||||
#[inline]
|
#[inline]
|
||||||
fn grow_owned_buf(&mut self) {
|
fn grow_owned_buf(&mut self) {
|
||||||
|
|||||||
@@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
use std::ptr::write_bytes;
|
use std::ptr::write_bytes;
|
||||||
|
|
||||||
use crate::endian_scalar::{emplace_scalar, read_scalar_at};
|
use crate::endian_scalar::emplace_scalar;
|
||||||
use crate::primitives::*;
|
use crate::primitives::*;
|
||||||
|
|
||||||
/// VTableWriter compartmentalizes actions needed to create a vtable.
|
/// VTableWriter compartmentalizes actions needed to create a vtable.
|
||||||
@@ -54,16 +54,6 @@ impl<'a> VTableWriter<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Gets an object field offset from the vtable. Only used for debugging.
|
|
||||||
///
|
|
||||||
/// Note that this expects field offsets (which are like pointers), not
|
|
||||||
/// field ids (which are like array indices).
|
|
||||||
#[inline(always)]
|
|
||||||
pub fn get_field_offset(&self, vtable_offset: VOffsetT) -> VOffsetT {
|
|
||||||
let idx = vtable_offset as usize;
|
|
||||||
unsafe { read_scalar_at::<VOffsetT>(&self.buf, idx) }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Writes an object field offset into the vtable.
|
/// Writes an object field offset into the vtable.
|
||||||
///
|
///
|
||||||
/// Note that this expects field offsets (which are like pointers), not
|
/// Note that this expects field offsets (which are like pointers), not
|
||||||
|
|||||||
@@ -142,7 +142,7 @@ fn blackbox<T>(t: T) -> T {
|
|||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn traverse_serialized_example_with_generated_code(bytes: &[u8]) {
|
fn traverse_serialized_example_with_generated_code(bytes: &[u8]) {
|
||||||
let m = my_game::example::get_root_as_monster(bytes);
|
let m = unsafe { my_game::example::root_as_monster_unchecked(bytes) };
|
||||||
blackbox(m.hp());
|
blackbox(m.hp());
|
||||||
blackbox(m.mana());
|
blackbox(m.mana());
|
||||||
blackbox(m.name());
|
blackbox(m.name());
|
||||||
@@ -172,7 +172,7 @@ fn traverse_serialized_example_with_generated_code(bytes: &[u8]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn create_string_10(bench: &mut Bencher) {
|
fn create_string_10(bench: &mut Bencher) {
|
||||||
let builder = &mut flatbuffers::FlatBufferBuilder::new_with_capacity(1 << 20);
|
let builder = &mut flatbuffers::FlatBufferBuilder::with_capacity(1 << 20);
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
bench.iter(|| {
|
bench.iter(|| {
|
||||||
builder.create_string("foobarbaz"); // zero-terminated -> 10 bytes
|
builder.create_string("foobarbaz"); // zero-terminated -> 10 bytes
|
||||||
@@ -187,7 +187,7 @@ fn create_string_10(bench: &mut Bencher) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn create_string_100(bench: &mut Bencher) {
|
fn create_string_100(bench: &mut Bencher) {
|
||||||
let builder = &mut flatbuffers::FlatBufferBuilder::new_with_capacity(1 << 20);
|
let builder = &mut flatbuffers::FlatBufferBuilder::with_capacity(1 << 20);
|
||||||
let s_owned = (0..99).map(|_| "x").collect::<String>();
|
let s_owned = (0..99).map(|_| "x").collect::<String>();
|
||||||
let s: &str = &s_owned;
|
let s: &str = &s_owned;
|
||||||
|
|
||||||
@@ -205,7 +205,7 @@ fn create_string_100(bench: &mut Bencher) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn create_byte_vector_100_naive(bench: &mut Bencher) {
|
fn create_byte_vector_100_naive(bench: &mut Bencher) {
|
||||||
let builder = &mut flatbuffers::FlatBufferBuilder::new_with_capacity(1 << 20);
|
let builder = &mut flatbuffers::FlatBufferBuilder::with_capacity(1 << 20);
|
||||||
let v_owned = (0u8..100).map(|i| i).collect::<Vec<u8>>();
|
let v_owned = (0u8..100).map(|i| i).collect::<Vec<u8>>();
|
||||||
let v: &[u8] = &v_owned;
|
let v: &[u8] = &v_owned;
|
||||||
|
|
||||||
@@ -223,7 +223,7 @@ fn create_byte_vector_100_naive(bench: &mut Bencher) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn create_byte_vector_100_optimal(bench: &mut Bencher) {
|
fn create_byte_vector_100_optimal(bench: &mut Bencher) {
|
||||||
let builder = &mut flatbuffers::FlatBufferBuilder::new_with_capacity(1 << 20);
|
let builder = &mut flatbuffers::FlatBufferBuilder::with_capacity(1 << 20);
|
||||||
let v_owned = (0u8..100).map(|i| i).collect::<Vec<u8>>();
|
let v_owned = (0u8..100).map(|i| i).collect::<Vec<u8>>();
|
||||||
let v: &[u8] = &v_owned;
|
let v: &[u8] = &v_owned;
|
||||||
|
|
||||||
@@ -240,6 +240,24 @@ fn create_byte_vector_100_optimal(bench: &mut Bencher) {
|
|||||||
bench.bytes = v.len() as u64;
|
bench.bytes = v.len() as u64;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn create_many_tables(bench: &mut Bencher) {
|
||||||
|
let builder = &mut flatbuffers::FlatBufferBuilder::with_capacity(1 << 20);
|
||||||
|
// We test vtable overhead by making many unique tables of up to 16 fields of u8s.
|
||||||
|
bench.iter(|| {
|
||||||
|
for i in 0..(1u16 << 10) {
|
||||||
|
let t = builder.start_table();
|
||||||
|
for j in 0..15 {
|
||||||
|
if i & (1 << j) == 1 {
|
||||||
|
builder.push_slot_always(i * 2, 42u8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
builder.end_table(t);
|
||||||
|
}
|
||||||
|
builder.reset();
|
||||||
|
});
|
||||||
|
bench.bytes = 1 << 15;
|
||||||
|
}
|
||||||
|
|
||||||
benchmark_group!(
|
benchmark_group!(
|
||||||
benches,
|
benches,
|
||||||
create_byte_vector_100_naive,
|
create_byte_vector_100_naive,
|
||||||
@@ -247,5 +265,6 @@ benchmark_group!(
|
|||||||
traverse_canonical_buffer,
|
traverse_canonical_buffer,
|
||||||
create_canonical_buffer_then_reset,
|
create_canonical_buffer_then_reset,
|
||||||
create_string_10,
|
create_string_10,
|
||||||
create_string_100
|
create_string_100,
|
||||||
|
create_many_tables,
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -354,7 +354,7 @@ fn test_object_api_reads_correctly() -> Result<(), &'static str>{
|
|||||||
// Disabled due to Windows CI limitations.
|
// Disabled due to Windows CI limitations.
|
||||||
// #[test]
|
// #[test]
|
||||||
// fn builder_initializes_with_maximum_buffer_size() {
|
// fn builder_initializes_with_maximum_buffer_size() {
|
||||||
// flatbuffers::FlatBufferBuilder::new_with_capacity(flatbuffers::FLATBUFFERS_MAX_BUFFER_SIZE);
|
// flatbuffers::FlatBufferBuilder::with_capacity(flatbuffers::FLATBUFFERS_MAX_BUFFER_SIZE);
|
||||||
// }
|
// }
|
||||||
|
|
||||||
#[should_panic]
|
#[should_panic]
|
||||||
|
|||||||
Reference in New Issue
Block a user