forked from BigfootDev/flatbuffers
feat: use HashMap for create_shared_string to fix O(N²) performance (#8958)
* feat: use HashMap for create_shared_string to fix O(N²) performance * refactor: clean up no_std binary_search_by with direct slice comparison
This commit is contained in:
@@ -24,6 +24,9 @@ use core::marker::PhantomData;
|
||||
use core::ops::{Add, AddAssign, Deref, DerefMut, Index, IndexMut, Sub, SubAssign};
|
||||
use core::ptr::write_bytes;
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::endian_scalar::emplace_scalar;
|
||||
use crate::primitives::*;
|
||||
use crate::push::{Push, PushAlignment};
|
||||
@@ -139,6 +142,9 @@ pub struct FlatBufferBuilder<'fbb, A: Allocator = DefaultAllocator> {
|
||||
|
||||
min_align: usize,
|
||||
force_defaults: bool,
|
||||
#[cfg(feature = "std")]
|
||||
strings_pool: HashMap<String, WIPOffset<&'fbb str>>,
|
||||
#[cfg(not(feature = "std"))]
|
||||
strings_pool: Vec<WIPOffset<&'fbb str>>,
|
||||
|
||||
_phantom: PhantomData<&'fbb ()>,
|
||||
@@ -197,6 +203,9 @@ impl<'fbb, A: Allocator> FlatBufferBuilder<'fbb, A> {
|
||||
|
||||
min_align: 0,
|
||||
force_defaults: false,
|
||||
#[cfg(feature = "std")]
|
||||
strings_pool: HashMap::new(),
|
||||
#[cfg(not(feature = "std"))]
|
||||
strings_pool: Vec::new(),
|
||||
|
||||
_phantom: PhantomData,
|
||||
@@ -343,6 +352,31 @@ impl<'fbb, A: Allocator> FlatBufferBuilder<'fbb, A> {
|
||||
WIPOffset::new(o.value())
|
||||
}
|
||||
|
||||
/// Writes `s` into the buffer as a UTF-8 string unless an identical string
/// was already written through this method, in which case the offset of
/// the earlier copy is returned instead.
///
/// Previously written strings are remembered in a `HashMap` keyed by the
/// string contents, so a repeated string is found with a single hash
/// lookup. An owned copy of `s` is only allocated the first time a given
/// string is seen.
///
/// The nesting assertion is invoked first, with a message stating that
/// this method can not be called while a table or vector is under
/// construction.
#[cfg(feature = "std")]
#[inline]
pub fn create_shared_string<'a: 'b, 'b>(&'a mut self, s: &'b str) -> WIPOffset<&'fbb str> {
    self.assert_not_nested(
        "create_shared_string can not be called when a table or vector is under construction",
    );

    // Copy the cached offset out so the pool is no longer borrowed when
    // the miss path needs to mutate the builder.
    let cached = self.strings_pool.get(s).copied();

    match cached {
        // Already written: hand back the existing offset.
        Some(existing) => existing,
        // First occurrence: write the bytes, then remember where they went.
        None => {
            let written = self.create_byte_string(s.as_bytes());
            let fresh = WIPOffset::new(written.value());
            self.strings_pool.insert(s.to_owned(), fresh);
            fresh
        }
    }
}
|
||||
|
||||
/// Create a utf8 string, and de-duplicate if already created.
|
||||
///
|
||||
/// Uses a sorted Vec with binary search to track previously written
|
||||
/// strings when in `no_std` mode.
|
||||
#[cfg(not(feature = "std"))]
|
||||
#[inline]
|
||||
pub fn create_shared_string<'a: 'b, 'b>(&'a mut self, s: &'b str) -> WIPOffset<&'fbb str> {
|
||||
self.assert_not_nested(
|
||||
@@ -355,19 +389,15 @@ impl<'fbb, A: Allocator> FlatBufferBuilder<'fbb, A> {
|
||||
|
||||
let found = self.strings_pool.binary_search_by(|offset| {
|
||||
let ptr = offset.value() as usize;
|
||||
// Slice of the buffer starting at the stored string's length prefix
|
||||
let str_memory = &buf[buf.len() - ptr..];
|
||||
// Gets the size of the written string from buffer
|
||||
let size =
|
||||
u32::from_le_bytes([str_memory[0], str_memory[1], str_memory[2], str_memory[3]])
|
||||
as usize;
|
||||
// Size of the string size
|
||||
let string_size: usize = 4;
|
||||
// Fetches actual string bytes from index of string after string size
|
||||
// to the size of string plus string size
|
||||
let iter = str_memory[string_size..size + string_size].iter();
|
||||
// Compares bytes of fetched string and current writable string
|
||||
iter.cloned().cmp(s.bytes())
|
||||
let size = u32::from_le_bytes([
|
||||
str_memory[0],
|
||||
str_memory[1],
|
||||
str_memory[2],
|
||||
str_memory[3],
|
||||
]) as usize;
|
||||
let stored = &str_memory[4..4 + size];
|
||||
stored.cmp(s.as_bytes())
|
||||
});
|
||||
|
||||
match found {
|
||||
|
||||
@@ -3224,4 +3224,77 @@ fn test_shared_strings() {
|
||||
assert_eq!(string_vector.get(1), "foo");
|
||||
}
|
||||
|
||||
#[test]
fn test_shared_strings_pool_deduplication() {
    // Checks that create_shared_string hands out one offset per distinct
    // string, returns that same offset for repeats, keeps working across
    // `reset`, and produces buffers that read back correctly.
    let mut builder = flatbuffers::FlatBufferBuilder::with_capacity(1024);

    // Insert multiple unique strings and verify each gets a distinct offset.
    let animals = ["cat", "dog", "bird", "fish", "snake"];
    let offsets: Vec<_> = animals
        .iter()
        .map(|s| builder.create_shared_string(s))
        .collect();
    for i in 0..offsets.len() {
        for j in (i + 1)..offsets.len() {
            assert_ne!(
                offsets[i].value(),
                offsets[j].value(),
                "unique strings '{}' and '{}' must have different offsets",
                animals[i],
                animals[j],
            );
        }
    }

    // Re-insert the same strings and verify they return the original offsets.
    for (i, s) in animals.iter().enumerate() {
        let offset = builder.create_shared_string(s);
        assert_eq!(
            offset.value(),
            offsets[i].value(),
            "duplicate string '{}' must return the same offset",
            s,
        );
    }

    // After a reset, de-duplication must still work within the new buffer.
    builder.reset();
    let a = builder.create_shared_string("cat");
    let b = builder.create_shared_string("cat");
    assert_eq!(a.value(), b.value(), "same string after reset must still deduplicate");

    // The equality above would also hold if `reset` had left the old pool
    // entry for "cat" in place, so additionally build a buffer with the
    // post-reset offset and read it back: the name is only readable if the
    // string was actually written into the new buffer.
    let cat_monster = my_game::example::Monster::create(
        &mut builder,
        &my_game::example::MonsterArgs {
            name: Some(a),
            ..Default::default()
        },
    );
    builder.finish(cat_monster, None);
    let cat = my_game::example::root_as_monster(builder.finished_data()).unwrap();
    assert_eq!(
        cat.name(),
        "cat",
        "string pooled before reset must be rewritten after reset",
    );

    // Verify that shared strings produce a valid, readable buffer.
    builder.reset();
    let shared_name = builder.create_shared_string("goblin");
    let shared_name_dup = builder.create_shared_string("goblin");
    assert_eq!(shared_name.value(), shared_name_dup.value());

    let enemy = my_game::example::Monster::create(
        &mut builder,
        &my_game::example::MonsterArgs {
            name: Some(shared_name),
            ..Default::default()
        },
    );

    // A table was written in between; the pooled offset must be unaffected.
    let main_name = builder.create_shared_string("goblin");
    assert_eq!(main_name.value(), shared_name.value());

    let monster = my_game::example::Monster::create(
        &mut builder,
        &my_game::example::MonsterArgs {
            name: Some(main_name),
            enemy: Some(enemy),
            ..Default::default()
        },
    );
    builder.finish(monster, None);

    // Both tables reference the single shared copy of the name.
    let m = my_game::example::root_as_monster(builder.finished_data()).unwrap();
    assert_eq!(m.name(), "goblin");
    assert_eq!(m.enemy().unwrap().name(), "goblin");
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user