FlatBuffers 64 for C++ (#7935)

* First working hack of adding 64-bit. Don't judge :) * Made vector_downward work on 64 bit types * vector_downward uses size_t, added offset64 to reflection * cleaned up adding offset64 in parser * Add C++ testing skeleton for 64-bit * working test for CreateVector64 * working >2 GiB buffers * support for large strings * simplified CreateString<> to just provide the offset type * generalize CreateVector template * update test_64.afb due to upstream format change * Added Vector64 type, which is just an alias for vector ATM * Switch to Offset64 for Vector64 * Update for reflection bfbs output change * Starting to add support for vector64 type in C++ * made a generic CreateVector that can handle different offsets and vector types * Support for 32-vector with 64-addressing * Vector64 basic builder + tests working * basic support for json vector64 support * renamed fields in test_64bit.fbs to better reflect their use * working C++ vector64 builder * Apply --annotate-sparse-vector to 64-bit tests * Enable Vector64 for --annotate-sparse-vectors * Merged from upstream * Add `near_string` field for testing 32-bit offsets alongside * keep track of where the 32-bit and 64-bit regions are for flatbufferbuilder * move template<> outside class body for GCC * update run.sh to build and run tests * basic assertion for adding 64-bit offset at the wrong time * started to separate `FlatBufferBuilder` into two classes, 1 64-bit aware, the other not * add test for nested flatbuffer vector64, fix bug in alignment of big vectors * fixed CreateDirect method by iterating by Offset64 first * internal refactoring of flatbufferbuilder * block not supported languages in the parser from using 64-bit * evolution tests for adding a vector64 field * conformity tests for adding/removing offset64 attributes * ensure test is for a big buffer * add parser error tests for `offset64` and `vector64` attributes * add missing static that GCC only complains about * remove stdint-uintn.h header that gets 
automatically added * move 64-bit CalculateOffset internal * fixed return size of EndVector * various fixes on windows * add SizeT to vector_downward * minimize range of size changes in vector and builder * reworked tracking of whether 64-bit offsets are added * Add ReturnT to EndVector * small cleanups * remove need for second Array definition * combine IndirectHelpers into one definition * started support for vector of struct * Support for 32/64-vectors of structs + Offset64 * small cleanups * add verification for vector64 * add sized prefix for 64-bit buffers * add fuzzer for 64-bit * add example of adding many vectors using a wrapper table * run the new -bfbs-gen-embed logic on the 64-bit tests * remove run.sh and fix cmakelist issue * fixed bazel rules * fixed some PR comments * add 64-bit tests to cmakelist
2026-06-15 00:38:52 +00:00 · 2023-05-09 09:16:30 -07:00
parent 13fc75cb6b
commit 63b7b25289
49 changed files with 3274 additions and 529 deletions
--- a/include/flatbuffers/vector_downward.h
+++ b/include/flatbuffers/vector_downward.h
@@ -17,6 +17,8 @@
 #ifndef FLATBUFFERS_VECTOR_DOWNWARD_H_
 #define FLATBUFFERS_VECTOR_DOWNWARD_H_

+#include <cstdint>
+
 #include <algorithm>

 #include "flatbuffers/base.h"
@@ -31,13 +33,15 @@ namespace flatbuffers {
 // Since this vector leaves the lower part unused, we support a "scratch-pad"
 // that can be stored there for temporary data, to share the allocated space.
 // Essentially, this supports 2 std::vectors in a single buffer.
-class vector_downward {
+template<typename SizeT = uoffset_t> class vector_downward {
 public:
  explicit vector_downward(size_t initial_size, Allocator *allocator,
-                           bool own_allocator, size_t buffer_minalign)
+                           bool own_allocator, size_t buffer_minalign,
+                           const SizeT max_size = FLATBUFFERS_MAX_BUFFER_SIZE)
      : allocator_(allocator),
        own_allocator_(own_allocator),
        initial_size_(initial_size),
+        max_size_(max_size),
        buffer_minalign_(buffer_minalign),
        reserved_(0),
        size_(0),
@@ -50,6 +54,7 @@ class vector_downward {
      : allocator_(other.allocator_),
        own_allocator_(other.own_allocator_),
        initial_size_(other.initial_size_),
+        max_size_(other.max_size_),
        buffer_minalign_(other.buffer_minalign_),
        reserved_(other.reserved_),
        size_(other.size_),
@@ -111,7 +116,7 @@ class vector_downward {
  uint8_t *release_raw(size_t &allocated_bytes, size_t &offset) {
    auto *buf = buf_;
    allocated_bytes = reserved_;
-    offset = static_cast<size_t>(cur_ - buf_);
+    offset = vector_downward::offset();

    // release_raw only relinquishes the buffer ownership.
    // Does not deallocate or reset the allocator. Destructor will do that.
@@ -136,10 +141,10 @@ class vector_downward {

  size_t ensure_space(size_t len) {
    FLATBUFFERS_ASSERT(cur_ >= scratch_ && scratch_ >= buf_);
-    if (len > static_cast<size_t>(cur_ - scratch_)) { reallocate(len); }
-    // Beyond this, signed offsets may not have enough range:
-    // (FlatBuffers > 2GB not supported).
-    FLATBUFFERS_ASSERT(size() < FLATBUFFERS_MAX_BUFFER_SIZE);
+    // If the length is larger than the unused part of the buffer, we need to
+    // grow.
+    if (len > unused_buffer_size()) { reallocate(len); }
+    FLATBUFFERS_ASSERT(size() < max_size_);
    return len;
  }

@@ -147,7 +152,7 @@ class vector_downward {
    if (len) {
      ensure_space(len);
      cur_ -= len;
-      size_ += static_cast<uoffset_t>(len);
+      size_ += static_cast<SizeT>(len);
    }
    return cur_;
  }
@@ -155,11 +160,17 @@ class vector_downward {
  // Returns nullptr if using the DefaultAllocator.
  Allocator *get_custom_allocator() { return allocator_; }

-  inline uoffset_t size() const { return size_; }
+  // The current offset into the buffer.
+  size_t offset() const { return cur_ - buf_; }

-  uoffset_t scratch_size() const {
-    return static_cast<uoffset_t>(scratch_ - buf_);
-  }
+  // The total size of the vector (both the buffer and scratch parts).
+  inline SizeT size() const { return size_; }
+
+  // The size of the buffer part of the vector that is currently unused.
+  SizeT unused_buffer_size() const { return static_cast<SizeT>(cur_ - scratch_); }
+
+  // The size of the scratch part of the vector.
+  SizeT scratch_size() const { return static_cast<SizeT>(scratch_ - buf_); }

  size_t capacity() const { return reserved_; }

@@ -211,7 +222,7 @@ class vector_downward {

  void pop(size_t bytes_to_remove) {
    cur_ += bytes_to_remove;
-    size_ -= static_cast<uoffset_t>(bytes_to_remove);
+    size_ -= static_cast<SizeT>(bytes_to_remove);
  }

  void scratch_pop(size_t bytes_to_remove) { scratch_ -= bytes_to_remove; }
@@ -224,6 +235,7 @@ class vector_downward {
    swap(buffer_minalign_, other.buffer_minalign_);
    swap(reserved_, other.reserved_);
    swap(size_, other.size_);
+    swap(max_size_, other.max_size_);
    swap(buf_, other.buf_);
    swap(cur_, other.cur_);
    swap(scratch_, other.scratch_);
@@ -243,9 +255,12 @@ class vector_downward {
  Allocator *allocator_;
  bool own_allocator_;
  size_t initial_size_;
+
+  // The maximum size the vector can be.
+  SizeT max_size_;
  size_t buffer_minalign_;
  size_t reserved_;
-  uoffset_t size_;
+  SizeT size_;
  uint8_t *buf_;
  uint8_t *cur_;  // Points at location between empty (below) and used (above).
  uint8_t *scratch_;  // Points to the end of the scratchpad in use.