Verifier for FlexBuffers (#6977)

* Verifier for FlexBuffers

* Verifier improvements & fuzzer
This commit is contained in:
Wouter van Oortmerssen
2021-12-10 14:59:08 -08:00
committed by GitHub
parent 705f27f6ee
commit e367ca32ad
11 changed files with 300 additions and 6 deletions

View File

@@ -53,7 +53,7 @@ enum Type {
FBT_INT = 1, FBT_INT = 1,
FBT_UINT = 2, FBT_UINT = 2,
FBT_FLOAT = 3, FBT_FLOAT = 3,
// Types above stored inline, types below store an offset. // Types above stored inline, types below (except FBT_BOOL) store an offset.
FBT_KEY = 4, FBT_KEY = 4,
FBT_STRING = 5, FBT_STRING = 5,
FBT_INDIRECT_INT = 6, FBT_INDIRECT_INT = 6,
@@ -81,6 +81,8 @@ enum Type {
FBT_BOOL = 26, FBT_BOOL = 26,
FBT_VECTOR_BOOL = FBT_VECTOR_BOOL =
36, // To Allow the same type of conversion of type to vector type 36, // To Allow the same type of conversion of type to vector type
FBT_MAX_TYPE = 37
}; };
inline bool IsInline(Type t) { return t <= FBT_FLOAT || t == FBT_BOOL; } inline bool IsInline(Type t) { return t <= FBT_FLOAT || t == FBT_BOOL; }
@@ -757,6 +759,8 @@ class Reference {
return false; return false;
} }
friend class Verifier;
const uint8_t *data_; const uint8_t *data_;
uint8_t parent_width_; uint8_t parent_width_;
uint8_t byte_width_; uint8_t byte_width_;
@@ -1629,6 +1633,242 @@ class Builder FLATBUFFERS_FINAL_CLASS {
StringOffsetMap string_pool; StringOffsetMap string_pool;
}; };
// Helper class to verify the integrity of a FlexBuffer
class Verifier FLATBUFFERS_FINAL_CLASS {
public:
Verifier(const uint8_t *buf, size_t buf_len,
// Supplying this vector likely results in faster verification
// of larger buffers with many shared keys/strings, but
// comes at the cost of using additional memory 1/8th the size of
// the buffer being verified, so it is allowed to be null
// for special situations (memory constrained devices or
// really small buffers etc). Do note that when not supplying
// this buffer, you are not protected against buffers crafted
// specifically to DoS you, i.e. recursive sharing that causes
// exponential amounts of verification CPU time.
std::vector<bool> *reuse_tracker)
: buf_(buf), size_(buf_len), reuse_tracker_(reuse_tracker) {
FLATBUFFERS_ASSERT(size_ < FLATBUFFERS_MAX_BUFFER_SIZE);
if (reuse_tracker_) {
reuse_tracker_->clear();
reuse_tracker_->resize(size_);
}
}
private:
// Central location where any verification failures register.
bool Check(bool ok) const {
// clang-format off
#ifdef FLATBUFFERS_DEBUG_VERIFICATION_FAILURE
FLATBUFFERS_ASSERT(ok);
#endif
// clang-format on
return ok;
}
// Verify any range within the buffer.
bool VerifyFrom(size_t elem, size_t elem_len) const {
return Check(elem_len < size_ && elem <= size_ - elem_len);
}
bool VerifyBefore(size_t elem, size_t elem_len) const {
return Check(elem_len <= elem);
}
bool VerifyFromPointer(const uint8_t *p, size_t len) {
auto o = static_cast<size_t>(p - buf_);
return VerifyFrom(o, len);
}
bool VerifyBeforePointer(const uint8_t *p, size_t len) {
auto o = static_cast<size_t>(p - buf_);
return VerifyBefore(o, len);
}
bool VerifyByteWidth(size_t width) {
return Check(width == 1 || width == 2 || width == 4 || width == 8);
}
bool VerifyType(int type) {
return Check(type >= 0 && type < FBT_MAX_TYPE);
}
bool VerifyOffset(uint64_t off, const uint8_t *p) {
return Check(off <= static_cast<uint64_t>(size_)) &&
off <= static_cast<uint64_t>(p - buf_);
}
bool AlreadyVerified(const uint8_t *p) {
return reuse_tracker_ && (*reuse_tracker_)[p - buf_];
}
void MarkVerified(const uint8_t *p) {
if (reuse_tracker_)
(*reuse_tracker_)[p - buf_] = true;
}
bool VerifyVector(const uint8_t *p, Type elem_type, uint8_t size_byte_width,
uint8_t elem_byte_width) {
// Any kind of nesting goes thru this function, so guard against that
// here.
if (AlreadyVerified(p))
return true;
if (!VerifyBeforePointer(p, size_byte_width))
return false;
auto sized = Sized(p, size_byte_width);
auto num_elems = sized.size();
auto max_elems = SIZE_MAX / elem_byte_width;
if (!Check(num_elems < max_elems))
return false; // Protect against byte_size overflowing.
auto byte_size = num_elems * elem_byte_width;
if (!VerifyFromPointer(p, byte_size))
return false;
if (!IsInline(elem_type)) {
if (elem_type == FBT_NULL) {
// Verify type bytes after the vector.
if (!VerifyFromPointer(p + byte_size, num_elems)) return false;
auto v = Vector(p, size_byte_width);
for (size_t i = 0; i < num_elems; i++)
if (!VerifyRef(v[i])) return false;
} else if (elem_type == FBT_KEY) {
auto v = TypedVector(p, elem_byte_width, FBT_KEY);
for (size_t i = 0; i < num_elems; i++)
if (!VerifyRef(v[i])) return false;
} else {
FLATBUFFERS_ASSERT(false);
}
}
MarkVerified(p);
return true;
}
bool VerifyKeys(const uint8_t *p, uint8_t byte_width) {
// The vector part of the map has already been verified.
const size_t num_prefixed_fields = 3;
if (!VerifyBeforePointer(p, byte_width * num_prefixed_fields))
return false;
p -= byte_width * num_prefixed_fields;
auto off = ReadUInt64(p, byte_width);
if (!VerifyOffset(off, p))
return false;
auto key_byte_with =
static_cast<uint8_t>(ReadUInt64(p + byte_width, byte_width));
if (!VerifyByteWidth(key_byte_with))
return false;
return VerifyVector(p - off, FBT_KEY, key_byte_with, key_byte_with);
}
bool VerifyKey(const uint8_t* p) {
if (AlreadyVerified(p)) return true;
MarkVerified(p);
while (p < buf_ + size_)
if (*p++) return true;
return false;
}
bool VerifyTerminator(const String &s) {
return VerifyFromPointer(reinterpret_cast<const uint8_t *>(s.c_str()),
s.size() + 1);
}
bool VerifyRef(Reference r) {
// r.parent_width_ and r.data_ already verified.
if (!VerifyByteWidth(r.byte_width_) || !VerifyType(r.type_)) {
return false;
}
if (IsInline(r.type_)) {
// Inline scalars, don't require further verification.
return true;
}
// All remaining types are an offset.
auto off = ReadUInt64(r.data_, r.parent_width_);
if (!VerifyOffset(off, r.data_))
return false;
auto p = r.Indirect();
switch (r.type_) {
case FBT_INDIRECT_INT:
case FBT_INDIRECT_UINT:
case FBT_INDIRECT_FLOAT:
return VerifyFromPointer(p, r.byte_width_);
case FBT_KEY:
return VerifyKey(p);
case FBT_MAP:
return VerifyVector(p, FBT_NULL, r.byte_width_, r.byte_width_) &&
VerifyKeys(p, r.byte_width_);
case FBT_VECTOR:
return VerifyVector(p, FBT_NULL, r.byte_width_, r.byte_width_);
case FBT_VECTOR_INT:
return VerifyVector(p, FBT_INT, r.byte_width_, r.byte_width_);
case FBT_VECTOR_BOOL:
case FBT_VECTOR_UINT:
return VerifyVector(p, FBT_UINT, r.byte_width_, r.byte_width_);
case FBT_VECTOR_FLOAT:
return VerifyVector(p, FBT_FLOAT, r.byte_width_, r.byte_width_);
case FBT_VECTOR_KEY:
return VerifyVector(p, FBT_KEY, r.byte_width_, r.byte_width_);
case FBT_VECTOR_STRING_DEPRECATED:
// Use of FBT_KEY here intentional, see elsewhere.
return VerifyVector(p, FBT_KEY, r.byte_width_, r.byte_width_);
case FBT_BLOB:
return VerifyVector(p, FBT_UINT, r.byte_width_, 1);
case FBT_STRING:
return VerifyVector(p, FBT_UINT, r.byte_width_, 1) &&
VerifyTerminator(String(p, r.byte_width_));
case FBT_VECTOR_INT2:
case FBT_VECTOR_UINT2:
case FBT_VECTOR_FLOAT2:
case FBT_VECTOR_INT3:
case FBT_VECTOR_UINT3:
case FBT_VECTOR_FLOAT3:
case FBT_VECTOR_INT4:
case FBT_VECTOR_UINT4:
case FBT_VECTOR_FLOAT4: {
uint8_t len = 0;
auto vtype = ToFixedTypedVectorElementType(r.type_, &len);
if (!VerifyType(vtype))
return false;
return VerifyFromPointer(p, r.byte_width_ * len);
}
default:
return false;
}
}
public:
bool VerifyBuffer() {
if (!Check(size_ >= 3)) return false;
auto end = buf_ + size_;
auto byte_width = *--end;
auto packed_type = *--end;
return VerifyByteWidth(byte_width) &&
Check(end - buf_ >= byte_width) &&
VerifyRef(Reference(end - byte_width, byte_width, packed_type));
}
private:
const uint8_t *buf_;
size_t size_;
std::vector<bool> *reuse_tracker_;
};
// Utility function that contructs the Verifier for you, see above for parameters.
inline bool VerifyBuffer(const uint8_t *buf, size_t buf_len, std::vector<bool> *reuse_tracker) {
Verifier verifier(buf, buf_len, reuse_tracker);
return verifier.VerifyBuffer();
}
#ifdef FLATBUFFERS_H_
// This is a verifier utility function that works together with the
// FlatBuffers verifier, which should only be present if flatbuffer.h
// has been included (which it typically is in generated code).
inline bool VerifyNestedFlexBuffer(const flatbuffers::Vector<uint8_t> *nv,
flatbuffers::Verifier &verifier) {
if (!nv) return true;
return verifier.Check(
flexbuffers::VerifyBuffer(nv->data(), nv->size(),
&verifier.GetReuseVector()));
}
#endif
} // namespace flexbuffers } // namespace flexbuffers
#if defined(_MSC_VER) #if defined(_MSC_VER)

View File

@@ -254,6 +254,8 @@ class Verifier FLATBUFFERS_FINAL_CLASS {
// clang-format on // clang-format on
} }
std::vector<bool> &GetReuseVector() { return reuse_tracker_; }
private: private:
const uint8_t *buf_; const uint8_t *buf_;
size_t size_; size_t size_;
@@ -263,6 +265,9 @@ class Verifier FLATBUFFERS_FINAL_CLASS {
uoffset_t max_tables_; uoffset_t max_tables_;
mutable size_t upper_bound_; mutable size_t upper_bound_;
bool check_alignment_; bool check_alignment_;
// This is here for nested FlexBuffers, cheap if not touched.
// TODO: allow user to supply memory for this.
std::vector<bool> reuse_tracker_;
}; };
} // namespace flatbuffers } // namespace flatbuffers

View File

@@ -510,9 +510,12 @@ int FlatCompiler::Compile(int argc, const char **argv) {
LoadBinarySchema(*parser.get(), filename, contents); LoadBinarySchema(*parser.get(), filename, contents);
} else if (opts.use_flexbuffers) { } else if (opts.use_flexbuffers) {
if (opts.lang_to_generate == IDLOptions::kJson) { if (opts.lang_to_generate == IDLOptions::kJson) {
parser->flex_root_ = flexbuffers::GetRoot( auto data = reinterpret_cast<const uint8_t *>(contents.c_str());
reinterpret_cast<const uint8_t *>(contents.c_str()), auto size = contents.size();
contents.size()); std::vector<bool> reuse_tracker;
if (!flexbuffers::VerifyBuffer(data, size, &reuse_tracker))
Error("flexbuffers file failed to verify: " + filename, false);
parser->flex_root_ = flexbuffers::GetRoot(data, size);
} else { } else {
parser->flex_builder_.Clear(); parser->flex_builder_.Clear();
ParseFile(*parser.get(), filename, contents, include_directories); ParseFile(*parser.get(), filename, contents, include_directories);

View File

@@ -2006,6 +2006,9 @@ class CppGenerator : public BaseGenerator {
// FIXME: file_identifier. // FIXME: file_identifier.
code_ += "{{PRE}}verifier.VerifyNestedFlatBuffer<{{CPP_NAME}}>" code_ += "{{PRE}}verifier.VerifyNestedFlatBuffer<{{CPP_NAME}}>"
"({{NAME}}(), nullptr)\\"; "({{NAME}}(), nullptr)\\";
} else if (field.flexbuffer) {
code_ += "{{PRE}}flexbuffers::VerifyNestedFlexBuffer"
"({{NAME}}(), verifier)\\";
} }
break; break;
} }

View File

@@ -265,6 +265,12 @@ struct JsonPrinter {
val = reinterpret_cast<const Struct *>(table)->GetStruct<const void *>( val = reinterpret_cast<const Struct *>(table)->GetStruct<const void *>(
fd.value.offset); fd.value.offset);
} else if (fd.flexbuffer && opts.json_nested_flexbuffers) { } else if (fd.flexbuffer && opts.json_nested_flexbuffers) {
// We could verify this FlexBuffer before access, but since this sits
// inside a FlatBuffer that we don't know wether it has been verified or
// not, there is little point making this part safer than the parent..
// The caller should really be verifying the whole.
// If the whole buffer is corrupt, we likely crash before we even get
// here.
auto vec = table->GetPointer<const Vector<uint8_t> *>(fd.value.offset); auto vec = table->GetPointer<const Vector<uint8_t> *>(fd.value.offset);
auto root = flexbuffers::GetRoot(vec->data(), vec->size()); auto root = flexbuffers::GetRoot(vec->data(), vec->size());
root.ToString(true, opts.strict_json, text); root.ToString(true, opts.strict_json, text);

View File

@@ -1740,6 +1740,7 @@ struct Monster FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
verifier.VerifyVector(testarrayofsortedstruct()) && verifier.VerifyVector(testarrayofsortedstruct()) &&
VerifyOffset(verifier, VT_FLEX) && VerifyOffset(verifier, VT_FLEX) &&
verifier.VerifyVector(flex()) && verifier.VerifyVector(flex()) &&
flexbuffers::VerifyNestedFlexBuffer(flex(), verifier) &&
VerifyOffset(verifier, VT_TEST5) && VerifyOffset(verifier, VT_TEST5) &&
verifier.VerifyVector(test5()) && verifier.VerifyVector(test5()) &&
VerifyOffset(verifier, VT_VECTOR_OF_LONGS) && VerifyOffset(verifier, VT_VECTOR_OF_LONGS) &&

View File

@@ -53,7 +53,7 @@ target_compile_options(
fuzzer_config fuzzer_config
INTERFACE INTERFACE
$<$<NOT:$<BOOL:${OSS_FUZZ}>>: $<$<NOT:$<BOOL:${OSS_FUZZ}>>:
-fsanitize-coverage=edge,trace-cmp -fsanitize-coverage=trace-cmp
> >
$<$<BOOL:${USE_ASAN}>: $<$<BOOL:${USE_ASAN}>:
-fsanitize=fuzzer,undefined,address -fsanitize=fuzzer,undefined,address
@@ -146,6 +146,9 @@ target_link_libraries(parser_fuzzer PRIVATE flatbuffers_fuzzed)
add_executable(verifier_fuzzer flatbuffers_verifier_fuzzer.cc) add_executable(verifier_fuzzer flatbuffers_verifier_fuzzer.cc)
target_link_libraries(verifier_fuzzer PRIVATE flatbuffers_fuzzed) target_link_libraries(verifier_fuzzer PRIVATE flatbuffers_fuzzed)
add_executable(flexverifier_fuzzer flexbuffers_verifier_fuzzer.cc)
target_link_libraries(flexverifier_fuzzer PRIVATE flatbuffers_fuzzed)
add_executable(monster_fuzzer flatbuffers_monster_fuzzer.cc) add_executable(monster_fuzzer flatbuffers_monster_fuzzer.cc)
target_link_libraries(monster_fuzzer PRIVATE flatbuffers_fuzzed) target_link_libraries(monster_fuzzer PRIVATE flatbuffers_fuzzed)
add_custom_command( add_custom_command(

View File

@@ -0,0 +1,14 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stddef.h>
#include <stdint.h>
#include <string>
#include "flatbuffers/flexbuffers.h"
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
std::vector<bool> reuse_tracker;
flexbuffers::VerifyBuffer(data, size, &reuse_tracker);
return 0;
}

View File

@@ -10,10 +10,17 @@ For details about **libFuzzer** see: https://llvm.org/docs/LibFuzzer.html
To build and run these tests LLVM compiler (with clang frontend) and CMake should be installed before. To build and run these tests LLVM compiler (with clang frontend) and CMake should be installed before.
The fuzzer section include three tests: The fuzzer section include four tests:
- `verifier_fuzzer` checks stability of deserialization engine for `Monster` schema; - `verifier_fuzzer` checks stability of deserialization engine for `Monster` schema;
- `parser_fuzzer` checks stability of schema and json parser under various inputs; - `parser_fuzzer` checks stability of schema and json parser under various inputs;
- `scalar_parser` focused on validation of the parser while parse numeric scalars in schema and/or json files; - `scalar_parser` focused on validation of the parser while parse numeric scalars in schema and/or json files;
- `flexverifier_fuzzer` checks stability of deserialization engine for FlexBuffers only;
## Build
```sh
cd tests/fuzzer
CC=clang CXX=clang++ cmake . -DCMAKE_BUILD_TYPE=Debug -DUSE_ASAN=ON
```
## Run tests with a specific locale ## Run tests with a specific locale
The grammar of the Flatbuffers library is based on printable-ASCII characters. The grammar of the Flatbuffers library is based on printable-ASCII characters.

View File

@@ -1680,6 +1680,7 @@ struct Monster FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
verifier.VerifyVector(testarrayofsortedstruct()) && verifier.VerifyVector(testarrayofsortedstruct()) &&
VerifyOffset(verifier, VT_FLEX) && VerifyOffset(verifier, VT_FLEX) &&
verifier.VerifyVector(flex()) && verifier.VerifyVector(flex()) &&
flexbuffers::VerifyNestedFlexBuffer(flex(), verifier) &&
VerifyOffset(verifier, VT_TEST5) && VerifyOffset(verifier, VT_TEST5) &&
verifier.VerifyVector(test5()) && verifier.VerifyVector(test5()) &&
VerifyOffset(verifier, VT_VECTOR_OF_LONGS) && VerifyOffset(verifier, VT_VECTOR_OF_LONGS) &&

View File

@@ -3022,6 +3022,10 @@ void FlexBuffersTest() {
#endif #endif
// clang-format on // clang-format on
std::vector<bool> reuse_tracker;
TEST_EQ(flexbuffers::VerifyBuffer(slb.GetBuffer().data(), slb.GetBuffer().size(),
&reuse_tracker), true);
auto map = flexbuffers::GetRoot(slb.GetBuffer()).AsMap(); auto map = flexbuffers::GetRoot(slb.GetBuffer()).AsMap();
TEST_EQ(map.size(), 7); TEST_EQ(map.size(), 7);
auto vec = map["vec"].AsVector(); auto vec = map["vec"].AsVector();
@@ -3079,6 +3083,8 @@ void FlexBuffersTest() {
slb.Clear(); slb.Clear();
auto jsontest = "{ a: [ 123, 456.0 ], b: \"hello\", c: true, d: false }"; auto jsontest = "{ a: [ 123, 456.0 ], b: \"hello\", c: true, d: false }";
TEST_EQ(parser.ParseFlexBuffer(jsontest, nullptr, &slb), true); TEST_EQ(parser.ParseFlexBuffer(jsontest, nullptr, &slb), true);
TEST_EQ(flexbuffers::VerifyBuffer(slb.GetBuffer().data(), slb.GetBuffer().size(),
&reuse_tracker), true);
auto jroot = flexbuffers::GetRoot(slb.GetBuffer()); auto jroot = flexbuffers::GetRoot(slb.GetBuffer());
auto jmap = jroot.AsMap(); auto jmap = jroot.AsMap();
auto jvec = jmap["a"].AsVector(); auto jvec = jmap["a"].AsVector();
@@ -3116,6 +3122,8 @@ void FlexBuffersFloatingPointTest() {
"{ a: [1.0, nan, inf, infinity, -inf, +inf, -infinity, 8.0] }"; "{ a: [1.0, nan, inf, infinity, -inf, +inf, -infinity, 8.0] }";
TEST_EQ(parser.ParseFlexBuffer(jsontest, nullptr, &slb), true); TEST_EQ(parser.ParseFlexBuffer(jsontest, nullptr, &slb), true);
auto jroot = flexbuffers::GetRoot(slb.GetBuffer()); auto jroot = flexbuffers::GetRoot(slb.GetBuffer());
TEST_EQ(flexbuffers::VerifyBuffer(slb.GetBuffer().data(), slb.GetBuffer().size(),
nullptr), true);
auto jmap = jroot.AsMap(); auto jmap = jroot.AsMap();
auto jvec = jmap["a"].AsVector(); auto jvec = jmap["a"].AsVector();
TEST_EQ(8, jvec.size()); TEST_EQ(8, jvec.size());
@@ -3159,6 +3167,9 @@ void FlexBuffersDeprecatedTest() {
// same way, the fix lies on the reading side. // same way, the fix lies on the reading side.
slb.EndVector(start, true, false); slb.EndVector(start, true, false);
slb.Finish(); slb.Finish();
// Verify because why not.
TEST_EQ(flexbuffers::VerifyBuffer(slb.GetBuffer().data(), slb.GetBuffer().size(),
nullptr), true);
// So now lets read this data back. // So now lets read this data back.
// For existing data, since we have no way of knowing what the actual // For existing data, since we have no way of knowing what the actual
// bit-width of the size field of the string is, we are going to ignore this // bit-width of the size field of the string is, we are going to ignore this