mirror of
https://github.com/google/flatbuffers.git
synced 2026-06-02 04:04:19 +00:00
Add fuzzer test for scalar numbers in json. (#4996)
* Add fuzzer test for scalar numbers in json. Grammar-based regex used to check correctness. * Fix conversation
This commit is contained in:
committed by
Wouter van Oortmerssen
parent
efbb11e093
commit
55b30827f2
@@ -7,10 +7,12 @@
|
||||
#define _CRTDBG_MAP_ALLOC
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#if !defined(FLATBUFFERS_ASSERT)
|
||||
#include <assert.h>
|
||||
#define FLATBUFFERS_ASSERT assert
|
||||
#elif defined(FLATBUFFERS_ASSERT_INCLUDE)
|
||||
// Include file with forward declaration
|
||||
#include FLATBUFFERS_ASSERT_INCLUDE
|
||||
#endif
|
||||
|
||||
#ifndef ARDUINO
|
||||
|
||||
@@ -1914,7 +1914,7 @@ class Verifier FLATBUFFERS_FINAL_CLASS {
|
||||
#endif
|
||||
// clang-format on
|
||||
{
|
||||
assert(size_ < FLATBUFFERS_MAX_BUFFER_SIZE);
|
||||
FLATBUFFERS_ASSERT(size_ < FLATBUFFERS_MAX_BUFFER_SIZE);
|
||||
}
|
||||
|
||||
// Central location where any verification failures register.
|
||||
|
||||
102
tests/fuzzer/CMakeLists.txt
Normal file
102
tests/fuzzer/CMakeLists.txt
Normal file
@@ -0,0 +1,102 @@
|
||||
cmake_minimum_required(VERSION 3.9)
|
||||
|
||||
set(CMAKE_VERBOSE_MAKEFILE ON)
|
||||
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
project(FlatBuffersFuzzerTests)
|
||||
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} -std=c++14 -Wall -pedantic -Werror -Wextra -Wno-unused-parameter -fsigned-char")
|
||||
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} -g -fsigned-char -fno-omit-frame-pointer")
|
||||
|
||||
# Typical slowdown introduced by MemorySanitizer (memory) is 3x.
|
||||
# '-fsanitize=address' not allowed with '-fsanitize=memory'
|
||||
if(YES)
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} -fsanitize=fuzzer,address,undefined")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} -fsanitize=fuzzer,memory,undefined -fsanitize-memory-track-origins=2")
|
||||
endif()
|
||||
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} -fsanitize-coverage=edge,trace-cmp")
|
||||
|
||||
# enable link-time optimisation
|
||||
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto")
|
||||
|
||||
# https://llvm.org/docs/Passes.html
|
||||
# save IR to see call graph
|
||||
# make one bitcode file:> llvm-link *.bc -o out.bc
|
||||
# print call-graph:> opt out.bc -analyze -print-callgraph &> callgraph.txt
|
||||
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -save-temps -flto")
|
||||
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld")
|
||||
|
||||
set(FLATBUFFERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../")
|
||||
|
||||
set(FlatBuffers_Library_SRCS
|
||||
${FLATBUFFERS_DIR}/include/flatbuffers/code_generators.h
|
||||
${FLATBUFFERS_DIR}/include/flatbuffers/base.h
|
||||
${FLATBUFFERS_DIR}/include/flatbuffers/flatbuffers.h
|
||||
${FLATBUFFERS_DIR}/include/flatbuffers/hash.h
|
||||
${FLATBUFFERS_DIR}/include/flatbuffers/idl.h
|
||||
${FLATBUFFERS_DIR}/include/flatbuffers/util.h
|
||||
${FLATBUFFERS_DIR}/include/flatbuffers/reflection.h
|
||||
${FLATBUFFERS_DIR}/include/flatbuffers/reflection_generated.h
|
||||
${FLATBUFFERS_DIR}/include/flatbuffers/stl_emulation.h
|
||||
${FLATBUFFERS_DIR}/include/flatbuffers/flexbuffers.h
|
||||
${FLATBUFFERS_DIR}/include/flatbuffers/registry.h
|
||||
${FLATBUFFERS_DIR}/include/flatbuffers/minireflect.h
|
||||
${FLATBUFFERS_DIR}/src/code_generators.cpp
|
||||
${FLATBUFFERS_DIR}/src/idl_parser.cpp
|
||||
${FLATBUFFERS_DIR}/src/idl_gen_text.cpp
|
||||
${FLATBUFFERS_DIR}/src/reflection.cpp
|
||||
${FLATBUFFERS_DIR}/src/util.cpp
|
||||
${FLATBUFFERS_DIR}/tests/test_assert.cpp
|
||||
)
|
||||
|
||||
include_directories(${FLATBUFFERS_DIR}/include)
|
||||
include_directories(${FLATBUFFERS_DIR}/tests)
|
||||
add_library(flatbuffers STATIC ${FlatBuffers_Library_SRCS})
|
||||
|
||||
# FLATBUFFERS_ASSERT should assert in Release as well.
|
||||
# Redefine FLATBUFFERS_ASSERT macro definition.
|
||||
# Declare as PUBLIC to cover asserts in all included header files.
|
||||
target_compile_definitions(flatbuffers PUBLIC
|
||||
FLATBUFFERS_ASSERT=fuzzer_assert_impl)
|
||||
target_compile_definitions(flatbuffers PUBLIC
|
||||
FLATBUFFERS_ASSERT_INCLUDE="${CMAKE_CURRENT_SOURCE_DIR}/fuzzer_assert.h")
|
||||
|
||||
if(NOT DEFINED FLATBUFFERS_MAX_PARSING_DEPTH)
|
||||
# Force checking of RecursionError in the test
|
||||
set(FLATBUFFERS_MAX_PARSING_DEPTH 8)
|
||||
endif()
|
||||
message(STATUS "FLATBUFFERS_MAX_PARSING_DEPTH: ${FLATBUFFERS_MAX_PARSING_DEPTH}")
|
||||
# Pass the configured parsing depth limit (default 8, overridable with
# -DFLATBUFFERS_MAX_PARSING_DEPTH=<n>) to the library build, so the value
# printed by the status message above is the one actually compiled in.
target_compile_definitions(flatbuffers PRIVATE
  FLATBUFFERS_MAX_PARSING_DEPTH=${FLATBUFFERS_MAX_PARSING_DEPTH})
|
||||
|
||||
# Setup fuzzer tests.
|
||||
|
||||
# Change the default ASCII locale (affects isalpha, isalnum, decimal
|
||||
# delimiters, other). https://en.cppreference.com/w/cpp/locale/setlocale
|
||||
if(DEFINED FUZZ_TEST_LOCALE)
|
||||
# Enable locale independent code and define locale for tests.
|
||||
# -DFUZZ_TEST_LOCALE="" - enable, but test with default locale
|
||||
# -DFUZZ_TEST_LOCALE="ru_RU.CP1251" - enable and test with ru_RU.CP1251
|
||||
# Locale was installed before (Ubuntu):>sudo locale-gen ru_RU.CP1251
|
||||
add_definitions(-DFUZZ_TEST_LOCALE=\"${FUZZ_TEST_LOCALE}\")
|
||||
endif()
|
||||
message(STATUS "FUZZ_TEST_LOCALE: ${FUZZ_TEST_LOCALE}")
|
||||
|
||||
add_executable(scalar_fuzzer flatbuffers_scalar_fuzzer.cc)
|
||||
target_link_libraries(scalar_fuzzer PRIVATE flatbuffers)
|
||||
|
||||
add_executable(parser_fuzzer flatbuffers_parser_fuzzer.cc)
|
||||
target_link_libraries(parser_fuzzer PRIVATE flatbuffers)
|
||||
|
||||
add_executable(verifier_fuzzer flatbuffers_verifier_fuzzer.cc)
|
||||
target_link_libraries(verifier_fuzzer PRIVATE flatbuffers)
|
||||
@@ -1,20 +0,0 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright 2015 Google Inc. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
git clone https://chromium.googlesource.com/chromium/llvm-project/compiler-rt/lib/fuzzer
|
||||
clang++ -c -g -O2 -std=c++11 fuzzer/*.cpp -Ifuzzer
|
||||
ar ruv libFuzzer.a Fuzzer*.o
|
||||
rm -rf fuzzer *.o
|
||||
@@ -1,23 +0,0 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright 2015 Google Inc. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
clang++ -fsanitize-coverage=edge -fsanitize=address -fsanitize=undefined \
|
||||
-g -fno-omit-frame-pointer -std=c++11 -stdlib=libstdc++ \
|
||||
-I.. -I../../include flatbuffers_parser_fuzzer.cc ../../src/idl_parser.cpp \
|
||||
../../src/util.cpp libFuzzer.a -o fuzz_parser
|
||||
mkdir -p parser_corpus
|
||||
cp ../*.json ../*.fbs parser_corpus
|
||||
./fuzz_parser parser_corpus
|
||||
@@ -1,22 +0,0 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright 2015 Google Inc. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
clang++ -fsanitize-coverage=edge -fsanitize=address -fsanitize=undefined \
|
||||
-g -fno-omit-frame-pointer -std=c++11 -stdlib=libstdc++ \
|
||||
-I.. -I../../include flatbuffers_verifier_fuzzer.cc libFuzzer.a -o fuzz_verifier
|
||||
mkdir -p verifier_corpus
|
||||
cp ../*.mon verifier_corpus
|
||||
./fuzz_verifier verifier_corpus
|
||||
@@ -3,14 +3,67 @@
|
||||
// found in the LICENSE file.
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <clocale>
|
||||
#include <string>
|
||||
|
||||
#include "flatbuffers/idl.h"
|
||||
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
||||
flatbuffers::Parser parser;
|
||||
// Guarantee 0-termination.
|
||||
std::string s(reinterpret_cast<const char *>(data), size);
|
||||
parser.Parse(s.c_str());
|
||||
static constexpr uint8_t flags_strict_json = 0x01;
|
||||
static constexpr uint8_t flags_skip_unexpected_fields_in_json = 0x02;
|
||||
static constexpr uint8_t flags_allow_non_utf8 = 0x04;
|
||||
// static constexpr uint8_t flags_flag_3 = 0x08;
|
||||
// static constexpr uint8_t flags_flag_4 = 0x10;
|
||||
// static constexpr uint8_t flags_flag_5 = 0x20;
|
||||
// static constexpr uint8_t flags_flag_6 = 0x40;
|
||||
// static constexpr uint8_t flags_flag_7 = 0x80;
|
||||
|
||||
// See readme.md and CMakeLists.txt for details.
|
||||
#ifdef FUZZ_TEST_LOCALE
|
||||
static constexpr const char *test_locale = (FUZZ_TEST_LOCALE);
|
||||
#else
|
||||
static constexpr const char *test_locale = nullptr;
|
||||
#endif
|
||||
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||
// Reserve one byte for Parser flags and one byte for repetition counter.
|
||||
if (size < 3) return 0;
|
||||
const uint8_t flags = data[0];
|
||||
// normalize to ascii alphabet
|
||||
const int extra_rep_number = data[1] >= '0' ? (data[1] - '0') : 0;
|
||||
data += 2;
|
||||
size -= 2; // bypass
|
||||
|
||||
const std::string original(reinterpret_cast<const char *>(data), size);
|
||||
auto input = std::string(original.c_str()); // until '\0'
|
||||
if (input.empty()) return 0;
|
||||
|
||||
flatbuffers::IDLOptions opts;
|
||||
opts.strict_json = (flags & flags_strict_json);
|
||||
opts.skip_unexpected_fields_in_json =
|
||||
(flags & flags_skip_unexpected_fields_in_json);
|
||||
opts.allow_non_utf8 = (flags & flags_allow_non_utf8);
|
||||
|
||||
flatbuffers::Parser parser(opts);
|
||||
|
||||
// Guarantee 0-termination in the input.
|
||||
auto parse_input = input.c_str();
|
||||
|
||||
// The fuzzer can adjust the number of repetitions if side effects are found.
|
||||
// Each test should pass at least two times to ensure that the parser doesn't
|
||||
// have any hidden state or locale-dependent effects.
|
||||
for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) {
|
||||
auto use_locale = !!test_locale && (0 == (cnt % 2));
|
||||
// Set new locale.
|
||||
if (use_locale) {
|
||||
FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, test_locale));
|
||||
}
|
||||
|
||||
// Check Parser.
|
||||
parser.Parse(parse_input);
|
||||
|
||||
// Restore locale.
|
||||
if (use_locale) { FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, "C")); }
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
361
tests/fuzzer/flatbuffers_scalar_fuzzer.cc
Normal file
361
tests/fuzzer/flatbuffers_scalar_fuzzer.cc
Normal file
@@ -0,0 +1,361 @@
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <algorithm>
|
||||
#include <clocale>
|
||||
#include <memory>
|
||||
#include <regex>
|
||||
#include <string>
|
||||
|
||||
#include "flatbuffers/idl.h"
|
||||
#include "fuzzer_assert.h"
|
||||
#include "test_assert.h"
|
||||
|
||||
static_assert(__has_feature(memory_sanitizer) ||
|
||||
__has_feature(address_sanitizer),
|
||||
"sanitizer disabled");
|
||||
|
||||
static constexpr uint8_t flags_scalar_type = 0x0F; // type of scalar value
|
||||
static constexpr uint8_t flags_quotes_kind = 0x10; // quote " or '
|
||||
// reserved for future: json {named} or [unnamed]
|
||||
// static constexpr uint8_t flags_json_bracer = 0x20;
|
||||
|
||||
// See readme.md and CMakeLists.txt for details.
|
||||
#ifdef FUZZ_TEST_LOCALE
|
||||
static constexpr const char *test_locale = (FUZZ_TEST_LOCALE);
|
||||
#else
|
||||
static constexpr const char *test_locale = nullptr;
|
||||
#endif
|
||||
|
||||
// Utility for test run.
|
||||
struct OneTimeTestInit {
|
||||
// Declare trap for the flatbuffers test engine.
|
||||
// This hook terminates the program in both Debug and Release.
|
||||
static bool TestFailListener(const char *expval, const char *val,
|
||||
const char *exp, const char *file, int line,
|
||||
const char *func = 0) {
|
||||
(void)expval;
|
||||
(void)val;
|
||||
(void)exp;
|
||||
(void)file;
|
||||
(void)line;
|
||||
(void)func;
|
||||
// FLATBUFFERS_ASSERT is also redefined to be fully independent of the library
|
||||
// implementation (see test_assert.h for details).
|
||||
fuzzer_assert_impl(false); // terminate
|
||||
return false;
|
||||
}
|
||||
|
||||
OneTimeTestInit() {
|
||||
// The fuzzer test should not depend on the test engine implementation.
|
||||
// This hook will terminate the test if a TEST_EQ/TEST_ASSERT check fails.
|
||||
InitTestEngine(OneTimeTestInit::TestFailListener);
|
||||
}
|
||||
|
||||
static OneTimeTestInit one_time_init_;
|
||||
};
|
||||
|
||||
OneTimeTestInit OneTimeTestInit::one_time_init_;
|
||||
|
||||
// Find every occurrence of the sub-string 'subj' in 's' and overwrite the
// first character of each occurrence with 'repl'. The search resumes one
// character after each match, so overlapping occurrences are handled too.
// A null or empty 'subj' leaves 's' unchanged.
// BreakSequence("testest","tes", 'X') -> "XesXest".
// BreakSequence("xxx","xx", 'Y') -> "YYx".
static void BreakSequence(std::string &s, const char *subj, char repl) {
  // An empty pattern matches at every position including s.size(); writing
  // there would be out of range, so treat it as "nothing to break".
  if (!subj || !*subj) return;
  for (size_t pos = s.find(subj); pos != std::string::npos;
       pos = s.find(subj, pos + 1)) {
    s[pos] = repl;  // pos is a valid index: a non-empty match starts here
  }
}
|
||||
|
||||
// Trim from both ends of 's' every character that belongs to the character
// set 'pattern' and return what remains. If 'pos' is non-null it receives the
// offset in 's' of the first kept character (0 when nothing is kept).
// StripString("xy{xy}y", "xy") -> "{xy}"
static std::string StripString(const std::string &s, const char *pattern,
                               size_t *pos = nullptr) {
  const auto begin = s.find_first_not_of(pattern);
  const bool all_stripped = (std::string::npos == begin);
  if (pos) *pos = all_stripped ? 0 : begin;
  if (all_stripped) return "";
  // At least one character survives, so the backward search must succeed
  // and cannot land before 'begin'.
  const auto end = s.find_last_not_of(pattern);
  assert(end < s.length());
  assert(begin <= end);
  return s.substr(begin, end - begin + 1);
}
|
||||
|
||||
class RegexMatcher {
|
||||
protected:
|
||||
virtual bool MatchNumber(const std::string &input) const = 0;
|
||||
|
||||
public:
|
||||
virtual ~RegexMatcher() = default;
|
||||
|
||||
struct MatchResult {
|
||||
size_t pos{ 0 };
|
||||
size_t len{ 0 };
|
||||
bool res{ false };
|
||||
bool quoted{ false };
|
||||
};
|
||||
|
||||
MatchResult Match(const std::string &input) const {
|
||||
MatchResult r;
|
||||
// strip leading and trailing "spaces" accepted by flatbuffer
|
||||
auto test = StripString(input, "\t\r\n ", &r.pos);
|
||||
r.len = test.size();
|
||||
// check quotes
|
||||
if (test.size() >= 2) {
|
||||
auto fch = test.front();
|
||||
auto lch = test.back();
|
||||
r.quoted = (fch == lch) && (fch == '\'' || fch == '\"');
|
||||
if (r.quoted) {
|
||||
// remove quotes for regex test
|
||||
test = test.substr(1, test.size() - 2);
|
||||
}
|
||||
}
|
||||
// Fast check:
|
||||
if (test.empty()) return r;
|
||||
// A string with a valid scalar shouldn't have non-ascii or non-printable
|
||||
// symbols.
|
||||
for (auto c : test) {
|
||||
if ((c < ' ') || (c > '~')) return r;
|
||||
}
|
||||
// Check with regex
|
||||
r.res = MatchNumber(test);
|
||||
return r;
|
||||
}
|
||||
|
||||
bool MatchRegexList(const std::string &input,
|
||||
const std::vector<std::regex> &re_list) const {
|
||||
auto str = StripString(input, " ");
|
||||
if (str.empty()) return false;
|
||||
for (auto &re : re_list) {
|
||||
std::smatch match;
|
||||
if (std::regex_match(str, match, re)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
class IntegerRegex : public RegexMatcher {
|
||||
protected:
|
||||
bool MatchNumber(const std::string &input) const override {
|
||||
static const std::vector<std::regex> re_list = {
|
||||
std::regex{ R"(^[-+]?[0-9]+$)", std::regex_constants::optimize },
|
||||
|
||||
std::regex{
|
||||
R"(^[-+]?0[xX][0-9a-fA-F]+$)", std::regex_constants::optimize }
|
||||
};
|
||||
return MatchRegexList(input, re_list);
|
||||
}
|
||||
|
||||
public:
|
||||
IntegerRegex() = default;
|
||||
virtual ~IntegerRegex() = default;
|
||||
};
|
||||
|
||||
class UIntegerRegex : public RegexMatcher {
|
||||
protected:
|
||||
bool MatchNumber(const std::string &input) const override {
|
||||
static const std::vector<std::regex> re_list = {
|
||||
std::regex{ R"(^[+]?[0-9]+$)", std::regex_constants::optimize },
|
||||
std::regex{
|
||||
R"(^[+]?0[xX][0-9a-fA-F]+$)", std::regex_constants::optimize },
|
||||
// accept -0 number
|
||||
std::regex{ R"(^[-](?:0[xX])?0+$)", std::regex_constants::optimize }
|
||||
};
|
||||
return MatchRegexList(input, re_list);
|
||||
}
|
||||
|
||||
public:
|
||||
UIntegerRegex() = default;
|
||||
virtual ~UIntegerRegex() = default;
|
||||
};
|
||||
|
||||
class BooleanRegex : public IntegerRegex {
|
||||
protected:
|
||||
bool MatchNumber(const std::string &input) const override {
|
||||
if (input == "true" || input == "false") return true;
|
||||
return IntegerRegex::MatchNumber(input);
|
||||
}
|
||||
|
||||
public:
|
||||
BooleanRegex() = default;
|
||||
virtual ~BooleanRegex() = default;
|
||||
};
|
||||
|
||||
class FloatRegex : public RegexMatcher {
|
||||
protected:
|
||||
bool MatchNumber(const std::string &input) const override {
|
||||
static const std::vector<std::regex> re_list = {
|
||||
// hex-float
|
||||
std::regex{
|
||||
R"(^[-+]?0[xX](?:(?:[.][0-9a-fA-F]+)|(?:[0-9a-fA-F]+[.][0-9a-fA-F]*)|(?:[0-9a-fA-F]+))[pP][-+]?[0-9]+$)",
|
||||
std::regex_constants::optimize },
|
||||
// dec-float
|
||||
std::regex{
|
||||
R"(^[-+]?(?:(?:[.][0-9]+)|(?:[0-9]+[.][0-9]*)|(?:[0-9]+))(?:[eE][-+]?[0-9]+)?$)",
|
||||
std::regex_constants::optimize },
|
||||
|
||||
std::regex{ R"(^[-+]?(?:nan|inf|infinity)$)",
|
||||
std::regex_constants::optimize | std::regex_constants::icase }
|
||||
};
|
||||
return MatchRegexList(input, re_list);
|
||||
}
|
||||
|
||||
public:
|
||||
FloatRegex() = default;
|
||||
virtual ~FloatRegex() = default;
|
||||
};
|
||||
|
||||
class ScalarReferenceResult {
|
||||
private:
|
||||
ScalarReferenceResult(const char *_type, RegexMatcher::MatchResult _matched)
|
||||
: type(_type), matched(_matched) {}
|
||||
|
||||
public:
|
||||
// Decode scalar type and check if the input string satisfies the scalar type.
|
||||
static ScalarReferenceResult Check(uint8_t code, const std::string &input) {
|
||||
switch (code) {
|
||||
case 0x0: return { "double", FloatRegex().Match(input) };
|
||||
case 0x1: return { "float", FloatRegex().Match(input) };
|
||||
case 0x2: return { "int8", IntegerRegex().Match(input) };
|
||||
case 0x3: return { "int16", IntegerRegex().Match(input) };
|
||||
case 0x4: return { "int32", IntegerRegex().Match(input) };
|
||||
case 0x5: return { "int64", IntegerRegex().Match(input) };
|
||||
case 0x6: return { "uint8", UIntegerRegex().Match(input) };
|
||||
case 0x7: return { "uint16", UIntegerRegex().Match(input) };
|
||||
case 0x8: return { "uint32", UIntegerRegex().Match(input) };
|
||||
case 0x9: return { "uint64", UIntegerRegex().Match(input) };
|
||||
case 0xA: return { "bool", BooleanRegex().Match(input) };
|
||||
default: return { "float", FloatRegex().Match(input) };
|
||||
};
|
||||
}
|
||||
|
||||
const char *type;
|
||||
const RegexMatcher::MatchResult matched;
|
||||
};
|
||||
|
||||
bool Parse(flatbuffers::Parser &parser, const std::string &json,
|
||||
std::string *_text) {
|
||||
auto done = parser.Parse(json.c_str());
|
||||
if (done) {
|
||||
TEST_EQ(GenerateText(parser, parser.builder_.GetBufferPointer(), _text),
|
||||
true);
|
||||
} else {
|
||||
*_text = parser.error_;
|
||||
}
|
||||
return done;
|
||||
}
|
||||
|
||||
// LLVM's std::regex can overflow the stack on long inputs; limit the maximum length.
|
||||
// ./scalar_fuzzer -max_len=3000
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||
// Reserve one byte for Parser flags and one byte for repetition counter.
|
||||
if (size < 3) return 0;
|
||||
const uint8_t flags = data[0];
|
||||
// normalize to ascii alphabet
|
||||
const int extra_rep_number = data[1] >= '0' ? (data[1] - '0') : 0;
|
||||
data += 2;
|
||||
size -= 2; // bypass
|
||||
|
||||
// Guarantee 0-termination.
|
||||
const std::string original(reinterpret_cast<const char *>(data), size);
|
||||
auto input = std::string(original.c_str()); // until '\0'
|
||||
if (input.empty()) return 0;
|
||||
|
||||
// Break comments in json to avoid complexity with regex matcher.
|
||||
// The string " 12345 /* text */" will be accepted if insert it to string
|
||||
// expression: "table X { Y: " + " 12345 /* text */" + "; }.
|
||||
// But strings like this will complicate regex matcher.
|
||||
// We reject this by transforming "/* text */ 12345" to "@* text */ 12345".
|
||||
BreakSequence(input, "//", '@'); // "//" -> "@/"
|
||||
BreakSequence(input, "/*", '@'); // "/*" -> "@*"
|
||||
// Break all known scalar functions (todo: add them to regex?):
|
||||
for (auto f : { "deg", "rad", "sin", "cos", "tan", "asin", "acos", "atan" }) {
|
||||
BreakSequence(input, f, '_'); // ident -> ident
|
||||
}
|
||||
|
||||
// Extract type of scalar from 'flags' and check if the input string satisfies
|
||||
// the scalar type.
|
||||
const auto ref_res =
|
||||
ScalarReferenceResult::Check(flags & flags_scalar_type, input);
|
||||
auto &recheck = ref_res.matched;
|
||||
|
||||
// Create parser
|
||||
flatbuffers::IDLOptions opts;
|
||||
opts.force_defaults = true;
|
||||
opts.output_default_scalars_in_json = true;
|
||||
opts.indent_step = -1;
|
||||
opts.strict_json = true;
|
||||
|
||||
flatbuffers::Parser parser(opts);
|
||||
auto schema =
|
||||
"table X { Y: " + std::string(ref_res.type) + "; } root_type X;";
|
||||
TEST_EQ_FUNC(parser.Parse(schema.c_str()), true);
|
||||
|
||||
// The fuzzer can adjust the number of repetitions if side effects are found.
|
||||
// Each test should pass at least two times to ensure that the parser doesn't
|
||||
// have any hidden state or locale-dependent effects.
|
||||
for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) {
|
||||
// Each even run (0,2,4..) will test the locale-independent code.
|
||||
auto use_locale = !!test_locale && (0 == (cnt % 2));
|
||||
// Set new locale.
|
||||
if (use_locale) {
|
||||
FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, test_locale));
|
||||
}
|
||||
|
||||
// Parse original input as-is.
|
||||
auto orig_scalar = "{ \"Y\" : " + input + " }";
|
||||
std::string orig_back;
|
||||
auto orig_done = Parse(parser, orig_scalar, &orig_back);
|
||||
if (recheck.res != orig_done) {
|
||||
// look for "does not fit" or "doesn't fit" or "out of range"
|
||||
// True when the parser's error message reports a range problem, i.e. the
// text is a well-formed number that simply does not fit the target type.
// Only in that case may the parser reject an input the regex accepted.
auto parser_not_fit =
    (orig_back.find("does not fit") != std::string::npos) ||
    (orig_back.find("out of range") != std::string::npos);
|
||||
|
||||
if ((false == recheck.res) || (false == parser_not_fit)) {
|
||||
TEST_OUTPUT_LINE("Stage 1 failed: Parser(%d) != Regex(%d)", orig_done,
|
||||
recheck.res);
|
||||
TEST_EQ_STR(orig_back.c_str(),
|
||||
input.substr(recheck.pos, recheck.len).c_str());
|
||||
TEST_EQ_FUNC(orig_done, recheck.res);
|
||||
}
|
||||
}
|
||||
|
||||
// Try to make quoted string and test it.
|
||||
std::string qouted_input;
|
||||
if (true == recheck.quoted) {
|
||||
// we can't simply remove quotes, they may be nested "'12'".
|
||||
// Original string "\'12\'" converted to "'12'".
|
||||
// The string can be an invalid string by JSON rules, but after quotes
|
||||
// removed can transform to valid.
|
||||
assert(recheck.len >= 2);
|
||||
} else {
|
||||
const auto quote = (flags & flags_quotes_kind) ? '\"' : '\'';
|
||||
qouted_input = input; // copy
|
||||
qouted_input.insert(recheck.pos + recheck.len, 1, quote);
|
||||
qouted_input.insert(recheck.pos, 1, quote);
|
||||
}
|
||||
|
||||
// Test quoted version of the string
|
||||
if (!qouted_input.empty()) {
|
||||
auto fix_scalar = "{ \"Y\" : " + qouted_input + " }";
|
||||
std::string fix_back;
|
||||
auto fix_done = Parse(parser, fix_scalar, &fix_back);
|
||||
if (orig_done != fix_done) {
|
||||
TEST_OUTPUT_LINE("Stage 2 failed: Parser(%d) != Regex(%d)", fix_done,
|
||||
orig_done);
|
||||
TEST_EQ_STR(fix_back.c_str(), orig_back.c_str());
|
||||
}
|
||||
if (orig_done) { TEST_EQ_STR(fix_back.c_str(), orig_back.c_str()); }
|
||||
TEST_EQ_FUNC(fix_done, orig_done);
|
||||
}
|
||||
|
||||
// Restore locale.
|
||||
if (use_locale) { FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, "C")); }
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
9
tests/fuzzer/fuzzer_assert.h
Normal file
9
tests/fuzzer/fuzzer_assert.h
Normal file
@@ -0,0 +1,9 @@
|
||||
#ifndef FUZZER_ASSERT_IMPL_H_
|
||||
#define FUZZER_ASSERT_IMPL_H_
|
||||
|
||||
// Declare a Debug/Release-independent assert macro.
|
||||
#define fuzzer_assert_impl(x) (!!(x) ? static_cast<void>(0) : __builtin_trap())
|
||||
|
||||
extern "C" void __builtin_trap(void);
|
||||
|
||||
#endif // !FUZZER_ASSERT_IMPL_H_
|
||||
61
tests/fuzzer/readme.md
Normal file
61
tests/fuzzer/readme.md
Normal file
@@ -0,0 +1,61 @@
|
||||
# Test Flatbuffers library with help of libFuzzer
|
||||
The test suite of the Flatbuffers library has a fuzzer section with tests based on the libFuzzer library.
|
||||
|
||||
> LibFuzzer is in-process, coverage-guided, evolutionary fuzzing engine.
|
||||
LibFuzzer is linked with the library under test, and feeds fuzzed inputs to the library via a specific fuzzing entrypoint (aka “target function”);
|
||||
the fuzzer then tracks which areas of the code are reached, and generates mutations on the corpus of input data in order to maximize the code coverage.
|
||||
The code coverage information for libFuzzer is provided by LLVM’s SanitizerCoverage instrumentation.
|
||||
|
||||
For details about **libFuzzer** see: https://llvm.org/docs/LibFuzzer.html
|
||||
|
||||
To build and run these tests, the LLVM compiler (with the clang frontend) and CMake must be installed first.
|
||||
|
||||
The fuzzer section includes three tests:
|
||||
- `verifier_fuzzer` checks stability of deserialization engine for `Monster` schema;
|
||||
- `parser_fuzzer` checks stability of schema and json parser under various inputs;
|
||||
- `scalar_fuzzer` focuses on validating the parser when it parses numeric scalars in schema and/or json files;
|
||||
|
||||
## Build tests with locales
|
||||
The Flatbuffers library uses only printable-ASCII characters as the grammar alphabet for type and data declarations.
|
||||
This alphabet is fully compatible with the JSON specification and makes schema declarations fully portable.
|
||||
The Flatbuffers library is independent of the global or thread locales used by the end-user application.
|
||||
To run the fuzzer tests with a specific C locale, pass `-DFUZZ_TEST_LOCALE="<locale name>"` to CMake when configuring.
|
||||
The selected locale must be installed on the system before use.
|
||||
Command line:
|
||||
```sh
|
||||
cmake .. -DFUZZ_TEST_LOCALE="ru_RU.CP1251"
|
||||
```
|
||||
If you use VSCode, set it in the `cmake.configureSettings` section of the workspace settings:
|
||||
```json
|
||||
"cmake.configureSettings": {
|
||||
"FUZZ_TEST_LOCALE" : "ru_RU.CP1251"
|
||||
}
|
||||
```
|
||||
|
||||
## Run fuzzer
|
||||
These are examples of fuzzer run.
|
||||
Flags may vary depending on the version of the libFuzzer library.
|
||||
For details, run a fuzzer test with the help flag: `./parser_fuzzer -help=1`
|
||||
|
||||
`./verifier_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 ../.corpus_verifier/`
|
||||
|
||||
`./parser_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 ../.corpus_parser/`
|
||||
|
||||
`./scalar_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 -max_len=3000 ../.corpus_parser/ ../.seed_parser/`
|
||||
|
||||
The flag `-only_ascii=1` is useful for fast number-compatibility checking when running `scalar_fuzzer`:
|
||||
|
||||
`./scalar_fuzzer -only_ascii=1 -reduce_depth=1 -use_value_profile=1 -shrink=1 -max_len=3000 -timeout=10 -rss_limit_mb=2048 -jobs=2 ../.corpus_parser/ ../.seed_parser/`
|
||||
|
||||
## Merge (minimize) corpus
|
||||
**libFuzzer** can filter (minimize) a corpus with the help of the `-merge` flag:
|
||||
> -merge
|
||||
If set to 1, any corpus inputs from the 2nd, 3rd etc. corpus directories that trigger new code coverage will be merged into the first corpus directory.
|
||||
Defaults to 0. This flag can be used to minimize a corpus.
|
||||
|
||||
Merge several seeds to one:
|
||||
`./scalar_fuzzer -merge=1 ../.corpus/ ../.seed_1/ ../.seed_2/`
|
||||
|
||||
## Known limitations
|
||||
- The LLVM 7.0 std::regex library can overflow the stack on long inputs, so the maximum input length for a `scalar_fuzzer` run should be limited to 3000.
|
||||
Example: `./scalar_fuzzer -max_len=3000`
|
||||
@@ -1,11 +1,12 @@
|
||||
#include <assert.h>
|
||||
#include "test_assert.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# include <assert.h>
|
||||
# include <crtdbg.h>
|
||||
#endif
|
||||
|
||||
#include "test_assert.h"
|
||||
|
||||
int testing_fails = 0;
|
||||
static TestFailEventListener fail_listener_ = nullptr;
|
||||
|
||||
void TestFail(const char *expval, const char *val, const char *exp,
|
||||
const char *file, int line, const char *func) {
|
||||
@@ -14,7 +15,12 @@ void TestFail(const char *expval, const char *val, const char *exp,
|
||||
TEST_OUTPUT_LINE("TEST FAILED: %s:%d, %s in %s", file, line, exp,
|
||||
func ? func : "");
|
||||
testing_fails++;
|
||||
assert(0); // assert on first failure under debug
|
||||
|
||||
// Notify, emulate 'gtest::OnTestPartResult' event handler.
|
||||
if(fail_listener_)
|
||||
(*fail_listener_)(expval, val, exp, file, line, func);
|
||||
|
||||
assert(0); // ignored in Release if NDEBUG defined
|
||||
}
|
||||
|
||||
void TestEqStr(const char *expval, const char *val, const char *exp,
|
||||
@@ -31,7 +37,7 @@ int msvc_no_dialog_box_on_assert(int rpt_type, char *msg, int *ret_val) {
|
||||
}
|
||||
#endif
|
||||
|
||||
void InitTestEngine() {
|
||||
void InitTestEngine(TestFailEventListener listener) {
|
||||
testing_fails = 0;
|
||||
// Disable stdout buffering to prevent information lost on assertion or core
|
||||
// dump.
|
||||
@@ -49,4 +55,6 @@ void InitTestEngine() {
|
||||
_CrtSetReportHook(msvc_no_dialog_box_on_assert);
|
||||
#endif
|
||||
// clang-format on
|
||||
|
||||
fail_listener_ = listener;
|
||||
}
|
||||
|
||||
@@ -17,9 +17,17 @@
|
||||
|
||||
extern int testing_fails;
|
||||
|
||||
// Prepare test engine (MSVC assertion setup, etc)
|
||||
void InitTestEngine();
|
||||
// Listener of TestFail, like 'gtest::OnTestPartResult' event handler.
|
||||
// Called in TestFail after a failed assertion.
|
||||
typedef bool (*TestFailEventListener)(const char *expval, const char *val,
|
||||
const char *exp, const char *file, int line,
|
||||
const char *func);
|
||||
|
||||
// Prepare test engine (MSVC assertion setup, etc).
|
||||
// listener - this function will be notified on each TestFail call.
|
||||
void InitTestEngine(TestFailEventListener listener = nullptr);
|
||||
|
||||
// Write captured state to a log and terminate test run.
|
||||
void TestFail(const char *expval, const char *val, const char *exp,
|
||||
const char *file, int line, const char *func = 0);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user