From 55b30827f266a07ae869fa03b615561140b1513b Mon Sep 17 00:00:00 2001 From: Vladimir Glavnyy <31897320+vglavnyy@users.noreply.github.com> Date: Tue, 23 Oct 2018 05:44:18 +0700 Subject: [PATCH] Add fuzzer test for scalar numbers in json. (#4996) * Add fuzzer test for scalar numbers in json. Grammar-based regex used to check correctness. * Fix conversation --- include/flatbuffers/base.h | 6 +- include/flatbuffers/flatbuffers.h | 2 +- tests/fuzzer/CMakeLists.txt | 102 ++++++ tests/fuzzer/build_fuzzer.sh | 20 -- tests/fuzzer/build_run_parser_test.sh | 23 -- tests/fuzzer/build_run_verifier_test.sh | 22 -- tests/fuzzer/flatbuffers_parser_fuzzer.cc | 63 +++- tests/fuzzer/flatbuffers_scalar_fuzzer.cc | 361 ++++++++++++++++++++++ tests/fuzzer/fuzzer_assert.h | 9 + tests/fuzzer/readme.md | 61 ++++ tests/test_assert.cpp | 18 +- tests/test_assert.h | 12 +- 12 files changed, 619 insertions(+), 80 deletions(-) create mode 100644 tests/fuzzer/CMakeLists.txt delete mode 100644 tests/fuzzer/build_fuzzer.sh delete mode 100644 tests/fuzzer/build_run_parser_test.sh delete mode 100644 tests/fuzzer/build_run_verifier_test.sh create mode 100644 tests/fuzzer/flatbuffers_scalar_fuzzer.cc create mode 100644 tests/fuzzer/fuzzer_assert.h create mode 100644 tests/fuzzer/readme.md diff --git a/include/flatbuffers/base.h b/include/flatbuffers/base.h index 2656f602e..02c8f0c12 100644 --- a/include/flatbuffers/base.h +++ b/include/flatbuffers/base.h @@ -7,10 +7,12 @@ #define _CRTDBG_MAP_ALLOC #endif -#include - #if !defined(FLATBUFFERS_ASSERT) +#include #define FLATBUFFERS_ASSERT assert +#elif defined(FLATBUFFERS_ASSERT_INCLUDE) +// Include file with forward declaration +#include FLATBUFFERS_ASSERT_INCLUDE #endif #ifndef ARDUINO diff --git a/include/flatbuffers/flatbuffers.h b/include/flatbuffers/flatbuffers.h index b6b1b1cb7..a4543033b 100644 --- a/include/flatbuffers/flatbuffers.h +++ b/include/flatbuffers/flatbuffers.h @@ -1914,7 +1914,7 @@ class Verifier FLATBUFFERS_FINAL_CLASS { #endif // 
clang-format on { - assert(size_ < FLATBUFFERS_MAX_BUFFER_SIZE); + FLATBUFFERS_ASSERT(size_ < FLATBUFFERS_MAX_BUFFER_SIZE); } // Central location where any verification failures register. diff --git a/tests/fuzzer/CMakeLists.txt b/tests/fuzzer/CMakeLists.txt new file mode 100644 index 000000000..f0d1cdc79 --- /dev/null +++ b/tests/fuzzer/CMakeLists.txt @@ -0,0 +1,102 @@ +cmake_minimum_required(VERSION 3.9) + +set(CMAKE_VERBOSE_MAKEFILE ON) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +project(FlatBuffersFuzzerTests) + +set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -std=c++14 -Wall -pedantic -Werror -Wextra -Wno-unused-parameter -fsigned-char") + +set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -g -fsigned-char -fno-omit-frame-pointer") + +# Typical slowdown introduced by MemorySanitizer (memory) is 3x. +# '-fsanitize=address' not allowed with '-fsanitize=memory' +if(YES) + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer,address,undefined") +else() + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer,memory,undefined -fsanitize-memory-track-origins=2") +endif() + +set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -fsanitize-coverage=edge,trace-cmp") + +# enable link-time optimisation +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto") + +# https://llvm.org/docs/Passes.html +# save IR to see call graph +# make one bitcode file:> llvm-link *.bc -o out.bc +# print call-graph:> opt out.bc -analyze -print-callgraph &> callgraph.txt +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -save-temps -flto") + +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld") + +set(FLATBUFFERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../") + +set(FlatBuffers_Library_SRCS + ${FLATBUFFERS_DIR}/include/flatbuffers/code_generators.h + ${FLATBUFFERS_DIR}/include/flatbuffers/base.h + ${FLATBUFFERS_DIR}/include/flatbuffers/flatbuffers.h + ${FLATBUFFERS_DIR}/include/flatbuffers/hash.h + ${FLATBUFFERS_DIR}/include/flatbuffers/idl.h + 
${FLATBUFFERS_DIR}/include/flatbuffers/util.h + ${FLATBUFFERS_DIR}/include/flatbuffers/reflection.h + ${FLATBUFFERS_DIR}/include/flatbuffers/reflection_generated.h + ${FLATBUFFERS_DIR}/include/flatbuffers/stl_emulation.h + ${FLATBUFFERS_DIR}/include/flatbuffers/flexbuffers.h + ${FLATBUFFERS_DIR}/include/flatbuffers/registry.h + ${FLATBUFFERS_DIR}/include/flatbuffers/minireflect.h + ${FLATBUFFERS_DIR}/src/code_generators.cpp + ${FLATBUFFERS_DIR}/src/idl_parser.cpp + ${FLATBUFFERS_DIR}/src/idl_gen_text.cpp + ${FLATBUFFERS_DIR}/src/reflection.cpp + ${FLATBUFFERS_DIR}/src/util.cpp + ${FLATBUFFERS_DIR}/tests/test_assert.cpp +) + +include_directories(${FLATBUFFERS_DIR}/include) +include_directories(${FLATBUFFERS_DIR}/tests) +add_library(flatbuffers STATIC ${FlatBuffers_Library_SRCS}) + +# FLATBUFFERS_ASSERT should assert in Release as well. +# Redefine FLATBUFFERS_ASSERT macro definition. +# Declare as PUBLIC to cover asserts in all included header files. +target_compile_definitions(flatbuffers PUBLIC + FLATBUFFERS_ASSERT=fuzzer_assert_impl) +target_compile_definitions(flatbuffers PUBLIC + FLATBUFFERS_ASSERT_INCLUDE="${CMAKE_CURRENT_SOURCE_DIR}/fuzzer_assert.h") + +if(NOT DEFINED FLATBUFFERS_MAX_PARSING_DEPTH) + # Force checking of RecursionError in the test + set(FLATBUFFERS_MAX_PARSING_DEPTH 8) +endif() +message(STATUS "FLATBUFFERS_MAX_PARSING_DEPTH: ${FLATBUFFERS_MAX_PARSING_DEPTH}") +target_compile_definitions(flatbuffers PRIVATE FLATBUFFERS_MAX_PARSING_DEPTH=${FLATBUFFERS_MAX_PARSING_DEPTH}) + +# Setup fuzzer tests. + +# Change default ASCII locale (affects isalpha, isalnum, decimal +# delimiters, other). https://en.cppreference.com/w/cpp/locale/setlocale +if(DEFINED FUZZ_TEST_LOCALE) + # Enable locale independent code and define locale for tests.
+ # -DFUZZ_TEST_LOCALE="" - enable, but test with default locale + # -DFUZZ_TEST_LOCALE="ru_RU.CP1251" - enable and test with ru_RU.CP1251 + # Locale was installed before (Ubuntu):>sudo locale-gen ru_RU.CP1251 + add_definitions(-DFUZZ_TEST_LOCALE=\"${FUZZ_TEST_LOCALE}\") +endif() +message(STATUS "FUZZ_TEST_LOCALE: ${FUZZ_TEST_LOCALE}") + +add_executable(scalar_fuzzer flatbuffers_scalar_fuzzer.cc) +target_link_libraries(scalar_fuzzer PRIVATE flatbuffers) + +add_executable(parser_fuzzer flatbuffers_parser_fuzzer.cc) +target_link_libraries(parser_fuzzer PRIVATE flatbuffers) + +add_executable(verifier_fuzzer flatbuffers_verifier_fuzzer.cc) +target_link_libraries(verifier_fuzzer PRIVATE flatbuffers) diff --git a/tests/fuzzer/build_fuzzer.sh b/tests/fuzzer/build_fuzzer.sh deleted file mode 100644 index c013cdd8a..000000000 --- a/tests/fuzzer/build_fuzzer.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -# -# Copyright 2015 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -git clone https://chromium.googlesource.com/chromium/llvm-project/compiler-rt/lib/fuzzer -clang++ -c -g -O2 -std=c++11 fuzzer/*.cpp -Ifuzzer -ar ruv libFuzzer.a Fuzzer*.o -rm -rf fuzzer *.o diff --git a/tests/fuzzer/build_run_parser_test.sh b/tests/fuzzer/build_run_parser_test.sh deleted file mode 100644 index 7fac7182a..000000000 --- a/tests/fuzzer/build_run_parser_test.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# -# Copyright 2015 Google Inc. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -clang++ -fsanitize-coverage=edge -fsanitize=address -fsanitize=undefined \ - -g -fno-omit-frame-pointer -std=c++11 -stdlib=libstdc++ \ - -I.. -I../../include flatbuffers_parser_fuzzer.cc ../../src/idl_parser.cpp \ - ../../src/util.cpp libFuzzer.a -o fuzz_parser -mkdir -p parser_corpus -cp ../*.json ../*.fbs parser_corpus -./fuzz_parser parser_corpus diff --git a/tests/fuzzer/build_run_verifier_test.sh b/tests/fuzzer/build_run_verifier_test.sh deleted file mode 100644 index 412afffc6..000000000 --- a/tests/fuzzer/build_run_verifier_test.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# -# Copyright 2015 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -clang++ -fsanitize-coverage=edge -fsanitize=address -fsanitize=undefined \ - -g -fno-omit-frame-pointer -std=c++11 -stdlib=libstdc++ \ - -I.. 
-I../../include flatbuffers_verifier_fuzzer.cc libFuzzer.a -o fuzz_verifier -mkdir -p verifier_corpus -cp ../*.mon verifier_corpus -./fuzz_verifier verifier_corpus diff --git a/tests/fuzzer/flatbuffers_parser_fuzzer.cc b/tests/fuzzer/flatbuffers_parser_fuzzer.cc index 632b3b18d..e3e4d2d78 100644 --- a/tests/fuzzer/flatbuffers_parser_fuzzer.cc +++ b/tests/fuzzer/flatbuffers_parser_fuzzer.cc @@ -3,14 +3,67 @@ // found in the LICENSE file. #include #include +#include #include #include "flatbuffers/idl.h" -extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - flatbuffers::Parser parser; - // Guarantee 0-termination. - std::string s(reinterpret_cast(data), size); - parser.Parse(s.c_str()); +static constexpr uint8_t flags_strict_json = 0x01; +static constexpr uint8_t flags_skip_unexpected_fields_in_json = 0x02; +static constexpr uint8_t flags_allow_non_utf8 = 0x04; +// static constexpr uint8_t flags_flag_3 = 0x08; +// static constexpr uint8_t flags_flag_4 = 0x10; +// static constexpr uint8_t flags_flag_5 = 0x20; +// static constexpr uint8_t flags_flag_6 = 0x40; +// static constexpr uint8_t flags_flag_7 = 0x80; + +// See readme.md and CMakeLists.txt for details. +#ifdef FUZZ_TEST_LOCALE +static constexpr const char *test_locale = (FUZZ_TEST_LOCALE); +#else +static constexpr const char *test_locale = nullptr; +#endif + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + // Reserve one byte for Parser flags and one byte for repetition counter. + if (size < 3) return 0; + const uint8_t flags = data[0]; + // normalize to ascii alphabet + const int extra_rep_number = data[1] >= '0' ? 
(data[1] - '0') : 0; + data += 2; + size -= 2; // bypass + + const std::string original(reinterpret_cast(data), size); + auto input = std::string(original.c_str()); // until '\0' + if (input.empty()) return 0; + + flatbuffers::IDLOptions opts; + opts.strict_json = (flags & flags_strict_json); + opts.skip_unexpected_fields_in_json = + (flags & flags_skip_unexpected_fields_in_json); + opts.allow_non_utf8 = (flags & flags_allow_non_utf8); + + flatbuffers::Parser parser(opts); + + // Guarantee 0-termination in the input. + auto parse_input = input.c_str(); + + // The fuzzer can adjust the number repetition if a side-effects have found. + // Each test should pass at least two times to ensure that the parser doesn't + // have any hidden-states or locale-depended effects. + for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) { + auto use_locale = !!test_locale && (0 == (cnt % 2)); + // Set new locale. + if (use_locale) { + FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, test_locale)); + } + + // Check Parser. + parser.Parse(parse_input); + + // Restore locale. + if (use_locale) { FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, "C")); } + } + return 0; } diff --git a/tests/fuzzer/flatbuffers_scalar_fuzzer.cc b/tests/fuzzer/flatbuffers_scalar_fuzzer.cc new file mode 100644 index 000000000..cb2a6e322 --- /dev/null +++ b/tests/fuzzer/flatbuffers_scalar_fuzzer.cc @@ -0,0 +1,361 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "flatbuffers/idl.h" +#include "fuzzer_assert.h" +#include "test_assert.h" + +static_assert(__has_feature(memory_sanitizer) || + __has_feature(address_sanitizer), + "sanitizer disabled"); + +static constexpr uint8_t flags_scalar_type = 0x0F; // type of scalar value +static constexpr uint8_t flags_quotes_kind = 0x10; // quote " or ' +// reserved for future: json {named} or [unnamed] +// static constexpr uint8_t flags_json_bracer = 0x20; + +// See readme.md and CMakeLists.txt for details. 
+#ifdef FUZZ_TEST_LOCALE +static constexpr const char *test_locale = (FUZZ_TEST_LOCALE); +#else +static constexpr const char *test_locale = nullptr; +#endif + +// Utility for test run. +struct OneTimeTestInit { + // Declare trap for the flatbuffers test engine. + // This hook terminate program both in Debug and Release. + static bool TestFailListener(const char *expval, const char *val, + const char *exp, const char *file, int line, + const char *func = 0) { + (void)expval; + (void)val; + (void)exp; + (void)file; + (void)line; + (void)func; + // FLATBUFFERS_ASSERT also redefined to be fully independed from library + // implementation (see test_assert.h for details). + fuzzer_assert_impl(false); // terminate + return false; + } + + OneTimeTestInit() { + // Fuzzer test should not depend from the test engine implementation. + // This hook will terminate test if TEST_EQ/TEST_ASSERT asserted. + InitTestEngine(OneTimeTestInit::TestFailListener); + } + + static OneTimeTestInit one_time_init_; +}; + +OneTimeTestInit OneTimeTestInit::one_time_init_; + +// Find all 'subj' sub-strings and replace first character of sub-string. +// BreakSequence("testest","tes", 'X') -> "XesXest". +// BreakSequence("xxx","xx", 'Y') -> "YYx". +static void BreakSequence(std::string &s, const char *subj, char repl) { + size_t pos = 0; + while (pos = s.find(subj, pos), pos != std::string::npos) { + s.at(pos) = repl; + pos++; + } +} + +// Remove all leading and trailing symbols matched with pattern set. 
+// StripString("xy{xy}y", "xy") -> "{xy}" +static std::string StripString(const std::string &s, const char *pattern, + size_t *pos = nullptr) { + if (pos) *pos = 0; + // leading + auto first = s.find_first_not_of(pattern); + if (std::string::npos == first) return ""; + if (pos) *pos = first; + // trailing + auto last = s.find_last_not_of(pattern); + assert(last < s.length()); + assert(first <= last); + return s.substr(first, last - first + 1); +} + +class RegexMatcher { + protected: + virtual bool MatchNumber(const std::string &input) const = 0; + + public: + virtual ~RegexMatcher() = default; + + struct MatchResult { + size_t pos{ 0 }; + size_t len{ 0 }; + bool res{ false }; + bool quoted{ false }; + }; + + MatchResult Match(const std::string &input) const { + MatchResult r; + // strip leading and trailing "spaces" accepted by flatbuffer + auto test = StripString(input, "\t\r\n ", &r.pos); + r.len = test.size(); + // check quotes + if (test.size() >= 2) { + auto fch = test.front(); + auto lch = test.back(); + r.quoted = (fch == lch) && (fch == '\'' || fch == '\"'); + if (r.quoted) { + // remove quotes for regex test + test = test.substr(1, test.size() - 2); + } + } + // Fast check: + if (test.empty()) return r; + // A string with a valid scalar shouldn't have non-ascii or non-printable + // symbols. 
+ for (auto c : test) { + if ((c < ' ') || (c > '~')) return r; + } + // Check with regex + r.res = MatchNumber(test); + return r; + } + + bool MatchRegexList(const std::string &input, + const std::vector &re_list) const { + auto str = StripString(input, " "); + if (str.empty()) return false; + for (auto &re : re_list) { + std::smatch match; + if (std::regex_match(str, match, re)) return true; + } + return false; + } +}; + +class IntegerRegex : public RegexMatcher { + protected: + bool MatchNumber(const std::string &input) const override { + static const std::vector re_list = { + std::regex{ R"(^[-+]?[0-9]+$)", std::regex_constants::optimize }, + + std::regex{ + R"(^[-+]?0[xX][0-9a-fA-F]+$)", std::regex_constants::optimize } + }; + return MatchRegexList(input, re_list); + } + + public: + IntegerRegex() = default; + virtual ~IntegerRegex() = default; +}; + +class UIntegerRegex : public RegexMatcher { + protected: + bool MatchNumber(const std::string &input) const override { + static const std::vector re_list = { + std::regex{ R"(^[+]?[0-9]+$)", std::regex_constants::optimize }, + std::regex{ + R"(^[+]?0[xX][0-9a-fA-F]+$)", std::regex_constants::optimize }, + // accept -0 number + std::regex{ R"(^[-](?:0[xX])?0+$)", std::regex_constants::optimize } + }; + return MatchRegexList(input, re_list); + } + + public: + UIntegerRegex() = default; + virtual ~UIntegerRegex() = default; +}; + +class BooleanRegex : public IntegerRegex { + protected: + bool MatchNumber(const std::string &input) const override { + if (input == "true" || input == "false") return true; + return IntegerRegex::MatchNumber(input); + } + + public: + BooleanRegex() = default; + virtual ~BooleanRegex() = default; +}; + +class FloatRegex : public RegexMatcher { + protected: + bool MatchNumber(const std::string &input) const override { + static const std::vector re_list = { + // hex-float + std::regex{ + R"(^[-+]?0[xX](?:(?:[.][0-9a-fA-F]+)|(?:[0-9a-fA-F]+[.][0-9a-fA-F]*)|(?:[0-9a-fA-F]+))[pP][-+]?[0-9]+$)", 
+ std::regex_constants::optimize }, + // dec-float + std::regex{ + R"(^[-+]?(?:(?:[.][0-9]+)|(?:[0-9]+[.][0-9]*)|(?:[0-9]+))(?:[eE][-+]?[0-9]+)?$)", + std::regex_constants::optimize }, + + std::regex{ R"(^[-+]?(?:nan|inf|infinity)$)", + std::regex_constants::optimize | std::regex_constants::icase } + }; + return MatchRegexList(input, re_list); + } + + public: + FloatRegex() = default; + virtual ~FloatRegex() = default; +}; + +class ScalarReferenceResult { + private: + ScalarReferenceResult(const char *_type, RegexMatcher::MatchResult _matched) + : type(_type), matched(_matched) {} + + public: + // Decode scalar type and check if the input string satisfies the scalar type. + static ScalarReferenceResult Check(uint8_t code, const std::string &input) { + switch (code) { + case 0x0: return { "double", FloatRegex().Match(input) }; + case 0x1: return { "float", FloatRegex().Match(input) }; + case 0x2: return { "int8", IntegerRegex().Match(input) }; + case 0x3: return { "int16", IntegerRegex().Match(input) }; + case 0x4: return { "int32", IntegerRegex().Match(input) }; + case 0x5: return { "int64", IntegerRegex().Match(input) }; + case 0x6: return { "uint8", UIntegerRegex().Match(input) }; + case 0x7: return { "uint16", UIntegerRegex().Match(input) }; + case 0x8: return { "uint32", UIntegerRegex().Match(input) }; + case 0x9: return { "uint64", UIntegerRegex().Match(input) }; + case 0xA: return { "bool", BooleanRegex().Match(input) }; + default: return { "float", FloatRegex().Match(input) }; + }; + } + + const char *type; + const RegexMatcher::MatchResult matched; +}; + +bool Parse(flatbuffers::Parser &parser, const std::string &json, + std::string *_text) { + auto done = parser.Parse(json.c_str()); + if (done) { + TEST_EQ(GenerateText(parser, parser.builder_.GetBufferPointer(), _text), + true); + } else { + *_text = parser.error_; + } + return done; +} + +// llvm std::regex have problem with stack overflow, limit maximum length. 
+// ./scalar_fuzzer -max_len=3000 +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + // Reserve one byte for Parser flags and one byte for repetition counter. + if (size < 3) return 0; + const uint8_t flags = data[0]; + // normalize to ascii alphabet + const int extra_rep_number = data[1] >= '0' ? (data[1] - '0') : 0; + data += 2; + size -= 2; // bypass + + // Guarantee 0-termination. + const std::string original(reinterpret_cast(data), size); + auto input = std::string(original.c_str()); // until '\0' + if (input.empty()) return 0; + + // Break comments in json to avoid complexity with regex matcher. + // The string " 12345 /* text */" will be accepted if insert it to string + // expression: "table X { Y: " + " 12345 /* text */" + "; }. + // But strings like this will complicate regex matcher. + // We reject this by transform "/* text */ 12345" to "@* text */ 12345". + BreakSequence(input, "//", '@'); // "//" -> "@/" + BreakSequence(input, "/*", '@'); // "/*" -> "@*" + // Break all known scalar functions (todo: add them to regex?): + for (auto f : { "deg", "rad", "sin", "cos", "tan", "asin", "acos", "atan" }) { + BreakSequence(input, f, '_'); // ident -> ident + } + + // Extract type of scalar from 'flags' and check if the input string satisfies + // the scalar type. + const auto ref_res = + ScalarReferenceResult::Check(flags & flags_scalar_type, input); + auto &recheck = ref_res.matched; + + // Create parser + flatbuffers::IDLOptions opts; + opts.force_defaults = true; + opts.output_default_scalars_in_json = true; + opts.indent_step = -1; + opts.strict_json = true; + + flatbuffers::Parser parser(opts); + auto schema = + "table X { Y: " + std::string(ref_res.type) + "; } root_type X;"; + TEST_EQ_FUNC(parser.Parse(schema.c_str()), true); + + // The fuzzer can adjust the number repetition if a side-effects have found. 
+ // Each test should pass at least two times to ensure that the parser doesn't + // have any hidden-states or locale-dependent effects. + for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) { + // Each even run (0,2,4..) will test locale-independent code. + auto use_locale = !!test_locale && (0 == (cnt % 2)); + // Set new locale. + if (use_locale) { + FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, test_locale)); + } + + // Parse original input as-is. + auto orig_scalar = "{ \"Y\" : " + input + " }"; + std::string orig_back; + auto orig_done = Parse(parser, orig_scalar, &orig_back); + if (recheck.res != orig_done) { + // accept a mismatch only for "does not fit" / "out of range" errors + auto parser_not_fit = + (orig_back.find("does not fit") != std::string::npos) || + (orig_back.find("out of range") != std::string::npos); + + if ((false == recheck.res) || (false == parser_not_fit)) { + TEST_OUTPUT_LINE("Stage 1 failed: Parser(%d) != Regex(%d)", orig_done, + recheck.res); + TEST_EQ_STR(orig_back.c_str(), + input.substr(recheck.pos, recheck.len).c_str()); + TEST_EQ_FUNC(orig_done, recheck.res); + } + } + + // Try to make quoted string and test it. + std::string qouted_input; + if (true == recheck.quoted) { + // we can't simply remove quotes, they may be nested "'12'". + // Original string "\'12\'" converted to "'12'". + // The string can be an invalid string by JSON rules, but after quotes + // removed can transform to valid. + assert(recheck.len >= 2); + } else { + const auto quote = (flags & flags_quotes_kind) ?
'\"' : '\''; + qouted_input = input; // copy + qouted_input.insert(recheck.pos + recheck.len, 1, quote); + qouted_input.insert(recheck.pos, 1, quote); + } + + // Test quoted version of the string + if (!qouted_input.empty()) { + auto fix_scalar = "{ \"Y\" : " + qouted_input + " }"; + std::string fix_back; + auto fix_done = Parse(parser, fix_scalar, &fix_back); + if (orig_done != fix_done) { + TEST_OUTPUT_LINE("Stage 2 failed: Parser(%d) != Regex(%d)", fix_done, + orig_done); + TEST_EQ_STR(fix_back.c_str(), orig_back.c_str()); + } + if (orig_done) { TEST_EQ_STR(fix_back.c_str(), orig_back.c_str()); } + TEST_EQ_FUNC(fix_done, orig_done); + } + + // Restore locale. + if (use_locale) { FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, "C")); } + } + + return 0; +} diff --git a/tests/fuzzer/fuzzer_assert.h b/tests/fuzzer/fuzzer_assert.h new file mode 100644 index 000000000..afdcf78ac --- /dev/null +++ b/tests/fuzzer/fuzzer_assert.h @@ -0,0 +1,9 @@ +#ifndef FUZZER_ASSERT_IMPL_H_ +#define FUZZER_ASSERT_IMPL_H_ + +// Declare Debug/Release independed assert macro. +#define fuzzer_assert_impl(x) (!!(x) ? static_cast(0) : __builtin_trap()) + +extern "C" void __builtin_trap(void); + +#endif // !FUZZER_ASSERT_IMPL_H_ diff --git a/tests/fuzzer/readme.md b/tests/fuzzer/readme.md new file mode 100644 index 000000000..e1171be4b --- /dev/null +++ b/tests/fuzzer/readme.md @@ -0,0 +1,61 @@ +# Test Flatbuffers library with help of libFuzzer +Test suite of Flatbuffers library has fuzzer section with tests are based on libFuzzer library. + +> LibFuzzer is in-process, coverage-guided, evolutionary fuzzing engine. +LibFuzzer is linked with the library under test, and feeds fuzzed inputs to the library via a specific fuzzing entrypoint (aka “target function”); +the fuzzer then tracks which areas of the code are reached, and generates mutations on the corpus of input data in order to maximize the code coverage. 
+The code coverage information for libFuzzer is provided by LLVM’s SanitizerCoverage instrumentation. + +For details about **libFuzzer** see: https://llvm.org/docs/LibFuzzer.html + +To build and run these tests, the LLVM compiler (with the clang frontend) and CMake must be installed first. + +The fuzzer section includes three tests: +- `verifier_fuzzer` checks stability of deserialization engine for `Monster` schema; +- `parser_fuzzer` checks stability of schema and json parser under various inputs; +- `scalar_fuzzer` focuses on validating the parser while it parses numeric scalars in schema and/or json files; + +## Build tests with locales +The Flatbuffers library uses only printable-ASCII characters as the grammar alphabet for type and data declarations. +This alphabet is fully compatible with the JSON specification and makes schema declarations fully portable. +The Flatbuffers library is independent of the global or thread locales used by the end-user application. +To run the fuzzer tests with a selected C locale, pass `-DFUZZ_TEST_LOCALE=""` to CMake when configuring. +The selected locale must be installed on the system before use. +Command line: +```sh +cmake .. -DFUZZ_TEST_LOCALE="ru_RU.CP1251" +``` +If you use VSCode, use the `cmake.configureSettings` section of the workspace settings: +```json +"cmake.configureSettings": { + "FUZZ_TEST_LOCALE" : "ru_RU.CP1251" +} +``` + +## Run fuzzer +These are examples of fuzzer runs. +Flags may vary and depend on the version of the libFuzzer library.
+For details, run a fuzzer test with the help flag: `./parser_fuzzer -help=1` + +`./verifier_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 ../.corpus_verifier/` + +`./parser_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 ../.corpus_parser/` + +`./scalar_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 -max_len=3000 ../.corpus_parser/ ../.seed_parser/` + +Flag `-only_ascii=1` is useful for fast number-compatibility checking while running `scalar_fuzzer`: + +`./scalar_fuzzer -only_ascii=1 -reduce_depth=1 -use_value_profile=1 -shrink=1 -max_len=3000 -timeout=10 -rss_limit_mb=2048 -jobs=2 ../.corpus_parser/ ../.seed_parser/` + +## Merge (minimize) corpus +**libFuzzer** can filter (minimize) a corpus with the help of the `-merge` flag: +> -merge + If set to 1, any corpus inputs from the 2nd, 3rd etc. corpus directories that trigger new code coverage will be merged into the first corpus directory. + Defaults to 0. This flag can be used to minimize a corpus. + +Merge several seeds to one: +`./scalar_fuzzer -merge=1 ../.corpus/ ../.seed_1/ ../.seed_2/` + +## Known limitations +- The LLVM 7.0 std::regex library has a problem with stack overflow; the maximum input length for a `scalar_fuzzer` run should be limited to 3000. + Example: `./scalar_fuzzer -max_len=3000` diff --git a/tests/test_assert.cpp b/tests/test_assert.cpp index dd028664d..2784639e6 100644 --- a/tests/test_assert.cpp +++ b/tests/test_assert.cpp @@ -1,11 +1,12 @@ +#include +#include "test_assert.h" + #ifdef _MSC_VER -# include # include #endif -#include "test_assert.h" - int testing_fails = 0; +static TestFailEventListener fail_listener_ = nullptr; void TestFail(const char *expval, const char *val, const char *exp, const char *file, int line, const char *func) { @@ -14,7 +15,12 @@ void TestFail(const char *expval, const char *val, const char *exp, TEST_OUTPUT_LINE("TEST FAILED: %s:%d, %s in %s", file, line, exp, func ?
func : ""); testing_fails++; - assert(0); // assert on first failure under debug + + // Notify, emulate 'gtest::OnTestPartResult' event handler. + if(fail_listener_) + (*fail_listener_)(expval, val, exp, file, line, func); + + assert(0); // ignored in Release if NDEBUG defined } void TestEqStr(const char *expval, const char *val, const char *exp, @@ -31,7 +37,7 @@ int msvc_no_dialog_box_on_assert(int rpt_type, char *msg, int *ret_val) { } #endif -void InitTestEngine() { +void InitTestEngine(TestFailEventListener listener) { testing_fails = 0; // Disable stdout buffering to prevent information lost on assertion or core // dump. @@ -49,4 +55,6 @@ void InitTestEngine() { _CrtSetReportHook(msvc_no_dialog_box_on_assert); #endif // clang-format on + + fail_listener_ = listener; } diff --git a/tests/test_assert.h b/tests/test_assert.h index 768c8ff3d..bdf658d56 100644 --- a/tests/test_assert.h +++ b/tests/test_assert.h @@ -17,9 +17,17 @@ extern int testing_fails; -// Prepare test engine (MSVC assertion setup, etc) -void InitTestEngine(); +// Listener of TestFail, like 'gtest::OnTestPartResult' event handler. +// Called in TestFail after a failed assertion. +typedef bool (*TestFailEventListener)(const char *expval, const char *val, + const char *exp, const char *file, int line, + const char *func); +// Prepare test engine (MSVC assertion setup, etc). +// listener - this function will be notified on each TestFail call. +void InitTestEngine(TestFailEventListener listener = nullptr); + +// Write captured state to a log and terminate test run. void TestFail(const char *expval, const char *val, const char *exp, const char *file, int line, const char *func = 0);