Make the Parser independent from the global C-locale (#5028)

* Make the Parser independent from the global C-locale

* Set a specific test locale using the environment variable FLATBUFFERS_TEST_LOCALE

* Remove redundant static qualifiers
This commit is contained in:
Vladimir Glavnyy
2018-11-17 00:24:06 +07:00
committed by Wouter van Oortmerssen
parent d6b1ce09cf
commit 5f32f94810
15 changed files with 480 additions and 277 deletions

View File

@@ -81,17 +81,6 @@ target_compile_definitions(flatbuffers PRIVATE FLATBUFFERS_MAX_PARSING_DEPTH=8)
# Setup fuzzer tests.
# Change the default ASCII locale (affects isalpha, isalnum, decimal
# delimiters, etc.). https://en.cppreference.com/w/cpp/locale/setlocale
if(DEFINED FUZZ_TEST_LOCALE)
# Enable locale independent code and define locale for tests.
# -DFUZZ_TEST_LOCALE="" - enable, but test with default locale
# -DFUZZ_TEST_LOCALE="ru_RU.CP1251" - enable and test with ru_RU.CP1251
# Locale was installed before (Ubuntu):>sudo locale-gen ru_RU.CP1251
add_definitions(-DFUZZ_TEST_LOCALE=\"${FUZZ_TEST_LOCALE}\")
endif()
message(STATUS "FUZZ_TEST_LOCALE: ${FUZZ_TEST_LOCALE}")
add_executable(scalar_fuzzer flatbuffers_scalar_fuzzer.cc)
target_link_libraries(scalar_fuzzer PRIVATE flatbuffers)

View File

@@ -7,6 +7,7 @@
#include <string>
#include "flatbuffers/idl.h"
#include "test_init.h"
static constexpr uint8_t flags_strict_json = 0x01;
static constexpr uint8_t flags_skip_unexpected_fields_in_json = 0x02;
@@ -17,12 +18,8 @@ static constexpr uint8_t flags_allow_non_utf8 = 0x04;
// static constexpr uint8_t flags_flag_6 = 0x40;
// static constexpr uint8_t flags_flag_7 = 0x80;
// See readme.md and CMakeLists.txt for details.
#ifdef FUZZ_TEST_LOCALE
static constexpr const char *test_locale = (FUZZ_TEST_LOCALE);
#else
static constexpr const char *test_locale = nullptr;
#endif
// Utility for test run.
OneTimeTestInit OneTimeTestInit::one_time_init_;
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
// Reserve one byte for Parser flags and one byte for repetition counter.
@@ -52,17 +49,18 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
// Each test should pass at least two times to ensure that the parser doesn't
// have any hidden state or locale-dependent effects.
for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) {
auto use_locale = !!test_locale && (0 == (cnt % 2));
// Each even run (0,2,4..) will test locale-independent code.
auto use_locale = !!OneTimeTestInit::test_locale() && (0 == (cnt % 2));
// Set new locale.
if (use_locale) {
FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, test_locale));
FLATBUFFERS_ASSERT(setlocale(LC_ALL, OneTimeTestInit::test_locale()));
}
// Check Parser.
parser.Parse(parse_input);
// Restore locale.
if (use_locale) { FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, "C")); }
if (use_locale) { FLATBUFFERS_ASSERT(setlocale(LC_ALL, "C")); }
}
return 0;

View File

@@ -8,55 +8,13 @@
#include <string>
#include "flatbuffers/idl.h"
#include "fuzzer_assert.h"
#include "test_assert.h"
static_assert(__has_feature(memory_sanitizer) ||
__has_feature(address_sanitizer),
"sanitizer disabled");
#include "test_init.h"
static constexpr uint8_t flags_scalar_type = 0x0F; // type of scalar value
static constexpr uint8_t flags_quotes_kind = 0x10; // quote " or '
// reserved for future: json {named} or [unnamed]
// static constexpr uint8_t flags_json_bracer = 0x20;
// See readme.md and CMakeLists.txt for details.
#ifdef FUZZ_TEST_LOCALE
static constexpr const char *test_locale = (FUZZ_TEST_LOCALE);
#else
static constexpr const char *test_locale = nullptr;
#endif
// Test-run helper: constructing an instance registers TestFailListener with
// the Flatbuffers test engine.
struct OneTimeTestInit {
  // Failure hook handed to the Flatbuffers test engine.
  // None of the arguments are inspected; the hook calls
  // fuzzer_assert_impl(false) to terminate and then returns false.
  // FLATBUFFERS_ASSERT is also redefined to be fully independent of the
  // library implementation (see test_assert.h for details).
  static bool TestFailListener(const char *expval, const char *val,
                               const char *exp, const char *file, int line,
                               const char *func = 0) {
    static_cast<void>(expval);
    static_cast<void>(val);
    static_cast<void>(exp);
    static_cast<void>(file);
    static_cast<void>(line);
    static_cast<void>(func);
    fuzzer_assert_impl(false);  // terminate
    return false;
  }

  // The fuzzer test should not depend on the test engine implementation:
  // install the hook so that a failing TEST_EQ/TEST_ASSERT ends the test.
  OneTimeTestInit() { InitTestEngine(&OneTimeTestInit::TestFailListener); }

  // Single static instance; it is defined outside of the struct.
  static OneTimeTestInit one_time_init_;
};
OneTimeTestInit OneTimeTestInit::one_time_init_;
// Find all 'subj' sub-strings and replace first character of sub-string.
// BreakSequence("testest","tes", 'X') -> "XesXest".
// BreakSequence("xxx","xx", 'Y') -> "YYx".
@@ -248,6 +206,9 @@ bool Parse(flatbuffers::Parser &parser, const std::string &json,
return done;
}
// Utility for test run.
OneTimeTestInit OneTimeTestInit::one_time_init_;
// llvm std::regex have problem with stack overflow, limit maximum length.
// ./scalar_fuzzer -max_len=3000
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
@@ -299,23 +260,26 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
// have any hidden state or locale-dependent effects.
for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) {
// Each even run (0,2,4..) will test locale-independent code.
auto use_locale = !!test_locale && (0 == (cnt % 2));
auto use_locale = !!OneTimeTestInit::test_locale() && (0 == (cnt % 2));
// Set new locale.
if (use_locale) {
FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, test_locale));
FLATBUFFERS_ASSERT(setlocale(LC_ALL, OneTimeTestInit::test_locale()));
}
// Parse original input as-is.
auto orig_scalar = "{ \"Y\" : " + input + " }";
std::string orig_back;
auto orig_done = Parse(parser, orig_scalar, &orig_back);
if (recheck.res != orig_done) {
// Look for a range diagnostic ("does not fit" or "out of range") in the
// parser error text: the flag is true when either phrase is present.
auto parser_not_fit =
    (orig_back.find("does not fit") != std::string::npos) ||
    (orig_back.find("out of range") != std::string::npos);
auto not_fit =
(true == recheck.res)
? ((orig_back.find("does not fit") != std::string::npos) ||
(orig_back.find("out of range") != std::string::npos))
: false;
if ((false == recheck.res) || (false == parser_not_fit)) {
if (false == not_fit) {
TEST_OUTPUT_LINE("Stage 1 failed: Parser(%d) != Regex(%d)", orig_done,
recheck.res);
TEST_EQ_STR(orig_back.c_str(),
@@ -344,6 +308,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
auto fix_scalar = "{ \"Y\" : " + qouted_input + " }";
std::string fix_back;
auto fix_done = Parse(parser, fix_scalar, &fix_back);
if (orig_done != fix_done) {
TEST_OUTPUT_LINE("Stage 2 failed: Parser(%d) != Regex(%d)", fix_done,
orig_done);
@@ -353,9 +318,34 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
TEST_EQ_FUNC(fix_done, orig_done);
}
// Restore locale.
if (use_locale) { FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, "C")); }
}
// Create new parser and test default value
if (true == orig_done) {
flatbuffers::Parser def_parser(opts); // re-use options
auto def_schema = "table X { Y: " + std::string(ref_res.type) + " = " +
input + "; } root_type X;" +
"{}"; // <- with empty json {}!
auto def_done = def_parser.Parse(def_schema.c_str());
if (false == def_done) {
TEST_OUTPUT_LINE("Stage 3.1 failed with _error = %s",
def_parser.error_.c_str());
FLATBUFFERS_ASSERT(false);
}
// Compare with print.
std::string ref_string, def_string;
FLATBUFFERS_ASSERT(GenerateText(
parser, parser.builder_.GetBufferPointer(), &ref_string));
FLATBUFFERS_ASSERT(GenerateText(
def_parser, def_parser.builder_.GetBufferPointer(), &def_string));
if (ref_string != def_string) {
TEST_OUTPUT_LINE("Stage 3.2 failed: '%s' != '%s'", def_string.c_str(),
ref_string.c_str());
FLATBUFFERS_ASSERT(false);
}
}
// Restore locale.
if (use_locale) { FLATBUFFERS_ASSERT(setlocale(LC_ALL, "C")); }
}
return 0;
}

View File

@@ -15,27 +15,19 @@ The fuzzer section include three tests:
- `parser_fuzzer` checks stability of schema and json parser under various inputs;
- `scalar_fuzzer` focuses on validating the parser when it parses numeric scalars in schema and/or json files;
## Build tests with locales
Flatbuffers library use only printable-ASCII characters as characters of grammar alphabet for type and data declaration.
This alphabet is fully compatible with JSON specification and make schema declaration fully portable.
Flatbuffers library is independent from global or thread locales used by end-user application.
To run fuzzer tests with selected C-locale under test pass `-DFUZZ_TEST_LOCALE="<locale name>"` to CMake when configuring.
Selected locale must be installed in system before use.
Command line:
## Run tests with a specific locale
The grammar of the Flatbuffers library is based on printable-ASCII characters.
By design, the Flatbuffers library should be independent of the global or thread locales used by an end-user application.
Set environment variable `FLATBUFFERS_TEST_LOCALE` to run a fuzzer with a specific C-locale:
```sh
cmake .. -DFUZZ_TEST_LOCALE="ru_RU.CP1251"
```
If use VSCode, use `cmake.configureSettings` section of workspace settings:
```json
"cmake.configureSettings": {
"FUZZ_TEST_LOCALE" : "ru_RU.CP1251"
}
>FLATBUFFERS_TEST_LOCALE="" ./scalar_fuzzer
>FLATBUFFERS_TEST_LOCALE="ru_RU.CP1251" ./parser_fuzzer
```
## Run fuzzer
These are examples of fuzzer run.
Flags may vary and depend from version of libFuzzer library.
For detail, run a fuzzer test with help flag: `./parser_fuzzer -help=1`
These are examples of running a fuzzer.
Flags may vary and depend on a version of the libFuzzer library.
For details, run a fuzzer with `-help` flag: `./parser_fuzzer -help=1`
`./verifier_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 ../.corpus_verifier/`
@@ -43,18 +35,20 @@ For detail, run a fuzzer test with help flag: `./parser_fuzzer -help=1`
`./scalar_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 -max_len=3000 ../.corpus_parser/ ../.seed_parser/`
Flag `-only_ascii=1` is useful for fast number-compatibility checking while running `scalar_fuzzer`:
Flag `-only_ascii=1` is useful for fast number-compatibility checking while running `scalar_fuzzer`:
`./scalar_fuzzer -only_ascii=1 -reduce_depth=1 -use_value_profile=1 -shrink=1 -max_len=3000 -timeout=10 -rss_limit_mb=2048 -jobs=2 ../.corpus_parser/ ../.seed_parser/`
Run with a specific C-locale:
`FLATBUFFERS_TEST_LOCALE="ru_RU.CP1251" ./scalar_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 -max_len=3000 -timeout=10 -rss_limit_mb=2048 ../.corpus_parser/ ../.seed_parser/`
## Merge (minimize) corpus
**libFuzzer** allows filtering (minimizing) a corpus with the help of the `-merge` flag:
> -merge
If set to 1, any corpus inputs from the 2nd, 3rd etc. corpus directories that trigger new code coverage will be merged into the first corpus directory.
Defaults to 0. This flag can be used to minimize a corpus.
Merge several seeds to one:
`./scalar_fuzzer -merge=1 ../.corpus/ ../.seed_1/ ../.seed_2/`
Merge several seeds to one (a new collected corpus to the seed collection, for example):
`./scalar_fuzzer -merge=1 ../.seed_parser/ ../.corpus_parser/`
## Known limitations
- The LLVM 7.0 std::regex library has a stack-overflow problem, so the maximum input length for a `scalar_fuzzer` run should be limited to 3000.

56
tests/fuzzer/test_init.h Normal file
View File

@@ -0,0 +1,56 @@
#ifndef FUZZER_TEST_INIT_H_
#define FUZZER_TEST_INIT_H_
#include "fuzzer_assert.h"
#include "test_assert.h"
static_assert(__has_feature(memory_sanitizer) ||
__has_feature(address_sanitizer),
"sanitizer disabled");
// Utility for a test run: constructing the single static instance installs
// the fail listener into the Flatbuffers test engine and reads the optional
// test locale from the FLATBUFFERS_TEST_LOCALE environment variable.
struct OneTimeTestInit {
  // Declare a trap for the Flatbuffers test engine.
  // This hook terminates the program both in Debug and Release.
  // All arguments are ignored; the function calls fuzzer_assert_impl(false)
  // and then returns false.
  static bool TestFailListener(const char *expval, const char *val,
                               const char *exp, const char *file, int line,
                               const char *func = nullptr) {
    (void)expval;
    (void)val;
    (void)exp;
    (void)file;
    (void)line;
    (void)func;
    // FLATBUFFERS_ASSERT redefined to be fully independent of the Flatbuffers
    // library implementation (see test_assert.h for details).
    fuzzer_assert_impl(false);  // terminate
    return false;
  }

  OneTimeTestInit() : has_locale_(false) {
    // Fuzzer test should be independent of the test engine implementation.
    // This hook will terminate the test if TEST_EQ/TEST_ASSERT asserted.
    InitTestEngine(OneTimeTestInit::TestFailListener);

    // Read a locale for the test.
    if (flatbuffers::ReadEnvironmentVariable("FLATBUFFERS_TEST_LOCALE",
                                             &test_locale_)) {
      // Log the raw value before the surrounding quotes are stripped.
      TEST_OUTPUT_LINE("The environment variable FLATBUFFERS_TEST_LOCALE=%s",
                       test_locale_.c_str());
      test_locale_ = flatbuffers::RemoveStringQuotes(test_locale_);
      has_locale_ = true;
    }
  }

  // Returns the locale name read from FLATBUFFERS_TEST_LOCALE, or nullptr when
  // the variable was not read. Callers test the result for null before
  // passing it to setlocale(), so the "no locale" case must be nullptr and
  // not an empty string. The returned pointer refers to storage owned by
  // one_time_init_.
  static const char *test_locale() {
    return one_time_init_.has_locale_ ? one_time_init_.test_locale_.c_str()
                                      : nullptr;
  }

  bool has_locale_;          // true once FLATBUFFERS_TEST_LOCALE has been read
  std::string test_locale_;  // locale name with surrounding quotes removed
  static OneTimeTestInit one_time_init_;
};
#endif // !FUZZER_TEST_INIT_H_