Make the Parser independent from the global C-locale (#5028)

* Make the Parser independent from the global C-locale

* Set a specific test locale using the environment variable FLATBUFFERS_TEST_LOCALE

* Remove redundant static qualifiers
This commit is contained in:
Vladimir Glavnyy
2018-11-17 00:24:06 +07:00
committed by Wouter van Oortmerssen
parent d6b1ce09cf
commit 5f32f94810
15 changed files with 480 additions and 277 deletions

View File

@@ -81,17 +81,6 @@ target_compile_definitions(flatbuffers PRIVATE FLATBUFFERS_MAX_PARSING_DEPTH=8)
# Setup fuzzer tests.
# Change default ASCII locale (affects to isalpha, isalnum, decimal
# delimiters, other). https://en.cppreference.com/w/cpp/locale/setlocale
if(DEFINED FUZZ_TEST_LOCALE)
# Enable locale independent code and define locale for tests.
# -DFUZZ_TEST_LOCALE="" - enable, but test with default locale
# -DFUZZ_TEST_LOCALE="ru_RU.CP1251" - enable and test with ru_RU.CP1251
# Locale was installed before (Ubuntu):>sudo locale-gen ru_RU.CP1251
add_definitions(-DFUZZ_TEST_LOCALE=\"${FUZZ_TEST_LOCALE}\")
endif()
message(STATUS "FUZZ_TEST_LOCALE: ${FUZZ_TEST_LOCALE}")
add_executable(scalar_fuzzer flatbuffers_scalar_fuzzer.cc)
target_link_libraries(scalar_fuzzer PRIVATE flatbuffers)

View File

@@ -7,6 +7,7 @@
#include <string>
#include "flatbuffers/idl.h"
#include "test_init.h"
static constexpr uint8_t flags_strict_json = 0x01;
static constexpr uint8_t flags_skip_unexpected_fields_in_json = 0x02;
@@ -17,12 +18,8 @@ static constexpr uint8_t flags_allow_non_utf8 = 0x04;
// static constexpr uint8_t flags_flag_6 = 0x40;
// static constexpr uint8_t flags_flag_7 = 0x80;
// See readme.md and CMakeLists.txt for details.
#ifdef FUZZ_TEST_LOCALE
static constexpr const char *test_locale = (FUZZ_TEST_LOCALE);
#else
static constexpr const char *test_locale = nullptr;
#endif
// Utility for test run.
OneTimeTestInit OneTimeTestInit::one_time_init_;
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
// Reserve one byte for Parser flags and one byte for repetition counter.
@@ -52,17 +49,18 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
// Each test should pass at least two times to ensure that the parser doesn't
// have any hidden-states or locale-depended effects.
for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) {
auto use_locale = !!test_locale && (0 == (cnt % 2));
// Each even run (0,2,4..) will test locale independed code.
auto use_locale = !!OneTimeTestInit::test_locale() && (0 == (cnt % 2));
// Set new locale.
if (use_locale) {
FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, test_locale));
FLATBUFFERS_ASSERT(setlocale(LC_ALL, OneTimeTestInit::test_locale()));
}
// Check Parser.
parser.Parse(parse_input);
// Restore locale.
if (use_locale) { FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, "C")); }
if (use_locale) { FLATBUFFERS_ASSERT(setlocale(LC_ALL, "C")); }
}
return 0;

View File

@@ -8,55 +8,13 @@
#include <string>
#include "flatbuffers/idl.h"
#include "fuzzer_assert.h"
#include "test_assert.h"
static_assert(__has_feature(memory_sanitizer) ||
__has_feature(address_sanitizer),
"sanitizer disabled");
#include "test_init.h"
static constexpr uint8_t flags_scalar_type = 0x0F; // type of scalar value
static constexpr uint8_t flags_quotes_kind = 0x10; // quote " or '
// reserved for future: json {named} or [unnamed]
// static constexpr uint8_t flags_json_bracer = 0x20;
// See readme.md and CMakeLists.txt for details.
#ifdef FUZZ_TEST_LOCALE
static constexpr const char *test_locale = (FUZZ_TEST_LOCALE);
#else
static constexpr const char *test_locale = nullptr;
#endif
// Utility for test run.
struct OneTimeTestInit {
// Declare trap for the flatbuffers test engine.
// This hook terminate program both in Debug and Release.
static bool TestFailListener(const char *expval, const char *val,
const char *exp, const char *file, int line,
const char *func = 0) {
(void)expval;
(void)val;
(void)exp;
(void)file;
(void)line;
(void)func;
// FLATBUFFERS_ASSERT also redefined to be fully independed from library
// implementation (see test_assert.h for details).
fuzzer_assert_impl(false); // terminate
return false;
}
OneTimeTestInit() {
// Fuzzer test should not depend from the test engine implementation.
// This hook will terminate test if TEST_EQ/TEST_ASSERT asserted.
InitTestEngine(OneTimeTestInit::TestFailListener);
}
static OneTimeTestInit one_time_init_;
};
OneTimeTestInit OneTimeTestInit::one_time_init_;
// Find all 'subj' sub-strings and replace first character of sub-string.
// BreakSequence("testest","tes", 'X') -> "XesXest".
// BreakSequence("xxx","xx", 'Y') -> "YYx".
@@ -248,6 +206,9 @@ bool Parse(flatbuffers::Parser &parser, const std::string &json,
return done;
}
// Utility for test run.
OneTimeTestInit OneTimeTestInit::one_time_init_;
// llvm std::regex have problem with stack overflow, limit maximum length.
// ./scalar_fuzzer -max_len=3000
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
@@ -299,23 +260,26 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
// have any hidden-states or locale-depended effects.
for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) {
// Each even run (0,2,4..) will test locale independed code.
auto use_locale = !!test_locale && (0 == (cnt % 2));
auto use_locale = !!OneTimeTestInit::test_locale() && (0 == (cnt % 2));
// Set new locale.
if (use_locale) {
FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, test_locale));
FLATBUFFERS_ASSERT(setlocale(LC_ALL, OneTimeTestInit::test_locale()));
}
// Parse original input as-is.
auto orig_scalar = "{ \"Y\" : " + input + " }";
std::string orig_back;
auto orig_done = Parse(parser, orig_scalar, &orig_back);
if (recheck.res != orig_done) {
// look for "does not fit" or "doesn't fit" or "out of range"
auto parser_not_fit =
(orig_back.find("does not fit") == std::string::npos) ||
(orig_back.find("out of range") == std::string::npos);
auto not_fit =
(true == recheck.res)
? ((orig_back.find("does not fit") != std::string::npos) ||
(orig_back.find("out of range") != std::string::npos))
: false;
if ((false == recheck.res) || (false == parser_not_fit)) {
if (false == not_fit) {
TEST_OUTPUT_LINE("Stage 1 failed: Parser(%d) != Regex(%d)", orig_done,
recheck.res);
TEST_EQ_STR(orig_back.c_str(),
@@ -344,6 +308,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
auto fix_scalar = "{ \"Y\" : " + qouted_input + " }";
std::string fix_back;
auto fix_done = Parse(parser, fix_scalar, &fix_back);
if (orig_done != fix_done) {
TEST_OUTPUT_LINE("Stage 2 failed: Parser(%d) != Regex(%d)", fix_done,
orig_done);
@@ -353,9 +318,34 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
TEST_EQ_FUNC(fix_done, orig_done);
}
// Restore locale.
if (use_locale) { FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, "C")); }
}
// Create new parser and test default value
if (true == orig_done) {
flatbuffers::Parser def_parser(opts); // re-use options
auto def_schema = "table X { Y: " + std::string(ref_res.type) + " = " +
input + "; } root_type X;" +
"{}"; // <- with empty json {}!
auto def_done = def_parser.Parse(def_schema.c_str());
if (false == def_done) {
TEST_OUTPUT_LINE("Stage 3.1 failed with _error = %s",
def_parser.error_.c_str());
FLATBUFFERS_ASSERT(false);
}
// Compare with print.
std::string ref_string, def_string;
FLATBUFFERS_ASSERT(GenerateText(
parser, parser.builder_.GetBufferPointer(), &ref_string));
FLATBUFFERS_ASSERT(GenerateText(
def_parser, def_parser.builder_.GetBufferPointer(), &def_string));
if (ref_string != def_string) {
TEST_OUTPUT_LINE("Stage 3.2 failed: '%s' != '%s'", def_string.c_str(),
ref_string.c_str());
FLATBUFFERS_ASSERT(false);
}
}
// Restore locale.
if (use_locale) { FLATBUFFERS_ASSERT(setlocale(LC_ALL, "C")); }
}
return 0;
}

View File

@@ -15,27 +15,19 @@ The fuzzer section include three tests:
- `parser_fuzzer` checks stability of schema and json parser under various inputs;
- `scalar_parser` focused on validation of the parser while parse numeric scalars in schema and/or json files;
## Build tests with locales
Flatbuffers library use only printable-ASCII characters as characters of grammar alphabet for type and data declaration.
This alphabet is fully compatible with JSON specification and make schema declaration fully portable.
Flatbuffers library is independent from global or thread locales used by end-user application.
To run fuzzer tests with selected C-locale under test pass `-DFUZZ_TEST_LOCALE="<locale name>"` to CMake when configuring.
Selected locale must be installed in system before use.
Command line:
## Run tests with a specific locale
The grammar of the Flatbuffers library is based on printable-ASCII characters.
By design, the Flatbuffers library should be independent of the global or thread locales used by an end-user application.
Set environment variable `FLATBUFFERS_TEST_LOCALE` to run a fuzzer with a specific C-locale:
```sh
cmake .. -DFUZZ_TEST_LOCALE="ru_RU.CP1251"
```
If use VSCode, use `cmake.configureSettings` section of workspace settings:
```json
"cmake.configureSettings": {
"FUZZ_TEST_LOCALE" : "ru_RU.CP1251"
}
>FLATBUFFERS_TEST_LOCALE="" ./scalar_parser
>FLATBUFFERS_TEST_LOCALE="ru_RU.CP1251" ./parser_fuzzer
```
## Run fuzzer
These are examples of fuzzer run.
Flags may vary and depend from version of libFuzzer library.
For detail, run a fuzzer test with help flag: `./parser_fuzzer -help=1`
These are examples of running a fuzzer.
Flags may vary and depend on a version of the libFuzzer library.
For details, run a fuzzer with `-help` flag: `./parser_fuzzer -help=1`
`./verifier_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 ../.corpus_verifier/`
@@ -43,18 +35,20 @@ For detail, run a fuzzer test with help flag: `./parser_fuzzer -help=1`
`./scalar_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 -max_len=3000 ../.corpus_parser/ ../.seed_parser/`
Flag `-only_ascii=1` is useful for fast number-compatibility checking while run `scalar_fuzzer`:
Flag `-only_ascii=1` is useful for fast number-compatibility checking while run `scalar_fuzzer`:
`./scalar_fuzzer -only_ascii=1 -reduce_depth=1 -use_value_profile=1 -shrink=1 -max_len=3000 -timeout=10 -rss_limit_mb=2048 -jobs=2 ../.corpus_parser/ ../.seed_parser/`
Run with a specific C-locale:
`FLATBUFFERS_TEST_LOCALE="ru_RU.CP1251" ./scalar_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 -max_len=3000 -timeout=10 -rss_limit_mb=2048 ../.corpus_parser/ ../.seed_parser/`
## Merge (minimize) corpus
The **libFuzzer** allow to filter (minimize) corpus with help of `-merge` flag:
> -merge
If set to 1, any corpus inputs from the 2nd, 3rd etc. corpus directories that trigger new code coverage will be merged into the first corpus directory.
Defaults to 0. This flag can be used to minimize a corpus.
Merge several seeds to one:
`./scalar_fuzzer -merge=1 ../.corpus/ ../.seed_1/ ../.seed_2/`
Merge several seeds to one (a new collected corpus to the seed collection, for example):
`./scalar_fuzzer -merge=1 ../.seed_parser/ ../.corpus_parser/`
## Know limitations
- LLVM 7.0 std::regex library has problem with stack overflow, maximum length of input for `scalar_fuzzer` run should be limited to 3000.

56
tests/fuzzer/test_init.h Normal file
View File

@@ -0,0 +1,56 @@
#ifndef FUZZER_TEST_INIT_H_
#define FUZZER_TEST_INIT_H_
#include "fuzzer_assert.h"
#include "test_assert.h"
static_assert(__has_feature(memory_sanitizer) ||
__has_feature(address_sanitizer),
"sanitizer disabled");
// Utility for test run.
struct OneTimeTestInit {
// Declare trap for the Flatbuffers test engine.
// This hook terminate program both in Debug and Release.
static bool TestFailListener(const char *expval, const char *val,
const char *exp, const char *file, int line,
const char *func = 0) {
(void)expval;
(void)val;
(void)exp;
(void)file;
(void)line;
(void)func;
// FLATBUFFERS_ASSERT redefined to be fully independent of the Flatbuffers
// library implementation (see test_assert.h for details).
fuzzer_assert_impl(false); // terminate
return false;
}
OneTimeTestInit() : has_locale_(false) {
// Fuzzer test should be independent of the test engine implementation.
// This hook will terminate test if TEST_EQ/TEST_ASSERT asserted.
InitTestEngine(OneTimeTestInit::TestFailListener);
// Read a locale for the test.
if (flatbuffers::ReadEnvironmentVariable("FLATBUFFERS_TEST_LOCALE",
&test_locale_)) {
TEST_OUTPUT_LINE("The environment variable FLATBUFFERS_TEST_LOCALE=%s",
test_locale_.c_str());
test_locale_ = flatbuffers::RemoveStringQuotes(test_locale_);
has_locale_ = true;
}
}
static const char *test_locale() {
return one_time_init_.has_locale_ ? nullptr
: one_time_init_.test_locale_.c_str();
}
bool has_locale_;
std::string test_locale_;
static OneTimeTestInit one_time_init_;
};
#endif // !FUZZER_TEST_INIT_H_

View File

@@ -1535,7 +1535,7 @@ void ValidFloatTest() {
// Old MSVC versions may have problem with this check.
// https://www.exploringbinary.com/visual-c-plus-plus-strtod-still-broken/
TEST_EQ(TestValue<double>("{ Y:6.9294956446009195e15 }", "double"),
6929495644600920);
6929495644600920.0);
// check nan's
TEST_EQ(std::isnan(TestValue<double>("{ Y:nan }", "double")), true);
TEST_EQ(std::isnan(TestValue<float>("{ Y:nan }", "float")), true);
@@ -1663,6 +1663,7 @@ void NumericUtilsTestInteger(const char *lower, const char *upper) {
template<typename T>
void NumericUtilsTestFloat(const char *lower, const char *upper) {
T f;
TEST_EQ(flatbuffers::StringToNumber("", &f), false);
TEST_EQ(flatbuffers::StringToNumber("1q", &f), false);
TEST_EQ(f, 0);
TEST_EQ(flatbuffers::StringToNumber(upper, &f), true);
@@ -2458,6 +2459,18 @@ int FlatBufferTests() {
int main(int /*argc*/, const char * /*argv*/ []) {
InitTestEngine();
std::string req_locale;
if (flatbuffers::ReadEnvironmentVariable("FLATBUFFERS_TEST_LOCALE",
&req_locale)) {
TEST_OUTPUT_LINE("The environment variable FLATBUFFERS_TEST_LOCALE=%s",
req_locale.c_str());
req_locale = flatbuffers::RemoveStringQuotes(req_locale);
std::string the_locale;
TEST_ASSERT_FUNC(
flatbuffers::SetGlobalTestLocale(req_locale.c_str(), &the_locale));
TEST_OUTPUT_LINE("The global C-locale changed: %s", the_locale.c_str());
}
FlatBufferTests();
FlatBufferBuilderTest();

View File

@@ -17,10 +17,9 @@ void TestFail(const char *expval, const char *val, const char *exp,
testing_fails++;
// Notify, emulate 'gtest::OnTestPartResult' event handler.
if(fail_listener_)
(*fail_listener_)(expval, val, exp, file, line, func);
if (fail_listener_) (*fail_listener_)(expval, val, exp, file, line, func);
assert(0); // ignored in Release if NDEBUG defined
assert(0); // ignored in Release if NDEBUG defined
}
void TestEqStr(const char *expval, const char *val, const char *exp,

View File

@@ -1,18 +1,33 @@
#ifndef TEST_ASSERT_H
#define TEST_ASSERT_H
#include "flatbuffers/flatbuffers.h"
#include "flatbuffers/util.h"
// clang-format off
#ifdef __ANDROID__
#include <android/log.h>
#include <android/log.h>
#define TEST_OUTPUT_LINE(...) \
__android_log_print(ANDROID_LOG_INFO, "FlatBuffers", __VA_ARGS__)
__android_log_print(ANDROID_LOG_INFO, "FlatBuffers", __VA_ARGS__)
#define FLATBUFFERS_NO_FILE_TESTS
#else
#define TEST_OUTPUT_LINE(...) \
{ printf(__VA_ARGS__); printf("\n"); }
#define TEST_OUTPUT_LINE(...) \
{ printf(__VA_ARGS__); printf("\n"); }
#endif
#define TEST_EQ(exp, val) TestEq(exp, val, #exp, __FILE__, __LINE__)
#define TEST_ASSERT(exp) TestEq(exp, true, #exp, __FILE__, __LINE__)
#define TEST_NOTNULL(exp) TestEq(exp == NULL, false, #exp, __FILE__, __LINE__)
#define TEST_EQ_STR(exp, val) TestEqStr(exp, val, #exp, __FILE__, __LINE__)
#ifdef WIN32
#define TEST_ASSERT_FUNC(exp) TestEq(exp, true, #exp, __FILE__, __LINE__, __FUNCTION__)
#define TEST_EQ_FUNC(exp, val) TestEq(exp, val, #exp, __FILE__, __LINE__, __FUNCTION__)
#else
#define TEST_ASSERT_FUNC(exp) TestEq(exp, true, #exp, __FILE__, __LINE__, __PRETTY_FUNCTION__)
#define TEST_EQ_FUNC(exp, val) TestEq(exp, val, #exp, __FILE__, __LINE__, __PRETTY_FUNCTION__)
#endif
// clang-format on
extern int testing_fails;
@@ -20,8 +35,8 @@ extern int testing_fails;
// Listener of TestFail, like 'gtest::OnTestPartResult' event handler.
// Called in TestFail after a failed assertion.
typedef bool (*TestFailEventListener)(const char *expval, const char *val,
const char *exp, const char *file, int line,
const char *func);
const char *exp, const char *file,
int line, const char *func);
// Prepare test engine (MSVC assertion setup, etc).
// listener - this function will be notified on each TestFail call.
@@ -35,23 +50,12 @@ void TestEqStr(const char *expval, const char *val, const char *exp,
const char *file, int line);
template<typename T, typename U>
void TestEq(T expval, U val, const char *exp, const char *file, int line, const char *func = 0) {
void TestEq(T expval, U val, const char *exp, const char *file, int line,
const char *func = 0) {
if (U(expval) != val) {
TestFail(flatbuffers::NumToString(expval).c_str(),
flatbuffers::NumToString(val).c_str(), exp, file, line, func);
}
}
#define TEST_EQ(exp, val) TestEq(exp, val, #exp, __FILE__, __LINE__)
#define TEST_ASSERT(exp) TestEq(exp, true, #exp, __FILE__, __LINE__)
#ifdef WIN32
#define TEST_ASSERT_FUNC(exp) TestEq(exp, true, #exp, __FILE__, __LINE__, __FUNCTION__)
#define TEST_EQ_FUNC(exp, val) TestEq(exp, val, #exp, __FILE__, __LINE__, __FUNCTION__)
#else
#define TEST_ASSERT_FUNC(exp) TestEq(exp, true, #exp, __FILE__, __LINE__, __PRETTY_FUNCTION__)
#define TEST_EQ_FUNC(exp, val) TestEq(exp, val, #exp, __FILE__, __LINE__, __PRETTY_FUNCTION__)
#endif
#define TEST_NOTNULL(exp) TestEq(exp == NULL, false, #exp, __FILE__, __LINE__)
#define TEST_EQ_STR(exp, val) TestEqStr(exp, val, #exp, __FILE__, __LINE__)
#endif // TEST_ASSERT_H
#endif // !TEST_ASSERT_H