[C++] Add ParseJson(), Parser(Parser&&), update fuzzers (#6284)

- add a new method ParseJson to minimize failures during fuzzing - add default (conditional) move-constructor for Parser - add a new monster_fuzzer - switch fuzzers to C++17 and `test/cpp17` generated code
2020-11-24 01:17:44 +07:00
parent bc518a5127
commit c27bc2d76f
16 changed files with 410 additions and 41 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -123,6 +123,7 @@ dart/doc/api/
 Cargo.lock
 .corpus**
 .seed**
+.crash**
 grpc/google/
 **/Package.resolved
 .clangd/**
--- a/include/flatbuffers/base.h
+++ b/include/flatbuffers/base.h
@@ -197,6 +197,7 @@ namespace flatbuffers {
 #if (!defined(_MSC_VER) || _MSC_FULL_VER >= 180020827) && \
    (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 404)) || \
    defined(__clang__)
+  #define FLATBUFFERS_DEFAULT_DECLARATION
  #define FLATBUFFERS_DELETE_FUNC(func) func = delete;
 #else
  #define FLATBUFFERS_DELETE_FUNC(func) private: func;
--- a/include/flatbuffers/flatbuffers.h
+++ b/include/flatbuffers/flatbuffers.h
@@ -1240,7 +1240,7 @@ class FlatBufferBuilder {
  }

  /// @brief Get the serialized buffer (after you call `Finish()`) as a span.
-  /// @return Returns a constructed flatbuffers::span that is a view over the 
+  /// @return Returns a constructed flatbuffers::span that is a view over the
  /// FlatBuffer data inside the buffer.
  flatbuffers::span<uint8_t> GetBufferSpan() const {
    Finished();
--- a/include/flatbuffers/flexbuffers.h
+++ b/include/flatbuffers/flexbuffers.h
@@ -908,6 +908,11 @@ class Builder FLATBUFFERS_FINAL_CLASS {
    buf_.clear();
  }

+#ifdef FLATBUFFERS_DEFAULT_DECLARATION
+  Builder(Builder &&) = default;
+  Builder &operator=(Builder &&) = default;
+#endif
+
  /// @brief Get the serialized buffer (after you call `Finish()`).
  /// @return Returns a vector owned by this class.
  const std::vector<uint8_t> &GetBuffer() const {
--- a/include/flatbuffers/idl.h
+++ b/include/flatbuffers/idl.h
@@ -804,6 +804,11 @@ class Parser : public ParserState {
    }
  }

+#ifdef FLATBUFFERS_DEFAULT_DECLARATION
+  Parser(Parser&&) = default;
+  Parser& operator=(Parser&&) = default;
+#endif
+
  // Parse the string containing either schema or JSON data, which will
  // populate the SymbolTable's or the FlatBufferBuilder above.
  // include_paths is used to resolve any include statements, and typically
@@ -818,6 +823,8 @@ class Parser : public ParserState {
  bool Parse(const char *_source, const char **include_paths = nullptr,
             const char *source_filename = nullptr);

+  bool ParseJson(const char *json, const char *json_filename = nullptr);
+
  // Set the root type. May override the one set in the schema.
  bool SetRootType(const char *name);

@@ -945,6 +952,7 @@ class Parser : public ParserState {
                                    const char **include_paths,
                                    const char *source_filename,
                                    const char *include_filename);
+  FLATBUFFERS_CHECKED_ERROR DoParseJson();
  FLATBUFFERS_CHECKED_ERROR CheckClash(std::vector<FieldDef *> &fields,
                                       StructDef *struct_def,
                                       const char *suffix, BaseType baseType);
--- a/src/idl_parser.cpp
+++ b/src/idl_parser.cpp
@@ -2923,6 +2923,15 @@ bool Parser::Parse(const char *source, const char **include_paths,
  return r;
 }

+bool Parser::ParseJson(const char *json, const char *json_filename) {
+  FLATBUFFERS_ASSERT(0 == recurse_protection_counter);
+  builder_.Clear();
+  bool ok = !StartParseFile(json, json_filename).Check();
+  ok = ok && !DoParseJson().Check();  // runs only if StartParseFile succeeded
+  FLATBUFFERS_ASSERT(0 == recurse_protection_counter);
+  return ok;
+}
+
 CheckedError Parser::StartParseFile(const char *source,
                                    const char *source_filename) {
  file_being_parsed_ = source_filename ? source_filename : "";
@@ -3103,25 +3112,7 @@ CheckedError Parser::DoParse(const char *source, const char **include_paths,
    } else if (IsIdent("namespace")) {
      ECHECK(ParseNamespace());
    } else if (token_ == '{') {
-      if (!root_struct_def_)
-        return Error("no root type set to parse json with");
-      if (builder_.GetSize()) {
-        return Error("cannot have more than one json object in a file");
-      }
-      uoffset_t toff;
-      ECHECK(ParseTable(*root_struct_def_, nullptr, &toff));
-      if (opts.size_prefixed) {
-        builder_.FinishSizePrefixed(
-            Offset<Table>(toff),
-            file_identifier_.length() ? file_identifier_.c_str() : nullptr);
-      } else {
-        builder_.Finish(Offset<Table>(toff), file_identifier_.length()
-                                                 ? file_identifier_.c_str()
-                                                 : nullptr);
-      }
-      // Check that JSON file doesn't contain more objects or IDL directives.
-      // Comments after JSON are allowed.
-      EXPECT(kTokenEof);
+      ECHECK(DoParseJson());
    } else if (IsIdent("enum")) {
      ECHECK(ParseEnum(false, nullptr));
    } else if (IsIdent("union")) {
@@ -3172,6 +3163,34 @@ CheckedError Parser::DoParse(const char *source, const char **include_paths,
  return NoError();
 }

+// Handles a JSON document: parses one object of the root type, finishes
+// builder_ with it, and then requires the end of input.
+CheckedError Parser::DoParseJson() {
+  if (token_ == '{') {
+    if (!root_struct_def_)
+      return Error("no root type set to parse json with");
+    if (builder_.GetSize()) {
+      return Error("cannot have more than one json object in a file");
+    }
+    uoffset_t toff;
+    ECHECK(ParseTable(*root_struct_def_, nullptr, &toff));
+    const char *const file_id =
+        file_identifier_.length() ? file_identifier_.c_str() : nullptr;
+    const auto root = Offset<Table>(toff);
+    if (opts.size_prefixed) {
+      builder_.FinishSizePrefixed(root, file_id);
+    } else {
+      builder_.Finish(root, file_id);
+    }
+  } else {
+    EXPECT('{');
+  }
+  // Check that JSON file doesn't contain more objects or IDL directives.
+  // Comments after JSON are allowed.
+  EXPECT(kTokenEof);
+  return NoError();
+}
+
 std::set<std::string> Parser::GetIncludedFilesRecursive(
    const std::string &file_name) const {
  std::set<std::string> included_files;
--- a/tests/fuzzer/CMakeLists.txt
+++ b/tests/fuzzer/CMakeLists.txt
@@ -29,7 +29,7 @@ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld")

 add_compile_options(
  # -stdlib=libc++ # Use Clang libc++ instead of GNU.
-  -std=c++14
+  -std=c++17
  -Wall
  -pedantic
  -Werror
@@ -52,7 +52,9 @@ add_library(fuzzer_config INTERFACE)
 target_compile_options(
  fuzzer_config
  INTERFACE
-    #-fsanitize-coverage=edge,trace-cmp
+    $<$<NOT:$<BOOL:${OSS_FUZZ}>>:
+      -fsanitize-coverage=edge,trace-cmp
+    >
    $<$<BOOL:${USE_ASAN}>:
      -fsanitize=fuzzer,undefined,address
    >
@@ -131,6 +133,9 @@ target_link_libraries(parser_fuzzer PRIVATE flatbuffers_fuzzed)
 add_executable(verifier_fuzzer flatbuffers_verifier_fuzzer.cc)
 target_link_libraries(verifier_fuzzer PRIVATE flatbuffers_fuzzed)

+add_executable(monster_fuzzer flatbuffers_monster_fuzzer.cc)
+target_link_libraries(monster_fuzzer PRIVATE flatbuffers_fuzzed)
+
 # Build debugger for weird cases found with fuzzer.
 if(BUILD_DEBUGGER)
  add_library(flatbuffers_nonfuzz STATIC ${FlatBuffers_Library_SRCS})
--- a/tests/fuzzer/flatbuffers_monster_fuzzer.cc
+++ b/tests/fuzzer/flatbuffers_monster_fuzzer.cc
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2014 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <clocale>
+#include <string>
+
+#include "cpp17/generated_cpp17/monster_test_generated.h"
+#include "flatbuffers/idl.h"
+#include "test_init.h"
+
+namespace {
+constexpr bool use_binary_schema = true;
+// should point to flatbuffers/tests/
+constexpr const char *test_data_path = "../../";
+constexpr const char *schema_file_name = "monster_test";
+
+static constexpr uint8_t flags_strict_json = 0x80;
+static constexpr uint8_t flags_skip_unexpected_fields_in_json = 0x40;
+static constexpr uint8_t flags_allow_non_utf8 = 0x20;
+
+flatbuffers::Parser make_parser(const flatbuffers::IDLOptions &opts) {
+  // Schema is read from disk (and verified if binary) once, on the first call.
+  static const std::string schemafile = [&]() {
+    std::string schemafile;
+    TEST_EQ(
+        flatbuffers::LoadFile((std::string(test_data_path) + schema_file_name +
+                               (use_binary_schema ? ".bfbs" : ".fbs"))
+                                  .c_str(),
+                              use_binary_schema, &schemafile),
+        true);
+
+    if (use_binary_schema) {
+      flatbuffers::Verifier verifier(
+          reinterpret_cast<const uint8_t *>(schemafile.c_str()),
+          schemafile.size());
+      TEST_EQ(reflection::VerifySchemaBuffer(verifier), true);
+    }
+    return schemafile;
+  }();
+
+  // Every call builds a new parser and loads the cached schema into it first.
+  flatbuffers::Parser parser;
+  if (use_binary_schema) {
+    TEST_EQ(parser.Deserialize(
+                reinterpret_cast<const uint8_t *>(schemafile.c_str()),
+                schemafile.size()),
+            true);
+  } else {
+    auto include_test_path =
+        flatbuffers::ConCatPathFileName(test_data_path, "include_test");
+    const char *include_directories[] = { test_data_path,
+                                          include_test_path.c_str(), nullptr };
+    TEST_EQ(parser.Parse(schemafile.c_str(), include_directories), true);
+  }
+  // Assign the caller's options last; this overwrites parser.opts wholesale.
+  parser.opts = opts;
+  return parser;
+}
+
+std::string do_test(const flatbuffers::IDLOptions &opts,
+                    const std::string &input_json) {
+  auto parser = make_parser(opts);  // a new parser for every input
+  std::string jsongen;              // stays empty when ParseJson() fails
+  if (parser.ParseJson(input_json.c_str())) {
+    flatbuffers::Verifier verifier(parser.builder_.GetBufferPointer(),
+                                   parser.builder_.GetSize());
+    TEST_EQ(MyGame::Example::VerifyMonsterBuffer(verifier), true);
+    TEST_ASSERT(
+        GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen));
+  }
+  return jsongen;
+}
+}  // namespace
+
+// Utility for test run.
+OneTimeTestInit OneTimeTestInit::one_time_init_;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  // Layout: data[0] holds Parser flags, data[1] is reserved, the rest is JSON.
+  constexpr size_t header_size = 2;
+  if (size <= header_size) return 0;
+  const uint8_t flags = data[0];
+
+  // Keep only the text before the first '\0'; skip inputs with nothing left.
+  const std::string raw(reinterpret_cast<const char *>(data + header_size),
+                        size - header_size);
+  const std::string json(raw.c_str());
+  if (json.empty()) return 0;
+
+  flatbuffers::IDLOptions opts;
+  opts.strict_json = (flags & flags_strict_json) != 0;
+  opts.skip_unexpected_fields_in_json =
+      (flags & flags_skip_unexpected_fields_in_json) != 0;
+  opts.allow_non_utf8 = (flags & flags_allow_non_utf8) != 0;
+
+  // Round trip: text generated by the first pass must reproduce itself.
+  const std::string jsongen_1 = do_test(opts, json);
+  if (jsongen_1.empty()) return 0;
+  const std::string jsongen_2 = do_test(opts, jsongen_1);
+  TEST_EQ(jsongen_1, jsongen_2);
+  return 0;
+}
--- a/tests/fuzzer/flatbuffers_parser_fuzzer.cc
+++ b/tests/fuzzer/flatbuffers_parser_fuzzer.cc
@@ -9,14 +9,9 @@
 #include "flatbuffers/idl.h"
 #include "test_init.h"

-static constexpr uint8_t flags_strict_json = 0x01;
-static constexpr uint8_t flags_skip_unexpected_fields_in_json = 0x02;
-static constexpr uint8_t flags_allow_non_utf8 = 0x04;
-// static constexpr uint8_t flags_flag_3 = 0x08;
-// static constexpr uint8_t flags_flag_4 = 0x10;
-// static constexpr uint8_t flags_flag_5 = 0x20;
-// static constexpr uint8_t flags_flag_6 = 0x40;
-// static constexpr uint8_t flags_flag_7 = 0x80;
+static constexpr uint8_t flags_strict_json = 0x80;
+static constexpr uint8_t flags_skip_unexpected_fields_in_json = 0x40;
+static constexpr uint8_t flags_allow_non_utf8 = 0x20;

 // Utility for test run.
 OneTimeTestInit OneTimeTestInit::one_time_init_;
--- a/tests/fuzzer/flatbuffers_scalar_fuzzer.cc
+++ b/tests/fuzzer/flatbuffers_scalar_fuzzer.cc
@@ -1,6 +1,23 @@
+/*
+ * Copyright 2014 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 #include <assert.h>
 #include <stddef.h>
 #include <stdint.h>
+
 #include <algorithm>
 #include <clocale>
 #include <memory>
@@ -196,7 +213,7 @@ class ScalarReferenceResult {

 bool Parse(flatbuffers::Parser &parser, const std::string &json,
           std::string *_text) {
-  auto done = parser.Parse(json.c_str());
+  auto done = parser.ParseJson(json.c_str());
  if (done) {
    TEST_EQ(GenerateText(parser, parser.builder_.GetBufferPointer(), _text),
            true);
--- a/tests/fuzzer/flatbuffers_verifier_fuzzer.cc
+++ b/tests/fuzzer/flatbuffers_verifier_fuzzer.cc
@@ -5,7 +5,7 @@
 #include <stdint.h>
 #include <string>

-#include "monster_test_generated.h"
+#include "cpp17/generated_cpp17/monster_test_generated.h"

 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  flatbuffers::Verifier verifier(data, size);
--- a/tests/fuzzer/monster_json.dict
+++ b/tests/fuzzer/monster_json.dict
@@ -0,0 +1,60 @@
+"{"
+"}"
+"["
+"]"
+"\""
+"'"
+"\\"
+"//"
+":"
+","
+" "
+"\\n"
+"\\r"
+"/*"
+"*/"
+"true"
+"false"
+"null"
+"\\u"
+"\\b"
+"\\f"
+"\\t"
+"."
+"e"
+"e+"
+"e-"
+"E"
+"E+"
+"E-"
+"0x"
+"p"
+"a"
+"b"
+"Monster"
+"pos"
+"hp"
+"name"
+"weapons"
+"damage"
+"equipped_type"
+"equipped"
+"inventory"
+"vector_of_longs"
+"vector_of_doubles"
+"test_type"
+"test"
+"test1"
+"test2"
+"test4"
+"test3"
+"test5"
+"enemy"
+"Weapon"
+"Green"
+"Red"
+"Blue"
+"testarrayofstring"
+"testarrayofbools"
+"testbool"
+"flex"
--- a/tests/fuzzer/parser_fbs.dict
+++ b/tests/fuzzer/parser_fbs.dict
@@ -0,0 +1,101 @@
+"struct"
+"table"
+"enum"
+"union"
+"include"
+"namespace"
+"attribute"
+"null"
+"NULL"
+"byte"
+"int8"
+"ubyte"
+"uint8"
+"bool"
+"short"
+"int16"
+"ushort"
+"uint16"
+"int"
+"int32"
+"uint"
+"uint32"
+"float"
+"float32"
+"long"
+"int64"
+"ulong"
+"uint64"
+"double"
+"float64"
+"root_type"
+"file_identifier"
+"file_extension"
+"{"
+"}"
+"["
+"]"
+"\""
+"'"
+"\\"
+"//"
+":"
+","
+" "
+"\\n"
+"\\r"
+"/*"
+"*/"
+"true"
+"false"
+"null"
+"\\u"
+"\\b"
+"\\f"
+"\\t"
+"."
+"e"
+"e+"
+"e-"
+"E"
+"E+"
+"E-"
+"0x"
+"p"
+"a"
+"b"
+"Monster"
+"pos"
+"hp"
+"name"
+"weapons"
+"damage"
+"equipped_type"
+"equipped"
+"inventory"
+"vector_of_longs"
+"vector_of_doubles"
+"test_type"
+"test"
+"test1"
+"test2"
+"test4"
+"test3"
+"test5"
+"enemy"
+"Weapon"
+"Green"
+"Red"
+"Blue"
+"testarrayofstring"
+"testarrayofbools"
+"testbool"
+"testhashs32_fnv1"
+"testhashu32_fnv1"
+"testhashs64_fnv1"
+"testhashu64_fnv1"
+"testhashs32_fnv1a"
+"testhashu32_fnv1a"
+"testhashs64_fnv1a"
+"testhashu64_fnv1a"
+"flex"
--- a/tests/fuzzer/readme.md
+++ b/tests/fuzzer/readme.md
@@ -29,27 +29,43 @@ These are examples of running a fuzzer.
 Flags may vary and depend on a version of the libFuzzer library.
 For details, run a fuzzer with `-help` flag: `./parser_fuzzer -help=1`

-`./verifier_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 ../.corpus_verifier/`
+`./verifier_fuzzer ../.corpus_verifier/ ../.seed_verifier/`

-`./parser_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 ../.corpus_parser/`
+`./parser_fuzzer -only_ascii=1  -max_len=500 -dict=../parser_fbs.dict ../.corpus_parser/ ../.seed_parser/`

-`./scalar_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 -max_len=3000 ../.corpus_parser/ ../.seed_parser/`
+`./monster_fuzzer -only_ascii=1 -max_len=500 -dict=../monster_json.dict ../.corpus_monster/ ../.seed_monster/`

-Flag `-only_ascii=1` is useful for fast number-compatibility checking while run `scalar_fuzzer`:  
-`./scalar_fuzzer -only_ascii=1 -reduce_depth=1 -use_value_profile=1 -shrink=1 -max_len=3000 -timeout=10 -rss_limit_mb=2048 -jobs=2 ../.corpus_parser/ ../.seed_parser/`
+`./scalar_fuzzer -use_value_profile=1 -max_len=500 -dict=../scalar_json.dict ../.corpus_scalar/ ../.seed_scalar/`

-Run with a specific C-locale:  
+The `-only_ascii=1` flag is useful for fast number-compatibility checking while running `scalar_fuzzer`.
+
+Run with a specific C-locale:
 `FLATBUFFERS_TEST_LOCALE="ru_RU.CP1251" ./scalar_fuzzer -reduce_depth=1 -use_value_profile=1 -shrink=1 -max_len=3000 -timeout=10 -rss_limit_mb=2048 ../.corpus_parser/ ../.seed_parser/`

+
 ## Merge (minimize) corpus
 The **libFuzzer** allow to filter (minimize) corpus with help of `-merge` flag:
 > -merge
    If set to 1, any corpus inputs from the 2nd, 3rd etc. corpus directories that trigger new code coverage will be merged into the first corpus directory.
    Defaults to 0. This flag can be used to minimize a corpus.

-Merge several seeds to one (a new collected corpus to the seed collection, for example):
-`./scalar_fuzzer -merge=1 ../.seed_parser/ ../.corpus_parser/`
+Merge several corpora into a seed directory (for example, a newly collected corpus into the seed collection):
+- `./verifier_fuzzer -merge=1 ../.seed_verifier/ ../.corpus_verifier/`
+- `./parser_fuzzer -merge=1 ../.seed_parser/ ../.corpus_parser/`
+- `./monster_fuzzer -merge=1 ../.seed_monster/ ../.corpus_monster/`
+- `./scalar_fuzzer -merge=1 ../.seed_scalar/ ../.corpus_scalar/`

 ## Know limitations
 - LLVM 7.0 std::regex library has problem with stack overflow, maximum length of input for `scalar_fuzzer` run should be limited to 3000.
  Example: `./scalar_fuzzer -max_len=3000`
+
+# Fuzzing control
+
+## Set timeout or memory limit
+
+`-timeout=10 -rss_limit_mb=2048 -jobs=4 -workers=4`.
+
+## Force stop on the first UBSAN or ASAN error
+
+- `export UBSAN_OPTIONS=halt_on_error=1`
+- `export ASAN_OPTIONS=halt_on_error=1`
--- a/tests/fuzzer/scalar_json.dict
+++ b/tests/fuzzer/scalar_json.dict
@@ -0,0 +1,23 @@
+"-"
+"+"
+"."
+"e"
+"e+"
+"e-"
+"E"
+"E+"
+"E-"
+"0x"
+"-0x"
+"p"
+"a"
+"b"
+"c"
+"d"
+"e"
+"f"
+"nan"
+"inf"
+"-inf"
+"infinity"
+"-infinity"
--- a/tests/test.cpp
+++ b/tests/test.cpp
@@ -816,7 +816,7 @@ void ParseAndGenerateTextTest(bool binary) {
  } else {
    TEST_EQ(parser.Parse(schemafile.c_str(), include_directories), true);
  }
-  TEST_EQ(parser.Parse(jsonfile.c_str(), include_directories), true);
+  TEST_EQ(parser.ParseJson(jsonfile.c_str()), true);

  // here, parser.builder_ contains a binary buffer that is the parsed data.