From bd4e0b30a7301b0fc134f10ea1bd618922b60a5c Mon Sep 17 00:00:00 2001 From: Mark Spatz Date: Mon, 1 Mar 2021 14:34:01 -0600 Subject: [PATCH] [idl_parser] Track included files by hash (#6434) * [idl_gen] Delete ts::GenPrefixedImport() I don't know what this is for, but it's the only piece of code external to idl_parser.cpp that expects the key of Parser::included_files_ to be a path. And it appears to be unused. * [idl_parser] Track included files by hash Parser::included_files_ is a map whose main purpose is to keep track of which files have already been parsed in order to protect against multiple inclusion. Its key is the path that the file was found at during parsing (or, if it's an in-memory file, just its name). This commit changes the key to be the 64 bit FNV-1a hash of the file's name (just the name, not the complete path) xor'd with the hash of the file's contents (unless it's an in-memory file, then we only hash the name.) This allows multiple include protection to function even in the face of unique per-file include paths (fixes #6425). * Ran tests/generate_code.sh CI told me to do it. * Gracefullt handle case where source_filename == nullptr I just learned source_filename might also be null. In that case, we should exclude it from the hash instead of hashing a zero length string, just like we exclude source when it is null. Presumably nameless files will never be included (they can't, can they?) so this doesn't really matter, but I think it's prettier anyways. --- include/flatbuffers/idl.h | 2 +- src/idl_gen_ts.cpp | 22 ------------------- src/idl_parser.cpp | 39 +++++++++++++++++++++++++-------- tests/monster_test_generated.rs | 8 +++---- 4 files changed, 35 insertions(+), 36 deletions(-) diff --git a/include/flatbuffers/idl.h b/include/flatbuffers/idl.h index 955ac3fbb..0e9a3bcd5 100644 --- a/include/flatbuffers/idl.h +++ b/include/flatbuffers/idl.h @@ -997,7 +997,7 @@ class Parser : public ParserState { std::string file_identifier_; std::string file_extension_; - std::map included_files_; + std::map included_files_; std::map> files_included_per_file_; std::vector native_included_files_; diff --git a/src/idl_gen_ts.cpp b/src/idl_gen_ts.cpp index 2ebf1f7fb..ee191678b 100644 --- a/src/idl_gen_ts.cpp +++ b/src/idl_gen_ts.cpp @@ -308,28 +308,6 @@ class TsGenerator : public BaseGenerator { return "NS" + NumToString(HashFnv1a(file.c_str())); } - std::string GenPrefixedImport(const std::string &full_file_name, - const std::string &base_name) { - // Either keep the include path as it was - // or use only the base_name + kGeneratedFileNamePostfix - std::string path; - if (parser_.opts.keep_include_path) { - auto it = parser_.included_files_.find(full_file_name); - FLATBUFFERS_ASSERT(it != parser_.included_files_.end()); - path = flatbuffers::StripExtension(it->second) + - parser_.opts.filename_suffix; - } else { - path = base_name + parser_.opts.filename_suffix; - } - - // Add the include prefix and make the path always relative - path = flatbuffers::ConCatPathFileName(parser_.opts.include_prefix, path); - path = std::string(".") + kPathSeparator + path; - - return "import * as " + GenFileNamespacePrefix(full_file_name) + - " from \"" + path + "\";\n"; - } - // Adds a source-dependent prefix, for of import * statements. std::string GenPrefixedTypeName(const std::string &typeName, const std::string &file) { diff --git a/src/idl_parser.cpp b/src/idl_parser.cpp index 0f48c84e8..8cfd04bdc 100644 --- a/src/idl_parser.cpp +++ b/src/idl_parser.cpp @@ -3192,13 +3192,32 @@ CheckedError Parser::ParseRoot(const char *source, const char **include_paths, return NoError(); } +// Generate a unique hash for a file based on its name and contents (if any). +static uint64_t HashFile(const char *source_filename, const char *source) { + uint64_t hash = 0; + + if (source_filename) + hash = HashFnv1a(StripPath(source_filename).c_str()); + + if (source && *source) hash ^= HashFnv1a(source); + + return hash; +} + CheckedError Parser::DoParse(const char *source, const char **include_paths, const char *source_filename, const char *include_filename) { + uint64_t source_hash = 0; if (source_filename) { - if (included_files_.find(source_filename) == included_files_.end()) { - included_files_[source_filename] = - include_filename ? include_filename : ""; + // If the file is in-memory, don't include its contents in the hash as we + // won't be able to load them later. + if (FileExists(source_filename)) + source_hash = HashFile(source_filename, source); + else + source_hash = HashFile(source_filename, nullptr); + + if (included_files_.find(source_hash) == included_files_.end()) { + included_files_[source_hash] = include_filename ? include_filename : ""; files_included_per_file_[source_filename] = std::set(); } else { return NoError(); @@ -3249,12 +3268,14 @@ CheckedError Parser::DoParse(const char *source, const char **include_paths, return Error("unable to locate include file: " + name); if (source_filename) files_included_per_file_[source_filename].insert(filepath); - if (included_files_.find(filepath) == included_files_.end()) { + + std::string contents; + bool file_loaded = LoadFile(filepath.c_str(), true, &contents); + if (included_files_.find(HashFile(filepath.c_str(), contents.c_str())) == + included_files_.end()) { // We found an include file that we have not parsed yet. - // Load it and parse it. - std::string contents; - if (!LoadFile(filepath.c_str(), true, &contents)) - return Error("unable to load include file: " + name); + // Parse it. + if (!file_loaded) return Error("unable to load include file: " + name); ECHECK(DoParse(contents.c_str(), include_paths, filepath.c_str(), name.c_str())); // We generally do not want to output code for any included files: @@ -3271,7 +3292,7 @@ CheckedError Parser::DoParse(const char *source, const char **include_paths, // entered into included_files_. // This is recursive, but only go as deep as the number of include // statements. - if (source_filename) { included_files_.erase(source_filename); } + included_files_.erase(source_hash); return DoParse(source, include_paths, source_filename, include_filename); } diff --git a/tests/monster_test_generated.rs b/tests/monster_test_generated.rs index 0102c74e0..0d82b2f74 100644 --- a/tests/monster_test_generated.rs +++ b/tests/monster_test_generated.rs @@ -2,8 +2,8 @@ -use crate::include_test1_generated::*; use crate::include_test2_generated::*; +use crate::include_test1_generated::*; use std::mem; use std::cmp::Ordering; @@ -13,8 +13,8 @@ use self::flatbuffers::EndianScalar; #[allow(unused_imports, dead_code)] pub mod my_game { - use crate::include_test1_generated::*; use crate::include_test2_generated::*; + use crate::include_test1_generated::*; use std::mem; use std::cmp::Ordering; @@ -127,8 +127,8 @@ impl InParentNamespaceT { #[allow(unused_imports, dead_code)] pub mod example_2 { - use crate::include_test1_generated::*; use crate::include_test2_generated::*; + use crate::include_test1_generated::*; use std::mem; use std::cmp::Ordering; @@ -243,8 +243,8 @@ impl MonsterT { #[allow(unused_imports, dead_code)] pub mod example { - use crate::include_test1_generated::*; use crate::include_test2_generated::*; + use crate::include_test1_generated::*; use std::mem; use std::cmp::Ordering;