diff --git a/.gitignore b/.gitignore index 4feb172..1cc4411 100644 --- a/.gitignore +++ b/.gitignore @@ -272,3 +272,8 @@ analysis-cppcheck-build-dir ideas desktop.iniimages/ + +.cifuzz*/ +.sarif/ +build-vscode/ +lcov.info diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ebf97f..8e26726 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,11 +5,15 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR) project( Glob VERSION 1.0 - LANGUAGES CXX + LANGUAGES C CXX ) +find_package(cifuzz NO_SYSTEM_ENVIRONMENT_PATH) +enable_fuzz_testing() + # ---- Options ---- option(GLOB_USE_GHC_FILESYSTEM "Use ghc::filesystem instead of std::filesystem" OFF) +option(GLOB_TESTS "Run glob gtests" ON) # ---- Include guards ---- @@ -93,16 +97,20 @@ packageProject( ) # --- setup tests --- -enable_testing() - -add_executable(glob_tests test/rglob_test.cpp) -set_property(TARGET glob_tests PROPERTY CXX_STANDARD 17) -target_link_libraries(glob_tests PRIVATE gtest_main ${PROJECT_NAME}) -add_test(NAME glob_tests COMMAND glob_tests) - -add_executable(glob_tests_single test/rglob_test.cpp) -set_property(TARGET glob_tests_single PROPERTY CXX_STANDARD 17) -target_compile_definitions(glob_tests_single PRIVATE USE_SINGLE_HEADER=1) -target_link_libraries(glob_tests_single PRIVATE gtest_main) -target_include_directories(glob_tests_single PRIVATE single_include) -add_test(NAME glob_tests_single COMMAND glob_tests_single) +if (GLOB_TESTS) + enable_testing() + + add_executable(glob_tests test/rglob_test.cpp test/compile_pattern_test.cpp) + set_property(TARGET glob_tests PROPERTY CXX_STANDARD 17) + target_link_libraries(glob_tests PRIVATE gtest_main ${PROJECT_NAME}) + add_test(NAME glob_tests COMMAND glob_tests) + + add_executable(glob_tests_single test/rglob_test.cpp test/compile_pattern_test.cpp) + set_property(TARGET glob_tests_single PROPERTY CXX_STANDARD 17) + target_compile_definitions(glob_tests_single PRIVATE USE_SINGLE_HEADER=1) + target_link_libraries(glob_tests_single PRIVATE gtest_main) + target_include_directories(glob_tests_single PRIVATE single_include) + add_test(NAME glob_tests_single COMMAND glob_tests_single) +endif () + +add_subdirectory(cifuzz-spark) diff --git a/cifuzz-spark/CMakeLists.txt b/cifuzz-spark/CMakeLists.txt new file mode 100644 index 0000000..c8e948f --- /dev/null +++ b/cifuzz-spark/CMakeLists.txt @@ -0,0 +1,6 @@ +#cifuzz:build-template:begin +#add_fuzz_test({{ .FuzzTestName }} {{ .FileName }}) +#target_link_libraries({{ .FuzzTestName }} PRIVATE "$") +#cifuzz:build-template:end +add_fuzz_test(fuzz_compile_pattern_to_regex fuzz_compile_pattern_to_regex.cpp) +target_link_libraries(fuzz_compile_pattern_to_regex PRIVATE "$") diff --git a/cifuzz-spark/fuzz_compile_pattern_to_regex.cpp b/cifuzz-spark/fuzz_compile_pattern_to_regex.cpp new file mode 100644 index 0000000..0cfbee9 --- /dev/null +++ b/cifuzz-spark/fuzz_compile_pattern_to_regex.cpp @@ -0,0 +1,25 @@ +#include +#include // For FUZZ_TEST and FuzzedDataProvider +#include // For std::regex and std::regex_match +#include // For std::string_view + +#include "glob/glob.h" // For glob::compile_pattern_to_regex + +FUZZ_TEST(const uint8_t *data, size_t size) { + if (size < 1) { + return; + } + + FuzzedDataProvider fdp(data, size); + std::string glob_pattern = fdp.ConsumeRandomLengthString(50); + try { + std::regex compiled_regex = glob::compile_pattern_to_regex(glob_pattern); + // Generate fuzzed file path to match against. + std::string path = fdp.ConsumeRandomLengthString(50); + std::regex_match(path, compiled_regex); + } catch (const std::regex_error &) { + // Ignore regex errors - these are expected for invalid glob patterns. + } catch (const std::length_error &) { + // Ignore length errors - these are expected for invalid glob patterns. + } +} diff --git a/cifuzz.yaml b/cifuzz.yaml new file mode 100644 index 0000000..9673988 --- /dev/null +++ b/cifuzz.yaml @@ -0,0 +1,74 @@ +## Configuration for a CI Fuzz project +## Generated on 2025-08-25 + +## The build system used to build this project. If not set, cifuzz tries to +## detect the build system automatically. +## Valid values: "bazel", "cmake", "other" +build-system: cmake +## Engine used for fuzzing, default is "libfuzzer-clang". +## Valid values: "libfuzzer-clang", "honggfuzz-clang", "honggfuzz-gcc" +engine: libfuzzer-clang +## Sanitizers to use when building fuzz tests. If not set, ASan and UBSan +## are used by default. +#sanitizers: +# - address +# - undefined + +## If the build system type is "other", this command is used to build the fuzz +## tests. +#build-command: make my_fuzz_test + +## If the build system type is "other", this command is used to list the names +## of already existing fuzz tests in your project. This allows running all fuzz +## tests in the project without listing them explicitly, supports fuzz test +## generation and enables fuzz test completion for commands. +## The listed names should be separated by whitespace and can't include +## whitespaces themselves, i.e. 'fuzz_test1 fuzz_test_2 ...' +#list-fuzz-tests-command: echo my_fuzz_test + +## Command-line arguments to pass directly to the build system to use when +## building fuzz tests. +#build-system-args: +# - -DBUILD_TESTS=ON + +## Directories containing sample inputs used as seeds for running fuzz tests. +## For general information on seed corpora, see: +## https://docs.code-intelligence.com/glossary#seed-corpus +#seed-corpus-dirs: +# - path/to/seed-corpus + +## Directories containing inputs for calculating coverage. These are used in +## addition to inputs found in the directory of the fuzz test. +#corpus-dirs: +# - path/to/corpus + +## File containing input language keywords or other interesting byte sequences +## used for running fuzz tests. +## For libFuzzer see: https://llvm.org/docs/LibFuzzer.html#dictionaries +#dict: path/to/dictionary.dct + +## Command-line arguments to pass to libFuzzer when running fuzz tests. +## See https://llvm.org/docs/LibFuzzer.html#options for possible options. +#libfuzzer-args: +# - -rss_limit_mb=4096 + +## Command-line arguments to pass to Honggfuzz when running fuzz tests. +## See https://github.com/google/honggfuzz/blob/master/docs/USAGE.md for possible options. +#honggfuzz-args: +# - --rlimit_rss=4096 + +## Maximum time to run all fuzz tests. Default is 10 minutes. The time will be +## split up evenly between multiple fuzz tests. To keep running indefinitely, +## set value to 0. +#max-fuzzing-duration: 30m + +## Set to true to print output of the `cifuzz run` command as JSON. +#print-json: true + +## Set to true to disable desktop notifications. +#no-notifications: true + +## Set style for command output. +## Valid values: "pretty", "plain" +#style: plain +build-file: cifuzz-spark/CMakeLists.txt diff --git a/cmake/CPM.cmake b/cmake/CPM.cmake index ed8c0bc..9ede4a5 100644 --- a/cmake/CPM.cmake +++ b/cmake/CPM.cmake @@ -1,4 +1,4 @@ -set(CPM_DOWNLOAD_VERSION 0.35.1) +set(CPM_DOWNLOAD_VERSION 0.40.8) if(CPM_SOURCE_CACHE) set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") diff --git a/include/glob/glob.h b/include/glob/glob.h index f2b36ed..efc05a9 100644 --- a/include/glob/glob.h +++ b/include/glob/glob.h @@ -2,6 +2,7 @@ #pragma once #include #include +#include #ifdef GLOB_USE_GHC_FILESYSTEM #include @@ -45,4 +46,5 @@ std::vector glob(const std::initializer_list &pathnames); /// Initializer list overload for convenience std::vector rglob(const std::initializer_list &pathnames); +std::regex compile_pattern_to_regex(std::string_view pattern); } // namespace glob diff --git a/single_include/glob/glob.hpp b/single_include/glob/glob.hpp index e5d16a8..5ffc25b 100644 --- a/single_include/glob/glob.hpp +++ b/single_include/glob/glob.hpp @@ -138,13 +138,13 @@ std::string translate(const std::string &pattern) { } static inline -std::regex compile_pattern(const std::string &pattern) { +std::regex compile_pattern_to_regex(const std::string &pattern) { return std::regex(translate(pattern), std::regex::ECMAScript); } static inline bool fnmatch(const fs::path &name, const std::string &pattern) { - return std::regex_match(name.string(), compile_pattern(pattern)); + return std::regex_match(name.string(), compile_pattern_to_regex(pattern)); } static inline diff --git a/source/glob.cpp b/source/glob.cpp index 45c242a..62c43b0 100644 --- a/source/glob.cpp +++ b/source/glob.cpp @@ -23,109 +23,142 @@ bool string_replace(std::string &str, std::string_view from, std::string_view to return true; } +inline void handle_previous_stars(std::string &result_string, const int &n_stars) { + if (n_stars == 1) { + // single star cannot escape "/" + result_string += "[^/]*"; // .* without / + } else if (n_stars == 2) { + // double star matches anything + result_string += ".*"; + } +} + std::string translate(std::string_view pattern) { std::size_t i = 0, n = pattern.size(); std::string result_string; + int n_stars = 0; + while (i < n) { auto c = pattern[i]; i += 1; + if (c == '*') { - result_string += ".*"; - } else if (c == '?') { - result_string += "."; - } else if (c == '[') { - auto j = i; - if (j < n && pattern[j] == '!') { - j += 1; + n_stars++; + // last character of the pattern is a star + if (i == n) { + handle_previous_stars(result_string, n_stars); } - if (j < n && pattern[j] == ']') { - j += 1; - } - while (j < n && pattern[j] != ']') { - j += 1; + } else { + if (n_stars == 1) { + // single star cannot escape "/" + result_string += "[^/]*"; // .* without / } - if (j >= n) { - result_string += "\\["; - } else { - auto stuff = std::string(pattern.begin() + i, pattern.begin() + j); - if (stuff.find("--") == std::string::npos) { - string_replace(stuff, std::string_view{"\\"}, std::string_view{R"(\\)"}); + if (c == '/') { + if (n_stars == 2) { + // handle **/ + result_string += "(.*?/)?"; } else { - std::vector chunks; - std::size_t k = 0; - if (pattern[i] == '!') { - k = i + 2; - } else { - k = i + 1; + result_string += "/"; + } + } else { // not / + handle_previous_stars(result_string, n_stars); + if (c == '?') { + result_string += "."; + } else if (c == '[') { + auto j = i; + if (j < n && pattern[j] == '!') { + j += 1; } + if (j < n && pattern[j] == ']') { + j += 1; + } + while (j < n && pattern[j] != ']') { + j += 1; + } + if (j >= n) { + result_string += "\\["; + } else { + auto stuff = std::string(pattern.begin() + i, pattern.begin() + j); + if (stuff.find("--") == std::string::npos) { + string_replace(stuff, std::string_view{"\\"}, std::string_view{R"(\\)"}); + } else { + std::vector chunks; + std::size_t k = 0; + if (pattern[i] == '!') { + k = i + 2; + } else { + k = i + 1; + } + + while (k < j) { + auto pos = pattern.substr(k, j - k).find('-'); + if (pos == std::string_view::npos) { + break; + } else { + k += pos; + } + chunks.push_back(std::string(pattern.begin() + i, pattern.begin() + k)); + i = k + 1; + k += 3; + } + + chunks.push_back(std::string(pattern.begin() + i, pattern.begin() + j)); + // Escape backslashes and hyphens for set difference (--). + // Hyphens that create ranges shouldn't be escaped. + bool first = true; + for (auto &chunk : chunks) { + string_replace(chunk, std::string_view{"\\"}, std::string_view{R"(\\)"}); + string_replace(chunk, std::string_view{"-"}, std::string_view{R"(\-)"}); + if (first) { + stuff += chunk; + first = false; + } else { + stuff += "-" + chunk; + } + } + } - while (true) { - k = pattern.find("-", k, j); - if (k == std::string_view::npos) { - break; + // Escape set operations (&&, ~~ and ||). + std::string result{}; + std::regex_replace(std::back_inserter(result), // result + stuff.begin(), stuff.end(), // string + ESCAPE_SET_OPER, // pattern + ESCAPE_REPL_STR); // repl + stuff = result; + i = j + 1; + if (stuff[0] == '!') { + stuff = "^" + std::string(stuff.begin() + 1, stuff.end()); + } else if (stuff[0] == '^' || stuff[0] == '[') { + stuff = "\\\\" + stuff; } - chunks.push_back(std::string(pattern.begin() + i, pattern.begin() + k)); - i = k + 1; - k = k + 3; + result_string = result_string + "[" + stuff + "]"; } - - chunks.push_back(std::string(pattern.begin() + i, pattern.begin() + j)); - // Escape backslashes and hyphens for set difference (--). - // Hyphens that create ranges shouldn't be escaped. - bool first = true; - for (auto &chunk : chunks) { - string_replace(chunk, std::string_view{"\\"}, std::string_view{R"(\\)"}); - string_replace(chunk, std::string_view{"-"}, std::string_view{R"(\-)"}); - if (first) { - stuff += chunk; - first = false; - } else { - stuff += "-" + chunk; + } else { + // SPECIAL_CHARS + // closing ')', '}' and ']' + // '-' (a range in character set) + // '&', '~', (extended character set operations) + // '#' (comment) and WHITESPACE (ignored) in verbose mode + static std::map special_characters_map; + if (special_characters_map.empty()) { + for (auto &&sc : SPECIAL_CHARACTERS) { + special_characters_map.emplace(static_cast(sc), std::string{"\\"} + std::string(1, sc)); } } - } - // Escape set operations (&&, ~~ and ||). - std::string result{}; - std::regex_replace(std::back_inserter(result), // result - stuff.begin(), stuff.end(), // string - ESCAPE_SET_OPER, // pattern - ESCAPE_REPL_STR); // repl - stuff = result; - i = j + 1; - if (stuff[0] == '!') { - stuff = "^" + std::string(stuff.begin() + 1, stuff.end()); - } else if (stuff[0] == '^' || stuff[0] == '[') { - stuff = "\\\\" + stuff; - } - result_string = result_string + "[" + stuff + "]"; - } - } else { - // SPECIAL_CHARS - // closing ')', '}' and ']' - // '-' (a range in character set) - // '&', '~', (extended character set operations) - // '#' (comment) and WHITESPACE (ignored) in verbose mode - static std::map special_characters_map; - if (special_characters_map.empty()) { - for (auto &&sc : SPECIAL_CHARACTERS) { - special_characters_map.emplace(static_cast(sc), std::string{"\\"} + std::string(1, sc)); + if (SPECIAL_CHARACTERS.find(c) != std::string_view::npos) { + result_string += special_characters_map[static_cast(c)]; + } else { + result_string += c; + } } - } - - if (SPECIAL_CHARACTERS.find(c) != std::string_view::npos) { - result_string += special_characters_map[static_cast(c)]; - } else { - result_string += c; - } - } + } // not / + n_stars = 0; + } // not * } - return std::string{"(("} + result_string + std::string{R"()|[\r\n])$)"}; -} -std::regex compile_pattern(std::string_view pattern) { - return std::regex(translate(pattern), std::regex::ECMAScript); + return std::string{"(("} + result_string + std::string{R"()|[\r\n])$)"}; } bool fnmatch(std::string&& name, const std::regex& pattern) { @@ -135,7 +168,7 @@ bool fnmatch(std::string&& name, const std::regex& pattern) { std::vector filter(const std::vector &names, std::string_view pattern) { // std::cout << "Pattern: " << pattern << "\n"; - const auto pattern_re = compile_pattern(pattern); + const auto pattern_re = compile_pattern_to_regex(pattern); std::vector result; std::copy_if(std::make_move_iterator(names.begin()), std::make_move_iterator(names.end()), std::back_inserter(result), @@ -366,4 +399,7 @@ rglob(const std::initializer_list &pathnames) { return rglob(std::vector(pathnames)); } +std::regex compile_pattern_to_regex(std::string_view pattern) { + return std::regex(translate(pattern), std::regex::ECMAScript); +} } // namespace glob diff --git a/standalone/CMakeLists.txt b/standalone/CMakeLists.txt index 4c55bef..fc3dd64 100644 --- a/standalone/CMakeLists.txt +++ b/standalone/CMakeLists.txt @@ -13,7 +13,7 @@ include(../cmake/CPM.cmake) CPMAddPackage( NAME cxxopts GITHUB_REPOSITORY jarro2783/cxxopts - VERSION 2.2.0 + VERSION 3.2.0 OPTIONS "CXXOPTS_BUILD_EXAMPLES Off" "CXXOPTS_BUILD_TESTS Off" ) diff --git a/test/compile_pattern_test.cpp b/test/compile_pattern_test.cpp new file mode 100644 index 0000000..a1a1bf6 --- /dev/null +++ b/test/compile_pattern_test.cpp @@ -0,0 +1,52 @@ +#include +#include + +#ifdef USE_SINGLE_HEADER +#include "glob/glob.hpp" +#else +#include "glob/glob.h" +#endif + +TEST(globTest, simpleFileGlob) { + auto pattern = glob::compile_pattern_to_regex("foo/*.txt"); + ASSERT_TRUE(std::regex_match("foo/bar.txt", pattern)); + ASSERT_TRUE(std::regex_match("foo/blub.bar.txt", pattern)); + ASSERT_FALSE(std::regex_match("blub.txt", pattern)); + ASSERT_FALSE(std::regex_match("foo/blub.json", pattern)); +} + +TEST(globTest, doubleStarGlob) { + auto pattern = glob::compile_pattern_to_regex("foo/**/*.txt"); + ASSERT_TRUE(std::regex_match("foo/baz/bar.txt", pattern)); + ASSERT_TRUE(std::regex_match("foo/baz/blub/bar.txt", pattern)); + ASSERT_FALSE(std::regex_match("blub.txt", pattern)); + ASSERT_FALSE(std::regex_match("foo/blub.json", pattern)); + ASSERT_FALSE(std::regex_match("/home/user/foo/bar.txt", pattern)); + ASSERT_TRUE(std::regex_match("foo/bar.txt", pattern)); +} + +TEST(globTest, doubleStar_Star_FixedEnd) { + auto pattern = glob::compile_pattern_to_regex("/home/*/**/test"); + ASSERT_TRUE(std::regex_match("/home/user/test", pattern)); + ASSERT_TRUE(std::regex_match("/home/user2/test", pattern)); + ASSERT_TRUE(std::regex_match("/home/test/a/b/c/test", pattern)); + ASSERT_FALSE(std::regex_match("/home/test/a/b/c/mytest", pattern)); +} + +TEST(globTest, doubleStar_Star_StarFixedEnd) { + auto pattern = glob::compile_pattern_to_regex("/home/*/**/*stream"); + ASSERT_TRUE(std::regex_match("/home/user/stream", pattern)); + ASSERT_TRUE(std::regex_match("/home/user2/stream", pattern)); + ASSERT_TRUE(std::regex_match("/home/user/a/b/c/stream", pattern)); + ASSERT_TRUE(std::regex_match("/home/user/a/b/c/stream", pattern)); + ASSERT_TRUE(std::regex_match("/home/user/a/b/c/istream", pattern)); + ASSERT_TRUE(std::regex_match("/home/user/a/b/c/my-stream", pattern)); + ASSERT_TRUE(std::regex_match("/home/user/a/b/c/youdontstream", pattern)); + ASSERT_FALSE(std::regex_match("/home/user/a/b/c/youdontstreamc", pattern)); +} + +TEST(globTest, exclude_directories_but_not_files) { + auto pattern = glob::compile_pattern_to_regex("test*/**"); + ASSERT_TRUE(std::regex_match("test/whatever.c", pattern)); + ASSERT_FALSE(std::regex_match("test.c", pattern)); +}