Skip to content

Commit da86472

Browse files
committed
Implemented a wildcard matching function for filtering files in a directory.
1 parent c74fa59 commit da86472

File tree

6 files changed

+381
-1
lines changed

6 files changed

+381
-1
lines changed

src/bin2cpp/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ add_executable(bin2cpp
2828
SegmentGenerator.h
2929
StringGenerator.cpp
3030
StringGenerator.h
31+
wildcard.cpp
32+
wildcard.h
3133
Win32ResourceGenerator.cpp
3234
Win32ResourceGenerator.h
3335
)

src/bin2cpp/wildcard.cpp

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
/**********************************************************************************
2+
* MIT License
3+
*
4+
* Copyright (c) 2018 Antoine Beauchamp
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*********************************************************************************/
24+
25+
#include "wildcard.h"
26+
#include "bin2cpp/version.h"
27+
28+
namespace bin2cpp
{
  ///<summary>
  ///Tells whether a character is an ASCII decimal digit ('0' to '9').
  ///</summary>
  ///<param name="c">The character to test.</param>
  ///<returns>Returns true if the character is between '0' and '9' inclusively, otherwise false.</returns>
  static inline bool isdigit(const char& c)
  {
    return '0' <= c && c <= '9';
  }

  ///<summary>
  ///Tells whether a character is accepted by the content of a bracket expression such as '[xyz]' or '[a-z0-9]'.
  ///</summary>
  ///<param name="pattern">The complete pattern string.</param>
  ///<param name="first">Index of the first character following the opening '['.</param>
  ///<param name="last">Index of the closing ']'.</param>
  ///<param name="candidate">The character of the value to test.</param>
  ///<returns>Returns true if the candidate is listed or falls within one of the ranges, otherwise false.</returns>
  static bool char_class_contains(const std::string& pattern, size_t first, size_t last, char candidate)
  {
    // Ranges are compared as unsigned values so that the result does not depend
    // on whether plain 'char' is signed on the target platform.
    const unsigned char target = static_cast<unsigned char>(candidate);

    for ( size_t i = first; i < last; ++i )
    {
      // A '-' is a range operator only when it has a character on both sides inside the brackets.
      const bool is_range = ( pattern[i] == '-' && i > first && i + 1 < last );
      if ( is_range )
      {
        // This assumes that the first range character is smaller than the second range character.
        const unsigned char low = static_cast<unsigned char>(pattern[i - 1]);
        const unsigned char high = static_cast<unsigned char>(pattern[i + 1]);
        if ( low <= target && target <= high ) return true;
      }
      else if ( pattern[i] == candidate )
      {
        return true;
      }
    }
    return false;
  }

  ///<summary>
  ///Recursively matches the remaining part of a value against the remaining part of a pattern.
  ///</summary>
  ///<remarks>
  /// Every capture pushed by a branch is removed again when that branch fails,
  /// which leaves the captures vector exactly as it was received when the function returns false.
  /// A '#' that does not face a digit falls through to the literal comparison and therefore
  /// still matches a literal '#' character, without producing a capture.
  ///</remarks>
  ///<param name="value">The file path, value or string to match.</param>
  ///<param name="pattern">The pattern containing wildcards.</param>
  ///<param name="value_index">The search index within the value string.</param>
  ///<param name="pattern_index">The search index within the pattern string.</param>
  ///<param name="captures">The captured values accumulated so far. One entry is appended per matched wildcard.</param>
  ///<returns>Returns true if the remaining value matches the remaining pattern, otherwise false.</returns>
  bool wildcard_match_helper(const std::string& value, const std::string& pattern, size_t value_index, size_t pattern_index, std::vector<std::string>& captures)
  {
    // Pattern exhausted: this is a match only if the value is exhausted as well.
    if ( pattern_index >= pattern.size() )
    {
      return value_index == value.size();
    }

    const char token = pattern[pattern_index];

    // Handle '*': Capture a variable-length substring, shortest candidate first.
    if ( token == '*' )
    {
      for ( size_t end = value_index; end <= value.size(); ++end )
      {
        captures.push_back(value.substr(value_index, end - value_index));

        // Recurse to resolve for the remaining characters.
        if ( wildcard_match_helper(value, pattern, end, pattern_index + 1, captures) ) return true;

        captures.pop_back(); // Remove this candidate, it did not lead to a match
      }
      return false;
    }

    // All remaining tokens consume exactly one character of the value.
    // Checking here also protects the bracket handling from reading past the end of the value.
    if ( value_index >= value.size() ) return false;

    const char current = value[value_index];
    size_t next_pattern_index = pattern_index + 1;
    bool has_capture = true;

    if ( token == '?' )
    {
      // Handle '?': Capture a single character, whatever it is.
    }
    else if ( token == '#' && isdigit(current) )
    {
      // Handle '#': Capture any single digit (0-9).
    }
    else if ( token == '[' )
    {
      // Handling character lists like '[xyz]' or ranges like '[a-z]'.
      const size_t closing_bracket_pos = pattern.find(']', pattern_index);
      if ( closing_bracket_pos == std::string::npos ) return false; // Malformed pattern

      if ( !char_class_contains(pattern, pattern_index + 1, closing_bracket_pos, current) ) return false;

      next_pattern_index = closing_bracket_pos + 1;
    }
    else
    {
      // Exact character match. Literal characters do not produce a capture.
      if ( token != current ) return false;
      has_capture = false;
    }

    if ( has_capture ) captures.push_back(std::string(1, current));

    // Recurse to resolve for the remaining characters.
    if ( wildcard_match_helper(value, pattern, value_index + 1, next_pattern_index, captures) ) return true;

    // Undo our own capture so that a caller which is backtracking sees a balanced stack.
    if ( has_capture ) captures.pop_back();
    return false;
  }

  ///<summary>
  ///Checks if a given value matches a pattern containing wildcard characters definition.
  ///</summary>
  ///<param name="value">The file path, value or string to match.</param>
  ///<param name="pattern">The pattern containing wildcards.</param>
  ///<param name="captures">Receives the text matched by each wildcard, in pattern order. Always emptied first and left empty when there is no match.</param>
  ///<returns>Returns true if the value matches the pattern, otherwise false.</returns>
  bool wildcard_match(const std::string& value, const std::string& pattern, std::vector<std::string>& captures)
  {
    captures.clear(); // Ensure captures vector is empty before starting

    const bool match = wildcard_match_helper(value, pattern, 0, 0, captures);
    if ( !match )
    {
      captures.clear();
    }
    return match;
  }

} // namespace bin2cpp

src/bin2cpp/wildcard.h

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/**********************************************************************************
2+
* MIT License
3+
*
4+
* Copyright (c) 2018 Antoine Beauchamp
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*********************************************************************************/
24+
25+
#ifndef BIN2CPP_WILDCARD_H
26+
#define BIN2CPP_WILDCARD_H
27+
28+
#include <string>
29+
#include <vector>
30+
31+
namespace bin2cpp
{

  ///<summary>
  ///Tests a value against a wildcard pattern and reports what each wildcard matched.
  ///</summary>
  ///<remarks>
  /// Supported wildcards:
  ///   '?'          matches any single character.
  ///   '*'          matches zero or more characters.
  ///   '#'          matches any single digit (0-9).
  ///   '[charlist]' matches any character in the provided set.
  ///   '[a-z]', '[A-Z]', '[0-9]' match characters in the respective ranges.
  ///   '[a-zA-Z0-9]' matches any alphanumeric character.
  ///</remarks>
  ///<param name="value">The file path, value or string to match.</param>
  ///<param name="pattern">The pattern containing wildcards.</param>
  ///<param name="captures">Receives the portion of the value captured by the wildcards of the pattern.</param>
  ///<returns>Returns true if the value matches the pattern, otherwise false.</returns>
  bool wildcard_match(const std::string& value, const std::string& pattern, std::vector<std::string>& captures);

  ///<summary>
  ///Tests a value against a wildcard pattern when the captured text is of no interest.
  ///</summary>
  ///<remarks>
  /// Accepts the same wildcards as the overload taking a captures vector:
  /// '?', '*', '#', '[charlist]' and ranges such as '[a-z]' or '[a-zA-Z0-9]'.
  /// The captures are collected in a temporary vector and discarded.
  ///</remarks>
  ///<param name="value">The file path, value or string to match.</param>
  ///<param name="pattern">The pattern containing wildcards.</param>
  ///<returns>Returns true if the value matches the pattern, otherwise false.</returns>
  inline bool wildcard_match(const std::string& value, const std::string& pattern)
  {
    std::vector<std::string> discarded_captures;
    const bool is_match = wildcard_match(value, pattern, discarded_captures);
    return is_match;
  }

} // namespace bin2cpp
74+
75+
#endif //BIN2CPP_WILDCARD_H

test/bin2cpp_unittest/CMakeLists.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,13 +228,20 @@ add_custom_target(build_test_files ALL
228228

229229
# Show all generated files in a common folder
230230
source_group("Generated Files" FILES ${GENERATED_TEST_FILES})
231-
source_group("External Files" FILES ${CMAKE_SOURCE_DIR}/src/bin2cpp/common.cpp ${CMAKE_SOURCE_DIR}/src/bin2cpp/common.h)
231+
source_group("External Files" FILES
232+
${CMAKE_SOURCE_DIR}/src/bin2cpp/common.cpp
233+
${CMAKE_SOURCE_DIR}/src/bin2cpp/common.h
234+
${CMAKE_SOURCE_DIR}/src/bin2cpp/wildcard.cpp
235+
${CMAKE_SOURCE_DIR}/src/bin2cpp/wildcard.h
236+
)
232237

233238
add_executable(bin2cpp_unittest
234239
${BIN2CPP_VERSION_HEADER}
235240
${BIN2CPP_CONFIG_HEADER}
236241
${CMAKE_SOURCE_DIR}/src/bin2cpp/common.cpp
237242
${CMAKE_SOURCE_DIR}/src/bin2cpp/common.h
243+
${CMAKE_SOURCE_DIR}/src/bin2cpp/wildcard.cpp
244+
${CMAKE_SOURCE_DIR}/src/bin2cpp/wildcard.h
238245
application.cpp
239246
application.h
240247
CMakeLists.txt
@@ -248,6 +255,8 @@ add_executable(bin2cpp_unittest
248255
TestCommon.h
249256
TestExtraction.cpp
250257
TestExtraction.h
258+
TestWildcard.cpp
259+
TestWildcard.h
251260
${GENERATED_TEST_FILES}
252261
)
253262

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/**********************************************************************************
2+
* MIT License
3+
*
4+
* Copyright (c) 2018 Antoine Beauchamp
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*********************************************************************************/
24+
25+
#include "TestWildcard.h"
26+
27+
#include "wildcard.h"
28+
29+
// Fixture hook run before each test; this fixture has nothing to prepare.
void TestWildcard::SetUp() {}
32+
33+
// Fixture hook run after each test; this fixture has nothing to release.
void TestWildcard::TearDown() {}
36+
37+
// Converts a boolean to its textual form, "true" or "false", for use in test failure messages.
std::string to_boolean_str(bool value)
{
  if ( value )
  {
    return "true";
  }
  return "false";
}
41+
42+
// Table-driven test: each entry gives a value, a pattern, the expected match result
// and the expected captures. Entries expected to fail must leave the captures empty.
TEST_F(TestWildcard, testBasicExamples)
{
  struct TESTVALUE
  {
    const char* value;
    const char* pattern;
    bool expected_result;
    std::vector<std::string> expected_captures;
  };
  static const TESTVALUE test_values[] = {
    // ============================== matches ==============================
    // ?
    {"a", "?", true, {"a"}},
    {"abc", "???", true, {"a", "b", "c"}},
    {"kernel32.dll", "kernel??.dll", true, {"3", "2"}},
    {"kernel32.dll", "kernel32.???", true, {"d", "l", "l"}},
    {"kernel32.dll", "???nel32.dll", true, {"k", "e", "r"}},

    // *
    {"kernel32.dll", "*", true, {"kernel32.dll"}},
    {"kernel32.dll", "ker*.dll", true, {"nel32"}},
    {"kernel32.dll", "kernel32.*", true, {"dll"}},
    {"kernel32.dll", "*.dll", true, {"kernel32"}},

    // empty '*' wildcard
    {"kernel32.dll", "*kernel32.dll", true, {""}},
    {"kernel32.dll", "kernel32*.dll", true, {""}},
    {"kernel32.dll", "kernel32.dll*", true, {""}},

    // #
    {"kernel32.dll", "kernel##.dll", true, {"3", "2"}},

    // [abc]
    {"kernel32.dll", "[Kk]ernel32.dll", true, {"k"}},
    {"kernel32.dll", "kernel32.[dD][lL][lL]", true, {"d", "l", "l"}},
    {"kernel32.dll", "ke[r]nel32.dll", true, {"r"}},

    // [ranges]
    {"kernel32.dll", "kernel[0-9][0-9].dll", true, {"3", "2"}},
    {"kernel32.dll", "kernel32.[a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9]", true, {"d", "l", "l"}},

    // complex, multiple wildcard
    {"kernel32.dll", "ker*.*", true, {"nel32", "dll"}},
    {"kernel32.dll", "*##.???", true, {"kernel", "3", "2", "d", "l", "l"}},
    {"aabbccdd", "*??*dd", true, {"", "a", "a", "bbcc"}},

    // ============================== failing matches ==============================
    {"kernel32.dll", "ker*.txt", false, {}},

    // too many '?' character
    {"kernel32.dll", "kernel32?.dll", false, {}},
    {"kernel32.dll", "kernel32.dll?", false, {}},
    {"kernel32.dll", "?kernel32.dll", false, {}},

    {"kernel32.dll", "k##nel32.dll", false, {}},
    {"kernel32.dll", "[aA]ernel32.dll", false, {}},
    {"kernel32.dll", "k[0-9]ernel32.dll", false, {}},
    {"!", "[a-zA-Z0-9]", false, {}},
  };
  static const size_t num_test_values = sizeof(test_values) / sizeof(test_values[0]);

  for ( size_t i = 0; i < num_test_values; i++ )
  {
    const TESTVALUE& t = test_values[i];

    std::vector<std::string> actual_captures;
    bool actual_result = bin2cpp::wildcard_match(t.value, t.pattern, actual_captures);

    // Check the boolean result first: the captures are only meaningful once the result is the expected one.
    ASSERT_EQ(actual_result, t.expected_result) << "Test fail with test_values[" << i << "]. The match between value '" << t.value << "' and pattern '" << t.pattern << "' is supposed to return '" << to_boolean_str(t.expected_result) << "' but it actually returned '" << to_boolean_str(actual_result) << "'.";
    ASSERT_EQ(actual_captures, t.expected_captures) << "Test fail with test_values[" << i << "]. The match between value '" << t.value << "' and pattern '" << t.pattern << "' has returned '" << to_boolean_str(actual_result) << "' but the expected captures do not match.";
  }
}

0 commit comments

Comments
 (0)