-
-
Notifications
You must be signed in to change notification settings - Fork 7.6k
feat: let KMP algorithm return index #2713
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
cd4222b
fix: KMP algorithm (#2712)
Yancey2023 5cfd691
feat: let the KMP algorithm return index and add more tests.
Yancey2023 ea369aa
feat: format code
Yancey2023 a2d2a0a
Update strings/knuth_morris_pratt.cpp
realstealthninja c4219a3
Merge branch 'master' into patch-1
realstealthninja 0d7f61b
Merge branch 'master' into patch-1
realstealthninja 45a751e
Merge branch 'master' into patch-1
realstealthninja 9255738
update knuth_morris_pratt.cpp
Yancey2023 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,95 +1,98 @@ | ||
/**
 * @file
 * @brief The [Knuth-Morris-Pratt
 * Algorithm](https://en.wikipedia.org/wiki/Knuth–Morris–Pratt_algorithm) for
 * finding a pattern within a piece of text with complexity O(n + m)
 *
 * @details
 * 1. Preprocess pattern to identify any suffixes that are identical to
 * prefixes. This tells us where to continue from if we get a mismatch between a
 * character in our pattern and the text.
 * 2. Step through the text one character at a time and compare it to a
 * character in the pattern updating our location within the pattern if
 * necessary
 * @author [Yancey](https://github.com/Yancey2023)
 */
|
||
#include <iostream> | ||
#ifdef _MSC_VER | ||
#include <string> // use this for MS Visual C++ | ||
#else | ||
#include <cstring> | ||
#endif | ||
#include <vector> | ||
#include <cassert> /// for assert | ||
#include <iostream> /// for IO operations | ||
#include <string> /// for std::string | ||
#include <vector> /// for std::vector | ||
|
||
/**
 * @namespace string_search
 * @brief String search algorithms
 */
namespace string_search {
/**
 * @brief Generate the partial match table aka failure function for a pattern to
 * search.
 * @details For `i >= 1`, `failure[i]` is the length of the longest proper
 * prefix of the first `i` characters of `pattern` that is also a suffix of
 * them. `failure[0]` holds the sentinel `std::string::npos`, which tells the
 * matcher that no shorter candidate exists and the text must advance.
 * @param pattern text for which to create the partial match table
 * @returns the partial match table as a vector with `pattern.size() + 1`
 * entries
 */
std::vector<size_t> getFailureArray(const std::string &pattern) {
    const size_t pattern_length = pattern.size();
    std::vector<size_t> failure(pattern_length + 1);
    failure[0] = std::string::npos;
    size_t j = std::string::npos;
    for (size_t i = 0; i < pattern_length; i++) {
        // fall back to ever shorter borders until one can be extended
        while (j != std::string::npos && pattern[j] != pattern[i]) {
            j = failure[j];
        }
        // npos is the largest size_t value, so ++j wraps it around to 0;
        // unsigned wrap-around is well defined
        failure[i + 1] = ++j;
    }
    return failure;
}

/**
 * @brief KMP algorithm to find a pattern in a text
 * @param pattern string pattern to search
 * @param text text in which to search
 * @returns the starting index of the first occurrence of the pattern if found
 * (`0` for an empty pattern)
 * @returns `std::string::npos` if not found
 */
size_t kmp(const std::string &pattern, const std::string &text) {
    if (pattern.empty()) {
        return 0;
    }
    const size_t text_length = text.size();
    const size_t pattern_length = pattern.size();
    if (pattern_length > text_length) {
        // a pattern longer than the text can never match
        return std::string::npos;
    }
    const std::vector<size_t> failure = getFailureArray(pattern);
    size_t k = 0;  // number of pattern characters matched so far
    for (size_t j = 0; j < text_length; j++) {
        while (k != std::string::npos && pattern[k] != text[j]) {
            k = failure[k];
        }
        // ++k turns the npos sentinel back into 0 (nothing matched yet)
        if (++k == pattern_length) {
            // j is the index of the last matched character
            return j + 1 - k;
        }
    }
    return std::string::npos;
}
}  // namespace string_search
|
||
using string_search::kmp; | ||
|
||
/** Main function */ | ||
int main() { | ||
std::string text = "alskfjaldsabc1abc1abc12k23adsfabcabc"; | ||
std::string pattern = "abc1abc12l"; | ||
|
||
if (kmp(pattern, text) == true) { | ||
std::cout << "Found" << std::endl; | ||
} else { | ||
std::cout << "Not Found" << std::endl; | ||
} | ||
/** | ||
* @brief self-test implementations | ||
* @returns void | ||
*/ | ||
static void tests() { | ||
assert(kmp("abc1abc12l", "alskfjaldsabc1abc1abc12k2") == std::string::npos); | ||
assert(kmp("bca", "abcabc") == 1); | ||
assert(kmp("World", "helloWorld") == 5); | ||
assert(kmp("c++", "his_is_c++") == 7); | ||
assert(kmp("happy", "happy_coding") == 0); | ||
assert(kmp("", "pattern is empty") == 0); | ||
|
||
text = "abcabc"; | ||
pattern = "bca"; | ||
if (kmp(pattern, text) == true) { | ||
std::cout << "Found" << std::endl; | ||
} else { | ||
std::cout << "Not Found" << std::endl; | ||
} | ||
// this lets the user know that the tests have passed | ||
std::cout << "All KMP algorithm tests have successfully passed!\n"; | ||
} | ||
|
||
/* | ||
* @brief Main function | ||
* @returns 0 on exit | ||
*/ | ||
int main() { | ||
tests(); | ||
return 0; | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.