-
Notifications
You must be signed in to change notification settings - Fork 1
링크 추출 시 IP 주소(ex: http://127.0.0.1)는 제외 #163
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,8 @@ use std::collections::HashSet; | |
|
|
||
| use crate::RepoManager; | ||
|
|
||
| const REGEX_URL: &str = r"https?://(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)"; | ||
| const REGEX_DOMAIN: &str = r"https?://(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)"; | ||
| const REGEX_IP_ADDRESS: &str = r"https?://(localhost|(?:\d{1,3}\.){3}\d{1,3})(?::\d+)?"; | ||
|
|
||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||
| /// Represents a hyperlink found in a repository, along with its location. | ||
|
|
@@ -65,11 +66,16 @@ pub fn extract_links_from_repo_url( | |
| } | ||
|
|
||
| fn find_link_in_content(content: &str, file_path: String) -> HashSet<LinkInfo> { | ||
| let url_regex = Regex::new(REGEX_URL).unwrap(); | ||
| let domain_regex = Regex::new(REGEX_DOMAIN).unwrap(); | ||
|
||
| let ip_address_regex = Regex::new(REGEX_IP_ADDRESS).unwrap(); | ||
| let mut result = HashSet::new(); | ||
|
|
||
| for (line_num, line) in content.lines().enumerate() { | ||
| for mat in url_regex.find_iter(line) { | ||
| for mat in domain_regex.find_iter(line) { | ||
| if ip_address_regex.is_match(mat.as_str()) { | ||
| continue; | ||
| } | ||
|
|
||
| let url = mat | ||
| .as_str() | ||
| .trim_end_matches(&[')', '>', '.', ',', ';'][..]) | ||
|
|
@@ -124,6 +130,22 @@ mod tests { | |
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_skip_ip_addresses() { | ||
| let content = r#" | ||
| http://192.168.1.1 | ||
| http://192.168.1.1/path | ||
| http://192.168.1.1/path?param=value | ||
| this is localhost ip address http://127.0.0.1 | ||
| front server http://localhost:3000 | ||
| backend server http://localhost:8080 | ||
| "#; | ||
|
|
||
| let file_path = "test.txt".to_string(); | ||
| let links = find_link_in_content(content, file_path); | ||
| assert!(links.is_empty(), "Expected no links"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_link_info_uniqueness() { | ||
| let mut links = HashSet::new(); | ||
|
|
@@ -193,10 +215,10 @@ mod tests { | |
|
|
||
| assert!(!result.is_empty(), "No links found in the repository"); | ||
|
|
||
| let url_regex = Regex::new(REGEX_URL).unwrap(); | ||
| let domain_regex = Regex::new(REGEX_DOMAIN).unwrap(); | ||
| for link in &result { | ||
| assert!( | ||
| url_regex.is_match(&link.url), | ||
| domain_regex.is_match(&link.url), | ||
| "Invalid URL found: {} at {}:{}", | ||
| link.url, | ||
| link.file_path, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.