Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 27 additions & 5 deletions rook/src/git/link_extractor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use std::collections::HashSet;

use crate::RepoManager;

const REGEX_URL: &str = r"https?://(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)";
const REGEX_DOMAIN: &str = r"https?://(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)";
const REGEX_IP_ADDRESS: &str = r"https?://(localhost|(?:\d{1,3}\.){3}\d{1,3})(?::\d+)?";

#[derive(Debug, Clone, Serialize, Deserialize)]
/// Represents a hyperlink found in a repository, along with its location.
Expand Down Expand Up @@ -65,11 +66,16 @@ pub fn extract_links_from_repo_url(
}

fn find_link_in_content(content: &str, file_path: String) -> HashSet<LinkInfo> {
let url_regex = Regex::new(REGEX_URL).unwrap();
let domain_regex = Regex::new(REGEX_DOMAIN).unwrap();
Copy link

Copilot AI Jul 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] Compiling these regexes on every call can be costly. Consider moving Regex::new(...) into a lazy static (e.g., with once_cell or lazy_static) so they're compiled only once.

Copilot uses AI. Check for mistakes.
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

이건 다른 PR에서 할께요. PR 작게작게 가자.

let ip_address_regex = Regex::new(REGEX_IP_ADDRESS).unwrap();
let mut result = HashSet::new();

for (line_num, line) in content.lines().enumerate() {
for mat in url_regex.find_iter(line) {
for mat in domain_regex.find_iter(line) {
if ip_address_regex.is_match(mat.as_str()) {
continue;
}

let url = mat
.as_str()
.trim_end_matches(&[')', '>', '.', ',', ';'][..])
Expand Down Expand Up @@ -124,6 +130,22 @@ mod tests {
}
}

#[test]
fn test_skip_ip_addresses() {
    // Links targeting raw IPv4 addresses or localhost (with or without a
    // port, path, or query string) must be filtered out by the extractor,
    // so a document containing only such URLs yields an empty result set.
    let content = r#"
http://192.168.1.1
http://192.168.1.1/path
http://192.168.1.1/path?param=value
this is localhost ip address http://127.0.0.1
front server http://localhost:3000
backend server http://localhost:8080
"#;

    let found = find_link_in_content(content, "test.txt".to_string());
    assert!(found.is_empty(), "Expected no links");
}

#[test]
fn test_link_info_uniqueness() {
let mut links = HashSet::new();
Expand Down Expand Up @@ -193,10 +215,10 @@ mod tests {

assert!(!result.is_empty(), "No links found in the repository");

let url_regex = Regex::new(REGEX_URL).unwrap();
let domain_regex = Regex::new(REGEX_DOMAIN).unwrap();
for link in &result {
assert!(
url_regex.is_match(&link.url),
domain_regex.is_match(&link.url),
"Invalid URL found: {} at {}:{}",
link.url,
link.file_path,
Expand Down