|
1 | 1 | use git2::Repository; |
2 | 2 | use regex::Regex; |
| 3 | +use serde::{Deserialize, Serialize}; |
3 | 4 | use std::env; |
4 | 5 | use std::fs; |
5 | 6 |
|
6 | | -pub fn extract_links_from_repo_url(repo_url: &str) -> Result<Vec<String>, git2::Error> { |
| 7 | +#[derive(Debug, Clone, Serialize, Deserialize)] |
| 8 | +/// Represents a hyperlink found in a repository, along with its location. |
| 9 | +pub struct LinkInfo { |
| 10 | + /// The URL string. This should be a valid HTTP or HTTPS URL. |
| 11 | + pub url: String, |
| 12 | + /// The relative file path where the URL was found. |
| 13 | + pub file_path: String, |
| 14 | + /// The 1-based line number in the file where the URL was found. |
| 15 | + pub line_number: usize, |
| 16 | +} |
| 17 | + |
| 18 | +pub fn extract_links_from_repo_url(repo_url: &str) -> Result<Vec<LinkInfo>, git2::Error> { |
7 | 19 | let temp_dir = env::temp_dir().join("queensac_temp_repo"); |
8 | 20 | let _temp_dir_guard = TempDirGuard::new(temp_dir.clone()).map_err(|e| { |
9 | 21 | git2::Error::from_str(&format!("Failed to create temporary directory: {}", e)) |
10 | 22 | })?; |
11 | 23 | let repo = Repository::clone(repo_url, &temp_dir)?; |
12 | 24 |
|
13 | | - let mut all_links = Vec::new(); |
| 25 | +    let mut all_links = Vec::new(); // TODO: use a HashSet to deduplicate links. |
14 | 26 | let url_regex = Regex::new(r"https?://(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)").unwrap(); |
15 | 27 |
|
16 | 28 | if let Ok(head) = repo.head() { |
17 | 29 | if let Ok(tree) = head.peel_to_tree() { |
18 | | - tree.walk(git2::TreeWalkMode::PreOrder, |_, entry| { |
19 | | - if entry.name().is_some() { |
| 30 | + tree.walk(git2::TreeWalkMode::PreOrder, |dir, entry| { |
| 31 | + if let Some(name) = entry.name() { |
| 32 | + let file_path = if dir.is_empty() { |
| 33 | + name.to_string() |
| 34 | + } else { |
| 35 | + format!("{}/{}", dir, name) |
| 36 | + }; |
| 37 | + |
20 | 38 | if let Ok(blob) = entry.to_object(&repo) { |
21 | 39 | if let Ok(blob) = blob.peel_to_blob() { |
22 | 40 | if let Ok(content) = String::from_utf8(blob.content().to_vec()) { |
23 | | - all_links.extend(url_regex.find_iter(&content).map(|mat| { |
24 | | - let url = mat.as_str(); |
25 | | - url.trim_end_matches(&[')', '>', '.', ',', ';'][..]) |
26 | | - .to_string() |
27 | | - })); |
| 41 | +                        // Scan each line individually so we can record line numbers. |
| 42 | + for (line_num, line) in content.lines().enumerate() { |
| 43 | + for mat in url_regex.find_iter(line) { |
| 44 | + let url = mat |
| 45 | + .as_str() |
| 46 | + .trim_end_matches(&[')', '>', '.', ',', ';'][..]) |
| 47 | + .to_string(); |
| 48 | + |
| 49 | + all_links.push(LinkInfo { |
| 50 | + url, |
| 51 | + file_path: file_path.clone(), |
| 52 | + line_number: line_num + 1, // 1-based line number |
| 53 | + }); |
| 54 | + } |
| 55 | + } |
28 | 56 | } |
29 | 57 | } |
30 | 58 | } |
@@ -71,7 +99,13 @@ mod tests { |
71 | 99 |
|
72 | 100 | let url_regex = Regex::new(r"https?://(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)").unwrap(); |
73 | 101 | for link in &links { |
74 | | - assert!(url_regex.is_match(link), "Invalid URL found: {}", link); |
| 102 | + assert!( |
| 103 | + url_regex.is_match(&link.url), |
| 104 | + "Invalid URL found: {} at {}:{}", |
| 105 | + link.url, |
| 106 | + link.file_path, |
| 107 | + link.line_number |
| 108 | + ); |
75 | 109 | } |
76 | 110 |
|
77 | 111 | Ok(()) |
|
0 commit comments