@@ -4,7 +4,8 @@ use std::collections::HashSet;
44
55use crate :: RepoManager ;
66
7- const REGEX_URL : & str = r"https?://(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)" ;
/// Pattern for an `http`/`https` URL whose host contains at least one dot,
/// followed by an optional run of path/query characters.
///
/// Numeric hosts such as `192.168.1.1` also satisfy this pattern, so callers
/// that want to exclude them have to filter with [`REGEX_IP_ADDRESS`].
const REGEX_DOMAIN: &str = r"https?://(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)";

/// Pattern for a URL whose host is `localhost` or a dotted-quad IPv4 address,
/// with an optional `:port`.
///
/// It is meant to be tested against a single, already-extracted URL:
/// - `^` pins the check to the URL's own host, so a public URL that merely
///   embeds an IP URL later on (e.g. in a query string) is not classified as
///   an IP link.
/// - The trailing group requires the host (and port) to be followed by the
///   start of a path/query/fragment, or by nothing but non-alphanumeric
///   characters (trailing punctuation) up to the end. This keeps hostnames
///   such as `localhost.run` or `127.0.0.1.nip.io` from being treated as
///   local addresses. The `regex` crate has no look-ahead, hence the explicit
///   alternation.
const REGEX_IP_ADDRESS: &str = r"^https?://(localhost|(?:\d{1,3}\.){3}\d{1,3})(?::\d+)?(?:[/?#]|[^a-zA-Z0-9]*$)";
89
910#[ derive( Debug , Clone , Serialize , Deserialize ) ]
1011/// Represents a hyperlink found in a repository, along with its location.
@@ -65,11 +66,16 @@ pub fn extract_links_from_repo_url(
6566}
6667
6768fn find_link_in_content ( content : & str , file_path : String ) -> HashSet < LinkInfo > {
68- let url_regex = Regex :: new ( REGEX_URL ) . unwrap ( ) ;
69+ let domain_regex = Regex :: new ( REGEX_DOMAIN ) . unwrap ( ) ;
70+ let ip_address_regex = Regex :: new ( REGEX_IP_ADDRESS ) . unwrap ( ) ;
6971 let mut result = HashSet :: new ( ) ;
7072
7173 for ( line_num, line) in content. lines ( ) . enumerate ( ) {
72- for mat in url_regex. find_iter ( line) {
74+ for mat in domain_regex. find_iter ( line) {
75+ if ip_address_regex. is_match ( mat. as_str ( ) ) {
76+ continue ;
77+ }
78+
7379 let url = mat
7480 . as_str ( )
7581 . trim_end_matches ( & [ ')' , '>' , '.' , ',' , ';' ] [ ..] )
@@ -124,6 +130,22 @@ mod tests {
124130 }
125131 }
126132
#[test]
fn test_skip_ip_addresses() {
    // Every URL in this sample points at a raw IPv4 address or at localhost
    // (with and without a path, query string or port), so the extractor is
    // expected to report nothing at all.
    let source_name = String::from("test.txt");
    let sample = r#"
            http://192.168.1.1
            http://192.168.1.1/path
            http://192.168.1.1/path?param=value
            this is localhost ip address http://127.0.0.1
            front server http://localhost:3000
            backend server http://localhost:8080
        "#;

    let found = find_link_in_content(sample, source_name);

    assert!(found.is_empty(), "Expected no links");
}
148+
127149 #[ test]
128150 fn test_link_info_uniqueness ( ) {
129151 let mut links = HashSet :: new ( ) ;
@@ -193,10 +215,10 @@ mod tests {
193215
194216 assert ! ( !result. is_empty( ) , "No links found in the repository" ) ;
195217
196- let url_regex = Regex :: new ( REGEX_URL ) . unwrap ( ) ;
218+ let domain_regex = Regex :: new ( REGEX_DOMAIN ) . unwrap ( ) ;
197219 for link in & result {
198220 assert ! (
199- url_regex . is_match( & link. url) ,
221+ domain_regex . is_match( & link. url) ,
200222 "Invalid URL found: {} at {}:{}" ,
201223 link. url,
202224 link. file_path,
0 commit comments