Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 73 additions & 49 deletions src/link_checker/link.rs → src/link_checker/checker.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,67 @@
use crate::{GitHubUrl, RepoManager};
use url::Url;

/// Link checker that owns a single `reqwest::Client` and sends every request
/// issued by its `check_link` method through it.
pub struct LinkChecker {
/// HTTP client built in `LinkChecker::new` with a 5-second timeout and a
/// `Policy::none()` redirect policy.
client: reqwest::Client,
}

impl LinkChecker {
/// Builds a `LinkChecker` around one HTTP client.
///
/// The client is configured with a 5-second request timeout and the
/// `Policy::none()` redirect policy, so `check_link` can inspect redirect
/// responses and their `location` header itself.
///
/// # Errors
///
/// Returns a message beginning with `failed to create Client` when the
/// client builder fails. The underlying builder error is appended so the
/// cause is not lost.
pub fn new() -> Result<Self, String> {
    reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(5))
        .redirect(reqwest::redirect::Policy::none())
        .build()
        .map(|client| LinkChecker { client })
        .map_err(|err| format!("failed to create Client: {err}"))
}

pub async fn check_link(&self, url: &str) -> LinkCheckResult {
let mut attempts = 3;
while attempts > 0 {
match self.client.get(url).send().await {
Ok(res) => {
let status = res.status();
if status.is_success() {
return LinkCheckResult::Valid;
} else if status.is_redirection() {
if let Some(redirect_url) = res.headers().get("location")
&& let Ok(redirect_str) = redirect_url.to_str()
{
if is_trivial_redirect(url, redirect_str) {
return LinkCheckResult::Valid;
}
return LinkCheckResult::Redirect(redirect_str.to_string());
}
return LinkCheckResult::Valid;
} else if status.as_u16() == 404 && url.contains("github.com") {
return handle_github_404(url);
Comment on lines +36 to +37
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

URL host check is fragile.

Using url.contains("github.com") can match unintended URLs like https://notgithub.com or paths containing the substring. Consider parsing the URL and checking the host properly.

-} else if status.as_u16() == 404 && url.contains("github.com") {
+} else if status.as_u16() == 404 && is_github_url(url) {
     return handle_github_404(url);

Add a helper function:

/// Returns `true` when `url` parses successfully and its host is exactly
/// `github.com` or a subdomain of it (for example `gist.github.com`).
///
/// Unparseable URLs and URLs without a host yield `false`.
fn is_github_url(url: &str) -> bool {
    // The host is inspected inside the closure: `host_str()` borrows from the
    // parsed `Url`, so the borrowed `&str` must not escape the closure that
    // owns that `Url`.
    Url::parse(url).is_ok_and(|parsed| {
        parsed
            .host_str()
            .is_some_and(|host| host == "github.com" || host.ends_with(".github.com"))
    })
}
🤖 Prompt for AI Agents
In src/link_checker/checker.rs around lines 40-41, the check using
url.contains("github.com") is fragile and can match unintended strings; replace
it by parsing the URL and checking the host properly (use
Url::parse(url).is_ok_and(|u| u.host_str().is_some_and(|h| h == "github.com" ||
h.ends_with(".github.com"))), which keeps the borrowed host inside the closure
that owns the parsed Url)—add a small helper fn
is_github_url(url: &str) -> bool that implements this logic, use that helper in
place of the contains() call, and ensure the url crate is in scope and parsing
failures are handled by returning false.

} else {
return LinkCheckResult::Invalid(format!("HTTP status code: {status}"));
}
}
Err(e) => {
if attempts == 1 {
return LinkCheckResult::Invalid(format!("Request error: {e}"));
}
}
}
attempts -= 1;
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
}
LinkCheckResult::Invalid("Max retries exceeded".to_string())
}
}

impl Default for LinkChecker {
    /// Creates a checker through `LinkChecker::new`.
    ///
    /// # Panics
    ///
    /// Panics with a message starting with `failed to create LinkChecker`
    /// when `LinkChecker::new` returns an error.
    fn default() -> Self {
        match Self::new() {
            Ok(checker) => checker,
            // Same message shape as `expect`: the text, then the error's `Debug` form.
            Err(err) => panic!("failed to create LinkChecker: {err:?}"),
        }
    }
}

#[derive(Debug, Eq, PartialEq)]
pub enum LinkCheckResult {
Valid,
Expand Down Expand Up @@ -32,48 +93,6 @@ fn handle_github_404(url: &str) -> LinkCheckResult {
}
}

pub async fn check_link(url: &str) -> LinkCheckResult {
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(5))
.redirect(reqwest::redirect::Policy::none())
.build()
.unwrap();

let mut attempts = 3;
while attempts > 0 {
match client.get(url).send().await {
Ok(res) => {
let status = res.status();
if status.is_success() {
return LinkCheckResult::Valid;
} else if status.is_redirection() {
if let Some(redirect_url) = res.headers().get("location")
&& let Ok(redirect_str) = redirect_url.to_str()
{
if is_trivial_redirect(url, redirect_str) {
return LinkCheckResult::Valid;
}
return LinkCheckResult::Redirect(redirect_str.to_string());
}
return LinkCheckResult::Valid;
} else if status.as_u16() == 404 && url.contains("github.com") {
return handle_github_404(url);
} else {
return LinkCheckResult::Invalid(format!("HTTP status code: {status}"));
}
}
Err(e) => {
if attempts == 1 {
return LinkCheckResult::Invalid(format!("Request error: {e}"));
}
}
}
attempts -= 1;
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
}
LinkCheckResult::Invalid("Max retries exceeded".to_string())
}

fn is_trivial_redirect(original: &str, redirect: &str) -> bool {
let orig_url = match Url::parse(original) {
Ok(url) => url,
Expand Down Expand Up @@ -109,29 +128,32 @@ mod tests {

#[tokio::test]
async fn validate_link() {
let link_checker = LinkChecker::default();
let link = "https://redddy.ai";
assert!(matches!(
check_link(link).await,
link_checker.check_link(link).await,
LinkCheckResult::Invalid(_)
));
let link = "https://lazypazy.tistory.com";
assert_eq!(check_link(link).await, LinkCheckResult::Valid);
assert_eq!(link_checker.check_link(link).await, LinkCheckResult::Valid);
}

#[tokio::test]
async fn change_organization_name() {
let link_checker = LinkChecker::default();
let link = "https://github.com/Bibimbap-Team/git-playground";
assert_eq!(
check_link(link).await,
link_checker.check_link(link).await,
LinkCheckResult::Redirect("https://github.com/Coduck-Team/git-playground".to_string())
);
}

#[tokio::test]
async fn change_branch_name() {
let link_checker = LinkChecker::default();
let link = "https://github.com/reddevilmidzy/kingsac/tree/forever";
assert_eq!(
check_link(link).await,
link_checker.check_link(link).await,
LinkCheckResult::Redirect(
"https://github.com/reddevilmidzy/kingsac/tree/lie".to_string()
)
Expand All @@ -140,24 +162,26 @@ mod tests {

#[tokio::test]
async fn change_repository_name() {
let link_checker = LinkChecker::default();
let link = "https://github.com/reddevilmidzy/test-queensac";
assert_eq!(
check_link(link).await,
link_checker.check_link(link).await,
LinkCheckResult::Redirect("https://github.com/reddevilmidzy/kingsac".to_string())
);
}

#[tokio::test]
async fn check_redirect_url() {
let link_checker = LinkChecker::default();
let link = "https://gluesql.org/docs";
assert_eq!(
check_link(link).await,
link_checker.check_link(link).await,
LinkCheckResult::Valid,
"check trivial redirect"
);
let link = "https://gluesql.org/docs/";
assert_eq!(
check_link(link).await,
link_checker.check_link(link).await,
LinkCheckResult::Valid,
"check trivial redirect"
);
Expand Down
4 changes: 2 additions & 2 deletions src/link_checker/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
mod link;
mod checker;
mod service;

pub use link::{LinkCheckResult, check_link};
pub use checker::{LinkCheckResult, LinkChecker};
pub use service::{InvalidLinkInfo, LinkCheckEvent, check_links};
5 changes: 3 additions & 2 deletions src/link_checker/service.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use tracing::{error, info, instrument};

use crate::{LinkCheckResult, check_link, git};
use crate::{LinkCheckResult, LinkChecker, git};

#[derive(Debug)]
pub struct LinkCheckEvent {
Expand Down Expand Up @@ -100,11 +100,12 @@ pub async fn check_links(
}
};

let link_checker = LinkChecker::default();
let mut counters = LinkCheckCounters::new();
let mut invalid_links = Vec::new();

for link in links {
let result = check_link(&link.url).await;
let result = link_checker.check_link(&link.url).await;

counters.increment_total();

Expand Down