diff --git a/fixtures/fragments/file1.md b/fixtures/fragments/file1.md index 18d11a1cca..a284e84300 100644 --- a/fixtures/fragments/file1.md +++ b/fixtures/fragments/file1.md @@ -83,17 +83,10 @@ Even with fragment checking enabled, the following links must hence succeed: [Link to remote binary file without fragment](https://raw.githubusercontent.com/lycheeverse/lychee/master/fixtures/fragments/zero.bin) [Link to remote binary file with empty fragment](https://raw.githubusercontent.com/lycheeverse/lychee/master/fixtures/fragments/zero.bin#) -## Local file with fragment +## With fragment -For local files URIs with fragment, the fragment checker is invoked and fails to read the content, -but the file checker emits a warning only. The following link hence must succeed as well: +Fragment checking is skipped if the Content-Type header is not "text/html", "text/markdown", or "text/plain" with a URL path ending in ".md". +Even though the URL contains a fragment, the following checks must hence succeed: [Link to local binary file with fragment](zero.bin#fragment) - -## Remote URL with fragment - -Right now, there is not MIME/content type based exclusion for fragment checks in the website checker. -Also, other than the file checker, the website checker throws an error if reading the response body fails. 
-The following link hence must fail: - [Link to remote binary file with fragment](https://raw.githubusercontent.com/lycheeverse/lychee/master/fixtures/fragments/zero.bin#fragment) diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index b9b4350526..9dd7e3703a 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -1889,9 +1889,9 @@ mod cli { "https://raw.githubusercontent.com/lycheeverse/lychee/master/fixtures/fragments/zero.bin#fragment", )) .stdout(contains("34 Total")) - .stdout(contains("28 OK")) + .stdout(contains("29 OK")) // Failures because of missing fragments or failed binary body scan - .stdout(contains("6 Errors")); + .stdout(contains("5 Errors")); } #[test] diff --git a/lychee-lib/src/checker/website.rs b/lychee-lib/src/checker/website.rs index f4393b2899..57292e21a5 100644 --- a/lychee-lib/src/checker/website.rs +++ b/lychee-lib/src/checker/website.rs @@ -1,5 +1,5 @@ use crate::{ - BasicAuthCredentials, ErrorKind, Status, Uri, + BasicAuthCredentials, ErrorKind, FileType, Status, Uri, chain::{Chain, ChainResult, ClientRequestChains, Handler, RequestChain}, quirks::Quirks, retry::RetryExt, @@ -9,7 +9,7 @@ use crate::{ use async_trait::async_trait; use http::{Method, StatusCode}; use octocrab::Octocrab; -use reqwest::{Request, Response}; +use reqwest::{Request, Response, header::CONTENT_TYPE}; use std::{collections::HashSet, time::Duration}; #[derive(Debug, Clone)] @@ -108,7 +108,27 @@ impl WebsiteChecker { && method == Method::GET && response.url().fragment().is_some_and(|x| !x.is_empty()) { - self.check_html_fragment(status, response).await + match response + .headers() + .get(CONTENT_TYPE) + .and_then(|x| x.to_str().ok()) + { + Some(content_type) if content_type.starts_with("text/html") => { + self.check_html_fragment(status, response, FileType::Html) + .await + } + Some(content_type) + if content_type.starts_with("text/markdown") + || (content_type.starts_with("text/plain") + && std::path::Path::new(response.url().path()) + 
.extension() + .is_some_and(|x| x.eq_ignore_ascii_case("md"))) => + { + self.check_html_fragment(status, response, FileType::Markdown) + .await + } + _ => status, + } } else { status } @@ -117,7 +137,12 @@ impl WebsiteChecker { } } - async fn check_html_fragment(&self, status: Status, response: Response) -> Status { + async fn check_html_fragment( + &self, + status: Status, + response: Response, + file_type: FileType, + ) -> Status { let url = response.url().clone(); match response.text().await { Ok(text) => { @@ -126,7 +151,7 @@ impl WebsiteChecker { .check( FragmentInput { content: text, - file_type: crate::FileType::Html, + file_type, }, &url, )